{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.010031634190783, "eval_steps": 500, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0200632683815657e-05, "grad_norm": 53923.87890625, "learning_rate": 2e-09, "loss": 11190.6531, "step": 10 }, { "epoch": 4.040126536763131e-05, "grad_norm": 31481.373046875, "learning_rate": 4e-09, "loss": 6702.8453, "step": 20 }, { "epoch": 6.060189805144697e-05, "grad_norm": 94327.2421875, "learning_rate": 6e-09, "loss": 11761.1297, "step": 30 }, { "epoch": 8.080253073526263e-05, "grad_norm": 2396.031982421875, "learning_rate": 8e-09, "loss": 10950.9859, "step": 40 }, { "epoch": 0.00010100316341907829, "grad_norm": 84567.4375, "learning_rate": 1e-08, "loss": 6328.3699, "step": 50 }, { "epoch": 0.00012120379610289395, "grad_norm": 20180.349609375, "learning_rate": 1.2e-08, "loss": 4705.0789, "step": 60 }, { "epoch": 0.0001414044287867096, "grad_norm": 56339.94140625, "learning_rate": 1.4000000000000001e-08, "loss": 2920.2211, "step": 70 }, { "epoch": 0.00016160506147052525, "grad_norm": 94438.09375, "learning_rate": 1.6e-08, "loss": 11749.7031, "step": 80 }, { "epoch": 0.0001818056941543409, "grad_norm": 359109.53125, "learning_rate": 1.8000000000000002e-08, "loss": 7842.2344, "step": 90 }, { "epoch": 0.00020200632683815657, "grad_norm": 144460.0625, "learning_rate": 2e-08, "loss": 11707.0898, "step": 100 }, { "epoch": 0.00022220695952197223, "grad_norm": 19017.02734375, "learning_rate": 2.2000000000000002e-08, "loss": 6195.4258, "step": 110 }, { "epoch": 0.0002424075922057879, "grad_norm": 187837.125, "learning_rate": 2.4e-08, "loss": 11033.1922, "step": 120 }, { "epoch": 0.0002626082248896035, "grad_norm": 228166.671875, "learning_rate": 2.6e-08, "loss": 12855.7203, "step": 130 }, { "epoch": 0.0002828088575734192, "grad_norm": 9420.595703125, "learning_rate": 2.8000000000000003e-08, "loss": 13911.0156, "step": 140 }, { "epoch": 0.00030300949025723485, "grad_norm": 133901.953125, "learning_rate": 3.0000000000000004e-08, "loss": 12800.3633, "step": 150 }, { "epoch": 0.0003232101229410505, "grad_norm": 74125.75, "learning_rate": 3.2e-08, "loss": 16841.6375, "step": 160 }, { "epoch": 0.00034341075562486617, "grad_norm": 5931.787109375, "learning_rate": 3.4e-08, "loss": 13093.9797, "step": 170 }, { "epoch": 0.0003636113883086818, "grad_norm": 96991.3203125, "learning_rate": 3.6000000000000005e-08, "loss": 8154.0461, "step": 180 }, { "epoch": 0.0003838120209924975, "grad_norm": 710426.9375, "learning_rate": 3.8e-08, "loss": 9887.9602, "step": 190 }, { "epoch": 0.00040401265367631315, "grad_norm": 147726.28125, "learning_rate": 4e-08, "loss": 10000.4984, "step": 200 }, { "epoch": 0.0004242132863601288, "grad_norm": 23706.50390625, "learning_rate": 4.2e-08, "loss": 13326.1141, "step": 210 }, { "epoch": 0.00044441391904394446, "grad_norm": 3907.357666015625, "learning_rate": 4.4000000000000004e-08, "loss": 6808.2406, "step": 220 }, { "epoch": 0.0004646145517277601, "grad_norm": 151128.125, "learning_rate": 4.6e-08, "loss": 4694.7812, "step": 230 }, { "epoch": 0.0004848151844115758, "grad_norm": 60076.5703125, "learning_rate": 4.8e-08, "loss": 16183.5938, "step": 240 }, { "epoch": 0.0005050158170953914, "grad_norm": 176168.0625, "learning_rate": 5.0000000000000004e-08, "loss": 14982.325, "step": 250 }, { "epoch": 0.000525216449779207, "grad_norm": 355067.125, "learning_rate": 5.2e-08, "loss": 10628.5781, "step": 260 }, { "epoch": 0.0005454170824630227, "grad_norm": 2258.249267578125, "learning_rate": 5.400000000000001e-08, "loss": 2604.7973, "step": 270 }, { "epoch": 0.0005656177151468384, "grad_norm": 54840.34375, "learning_rate": 5.6000000000000005e-08, "loss": 19849.9625, "step": 280 }, { "epoch": 0.000585818347830654, "grad_norm": 256138.921875, "learning_rate": 5.8e-08, "loss": 15155.9797, "step": 290 }, { "epoch": 0.0006060189805144697, "grad_norm": 405202.625, "learning_rate": 6.000000000000001e-08, "loss": 8962.5992, "step": 300 }, { "epoch": 0.0006262196131982854, "grad_norm": 154683.28125, "learning_rate": 6.2e-08, "loss": 14093.1812, "step": 310 }, { "epoch": 0.000646420245882101, "grad_norm": 4835.3369140625, "learning_rate": 6.4e-08, "loss": 7364.8641, "step": 320 }, { "epoch": 0.0006666208785659167, "grad_norm": 36731.96484375, "learning_rate": 6.600000000000001e-08, "loss": 8496.725, "step": 330 }, { "epoch": 0.0006868215112497323, "grad_norm": 97727.046875, "learning_rate": 6.8e-08, "loss": 14535.7844, "step": 340 }, { "epoch": 0.000707022143933548, "grad_norm": 65981.203125, "learning_rate": 7e-08, "loss": 7268.2555, "step": 350 }, { "epoch": 0.0007272227766173637, "grad_norm": 111682.3203125, "learning_rate": 7.200000000000001e-08, "loss": 5990.5883, "step": 360 }, { "epoch": 0.0007474234093011793, "grad_norm": 25845.736328125, "learning_rate": 7.400000000000001e-08, "loss": 4674.1367, "step": 370 }, { "epoch": 0.000767624041984995, "grad_norm": 45240.98828125, "learning_rate": 7.6e-08, "loss": 7020.4766, "step": 380 }, { "epoch": 0.0007878246746688106, "grad_norm": 88482.0234375, "learning_rate": 7.8e-08, "loss": 5484.6184, "step": 390 }, { "epoch": 0.0008080253073526263, "grad_norm": 769803.6875, "learning_rate": 8e-08, "loss": 14912.2484, "step": 400 }, { "epoch": 0.000828225940036442, "grad_norm": 1450.2479248046875, "learning_rate": 8.200000000000002e-08, "loss": 3157.6937, "step": 410 }, { "epoch": 0.0008484265727202576, "grad_norm": 4920.45361328125, "learning_rate": 8.4e-08, "loss": 9072.7812, "step": 420 }, { "epoch": 0.0008686272054040733, "grad_norm": 14000.513671875, "learning_rate": 8.6e-08, "loss": 6111.4602, "step": 430 }, { "epoch": 0.0008888278380878889, "grad_norm": 113354.3828125, "learning_rate": 8.800000000000001e-08, "loss": 4964.1973, "step": 440 }, { "epoch": 0.0009090284707717046, "grad_norm": 139299.328125, "learning_rate": 9e-08, "loss": 6483.9102, "step": 450 }, { "epoch": 0.0009292291034555202, "grad_norm": 94681.5859375, "learning_rate": 9.2e-08, "loss": 6532.3113, "step": 460 }, { "epoch": 0.0009494297361393359, "grad_norm": 8707.5322265625, "learning_rate": 9.400000000000001e-08, "loss": 4417.5992, "step": 470 }, { "epoch": 0.0009696303688231516, "grad_norm": 4678.69482421875, "learning_rate": 9.6e-08, "loss": 10345.35, "step": 480 }, { "epoch": 0.0009898310015069671, "grad_norm": 283228.09375, "learning_rate": 9.8e-08, "loss": 16157.3203, "step": 490 }, { "epoch": 0.0010100316341907828, "grad_norm": 42487.6796875, "learning_rate": 1.0000000000000001e-07, "loss": 6271.3824, "step": 500 }, { "epoch": 0.0010302322668745984, "grad_norm": 30644.8828125, "learning_rate": 1.0200000000000001e-07, "loss": 7378.6242, "step": 510 }, { "epoch": 0.001050432899558414, "grad_norm": 34058.63671875, "learning_rate": 1.04e-07, "loss": 5153.1023, "step": 520 }, { "epoch": 0.0010706335322422298, "grad_norm": 73918.796875, "learning_rate": 1.0600000000000001e-07, "loss": 4539.6203, "step": 530 }, { "epoch": 0.0010908341649260454, "grad_norm": 11928.5673828125, "learning_rate": 1.0800000000000001e-07, "loss": 4183.957, "step": 540 }, { "epoch": 0.001111034797609861, "grad_norm": 39769.78515625, "learning_rate": 1.1e-07, "loss": 12480.0539, "step": 550 }, { "epoch": 0.0011312354302936767, "grad_norm": 41894.4296875, "learning_rate": 1.1200000000000001e-07, "loss": 13515.3453, "step": 560 }, { "epoch": 0.0011514360629774924, "grad_norm": 33994.19140625, "learning_rate": 1.1400000000000001e-07, "loss": 5308.6777, "step": 570 }, { "epoch": 0.001171636695661308, "grad_norm": 199024.390625, "learning_rate": 1.16e-07, "loss": 4081.0934, "step": 580 }, { "epoch": 0.0011918373283451237, "grad_norm": 24599.05859375, "learning_rate": 1.1800000000000001e-07, "loss": 4688.6086, "step": 590 }, { "epoch": 0.0012120379610289394, "grad_norm": 34193.47265625, "learning_rate": 1.2000000000000002e-07, "loss": 6985.8953, "step": 600 }, { "epoch": 0.001232238593712755, "grad_norm": 122171.6328125, "learning_rate": 1.22e-07, "loss": 6608.7367, "step": 610 }, { "epoch": 0.0012524392263965707, "grad_norm": 193130.015625, "learning_rate": 1.24e-07, "loss": 9689.1953, "step": 620 }, { "epoch": 0.0012726398590803864, "grad_norm": 117169.7265625, "learning_rate": 1.2600000000000002e-07, "loss": 6653.4883, "step": 630 }, { "epoch": 0.001292840491764202, "grad_norm": 19277.9609375, "learning_rate": 1.28e-07, "loss": 11444.1484, "step": 640 }, { "epoch": 0.0013130411244480177, "grad_norm": 30533.201171875, "learning_rate": 1.3e-07, "loss": 6534.2086, "step": 650 }, { "epoch": 0.0013332417571318333, "grad_norm": 32998.06640625, "learning_rate": 1.3200000000000002e-07, "loss": 7290.4141, "step": 660 }, { "epoch": 0.001353442389815649, "grad_norm": 88731.0, "learning_rate": 1.34e-07, "loss": 6324.9395, "step": 670 }, { "epoch": 0.0013736430224994647, "grad_norm": 163031.28125, "learning_rate": 1.36e-07, "loss": 4616.5512, "step": 680 }, { "epoch": 0.0013938436551832803, "grad_norm": 8317.552734375, "learning_rate": 1.3800000000000002e-07, "loss": 7622.1172, "step": 690 }, { "epoch": 0.001414044287867096, "grad_norm": 17513.41796875, "learning_rate": 1.4e-07, "loss": 3420.8336, "step": 700 }, { "epoch": 0.0014342449205509116, "grad_norm": 876.556884765625, "learning_rate": 1.4200000000000003e-07, "loss": 1773.4969, "step": 710 }, { "epoch": 0.0014544455532347273, "grad_norm": 9758.8115234375, "learning_rate": 1.4400000000000002e-07, "loss": 3517.2855, "step": 720 }, { "epoch": 0.001474646185918543, "grad_norm": 84989.25, "learning_rate": 1.46e-07, "loss": 9050.9242, "step": 730 }, { "epoch": 0.0014948468186023586, "grad_norm": 344.9932556152344, "learning_rate": 1.4800000000000003e-07, "loss": 3876.3699, "step": 740 }, { "epoch": 0.0015150474512861743, "grad_norm": 29079.724609375, "learning_rate": 1.5000000000000002e-07, "loss": 2694.5836, "step": 750 }, { "epoch": 0.00153524808396999, "grad_norm": 110780.765625, "learning_rate": 1.52e-07, "loss": 3430.8789, "step": 760 }, { "epoch": 0.0015554487166538056, "grad_norm": 32923.171875, "learning_rate": 1.5400000000000003e-07, "loss": 1710.9211, "step": 770 }, { "epoch": 0.0015756493493376213, "grad_norm": 136050.5, "learning_rate": 1.56e-07, "loss": 3303.3711, "step": 780 }, { "epoch": 0.001595849982021437, "grad_norm": 5649.0830078125, "learning_rate": 1.5800000000000004e-07, "loss": 1151.9271, "step": 790 }, { "epoch": 0.0016160506147052526, "grad_norm": 0.0, "learning_rate": 1.6e-07, "loss": 2342.3744, "step": 800 }, { "epoch": 0.0016362512473890682, "grad_norm": 142335.484375, "learning_rate": 1.62e-07, "loss": 2907.3264, "step": 810 }, { "epoch": 0.001656451880072884, "grad_norm": 146.9744415283203, "learning_rate": 1.6400000000000004e-07, "loss": 5796.2066, "step": 820 }, { "epoch": 0.0016766525127566996, "grad_norm": 17191.080078125, "learning_rate": 1.66e-07, "loss": 2045.8344, "step": 830 }, { "epoch": 0.0016968531454405152, "grad_norm": 10990.34765625, "learning_rate": 1.68e-07, "loss": 1216.6827, "step": 840 }, { "epoch": 0.0017170537781243309, "grad_norm": 49835.70703125, "learning_rate": 1.7000000000000001e-07, "loss": 5094.1359, "step": 850 }, { "epoch": 0.0017372544108081465, "grad_norm": 28776.912109375, "learning_rate": 1.72e-07, "loss": 1044.743, "step": 860 }, { "epoch": 0.0017574550434919622, "grad_norm": 269605.90625, "learning_rate": 1.74e-07, "loss": 3005.1738, "step": 870 }, { "epoch": 0.0017776556761757779, "grad_norm": 39451.890625, "learning_rate": 1.7600000000000001e-07, "loss": 1182.071, "step": 880 }, { "epoch": 0.0017978563088595935, "grad_norm": 39278.44140625, "learning_rate": 1.78e-07, "loss": 1155.6063, "step": 890 }, { "epoch": 0.0018180569415434092, "grad_norm": 212119.546875, "learning_rate": 1.8e-07, "loss": 2523.0363, "step": 900 }, { "epoch": 0.0018382575742272248, "grad_norm": 27779.00390625, "learning_rate": 1.8200000000000002e-07, "loss": 1762.3598, "step": 910 }, { "epoch": 0.0018584582069110405, "grad_norm": 32488.822265625, "learning_rate": 1.84e-07, "loss": 983.6926, "step": 920 }, { "epoch": 0.0018786588395948562, "grad_norm": 12769.9326171875, "learning_rate": 1.86e-07, "loss": 989.6883, "step": 930 }, { "epoch": 0.0018988594722786718, "grad_norm": 65542.2421875, "learning_rate": 1.8800000000000002e-07, "loss": 2831.1389, "step": 940 }, { "epoch": 0.0019190601049624875, "grad_norm": 4519.58154296875, "learning_rate": 1.9e-07, "loss": 827.3979, "step": 950 }, { "epoch": 0.0019392607376463031, "grad_norm": 26085.068359375, "learning_rate": 1.92e-07, "loss": 1941.0563, "step": 960 }, { "epoch": 0.0019594613703301186, "grad_norm": 3401.947265625, "learning_rate": 1.9400000000000002e-07, "loss": 1473.7372, "step": 970 }, { "epoch": 0.0019796620030139342, "grad_norm": 16324.25, "learning_rate": 1.96e-07, "loss": 1216.5513, "step": 980 }, { "epoch": 0.00199986263569775, "grad_norm": 85971.421875, "learning_rate": 1.9800000000000003e-07, "loss": 1252.972, "step": 990 }, { "epoch": 0.0020200632683815656, "grad_norm": 756.13671875, "learning_rate": 2.0000000000000002e-07, "loss": 528.1301, "step": 1000 }, { "epoch": 0.0020402639010653812, "grad_norm": 12201.5576171875, "learning_rate": 2.02e-07, "loss": 763.7943, "step": 1010 }, { "epoch": 0.002060464533749197, "grad_norm": 645.1385498046875, "learning_rate": 2.0400000000000003e-07, "loss": 645.4757, "step": 1020 }, { "epoch": 0.0020806651664330125, "grad_norm": 1174.2017822265625, "learning_rate": 2.0600000000000002e-07, "loss": 659.9264, "step": 1030 }, { "epoch": 0.002100865799116828, "grad_norm": 6592.74609375, "learning_rate": 2.08e-07, "loss": 649.2892, "step": 1040 }, { "epoch": 0.002121066431800644, "grad_norm": 395.39044189453125, "learning_rate": 2.1000000000000003e-07, "loss": 834.6862, "step": 1050 }, { "epoch": 0.0021412670644844595, "grad_norm": 681.7734375, "learning_rate": 2.1200000000000002e-07, "loss": 363.4987, "step": 1060 }, { "epoch": 0.002161467697168275, "grad_norm": 1329.6507568359375, "learning_rate": 2.14e-07, "loss": 256.3153, "step": 1070 }, { "epoch": 0.002181668329852091, "grad_norm": 1161.2518310546875, "learning_rate": 2.1600000000000003e-07, "loss": 139.2401, "step": 1080 }, { "epoch": 0.0022018689625359065, "grad_norm": 923.1829833984375, "learning_rate": 2.1800000000000002e-07, "loss": 426.0063, "step": 1090 }, { "epoch": 0.002222069595219722, "grad_norm": 56266.19921875, "learning_rate": 2.2e-07, "loss": 873.3449, "step": 1100 }, { "epoch": 0.002242270227903538, "grad_norm": 1356.5955810546875, "learning_rate": 2.2200000000000003e-07, "loss": 269.9149, "step": 1110 }, { "epoch": 0.0022624708605873535, "grad_norm": 10559.9951171875, "learning_rate": 2.2400000000000002e-07, "loss": 389.6608, "step": 1120 }, { "epoch": 0.002282671493271169, "grad_norm": 3751.23583984375, "learning_rate": 2.26e-07, "loss": 291.6529, "step": 1130 }, { "epoch": 0.002302872125954985, "grad_norm": 723.4539184570312, "learning_rate": 2.2800000000000003e-07, "loss": 138.6132, "step": 1140 }, { "epoch": 0.0023230727586388005, "grad_norm": 4419.2978515625, "learning_rate": 2.3000000000000002e-07, "loss": 254.6667, "step": 1150 }, { "epoch": 0.002343273391322616, "grad_norm": 2005.2738037109375, "learning_rate": 2.32e-07, "loss": 277.7994, "step": 1160 }, { "epoch": 0.0023634740240064318, "grad_norm": 3394.00830078125, "learning_rate": 2.3400000000000003e-07, "loss": 224.182, "step": 1170 }, { "epoch": 0.0023836746566902474, "grad_norm": 10766.623046875, "learning_rate": 2.3600000000000002e-07, "loss": 347.5208, "step": 1180 }, { "epoch": 0.002403875289374063, "grad_norm": 4218.79443359375, "learning_rate": 2.3800000000000004e-07, "loss": 206.401, "step": 1190 }, { "epoch": 0.0024240759220578788, "grad_norm": 1104.5687255859375, "learning_rate": 2.4000000000000003e-07, "loss": 114.5487, "step": 1200 }, { "epoch": 0.0024442765547416944, "grad_norm": 1300.7989501953125, "learning_rate": 2.42e-07, "loss": 234.2995, "step": 1210 }, { "epoch": 0.00246447718742551, "grad_norm": 1130.30126953125, "learning_rate": 2.44e-07, "loss": 247.4048, "step": 1220 }, { "epoch": 0.0024846778201093257, "grad_norm": 2269.242919921875, "learning_rate": 2.46e-07, "loss": 253.1561, "step": 1230 }, { "epoch": 0.0025048784527931414, "grad_norm": 394.59478759765625, "learning_rate": 2.48e-07, "loss": 165.1429, "step": 1240 }, { "epoch": 0.002525079085476957, "grad_norm": 1333.0682373046875, "learning_rate": 2.5000000000000004e-07, "loss": 201.2458, "step": 1250 }, { "epoch": 0.0025452797181607727, "grad_norm": 1274.926513671875, "learning_rate": 2.5200000000000003e-07, "loss": 261.6738, "step": 1260 }, { "epoch": 0.0025654803508445884, "grad_norm": 483.8572082519531, "learning_rate": 2.54e-07, "loss": 120.1031, "step": 1270 }, { "epoch": 0.002585680983528404, "grad_norm": 10894.4541015625, "learning_rate": 2.56e-07, "loss": 171.6628, "step": 1280 }, { "epoch": 0.0026058816162122197, "grad_norm": 258.6720275878906, "learning_rate": 2.58e-07, "loss": 133.907, "step": 1290 }, { "epoch": 0.0026260822488960354, "grad_norm": 1113.8052978515625, "learning_rate": 2.6e-07, "loss": 222.548, "step": 1300 }, { "epoch": 0.002646282881579851, "grad_norm": 759.9689331054688, "learning_rate": 2.6200000000000004e-07, "loss": 172.5715, "step": 1310 }, { "epoch": 0.0026664835142636667, "grad_norm": 401.5726013183594, "learning_rate": 2.6400000000000003e-07, "loss": 73.3196, "step": 1320 }, { "epoch": 0.0026866841469474823, "grad_norm": 152.62496948242188, "learning_rate": 2.66e-07, "loss": 145.9025, "step": 1330 }, { "epoch": 0.002706884779631298, "grad_norm": 1285.0067138671875, "learning_rate": 2.68e-07, "loss": 105.9384, "step": 1340 }, { "epoch": 0.0027270854123151137, "grad_norm": 670.4628295898438, "learning_rate": 2.7e-07, "loss": 190.21, "step": 1350 }, { "epoch": 0.0027472860449989293, "grad_norm": 803.2987670898438, "learning_rate": 2.72e-07, "loss": 163.7937, "step": 1360 }, { "epoch": 0.002767486677682745, "grad_norm": 1183.79931640625, "learning_rate": 2.7400000000000004e-07, "loss": 133.2012, "step": 1370 }, { "epoch": 0.0027876873103665606, "grad_norm": 7921.79443359375, "learning_rate": 2.7600000000000004e-07, "loss": 160.7435, "step": 1380 }, { "epoch": 0.0028078879430503763, "grad_norm": 872.034423828125, "learning_rate": 2.7800000000000003e-07, "loss": 180.3065, "step": 1390 }, { "epoch": 0.002828088575734192, "grad_norm": 621.529052734375, "learning_rate": 2.8e-07, "loss": 79.1374, "step": 1400 }, { "epoch": 0.0028482892084180076, "grad_norm": 446.7962341308594, "learning_rate": 2.82e-07, "loss": 101.9051, "step": 1410 }, { "epoch": 0.0028684898411018233, "grad_norm": 1207.8538818359375, "learning_rate": 2.8400000000000005e-07, "loss": 134.654, "step": 1420 }, { "epoch": 0.002888690473785639, "grad_norm": 1251.4404296875, "learning_rate": 2.8600000000000005e-07, "loss": 166.572, "step": 1430 }, { "epoch": 0.0029088911064694546, "grad_norm": 1883.7379150390625, "learning_rate": 2.8800000000000004e-07, "loss": 171.6576, "step": 1440 }, { "epoch": 0.0029290917391532703, "grad_norm": 711.5152587890625, "learning_rate": 2.9000000000000003e-07, "loss": 120.1529, "step": 1450 }, { "epoch": 0.002949292371837086, "grad_norm": 550.8199462890625, "learning_rate": 2.92e-07, "loss": 118.2687, "step": 1460 }, { "epoch": 0.0029694930045209016, "grad_norm": 738.0032958984375, "learning_rate": 2.94e-07, "loss": 94.0608, "step": 1470 }, { "epoch": 0.0029896936372047172, "grad_norm": 8268.734375, "learning_rate": 2.9600000000000006e-07, "loss": 168.4, "step": 1480 }, { "epoch": 0.003009894269888533, "grad_norm": 337.34771728515625, "learning_rate": 2.9800000000000005e-07, "loss": 97.4655, "step": 1490 }, { "epoch": 0.0030300949025723486, "grad_norm": 701.8085327148438, "learning_rate": 3.0000000000000004e-07, "loss": 67.8698, "step": 1500 }, { "epoch": 0.0030502955352561642, "grad_norm": 587.8469848632812, "learning_rate": 3.0200000000000003e-07, "loss": 61.4984, "step": 1510 }, { "epoch": 0.00307049616793998, "grad_norm": 387.486328125, "learning_rate": 3.04e-07, "loss": 99.6889, "step": 1520 }, { "epoch": 0.0030906968006237955, "grad_norm": 273.0340576171875, "learning_rate": 3.06e-07, "loss": 113.8831, "step": 1530 }, { "epoch": 0.003110897433307611, "grad_norm": 878.6118774414062, "learning_rate": 3.0800000000000006e-07, "loss": 261.1807, "step": 1540 }, { "epoch": 0.003131098065991427, "grad_norm": 310.3023681640625, "learning_rate": 3.1000000000000005e-07, "loss": 83.4456, "step": 1550 }, { "epoch": 0.0031512986986752425, "grad_norm": 574.5494995117188, "learning_rate": 3.12e-07, "loss": 100.7633, "step": 1560 }, { "epoch": 0.003171499331359058, "grad_norm": 578.2866821289062, "learning_rate": 3.14e-07, "loss": 146.2573, "step": 1570 }, { "epoch": 0.003191699964042874, "grad_norm": 0.0, "learning_rate": 3.160000000000001e-07, "loss": 102.6991, "step": 1580 }, { "epoch": 0.0032119005967266895, "grad_norm": 724.9436645507812, "learning_rate": 3.1800000000000007e-07, "loss": 76.9629, "step": 1590 }, { "epoch": 0.003232101229410505, "grad_norm": 220.40814208984375, "learning_rate": 3.2e-07, "loss": 92.2895, "step": 1600 }, { "epoch": 0.003252301862094321, "grad_norm": 897.6793212890625, "learning_rate": 3.22e-07, "loss": 111.6379, "step": 1610 }, { "epoch": 0.0032725024947781365, "grad_norm": 1624.5076904296875, "learning_rate": 3.24e-07, "loss": 113.7374, "step": 1620 }, { "epoch": 0.003292703127461952, "grad_norm": 427.6394958496094, "learning_rate": 3.26e-07, "loss": 98.5245, "step": 1630 }, { "epoch": 0.003312903760145768, "grad_norm": 626.013427734375, "learning_rate": 3.280000000000001e-07, "loss": 110.2756, "step": 1640 }, { "epoch": 0.0033331043928295835, "grad_norm": 256.1670227050781, "learning_rate": 3.3e-07, "loss": 77.9919, "step": 1650 }, { "epoch": 0.003353305025513399, "grad_norm": 130.15560913085938, "learning_rate": 3.32e-07, "loss": 64.8664, "step": 1660 }, { "epoch": 0.0033735056581972148, "grad_norm": 408.1759338378906, "learning_rate": 3.34e-07, "loss": 90.402, "step": 1670 }, { "epoch": 0.0033937062908810304, "grad_norm": 1482.815673828125, "learning_rate": 3.36e-07, "loss": 80.6527, "step": 1680 }, { "epoch": 0.003413906923564846, "grad_norm": 845.4937133789062, "learning_rate": 3.38e-07, "loss": 143.1215, "step": 1690 }, { "epoch": 0.0034341075562486618, "grad_norm": 176.4639892578125, "learning_rate": 3.4000000000000003e-07, "loss": 77.861, "step": 1700 }, { "epoch": 0.0034543081889324774, "grad_norm": 272.4393005371094, "learning_rate": 3.42e-07, "loss": 97.3008, "step": 1710 }, { "epoch": 0.003474508821616293, "grad_norm": 409.6681823730469, "learning_rate": 3.44e-07, "loss": 106.455, "step": 1720 }, { "epoch": 0.0034947094543001087, "grad_norm": 762.1159057617188, "learning_rate": 3.46e-07, "loss": 87.6736, "step": 1730 }, { "epoch": 0.0035149100869839244, "grad_norm": 370.94580078125, "learning_rate": 3.48e-07, "loss": 111.3797, "step": 1740 }, { "epoch": 0.00353511071966774, "grad_norm": 409.2515869140625, "learning_rate": 3.5000000000000004e-07, "loss": 63.3152, "step": 1750 }, { "epoch": 0.0035553113523515557, "grad_norm": 319.2989196777344, "learning_rate": 3.5200000000000003e-07, "loss": 135.3019, "step": 1760 }, { "epoch": 0.0035755119850353714, "grad_norm": 725.1497192382812, "learning_rate": 3.54e-07, "loss": 148.4355, "step": 1770 }, { "epoch": 0.003595712617719187, "grad_norm": 320.7232666015625, "learning_rate": 3.56e-07, "loss": 83.9237, "step": 1780 }, { "epoch": 0.0036159132504030027, "grad_norm": 283.264892578125, "learning_rate": 3.58e-07, "loss": 56.9223, "step": 1790 }, { "epoch": 0.0036361138830868184, "grad_norm": 741.1524658203125, "learning_rate": 3.6e-07, "loss": 122.0766, "step": 1800 }, { "epoch": 0.003656314515770634, "grad_norm": 525.659912109375, "learning_rate": 3.6200000000000004e-07, "loss": 77.7157, "step": 1810 }, { "epoch": 0.0036765151484544497, "grad_norm": 265.23931884765625, "learning_rate": 3.6400000000000003e-07, "loss": 103.1503, "step": 1820 }, { "epoch": 0.0036967157811382653, "grad_norm": 198.87283325195312, "learning_rate": 3.66e-07, "loss": 181.6109, "step": 1830 }, { "epoch": 0.003716916413822081, "grad_norm": 239.8751983642578, "learning_rate": 3.68e-07, "loss": 124.3553, "step": 1840 }, { "epoch": 0.0037371170465058967, "grad_norm": 222.1876983642578, "learning_rate": 3.7e-07, "loss": 53.033, "step": 1850 }, { "epoch": 0.0037573176791897123, "grad_norm": 745.248046875, "learning_rate": 3.72e-07, "loss": 96.2789, "step": 1860 }, { "epoch": 0.003777518311873528, "grad_norm": 390.7842712402344, "learning_rate": 3.7400000000000004e-07, "loss": 123.4972, "step": 1870 }, { "epoch": 0.0037977189445573436, "grad_norm": 648.6305541992188, "learning_rate": 3.7600000000000003e-07, "loss": 91.3318, "step": 1880 }, { "epoch": 0.0038179195772411593, "grad_norm": 256.4770812988281, "learning_rate": 3.78e-07, "loss": 121.9718, "step": 1890 }, { "epoch": 0.003838120209924975, "grad_norm": 168.90353393554688, "learning_rate": 3.8e-07, "loss": 63.0424, "step": 1900 }, { "epoch": 0.0038583208426087906, "grad_norm": 240.97494506835938, "learning_rate": 3.82e-07, "loss": 51.5701, "step": 1910 }, { "epoch": 0.0038785214752926063, "grad_norm": 474.8635559082031, "learning_rate": 3.84e-07, "loss": 102.6302, "step": 1920 }, { "epoch": 0.003898722107976422, "grad_norm": 171.1105499267578, "learning_rate": 3.8600000000000004e-07, "loss": 105.4091, "step": 1930 }, { "epoch": 0.003918922740660237, "grad_norm": 978.9006958007812, "learning_rate": 3.8800000000000003e-07, "loss": 90.4481, "step": 1940 }, { "epoch": 0.003939123373344053, "grad_norm": 503.3761901855469, "learning_rate": 3.9e-07, "loss": 53.9602, "step": 1950 }, { "epoch": 0.0039593240060278685, "grad_norm": 883.4078369140625, "learning_rate": 3.92e-07, "loss": 104.1193, "step": 1960 }, { "epoch": 0.003979524638711684, "grad_norm": 215.3091583251953, "learning_rate": 3.94e-07, "loss": 84.7708, "step": 1970 }, { "epoch": 0.0039997252713955, "grad_norm": 808.7025146484375, "learning_rate": 3.9600000000000005e-07, "loss": 81.121, "step": 1980 }, { "epoch": 0.0040199259040793155, "grad_norm": 2208.437744140625, "learning_rate": 3.9800000000000004e-07, "loss": 110.1418, "step": 1990 }, { "epoch": 0.004040126536763131, "grad_norm": 1036.852294921875, "learning_rate": 4.0000000000000003e-07, "loss": 65.58, "step": 2000 }, { "epoch": 0.004060327169446947, "grad_norm": 1023.2064208984375, "learning_rate": 4.02e-07, "loss": 72.2675, "step": 2010 }, { "epoch": 0.0040805278021307624, "grad_norm": 309.74053955078125, "learning_rate": 4.04e-07, "loss": 80.9556, "step": 2020 }, { "epoch": 0.004100728434814578, "grad_norm": 2340.11474609375, "learning_rate": 4.06e-07, "loss": 122.8507, "step": 2030 }, { "epoch": 0.004120929067498394, "grad_norm": 812.0780639648438, "learning_rate": 4.0800000000000005e-07, "loss": 82.087, "step": 2040 }, { "epoch": 0.004141129700182209, "grad_norm": 620.1171875, "learning_rate": 4.1000000000000004e-07, "loss": 135.2536, "step": 2050 }, { "epoch": 0.004161330332866025, "grad_norm": 397.59918212890625, "learning_rate": 4.1200000000000004e-07, "loss": 67.8296, "step": 2060 }, { "epoch": 0.004181530965549841, "grad_norm": 1489.7840576171875, "learning_rate": 4.1400000000000003e-07, "loss": 126.8532, "step": 2070 }, { "epoch": 0.004201731598233656, "grad_norm": 616.4906005859375, "learning_rate": 4.16e-07, "loss": 55.3689, "step": 2080 }, { "epoch": 0.004221932230917472, "grad_norm": 667.4445190429688, "learning_rate": 4.18e-07, "loss": 151.7231, "step": 2090 }, { "epoch": 0.004242132863601288, "grad_norm": 166.24490356445312, "learning_rate": 4.2000000000000006e-07, "loss": 50.4639, "step": 2100 }, { "epoch": 0.004262333496285103, "grad_norm": 645.3438110351562, "learning_rate": 4.2200000000000005e-07, "loss": 98.3256, "step": 2110 }, { "epoch": 0.004282534128968919, "grad_norm": 0.0, "learning_rate": 4.2400000000000004e-07, "loss": 78.1808, "step": 2120 }, { "epoch": 0.004302734761652735, "grad_norm": 2541.501953125, "learning_rate": 4.2600000000000003e-07, "loss": 126.9038, "step": 2130 }, { "epoch": 0.00432293539433655, "grad_norm": 1283.16748046875, "learning_rate": 4.28e-07, "loss": 113.4463, "step": 2140 }, { "epoch": 0.004343136027020366, "grad_norm": 223.9454803466797, "learning_rate": 4.3e-07, "loss": 51.7043, "step": 2150 }, { "epoch": 0.004363336659704182, "grad_norm": 393.07379150390625, "learning_rate": 4.3200000000000006e-07, "loss": 95.2752, "step": 2160 }, { "epoch": 0.004383537292387997, "grad_norm": 397.95623779296875, "learning_rate": 4.3400000000000005e-07, "loss": 147.9133, "step": 2170 }, { "epoch": 0.004403737925071813, "grad_norm": 1937.923828125, "learning_rate": 4.3600000000000004e-07, "loss": 131.0532, "step": 2180 }, { "epoch": 0.004423938557755629, "grad_norm": 191.96998596191406, "learning_rate": 4.3800000000000003e-07, "loss": 55.4813, "step": 2190 }, { "epoch": 0.004444139190439444, "grad_norm": 369.5476989746094, "learning_rate": 4.4e-07, "loss": 58.5196, "step": 2200 }, { "epoch": 0.00446433982312326, "grad_norm": 434.48492431640625, "learning_rate": 4.4200000000000007e-07, "loss": 116.2767, "step": 2210 }, { "epoch": 0.004484540455807076, "grad_norm": 574.214599609375, "learning_rate": 4.4400000000000006e-07, "loss": 89.2209, "step": 2220 }, { "epoch": 0.004504741088490891, "grad_norm": 407.47869873046875, "learning_rate": 4.4600000000000005e-07, "loss": 96.8389, "step": 2230 }, { "epoch": 0.004524941721174707, "grad_norm": 496.7345275878906, "learning_rate": 4.4800000000000004e-07, "loss": 70.1286, "step": 2240 }, { "epoch": 0.004545142353858523, "grad_norm": 404.6977844238281, "learning_rate": 4.5000000000000003e-07, "loss": 95.7177, "step": 2250 }, { "epoch": 0.004565342986542338, "grad_norm": 67.34314727783203, "learning_rate": 4.52e-07, "loss": 84.8124, "step": 2260 }, { "epoch": 0.004585543619226154, "grad_norm": 556.2034301757812, "learning_rate": 4.5400000000000007e-07, "loss": 100.2321, "step": 2270 }, { "epoch": 0.00460574425190997, "grad_norm": 1782.4268798828125, "learning_rate": 4.5600000000000006e-07, "loss": 90.9134, "step": 2280 }, { "epoch": 0.004625944884593785, "grad_norm": 1612.2445068359375, "learning_rate": 4.5800000000000005e-07, "loss": 88.7543, "step": 2290 }, { "epoch": 0.004646145517277601, "grad_norm": 628.4484252929688, "learning_rate": 4.6000000000000004e-07, "loss": 99.8417, "step": 2300 }, { "epoch": 0.004666346149961417, "grad_norm": 1752.2567138671875, "learning_rate": 4.6200000000000003e-07, "loss": 146.0064, "step": 2310 }, { "epoch": 0.004686546782645232, "grad_norm": 309.2041931152344, "learning_rate": 4.64e-07, "loss": 63.6057, "step": 2320 }, { "epoch": 0.004706747415329048, "grad_norm": 0.0, "learning_rate": 4.6600000000000007e-07, "loss": 42.3388, "step": 2330 }, { "epoch": 0.0047269480480128636, "grad_norm": 1079.05419921875, "learning_rate": 4.6800000000000006e-07, "loss": 90.5097, "step": 2340 }, { "epoch": 0.004747148680696679, "grad_norm": 1342.211181640625, "learning_rate": 4.7000000000000005e-07, "loss": 98.0591, "step": 2350 }, { "epoch": 0.004767349313380495, "grad_norm": 368.1920471191406, "learning_rate": 4.7200000000000004e-07, "loss": 48.3136, "step": 2360 }, { "epoch": 0.0047875499460643105, "grad_norm": 509.5962829589844, "learning_rate": 4.7400000000000004e-07, "loss": 97.4415, "step": 2370 }, { "epoch": 0.004807750578748126, "grad_norm": 315.66851806640625, "learning_rate": 4.760000000000001e-07, "loss": 73.8353, "step": 2380 }, { "epoch": 0.004827951211431942, "grad_norm": 772.7283325195312, "learning_rate": 4.78e-07, "loss": 92.2587, "step": 2390 }, { "epoch": 0.0048481518441157575, "grad_norm": 176.966796875, "learning_rate": 4.800000000000001e-07, "loss": 72.658, "step": 2400 }, { "epoch": 0.004868352476799573, "grad_norm": 1083.6396484375, "learning_rate": 4.82e-07, "loss": 50.5263, "step": 2410 }, { "epoch": 0.004888553109483389, "grad_norm": 721.8502197265625, "learning_rate": 4.84e-07, "loss": 69.9158, "step": 2420 }, { "epoch": 0.0049087537421672045, "grad_norm": 415.0045471191406, "learning_rate": 4.86e-07, "loss": 50.0465, "step": 2430 }, { "epoch": 0.00492895437485102, "grad_norm": 416.3141784667969, "learning_rate": 4.88e-07, "loss": 80.2151, "step": 2440 }, { "epoch": 0.004949155007534836, "grad_norm": 264.28076171875, "learning_rate": 4.900000000000001e-07, "loss": 91.9892, "step": 2450 }, { "epoch": 0.0049693556402186515, "grad_norm": 591.03125, "learning_rate": 4.92e-07, "loss": 72.2008, "step": 2460 }, { "epoch": 0.004989556272902467, "grad_norm": 296.367431640625, "learning_rate": 4.940000000000001e-07, "loss": 93.1947, "step": 2470 }, { "epoch": 0.005009756905586283, "grad_norm": 571.5848388671875, "learning_rate": 4.96e-07, "loss": 78.4365, "step": 2480 }, { "epoch": 0.0050299575382700985, "grad_norm": 311.3548889160156, "learning_rate": 4.98e-07, "loss": 91.3378, "step": 2490 }, { "epoch": 0.005050158170953914, "grad_norm": 395.5120544433594, "learning_rate": 5.000000000000001e-07, "loss": 110.4002, "step": 2500 }, { "epoch": 0.00507035880363773, "grad_norm": 579.2111206054688, "learning_rate": 5.02e-07, "loss": 84.049, "step": 2510 }, { "epoch": 0.0050905594363215454, "grad_norm": 301.018310546875, "learning_rate": 5.040000000000001e-07, "loss": 91.3211, "step": 2520 }, { "epoch": 0.005110760069005361, "grad_norm": 354.5301208496094, "learning_rate": 5.06e-07, "loss": 123.5217, "step": 2530 }, { "epoch": 0.005130960701689177, "grad_norm": 526.6905517578125, "learning_rate": 5.08e-07, "loss": 60.0568, "step": 2540 }, { "epoch": 0.005151161334372992, "grad_norm": 330.15716552734375, "learning_rate": 5.1e-07, "loss": 57.2245, "step": 2550 }, { "epoch": 0.005171361967056808, "grad_norm": 404.4146728515625, "learning_rate": 5.12e-07, "loss": 57.9126, "step": 2560 }, { "epoch": 0.005191562599740624, "grad_norm": 333.4172058105469, "learning_rate": 5.140000000000001e-07, "loss": 81.0482, "step": 2570 }, { "epoch": 0.005211763232424439, "grad_norm": 153.51580810546875, "learning_rate": 5.16e-07, "loss": 130.1856, "step": 2580 }, { "epoch": 0.005231963865108255, "grad_norm": 509.97943115234375, "learning_rate": 5.180000000000001e-07, "loss": 90.3688, "step": 2590 }, { "epoch": 0.005252164497792071, "grad_norm": 637.9168090820312, "learning_rate": 5.2e-07, "loss": 63.145, "step": 2600 }, { "epoch": 0.005272365130475886, "grad_norm": 4365.08251953125, "learning_rate": 5.22e-07, "loss": 116.8154, "step": 2610 }, { "epoch": 0.005292565763159702, "grad_norm": 130.59400939941406, "learning_rate": 5.240000000000001e-07, "loss": 64.9527, "step": 2620 }, { "epoch": 0.005312766395843518, "grad_norm": 702.160400390625, "learning_rate": 5.26e-07, "loss": 157.5744, "step": 2630 }, { "epoch": 0.005332967028527333, "grad_norm": 1046.60205078125, "learning_rate": 5.280000000000001e-07, "loss": 99.74, "step": 2640 }, { "epoch": 0.005353167661211149, "grad_norm": 896.314208984375, "learning_rate": 5.3e-07, "loss": 97.2923, "step": 2650 }, { "epoch": 0.005373368293894965, "grad_norm": 276.4017639160156, "learning_rate": 5.32e-07, "loss": 69.9471, "step": 2660 }, { "epoch": 0.00539356892657878, "grad_norm": 762.4893188476562, "learning_rate": 5.340000000000001e-07, "loss": 83.9612, "step": 2670 }, { "epoch": 0.005413769559262596, "grad_norm": 491.28704833984375, "learning_rate": 5.36e-07, "loss": 97.4143, "step": 2680 }, { "epoch": 0.005433970191946412, "grad_norm": 498.9420166015625, "learning_rate": 5.380000000000001e-07, "loss": 44.2771, "step": 2690 }, { "epoch": 0.005454170824630227, "grad_norm": 348.0418395996094, "learning_rate": 5.4e-07, "loss": 96.6881, "step": 2700 }, { "epoch": 0.005474371457314043, "grad_norm": 322.1388854980469, "learning_rate": 5.420000000000001e-07, "loss": 87.6681, "step": 2710 }, { "epoch": 0.005494572089997859, "grad_norm": 333.8533020019531, "learning_rate": 5.44e-07, "loss": 40.6599, "step": 2720 }, { "epoch": 0.005514772722681674, "grad_norm": 940.8037719726562, "learning_rate": 5.46e-07, "loss": 93.437, "step": 2730 }, { "epoch": 0.00553497335536549, "grad_norm": 683.1279296875, "learning_rate": 5.480000000000001e-07, "loss": 102.9487, "step": 2740 }, { "epoch": 0.005555173988049306, "grad_norm": 347.8869934082031, "learning_rate": 5.5e-07, "loss": 67.341, "step": 2750 }, { "epoch": 0.005575374620733121, "grad_norm": 1191.3267822265625, "learning_rate": 5.520000000000001e-07, "loss": 123.9453, "step": 2760 }, { "epoch": 0.005595575253416937, "grad_norm": 376.0352783203125, "learning_rate": 5.54e-07, "loss": 63.6353, "step": 2770 }, { "epoch": 0.005615775886100753, "grad_norm": 472.8700866699219, "learning_rate": 5.560000000000001e-07, "loss": 140.3814, "step": 2780 }, { "epoch": 0.005635976518784568, "grad_norm": 148.10728454589844, "learning_rate": 5.580000000000001e-07, "loss": 116.0335, "step": 2790 }, { "epoch": 0.005656177151468384, "grad_norm": 133.42227172851562, "learning_rate": 5.6e-07, "loss": 61.8537, "step": 2800 }, { "epoch": 0.0056763777841522, "grad_norm": 623.093017578125, "learning_rate": 5.620000000000001e-07, "loss": 51.7723, "step": 2810 }, { "epoch": 0.005696578416836015, "grad_norm": 397.35955810546875, "learning_rate": 5.64e-07, "loss": 83.3068, "step": 2820 }, { "epoch": 0.005716779049519831, "grad_norm": 234.75509643554688, "learning_rate": 5.660000000000001e-07, "loss": 78.4024, "step": 2830 }, { "epoch": 0.0057369796822036466, "grad_norm": 672.2842407226562, "learning_rate": 5.680000000000001e-07, "loss": 91.5166, "step": 2840 }, { "epoch": 0.005757180314887462, "grad_norm": 459.7839050292969, "learning_rate": 5.7e-07, "loss": 106.9227, "step": 2850 }, { "epoch": 0.005777380947571278, "grad_norm": 439.89263916015625, "learning_rate": 5.720000000000001e-07, "loss": 63.0749, "step": 2860 }, { "epoch": 0.0057975815802550935, "grad_norm": 295.2407531738281, "learning_rate": 5.74e-07, "loss": 71.3883, "step": 2870 }, { "epoch": 0.005817782212938909, "grad_norm": 746.9201049804688, "learning_rate": 5.760000000000001e-07, "loss": 87.3212, "step": 2880 }, { "epoch": 0.005837982845622725, "grad_norm": 1127.5311279296875, "learning_rate": 5.78e-07, "loss": 66.133, "step": 2890 }, { "epoch": 0.0058581834783065405, "grad_norm": 388.0009460449219, "learning_rate": 5.800000000000001e-07, "loss": 68.9822, "step": 2900 }, { "epoch": 0.005878384110990356, "grad_norm": 658.0363159179688, "learning_rate": 5.820000000000001e-07, "loss": 81.5071, "step": 2910 }, { "epoch": 0.005898584743674172, "grad_norm": 801.0643310546875, "learning_rate": 5.84e-07, "loss": 131.1549, "step": 2920 }, { "epoch": 0.0059187853763579875, "grad_norm": 505.9928894042969, "learning_rate": 5.860000000000001e-07, "loss": 58.5654, "step": 2930 }, { "epoch": 0.005938986009041803, "grad_norm": 120.71829986572266, "learning_rate": 5.88e-07, "loss": 44.8346, "step": 2940 }, { "epoch": 0.005959186641725619, "grad_norm": 836.5155639648438, "learning_rate": 5.900000000000001e-07, "loss": 50.3091, "step": 2950 }, { "epoch": 0.0059793872744094345, "grad_norm": 96.69508361816406, "learning_rate": 5.920000000000001e-07, "loss": 64.0625, "step": 2960 }, { "epoch": 0.00599958790709325, "grad_norm": 215.1513671875, "learning_rate": 5.94e-07, "loss": 101.1558, "step": 2970 }, { "epoch": 0.006019788539777066, "grad_norm": 386.72760009765625, "learning_rate": 5.960000000000001e-07, "loss": 85.0313, "step": 2980 }, { "epoch": 0.0060399891724608815, "grad_norm": 421.5184326171875, "learning_rate": 5.98e-07, "loss": 59.8098, "step": 2990 }, { "epoch": 0.006060189805144697, "grad_norm": 278.1398010253906, "learning_rate": 6.000000000000001e-07, "loss": 39.2784, "step": 3000 }, { "epoch": 0.006080390437828513, "grad_norm": 83.4456558227539, "learning_rate": 6.02e-07, "loss": 56.996, "step": 3010 }, { "epoch": 0.0061005910705123284, "grad_norm": 4074.596923828125, "learning_rate": 6.040000000000001e-07, "loss": 91.0782, "step": 3020 }, { "epoch": 0.006120791703196144, "grad_norm": 703.187744140625, "learning_rate": 6.060000000000001e-07, "loss": 70.4064, "step": 3030 }, { "epoch": 0.00614099233587996, "grad_norm": 388.83740234375, "learning_rate": 6.08e-07, "loss": 65.0244, "step": 3040 }, { "epoch": 0.006161192968563775, "grad_norm": 778.409912109375, "learning_rate": 6.100000000000001e-07, "loss": 66.5418, "step": 3050 }, { "epoch": 0.006181393601247591, "grad_norm": 292.41107177734375, "learning_rate": 6.12e-07, "loss": 73.5113, "step": 3060 }, { "epoch": 0.006201594233931407, "grad_norm": 472.3177490234375, "learning_rate": 6.140000000000001e-07, "loss": 64.3484, "step": 3070 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 6.160000000000001e-07, "loss": 64.0285, "step": 3080 }, { "epoch": 0.006241995499299038, "grad_norm": 366.9558410644531, "learning_rate": 6.180000000000001e-07, "loss": 65.4473, "step": 3090 }, { "epoch": 0.006262196131982854, "grad_norm": 254.2306365966797, "learning_rate": 6.200000000000001e-07, "loss": 57.0732, "step": 3100 }, { "epoch": 0.006282396764666669, "grad_norm": 391.7763977050781, "learning_rate": 6.22e-07, "loss": 85.0918, "step": 3110 }, { "epoch": 0.006302597397350485, "grad_norm": 50.7371826171875, "learning_rate": 6.24e-07, "loss": 45.6437, "step": 3120 }, { "epoch": 0.006322798030034301, "grad_norm": 416.0440979003906, "learning_rate": 6.260000000000001e-07, "loss": 47.1279, "step": 3130 }, { "epoch": 0.006342998662718116, "grad_norm": 560.7935791015625, "learning_rate": 6.28e-07, "loss": 102.8892, "step": 3140 }, { "epoch": 0.006363199295401932, "grad_norm": 163.31375122070312, "learning_rate": 6.3e-07, "loss": 65.3272, "step": 3150 }, { "epoch": 0.006383399928085748, "grad_norm": 1011.8523559570312, "learning_rate": 6.320000000000002e-07, "loss": 126.9466, "step": 3160 }, { "epoch": 0.006403600560769563, "grad_norm": 510.6329040527344, "learning_rate": 6.34e-07, "loss": 75.4278, "step": 3170 }, { "epoch": 0.006423801193453379, "grad_norm": 260.3512878417969, "learning_rate": 6.360000000000001e-07, "loss": 68.827, "step": 3180 }, { "epoch": 0.006444001826137195, "grad_norm": 251.49667358398438, "learning_rate": 6.38e-07, "loss": 68.8889, "step": 3190 }, { "epoch": 0.00646420245882101, "grad_norm": 217.80223083496094, "learning_rate": 6.4e-07, "loss": 88.6401, "step": 3200 }, { "epoch": 0.006484403091504826, "grad_norm": 360.5980529785156, "learning_rate": 6.42e-07, "loss": 100.1559, "step": 3210 }, { "epoch": 0.006504603724188642, "grad_norm": 274.5466613769531, "learning_rate": 6.44e-07, "loss": 63.6319, "step": 3220 }, { "epoch": 0.006524804356872457, "grad_norm": 459.5018310546875, "learning_rate": 6.460000000000001e-07, "loss": 69.2221, "step": 3230 }, { "epoch": 0.006545004989556273, "grad_norm": 939.0576782226562, "learning_rate": 6.48e-07, "loss": 116.141, "step": 3240 }, { "epoch": 0.006565205622240089, "grad_norm": 987.2379150390625, "learning_rate": 6.5e-07, "loss": 101.1766, "step": 3250 }, { "epoch": 0.006585406254923904, "grad_norm": 163.37191772460938, "learning_rate": 6.52e-07, "loss": 67.0046, "step": 3260 }, { "epoch": 0.00660560688760772, "grad_norm": 162.5904083251953, "learning_rate": 6.54e-07, "loss": 79.1543, "step": 3270 }, { "epoch": 0.006625807520291536, "grad_norm": 558.1742553710938, "learning_rate": 6.560000000000002e-07, "loss": 68.6586, "step": 3280 }, { "epoch": 0.006646008152975351, "grad_norm": 885.2354736328125, "learning_rate": 6.58e-07, "loss": 103.7334, "step": 3290 }, { "epoch": 0.006666208785659167, "grad_norm": 432.77313232421875, "learning_rate": 6.6e-07, "loss": 94.9779, "step": 3300 }, { "epoch": 0.006686409418342983, "grad_norm": 213.41763305664062, "learning_rate": 6.62e-07, "loss": 78.0312, "step": 3310 }, { "epoch": 0.006706610051026798, "grad_norm": 547.9923095703125, "learning_rate": 6.64e-07, "loss": 38.0541, "step": 3320 }, { "epoch": 0.006726810683710614, "grad_norm": 644.8719482421875, "learning_rate": 6.660000000000002e-07, "loss": 90.221, "step": 3330 }, { "epoch": 0.0067470113163944296, "grad_norm": 217.31407165527344, "learning_rate": 6.68e-07, "loss": 53.177, "step": 3340 }, { "epoch": 0.006767211949078245, "grad_norm": 669.16015625, "learning_rate": 6.7e-07, "loss": 84.8348, "step": 3350 }, { "epoch": 0.006787412581762061, "grad_norm": 1355.8944091796875, "learning_rate": 6.72e-07, "loss": 58.3167, "step": 3360 }, { "epoch": 0.0068076132144458765, "grad_norm": 404.7748107910156, "learning_rate": 6.74e-07, "loss": 58.0044, "step": 3370 }, { "epoch": 0.006827813847129692, "grad_norm": 504.4396667480469, "learning_rate": 6.76e-07, "loss": 65.1514, "step": 3380 }, { "epoch": 0.006848014479813508, "grad_norm": 144.97848510742188, "learning_rate": 6.78e-07, "loss": 87.1179, "step": 3390 }, { "epoch": 0.0068682151124973235, "grad_norm": 323.25250244140625, "learning_rate": 6.800000000000001e-07, "loss": 54.013, "step": 3400 }, { "epoch": 0.006888415745181139, "grad_norm": 734.9896850585938, "learning_rate": 6.82e-07, "loss": 67.875, "step": 3410 }, { "epoch": 0.006908616377864955, "grad_norm": 427.2493896484375, "learning_rate": 6.84e-07, "loss": 61.3893, "step": 3420 }, { "epoch": 0.0069288170105487705, "grad_norm": 1184.6534423828125, "learning_rate": 6.86e-07, "loss": 52.0654, "step": 3430 }, { "epoch": 0.006949017643232586, "grad_norm": 2005.87548828125, "learning_rate": 6.88e-07, "loss": 139.6364, "step": 3440 }, { "epoch": 0.006969218275916402, "grad_norm": 204.87718200683594, "learning_rate": 6.900000000000001e-07, "loss": 63.3256, "step": 3450 }, { "epoch": 0.0069894189086002175, "grad_norm": 229.4895782470703, "learning_rate": 6.92e-07, "loss": 70.9809, "step": 3460 }, { "epoch": 0.007009619541284033, "grad_norm": 617.552734375, "learning_rate": 6.94e-07, "loss": 91.987, "step": 3470 }, { "epoch": 0.007029820173967849, "grad_norm": 167.73480224609375, "learning_rate": 6.96e-07, "loss": 86.4587, "step": 3480 }, { "epoch": 0.0070500208066516645, "grad_norm": 546.7920532226562, "learning_rate": 6.98e-07, "loss": 61.1811, "step": 3490 }, { "epoch": 0.00707022143933548, "grad_norm": 584.3468627929688, "learning_rate": 7.000000000000001e-07, "loss": 94.3613, "step": 3500 }, { "epoch": 0.007090422072019296, "grad_norm": 96.41542053222656, "learning_rate": 7.02e-07, "loss": 132.8943, "step": 3510 }, { "epoch": 0.0071106227047031114, "grad_norm": 304.6600036621094, "learning_rate": 7.040000000000001e-07, "loss": 52.2359, "step": 3520 }, { "epoch": 0.007130823337386927, "grad_norm": 193.15573120117188, "learning_rate": 7.06e-07, "loss": 40.7302, "step": 3530 }, { "epoch": 0.007151023970070743, "grad_norm": 306.5334777832031, "learning_rate": 7.08e-07, "loss": 49.6074, "step": 3540 }, { "epoch": 0.007171224602754558, "grad_norm": 379.9753112792969, "learning_rate": 7.1e-07, "loss": 75.7879, "step": 3550 }, { "epoch": 0.007191425235438374, "grad_norm": 577.312744140625, "learning_rate": 7.12e-07, "loss": 119.3714, "step": 3560 }, { "epoch": 0.00721162586812219, "grad_norm": 193.5618896484375, "learning_rate": 7.140000000000001e-07, "loss": 53.7637, "step": 3570 }, { "epoch": 0.007231826500806005, "grad_norm": 250.73524475097656, "learning_rate": 7.16e-07, "loss": 38.9904, "step": 3580 }, { "epoch": 0.007252027133489821, "grad_norm": 132.2377471923828, "learning_rate": 7.18e-07, "loss": 76.2111, "step": 3590 }, { "epoch": 0.007272227766173637, "grad_norm": 285.2049560546875, "learning_rate": 7.2e-07, "loss": 57.733, "step": 3600 }, { "epoch": 0.007292428398857452, "grad_norm": 420.5237731933594, "learning_rate": 7.22e-07, "loss": 100.7195, "step": 3610 }, { "epoch": 0.007312629031541268, "grad_norm": 210.8728485107422, "learning_rate": 7.240000000000001e-07, "loss": 46.5242, "step": 3620 }, { "epoch": 0.007332829664225084, "grad_norm": 569.1387939453125, "learning_rate": 7.26e-07, "loss": 81.4837, "step": 3630 }, { "epoch": 0.007353030296908899, "grad_norm": 557.1551513671875, "learning_rate": 7.280000000000001e-07, "loss": 68.7304, "step": 3640 }, { "epoch": 0.007373230929592715, "grad_norm": 312.92474365234375, "learning_rate": 7.3e-07, "loss": 110.908, "step": 3650 }, { "epoch": 0.007393431562276531, "grad_norm": 426.03033447265625, "learning_rate": 7.32e-07, "loss": 81.1232, "step": 3660 }, { "epoch": 0.007413632194960346, "grad_norm": 359.4331970214844, "learning_rate": 7.340000000000001e-07, "loss": 51.9507, "step": 3670 }, { "epoch": 0.007433832827644162, "grad_norm": 347.0911865234375, "learning_rate": 7.36e-07, "loss": 52.275, "step": 3680 }, { "epoch": 0.007454033460327978, "grad_norm": 262.1907043457031, "learning_rate": 7.380000000000001e-07, "loss": 73.5493, "step": 3690 }, { "epoch": 0.007474234093011793, "grad_norm": 552.703125, "learning_rate": 7.4e-07, "loss": 96.4462, "step": 3700 }, { "epoch": 0.007494434725695609, "grad_norm": 266.08892822265625, "learning_rate": 7.420000000000001e-07, "loss": 114.8576, "step": 3710 }, { "epoch": 0.007514635358379425, "grad_norm": 221.34103393554688, "learning_rate": 7.44e-07, "loss": 87.7745, "step": 3720 }, { "epoch": 0.00753483599106324, "grad_norm": 223.4447784423828, "learning_rate": 7.46e-07, "loss": 61.8933, "step": 3730 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 7.480000000000001e-07, "loss": 67.8658, "step": 3740 }, { "epoch": 0.007575237256430872, "grad_norm": 290.140869140625, "learning_rate": 7.5e-07, "loss": 42.5857, "step": 3750 }, { "epoch": 0.007595437889114687, "grad_norm": 0.0, "learning_rate": 7.520000000000001e-07, "loss": 62.6451, "step": 3760 }, { "epoch": 0.007615638521798503, "grad_norm": 257.16168212890625, "learning_rate": 7.54e-07, "loss": 55.0062, "step": 3770 }, { "epoch": 0.007635839154482319, "grad_norm": 2145.101806640625, "learning_rate": 7.56e-07, "loss": 81.785, "step": 3780 }, { "epoch": 0.007656039787166134, "grad_norm": 233.93472290039062, "learning_rate": 7.580000000000001e-07, "loss": 84.2146, "step": 3790 }, { "epoch": 0.00767624041984995, "grad_norm": 555.6514282226562, "learning_rate": 7.6e-07, "loss": 87.3414, "step": 3800 }, { "epoch": 0.007696441052533766, "grad_norm": 228.10726928710938, "learning_rate": 7.620000000000001e-07, "loss": 93.3665, "step": 3810 }, { "epoch": 0.007716641685217581, "grad_norm": 984.4570922851562, "learning_rate": 7.64e-07, "loss": 70.6439, "step": 3820 }, { "epoch": 0.007736842317901397, "grad_norm": 397.5260925292969, "learning_rate": 7.660000000000001e-07, "loss": 50.3701, "step": 3830 }, { "epoch": 0.0077570429505852126, "grad_norm": 165.91798400878906, "learning_rate": 7.68e-07, "loss": 58.4298, "step": 3840 }, { "epoch": 0.007777243583269028, "grad_norm": 162.36831665039062, "learning_rate": 7.7e-07, "loss": 69.1128, "step": 3850 }, { "epoch": 0.007797444215952844, "grad_norm": 635.5795288085938, "learning_rate": 7.720000000000001e-07, "loss": 59.6272, "step": 3860 }, { "epoch": 0.00781764484863666, "grad_norm": 665.6697998046875, "learning_rate": 7.74e-07, "loss": 85.1185, "step": 3870 }, { "epoch": 0.007837845481320474, "grad_norm": 320.8567810058594, "learning_rate": 7.760000000000001e-07, "loss": 99.7412, "step": 3880 }, { "epoch": 0.00785804611400429, "grad_norm": 653.353515625, "learning_rate": 7.78e-07, "loss": 62.5054, "step": 3890 }, { "epoch": 0.007878246746688106, "grad_norm": 280.89288330078125, "learning_rate": 7.8e-07, "loss": 56.3481, "step": 3900 }, { "epoch": 0.007898447379371922, "grad_norm": 562.4810791015625, "learning_rate": 7.820000000000001e-07, "loss": 104.2953, "step": 3910 }, { "epoch": 0.007918648012055737, "grad_norm": 109.06721496582031, "learning_rate": 7.84e-07, "loss": 38.6364, "step": 3920 }, { "epoch": 0.007938848644739553, "grad_norm": 671.8179931640625, "learning_rate": 7.860000000000001e-07, "loss": 71.9219, "step": 3930 }, { "epoch": 0.007959049277423368, "grad_norm": 1626.2884521484375, "learning_rate": 7.88e-07, "loss": 117.0405, "step": 3940 }, { "epoch": 0.007979249910107185, "grad_norm": 423.0011901855469, "learning_rate": 7.900000000000001e-07, "loss": 98.9427, "step": 3950 }, { "epoch": 0.007999450542791, "grad_norm": 249.0320587158203, "learning_rate": 7.920000000000001e-07, "loss": 35.8351, "step": 3960 }, { "epoch": 0.008019651175474816, "grad_norm": 370.9219970703125, "learning_rate": 7.94e-07, "loss": 56.7547, "step": 3970 }, { "epoch": 0.008039851808158631, "grad_norm": 530.4095458984375, "learning_rate": 7.960000000000001e-07, "loss": 67.1569, "step": 3980 }, { "epoch": 0.008060052440842447, "grad_norm": 158.11224365234375, "learning_rate": 7.98e-07, "loss": 108.0477, "step": 3990 }, { "epoch": 0.008080253073526262, "grad_norm": 463.1640930175781, "learning_rate": 8.000000000000001e-07, "loss": 69.416, "step": 4000 }, { "epoch": 0.008100453706210079, "grad_norm": 272.4097595214844, "learning_rate": 8.02e-07, "loss": 55.1751, "step": 4010 }, { "epoch": 0.008120654338893894, "grad_norm": 861.3814697265625, "learning_rate": 8.04e-07, "loss": 95.0951, "step": 4020 }, { "epoch": 0.00814085497157771, "grad_norm": 284.91253662109375, "learning_rate": 8.060000000000001e-07, "loss": 50.293, "step": 4030 }, { "epoch": 0.008161055604261525, "grad_norm": 2034.650390625, "learning_rate": 8.08e-07, "loss": 84.4948, "step": 4040 }, { "epoch": 0.008181256236945341, "grad_norm": 353.3307189941406, "learning_rate": 8.100000000000001e-07, "loss": 94.3878, "step": 4050 }, { "epoch": 0.008201456869629156, "grad_norm": 318.0084533691406, "learning_rate": 8.12e-07, "loss": 79.4199, "step": 4060 }, { "epoch": 0.008221657502312973, "grad_norm": 596.888916015625, "learning_rate": 8.140000000000001e-07, "loss": 77.661, "step": 4070 }, { "epoch": 0.008241858134996788, "grad_norm": 1134.7655029296875, "learning_rate": 8.160000000000001e-07, "loss": 74.8946, "step": 4080 }, { "epoch": 0.008262058767680604, "grad_norm": 272.5075378417969, "learning_rate": 8.18e-07, "loss": 33.778, "step": 4090 }, { "epoch": 0.008282259400364419, "grad_norm": 646.912109375, "learning_rate": 8.200000000000001e-07, "loss": 47.5918, "step": 4100 }, { "epoch": 0.008302460033048235, "grad_norm": 0.0, "learning_rate": 8.22e-07, "loss": 80.6978, "step": 4110 }, { "epoch": 0.00832266066573205, "grad_norm": 1061.0557861328125, "learning_rate": 8.240000000000001e-07, "loss": 96.934, "step": 4120 }, { "epoch": 0.008342861298415867, "grad_norm": 252.33795166015625, "learning_rate": 8.260000000000001e-07, "loss": 49.7461, "step": 4130 }, { "epoch": 0.008363061931099681, "grad_norm": 451.06182861328125, "learning_rate": 8.280000000000001e-07, "loss": 62.4195, "step": 4140 }, { "epoch": 0.008383262563783498, "grad_norm": 231.14459228515625, "learning_rate": 8.300000000000001e-07, "loss": 95.5714, "step": 4150 }, { "epoch": 0.008403463196467313, "grad_norm": 327.4312438964844, "learning_rate": 8.32e-07, "loss": 48.3054, "step": 4160 }, { "epoch": 0.00842366382915113, "grad_norm": 212.70620727539062, "learning_rate": 8.340000000000001e-07, "loss": 69.7625, "step": 4170 }, { "epoch": 0.008443864461834944, "grad_norm": 434.6066589355469, "learning_rate": 8.36e-07, "loss": 81.8974, "step": 4180 }, { "epoch": 0.00846406509451876, "grad_norm": 239.29566955566406, "learning_rate": 8.380000000000001e-07, "loss": 55.5368, "step": 4190 }, { "epoch": 0.008484265727202575, "grad_norm": 97.5559310913086, "learning_rate": 8.400000000000001e-07, "loss": 64.1812, "step": 4200 }, { "epoch": 0.008504466359886392, "grad_norm": 438.04998779296875, "learning_rate": 8.42e-07, "loss": 64.864, "step": 4210 }, { "epoch": 0.008524666992570207, "grad_norm": 280.9324035644531, "learning_rate": 8.440000000000001e-07, "loss": 47.0536, "step": 4220 }, { "epoch": 0.008544867625254023, "grad_norm": 819.087158203125, "learning_rate": 8.46e-07, "loss": 97.6323, "step": 4230 }, { "epoch": 0.008565068257937838, "grad_norm": 226.83958435058594, "learning_rate": 8.480000000000001e-07, "loss": 44.1923, "step": 4240 }, { "epoch": 0.008585268890621655, "grad_norm": 550.0735473632812, "learning_rate": 8.500000000000001e-07, "loss": 73.0341, "step": 4250 }, { "epoch": 0.00860546952330547, "grad_norm": 336.7578125, "learning_rate": 8.520000000000001e-07, "loss": 46.2401, "step": 4260 }, { "epoch": 0.008625670155989286, "grad_norm": 430.9688720703125, "learning_rate": 8.540000000000001e-07, "loss": 79.4178, "step": 4270 }, { "epoch": 0.0086458707886731, "grad_norm": 373.40435791015625, "learning_rate": 8.56e-07, "loss": 78.5011, "step": 4280 }, { "epoch": 0.008666071421356917, "grad_norm": 310.5127868652344, "learning_rate": 8.580000000000001e-07, "loss": 43.2333, "step": 4290 }, { "epoch": 0.008686272054040732, "grad_norm": 188.89881896972656, "learning_rate": 8.6e-07, "loss": 45.9694, "step": 4300 }, { "epoch": 0.008706472686724549, "grad_norm": 238.16249084472656, "learning_rate": 8.620000000000001e-07, "loss": 73.1508, "step": 4310 }, { "epoch": 0.008726673319408363, "grad_norm": 364.6396484375, "learning_rate": 8.640000000000001e-07, "loss": 102.6995, "step": 4320 }, { "epoch": 0.00874687395209218, "grad_norm": 485.0395812988281, "learning_rate": 8.66e-07, "loss": 73.1986, "step": 4330 }, { "epoch": 0.008767074584775995, "grad_norm": 331.8861389160156, "learning_rate": 8.680000000000001e-07, "loss": 70.5903, "step": 4340 }, { "epoch": 0.008787275217459811, "grad_norm": 969.1685180664062, "learning_rate": 8.7e-07, "loss": 84.9042, "step": 4350 }, { "epoch": 0.008807475850143626, "grad_norm": 228.73049926757812, "learning_rate": 8.720000000000001e-07, "loss": 45.2126, "step": 4360 }, { "epoch": 0.008827676482827443, "grad_norm": 264.8658752441406, "learning_rate": 8.740000000000001e-07, "loss": 39.3901, "step": 4370 }, { "epoch": 0.008847877115511257, "grad_norm": 494.5270080566406, "learning_rate": 8.760000000000001e-07, "loss": 58.1788, "step": 4380 }, { "epoch": 0.008868077748195074, "grad_norm": 557.9083862304688, "learning_rate": 8.780000000000001e-07, "loss": 53.9349, "step": 4390 }, { "epoch": 0.008888278380878889, "grad_norm": 89.04096221923828, "learning_rate": 8.8e-07, "loss": 82.1393, "step": 4400 }, { "epoch": 0.008908479013562705, "grad_norm": 512.5380249023438, "learning_rate": 8.820000000000001e-07, "loss": 47.3848, "step": 4410 }, { "epoch": 0.00892867964624652, "grad_norm": 276.95355224609375, "learning_rate": 8.840000000000001e-07, "loss": 57.3783, "step": 4420 }, { "epoch": 0.008948880278930336, "grad_norm": 287.1134033203125, "learning_rate": 8.860000000000001e-07, "loss": 55.039, "step": 4430 }, { "epoch": 0.008969080911614151, "grad_norm": 267.1001281738281, "learning_rate": 8.880000000000001e-07, "loss": 69.6506, "step": 4440 }, { "epoch": 0.008989281544297968, "grad_norm": 649.2479248046875, "learning_rate": 8.900000000000001e-07, "loss": 61.4946, "step": 4450 }, { "epoch": 0.009009482176981783, "grad_norm": 406.3837585449219, "learning_rate": 8.920000000000001e-07, "loss": 108.2006, "step": 4460 }, { "epoch": 0.0090296828096656, "grad_norm": 674.54736328125, "learning_rate": 8.94e-07, "loss": 84.1102, "step": 4470 }, { "epoch": 0.009049883442349414, "grad_norm": 600.7890014648438, "learning_rate": 8.960000000000001e-07, "loss": 79.3854, "step": 4480 }, { "epoch": 0.00907008407503323, "grad_norm": 572.2371215820312, "learning_rate": 8.980000000000001e-07, "loss": 60.3734, "step": 4490 }, { "epoch": 0.009090284707717045, "grad_norm": 379.3972473144531, "learning_rate": 9.000000000000001e-07, "loss": 70.3783, "step": 4500 }, { "epoch": 0.009110485340400862, "grad_norm": 361.14947509765625, "learning_rate": 9.020000000000001e-07, "loss": 46.8975, "step": 4510 }, { "epoch": 0.009130685973084677, "grad_norm": 1072.69091796875, "learning_rate": 9.04e-07, "loss": 73.2144, "step": 4520 }, { "epoch": 0.009150886605768493, "grad_norm": 327.35418701171875, "learning_rate": 9.060000000000001e-07, "loss": 50.3718, "step": 4530 }, { "epoch": 0.009171087238452308, "grad_norm": 1424.2442626953125, "learning_rate": 9.080000000000001e-07, "loss": 86.4619, "step": 4540 }, { "epoch": 0.009191287871136124, "grad_norm": 533.746337890625, "learning_rate": 9.100000000000001e-07, "loss": 56.2301, "step": 4550 }, { "epoch": 0.00921148850381994, "grad_norm": 193.33677673339844, "learning_rate": 9.120000000000001e-07, "loss": 63.7453, "step": 4560 }, { "epoch": 0.009231689136503756, "grad_norm": 408.7142028808594, "learning_rate": 9.140000000000001e-07, "loss": 37.8314, "step": 4570 }, { "epoch": 0.00925188976918757, "grad_norm": 276.11761474609375, "learning_rate": 9.160000000000001e-07, "loss": 75.9652, "step": 4580 }, { "epoch": 0.009272090401871387, "grad_norm": 379.5744934082031, "learning_rate": 9.180000000000001e-07, "loss": 27.9624, "step": 4590 }, { "epoch": 0.009292291034555202, "grad_norm": 271.5459289550781, "learning_rate": 9.200000000000001e-07, "loss": 59.3876, "step": 4600 }, { "epoch": 0.009312491667239018, "grad_norm": 633.37841796875, "learning_rate": 9.220000000000001e-07, "loss": 54.8147, "step": 4610 }, { "epoch": 0.009332692299922833, "grad_norm": 537.9527587890625, "learning_rate": 9.240000000000001e-07, "loss": 91.1402, "step": 4620 }, { "epoch": 0.00935289293260665, "grad_norm": 901.7552490234375, "learning_rate": 9.260000000000001e-07, "loss": 67.0893, "step": 4630 }, { "epoch": 0.009373093565290464, "grad_norm": 203.12220764160156, "learning_rate": 9.28e-07, "loss": 39.8242, "step": 4640 }, { "epoch": 0.009393294197974281, "grad_norm": 242.10169982910156, "learning_rate": 9.300000000000001e-07, "loss": 54.106, "step": 4650 }, { "epoch": 0.009413494830658096, "grad_norm": 427.68878173828125, "learning_rate": 9.320000000000001e-07, "loss": 67.1843, "step": 4660 }, { "epoch": 0.009433695463341912, "grad_norm": 1889.26025390625, "learning_rate": 9.340000000000001e-07, "loss": 76.9373, "step": 4670 }, { "epoch": 0.009453896096025727, "grad_norm": 683.579345703125, "learning_rate": 9.360000000000001e-07, "loss": 45.6459, "step": 4680 }, { "epoch": 0.009474096728709544, "grad_norm": 309.3541564941406, "learning_rate": 9.380000000000001e-07, "loss": 48.1851, "step": 4690 }, { "epoch": 0.009494297361393358, "grad_norm": 504.6006164550781, "learning_rate": 9.400000000000001e-07, "loss": 58.9366, "step": 4700 }, { "epoch": 0.009514497994077175, "grad_norm": 215.350341796875, "learning_rate": 9.420000000000002e-07, "loss": 109.1762, "step": 4710 }, { "epoch": 0.00953469862676099, "grad_norm": 310.1644287109375, "learning_rate": 9.440000000000001e-07, "loss": 53.0887, "step": 4720 }, { "epoch": 0.009554899259444806, "grad_norm": 1641.510986328125, "learning_rate": 9.460000000000001e-07, "loss": 101.7377, "step": 4730 }, { "epoch": 0.009575099892128621, "grad_norm": 725.5571899414062, "learning_rate": 9.480000000000001e-07, "loss": 43.9996, "step": 4740 }, { "epoch": 0.009595300524812438, "grad_norm": 146.89962768554688, "learning_rate": 9.500000000000001e-07, "loss": 46.4426, "step": 4750 }, { "epoch": 0.009615501157496252, "grad_norm": 598.1292724609375, "learning_rate": 9.520000000000002e-07, "loss": 60.877, "step": 4760 }, { "epoch": 0.009635701790180069, "grad_norm": 234.6605224609375, "learning_rate": 9.54e-07, "loss": 62.6214, "step": 4770 }, { "epoch": 0.009655902422863884, "grad_norm": 104.78776550292969, "learning_rate": 9.56e-07, "loss": 40.8794, "step": 4780 }, { "epoch": 0.0096761030555477, "grad_norm": 83.54643249511719, "learning_rate": 9.58e-07, "loss": 89.3152, "step": 4790 }, { "epoch": 0.009696303688231515, "grad_norm": 143.19361877441406, "learning_rate": 9.600000000000001e-07, "loss": 64.8195, "step": 4800 }, { "epoch": 0.009716504320915332, "grad_norm": 0.0, "learning_rate": 9.62e-07, "loss": 38.7168, "step": 4810 }, { "epoch": 0.009736704953599146, "grad_norm": 81.60344696044922, "learning_rate": 9.64e-07, "loss": 34.7054, "step": 4820 }, { "epoch": 0.009756905586282963, "grad_norm": 159.15573120117188, "learning_rate": 9.660000000000002e-07, "loss": 67.257, "step": 4830 }, { "epoch": 0.009777106218966778, "grad_norm": 127.69849395751953, "learning_rate": 9.68e-07, "loss": 51.138, "step": 4840 }, { "epoch": 0.009797306851650594, "grad_norm": 147.4921875, "learning_rate": 9.7e-07, "loss": 66.1911, "step": 4850 }, { "epoch": 0.009817507484334409, "grad_norm": 421.4625549316406, "learning_rate": 9.72e-07, "loss": 92.7325, "step": 4860 }, { "epoch": 0.009837708117018226, "grad_norm": 555.5694580078125, "learning_rate": 9.740000000000001e-07, "loss": 55.303, "step": 4870 }, { "epoch": 0.00985790874970204, "grad_norm": 212.89842224121094, "learning_rate": 9.76e-07, "loss": 38.8169, "step": 4880 }, { "epoch": 0.009878109382385857, "grad_norm": 214.94821166992188, "learning_rate": 9.78e-07, "loss": 58.6234, "step": 4890 }, { "epoch": 0.009898310015069672, "grad_norm": 233.50970458984375, "learning_rate": 9.800000000000001e-07, "loss": 38.7064, "step": 4900 }, { "epoch": 0.009918510647753488, "grad_norm": 201.45559692382812, "learning_rate": 9.82e-07, "loss": 55.6037, "step": 4910 }, { "epoch": 0.009938711280437303, "grad_norm": 359.8877868652344, "learning_rate": 9.84e-07, "loss": 21.9692, "step": 4920 }, { "epoch": 0.00995891191312112, "grad_norm": 148.5193634033203, "learning_rate": 9.86e-07, "loss": 110.8313, "step": 4930 }, { "epoch": 0.009979112545804934, "grad_norm": 352.68670654296875, "learning_rate": 9.880000000000001e-07, "loss": 52.8, "step": 4940 }, { "epoch": 0.00999931317848875, "grad_norm": 155.9781951904297, "learning_rate": 9.9e-07, "loss": 64.2732, "step": 4950 }, { "epoch": 0.010019513811172566, "grad_norm": 462.4185485839844, "learning_rate": 9.92e-07, "loss": 58.8974, "step": 4960 }, { "epoch": 0.010039714443856382, "grad_norm": 466.60101318359375, "learning_rate": 9.940000000000001e-07, "loss": 77.7639, "step": 4970 }, { "epoch": 0.010059915076540197, "grad_norm": 153.48069763183594, "learning_rate": 9.96e-07, "loss": 87.2155, "step": 4980 }, { "epoch": 0.010080115709224013, "grad_norm": 213.0729217529297, "learning_rate": 9.98e-07, "loss": 34.6899, "step": 4990 }, { "epoch": 0.010100316341907828, "grad_norm": 205.93505859375, "learning_rate": 1.0000000000000002e-06, "loss": 17.777, "step": 5000 }, { "epoch": 0.010120516974591645, "grad_norm": 391.05419921875, "learning_rate": 1.002e-06, "loss": 75.1621, "step": 5010 }, { "epoch": 0.01014071760727546, "grad_norm": 415.27191162109375, "learning_rate": 1.004e-06, "loss": 40.2877, "step": 5020 }, { "epoch": 0.010160918239959276, "grad_norm": 248.3871307373047, "learning_rate": 1.006e-06, "loss": 53.1042, "step": 5030 }, { "epoch": 0.010181118872643091, "grad_norm": 147.35623168945312, "learning_rate": 1.0080000000000001e-06, "loss": 48.1084, "step": 5040 }, { "epoch": 0.010201319505326907, "grad_norm": 774.68994140625, "learning_rate": 1.01e-06, "loss": 47.644, "step": 5050 }, { "epoch": 0.010221520138010722, "grad_norm": 453.93475341796875, "learning_rate": 1.012e-06, "loss": 39.2599, "step": 5060 }, { "epoch": 0.010241720770694539, "grad_norm": 784.4867553710938, "learning_rate": 1.0140000000000002e-06, "loss": 71.6847, "step": 5070 }, { "epoch": 0.010261921403378354, "grad_norm": 316.0508728027344, "learning_rate": 1.016e-06, "loss": 36.6935, "step": 5080 }, { "epoch": 0.01028212203606217, "grad_norm": 574.6668701171875, "learning_rate": 1.018e-06, "loss": 90.5564, "step": 5090 }, { "epoch": 0.010302322668745985, "grad_norm": 542.2628173828125, "learning_rate": 1.02e-06, "loss": 64.977, "step": 5100 }, { "epoch": 0.010322523301429801, "grad_norm": 0.0, "learning_rate": 1.0220000000000001e-06, "loss": 35.8991, "step": 5110 }, { "epoch": 0.010342723934113616, "grad_norm": 139.60174560546875, "learning_rate": 1.024e-06, "loss": 55.9987, "step": 5120 }, { "epoch": 0.010362924566797433, "grad_norm": 496.6190490722656, "learning_rate": 1.026e-06, "loss": 55.0757, "step": 5130 }, { "epoch": 0.010383125199481247, "grad_norm": 903.2974243164062, "learning_rate": 1.0280000000000002e-06, "loss": 98.7816, "step": 5140 }, { "epoch": 0.010403325832165064, "grad_norm": 1704.468017578125, "learning_rate": 1.03e-06, "loss": 83.7941, "step": 5150 }, { "epoch": 0.010423526464848879, "grad_norm": 392.80694580078125, "learning_rate": 1.032e-06, "loss": 49.3805, "step": 5160 }, { "epoch": 0.010443727097532695, "grad_norm": 769.7274780273438, "learning_rate": 1.0340000000000002e-06, "loss": 54.265, "step": 5170 }, { "epoch": 0.01046392773021651, "grad_norm": 262.495849609375, "learning_rate": 1.0360000000000001e-06, "loss": 49.4835, "step": 5180 }, { "epoch": 0.010484128362900327, "grad_norm": 354.4650573730469, "learning_rate": 1.038e-06, "loss": 58.4036, "step": 5190 }, { "epoch": 0.010504328995584141, "grad_norm": 319.3759765625, "learning_rate": 1.04e-06, "loss": 77.3174, "step": 5200 }, { "epoch": 0.010524529628267958, "grad_norm": 344.6273498535156, "learning_rate": 1.0420000000000001e-06, "loss": 51.2461, "step": 5210 }, { "epoch": 0.010544730260951773, "grad_norm": 460.6028747558594, "learning_rate": 1.044e-06, "loss": 74.9375, "step": 5220 }, { "epoch": 0.01056493089363559, "grad_norm": 230.86172485351562, "learning_rate": 1.046e-06, "loss": 75.7538, "step": 5230 }, { "epoch": 0.010585131526319404, "grad_norm": 254.6680450439453, "learning_rate": 1.0480000000000002e-06, "loss": 94.6375, "step": 5240 }, { "epoch": 0.01060533215900322, "grad_norm": 450.4857482910156, "learning_rate": 1.0500000000000001e-06, "loss": 60.7722, "step": 5250 }, { "epoch": 0.010625532791687035, "grad_norm": 107.6410140991211, "learning_rate": 1.052e-06, "loss": 106.6686, "step": 5260 }, { "epoch": 0.010645733424370852, "grad_norm": 270.1158752441406, "learning_rate": 1.054e-06, "loss": 78.6808, "step": 5270 }, { "epoch": 0.010665934057054667, "grad_norm": 148.73060607910156, "learning_rate": 1.0560000000000001e-06, "loss": 43.5331, "step": 5280 }, { "epoch": 0.010686134689738483, "grad_norm": 289.5079345703125, "learning_rate": 1.058e-06, "loss": 45.197, "step": 5290 }, { "epoch": 0.010706335322422298, "grad_norm": 868.6351928710938, "learning_rate": 1.06e-06, "loss": 66.1827, "step": 5300 }, { "epoch": 0.010726535955106115, "grad_norm": 21.161470413208008, "learning_rate": 1.0620000000000002e-06, "loss": 51.2971, "step": 5310 }, { "epoch": 0.01074673658778993, "grad_norm": 97.87639617919922, "learning_rate": 1.064e-06, "loss": 44.4276, "step": 5320 }, { "epoch": 0.010766937220473746, "grad_norm": 521.3983154296875, "learning_rate": 1.066e-06, "loss": 64.9885, "step": 5330 }, { "epoch": 0.01078713785315756, "grad_norm": 89.270263671875, "learning_rate": 1.0680000000000002e-06, "loss": 70.2955, "step": 5340 }, { "epoch": 0.010807338485841377, "grad_norm": 128.8278350830078, "learning_rate": 1.0700000000000001e-06, "loss": 27.768, "step": 5350 }, { "epoch": 0.010827539118525192, "grad_norm": 376.74713134765625, "learning_rate": 1.072e-06, "loss": 106.4042, "step": 5360 }, { "epoch": 0.010847739751209009, "grad_norm": 203.68896484375, "learning_rate": 1.074e-06, "loss": 70.999, "step": 5370 }, { "epoch": 0.010867940383892823, "grad_norm": 485.4808044433594, "learning_rate": 1.0760000000000002e-06, "loss": 48.0643, "step": 5380 }, { "epoch": 0.01088814101657664, "grad_norm": 504.2447814941406, "learning_rate": 1.078e-06, "loss": 100.0353, "step": 5390 }, { "epoch": 0.010908341649260455, "grad_norm": 446.1485900878906, "learning_rate": 1.08e-06, "loss": 73.879, "step": 5400 }, { "epoch": 0.010928542281944271, "grad_norm": 1484.305908203125, "learning_rate": 1.0820000000000002e-06, "loss": 48.5967, "step": 5410 }, { "epoch": 0.010948742914628086, "grad_norm": 198.24713134765625, "learning_rate": 1.0840000000000001e-06, "loss": 131.6296, "step": 5420 }, { "epoch": 0.010968943547311902, "grad_norm": 544.75048828125, "learning_rate": 1.086e-06, "loss": 113.2358, "step": 5430 }, { "epoch": 0.010989144179995717, "grad_norm": 242.82785034179688, "learning_rate": 1.088e-06, "loss": 44.9892, "step": 5440 }, { "epoch": 0.011009344812679534, "grad_norm": 625.7445678710938, "learning_rate": 1.0900000000000002e-06, "loss": 35.7564, "step": 5450 }, { "epoch": 0.011029545445363349, "grad_norm": 190.70973205566406, "learning_rate": 1.092e-06, "loss": 65.1648, "step": 5460 }, { "epoch": 0.011049746078047165, "grad_norm": 119.76960754394531, "learning_rate": 1.094e-06, "loss": 37.5588, "step": 5470 }, { "epoch": 0.01106994671073098, "grad_norm": 625.7455444335938, "learning_rate": 1.0960000000000002e-06, "loss": 55.0435, "step": 5480 }, { "epoch": 0.011090147343414796, "grad_norm": 189.6007080078125, "learning_rate": 1.0980000000000001e-06, "loss": 26.3923, "step": 5490 }, { "epoch": 0.011110347976098611, "grad_norm": 280.0751953125, "learning_rate": 1.1e-06, "loss": 53.7186, "step": 5500 }, { "epoch": 0.011130548608782428, "grad_norm": 389.1221008300781, "learning_rate": 1.1020000000000002e-06, "loss": 74.9716, "step": 5510 }, { "epoch": 0.011150749241466243, "grad_norm": 235.479736328125, "learning_rate": 1.1040000000000001e-06, "loss": 64.5132, "step": 5520 }, { "epoch": 0.011170949874150059, "grad_norm": 269.3278503417969, "learning_rate": 1.106e-06, "loss": 48.4141, "step": 5530 }, { "epoch": 0.011191150506833874, "grad_norm": 374.6064147949219, "learning_rate": 1.108e-06, "loss": 43.2091, "step": 5540 }, { "epoch": 0.01121135113951769, "grad_norm": 293.3682556152344, "learning_rate": 1.1100000000000002e-06, "loss": 51.9987, "step": 5550 }, { "epoch": 0.011231551772201505, "grad_norm": 200.1291046142578, "learning_rate": 1.1120000000000001e-06, "loss": 49.3602, "step": 5560 }, { "epoch": 0.011251752404885322, "grad_norm": 1394.725830078125, "learning_rate": 1.114e-06, "loss": 64.0705, "step": 5570 }, { "epoch": 0.011271953037569137, "grad_norm": 152.62893676757812, "learning_rate": 1.1160000000000002e-06, "loss": 51.071, "step": 5580 }, { "epoch": 0.011292153670252953, "grad_norm": 520.5482788085938, "learning_rate": 1.1180000000000001e-06, "loss": 49.0973, "step": 5590 }, { "epoch": 0.011312354302936768, "grad_norm": 81.66506958007812, "learning_rate": 1.12e-06, "loss": 49.4622, "step": 5600 }, { "epoch": 0.011332554935620584, "grad_norm": 196.39434814453125, "learning_rate": 1.122e-06, "loss": 39.1781, "step": 5610 }, { "epoch": 0.0113527555683044, "grad_norm": 247.77777099609375, "learning_rate": 1.1240000000000002e-06, "loss": 63.0183, "step": 5620 }, { "epoch": 0.011372956200988216, "grad_norm": 326.1257019042969, "learning_rate": 1.126e-06, "loss": 32.5829, "step": 5630 }, { "epoch": 0.01139315683367203, "grad_norm": 274.5641174316406, "learning_rate": 1.128e-06, "loss": 44.1051, "step": 5640 }, { "epoch": 0.011413357466355847, "grad_norm": 799.1707153320312, "learning_rate": 1.1300000000000002e-06, "loss": 72.0528, "step": 5650 }, { "epoch": 0.011433558099039662, "grad_norm": 798.7354125976562, "learning_rate": 1.1320000000000001e-06, "loss": 62.9749, "step": 5660 }, { "epoch": 0.011453758731723478, "grad_norm": 689.4683837890625, "learning_rate": 1.134e-06, "loss": 56.8538, "step": 5670 }, { "epoch": 0.011473959364407293, "grad_norm": 91.51876068115234, "learning_rate": 1.1360000000000002e-06, "loss": 52.9392, "step": 5680 }, { "epoch": 0.01149415999709111, "grad_norm": 604.7167358398438, "learning_rate": 1.1380000000000002e-06, "loss": 85.9244, "step": 5690 }, { "epoch": 0.011514360629774924, "grad_norm": 278.5229797363281, "learning_rate": 1.14e-06, "loss": 36.0941, "step": 5700 }, { "epoch": 0.011534561262458741, "grad_norm": 141.50462341308594, "learning_rate": 1.142e-06, "loss": 46.1342, "step": 5710 }, { "epoch": 0.011554761895142556, "grad_norm": 183.74325561523438, "learning_rate": 1.1440000000000002e-06, "loss": 72.641, "step": 5720 }, { "epoch": 0.011574962527826372, "grad_norm": 335.34368896484375, "learning_rate": 1.1460000000000001e-06, "loss": 60.9087, "step": 5730 }, { "epoch": 0.011595163160510187, "grad_norm": 600.9601440429688, "learning_rate": 1.148e-06, "loss": 55.4076, "step": 5740 }, { "epoch": 0.011615363793194004, "grad_norm": 184.05262756347656, "learning_rate": 1.1500000000000002e-06, "loss": 30.1972, "step": 5750 }, { "epoch": 0.011635564425877818, "grad_norm": 272.0022277832031, "learning_rate": 1.1520000000000002e-06, "loss": 45.1121, "step": 5760 }, { "epoch": 0.011655765058561635, "grad_norm": 185.00238037109375, "learning_rate": 1.154e-06, "loss": 46.5943, "step": 5770 }, { "epoch": 0.01167596569124545, "grad_norm": 487.53546142578125, "learning_rate": 1.156e-06, "loss": 77.1075, "step": 5780 }, { "epoch": 0.011696166323929266, "grad_norm": 630.9016723632812, "learning_rate": 1.1580000000000002e-06, "loss": 67.3831, "step": 5790 }, { "epoch": 0.011716366956613081, "grad_norm": 590.6622314453125, "learning_rate": 1.1600000000000001e-06, "loss": 64.0027, "step": 5800 }, { "epoch": 0.011736567589296898, "grad_norm": 328.929443359375, "learning_rate": 1.162e-06, "loss": 88.1448, "step": 5810 }, { "epoch": 0.011756768221980712, "grad_norm": 595.9011840820312, "learning_rate": 1.1640000000000002e-06, "loss": 48.9839, "step": 5820 }, { "epoch": 0.011776968854664529, "grad_norm": 113.38141632080078, "learning_rate": 1.1660000000000001e-06, "loss": 41.3584, "step": 5830 }, { "epoch": 0.011797169487348344, "grad_norm": 145.9916229248047, "learning_rate": 1.168e-06, "loss": 44.1648, "step": 5840 }, { "epoch": 0.01181737012003216, "grad_norm": 422.31072998046875, "learning_rate": 1.1700000000000002e-06, "loss": 43.1625, "step": 5850 }, { "epoch": 0.011837570752715975, "grad_norm": 244.43748474121094, "learning_rate": 1.1720000000000002e-06, "loss": 39.2552, "step": 5860 }, { "epoch": 0.011857771385399792, "grad_norm": 741.5382080078125, "learning_rate": 1.1740000000000001e-06, "loss": 85.1464, "step": 5870 }, { "epoch": 0.011877972018083606, "grad_norm": 137.34132385253906, "learning_rate": 1.176e-06, "loss": 39.7684, "step": 5880 }, { "epoch": 0.011898172650767423, "grad_norm": 168.93310546875, "learning_rate": 1.1780000000000002e-06, "loss": 44.8789, "step": 5890 }, { "epoch": 0.011918373283451238, "grad_norm": 527.37109375, "learning_rate": 1.1800000000000001e-06, "loss": 59.5935, "step": 5900 }, { "epoch": 0.011938573916135054, "grad_norm": 727.48388671875, "learning_rate": 1.182e-06, "loss": 62.9447, "step": 5910 }, { "epoch": 0.011958774548818869, "grad_norm": 856.63037109375, "learning_rate": 1.1840000000000002e-06, "loss": 59.4102, "step": 5920 }, { "epoch": 0.011978975181502685, "grad_norm": 248.2429656982422, "learning_rate": 1.1860000000000002e-06, "loss": 35.7237, "step": 5930 }, { "epoch": 0.0119991758141865, "grad_norm": 2163.494873046875, "learning_rate": 1.188e-06, "loss": 74.9237, "step": 5940 }, { "epoch": 0.012019376446870317, "grad_norm": 178.40087890625, "learning_rate": 1.19e-06, "loss": 56.371, "step": 5950 }, { "epoch": 0.012039577079554132, "grad_norm": 165.2622833251953, "learning_rate": 1.1920000000000002e-06, "loss": 68.1716, "step": 5960 }, { "epoch": 0.012059777712237948, "grad_norm": 293.82452392578125, "learning_rate": 1.1940000000000001e-06, "loss": 71.9859, "step": 5970 }, { "epoch": 0.012079978344921763, "grad_norm": 326.6194763183594, "learning_rate": 1.196e-06, "loss": 81.718, "step": 5980 }, { "epoch": 0.01210017897760558, "grad_norm": 283.8609924316406, "learning_rate": 1.1980000000000002e-06, "loss": 44.4809, "step": 5990 }, { "epoch": 0.012120379610289394, "grad_norm": 391.8148193359375, "learning_rate": 1.2000000000000002e-06, "loss": 29.84, "step": 6000 }, { "epoch": 0.01214058024297321, "grad_norm": 1112.9716796875, "learning_rate": 1.202e-06, "loss": 63.5214, "step": 6010 }, { "epoch": 0.012160780875657026, "grad_norm": 563.9324340820312, "learning_rate": 1.204e-06, "loss": 53.1909, "step": 6020 }, { "epoch": 0.012180981508340842, "grad_norm": 228.2725830078125, "learning_rate": 1.2060000000000002e-06, "loss": 42.3938, "step": 6030 }, { "epoch": 0.012201182141024657, "grad_norm": 72.51380920410156, "learning_rate": 1.2080000000000001e-06, "loss": 45.15, "step": 6040 }, { "epoch": 0.012221382773708473, "grad_norm": 252.85032653808594, "learning_rate": 1.21e-06, "loss": 37.6156, "step": 6050 }, { "epoch": 0.012241583406392288, "grad_norm": 230.00648498535156, "learning_rate": 1.2120000000000002e-06, "loss": 64.7433, "step": 6060 }, { "epoch": 0.012261784039076105, "grad_norm": 479.84747314453125, "learning_rate": 1.214e-06, "loss": 121.4636, "step": 6070 }, { "epoch": 0.01228198467175992, "grad_norm": 82.25321960449219, "learning_rate": 1.216e-06, "loss": 67.0744, "step": 6080 }, { "epoch": 0.012302185304443734, "grad_norm": 272.3522644042969, "learning_rate": 1.2180000000000002e-06, "loss": 59.3473, "step": 6090 }, { "epoch": 0.01232238593712755, "grad_norm": 243.43600463867188, "learning_rate": 1.2200000000000002e-06, "loss": 84.6064, "step": 6100 }, { "epoch": 0.012342586569811366, "grad_norm": 440.7323913574219, "learning_rate": 1.2220000000000001e-06, "loss": 50.6768, "step": 6110 }, { "epoch": 0.012362787202495182, "grad_norm": 181.9730224609375, "learning_rate": 1.224e-06, "loss": 58.5162, "step": 6120 }, { "epoch": 0.012382987835178997, "grad_norm": 885.5496826171875, "learning_rate": 1.2260000000000002e-06, "loss": 58.3693, "step": 6130 }, { "epoch": 0.012403188467862813, "grad_norm": 539.1258544921875, "learning_rate": 1.2280000000000001e-06, "loss": 49.8868, "step": 6140 }, { "epoch": 0.012423389100546628, "grad_norm": 0.0, "learning_rate": 1.23e-06, "loss": 28.3966, "step": 6150 }, { "epoch": 0.012443589733230445, "grad_norm": 475.5496520996094, "learning_rate": 1.2320000000000002e-06, "loss": 56.5615, "step": 6160 }, { "epoch": 0.01246379036591426, "grad_norm": 108.66901397705078, "learning_rate": 1.234e-06, "loss": 72.028, "step": 6170 }, { "epoch": 0.012483990998598076, "grad_norm": 1397.043212890625, "learning_rate": 1.2360000000000001e-06, "loss": 64.1062, "step": 6180 }, { "epoch": 0.012504191631281891, "grad_norm": 108.44282531738281, "learning_rate": 1.238e-06, "loss": 66.9558, "step": 6190 }, { "epoch": 0.012524392263965707, "grad_norm": 512.5440673828125, "learning_rate": 1.2400000000000002e-06, "loss": 97.0489, "step": 6200 }, { "epoch": 0.012544592896649522, "grad_norm": 525.9227905273438, "learning_rate": 1.2420000000000001e-06, "loss": 67.52, "step": 6210 }, { "epoch": 0.012564793529333339, "grad_norm": 298.5191650390625, "learning_rate": 1.244e-06, "loss": 60.6737, "step": 6220 }, { "epoch": 0.012584994162017154, "grad_norm": 991.5989379882812, "learning_rate": 1.2460000000000002e-06, "loss": 100.3664, "step": 6230 }, { "epoch": 0.01260519479470097, "grad_norm": 971.19970703125, "learning_rate": 1.248e-06, "loss": 54.2717, "step": 6240 }, { "epoch": 0.012625395427384785, "grad_norm": 326.2781982421875, "learning_rate": 1.25e-06, "loss": 32.4217, "step": 6250 }, { "epoch": 0.012645596060068601, "grad_norm": 326.0747985839844, "learning_rate": 1.2520000000000003e-06, "loss": 62.2493, "step": 6260 }, { "epoch": 0.012665796692752416, "grad_norm": 125.75269317626953, "learning_rate": 1.2540000000000002e-06, "loss": 40.1945, "step": 6270 }, { "epoch": 0.012685997325436233, "grad_norm": 161.60801696777344, "learning_rate": 1.256e-06, "loss": 56.1344, "step": 6280 }, { "epoch": 0.012706197958120048, "grad_norm": 51.292423248291016, "learning_rate": 1.258e-06, "loss": 71.8206, "step": 6290 }, { "epoch": 0.012726398590803864, "grad_norm": 1829.5567626953125, "learning_rate": 1.26e-06, "loss": 71.7415, "step": 6300 }, { "epoch": 0.012746599223487679, "grad_norm": 463.5782775878906, "learning_rate": 1.2620000000000002e-06, "loss": 57.2545, "step": 6310 }, { "epoch": 0.012766799856171495, "grad_norm": 1028.679443359375, "learning_rate": 1.2640000000000003e-06, "loss": 50.0133, "step": 6320 }, { "epoch": 0.01278700048885531, "grad_norm": 437.41339111328125, "learning_rate": 1.266e-06, "loss": 34.0428, "step": 6330 }, { "epoch": 0.012807201121539127, "grad_norm": 245.99520874023438, "learning_rate": 1.268e-06, "loss": 43.0666, "step": 6340 }, { "epoch": 0.012827401754222941, "grad_norm": 209.23678588867188, "learning_rate": 1.2700000000000001e-06, "loss": 106.4088, "step": 6350 }, { "epoch": 0.012847602386906758, "grad_norm": 310.7594299316406, "learning_rate": 1.2720000000000003e-06, "loss": 64.6688, "step": 6360 }, { "epoch": 0.012867803019590573, "grad_norm": 155.32568359375, "learning_rate": 1.2740000000000002e-06, "loss": 41.7638, "step": 6370 }, { "epoch": 0.01288800365227439, "grad_norm": 265.0366516113281, "learning_rate": 1.276e-06, "loss": 48.7156, "step": 6380 }, { "epoch": 0.012908204284958204, "grad_norm": 830.7291259765625, "learning_rate": 1.278e-06, "loss": 55.6232, "step": 6390 }, { "epoch": 0.01292840491764202, "grad_norm": 2724.1865234375, "learning_rate": 1.28e-06, "loss": 104.9945, "step": 6400 }, { "epoch": 0.012948605550325835, "grad_norm": 443.6117248535156, "learning_rate": 1.2820000000000002e-06, "loss": 76.5873, "step": 6410 }, { "epoch": 0.012968806183009652, "grad_norm": 267.8358459472656, "learning_rate": 1.284e-06, "loss": 57.6679, "step": 6420 }, { "epoch": 0.012989006815693467, "grad_norm": 393.7155456542969, "learning_rate": 1.286e-06, "loss": 44.1063, "step": 6430 }, { "epoch": 0.013009207448377283, "grad_norm": 72.9302978515625, "learning_rate": 1.288e-06, "loss": 59.049, "step": 6440 }, { "epoch": 0.013029408081061098, "grad_norm": 279.80352783203125, "learning_rate": 1.2900000000000001e-06, "loss": 52.4332, "step": 6450 }, { "epoch": 0.013049608713744915, "grad_norm": 410.0312194824219, "learning_rate": 1.2920000000000003e-06, "loss": 74.9647, "step": 6460 }, { "epoch": 0.01306980934642873, "grad_norm": 128.3466339111328, "learning_rate": 1.294e-06, "loss": 56.1195, "step": 6470 }, { "epoch": 0.013090009979112546, "grad_norm": 637.7115478515625, "learning_rate": 1.296e-06, "loss": 65.2464, "step": 6480 }, { "epoch": 0.01311021061179636, "grad_norm": 246.60552978515625, "learning_rate": 1.2980000000000001e-06, "loss": 75.2727, "step": 6490 }, { "epoch": 0.013130411244480177, "grad_norm": 276.9471435546875, "learning_rate": 1.3e-06, "loss": 67.3878, "step": 6500 }, { "epoch": 0.013150611877163992, "grad_norm": 717.5218505859375, "learning_rate": 1.3020000000000002e-06, "loss": 43.676, "step": 6510 }, { "epoch": 0.013170812509847809, "grad_norm": 505.0317687988281, "learning_rate": 1.304e-06, "loss": 60.7305, "step": 6520 }, { "epoch": 0.013191013142531623, "grad_norm": 234.5377960205078, "learning_rate": 1.306e-06, "loss": 110.1103, "step": 6530 }, { "epoch": 0.01321121377521544, "grad_norm": 134.81161499023438, "learning_rate": 1.308e-06, "loss": 42.9563, "step": 6540 }, { "epoch": 0.013231414407899255, "grad_norm": 188.2266387939453, "learning_rate": 1.3100000000000002e-06, "loss": 59.2885, "step": 6550 }, { "epoch": 0.013251615040583071, "grad_norm": 204.31874084472656, "learning_rate": 1.3120000000000003e-06, "loss": 67.4519, "step": 6560 }, { "epoch": 0.013271815673266886, "grad_norm": 640.6788330078125, "learning_rate": 1.314e-06, "loss": 86.337, "step": 6570 }, { "epoch": 0.013292016305950703, "grad_norm": 1200.475341796875, "learning_rate": 1.316e-06, "loss": 49.6043, "step": 6580 }, { "epoch": 0.013312216938634517, "grad_norm": 552.6397094726562, "learning_rate": 1.3180000000000001e-06, "loss": 70.3574, "step": 6590 }, { "epoch": 0.013332417571318334, "grad_norm": 228.55966186523438, "learning_rate": 1.32e-06, "loss": 49.4, "step": 6600 }, { "epoch": 0.013352618204002149, "grad_norm": 91.66958618164062, "learning_rate": 1.3220000000000002e-06, "loss": 27.4222, "step": 6610 }, { "epoch": 0.013372818836685965, "grad_norm": 266.33013916015625, "learning_rate": 1.324e-06, "loss": 55.1865, "step": 6620 }, { "epoch": 0.01339301946936978, "grad_norm": 178.34083557128906, "learning_rate": 1.326e-06, "loss": 32.3691, "step": 6630 }, { "epoch": 0.013413220102053596, "grad_norm": 143.40811157226562, "learning_rate": 1.328e-06, "loss": 69.1419, "step": 6640 }, { "epoch": 0.013433420734737411, "grad_norm": 352.6832580566406, "learning_rate": 1.3300000000000002e-06, "loss": 45.734, "step": 6650 }, { "epoch": 0.013453621367421228, "grad_norm": 521.5436401367188, "learning_rate": 1.3320000000000003e-06, "loss": 59.6328, "step": 6660 }, { "epoch": 0.013473822000105043, "grad_norm": 155.1114959716797, "learning_rate": 1.334e-06, "loss": 58.974, "step": 6670 }, { "epoch": 0.013494022632788859, "grad_norm": 419.2751159667969, "learning_rate": 1.336e-06, "loss": 89.4178, "step": 6680 }, { "epoch": 0.013514223265472674, "grad_norm": 239.21461486816406, "learning_rate": 1.3380000000000001e-06, "loss": 35.0729, "step": 6690 }, { "epoch": 0.01353442389815649, "grad_norm": 1473.1988525390625, "learning_rate": 1.34e-06, "loss": 63.1642, "step": 6700 }, { "epoch": 0.013554624530840305, "grad_norm": 63.2901725769043, "learning_rate": 1.3420000000000002e-06, "loss": 33.0321, "step": 6710 }, { "epoch": 0.013574825163524122, "grad_norm": 134.3507537841797, "learning_rate": 1.344e-06, "loss": 39.8013, "step": 6720 }, { "epoch": 0.013595025796207937, "grad_norm": 198.5120086669922, "learning_rate": 1.3460000000000001e-06, "loss": 100.5731, "step": 6730 }, { "epoch": 0.013615226428891753, "grad_norm": 446.2283630371094, "learning_rate": 1.348e-06, "loss": 59.876, "step": 6740 }, { "epoch": 0.013635427061575568, "grad_norm": 153.77020263671875, "learning_rate": 1.3500000000000002e-06, "loss": 27.3906, "step": 6750 }, { "epoch": 0.013655627694259384, "grad_norm": 345.5477600097656, "learning_rate": 1.352e-06, "loss": 30.2925, "step": 6760 }, { "epoch": 0.0136758283269432, "grad_norm": 870.7298583984375, "learning_rate": 1.354e-06, "loss": 63.0514, "step": 6770 }, { "epoch": 0.013696028959627016, "grad_norm": 604.126953125, "learning_rate": 1.356e-06, "loss": 54.4766, "step": 6780 }, { "epoch": 0.01371622959231083, "grad_norm": 481.68951416015625, "learning_rate": 1.3580000000000002e-06, "loss": 38.1397, "step": 6790 }, { "epoch": 0.013736430224994647, "grad_norm": 161.20230102539062, "learning_rate": 1.3600000000000001e-06, "loss": 56.5243, "step": 6800 }, { "epoch": 0.013756630857678462, "grad_norm": 594.2686157226562, "learning_rate": 1.362e-06, "loss": 162.3009, "step": 6810 }, { "epoch": 0.013776831490362278, "grad_norm": 614.6820678710938, "learning_rate": 1.364e-06, "loss": 38.4719, "step": 6820 }, { "epoch": 0.013797032123046093, "grad_norm": 276.0633850097656, "learning_rate": 1.3660000000000001e-06, "loss": 71.6093, "step": 6830 }, { "epoch": 0.01381723275572991, "grad_norm": 584.8493041992188, "learning_rate": 1.368e-06, "loss": 77.0056, "step": 6840 }, { "epoch": 0.013837433388413724, "grad_norm": 321.65252685546875, "learning_rate": 1.3700000000000002e-06, "loss": 73.6566, "step": 6850 }, { "epoch": 0.013857634021097541, "grad_norm": 323.55426025390625, "learning_rate": 1.372e-06, "loss": 51.3812, "step": 6860 }, { "epoch": 0.013877834653781356, "grad_norm": 139.42430114746094, "learning_rate": 1.374e-06, "loss": 32.1982, "step": 6870 }, { "epoch": 0.013898035286465172, "grad_norm": 362.964111328125, "learning_rate": 1.376e-06, "loss": 31.2049, "step": 6880 }, { "epoch": 0.013918235919148987, "grad_norm": 134.5757293701172, "learning_rate": 1.3780000000000002e-06, "loss": 45.1476, "step": 6890 }, { "epoch": 0.013938436551832804, "grad_norm": 228.7288360595703, "learning_rate": 1.3800000000000001e-06, "loss": 35.6554, "step": 6900 }, { "epoch": 0.013958637184516618, "grad_norm": 282.1006164550781, "learning_rate": 1.382e-06, "loss": 41.5031, "step": 6910 }, { "epoch": 0.013978837817200435, "grad_norm": 322.1584777832031, "learning_rate": 1.384e-06, "loss": 60.9524, "step": 6920 }, { "epoch": 0.01399903844988425, "grad_norm": 178.137451171875, "learning_rate": 1.3860000000000002e-06, "loss": 40.1866, "step": 6930 }, { "epoch": 0.014019239082568066, "grad_norm": 603.7406616210938, "learning_rate": 1.388e-06, "loss": 50.6219, "step": 6940 }, { "epoch": 0.014039439715251881, "grad_norm": 284.7754821777344, "learning_rate": 1.3900000000000002e-06, "loss": 27.277, "step": 6950 }, { "epoch": 0.014059640347935698, "grad_norm": 1241.1451416015625, "learning_rate": 1.392e-06, "loss": 84.9548, "step": 6960 }, { "epoch": 0.014079840980619512, "grad_norm": 423.69976806640625, "learning_rate": 1.3940000000000001e-06, "loss": 56.0265, "step": 6970 }, { "epoch": 0.014100041613303329, "grad_norm": 488.623291015625, "learning_rate": 1.396e-06, "loss": 53.7844, "step": 6980 }, { "epoch": 0.014120242245987144, "grad_norm": 102.38019561767578, "learning_rate": 1.3980000000000002e-06, "loss": 21.3216, "step": 6990 }, { "epoch": 0.01414044287867096, "grad_norm": 173.0083465576172, "learning_rate": 1.4000000000000001e-06, "loss": 45.6968, "step": 7000 }, { "epoch": 0.014160643511354775, "grad_norm": 305.0091247558594, "learning_rate": 1.402e-06, "loss": 39.0303, "step": 7010 }, { "epoch": 0.014180844144038592, "grad_norm": 459.939208984375, "learning_rate": 1.404e-06, "loss": 73.1526, "step": 7020 }, { "epoch": 0.014201044776722406, "grad_norm": 170.4021453857422, "learning_rate": 1.4060000000000002e-06, "loss": 45.711, "step": 7030 }, { "epoch": 0.014221245409406223, "grad_norm": 237.15792846679688, "learning_rate": 1.4080000000000001e-06, "loss": 52.2809, "step": 7040 }, { "epoch": 0.014241446042090038, "grad_norm": 1582.811767578125, "learning_rate": 1.41e-06, "loss": 55.2645, "step": 7050 }, { "epoch": 0.014261646674773854, "grad_norm": 174.53512573242188, "learning_rate": 1.412e-06, "loss": 25.8873, "step": 7060 }, { "epoch": 0.014281847307457669, "grad_norm": 548.8692626953125, "learning_rate": 1.4140000000000001e-06, "loss": 40.4999, "step": 7070 }, { "epoch": 0.014302047940141486, "grad_norm": 273.18402099609375, "learning_rate": 1.416e-06, "loss": 43.2874, "step": 7080 }, { "epoch": 0.0143222485728253, "grad_norm": 229.1833038330078, "learning_rate": 1.4180000000000002e-06, "loss": 45.1278, "step": 7090 }, { "epoch": 0.014342449205509117, "grad_norm": 356.5712890625, "learning_rate": 1.42e-06, "loss": 36.204, "step": 7100 }, { "epoch": 0.014362649838192932, "grad_norm": 488.5222473144531, "learning_rate": 1.4220000000000001e-06, "loss": 88.5467, "step": 7110 }, { "epoch": 0.014382850470876748, "grad_norm": 264.17913818359375, "learning_rate": 1.424e-06, "loss": 33.6878, "step": 7120 }, { "epoch": 0.014403051103560563, "grad_norm": 101.51790618896484, "learning_rate": 1.4260000000000002e-06, "loss": 34.4206, "step": 7130 }, { "epoch": 0.01442325173624438, "grad_norm": 471.9056701660156, "learning_rate": 1.4280000000000001e-06, "loss": 58.0212, "step": 7140 }, { "epoch": 0.014443452368928194, "grad_norm": 436.0234680175781, "learning_rate": 1.43e-06, "loss": 30.5576, "step": 7150 }, { "epoch": 0.01446365300161201, "grad_norm": 123.70526123046875, "learning_rate": 1.432e-06, "loss": 29.7103, "step": 7160 }, { "epoch": 0.014483853634295826, "grad_norm": 262.009765625, "learning_rate": 1.4340000000000002e-06, "loss": 65.1372, "step": 7170 }, { "epoch": 0.014504054266979642, "grad_norm": 54.096317291259766, "learning_rate": 1.436e-06, "loss": 27.3467, "step": 7180 }, { "epoch": 0.014524254899663457, "grad_norm": 691.3617553710938, "learning_rate": 1.4380000000000003e-06, "loss": 49.493, "step": 7190 }, { "epoch": 0.014544455532347273, "grad_norm": 224.14955139160156, "learning_rate": 1.44e-06, "loss": 55.5638, "step": 7200 }, { "epoch": 0.014564656165031088, "grad_norm": 54.54121017456055, "learning_rate": 1.4420000000000001e-06, "loss": 50.6267, "step": 7210 }, { "epoch": 0.014584856797714905, "grad_norm": 532.8018798828125, "learning_rate": 1.444e-06, "loss": 53.4124, "step": 7220 }, { "epoch": 0.01460505743039872, "grad_norm": 681.6290283203125, "learning_rate": 1.4460000000000002e-06, "loss": 56.6136, "step": 7230 }, { "epoch": 0.014625258063082536, "grad_norm": 269.7127685546875, "learning_rate": 1.4480000000000002e-06, "loss": 39.6635, "step": 7240 }, { "epoch": 0.014645458695766351, "grad_norm": 203.48785400390625, "learning_rate": 1.45e-06, "loss": 28.3652, "step": 7250 }, { "epoch": 0.014665659328450167, "grad_norm": 201.3723602294922, "learning_rate": 1.452e-06, "loss": 41.6336, "step": 7260 }, { "epoch": 0.014685859961133982, "grad_norm": 179.6065216064453, "learning_rate": 1.4540000000000002e-06, "loss": 62.6191, "step": 7270 }, { "epoch": 0.014706060593817799, "grad_norm": 690.5343017578125, "learning_rate": 1.4560000000000001e-06, "loss": 51.5913, "step": 7280 }, { "epoch": 0.014726261226501614, "grad_norm": 121.47522735595703, "learning_rate": 1.4580000000000003e-06, "loss": 36.9549, "step": 7290 }, { "epoch": 0.01474646185918543, "grad_norm": 240.26461791992188, "learning_rate": 1.46e-06, "loss": 89.7094, "step": 7300 }, { "epoch": 0.014766662491869245, "grad_norm": 202.04205322265625, "learning_rate": 1.4620000000000001e-06, "loss": 46.0317, "step": 7310 }, { "epoch": 0.014786863124553061, "grad_norm": 104.77202606201172, "learning_rate": 1.464e-06, "loss": 62.2657, "step": 7320 }, { "epoch": 0.014807063757236876, "grad_norm": 317.5575256347656, "learning_rate": 1.4660000000000002e-06, "loss": 51.1359, "step": 7330 }, { "epoch": 0.014827264389920693, "grad_norm": 61.72543716430664, "learning_rate": 1.4680000000000002e-06, "loss": 84.4159, "step": 7340 }, { "epoch": 0.014847465022604507, "grad_norm": 262.47906494140625, "learning_rate": 1.4700000000000001e-06, "loss": 49.778, "step": 7350 }, { "epoch": 0.014867665655288324, "grad_norm": 0.0, "learning_rate": 1.472e-06, "loss": 52.8639, "step": 7360 }, { "epoch": 0.014887866287972139, "grad_norm": 86.41136932373047, "learning_rate": 1.4740000000000002e-06, "loss": 52.4906, "step": 7370 }, { "epoch": 0.014908066920655955, "grad_norm": 210.46620178222656, "learning_rate": 1.4760000000000001e-06, "loss": 73.8401, "step": 7380 }, { "epoch": 0.01492826755333977, "grad_norm": 658.1229248046875, "learning_rate": 1.478e-06, "loss": 81.975, "step": 7390 }, { "epoch": 0.014948468186023587, "grad_norm": 476.41900634765625, "learning_rate": 1.48e-06, "loss": 76.9813, "step": 7400 }, { "epoch": 0.014968668818707401, "grad_norm": 391.5830383300781, "learning_rate": 1.4820000000000002e-06, "loss": 61.0732, "step": 7410 }, { "epoch": 0.014988869451391218, "grad_norm": 824.1578979492188, "learning_rate": 1.4840000000000001e-06, "loss": 44.4346, "step": 7420 }, { "epoch": 0.015009070084075033, "grad_norm": 177.28871154785156, "learning_rate": 1.4860000000000003e-06, "loss": 47.4054, "step": 7430 }, { "epoch": 0.01502927071675885, "grad_norm": 158.52125549316406, "learning_rate": 1.488e-06, "loss": 54.5121, "step": 7440 }, { "epoch": 0.015049471349442664, "grad_norm": 204.8341522216797, "learning_rate": 1.4900000000000001e-06, "loss": 72.0585, "step": 7450 }, { "epoch": 0.01506967198212648, "grad_norm": 427.7160339355469, "learning_rate": 1.492e-06, "loss": 54.8663, "step": 7460 }, { "epoch": 0.015089872614810295, "grad_norm": 132.15272521972656, "learning_rate": 1.4940000000000002e-06, "loss": 43.7779, "step": 7470 }, { "epoch": 0.015110073247494112, "grad_norm": 382.5353088378906, "learning_rate": 1.4960000000000002e-06, "loss": 62.4497, "step": 7480 }, { "epoch": 0.015130273880177927, "grad_norm": 273.1387023925781, "learning_rate": 1.498e-06, "loss": 87.6474, "step": 7490 }, { "epoch": 0.015150474512861743, "grad_norm": 237.03143310546875, "learning_rate": 1.5e-06, "loss": 91.2072, "step": 7500 }, { "epoch": 0.015170675145545558, "grad_norm": 594.238525390625, "learning_rate": 1.5020000000000002e-06, "loss": 64.7865, "step": 7510 }, { "epoch": 0.015190875778229375, "grad_norm": 359.99005126953125, "learning_rate": 1.5040000000000001e-06, "loss": 32.348, "step": 7520 }, { "epoch": 0.01521107641091319, "grad_norm": 230.90707397460938, "learning_rate": 1.5060000000000003e-06, "loss": 93.0239, "step": 7530 }, { "epoch": 0.015231277043597006, "grad_norm": 848.487060546875, "learning_rate": 1.508e-06, "loss": 49.5891, "step": 7540 }, { "epoch": 0.01525147767628082, "grad_norm": 516.8324584960938, "learning_rate": 1.5100000000000002e-06, "loss": 32.6952, "step": 7550 }, { "epoch": 0.015271678308964637, "grad_norm": 409.9013366699219, "learning_rate": 1.512e-06, "loss": 56.0514, "step": 7560 }, { "epoch": 0.015291878941648452, "grad_norm": 575.2659301757812, "learning_rate": 1.5140000000000002e-06, "loss": 54.5658, "step": 7570 }, { "epoch": 0.015312079574332269, "grad_norm": 620.3074340820312, "learning_rate": 1.5160000000000002e-06, "loss": 57.7719, "step": 7580 }, { "epoch": 0.015332280207016083, "grad_norm": 377.2959289550781, "learning_rate": 1.5180000000000001e-06, "loss": 34.4951, "step": 7590 }, { "epoch": 0.0153524808396999, "grad_norm": 303.0909118652344, "learning_rate": 1.52e-06, "loss": 32.1425, "step": 7600 }, { "epoch": 0.015372681472383715, "grad_norm": 1054.06298828125, "learning_rate": 1.5220000000000002e-06, "loss": 75.1854, "step": 7610 }, { "epoch": 0.015392882105067531, "grad_norm": 184.84564208984375, "learning_rate": 1.5240000000000001e-06, "loss": 53.9128, "step": 7620 }, { "epoch": 0.015413082737751346, "grad_norm": 121.82855224609375, "learning_rate": 1.5260000000000003e-06, "loss": 68.7028, "step": 7630 }, { "epoch": 0.015433283370435162, "grad_norm": 176.220947265625, "learning_rate": 1.528e-06, "loss": 22.662, "step": 7640 }, { "epoch": 0.015453484003118977, "grad_norm": 185.2891387939453, "learning_rate": 1.5300000000000002e-06, "loss": 48.3537, "step": 7650 }, { "epoch": 0.015473684635802794, "grad_norm": 472.9952697753906, "learning_rate": 1.5320000000000001e-06, "loss": 82.1461, "step": 7660 }, { "epoch": 0.015493885268486609, "grad_norm": 177.1582489013672, "learning_rate": 1.5340000000000003e-06, "loss": 36.7319, "step": 7670 }, { "epoch": 0.015514085901170425, "grad_norm": 1533.5411376953125, "learning_rate": 1.536e-06, "loss": 62.834, "step": 7680 }, { "epoch": 0.01553428653385424, "grad_norm": 264.4908142089844, "learning_rate": 1.5380000000000001e-06, "loss": 38.5009, "step": 7690 }, { "epoch": 0.015554487166538056, "grad_norm": 447.58978271484375, "learning_rate": 1.54e-06, "loss": 46.4973, "step": 7700 }, { "epoch": 0.015574687799221871, "grad_norm": 60.77960968017578, "learning_rate": 1.5420000000000002e-06, "loss": 43.2578, "step": 7710 }, { "epoch": 0.015594888431905688, "grad_norm": 185.21131896972656, "learning_rate": 1.5440000000000002e-06, "loss": 69.0782, "step": 7720 }, { "epoch": 0.015615089064589503, "grad_norm": 1794.189697265625, "learning_rate": 1.546e-06, "loss": 34.5296, "step": 7730 }, { "epoch": 0.01563528969727332, "grad_norm": 178.63865661621094, "learning_rate": 1.548e-06, "loss": 38.211, "step": 7740 }, { "epoch": 0.015655490329957136, "grad_norm": 418.35028076171875, "learning_rate": 1.5500000000000002e-06, "loss": 34.0296, "step": 7750 }, { "epoch": 0.01567569096264095, "grad_norm": 139.04905700683594, "learning_rate": 1.5520000000000001e-06, "loss": 53.6939, "step": 7760 }, { "epoch": 0.015695891595324765, "grad_norm": 290.563720703125, "learning_rate": 1.5540000000000003e-06, "loss": 73.5191, "step": 7770 }, { "epoch": 0.01571609222800858, "grad_norm": 369.7440490722656, "learning_rate": 1.556e-06, "loss": 37.0936, "step": 7780 }, { "epoch": 0.015736292860692398, "grad_norm": 427.1689758300781, "learning_rate": 1.5580000000000002e-06, "loss": 57.4097, "step": 7790 }, { "epoch": 0.01575649349337621, "grad_norm": 392.7759094238281, "learning_rate": 1.56e-06, "loss": 62.6377, "step": 7800 }, { "epoch": 0.015776694126060028, "grad_norm": 514.4950561523438, "learning_rate": 1.5620000000000002e-06, "loss": 65.5359, "step": 7810 }, { "epoch": 0.015796894758743844, "grad_norm": 394.7283020019531, "learning_rate": 1.5640000000000002e-06, "loss": 32.3608, "step": 7820 }, { "epoch": 0.01581709539142766, "grad_norm": 522.7518310546875, "learning_rate": 1.566e-06, "loss": 23.4018, "step": 7830 }, { "epoch": 0.015837296024111474, "grad_norm": 146.63308715820312, "learning_rate": 1.568e-06, "loss": 58.5988, "step": 7840 }, { "epoch": 0.01585749665679529, "grad_norm": 133.8489990234375, "learning_rate": 1.5700000000000002e-06, "loss": 31.7483, "step": 7850 }, { "epoch": 0.015877697289479107, "grad_norm": 180.6443634033203, "learning_rate": 1.5720000000000002e-06, "loss": 65.1659, "step": 7860 }, { "epoch": 0.015897897922162924, "grad_norm": 228.48204040527344, "learning_rate": 1.5740000000000003e-06, "loss": 45.5145, "step": 7870 }, { "epoch": 0.015918098554846737, "grad_norm": 584.9379272460938, "learning_rate": 1.576e-06, "loss": 44.3535, "step": 7880 }, { "epoch": 0.015938299187530553, "grad_norm": 641.3094482421875, "learning_rate": 1.5780000000000002e-06, "loss": 61.2566, "step": 7890 }, { "epoch": 0.01595849982021437, "grad_norm": 325.6277160644531, "learning_rate": 1.5800000000000001e-06, "loss": 59.0131, "step": 7900 }, { "epoch": 0.015978700452898186, "grad_norm": 276.489501953125, "learning_rate": 1.5820000000000003e-06, "loss": 38.9154, "step": 7910 }, { "epoch": 0.015998901085582, "grad_norm": 329.33636474609375, "learning_rate": 1.5840000000000002e-06, "loss": 75.7755, "step": 7920 }, { "epoch": 0.016019101718265816, "grad_norm": 420.45574951171875, "learning_rate": 1.586e-06, "loss": 57.9747, "step": 7930 }, { "epoch": 0.016039302350949632, "grad_norm": 149.86245727539062, "learning_rate": 1.588e-06, "loss": 62.4833, "step": 7940 }, { "epoch": 0.01605950298363345, "grad_norm": 390.672607421875, "learning_rate": 1.5900000000000002e-06, "loss": 76.5779, "step": 7950 }, { "epoch": 0.016079703616317262, "grad_norm": 207.01571655273438, "learning_rate": 1.5920000000000002e-06, "loss": 31.6585, "step": 7960 }, { "epoch": 0.01609990424900108, "grad_norm": 426.03326416015625, "learning_rate": 1.594e-06, "loss": 67.9595, "step": 7970 }, { "epoch": 0.016120104881684895, "grad_norm": 192.54335021972656, "learning_rate": 1.596e-06, "loss": 37.7804, "step": 7980 }, { "epoch": 0.01614030551436871, "grad_norm": 521.6817016601562, "learning_rate": 1.5980000000000002e-06, "loss": 58.4441, "step": 7990 }, { "epoch": 0.016160506147052525, "grad_norm": 398.355712890625, "learning_rate": 1.6000000000000001e-06, "loss": 59.8929, "step": 8000 }, { "epoch": 0.01618070677973634, "grad_norm": 267.3633117675781, "learning_rate": 1.6020000000000003e-06, "loss": 55.6435, "step": 8010 }, { "epoch": 0.016200907412420158, "grad_norm": 202.33763122558594, "learning_rate": 1.604e-06, "loss": 42.5254, "step": 8020 }, { "epoch": 0.016221108045103974, "grad_norm": 191.3097686767578, "learning_rate": 1.606e-06, "loss": 39.6608, "step": 8030 }, { "epoch": 0.016241308677787787, "grad_norm": 625.6720581054688, "learning_rate": 1.608e-06, "loss": 78.6341, "step": 8040 }, { "epoch": 0.016261509310471604, "grad_norm": 171.45823669433594, "learning_rate": 1.6100000000000003e-06, "loss": 80.1601, "step": 8050 }, { "epoch": 0.01628170994315542, "grad_norm": 208.61375427246094, "learning_rate": 1.6120000000000002e-06, "loss": 44.9717, "step": 8060 }, { "epoch": 0.016301910575839237, "grad_norm": 532.2265625, "learning_rate": 1.614e-06, "loss": 91.363, "step": 8070 }, { "epoch": 0.01632211120852305, "grad_norm": 145.6019744873047, "learning_rate": 1.616e-06, "loss": 67.9158, "step": 8080 }, { "epoch": 0.016342311841206866, "grad_norm": 212.4868621826172, "learning_rate": 1.6180000000000002e-06, "loss": 32.0172, "step": 8090 }, { "epoch": 0.016362512473890683, "grad_norm": 748.1085205078125, "learning_rate": 1.6200000000000002e-06, "loss": 60.1593, "step": 8100 }, { "epoch": 0.0163827131065745, "grad_norm": 503.207275390625, "learning_rate": 1.6220000000000003e-06, "loss": 95.2564, "step": 8110 }, { "epoch": 0.016402913739258312, "grad_norm": 135.9218292236328, "learning_rate": 1.624e-06, "loss": 40.6468, "step": 8120 }, { "epoch": 0.01642311437194213, "grad_norm": 714.2936401367188, "learning_rate": 1.626e-06, "loss": 58.8864, "step": 8130 }, { "epoch": 0.016443315004625945, "grad_norm": 557.3250122070312, "learning_rate": 1.6280000000000001e-06, "loss": 32.9455, "step": 8140 }, { "epoch": 0.016463515637309762, "grad_norm": 181.55857849121094, "learning_rate": 1.6300000000000003e-06, "loss": 58.6144, "step": 8150 }, { "epoch": 0.016483716269993575, "grad_norm": 307.7644958496094, "learning_rate": 1.6320000000000002e-06, "loss": 40.2831, "step": 8160 }, { "epoch": 0.01650391690267739, "grad_norm": 439.642578125, "learning_rate": 1.634e-06, "loss": 25.8979, "step": 8170 }, { "epoch": 0.016524117535361208, "grad_norm": 209.7728271484375, "learning_rate": 1.636e-06, "loss": 70.178, "step": 8180 }, { "epoch": 0.016544318168045025, "grad_norm": 126.69316864013672, "learning_rate": 1.6380000000000002e-06, "loss": 41.2764, "step": 8190 }, { "epoch": 0.016564518800728838, "grad_norm": 222.9364013671875, "learning_rate": 1.6400000000000002e-06, "loss": 64.4757, "step": 8200 }, { "epoch": 0.016584719433412654, "grad_norm": 159.58859252929688, "learning_rate": 1.6420000000000003e-06, "loss": 42.3814, "step": 8210 }, { "epoch": 0.01660492006609647, "grad_norm": 229.76573181152344, "learning_rate": 1.644e-06, "loss": 42.2024, "step": 8220 }, { "epoch": 0.016625120698780287, "grad_norm": 310.381103515625, "learning_rate": 1.646e-06, "loss": 70.4634, "step": 8230 }, { "epoch": 0.0166453213314641, "grad_norm": 77.79309844970703, "learning_rate": 1.6480000000000001e-06, "loss": 70.0215, "step": 8240 }, { "epoch": 0.016665521964147917, "grad_norm": 212.04446411132812, "learning_rate": 1.6500000000000003e-06, "loss": 34.7536, "step": 8250 }, { "epoch": 0.016685722596831733, "grad_norm": 133.7471160888672, "learning_rate": 1.6520000000000002e-06, "loss": 54.0732, "step": 8260 }, { "epoch": 0.01670592322951555, "grad_norm": 172.90106201171875, "learning_rate": 1.654e-06, "loss": 43.6643, "step": 8270 }, { "epoch": 0.016726123862199363, "grad_norm": 296.1474304199219, "learning_rate": 1.6560000000000001e-06, "loss": 53.8716, "step": 8280 }, { "epoch": 0.01674632449488318, "grad_norm": 286.97637939453125, "learning_rate": 1.6580000000000003e-06, "loss": 87.2274, "step": 8290 }, { "epoch": 0.016766525127566996, "grad_norm": 245.01783752441406, "learning_rate": 1.6600000000000002e-06, "loss": 63.5678, "step": 8300 }, { "epoch": 0.016786725760250813, "grad_norm": 153.63438415527344, "learning_rate": 1.662e-06, "loss": 72.0676, "step": 8310 }, { "epoch": 0.016806926392934626, "grad_norm": 113.81085205078125, "learning_rate": 1.664e-06, "loss": 45.5424, "step": 8320 }, { "epoch": 0.016827127025618442, "grad_norm": 354.77874755859375, "learning_rate": 1.666e-06, "loss": 83.0096, "step": 8330 }, { "epoch": 0.01684732765830226, "grad_norm": 278.74951171875, "learning_rate": 1.6680000000000002e-06, "loss": 58.2536, "step": 8340 }, { "epoch": 0.016867528290986075, "grad_norm": 94.84683227539062, "learning_rate": 1.6700000000000003e-06, "loss": 57.4371, "step": 8350 }, { "epoch": 0.016887728923669888, "grad_norm": 539.45068359375, "learning_rate": 1.672e-06, "loss": 53.5464, "step": 8360 }, { "epoch": 0.016907929556353705, "grad_norm": 157.49522399902344, "learning_rate": 1.674e-06, "loss": 42.6541, "step": 8370 }, { "epoch": 0.01692813018903752, "grad_norm": 154.23117065429688, "learning_rate": 1.6760000000000001e-06, "loss": 30.0077, "step": 8380 }, { "epoch": 0.016948330821721338, "grad_norm": 193.352294921875, "learning_rate": 1.6780000000000003e-06, "loss": 36.0281, "step": 8390 }, { "epoch": 0.01696853145440515, "grad_norm": 267.4693603515625, "learning_rate": 1.6800000000000002e-06, "loss": 32.7522, "step": 8400 }, { "epoch": 0.016988732087088967, "grad_norm": 136.2726287841797, "learning_rate": 1.682e-06, "loss": 43.8714, "step": 8410 }, { "epoch": 0.017008932719772784, "grad_norm": 359.4302673339844, "learning_rate": 1.684e-06, "loss": 48.5446, "step": 8420 }, { "epoch": 0.0170291333524566, "grad_norm": 224.54190063476562, "learning_rate": 1.686e-06, "loss": 30.0631, "step": 8430 }, { "epoch": 0.017049333985140414, "grad_norm": 189.47006225585938, "learning_rate": 1.6880000000000002e-06, "loss": 38.865, "step": 8440 }, { "epoch": 0.01706953461782423, "grad_norm": 342.8333740234375, "learning_rate": 1.6900000000000003e-06, "loss": 76.4439, "step": 8450 }, { "epoch": 0.017089735250508047, "grad_norm": 199.6516876220703, "learning_rate": 1.692e-06, "loss": 56.1121, "step": 8460 }, { "epoch": 0.017109935883191863, "grad_norm": 312.7919921875, "learning_rate": 1.694e-06, "loss": 92.1416, "step": 8470 }, { "epoch": 0.017130136515875676, "grad_norm": 166.10073852539062, "learning_rate": 1.6960000000000002e-06, "loss": 44.8351, "step": 8480 }, { "epoch": 0.017150337148559493, "grad_norm": 207.41058349609375, "learning_rate": 1.6980000000000003e-06, "loss": 92.3383, "step": 8490 }, { "epoch": 0.01717053778124331, "grad_norm": 562.9660034179688, "learning_rate": 1.7000000000000002e-06, "loss": 47.6634, "step": 8500 }, { "epoch": 0.017190738413927126, "grad_norm": 348.60614013671875, "learning_rate": 1.702e-06, "loss": 63.8574, "step": 8510 }, { "epoch": 0.01721093904661094, "grad_norm": 300.07232666015625, "learning_rate": 1.7040000000000001e-06, "loss": 45.8859, "step": 8520 }, { "epoch": 0.017231139679294755, "grad_norm": 70.5987548828125, "learning_rate": 1.706e-06, "loss": 25.5977, "step": 8530 }, { "epoch": 0.017251340311978572, "grad_norm": 340.52850341796875, "learning_rate": 1.7080000000000002e-06, "loss": 79.7864, "step": 8540 }, { "epoch": 0.01727154094466239, "grad_norm": 1215.22998046875, "learning_rate": 1.7100000000000004e-06, "loss": 93.5066, "step": 8550 }, { "epoch": 0.0172917415773462, "grad_norm": 478.7080993652344, "learning_rate": 1.712e-06, "loss": 61.3044, "step": 8560 }, { "epoch": 0.017311942210030018, "grad_norm": 461.803955078125, "learning_rate": 1.714e-06, "loss": 35.9294, "step": 8570 }, { "epoch": 0.017332142842713835, "grad_norm": 148.80918884277344, "learning_rate": 1.7160000000000002e-06, "loss": 20.3312, "step": 8580 }, { "epoch": 0.01735234347539765, "grad_norm": 302.2602233886719, "learning_rate": 1.7180000000000003e-06, "loss": 32.0875, "step": 8590 }, { "epoch": 0.017372544108081464, "grad_norm": 324.1483459472656, "learning_rate": 1.72e-06, "loss": 75.4901, "step": 8600 }, { "epoch": 0.01739274474076528, "grad_norm": 464.5686950683594, "learning_rate": 1.722e-06, "loss": 60.9012, "step": 8610 }, { "epoch": 0.017412945373449097, "grad_norm": 368.91156005859375, "learning_rate": 1.7240000000000001e-06, "loss": 58.6987, "step": 8620 }, { "epoch": 0.017433146006132914, "grad_norm": 325.759765625, "learning_rate": 1.726e-06, "loss": 44.6632, "step": 8630 }, { "epoch": 0.017453346638816727, "grad_norm": 367.0731201171875, "learning_rate": 1.7280000000000002e-06, "loss": 60.4444, "step": 8640 }, { "epoch": 0.017473547271500543, "grad_norm": 197.75381469726562, "learning_rate": 1.73e-06, "loss": 29.7279, "step": 8650 }, { "epoch": 0.01749374790418436, "grad_norm": 140.41017150878906, "learning_rate": 1.732e-06, "loss": 42.1983, "step": 8660 }, { "epoch": 0.017513948536868176, "grad_norm": 320.8959655761719, "learning_rate": 1.734e-06, "loss": 31.4198, "step": 8670 }, { "epoch": 0.01753414916955199, "grad_norm": 246.10142517089844, "learning_rate": 1.7360000000000002e-06, "loss": 65.0271, "step": 8680 }, { "epoch": 0.017554349802235806, "grad_norm": 166.71719360351562, "learning_rate": 1.7380000000000003e-06, "loss": 42.2566, "step": 8690 }, { "epoch": 0.017574550434919622, "grad_norm": 390.1472473144531, "learning_rate": 1.74e-06, "loss": 50.2177, "step": 8700 }, { "epoch": 0.01759475106760344, "grad_norm": 512.3148193359375, "learning_rate": 1.742e-06, "loss": 60.6125, "step": 8710 }, { "epoch": 0.017614951700287252, "grad_norm": 476.39697265625, "learning_rate": 1.7440000000000002e-06, "loss": 19.9044, "step": 8720 }, { "epoch": 0.01763515233297107, "grad_norm": 173.92884826660156, "learning_rate": 1.746e-06, "loss": 43.3168, "step": 8730 }, { "epoch": 0.017655352965654885, "grad_norm": 310.982421875, "learning_rate": 1.7480000000000002e-06, "loss": 61.8324, "step": 8740 }, { "epoch": 0.0176755535983387, "grad_norm": 107.7964096069336, "learning_rate": 1.75e-06, "loss": 74.4783, "step": 8750 }, { "epoch": 0.017695754231022515, "grad_norm": 328.3822326660156, "learning_rate": 1.7520000000000001e-06, "loss": 46.7681, "step": 8760 }, { "epoch": 0.01771595486370633, "grad_norm": 255.00950622558594, "learning_rate": 1.754e-06, "loss": 45.994, "step": 8770 }, { "epoch": 0.017736155496390148, "grad_norm": 274.7867431640625, "learning_rate": 1.7560000000000002e-06, "loss": 30.315, "step": 8780 }, { "epoch": 0.017756356129073964, "grad_norm": 163.5213165283203, "learning_rate": 1.7580000000000004e-06, "loss": 68.677, "step": 8790 }, { "epoch": 0.017776556761757777, "grad_norm": 175.73757934570312, "learning_rate": 1.76e-06, "loss": 59.5222, "step": 8800 }, { "epoch": 0.017796757394441594, "grad_norm": 172.07005310058594, "learning_rate": 1.762e-06, "loss": 45.3956, "step": 8810 }, { "epoch": 0.01781695802712541, "grad_norm": 266.42950439453125, "learning_rate": 1.7640000000000002e-06, "loss": 47.1464, "step": 8820 }, { "epoch": 0.017837158659809227, "grad_norm": 237.3404541015625, "learning_rate": 1.7660000000000001e-06, "loss": 35.751, "step": 8830 }, { "epoch": 0.01785735929249304, "grad_norm": 530.183837890625, "learning_rate": 1.7680000000000003e-06, "loss": 46.7701, "step": 8840 }, { "epoch": 0.017877559925176856, "grad_norm": 373.7405090332031, "learning_rate": 1.77e-06, "loss": 63.2638, "step": 8850 }, { "epoch": 0.017897760557860673, "grad_norm": 266.2484436035156, "learning_rate": 1.7720000000000001e-06, "loss": 32.2605, "step": 8860 }, { "epoch": 0.01791796119054449, "grad_norm": 236.54971313476562, "learning_rate": 1.774e-06, "loss": 47.3467, "step": 8870 }, { "epoch": 0.017938161823228303, "grad_norm": 500.63543701171875, "learning_rate": 1.7760000000000002e-06, "loss": 38.4035, "step": 8880 }, { "epoch": 0.01795836245591212, "grad_norm": 306.7944030761719, "learning_rate": 1.7780000000000004e-06, "loss": 44.2697, "step": 8890 }, { "epoch": 0.017978563088595936, "grad_norm": 207.76585388183594, "learning_rate": 1.7800000000000001e-06, "loss": 37.9325, "step": 8900 }, { "epoch": 0.017998763721279752, "grad_norm": 360.8248291015625, "learning_rate": 1.782e-06, "loss": 39.0374, "step": 8910 }, { "epoch": 0.018018964353963565, "grad_norm": 225.53347778320312, "learning_rate": 1.7840000000000002e-06, "loss": 62.5793, "step": 8920 }, { "epoch": 0.018039164986647382, "grad_norm": 456.56378173828125, "learning_rate": 1.7860000000000001e-06, "loss": 54.6402, "step": 8930 }, { "epoch": 0.0180593656193312, "grad_norm": 494.0373840332031, "learning_rate": 1.788e-06, "loss": 39.5546, "step": 8940 }, { "epoch": 0.018079566252015015, "grad_norm": 70.10115051269531, "learning_rate": 1.79e-06, "loss": 41.9712, "step": 8950 }, { "epoch": 0.018099766884698828, "grad_norm": 400.0278625488281, "learning_rate": 1.7920000000000002e-06, "loss": 66.9228, "step": 8960 }, { "epoch": 0.018119967517382644, "grad_norm": 791.7115478515625, "learning_rate": 1.794e-06, "loss": 45.3158, "step": 8970 }, { "epoch": 0.01814016815006646, "grad_norm": 282.29608154296875, "learning_rate": 1.7960000000000003e-06, "loss": 39.976, "step": 8980 }, { "epoch": 0.018160368782750277, "grad_norm": 830.5479736328125, "learning_rate": 1.798e-06, "loss": 50.4328, "step": 8990 }, { "epoch": 0.01818056941543409, "grad_norm": 152.0670166015625, "learning_rate": 1.8000000000000001e-06, "loss": 45.7134, "step": 9000 }, { "epoch": 0.018200770048117907, "grad_norm": 356.0193176269531, "learning_rate": 1.802e-06, "loss": 50.7072, "step": 9010 }, { "epoch": 0.018220970680801724, "grad_norm": 507.36407470703125, "learning_rate": 1.8040000000000002e-06, "loss": 42.5805, "step": 9020 }, { "epoch": 0.01824117131348554, "grad_norm": 148.1385040283203, "learning_rate": 1.8060000000000002e-06, "loss": 39.2019, "step": 9030 }, { "epoch": 0.018261371946169353, "grad_norm": 202.6971893310547, "learning_rate": 1.808e-06, "loss": 43.0697, "step": 9040 }, { "epoch": 0.01828157257885317, "grad_norm": 360.75933837890625, "learning_rate": 1.81e-06, "loss": 37.975, "step": 9050 }, { "epoch": 0.018301773211536986, "grad_norm": 467.71026611328125, "learning_rate": 1.8120000000000002e-06, "loss": 53.9012, "step": 9060 }, { "epoch": 0.018321973844220803, "grad_norm": 324.80096435546875, "learning_rate": 1.8140000000000001e-06, "loss": 58.478, "step": 9070 }, { "epoch": 0.018342174476904616, "grad_norm": 256.92626953125, "learning_rate": 1.8160000000000003e-06, "loss": 22.6759, "step": 9080 }, { "epoch": 0.018362375109588432, "grad_norm": 221.60714721679688, "learning_rate": 1.818e-06, "loss": 70.4228, "step": 9090 }, { "epoch": 0.01838257574227225, "grad_norm": 140.0928192138672, "learning_rate": 1.8200000000000002e-06, "loss": 77.8925, "step": 9100 }, { "epoch": 0.018402776374956065, "grad_norm": 230.2388458251953, "learning_rate": 1.822e-06, "loss": 42.5445, "step": 9110 }, { "epoch": 0.01842297700763988, "grad_norm": 558.058349609375, "learning_rate": 1.8240000000000002e-06, "loss": 91.0409, "step": 9120 }, { "epoch": 0.018443177640323695, "grad_norm": 0.0, "learning_rate": 1.8260000000000002e-06, "loss": 39.4935, "step": 9130 }, { "epoch": 0.01846337827300751, "grad_norm": 338.200439453125, "learning_rate": 1.8280000000000001e-06, "loss": 52.3009, "step": 9140 }, { "epoch": 0.018483578905691325, "grad_norm": 445.2908935546875, "learning_rate": 1.83e-06, "loss": 44.6936, "step": 9150 }, { "epoch": 0.01850377953837514, "grad_norm": 329.6321105957031, "learning_rate": 1.8320000000000002e-06, "loss": 31.597, "step": 9160 }, { "epoch": 0.018523980171058958, "grad_norm": 255.5352020263672, "learning_rate": 1.8340000000000001e-06, "loss": 50.9383, "step": 9170 }, { "epoch": 0.018544180803742774, "grad_norm": 91.73799133300781, "learning_rate": 1.8360000000000003e-06, "loss": 50.919, "step": 9180 }, { "epoch": 0.018564381436426587, "grad_norm": 530.8141479492188, "learning_rate": 1.838e-06, "loss": 51.3711, "step": 9190 }, { "epoch": 0.018584582069110404, "grad_norm": 295.52374267578125, "learning_rate": 1.8400000000000002e-06, "loss": 78.0878, "step": 9200 }, { "epoch": 0.01860478270179422, "grad_norm": 371.4159240722656, "learning_rate": 1.8420000000000001e-06, "loss": 56.7314, "step": 9210 }, { "epoch": 0.018624983334478037, "grad_norm": 103.23512268066406, "learning_rate": 1.8440000000000003e-06, "loss": 34.3231, "step": 9220 }, { "epoch": 0.01864518396716185, "grad_norm": 320.5956115722656, "learning_rate": 1.846e-06, "loss": 40.2181, "step": 9230 }, { "epoch": 0.018665384599845666, "grad_norm": 59.08152389526367, "learning_rate": 1.8480000000000001e-06, "loss": 30.9277, "step": 9240 }, { "epoch": 0.018685585232529483, "grad_norm": 439.66595458984375, "learning_rate": 1.85e-06, "loss": 38.5015, "step": 9250 }, { "epoch": 0.0187057858652133, "grad_norm": 220.00411987304688, "learning_rate": 1.8520000000000002e-06, "loss": 36.6626, "step": 9260 }, { "epoch": 0.018725986497897112, "grad_norm": 226.76979064941406, "learning_rate": 1.8540000000000002e-06, "loss": 28.3686, "step": 9270 }, { "epoch": 0.01874618713058093, "grad_norm": 500.39068603515625, "learning_rate": 1.856e-06, "loss": 44.2918, "step": 9280 }, { "epoch": 0.018766387763264746, "grad_norm": 486.3710632324219, "learning_rate": 1.858e-06, "loss": 24.5672, "step": 9290 }, { "epoch": 0.018786588395948562, "grad_norm": 565.016357421875, "learning_rate": 1.8600000000000002e-06, "loss": 59.8933, "step": 9300 }, { "epoch": 0.018806789028632375, "grad_norm": 516.0799560546875, "learning_rate": 1.8620000000000001e-06, "loss": 71.5785, "step": 9310 }, { "epoch": 0.01882698966131619, "grad_norm": 382.5215148925781, "learning_rate": 1.8640000000000003e-06, "loss": 53.8169, "step": 9320 }, { "epoch": 0.018847190294000008, "grad_norm": 320.8269958496094, "learning_rate": 1.866e-06, "loss": 57.8409, "step": 9330 }, { "epoch": 0.018867390926683825, "grad_norm": 162.46218872070312, "learning_rate": 1.8680000000000002e-06, "loss": 63.3945, "step": 9340 }, { "epoch": 0.018887591559367638, "grad_norm": 79.13054656982422, "learning_rate": 1.87e-06, "loss": 22.84, "step": 9350 }, { "epoch": 0.018907792192051454, "grad_norm": 263.0729064941406, "learning_rate": 1.8720000000000002e-06, "loss": 37.4684, "step": 9360 }, { "epoch": 0.01892799282473527, "grad_norm": 380.9259338378906, "learning_rate": 1.8740000000000002e-06, "loss": 57.6389, "step": 9370 }, { "epoch": 0.018948193457419087, "grad_norm": 776.1428833007812, "learning_rate": 1.8760000000000001e-06, "loss": 33.6125, "step": 9380 }, { "epoch": 0.0189683940901029, "grad_norm": 129.7312774658203, "learning_rate": 1.878e-06, "loss": 29.1419, "step": 9390 }, { "epoch": 0.018988594722786717, "grad_norm": 584.4346923828125, "learning_rate": 1.8800000000000002e-06, "loss": 66.9572, "step": 9400 }, { "epoch": 0.019008795355470533, "grad_norm": 269.2972106933594, "learning_rate": 1.8820000000000001e-06, "loss": 58.4688, "step": 9410 }, { "epoch": 0.01902899598815435, "grad_norm": 82.5880355834961, "learning_rate": 1.8840000000000003e-06, "loss": 45.3889, "step": 9420 }, { "epoch": 0.019049196620838163, "grad_norm": 291.2400207519531, "learning_rate": 1.886e-06, "loss": 54.5736, "step": 9430 }, { "epoch": 0.01906939725352198, "grad_norm": 3058.456298828125, "learning_rate": 1.8880000000000002e-06, "loss": 54.1991, "step": 9440 }, { "epoch": 0.019089597886205796, "grad_norm": 136.60354614257812, "learning_rate": 1.8900000000000001e-06, "loss": 32.779, "step": 9450 }, { "epoch": 0.019109798518889613, "grad_norm": 356.87255859375, "learning_rate": 1.8920000000000003e-06, "loss": 35.1289, "step": 9460 }, { "epoch": 0.019129999151573426, "grad_norm": 363.86920166015625, "learning_rate": 1.8940000000000002e-06, "loss": 48.3396, "step": 9470 }, { "epoch": 0.019150199784257242, "grad_norm": 406.7328796386719, "learning_rate": 1.8960000000000001e-06, "loss": 48.6558, "step": 9480 }, { "epoch": 0.01917040041694106, "grad_norm": 1153.3697509765625, "learning_rate": 1.898e-06, "loss": 63.5402, "step": 9490 }, { "epoch": 0.019190601049624875, "grad_norm": 310.006103515625, "learning_rate": 1.9000000000000002e-06, "loss": 48.6226, "step": 9500 }, { "epoch": 0.01921080168230869, "grad_norm": 129.1284942626953, "learning_rate": 1.9020000000000002e-06, "loss": 30.8695, "step": 9510 }, { "epoch": 0.019231002314992505, "grad_norm": 215.62721252441406, "learning_rate": 1.9040000000000003e-06, "loss": 65.9087, "step": 9520 }, { "epoch": 0.01925120294767632, "grad_norm": 212.12661743164062, "learning_rate": 1.906e-06, "loss": 49.643, "step": 9530 }, { "epoch": 0.019271403580360138, "grad_norm": 208.60308837890625, "learning_rate": 1.908e-06, "loss": 42.0915, "step": 9540 }, { "epoch": 0.01929160421304395, "grad_norm": 361.6742858886719, "learning_rate": 1.9100000000000003e-06, "loss": 41.3087, "step": 9550 }, { "epoch": 0.019311804845727767, "grad_norm": 513.896484375, "learning_rate": 1.912e-06, "loss": 52.231, "step": 9560 }, { "epoch": 0.019332005478411584, "grad_norm": 157.21034240722656, "learning_rate": 1.9140000000000002e-06, "loss": 27.2277, "step": 9570 }, { "epoch": 0.0193522061110954, "grad_norm": 114.7160873413086, "learning_rate": 1.916e-06, "loss": 36.5882, "step": 9580 }, { "epoch": 0.019372406743779214, "grad_norm": 0.0, "learning_rate": 1.918e-06, "loss": 52.1878, "step": 9590 }, { "epoch": 0.01939260737646303, "grad_norm": 1020.5518798828125, "learning_rate": 1.9200000000000003e-06, "loss": 27.813, "step": 9600 }, { "epoch": 0.019412808009146847, "grad_norm": 434.9315185546875, "learning_rate": 1.9220000000000004e-06, "loss": 35.4451, "step": 9610 }, { "epoch": 0.019433008641830663, "grad_norm": 1793.9923095703125, "learning_rate": 1.924e-06, "loss": 29.0849, "step": 9620 }, { "epoch": 0.019453209274514476, "grad_norm": 163.08656311035156, "learning_rate": 1.9260000000000003e-06, "loss": 100.4541, "step": 9630 }, { "epoch": 0.019473409907198293, "grad_norm": 378.39068603515625, "learning_rate": 1.928e-06, "loss": 34.0765, "step": 9640 }, { "epoch": 0.01949361053988211, "grad_norm": 79.46031188964844, "learning_rate": 1.93e-06, "loss": 44.4575, "step": 9650 }, { "epoch": 0.019513811172565926, "grad_norm": 582.2685546875, "learning_rate": 1.9320000000000003e-06, "loss": 50.456, "step": 9660 }, { "epoch": 0.01953401180524974, "grad_norm": 202.60601806640625, "learning_rate": 1.934e-06, "loss": 42.0544, "step": 9670 }, { "epoch": 0.019554212437933555, "grad_norm": 574.6217651367188, "learning_rate": 1.936e-06, "loss": 31.1172, "step": 9680 }, { "epoch": 0.019574413070617372, "grad_norm": 352.7018127441406, "learning_rate": 1.9380000000000003e-06, "loss": 60.0929, "step": 9690 }, { "epoch": 0.01959461370330119, "grad_norm": 324.1878662109375, "learning_rate": 1.94e-06, "loss": 73.8136, "step": 9700 }, { "epoch": 0.019614814335985, "grad_norm": 272.65777587890625, "learning_rate": 1.942e-06, "loss": 35.1319, "step": 9710 }, { "epoch": 0.019635014968668818, "grad_norm": 37.96261215209961, "learning_rate": 1.944e-06, "loss": 24.9621, "step": 9720 }, { "epoch": 0.019655215601352635, "grad_norm": 821.6961059570312, "learning_rate": 1.946e-06, "loss": 47.0221, "step": 9730 }, { "epoch": 0.01967541623403645, "grad_norm": 972.8956909179688, "learning_rate": 1.9480000000000002e-06, "loss": 70.0484, "step": 9740 }, { "epoch": 0.019695616866720264, "grad_norm": 270.1746520996094, "learning_rate": 1.9500000000000004e-06, "loss": 27.4442, "step": 9750 }, { "epoch": 0.01971581749940408, "grad_norm": 163.73524475097656, "learning_rate": 1.952e-06, "loss": 39.364, "step": 9760 }, { "epoch": 0.019736018132087897, "grad_norm": 186.28646850585938, "learning_rate": 1.9540000000000003e-06, "loss": 45.451, "step": 9770 }, { "epoch": 0.019756218764771714, "grad_norm": 136.76901245117188, "learning_rate": 1.956e-06, "loss": 26.4623, "step": 9780 }, { "epoch": 0.019776419397455527, "grad_norm": 232.40606689453125, "learning_rate": 1.958e-06, "loss": 76.0848, "step": 9790 }, { "epoch": 0.019796620030139343, "grad_norm": 181.39744567871094, "learning_rate": 1.9600000000000003e-06, "loss": 61.2081, "step": 9800 }, { "epoch": 0.01981682066282316, "grad_norm": 510.6937561035156, "learning_rate": 1.9620000000000004e-06, "loss": 34.806, "step": 9810 }, { "epoch": 0.019837021295506976, "grad_norm": 470.3630065917969, "learning_rate": 1.964e-06, "loss": 43.0595, "step": 9820 }, { "epoch": 0.01985722192819079, "grad_norm": 381.4649658203125, "learning_rate": 1.9660000000000003e-06, "loss": 18.4898, "step": 9830 }, { "epoch": 0.019877422560874606, "grad_norm": 83.20272827148438, "learning_rate": 1.968e-06, "loss": 41.4432, "step": 9840 }, { "epoch": 0.019897623193558422, "grad_norm": 302.71673583984375, "learning_rate": 1.97e-06, "loss": 43.7179, "step": 9850 }, { "epoch": 0.01991782382624224, "grad_norm": 456.73748779296875, "learning_rate": 1.972e-06, "loss": 72.2715, "step": 9860 }, { "epoch": 0.019938024458926052, "grad_norm": 470.3398742675781, "learning_rate": 1.974e-06, "loss": 54.9278, "step": 9870 }, { "epoch": 0.01995822509160987, "grad_norm": 220.35501098632812, "learning_rate": 1.9760000000000002e-06, "loss": 65.0079, "step": 9880 }, { "epoch": 0.019978425724293685, "grad_norm": 427.4583740234375, "learning_rate": 1.9780000000000004e-06, "loss": 46.6106, "step": 9890 }, { "epoch": 0.0199986263569775, "grad_norm": 167.46343994140625, "learning_rate": 1.98e-06, "loss": 40.6886, "step": 9900 }, { "epoch": 0.020018826989661315, "grad_norm": 582.0673217773438, "learning_rate": 1.982e-06, "loss": 24.0038, "step": 9910 }, { "epoch": 0.02003902762234513, "grad_norm": 397.1203918457031, "learning_rate": 1.984e-06, "loss": 36.55, "step": 9920 }, { "epoch": 0.020059228255028948, "grad_norm": 193.5870361328125, "learning_rate": 1.986e-06, "loss": 56.7177, "step": 9930 }, { "epoch": 0.020079428887712764, "grad_norm": 321.87945556640625, "learning_rate": 1.9880000000000003e-06, "loss": 79.4858, "step": 9940 }, { "epoch": 0.020099629520396577, "grad_norm": 209.6648712158203, "learning_rate": 1.9900000000000004e-06, "loss": 29.5404, "step": 9950 }, { "epoch": 0.020119830153080394, "grad_norm": 112.41207122802734, "learning_rate": 1.992e-06, "loss": 40.6446, "step": 9960 }, { "epoch": 0.02014003078576421, "grad_norm": 572.4705810546875, "learning_rate": 1.9940000000000003e-06, "loss": 53.9859, "step": 9970 }, { "epoch": 0.020160231418448027, "grad_norm": 223.58258056640625, "learning_rate": 1.996e-06, "loss": 51.1967, "step": 9980 }, { "epoch": 0.02018043205113184, "grad_norm": 0.0, "learning_rate": 1.998e-06, "loss": 40.1106, "step": 9990 }, { "epoch": 0.020200632683815656, "grad_norm": 428.3232421875, "learning_rate": 2.0000000000000003e-06, "loss": 34.0316, "step": 10000 }, { "epoch": 0.020220833316499473, "grad_norm": 295.87774658203125, "learning_rate": 2.002e-06, "loss": 34.769, "step": 10010 }, { "epoch": 0.02024103394918329, "grad_norm": 99.37332153320312, "learning_rate": 2.004e-06, "loss": 52.353, "step": 10020 }, { "epoch": 0.020261234581867103, "grad_norm": 432.0141296386719, "learning_rate": 2.0060000000000004e-06, "loss": 32.164, "step": 10030 }, { "epoch": 0.02028143521455092, "grad_norm": 176.73822021484375, "learning_rate": 2.008e-06, "loss": 25.2481, "step": 10040 }, { "epoch": 0.020301635847234736, "grad_norm": 205.23367309570312, "learning_rate": 2.0100000000000002e-06, "loss": 46.4443, "step": 10050 }, { "epoch": 0.020321836479918552, "grad_norm": 587.490966796875, "learning_rate": 2.012e-06, "loss": 63.2728, "step": 10060 }, { "epoch": 0.020342037112602365, "grad_norm": 278.8817138671875, "learning_rate": 2.014e-06, "loss": 53.831, "step": 10070 }, { "epoch": 0.020362237745286182, "grad_norm": 488.0081787109375, "learning_rate": 2.0160000000000003e-06, "loss": 53.8497, "step": 10080 }, { "epoch": 0.02038243837797, "grad_norm": 141.2911376953125, "learning_rate": 2.0180000000000004e-06, "loss": 25.1832, "step": 10090 }, { "epoch": 0.020402639010653815, "grad_norm": 338.76025390625, "learning_rate": 2.02e-06, "loss": 56.9978, "step": 10100 }, { "epoch": 0.020422839643337628, "grad_norm": 610.8556518554688, "learning_rate": 2.022e-06, "loss": 76.4532, "step": 10110 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 2.024e-06, "loss": 54.1623, "step": 10120 }, { "epoch": 0.02046324090870526, "grad_norm": 151.93629455566406, "learning_rate": 2.026e-06, "loss": 34.2133, "step": 10130 }, { "epoch": 0.020483441541389077, "grad_norm": 532.8419799804688, "learning_rate": 2.0280000000000003e-06, "loss": 37.7632, "step": 10140 }, { "epoch": 0.02050364217407289, "grad_norm": 102.22826385498047, "learning_rate": 2.0300000000000005e-06, "loss": 55.4871, "step": 10150 }, { "epoch": 0.020523842806756707, "grad_norm": 275.99969482421875, "learning_rate": 2.032e-06, "loss": 56.1115, "step": 10160 }, { "epoch": 0.020544043439440524, "grad_norm": 234.353271484375, "learning_rate": 2.0340000000000003e-06, "loss": 69.768, "step": 10170 }, { "epoch": 0.02056424407212434, "grad_norm": 200.06785583496094, "learning_rate": 2.036e-06, "loss": 59.5012, "step": 10180 }, { "epoch": 0.020584444704808153, "grad_norm": 564.83447265625, "learning_rate": 2.0380000000000002e-06, "loss": 69.7025, "step": 10190 }, { "epoch": 0.02060464533749197, "grad_norm": 219.81317138671875, "learning_rate": 2.04e-06, "loss": 21.419, "step": 10200 }, { "epoch": 0.020624845970175786, "grad_norm": 97.41570281982422, "learning_rate": 2.042e-06, "loss": 33.3034, "step": 10210 }, { "epoch": 0.020645046602859603, "grad_norm": 287.6110534667969, "learning_rate": 2.0440000000000003e-06, "loss": 75.8817, "step": 10220 }, { "epoch": 0.020665247235543416, "grad_norm": 204.6526336669922, "learning_rate": 2.0460000000000004e-06, "loss": 25.8786, "step": 10230 }, { "epoch": 0.020685447868227232, "grad_norm": 218.83187866210938, "learning_rate": 2.048e-06, "loss": 56.8296, "step": 10240 }, { "epoch": 0.02070564850091105, "grad_norm": 420.64398193359375, "learning_rate": 2.05e-06, "loss": 30.5088, "step": 10250 }, { "epoch": 0.020725849133594865, "grad_norm": 391.420654296875, "learning_rate": 2.052e-06, "loss": 40.9669, "step": 10260 }, { "epoch": 0.02074604976627868, "grad_norm": 182.8827362060547, "learning_rate": 2.054e-06, "loss": 45.7673, "step": 10270 }, { "epoch": 0.020766250398962495, "grad_norm": 268.2335205078125, "learning_rate": 2.0560000000000003e-06, "loss": 30.8503, "step": 10280 }, { "epoch": 0.02078645103164631, "grad_norm": 258.5160827636719, "learning_rate": 2.0580000000000005e-06, "loss": 28.1244, "step": 10290 }, { "epoch": 0.020806651664330128, "grad_norm": 645.3989868164062, "learning_rate": 2.06e-06, "loss": 40.2535, "step": 10300 }, { "epoch": 0.02082685229701394, "grad_norm": 150.24166870117188, "learning_rate": 2.062e-06, "loss": 50.1544, "step": 10310 }, { "epoch": 0.020847052929697758, "grad_norm": 149.16676330566406, "learning_rate": 2.064e-06, "loss": 35.9737, "step": 10320 }, { "epoch": 0.020867253562381574, "grad_norm": 98.6734848022461, "learning_rate": 2.066e-06, "loss": 39.077, "step": 10330 }, { "epoch": 0.02088745419506539, "grad_norm": 347.6166687011719, "learning_rate": 2.0680000000000004e-06, "loss": 59.9156, "step": 10340 }, { "epoch": 0.020907654827749204, "grad_norm": 369.2608642578125, "learning_rate": 2.07e-06, "loss": 54.759, "step": 10350 }, { "epoch": 0.02092785546043302, "grad_norm": 418.8649597167969, "learning_rate": 2.0720000000000002e-06, "loss": 36.2663, "step": 10360 }, { "epoch": 0.020948056093116837, "grad_norm": 260.66864013671875, "learning_rate": 2.0740000000000004e-06, "loss": 60.89, "step": 10370 }, { "epoch": 0.020968256725800653, "grad_norm": 320.0971374511719, "learning_rate": 2.076e-06, "loss": 39.269, "step": 10380 }, { "epoch": 0.020988457358484466, "grad_norm": 321.69744873046875, "learning_rate": 2.0780000000000003e-06, "loss": 59.6879, "step": 10390 }, { "epoch": 0.021008657991168283, "grad_norm": 260.941650390625, "learning_rate": 2.08e-06, "loss": 41.2504, "step": 10400 }, { "epoch": 0.0210288586238521, "grad_norm": 898.9729614257812, "learning_rate": 2.082e-06, "loss": 44.8987, "step": 10410 }, { "epoch": 0.021049059256535916, "grad_norm": 362.7566223144531, "learning_rate": 2.0840000000000003e-06, "loss": 49.0057, "step": 10420 }, { "epoch": 0.02106925988921973, "grad_norm": 0.0, "learning_rate": 2.0860000000000004e-06, "loss": 13.9838, "step": 10430 }, { "epoch": 0.021089460521903546, "grad_norm": 219.5571746826172, "learning_rate": 2.088e-06, "loss": 64.3665, "step": 10440 }, { "epoch": 0.021109661154587362, "grad_norm": 261.9483947753906, "learning_rate": 2.09e-06, "loss": 34.239, "step": 10450 }, { "epoch": 0.02112986178727118, "grad_norm": 152.23330688476562, "learning_rate": 2.092e-06, "loss": 68.9592, "step": 10460 }, { "epoch": 0.02115006241995499, "grad_norm": 668.8399658203125, "learning_rate": 2.094e-06, "loss": 73.6005, "step": 10470 }, { "epoch": 0.021170263052638808, "grad_norm": 359.2843322753906, "learning_rate": 2.0960000000000003e-06, "loss": 56.6298, "step": 10480 }, { "epoch": 0.021190463685322625, "grad_norm": 266.23931884765625, "learning_rate": 2.098e-06, "loss": 33.1824, "step": 10490 }, { "epoch": 0.02121066431800644, "grad_norm": 362.0553283691406, "learning_rate": 2.1000000000000002e-06, "loss": 76.1807, "step": 10500 }, { "epoch": 0.021230864950690254, "grad_norm": 602.5369873046875, "learning_rate": 2.102e-06, "loss": 34.5589, "step": 10510 }, { "epoch": 0.02125106558337407, "grad_norm": 640.1030883789062, "learning_rate": 2.104e-06, "loss": 57.8576, "step": 10520 }, { "epoch": 0.021271266216057887, "grad_norm": 205.66590881347656, "learning_rate": 2.1060000000000002e-06, "loss": 22.8045, "step": 10530 }, { "epoch": 0.021291466848741704, "grad_norm": 349.5789489746094, "learning_rate": 2.108e-06, "loss": 68.5073, "step": 10540 }, { "epoch": 0.021311667481425517, "grad_norm": 423.2580261230469, "learning_rate": 2.11e-06, "loss": 47.9727, "step": 10550 }, { "epoch": 0.021331868114109333, "grad_norm": 137.4739227294922, "learning_rate": 2.1120000000000003e-06, "loss": 63.5778, "step": 10560 }, { "epoch": 0.02135206874679315, "grad_norm": 322.4593505859375, "learning_rate": 2.1140000000000004e-06, "loss": 32.455, "step": 10570 }, { "epoch": 0.021372269379476967, "grad_norm": 453.06103515625, "learning_rate": 2.116e-06, "loss": 73.3719, "step": 10580 }, { "epoch": 0.02139247001216078, "grad_norm": 618.5270385742188, "learning_rate": 2.118e-06, "loss": 45.3458, "step": 10590 }, { "epoch": 0.021412670644844596, "grad_norm": 159.2731475830078, "learning_rate": 2.12e-06, "loss": 53.7363, "step": 10600 }, { "epoch": 0.021432871277528413, "grad_norm": 232.80833435058594, "learning_rate": 2.122e-06, "loss": 67.0886, "step": 10610 }, { "epoch": 0.02145307191021223, "grad_norm": 751.0171508789062, "learning_rate": 2.1240000000000003e-06, "loss": 49.5216, "step": 10620 }, { "epoch": 0.021473272542896042, "grad_norm": 177.41848754882812, "learning_rate": 2.1260000000000005e-06, "loss": 34.3979, "step": 10630 }, { "epoch": 0.02149347317557986, "grad_norm": 353.36212158203125, "learning_rate": 2.128e-06, "loss": 50.934, "step": 10640 }, { "epoch": 0.021513673808263675, "grad_norm": 282.5599060058594, "learning_rate": 2.13e-06, "loss": 45.4779, "step": 10650 }, { "epoch": 0.021533874440947492, "grad_norm": 263.4095764160156, "learning_rate": 2.132e-06, "loss": 62.4613, "step": 10660 }, { "epoch": 0.021554075073631305, "grad_norm": 144.73733520507812, "learning_rate": 2.1340000000000002e-06, "loss": 40.568, "step": 10670 }, { "epoch": 0.02157427570631512, "grad_norm": 194.8964080810547, "learning_rate": 2.1360000000000004e-06, "loss": 44.4007, "step": 10680 }, { "epoch": 0.021594476338998938, "grad_norm": 255.43075561523438, "learning_rate": 2.138e-06, "loss": 49.0514, "step": 10690 }, { "epoch": 0.021614676971682754, "grad_norm": 237.04420471191406, "learning_rate": 2.1400000000000003e-06, "loss": 43.1618, "step": 10700 }, { "epoch": 0.021634877604366567, "grad_norm": 464.1810302734375, "learning_rate": 2.142e-06, "loss": 71.1446, "step": 10710 }, { "epoch": 0.021655078237050384, "grad_norm": 101.17876434326172, "learning_rate": 2.144e-06, "loss": 59.5601, "step": 10720 }, { "epoch": 0.0216752788697342, "grad_norm": 790.9976806640625, "learning_rate": 2.1460000000000003e-06, "loss": 48.3918, "step": 10730 }, { "epoch": 0.021695479502418017, "grad_norm": 182.5692901611328, "learning_rate": 2.148e-06, "loss": 56.1286, "step": 10740 }, { "epoch": 0.02171568013510183, "grad_norm": 371.9630432128906, "learning_rate": 2.15e-06, "loss": 50.3102, "step": 10750 }, { "epoch": 0.021735880767785647, "grad_norm": 190.81410217285156, "learning_rate": 2.1520000000000003e-06, "loss": 55.3936, "step": 10760 }, { "epoch": 0.021756081400469463, "grad_norm": 192.5929412841797, "learning_rate": 2.1540000000000005e-06, "loss": 46.2463, "step": 10770 }, { "epoch": 0.02177628203315328, "grad_norm": 376.2552185058594, "learning_rate": 2.156e-06, "loss": 63.2759, "step": 10780 }, { "epoch": 0.021796482665837093, "grad_norm": 238.96560668945312, "learning_rate": 2.158e-06, "loss": 76.4359, "step": 10790 }, { "epoch": 0.02181668329852091, "grad_norm": 586.9270629882812, "learning_rate": 2.16e-06, "loss": 59.5129, "step": 10800 }, { "epoch": 0.021836883931204726, "grad_norm": 115.39266204833984, "learning_rate": 2.1620000000000002e-06, "loss": 33.8499, "step": 10810 }, { "epoch": 0.021857084563888542, "grad_norm": 250.37428283691406, "learning_rate": 2.1640000000000004e-06, "loss": 50.3432, "step": 10820 }, { "epoch": 0.021877285196572355, "grad_norm": 188.4289093017578, "learning_rate": 2.166e-06, "loss": 40.697, "step": 10830 }, { "epoch": 0.021897485829256172, "grad_norm": 452.99798583984375, "learning_rate": 2.1680000000000002e-06, "loss": 73.5972, "step": 10840 }, { "epoch": 0.02191768646193999, "grad_norm": 196.8468780517578, "learning_rate": 2.17e-06, "loss": 27.6284, "step": 10850 }, { "epoch": 0.021937887094623805, "grad_norm": 0.0, "learning_rate": 2.172e-06, "loss": 50.4311, "step": 10860 }, { "epoch": 0.021958087727307618, "grad_norm": 1111.32421875, "learning_rate": 2.1740000000000003e-06, "loss": 50.3253, "step": 10870 }, { "epoch": 0.021978288359991435, "grad_norm": 155.26210021972656, "learning_rate": 2.176e-06, "loss": 61.6213, "step": 10880 }, { "epoch": 0.02199848899267525, "grad_norm": 87.15393829345703, "learning_rate": 2.178e-06, "loss": 50.7253, "step": 10890 }, { "epoch": 0.022018689625359068, "grad_norm": 756.1099853515625, "learning_rate": 2.1800000000000003e-06, "loss": 78.7654, "step": 10900 }, { "epoch": 0.02203889025804288, "grad_norm": 376.6233825683594, "learning_rate": 2.182e-06, "loss": 63.6584, "step": 10910 }, { "epoch": 0.022059090890726697, "grad_norm": 341.4061584472656, "learning_rate": 2.184e-06, "loss": 65.9076, "step": 10920 }, { "epoch": 0.022079291523410514, "grad_norm": 274.8430480957031, "learning_rate": 2.186e-06, "loss": 52.9071, "step": 10930 }, { "epoch": 0.02209949215609433, "grad_norm": 603.0218505859375, "learning_rate": 2.188e-06, "loss": 35.5991, "step": 10940 }, { "epoch": 0.022119692788778143, "grad_norm": 0.0, "learning_rate": 2.19e-06, "loss": 48.9943, "step": 10950 }, { "epoch": 0.02213989342146196, "grad_norm": 609.9144897460938, "learning_rate": 2.1920000000000004e-06, "loss": 38.0919, "step": 10960 }, { "epoch": 0.022160094054145776, "grad_norm": 105.98370361328125, "learning_rate": 2.194e-06, "loss": 41.4985, "step": 10970 }, { "epoch": 0.022180294686829593, "grad_norm": 575.1429443359375, "learning_rate": 2.1960000000000002e-06, "loss": 46.5475, "step": 10980 }, { "epoch": 0.022200495319513406, "grad_norm": 583.4144287109375, "learning_rate": 2.198e-06, "loss": 67.3561, "step": 10990 }, { "epoch": 0.022220695952197222, "grad_norm": 634.2700805664062, "learning_rate": 2.2e-06, "loss": 45.7536, "step": 11000 }, { "epoch": 0.02224089658488104, "grad_norm": 81.18490600585938, "learning_rate": 2.2020000000000003e-06, "loss": 27.0009, "step": 11010 }, { "epoch": 0.022261097217564856, "grad_norm": 332.67291259765625, "learning_rate": 2.2040000000000004e-06, "loss": 53.5956, "step": 11020 }, { "epoch": 0.02228129785024867, "grad_norm": 394.4222412109375, "learning_rate": 2.206e-06, "loss": 48.1988, "step": 11030 }, { "epoch": 0.022301498482932485, "grad_norm": 176.25352478027344, "learning_rate": 2.2080000000000003e-06, "loss": 44.3855, "step": 11040 }, { "epoch": 0.0223216991156163, "grad_norm": 328.1612243652344, "learning_rate": 2.21e-06, "loss": 30.4991, "step": 11050 }, { "epoch": 0.022341899748300118, "grad_norm": 304.53131103515625, "learning_rate": 2.212e-06, "loss": 48.3507, "step": 11060 }, { "epoch": 0.02236210038098393, "grad_norm": 242.24176025390625, "learning_rate": 2.2140000000000003e-06, "loss": 47.413, "step": 11070 }, { "epoch": 0.022382301013667748, "grad_norm": 0.0, "learning_rate": 2.216e-06, "loss": 41.6289, "step": 11080 }, { "epoch": 0.022402501646351564, "grad_norm": 332.69659423828125, "learning_rate": 2.218e-06, "loss": 78.4257, "step": 11090 }, { "epoch": 0.02242270227903538, "grad_norm": 307.95697021484375, "learning_rate": 2.2200000000000003e-06, "loss": 33.0658, "step": 11100 }, { "epoch": 0.022442902911719194, "grad_norm": 338.1942138671875, "learning_rate": 2.222e-06, "loss": 27.2714, "step": 11110 }, { "epoch": 0.02246310354440301, "grad_norm": 290.9442138671875, "learning_rate": 2.2240000000000002e-06, "loss": 55.7893, "step": 11120 }, { "epoch": 0.022483304177086827, "grad_norm": 177.29563903808594, "learning_rate": 2.226e-06, "loss": 21.8646, "step": 11130 }, { "epoch": 0.022503504809770643, "grad_norm": 226.6997528076172, "learning_rate": 2.228e-06, "loss": 22.9179, "step": 11140 }, { "epoch": 0.022523705442454457, "grad_norm": 160.12940979003906, "learning_rate": 2.2300000000000002e-06, "loss": 38.8846, "step": 11150 }, { "epoch": 0.022543906075138273, "grad_norm": 73.85074615478516, "learning_rate": 2.2320000000000004e-06, "loss": 59.4805, "step": 11160 }, { "epoch": 0.02256410670782209, "grad_norm": 429.3727722167969, "learning_rate": 2.234e-06, "loss": 64.2051, "step": 11170 }, { "epoch": 0.022584307340505906, "grad_norm": 332.2426452636719, "learning_rate": 2.2360000000000003e-06, "loss": 44.1833, "step": 11180 }, { "epoch": 0.02260450797318972, "grad_norm": 464.2291564941406, "learning_rate": 2.238e-06, "loss": 36.5558, "step": 11190 }, { "epoch": 0.022624708605873536, "grad_norm": 335.1011047363281, "learning_rate": 2.24e-06, "loss": 43.435, "step": 11200 }, { "epoch": 0.022644909238557352, "grad_norm": 359.72283935546875, "learning_rate": 2.2420000000000003e-06, "loss": 26.3622, "step": 11210 }, { "epoch": 0.02266510987124117, "grad_norm": 1684.224365234375, "learning_rate": 2.244e-06, "loss": 68.1199, "step": 11220 }, { "epoch": 0.022685310503924982, "grad_norm": 143.28843688964844, "learning_rate": 2.246e-06, "loss": 43.7691, "step": 11230 }, { "epoch": 0.0227055111366088, "grad_norm": 0.0, "learning_rate": 2.2480000000000003e-06, "loss": 31.2114, "step": 11240 }, { "epoch": 0.022725711769292615, "grad_norm": 377.8698425292969, "learning_rate": 2.25e-06, "loss": 102.1561, "step": 11250 }, { "epoch": 0.02274591240197643, "grad_norm": 400.2671813964844, "learning_rate": 2.252e-06, "loss": 23.6856, "step": 11260 }, { "epoch": 0.022766113034660244, "grad_norm": 167.99783325195312, "learning_rate": 2.254e-06, "loss": 38.0844, "step": 11270 }, { "epoch": 0.02278631366734406, "grad_norm": 129.82763671875, "learning_rate": 2.256e-06, "loss": 58.0657, "step": 11280 }, { "epoch": 0.022806514300027877, "grad_norm": 341.7176818847656, "learning_rate": 2.2580000000000002e-06, "loss": 52.8704, "step": 11290 }, { "epoch": 0.022826714932711694, "grad_norm": 905.6890869140625, "learning_rate": 2.2600000000000004e-06, "loss": 51.6271, "step": 11300 }, { "epoch": 0.022846915565395507, "grad_norm": 127.9675521850586, "learning_rate": 2.262e-06, "loss": 42.4492, "step": 11310 }, { "epoch": 0.022867116198079324, "grad_norm": 293.3504333496094, "learning_rate": 2.2640000000000003e-06, "loss": 59.1196, "step": 11320 }, { "epoch": 0.02288731683076314, "grad_norm": 251.96609497070312, "learning_rate": 2.266e-06, "loss": 24.466, "step": 11330 }, { "epoch": 0.022907517463446957, "grad_norm": 325.12579345703125, "learning_rate": 2.268e-06, "loss": 50.2213, "step": 11340 }, { "epoch": 0.02292771809613077, "grad_norm": 107.30098724365234, "learning_rate": 2.2700000000000003e-06, "loss": 37.4759, "step": 11350 }, { "epoch": 0.022947918728814586, "grad_norm": 84.32604217529297, "learning_rate": 2.2720000000000004e-06, "loss": 34.1462, "step": 11360 }, { "epoch": 0.022968119361498403, "grad_norm": 232.92617797851562, "learning_rate": 2.274e-06, "loss": 55.2713, "step": 11370 }, { "epoch": 0.02298831999418222, "grad_norm": 1127.71484375, "learning_rate": 2.2760000000000003e-06, "loss": 65.332, "step": 11380 }, { "epoch": 0.023008520626866032, "grad_norm": 276.7524108886719, "learning_rate": 2.278e-06, "loss": 53.6203, "step": 11390 }, { "epoch": 0.02302872125954985, "grad_norm": 206.6869659423828, "learning_rate": 2.28e-06, "loss": 31.0706, "step": 11400 }, { "epoch": 0.023048921892233665, "grad_norm": 883.7406005859375, "learning_rate": 2.282e-06, "loss": 51.9863, "step": 11410 }, { "epoch": 0.023069122524917482, "grad_norm": 282.30181884765625, "learning_rate": 2.284e-06, "loss": 35.6595, "step": 11420 }, { "epoch": 0.023089323157601295, "grad_norm": 44.85686111450195, "learning_rate": 2.2860000000000002e-06, "loss": 49.5147, "step": 11430 }, { "epoch": 0.02310952379028511, "grad_norm": 102.28099822998047, "learning_rate": 2.2880000000000004e-06, "loss": 31.908, "step": 11440 }, { "epoch": 0.023129724422968928, "grad_norm": 450.2377624511719, "learning_rate": 2.29e-06, "loss": 50.0444, "step": 11450 }, { "epoch": 0.023149925055652745, "grad_norm": 177.01107788085938, "learning_rate": 2.2920000000000002e-06, "loss": 20.6122, "step": 11460 }, { "epoch": 0.023170125688336558, "grad_norm": 474.9612121582031, "learning_rate": 2.294e-06, "loss": 43.8831, "step": 11470 }, { "epoch": 0.023190326321020374, "grad_norm": 103.68824005126953, "learning_rate": 2.296e-06, "loss": 41.9279, "step": 11480 }, { "epoch": 0.02321052695370419, "grad_norm": 429.5981140136719, "learning_rate": 2.2980000000000003e-06, "loss": 53.122, "step": 11490 }, { "epoch": 0.023230727586388007, "grad_norm": 138.7250213623047, "learning_rate": 2.3000000000000004e-06, "loss": 51.4907, "step": 11500 }, { "epoch": 0.02325092821907182, "grad_norm": 508.7729187011719, "learning_rate": 2.302e-06, "loss": 69.3198, "step": 11510 }, { "epoch": 0.023271128851755637, "grad_norm": 766.4754028320312, "learning_rate": 2.3040000000000003e-06, "loss": 70.6145, "step": 11520 }, { "epoch": 0.023291329484439453, "grad_norm": 181.5928192138672, "learning_rate": 2.306e-06, "loss": 31.484, "step": 11530 }, { "epoch": 0.02331153011712327, "grad_norm": 429.3660888671875, "learning_rate": 2.308e-06, "loss": 55.6529, "step": 11540 }, { "epoch": 0.023331730749807083, "grad_norm": 225.7996063232422, "learning_rate": 2.3100000000000003e-06, "loss": 29.2211, "step": 11550 }, { "epoch": 0.0233519313824909, "grad_norm": 138.4298095703125, "learning_rate": 2.312e-06, "loss": 38.6305, "step": 11560 }, { "epoch": 0.023372132015174716, "grad_norm": 262.80926513671875, "learning_rate": 2.314e-06, "loss": 52.4794, "step": 11570 }, { "epoch": 0.023392332647858533, "grad_norm": 100.20157623291016, "learning_rate": 2.3160000000000004e-06, "loss": 60.256, "step": 11580 }, { "epoch": 0.023412533280542346, "grad_norm": 135.98326110839844, "learning_rate": 2.318e-06, "loss": 37.8744, "step": 11590 }, { "epoch": 0.023432733913226162, "grad_norm": 144.24481201171875, "learning_rate": 2.3200000000000002e-06, "loss": 53.2419, "step": 11600 }, { "epoch": 0.02345293454590998, "grad_norm": 316.55731201171875, "learning_rate": 2.322e-06, "loss": 41.4914, "step": 11610 }, { "epoch": 0.023473135178593795, "grad_norm": 482.2180480957031, "learning_rate": 2.324e-06, "loss": 32.2248, "step": 11620 }, { "epoch": 0.023493335811277608, "grad_norm": 333.2016296386719, "learning_rate": 2.3260000000000003e-06, "loss": 43.0864, "step": 11630 }, { "epoch": 0.023513536443961425, "grad_norm": 664.9674682617188, "learning_rate": 2.3280000000000004e-06, "loss": 58.9295, "step": 11640 }, { "epoch": 0.02353373707664524, "grad_norm": 295.85565185546875, "learning_rate": 2.33e-06, "loss": 32.0055, "step": 11650 }, { "epoch": 0.023553937709329058, "grad_norm": 132.8603057861328, "learning_rate": 2.3320000000000003e-06, "loss": 34.6134, "step": 11660 }, { "epoch": 0.02357413834201287, "grad_norm": 101.38378143310547, "learning_rate": 2.334e-06, "loss": 30.0764, "step": 11670 }, { "epoch": 0.023594338974696687, "grad_norm": 271.4847106933594, "learning_rate": 2.336e-06, "loss": 54.246, "step": 11680 }, { "epoch": 0.023614539607380504, "grad_norm": 214.5764617919922, "learning_rate": 2.3380000000000003e-06, "loss": 41.0704, "step": 11690 }, { "epoch": 0.02363474024006432, "grad_norm": 362.39111328125, "learning_rate": 2.3400000000000005e-06, "loss": 82.1394, "step": 11700 }, { "epoch": 0.023654940872748133, "grad_norm": 662.4210815429688, "learning_rate": 2.342e-06, "loss": 60.6132, "step": 11710 }, { "epoch": 0.02367514150543195, "grad_norm": 629.7857055664062, "learning_rate": 2.3440000000000003e-06, "loss": 56.8645, "step": 11720 }, { "epoch": 0.023695342138115767, "grad_norm": 327.68603515625, "learning_rate": 2.346e-06, "loss": 40.2616, "step": 11730 }, { "epoch": 0.023715542770799583, "grad_norm": 552.84228515625, "learning_rate": 2.3480000000000002e-06, "loss": 51.6613, "step": 11740 }, { "epoch": 0.023735743403483396, "grad_norm": 529.7648315429688, "learning_rate": 2.35e-06, "loss": 46.0532, "step": 11750 }, { "epoch": 0.023755944036167213, "grad_norm": 64.5331802368164, "learning_rate": 2.352e-06, "loss": 35.8116, "step": 11760 }, { "epoch": 0.02377614466885103, "grad_norm": 124.65154266357422, "learning_rate": 2.3540000000000002e-06, "loss": 33.9766, "step": 11770 }, { "epoch": 0.023796345301534846, "grad_norm": 498.1767883300781, "learning_rate": 2.3560000000000004e-06, "loss": 52.5094, "step": 11780 }, { "epoch": 0.02381654593421866, "grad_norm": 1204.731201171875, "learning_rate": 2.358e-06, "loss": 73.6493, "step": 11790 }, { "epoch": 0.023836746566902475, "grad_norm": 674.3571166992188, "learning_rate": 2.3600000000000003e-06, "loss": 69.2688, "step": 11800 }, { "epoch": 0.023856947199586292, "grad_norm": 837.708984375, "learning_rate": 2.362e-06, "loss": 59.2901, "step": 11810 }, { "epoch": 0.02387714783227011, "grad_norm": 197.61724853515625, "learning_rate": 2.364e-06, "loss": 40.6122, "step": 11820 }, { "epoch": 0.02389734846495392, "grad_norm": 182.92202758789062, "learning_rate": 2.3660000000000003e-06, "loss": 25.1579, "step": 11830 }, { "epoch": 0.023917549097637738, "grad_norm": 320.63275146484375, "learning_rate": 2.3680000000000005e-06, "loss": 24.202, "step": 11840 }, { "epoch": 0.023937749730321554, "grad_norm": 217.47265625, "learning_rate": 2.37e-06, "loss": 39.2445, "step": 11850 }, { "epoch": 0.02395795036300537, "grad_norm": 267.4107666015625, "learning_rate": 2.3720000000000003e-06, "loss": 39.8156, "step": 11860 }, { "epoch": 0.023978150995689184, "grad_norm": 179.326904296875, "learning_rate": 2.374e-06, "loss": 44.9014, "step": 11870 }, { "epoch": 0.023998351628373, "grad_norm": 162.7501678466797, "learning_rate": 2.376e-06, "loss": 27.9884, "step": 11880 }, { "epoch": 0.024018552261056817, "grad_norm": 263.8769226074219, "learning_rate": 2.3780000000000004e-06, "loss": 59.1842, "step": 11890 }, { "epoch": 0.024038752893740634, "grad_norm": 280.52777099609375, "learning_rate": 2.38e-06, "loss": 32.3505, "step": 11900 }, { "epoch": 0.024058953526424447, "grad_norm": 837.8172607421875, "learning_rate": 2.3820000000000002e-06, "loss": 48.3469, "step": 11910 }, { "epoch": 0.024079154159108263, "grad_norm": 256.5196228027344, "learning_rate": 2.3840000000000004e-06, "loss": 32.892, "step": 11920 }, { "epoch": 0.02409935479179208, "grad_norm": 184.33314514160156, "learning_rate": 2.386e-06, "loss": 62.4075, "step": 11930 }, { "epoch": 0.024119555424475896, "grad_norm": 103.71321868896484, "learning_rate": 2.3880000000000003e-06, "loss": 32.6163, "step": 11940 }, { "epoch": 0.02413975605715971, "grad_norm": 422.09619140625, "learning_rate": 2.39e-06, "loss": 57.9794, "step": 11950 }, { "epoch": 0.024159956689843526, "grad_norm": 182.3944854736328, "learning_rate": 2.392e-06, "loss": 37.7552, "step": 11960 }, { "epoch": 0.024180157322527342, "grad_norm": 166.8063201904297, "learning_rate": 2.3940000000000003e-06, "loss": 22.269, "step": 11970 }, { "epoch": 0.02420035795521116, "grad_norm": 320.1501770019531, "learning_rate": 2.3960000000000004e-06, "loss": 26.1345, "step": 11980 }, { "epoch": 0.024220558587894972, "grad_norm": 384.1492919921875, "learning_rate": 2.398e-06, "loss": 46.6723, "step": 11990 }, { "epoch": 0.02424075922057879, "grad_norm": 219.89227294921875, "learning_rate": 2.4000000000000003e-06, "loss": 40.9178, "step": 12000 }, { "epoch": 0.024260959853262605, "grad_norm": 167.34783935546875, "learning_rate": 2.402e-06, "loss": 38.7879, "step": 12010 }, { "epoch": 0.02428116048594642, "grad_norm": 146.40858459472656, "learning_rate": 2.404e-06, "loss": 34.6729, "step": 12020 }, { "epoch": 0.024301361118630235, "grad_norm": 232.331787109375, "learning_rate": 2.4060000000000003e-06, "loss": 42.1024, "step": 12030 }, { "epoch": 0.02432156175131405, "grad_norm": 419.0692443847656, "learning_rate": 2.408e-06, "loss": 40.2177, "step": 12040 }, { "epoch": 0.024341762383997868, "grad_norm": 372.771240234375, "learning_rate": 2.4100000000000002e-06, "loss": 58.1593, "step": 12050 }, { "epoch": 0.024361963016681684, "grad_norm": 468.7264709472656, "learning_rate": 2.4120000000000004e-06, "loss": 39.7467, "step": 12060 }, { "epoch": 0.024382163649365497, "grad_norm": 288.8561706542969, "learning_rate": 2.414e-06, "loss": 25.8942, "step": 12070 }, { "epoch": 0.024402364282049314, "grad_norm": 179.99534606933594, "learning_rate": 2.4160000000000002e-06, "loss": 23.4446, "step": 12080 }, { "epoch": 0.02442256491473313, "grad_norm": 147.3505096435547, "learning_rate": 2.418e-06, "loss": 63.0682, "step": 12090 }, { "epoch": 0.024442765547416947, "grad_norm": 247.7600860595703, "learning_rate": 2.42e-06, "loss": 32.3235, "step": 12100 }, { "epoch": 0.02446296618010076, "grad_norm": 198.24481201171875, "learning_rate": 2.4220000000000003e-06, "loss": 48.3672, "step": 12110 }, { "epoch": 0.024483166812784576, "grad_norm": 162.43408203125, "learning_rate": 2.4240000000000004e-06, "loss": 30.9279, "step": 12120 }, { "epoch": 0.024503367445468393, "grad_norm": 73.84188842773438, "learning_rate": 2.426e-06, "loss": 49.4583, "step": 12130 }, { "epoch": 0.02452356807815221, "grad_norm": 272.1013488769531, "learning_rate": 2.428e-06, "loss": 50.8269, "step": 12140 }, { "epoch": 0.024543768710836023, "grad_norm": 484.6285095214844, "learning_rate": 2.43e-06, "loss": 37.9022, "step": 12150 }, { "epoch": 0.02456396934351984, "grad_norm": 750.84814453125, "learning_rate": 2.432e-06, "loss": 37.3827, "step": 12160 }, { "epoch": 0.024584169976203656, "grad_norm": 86.62442016601562, "learning_rate": 2.4340000000000003e-06, "loss": 60.0082, "step": 12170 }, { "epoch": 0.02460437060888747, "grad_norm": 231.9526824951172, "learning_rate": 2.4360000000000005e-06, "loss": 35.1838, "step": 12180 }, { "epoch": 0.024624571241571285, "grad_norm": 390.3502502441406, "learning_rate": 2.438e-06, "loss": 52.821, "step": 12190 }, { "epoch": 0.0246447718742551, "grad_norm": 243.09341430664062, "learning_rate": 2.4400000000000004e-06, "loss": 38.8829, "step": 12200 }, { "epoch": 0.024664972506938918, "grad_norm": 244.0282440185547, "learning_rate": 2.442e-06, "loss": 59.1749, "step": 12210 }, { "epoch": 0.02468517313962273, "grad_norm": 215.91539001464844, "learning_rate": 2.4440000000000002e-06, "loss": 42.9404, "step": 12220 }, { "epoch": 0.024705373772306548, "grad_norm": 202.6841583251953, "learning_rate": 2.4460000000000004e-06, "loss": 98.0985, "step": 12230 }, { "epoch": 0.024725574404990364, "grad_norm": 262.54583740234375, "learning_rate": 2.448e-06, "loss": 40.7682, "step": 12240 }, { "epoch": 0.02474577503767418, "grad_norm": 592.45166015625, "learning_rate": 2.4500000000000003e-06, "loss": 52.5005, "step": 12250 }, { "epoch": 0.024765975670357994, "grad_norm": 413.14630126953125, "learning_rate": 2.4520000000000004e-06, "loss": 40.7458, "step": 12260 }, { "epoch": 0.02478617630304181, "grad_norm": 544.0794677734375, "learning_rate": 2.454e-06, "loss": 36.2715, "step": 12270 }, { "epoch": 0.024806376935725627, "grad_norm": 139.6324462890625, "learning_rate": 2.4560000000000003e-06, "loss": 39.0732, "step": 12280 }, { "epoch": 0.024826577568409443, "grad_norm": 394.745849609375, "learning_rate": 2.458e-06, "loss": 59.4374, "step": 12290 }, { "epoch": 0.024846778201093257, "grad_norm": 147.4309844970703, "learning_rate": 2.46e-06, "loss": 35.3366, "step": 12300 }, { "epoch": 0.024866978833777073, "grad_norm": 192.0829620361328, "learning_rate": 2.4620000000000003e-06, "loss": 38.2872, "step": 12310 }, { "epoch": 0.02488717946646089, "grad_norm": 221.22621154785156, "learning_rate": 2.4640000000000005e-06, "loss": 51.6989, "step": 12320 }, { "epoch": 0.024907380099144706, "grad_norm": 220.3570098876953, "learning_rate": 2.466e-06, "loss": 51.1456, "step": 12330 }, { "epoch": 0.02492758073182852, "grad_norm": 204.1791229248047, "learning_rate": 2.468e-06, "loss": 39.9895, "step": 12340 }, { "epoch": 0.024947781364512336, "grad_norm": 536.9046630859375, "learning_rate": 2.47e-06, "loss": 57.7417, "step": 12350 }, { "epoch": 0.024967981997196152, "grad_norm": 102.09723663330078, "learning_rate": 2.4720000000000002e-06, "loss": 37.2909, "step": 12360 }, { "epoch": 0.02498818262987997, "grad_norm": 194.3726806640625, "learning_rate": 2.4740000000000004e-06, "loss": 53.6684, "step": 12370 }, { "epoch": 0.025008383262563782, "grad_norm": 320.6475830078125, "learning_rate": 2.476e-06, "loss": 34.7807, "step": 12380 }, { "epoch": 0.0250285838952476, "grad_norm": 561.5262451171875, "learning_rate": 2.4780000000000002e-06, "loss": 39.5933, "step": 12390 }, { "epoch": 0.025048784527931415, "grad_norm": 332.9320373535156, "learning_rate": 2.4800000000000004e-06, "loss": 70.3745, "step": 12400 }, { "epoch": 0.02506898516061523, "grad_norm": 205.99209594726562, "learning_rate": 2.482e-06, "loss": 40.2439, "step": 12410 }, { "epoch": 0.025089185793299044, "grad_norm": 0.0, "learning_rate": 2.4840000000000003e-06, "loss": 39.8004, "step": 12420 }, { "epoch": 0.02510938642598286, "grad_norm": 852.4554443359375, "learning_rate": 2.486e-06, "loss": 61.8194, "step": 12430 }, { "epoch": 0.025129587058666678, "grad_norm": 153.1422882080078, "learning_rate": 2.488e-06, "loss": 36.5243, "step": 12440 }, { "epoch": 0.025149787691350494, "grad_norm": 327.2110595703125, "learning_rate": 2.4900000000000003e-06, "loss": 35.6449, "step": 12450 }, { "epoch": 0.025169988324034307, "grad_norm": 77.2816162109375, "learning_rate": 2.4920000000000005e-06, "loss": 36.9364, "step": 12460 }, { "epoch": 0.025190188956718124, "grad_norm": 149.07992553710938, "learning_rate": 2.494e-06, "loss": 40.7493, "step": 12470 }, { "epoch": 0.02521038958940194, "grad_norm": 260.4246826171875, "learning_rate": 2.496e-06, "loss": 36.5645, "step": 12480 }, { "epoch": 0.025230590222085757, "grad_norm": 827.9938354492188, "learning_rate": 2.498e-06, "loss": 64.0744, "step": 12490 }, { "epoch": 0.02525079085476957, "grad_norm": 42.32184982299805, "learning_rate": 2.5e-06, "loss": 53.2336, "step": 12500 }, { "epoch": 0.025270991487453386, "grad_norm": 113.64704895019531, "learning_rate": 2.502e-06, "loss": 31.5933, "step": 12510 }, { "epoch": 0.025291192120137203, "grad_norm": 228.80465698242188, "learning_rate": 2.5040000000000005e-06, "loss": 69.6794, "step": 12520 }, { "epoch": 0.02531139275282102, "grad_norm": 290.43316650390625, "learning_rate": 2.5060000000000002e-06, "loss": 80.3926, "step": 12530 }, { "epoch": 0.025331593385504832, "grad_norm": 691.0927734375, "learning_rate": 2.5080000000000004e-06, "loss": 70.1921, "step": 12540 }, { "epoch": 0.02535179401818865, "grad_norm": 497.67596435546875, "learning_rate": 2.51e-06, "loss": 51.5074, "step": 12550 }, { "epoch": 0.025371994650872465, "grad_norm": 639.02734375, "learning_rate": 2.512e-06, "loss": 46.7611, "step": 12560 }, { "epoch": 0.025392195283556282, "grad_norm": 421.69775390625, "learning_rate": 2.5140000000000004e-06, "loss": 25.5028, "step": 12570 }, { "epoch": 0.025412395916240095, "grad_norm": 302.17926025390625, "learning_rate": 2.516e-06, "loss": 25.9738, "step": 12580 }, { "epoch": 0.02543259654892391, "grad_norm": 1800.4085693359375, "learning_rate": 2.5180000000000003e-06, "loss": 84.086, "step": 12590 }, { "epoch": 0.025452797181607728, "grad_norm": 369.5431213378906, "learning_rate": 2.52e-06, "loss": 48.0536, "step": 12600 }, { "epoch": 0.025472997814291545, "grad_norm": 195.2100067138672, "learning_rate": 2.522e-06, "loss": 33.2023, "step": 12610 }, { "epoch": 0.025493198446975358, "grad_norm": 110.87615203857422, "learning_rate": 2.5240000000000003e-06, "loss": 33.1774, "step": 12620 }, { "epoch": 0.025513399079659174, "grad_norm": 465.6731872558594, "learning_rate": 2.526e-06, "loss": 38.0672, "step": 12630 }, { "epoch": 0.02553359971234299, "grad_norm": 652.328857421875, "learning_rate": 2.5280000000000006e-06, "loss": 65.4745, "step": 12640 }, { "epoch": 0.025553800345026807, "grad_norm": 158.18020629882812, "learning_rate": 2.5300000000000003e-06, "loss": 57.1146, "step": 12650 }, { "epoch": 0.02557400097771062, "grad_norm": 688.0474853515625, "learning_rate": 2.532e-06, "loss": 65.0838, "step": 12660 }, { "epoch": 0.025594201610394437, "grad_norm": 187.83697509765625, "learning_rate": 2.5340000000000002e-06, "loss": 35.3431, "step": 12670 }, { "epoch": 0.025614402243078253, "grad_norm": 210.55734252929688, "learning_rate": 2.536e-06, "loss": 67.4747, "step": 12680 }, { "epoch": 0.02563460287576207, "grad_norm": 347.1117858886719, "learning_rate": 2.5380000000000005e-06, "loss": 63.4759, "step": 12690 }, { "epoch": 0.025654803508445883, "grad_norm": 304.1865539550781, "learning_rate": 2.5400000000000002e-06, "loss": 50.6389, "step": 12700 }, { "epoch": 0.0256750041411297, "grad_norm": 770.149658203125, "learning_rate": 2.542e-06, "loss": 34.676, "step": 12710 }, { "epoch": 0.025695204773813516, "grad_norm": 471.50360107421875, "learning_rate": 2.5440000000000005e-06, "loss": 49.6496, "step": 12720 }, { "epoch": 0.025715405406497333, "grad_norm": 415.54638671875, "learning_rate": 2.5460000000000003e-06, "loss": 35.9618, "step": 12730 }, { "epoch": 0.025735606039181146, "grad_norm": 259.369384765625, "learning_rate": 2.5480000000000004e-06, "loss": 47.4393, "step": 12740 }, { "epoch": 0.025755806671864962, "grad_norm": 183.06747436523438, "learning_rate": 2.55e-06, "loss": 63.4061, "step": 12750 }, { "epoch": 0.02577600730454878, "grad_norm": 313.0703430175781, "learning_rate": 2.552e-06, "loss": 51.2251, "step": 12760 }, { "epoch": 0.025796207937232595, "grad_norm": 428.6019287109375, "learning_rate": 2.5540000000000004e-06, "loss": 78.3767, "step": 12770 }, { "epoch": 0.025816408569916408, "grad_norm": 1129.5443115234375, "learning_rate": 2.556e-06, "loss": 65.8121, "step": 12780 }, { "epoch": 0.025836609202600225, "grad_norm": 162.56626892089844, "learning_rate": 2.5580000000000003e-06, "loss": 31.728, "step": 12790 }, { "epoch": 0.02585680983528404, "grad_norm": 126.5612564086914, "learning_rate": 2.56e-06, "loss": 19.8975, "step": 12800 }, { "epoch": 0.025877010467967858, "grad_norm": 397.88043212890625, "learning_rate": 2.562e-06, "loss": 55.4258, "step": 12810 }, { "epoch": 0.02589721110065167, "grad_norm": 143.2298583984375, "learning_rate": 2.5640000000000004e-06, "loss": 21.3907, "step": 12820 }, { "epoch": 0.025917411733335487, "grad_norm": 261.054443359375, "learning_rate": 2.566e-06, "loss": 34.9374, "step": 12830 }, { "epoch": 0.025937612366019304, "grad_norm": 527.8646240234375, "learning_rate": 2.568e-06, "loss": 41.207, "step": 12840 }, { "epoch": 0.02595781299870312, "grad_norm": 621.5526123046875, "learning_rate": 2.5700000000000004e-06, "loss": 36.6336, "step": 12850 }, { "epoch": 0.025978013631386934, "grad_norm": 140.3446044921875, "learning_rate": 2.572e-06, "loss": 49.7532, "step": 12860 }, { "epoch": 0.02599821426407075, "grad_norm": 346.9598693847656, "learning_rate": 2.5740000000000003e-06, "loss": 49.7518, "step": 12870 }, { "epoch": 0.026018414896754567, "grad_norm": 483.1822204589844, "learning_rate": 2.576e-06, "loss": 64.7486, "step": 12880 }, { "epoch": 0.026038615529438383, "grad_norm": 1136.1334228515625, "learning_rate": 2.578e-06, "loss": 72.2066, "step": 12890 }, { "epoch": 0.026058816162122196, "grad_norm": 828.27490234375, "learning_rate": 2.5800000000000003e-06, "loss": 42.903, "step": 12900 }, { "epoch": 0.026079016794806013, "grad_norm": 318.1702880859375, "learning_rate": 2.582e-06, "loss": 29.6651, "step": 12910 }, { "epoch": 0.02609921742748983, "grad_norm": 380.7034912109375, "learning_rate": 2.5840000000000006e-06, "loss": 33.1748, "step": 12920 }, { "epoch": 0.026119418060173646, "grad_norm": 126.53182220458984, "learning_rate": 2.5860000000000003e-06, "loss": 45.5106, "step": 12930 }, { "epoch": 0.02613961869285746, "grad_norm": 115.6380386352539, "learning_rate": 2.588e-06, "loss": 29.5763, "step": 12940 }, { "epoch": 0.026159819325541275, "grad_norm": 195.40853881835938, "learning_rate": 2.59e-06, "loss": 62.1918, "step": 12950 }, { "epoch": 0.026180019958225092, "grad_norm": 266.1535949707031, "learning_rate": 2.592e-06, "loss": 51.0725, "step": 12960 }, { "epoch": 0.02620022059090891, "grad_norm": 397.3973083496094, "learning_rate": 2.5940000000000005e-06, "loss": 51.1837, "step": 12970 }, { "epoch": 0.02622042122359272, "grad_norm": 377.22857666015625, "learning_rate": 2.5960000000000002e-06, "loss": 39.8632, "step": 12980 }, { "epoch": 0.026240621856276538, "grad_norm": 243.01968383789062, "learning_rate": 2.598e-06, "loss": 49.2325, "step": 12990 }, { "epoch": 0.026260822488960354, "grad_norm": 216.60301208496094, "learning_rate": 2.6e-06, "loss": 52.4981, "step": 13000 }, { "epoch": 0.02628102312164417, "grad_norm": 330.66827392578125, "learning_rate": 2.6020000000000002e-06, "loss": 49.6132, "step": 13010 }, { "epoch": 0.026301223754327984, "grad_norm": 150.80157470703125, "learning_rate": 2.6040000000000004e-06, "loss": 45.7862, "step": 13020 }, { "epoch": 0.0263214243870118, "grad_norm": 776.96923828125, "learning_rate": 2.606e-06, "loss": 50.5029, "step": 13030 }, { "epoch": 0.026341625019695617, "grad_norm": 314.0888977050781, "learning_rate": 2.608e-06, "loss": 35.3366, "step": 13040 }, { "epoch": 0.026361825652379434, "grad_norm": 644.3829956054688, "learning_rate": 2.6100000000000004e-06, "loss": 58.2894, "step": 13050 }, { "epoch": 0.026382026285063247, "grad_norm": 548.5953369140625, "learning_rate": 2.612e-06, "loss": 51.212, "step": 13060 }, { "epoch": 0.026402226917747063, "grad_norm": 233.57923889160156, "learning_rate": 2.6140000000000003e-06, "loss": 38.8444, "step": 13070 }, { "epoch": 0.02642242755043088, "grad_norm": 338.64599609375, "learning_rate": 2.616e-06, "loss": 63.3778, "step": 13080 }, { "epoch": 0.026442628183114696, "grad_norm": 1110.03955078125, "learning_rate": 2.618e-06, "loss": 27.2652, "step": 13090 }, { "epoch": 0.02646282881579851, "grad_norm": 688.3786010742188, "learning_rate": 2.6200000000000003e-06, "loss": 44.4872, "step": 13100 }, { "epoch": 0.026483029448482326, "grad_norm": 264.7286071777344, "learning_rate": 2.622e-06, "loss": 39.3549, "step": 13110 }, { "epoch": 0.026503230081166142, "grad_norm": 232.47683715820312, "learning_rate": 2.6240000000000006e-06, "loss": 36.5702, "step": 13120 }, { "epoch": 0.02652343071384996, "grad_norm": 133.58416748046875, "learning_rate": 2.6260000000000004e-06, "loss": 26.6831, "step": 13130 }, { "epoch": 0.026543631346533772, "grad_norm": 289.0917053222656, "learning_rate": 2.628e-06, "loss": 43.7781, "step": 13140 }, { "epoch": 0.02656383197921759, "grad_norm": 462.2491760253906, "learning_rate": 2.6300000000000002e-06, "loss": 52.0494, "step": 13150 }, { "epoch": 0.026584032611901405, "grad_norm": 206.20498657226562, "learning_rate": 2.632e-06, "loss": 34.422, "step": 13160 }, { "epoch": 0.02660423324458522, "grad_norm": 263.14501953125, "learning_rate": 2.6340000000000005e-06, "loss": 30.4136, "step": 13170 }, { "epoch": 0.026624433877269035, "grad_norm": 188.69781494140625, "learning_rate": 2.6360000000000003e-06, "loss": 29.0286, "step": 13180 }, { "epoch": 0.02664463450995285, "grad_norm": 272.5163269042969, "learning_rate": 2.638e-06, "loss": 43.7283, "step": 13190 }, { "epoch": 0.026664835142636668, "grad_norm": 427.29913330078125, "learning_rate": 2.64e-06, "loss": 64.6453, "step": 13200 }, { "epoch": 0.026685035775320484, "grad_norm": 537.2530517578125, "learning_rate": 2.6420000000000003e-06, "loss": 42.8637, "step": 13210 }, { "epoch": 0.026705236408004297, "grad_norm": 192.36712646484375, "learning_rate": 2.6440000000000004e-06, "loss": 58.9084, "step": 13220 }, { "epoch": 0.026725437040688114, "grad_norm": 397.050537109375, "learning_rate": 2.646e-06, "loss": 32.1473, "step": 13230 }, { "epoch": 0.02674563767337193, "grad_norm": 219.69398498535156, "learning_rate": 2.648e-06, "loss": 42.4961, "step": 13240 }, { "epoch": 0.026765838306055747, "grad_norm": 699.4884643554688, "learning_rate": 2.6500000000000005e-06, "loss": 62.0551, "step": 13250 }, { "epoch": 0.02678603893873956, "grad_norm": 62.28037643432617, "learning_rate": 2.652e-06, "loss": 58.6225, "step": 13260 }, { "epoch": 0.026806239571423376, "grad_norm": 376.9830017089844, "learning_rate": 2.6540000000000003e-06, "loss": 33.5195, "step": 13270 }, { "epoch": 0.026826440204107193, "grad_norm": 297.31134033203125, "learning_rate": 2.656e-06, "loss": 37.0247, "step": 13280 }, { "epoch": 0.02684664083679101, "grad_norm": 357.4164733886719, "learning_rate": 2.6580000000000002e-06, "loss": 36.754, "step": 13290 }, { "epoch": 0.026866841469474823, "grad_norm": 120.303466796875, "learning_rate": 2.6600000000000004e-06, "loss": 29.5279, "step": 13300 }, { "epoch": 0.02688704210215864, "grad_norm": 137.94912719726562, "learning_rate": 2.662e-06, "loss": 18.347, "step": 13310 }, { "epoch": 0.026907242734842456, "grad_norm": 337.5869445800781, "learning_rate": 2.6640000000000007e-06, "loss": 59.7195, "step": 13320 }, { "epoch": 0.026927443367526272, "grad_norm": 68.42863464355469, "learning_rate": 2.6660000000000004e-06, "loss": 30.0269, "step": 13330 }, { "epoch": 0.026947644000210085, "grad_norm": 327.6662902832031, "learning_rate": 2.668e-06, "loss": 61.589, "step": 13340 }, { "epoch": 0.0269678446328939, "grad_norm": 426.8439636230469, "learning_rate": 2.6700000000000003e-06, "loss": 33.5915, "step": 13350 }, { "epoch": 0.026988045265577718, "grad_norm": 207.46495056152344, "learning_rate": 2.672e-06, "loss": 35.1658, "step": 13360 }, { "epoch": 0.027008245898261535, "grad_norm": 77.23761749267578, "learning_rate": 2.6740000000000006e-06, "loss": 56.5197, "step": 13370 }, { "epoch": 0.027028446530945348, "grad_norm": 143.40882873535156, "learning_rate": 2.6760000000000003e-06, "loss": 33.6824, "step": 13380 }, { "epoch": 0.027048647163629164, "grad_norm": 129.73329162597656, "learning_rate": 2.678e-06, "loss": 57.8632, "step": 13390 }, { "epoch": 0.02706884779631298, "grad_norm": 508.5555725097656, "learning_rate": 2.68e-06, "loss": 63.2176, "step": 13400 }, { "epoch": 0.027089048428996797, "grad_norm": 263.2941589355469, "learning_rate": 2.6820000000000003e-06, "loss": 45.9968, "step": 13410 }, { "epoch": 0.02710924906168061, "grad_norm": 284.66729736328125, "learning_rate": 2.6840000000000005e-06, "loss": 45.1564, "step": 13420 }, { "epoch": 0.027129449694364427, "grad_norm": 310.67510986328125, "learning_rate": 2.686e-06, "loss": 38.5243, "step": 13430 }, { "epoch": 0.027149650327048244, "grad_norm": 639.0134887695312, "learning_rate": 2.688e-06, "loss": 32.2366, "step": 13440 }, { "epoch": 0.02716985095973206, "grad_norm": 413.2801513671875, "learning_rate": 2.6900000000000005e-06, "loss": 48.5381, "step": 13450 }, { "epoch": 0.027190051592415873, "grad_norm": 732.5756225585938, "learning_rate": 2.6920000000000002e-06, "loss": 73.3593, "step": 13460 }, { "epoch": 0.02721025222509969, "grad_norm": 360.3016357421875, "learning_rate": 2.694e-06, "loss": 69.0746, "step": 13470 }, { "epoch": 0.027230452857783506, "grad_norm": 252.23690795898438, "learning_rate": 2.696e-06, "loss": 20.4108, "step": 13480 }, { "epoch": 0.027250653490467323, "grad_norm": 295.58880615234375, "learning_rate": 2.6980000000000003e-06, "loss": 67.5751, "step": 13490 }, { "epoch": 0.027270854123151136, "grad_norm": 766.6262817382812, "learning_rate": 2.7000000000000004e-06, "loss": 71.6042, "step": 13500 }, { "epoch": 0.027291054755834952, "grad_norm": 520.2201538085938, "learning_rate": 2.702e-06, "loss": 42.6372, "step": 13510 }, { "epoch": 0.02731125538851877, "grad_norm": 304.60626220703125, "learning_rate": 2.704e-06, "loss": 49.5386, "step": 13520 }, { "epoch": 0.027331456021202585, "grad_norm": 546.1720581054688, "learning_rate": 2.7060000000000004e-06, "loss": 33.5799, "step": 13530 }, { "epoch": 0.0273516566538864, "grad_norm": 290.587158203125, "learning_rate": 2.708e-06, "loss": 42.5067, "step": 13540 }, { "epoch": 0.027371857286570215, "grad_norm": 387.0436706542969, "learning_rate": 2.7100000000000003e-06, "loss": 52.699, "step": 13550 }, { "epoch": 0.02739205791925403, "grad_norm": 160.3758087158203, "learning_rate": 2.712e-06, "loss": 43.6411, "step": 13560 }, { "epoch": 0.027412258551937848, "grad_norm": 710.7739868164062, "learning_rate": 2.7139999999999998e-06, "loss": 54.2144, "step": 13570 }, { "epoch": 0.02743245918462166, "grad_norm": 779.4195556640625, "learning_rate": 2.7160000000000003e-06, "loss": 39.0722, "step": 13580 }, { "epoch": 0.027452659817305478, "grad_norm": 325.6453857421875, "learning_rate": 2.718e-06, "loss": 63.6406, "step": 13590 }, { "epoch": 0.027472860449989294, "grad_norm": 817.9602661132812, "learning_rate": 2.7200000000000002e-06, "loss": 44.9397, "step": 13600 }, { "epoch": 0.02749306108267311, "grad_norm": 210.01571655273438, "learning_rate": 2.7220000000000004e-06, "loss": 47.9602, "step": 13610 }, { "epoch": 0.027513261715356924, "grad_norm": 241.36550903320312, "learning_rate": 2.724e-06, "loss": 34.7302, "step": 13620 }, { "epoch": 0.02753346234804074, "grad_norm": 545.60986328125, "learning_rate": 2.7260000000000002e-06, "loss": 43.2214, "step": 13630 }, { "epoch": 0.027553662980724557, "grad_norm": 194.1279296875, "learning_rate": 2.728e-06, "loss": 35.1627, "step": 13640 }, { "epoch": 0.027573863613408373, "grad_norm": 214.6461181640625, "learning_rate": 2.7300000000000005e-06, "loss": 36.497, "step": 13650 }, { "epoch": 0.027594064246092186, "grad_norm": 0.0, "learning_rate": 2.7320000000000003e-06, "loss": 41.2425, "step": 13660 }, { "epoch": 0.027614264878776003, "grad_norm": 578.6611328125, "learning_rate": 2.734e-06, "loss": 29.3606, "step": 13670 }, { "epoch": 0.02763446551145982, "grad_norm": 166.12017822265625, "learning_rate": 2.736e-06, "loss": 31.9724, "step": 13680 }, { "epoch": 0.027654666144143636, "grad_norm": 316.96319580078125, "learning_rate": 2.7380000000000003e-06, "loss": 50.3944, "step": 13690 }, { "epoch": 0.02767486677682745, "grad_norm": 161.17103576660156, "learning_rate": 2.7400000000000004e-06, "loss": 62.0337, "step": 13700 }, { "epoch": 0.027695067409511265, "grad_norm": 205.12124633789062, "learning_rate": 2.742e-06, "loss": 21.5958, "step": 13710 }, { "epoch": 0.027715268042195082, "grad_norm": 468.7744140625, "learning_rate": 2.744e-06, "loss": 41.5816, "step": 13720 }, { "epoch": 0.0277354686748789, "grad_norm": 446.5870361328125, "learning_rate": 2.7460000000000005e-06, "loss": 39.0956, "step": 13730 }, { "epoch": 0.02775566930756271, "grad_norm": 493.59649658203125, "learning_rate": 2.748e-06, "loss": 42.4779, "step": 13740 }, { "epoch": 0.027775869940246528, "grad_norm": 621.54931640625, "learning_rate": 2.7500000000000004e-06, "loss": 60.1891, "step": 13750 }, { "epoch": 0.027796070572930345, "grad_norm": 264.574462890625, "learning_rate": 2.752e-06, "loss": 37.5747, "step": 13760 }, { "epoch": 0.02781627120561416, "grad_norm": 222.49783325195312, "learning_rate": 2.754e-06, "loss": 83.6344, "step": 13770 }, { "epoch": 0.027836471838297974, "grad_norm": 481.7549743652344, "learning_rate": 2.7560000000000004e-06, "loss": 35.1609, "step": 13780 }, { "epoch": 0.02785667247098179, "grad_norm": 163.02459716796875, "learning_rate": 2.758e-06, "loss": 29.9774, "step": 13790 }, { "epoch": 0.027876873103665607, "grad_norm": 485.0875549316406, "learning_rate": 2.7600000000000003e-06, "loss": 37.7058, "step": 13800 }, { "epoch": 0.027897073736349424, "grad_norm": 489.6974792480469, "learning_rate": 2.7620000000000004e-06, "loss": 53.9531, "step": 13810 }, { "epoch": 0.027917274369033237, "grad_norm": 306.7885437011719, "learning_rate": 2.764e-06, "loss": 28.6377, "step": 13820 }, { "epoch": 0.027937475001717053, "grad_norm": 233.24801635742188, "learning_rate": 2.7660000000000003e-06, "loss": 51.3816, "step": 13830 }, { "epoch": 0.02795767563440087, "grad_norm": 194.1627197265625, "learning_rate": 2.768e-06, "loss": 30.7128, "step": 13840 }, { "epoch": 0.027977876267084686, "grad_norm": 219.78501892089844, "learning_rate": 2.7700000000000006e-06, "loss": 33.9265, "step": 13850 }, { "epoch": 0.0279980768997685, "grad_norm": 385.09161376953125, "learning_rate": 2.7720000000000003e-06, "loss": 33.7642, "step": 13860 }, { "epoch": 0.028018277532452316, "grad_norm": 386.9944152832031, "learning_rate": 2.774e-06, "loss": 36.7425, "step": 13870 }, { "epoch": 0.028038478165136133, "grad_norm": 374.6046142578125, "learning_rate": 2.776e-06, "loss": 41.3784, "step": 13880 }, { "epoch": 0.02805867879781995, "grad_norm": 368.2095947265625, "learning_rate": 2.7780000000000003e-06, "loss": 44.7794, "step": 13890 }, { "epoch": 0.028078879430503762, "grad_norm": 236.08596801757812, "learning_rate": 2.7800000000000005e-06, "loss": 38.6825, "step": 13900 }, { "epoch": 0.02809908006318758, "grad_norm": 155.6656494140625, "learning_rate": 2.7820000000000002e-06, "loss": 24.5149, "step": 13910 }, { "epoch": 0.028119280695871395, "grad_norm": 414.65521240234375, "learning_rate": 2.784e-06, "loss": 38.0893, "step": 13920 }, { "epoch": 0.02813948132855521, "grad_norm": 179.47329711914062, "learning_rate": 2.7860000000000005e-06, "loss": 21.6537, "step": 13930 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 2.7880000000000002e-06, "loss": 35.5274, "step": 13940 }, { "epoch": 0.02817988259392284, "grad_norm": 391.2088623046875, "learning_rate": 2.7900000000000004e-06, "loss": 54.0023, "step": 13950 }, { "epoch": 0.028200083226606658, "grad_norm": 0.0, "learning_rate": 2.792e-06, "loss": 30.6593, "step": 13960 }, { "epoch": 0.028220283859290474, "grad_norm": 697.277099609375, "learning_rate": 2.794e-06, "loss": 39.3925, "step": 13970 }, { "epoch": 0.028240484491974287, "grad_norm": 253.71261596679688, "learning_rate": 2.7960000000000004e-06, "loss": 45.516, "step": 13980 }, { "epoch": 0.028260685124658104, "grad_norm": 311.915283203125, "learning_rate": 2.798e-06, "loss": 47.509, "step": 13990 }, { "epoch": 0.02828088575734192, "grad_norm": 199.053466796875, "learning_rate": 2.8000000000000003e-06, "loss": 44.9459, "step": 14000 }, { "epoch": 0.028301086390025737, "grad_norm": 138.94992065429688, "learning_rate": 2.8020000000000004e-06, "loss": 48.0473, "step": 14010 }, { "epoch": 0.02832128702270955, "grad_norm": 343.2397766113281, "learning_rate": 2.804e-06, "loss": 37.7469, "step": 14020 }, { "epoch": 0.028341487655393367, "grad_norm": 212.633056640625, "learning_rate": 2.8060000000000003e-06, "loss": 58.752, "step": 14030 }, { "epoch": 0.028361688288077183, "grad_norm": 375.6622619628906, "learning_rate": 2.808e-06, "loss": 48.8352, "step": 14040 }, { "epoch": 0.028381888920761, "grad_norm": 192.02890014648438, "learning_rate": 2.8100000000000006e-06, "loss": 40.6832, "step": 14050 }, { "epoch": 0.028402089553444813, "grad_norm": 207.75172424316406, "learning_rate": 2.8120000000000004e-06, "loss": 25.5494, "step": 14060 }, { "epoch": 0.02842229018612863, "grad_norm": 255.52146911621094, "learning_rate": 2.814e-06, "loss": 38.2666, "step": 14070 }, { "epoch": 0.028442490818812446, "grad_norm": 401.4888610839844, "learning_rate": 2.8160000000000002e-06, "loss": 31.604, "step": 14080 }, { "epoch": 0.028462691451496262, "grad_norm": 555.644287109375, "learning_rate": 2.8180000000000004e-06, "loss": 43.2263, "step": 14090 }, { "epoch": 0.028482892084180075, "grad_norm": 273.0107727050781, "learning_rate": 2.82e-06, "loss": 52.7406, "step": 14100 }, { "epoch": 0.028503092716863892, "grad_norm": 289.6844177246094, "learning_rate": 2.8220000000000003e-06, "loss": 40.1442, "step": 14110 }, { "epoch": 0.02852329334954771, "grad_norm": 195.53314208984375, "learning_rate": 2.824e-06, "loss": 52.0327, "step": 14120 }, { "epoch": 0.028543493982231525, "grad_norm": 105.094482421875, "learning_rate": 2.8260000000000006e-06, "loss": 14.9896, "step": 14130 }, { "epoch": 0.028563694614915338, "grad_norm": 293.82733154296875, "learning_rate": 2.8280000000000003e-06, "loss": 48.3372, "step": 14140 }, { "epoch": 0.028583895247599155, "grad_norm": 220.0674285888672, "learning_rate": 2.83e-06, "loss": 59.7571, "step": 14150 }, { "epoch": 0.02860409588028297, "grad_norm": 377.78070068359375, "learning_rate": 2.832e-06, "loss": 26.5201, "step": 14160 }, { "epoch": 0.028624296512966788, "grad_norm": 126.24993133544922, "learning_rate": 2.834e-06, "loss": 43.7487, "step": 14170 }, { "epoch": 0.0286444971456506, "grad_norm": 546.8043823242188, "learning_rate": 2.8360000000000005e-06, "loss": 48.3139, "step": 14180 }, { "epoch": 0.028664697778334417, "grad_norm": 473.9862060546875, "learning_rate": 2.838e-06, "loss": 59.5939, "step": 14190 }, { "epoch": 0.028684898411018234, "grad_norm": 206.07139587402344, "learning_rate": 2.84e-06, "loss": 61.2306, "step": 14200 }, { "epoch": 0.02870509904370205, "grad_norm": 273.6217346191406, "learning_rate": 2.8420000000000005e-06, "loss": 52.4803, "step": 14210 }, { "epoch": 0.028725299676385863, "grad_norm": 273.65350341796875, "learning_rate": 2.8440000000000002e-06, "loss": 46.6362, "step": 14220 }, { "epoch": 0.02874550030906968, "grad_norm": 158.20556640625, "learning_rate": 2.8460000000000004e-06, "loss": 56.6499, "step": 14230 }, { "epoch": 0.028765700941753496, "grad_norm": 189.9134521484375, "learning_rate": 2.848e-06, "loss": 47.0693, "step": 14240 }, { "epoch": 0.028785901574437313, "grad_norm": 617.3346557617188, "learning_rate": 2.85e-06, "loss": 53.713, "step": 14250 }, { "epoch": 0.028806102207121126, "grad_norm": 384.42730712890625, "learning_rate": 2.8520000000000004e-06, "loss": 17.1577, "step": 14260 }, { "epoch": 0.028826302839804942, "grad_norm": 130.1586151123047, "learning_rate": 2.854e-06, "loss": 38.444, "step": 14270 }, { "epoch": 0.02884650347248876, "grad_norm": 185.76953125, "learning_rate": 2.8560000000000003e-06, "loss": 63.1636, "step": 14280 }, { "epoch": 0.028866704105172575, "grad_norm": 222.17724609375, "learning_rate": 2.8580000000000004e-06, "loss": 37.5885, "step": 14290 }, { "epoch": 0.02888690473785639, "grad_norm": 538.6173706054688, "learning_rate": 2.86e-06, "loss": 69.121, "step": 14300 }, { "epoch": 0.028907105370540205, "grad_norm": 256.5049743652344, "learning_rate": 2.8620000000000003e-06, "loss": 53.3776, "step": 14310 }, { "epoch": 0.02892730600322402, "grad_norm": 438.0295104980469, "learning_rate": 2.864e-06, "loss": 61.6038, "step": 14320 }, { "epoch": 0.028947506635907838, "grad_norm": 422.3350830078125, "learning_rate": 2.8660000000000006e-06, "loss": 29.7985, "step": 14330 }, { "epoch": 0.02896770726859165, "grad_norm": 372.87750244140625, "learning_rate": 2.8680000000000003e-06, "loss": 48.5137, "step": 14340 }, { "epoch": 0.028987907901275468, "grad_norm": 181.89967346191406, "learning_rate": 2.87e-06, "loss": 47.3947, "step": 14350 }, { "epoch": 0.029008108533959284, "grad_norm": 3538.191162109375, "learning_rate": 2.872e-06, "loss": 73.3289, "step": 14360 }, { "epoch": 0.0290283091666431, "grad_norm": 153.22349548339844, "learning_rate": 2.874e-06, "loss": 47.7966, "step": 14370 }, { "epoch": 0.029048509799326914, "grad_norm": 291.0286865234375, "learning_rate": 2.8760000000000005e-06, "loss": 55.1475, "step": 14380 }, { "epoch": 0.02906871043201073, "grad_norm": 407.62689208984375, "learning_rate": 2.8780000000000002e-06, "loss": 24.9843, "step": 14390 }, { "epoch": 0.029088911064694547, "grad_norm": 0.0, "learning_rate": 2.88e-06, "loss": 29.9337, "step": 14400 }, { "epoch": 0.029109111697378363, "grad_norm": 262.6056823730469, "learning_rate": 2.8820000000000005e-06, "loss": 25.8135, "step": 14410 }, { "epoch": 0.029129312330062176, "grad_norm": 607.558349609375, "learning_rate": 2.8840000000000003e-06, "loss": 37.9367, "step": 14420 }, { "epoch": 0.029149512962745993, "grad_norm": 149.4924774169922, "learning_rate": 2.8860000000000004e-06, "loss": 29.7891, "step": 14430 }, { "epoch": 0.02916971359542981, "grad_norm": 143.46609497070312, "learning_rate": 2.888e-06, "loss": 57.2059, "step": 14440 }, { "epoch": 0.029189914228113626, "grad_norm": 415.89849853515625, "learning_rate": 2.89e-06, "loss": 44.5231, "step": 14450 }, { "epoch": 0.02921011486079744, "grad_norm": 192.35862731933594, "learning_rate": 2.8920000000000004e-06, "loss": 53.7222, "step": 14460 }, { "epoch": 0.029230315493481256, "grad_norm": 115.19097900390625, "learning_rate": 2.894e-06, "loss": 28.8211, "step": 14470 }, { "epoch": 0.029250516126165072, "grad_norm": 249.1744842529297, "learning_rate": 2.8960000000000003e-06, "loss": 42.8624, "step": 14480 }, { "epoch": 0.02927071675884889, "grad_norm": 872.6121826171875, "learning_rate": 2.8980000000000005e-06, "loss": 60.4381, "step": 14490 }, { "epoch": 0.029290917391532702, "grad_norm": 93.78887939453125, "learning_rate": 2.9e-06, "loss": 42.8204, "step": 14500 }, { "epoch": 0.02931111802421652, "grad_norm": 132.31845092773438, "learning_rate": 2.9020000000000003e-06, "loss": 35.3548, "step": 14510 }, { "epoch": 0.029331318656900335, "grad_norm": 411.2016296386719, "learning_rate": 2.904e-06, "loss": 28.9453, "step": 14520 }, { "epoch": 0.02935151928958415, "grad_norm": 188.8401336669922, "learning_rate": 2.9060000000000006e-06, "loss": 29.3066, "step": 14530 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 2.9080000000000004e-06, "loss": 36.3695, "step": 14540 }, { "epoch": 0.02939192055495178, "grad_norm": 289.1012268066406, "learning_rate": 2.91e-06, "loss": 39.166, "step": 14550 }, { "epoch": 0.029412121187635597, "grad_norm": 338.1182556152344, "learning_rate": 2.9120000000000002e-06, "loss": 46.6367, "step": 14560 }, { "epoch": 0.029432321820319414, "grad_norm": 0.0, "learning_rate": 2.914e-06, "loss": 28.3837, "step": 14570 }, { "epoch": 0.029452522453003227, "grad_norm": 173.60679626464844, "learning_rate": 2.9160000000000005e-06, "loss": 35.0737, "step": 14580 }, { "epoch": 0.029472723085687044, "grad_norm": 412.3799743652344, "learning_rate": 2.9180000000000003e-06, "loss": 30.8762, "step": 14590 }, { "epoch": 0.02949292371837086, "grad_norm": 608.8279418945312, "learning_rate": 2.92e-06, "loss": 39.7951, "step": 14600 }, { "epoch": 0.029513124351054677, "grad_norm": 122.60462951660156, "learning_rate": 2.9220000000000006e-06, "loss": 48.3944, "step": 14610 }, { "epoch": 0.02953332498373849, "grad_norm": 538.561279296875, "learning_rate": 2.9240000000000003e-06, "loss": 45.6956, "step": 14620 }, { "epoch": 0.029553525616422306, "grad_norm": 433.58740234375, "learning_rate": 2.9260000000000004e-06, "loss": 47.8393, "step": 14630 }, { "epoch": 0.029573726249106123, "grad_norm": 411.8701171875, "learning_rate": 2.928e-06, "loss": 35.4545, "step": 14640 }, { "epoch": 0.02959392688178994, "grad_norm": 576.345947265625, "learning_rate": 2.93e-06, "loss": 37.223, "step": 14650 }, { "epoch": 0.029614127514473752, "grad_norm": 52.74928283691406, "learning_rate": 2.9320000000000005e-06, "loss": 63.0068, "step": 14660 }, { "epoch": 0.02963432814715757, "grad_norm": 490.1223449707031, "learning_rate": 2.934e-06, "loss": 37.4007, "step": 14670 }, { "epoch": 0.029654528779841385, "grad_norm": 467.13983154296875, "learning_rate": 2.9360000000000003e-06, "loss": 50.5344, "step": 14680 }, { "epoch": 0.029674729412525202, "grad_norm": 628.7852783203125, "learning_rate": 2.9380000000000005e-06, "loss": 40.5954, "step": 14690 }, { "epoch": 0.029694930045209015, "grad_norm": 372.11767578125, "learning_rate": 2.9400000000000002e-06, "loss": 19.131, "step": 14700 }, { "epoch": 0.02971513067789283, "grad_norm": 0.0, "learning_rate": 2.9420000000000004e-06, "loss": 16.068, "step": 14710 }, { "epoch": 0.029735331310576648, "grad_norm": 202.9538116455078, "learning_rate": 2.944e-06, "loss": 54.7595, "step": 14720 }, { "epoch": 0.029755531943260465, "grad_norm": 1496.111083984375, "learning_rate": 2.946e-06, "loss": 32.5831, "step": 14730 }, { "epoch": 0.029775732575944278, "grad_norm": 297.0837707519531, "learning_rate": 2.9480000000000004e-06, "loss": 34.2947, "step": 14740 }, { "epoch": 0.029795933208628094, "grad_norm": 479.2887268066406, "learning_rate": 2.95e-06, "loss": 48.2584, "step": 14750 }, { "epoch": 0.02981613384131191, "grad_norm": 280.76580810546875, "learning_rate": 2.9520000000000003e-06, "loss": 34.7202, "step": 14760 }, { "epoch": 0.029836334473995727, "grad_norm": 438.716552734375, "learning_rate": 2.954e-06, "loss": 43.0325, "step": 14770 }, { "epoch": 0.02985653510667954, "grad_norm": 633.862548828125, "learning_rate": 2.956e-06, "loss": 52.4967, "step": 14780 }, { "epoch": 0.029876735739363357, "grad_norm": 256.7657165527344, "learning_rate": 2.9580000000000003e-06, "loss": 56.6148, "step": 14790 }, { "epoch": 0.029896936372047173, "grad_norm": 365.7373046875, "learning_rate": 2.96e-06, "loss": 75.8906, "step": 14800 }, { "epoch": 0.02991713700473099, "grad_norm": 468.0849609375, "learning_rate": 2.9620000000000006e-06, "loss": 39.912, "step": 14810 }, { "epoch": 0.029937337637414803, "grad_norm": 234.64068603515625, "learning_rate": 2.9640000000000003e-06, "loss": 43.8119, "step": 14820 }, { "epoch": 0.02995753827009862, "grad_norm": 441.35003662109375, "learning_rate": 2.966e-06, "loss": 48.699, "step": 14830 }, { "epoch": 0.029977738902782436, "grad_norm": 343.6715393066406, "learning_rate": 2.9680000000000002e-06, "loss": 47.7009, "step": 14840 }, { "epoch": 0.029997939535466252, "grad_norm": 167.99188232421875, "learning_rate": 2.97e-06, "loss": 20.7662, "step": 14850 }, { "epoch": 0.030018140168150065, "grad_norm": 647.2653198242188, "learning_rate": 2.9720000000000005e-06, "loss": 36.0663, "step": 14860 }, { "epoch": 0.030038340800833882, "grad_norm": 291.9883117675781, "learning_rate": 2.9740000000000002e-06, "loss": 25.8121, "step": 14870 }, { "epoch": 0.0300585414335177, "grad_norm": 965.8220825195312, "learning_rate": 2.976e-06, "loss": 44.3664, "step": 14880 }, { "epoch": 0.030078742066201515, "grad_norm": 216.04820251464844, "learning_rate": 2.9780000000000005e-06, "loss": 55.5217, "step": 14890 }, { "epoch": 0.030098942698885328, "grad_norm": 287.53582763671875, "learning_rate": 2.9800000000000003e-06, "loss": 37.6422, "step": 14900 }, { "epoch": 0.030119143331569145, "grad_norm": 218.70355224609375, "learning_rate": 2.9820000000000004e-06, "loss": 20.8934, "step": 14910 }, { "epoch": 0.03013934396425296, "grad_norm": 399.158447265625, "learning_rate": 2.984e-06, "loss": 53.9392, "step": 14920 }, { "epoch": 0.030159544596936778, "grad_norm": 312.56719970703125, "learning_rate": 2.986e-06, "loss": 29.4282, "step": 14930 }, { "epoch": 0.03017974522962059, "grad_norm": 436.681884765625, "learning_rate": 2.9880000000000004e-06, "loss": 52.193, "step": 14940 }, { "epoch": 0.030199945862304407, "grad_norm": 107.99313354492188, "learning_rate": 2.99e-06, "loss": 40.2972, "step": 14950 }, { "epoch": 0.030220146494988224, "grad_norm": 203.16688537597656, "learning_rate": 2.9920000000000003e-06, "loss": 32.1198, "step": 14960 }, { "epoch": 0.03024034712767204, "grad_norm": 0.0, "learning_rate": 2.994e-06, "loss": 45.8876, "step": 14970 }, { "epoch": 0.030260547760355853, "grad_norm": 228.07589721679688, "learning_rate": 2.996e-06, "loss": 53.8313, "step": 14980 }, { "epoch": 0.03028074839303967, "grad_norm": 84.29973602294922, "learning_rate": 2.9980000000000003e-06, "loss": 48.5111, "step": 14990 }, { "epoch": 0.030300949025723486, "grad_norm": 464.9461975097656, "learning_rate": 3e-06, "loss": 49.4351, "step": 15000 }, { "epoch": 0.030321149658407303, "grad_norm": 245.02064514160156, "learning_rate": 3.0020000000000006e-06, "loss": 49.8666, "step": 15010 }, { "epoch": 0.030341350291091116, "grad_norm": 142.01158142089844, "learning_rate": 3.0040000000000004e-06, "loss": 47.032, "step": 15020 }, { "epoch": 0.030361550923774933, "grad_norm": 539.7145385742188, "learning_rate": 3.006e-06, "loss": 41.1906, "step": 15030 }, { "epoch": 0.03038175155645875, "grad_norm": 479.6602783203125, "learning_rate": 3.0080000000000003e-06, "loss": 43.9905, "step": 15040 }, { "epoch": 0.030401952189142566, "grad_norm": 900.4759521484375, "learning_rate": 3.01e-06, "loss": 44.7197, "step": 15050 }, { "epoch": 0.03042215282182638, "grad_norm": 273.6351013183594, "learning_rate": 3.0120000000000006e-06, "loss": 40.2843, "step": 15060 }, { "epoch": 0.030442353454510195, "grad_norm": 273.24273681640625, "learning_rate": 3.0140000000000003e-06, "loss": 48.8051, "step": 15070 }, { "epoch": 0.030462554087194012, "grad_norm": 362.49017333984375, "learning_rate": 3.016e-06, "loss": 45.3135, "step": 15080 }, { "epoch": 0.03048275471987783, "grad_norm": 149.7844696044922, "learning_rate": 3.0180000000000006e-06, "loss": 27.1673, "step": 15090 }, { "epoch": 0.03050295535256164, "grad_norm": 1535.1077880859375, "learning_rate": 3.0200000000000003e-06, "loss": 40.7697, "step": 15100 }, { "epoch": 0.030523155985245458, "grad_norm": 236.7692413330078, "learning_rate": 3.0220000000000005e-06, "loss": 33.7874, "step": 15110 }, { "epoch": 0.030543356617929274, "grad_norm": 228.4298858642578, "learning_rate": 3.024e-06, "loss": 26.6198, "step": 15120 }, { "epoch": 0.03056355725061309, "grad_norm": 341.5628662109375, "learning_rate": 3.026e-06, "loss": 22.8394, "step": 15130 }, { "epoch": 0.030583757883296904, "grad_norm": 546.5899047851562, "learning_rate": 3.0280000000000005e-06, "loss": 28.9967, "step": 15140 }, { "epoch": 0.03060395851598072, "grad_norm": 361.4396667480469, "learning_rate": 3.0300000000000002e-06, "loss": 60.0779, "step": 15150 }, { "epoch": 0.030624159148664537, "grad_norm": 505.96490478515625, "learning_rate": 3.0320000000000004e-06, "loss": 60.3548, "step": 15160 }, { "epoch": 0.030644359781348354, "grad_norm": 543.9578247070312, "learning_rate": 3.034e-06, "loss": 67.5969, "step": 15170 }, { "epoch": 0.030664560414032167, "grad_norm": 215.66294860839844, "learning_rate": 3.0360000000000002e-06, "loss": 52.4373, "step": 15180 }, { "epoch": 0.030684761046715983, "grad_norm": 350.5191650390625, "learning_rate": 3.0380000000000004e-06, "loss": 48.218, "step": 15190 }, { "epoch": 0.0307049616793998, "grad_norm": 500.9207763671875, "learning_rate": 3.04e-06, "loss": 74.0889, "step": 15200 }, { "epoch": 0.030725162312083613, "grad_norm": 1889.1630859375, "learning_rate": 3.0420000000000007e-06, "loss": 87.9034, "step": 15210 }, { "epoch": 0.03074536294476743, "grad_norm": 233.08013916015625, "learning_rate": 3.0440000000000004e-06, "loss": 32.6646, "step": 15220 }, { "epoch": 0.030765563577451246, "grad_norm": 179.83616638183594, "learning_rate": 3.046e-06, "loss": 41.0835, "step": 15230 }, { "epoch": 0.030785764210135062, "grad_norm": 279.4805908203125, "learning_rate": 3.0480000000000003e-06, "loss": 35.3801, "step": 15240 }, { "epoch": 0.030805964842818875, "grad_norm": 139.05160522460938, "learning_rate": 3.05e-06, "loss": 61.3396, "step": 15250 }, { "epoch": 0.030826165475502692, "grad_norm": 101.37811279296875, "learning_rate": 3.0520000000000006e-06, "loss": 30.0831, "step": 15260 }, { "epoch": 0.03084636610818651, "grad_norm": 133.32022094726562, "learning_rate": 3.0540000000000003e-06, "loss": 31.8323, "step": 15270 }, { "epoch": 0.030866566740870325, "grad_norm": 185.78453063964844, "learning_rate": 3.056e-06, "loss": 26.8349, "step": 15280 }, { "epoch": 0.030886767373554138, "grad_norm": 1197.7213134765625, "learning_rate": 3.0580000000000006e-06, "loss": 43.0256, "step": 15290 }, { "epoch": 0.030906968006237955, "grad_norm": 356.7593688964844, "learning_rate": 3.0600000000000003e-06, "loss": 41.2964, "step": 15300 }, { "epoch": 0.03092716863892177, "grad_norm": 142.41143798828125, "learning_rate": 3.0620000000000005e-06, "loss": 38.84, "step": 15310 }, { "epoch": 0.030947369271605588, "grad_norm": 277.917724609375, "learning_rate": 3.0640000000000002e-06, "loss": 26.3447, "step": 15320 }, { "epoch": 0.0309675699042894, "grad_norm": 197.03167724609375, "learning_rate": 3.066e-06, "loss": 47.5748, "step": 15330 }, { "epoch": 0.030987770536973217, "grad_norm": 383.22467041015625, "learning_rate": 3.0680000000000005e-06, "loss": 29.2476, "step": 15340 }, { "epoch": 0.031007971169657034, "grad_norm": 355.5647888183594, "learning_rate": 3.0700000000000003e-06, "loss": 73.3655, "step": 15350 }, { "epoch": 0.03102817180234085, "grad_norm": 399.4779357910156, "learning_rate": 3.072e-06, "loss": 20.0992, "step": 15360 }, { "epoch": 0.031048372435024663, "grad_norm": 288.90704345703125, "learning_rate": 3.074e-06, "loss": 62.7703, "step": 15370 }, { "epoch": 0.03106857306770848, "grad_norm": 574.322021484375, "learning_rate": 3.0760000000000003e-06, "loss": 36.9132, "step": 15380 }, { "epoch": 0.031088773700392296, "grad_norm": 351.8114318847656, "learning_rate": 3.0780000000000004e-06, "loss": 52.2067, "step": 15390 }, { "epoch": 0.031108974333076113, "grad_norm": 291.94268798828125, "learning_rate": 3.08e-06, "loss": 30.7991, "step": 15400 }, { "epoch": 0.031129174965759926, "grad_norm": 162.35025024414062, "learning_rate": 3.082e-06, "loss": 36.186, "step": 15410 }, { "epoch": 0.031149375598443742, "grad_norm": 1095.189208984375, "learning_rate": 3.0840000000000005e-06, "loss": 58.4939, "step": 15420 }, { "epoch": 0.03116957623112756, "grad_norm": 219.0703582763672, "learning_rate": 3.086e-06, "loss": 31.9261, "step": 15430 }, { "epoch": 0.031189776863811376, "grad_norm": 272.61138916015625, "learning_rate": 3.0880000000000003e-06, "loss": 38.4935, "step": 15440 }, { "epoch": 0.03120997749649519, "grad_norm": 336.4731140136719, "learning_rate": 3.09e-06, "loss": 46.5931, "step": 15450 }, { "epoch": 0.031230178129179005, "grad_norm": 254.10296630859375, "learning_rate": 3.092e-06, "loss": 41.9977, "step": 15460 }, { "epoch": 0.03125037876186282, "grad_norm": 403.6255798339844, "learning_rate": 3.0940000000000004e-06, "loss": 28.1615, "step": 15470 }, { "epoch": 0.03127057939454664, "grad_norm": 367.24835205078125, "learning_rate": 3.096e-06, "loss": 27.9527, "step": 15480 }, { "epoch": 0.03129078002723045, "grad_norm": 377.1778564453125, "learning_rate": 3.0980000000000007e-06, "loss": 45.268, "step": 15490 }, { "epoch": 0.03131098065991427, "grad_norm": 213.51766967773438, "learning_rate": 3.1000000000000004e-06, "loss": 67.9812, "step": 15500 }, { "epoch": 0.031331181292598084, "grad_norm": 266.1339416503906, "learning_rate": 3.102e-06, "loss": 58.3044, "step": 15510 }, { "epoch": 0.0313513819252819, "grad_norm": 619.6426391601562, "learning_rate": 3.1040000000000003e-06, "loss": 41.9807, "step": 15520 }, { "epoch": 0.03137158255796572, "grad_norm": 183.75096130371094, "learning_rate": 3.106e-06, "loss": 40.6779, "step": 15530 }, { "epoch": 0.03139178319064953, "grad_norm": 350.3226318359375, "learning_rate": 3.1080000000000006e-06, "loss": 33.9908, "step": 15540 }, { "epoch": 0.03141198382333334, "grad_norm": 341.83709716796875, "learning_rate": 3.1100000000000003e-06, "loss": 47.329, "step": 15550 }, { "epoch": 0.03143218445601716, "grad_norm": 164.04696655273438, "learning_rate": 3.112e-06, "loss": 34.1863, "step": 15560 }, { "epoch": 0.031452385088700976, "grad_norm": 92.58802032470703, "learning_rate": 3.114e-06, "loss": 21.3626, "step": 15570 }, { "epoch": 0.031472585721384796, "grad_norm": 233.59539794921875, "learning_rate": 3.1160000000000003e-06, "loss": 60.9179, "step": 15580 }, { "epoch": 0.03149278635406861, "grad_norm": 341.7328186035156, "learning_rate": 3.1180000000000005e-06, "loss": 43.5866, "step": 15590 }, { "epoch": 0.03151298698675242, "grad_norm": 490.1520690917969, "learning_rate": 3.12e-06, "loss": 41.5713, "step": 15600 }, { "epoch": 0.03153318761943624, "grad_norm": 238.5013885498047, "learning_rate": 3.122e-06, "loss": 25.2562, "step": 15610 }, { "epoch": 0.031553388252120056, "grad_norm": 259.8772888183594, "learning_rate": 3.1240000000000005e-06, "loss": 23.8128, "step": 15620 }, { "epoch": 0.03157358888480387, "grad_norm": 985.0008544921875, "learning_rate": 3.1260000000000002e-06, "loss": 45.4137, "step": 15630 }, { "epoch": 0.03159378951748769, "grad_norm": 468.80902099609375, "learning_rate": 3.1280000000000004e-06, "loss": 55.5236, "step": 15640 }, { "epoch": 0.0316139901501715, "grad_norm": 303.0762023925781, "learning_rate": 3.13e-06, "loss": 34.767, "step": 15650 }, { "epoch": 0.03163419078285532, "grad_norm": 313.0398864746094, "learning_rate": 3.132e-06, "loss": 35.7496, "step": 15660 }, { "epoch": 0.031654391415539135, "grad_norm": 284.9385681152344, "learning_rate": 3.1340000000000004e-06, "loss": 37.4289, "step": 15670 }, { "epoch": 0.03167459204822295, "grad_norm": 607.725341796875, "learning_rate": 3.136e-06, "loss": 48.7561, "step": 15680 }, { "epoch": 0.03169479268090677, "grad_norm": 662.9429931640625, "learning_rate": 3.1380000000000003e-06, "loss": 57.1808, "step": 15690 }, { "epoch": 0.03171499331359058, "grad_norm": 0.0, "learning_rate": 3.1400000000000004e-06, "loss": 23.3515, "step": 15700 }, { "epoch": 0.031735193946274394, "grad_norm": 449.1476135253906, "learning_rate": 3.142e-06, "loss": 20.0895, "step": 15710 }, { "epoch": 0.031755394578958214, "grad_norm": 451.9173583984375, "learning_rate": 3.1440000000000003e-06, "loss": 62.3167, "step": 15720 }, { "epoch": 0.03177559521164203, "grad_norm": 426.8009033203125, "learning_rate": 3.146e-06, "loss": 38.765, "step": 15730 }, { "epoch": 0.03179579584432585, "grad_norm": 490.55657958984375, "learning_rate": 3.1480000000000006e-06, "loss": 33.2712, "step": 15740 }, { "epoch": 0.03181599647700966, "grad_norm": 284.5474548339844, "learning_rate": 3.1500000000000003e-06, "loss": 68.2743, "step": 15750 }, { "epoch": 0.03183619710969347, "grad_norm": 838.3622436523438, "learning_rate": 3.152e-06, "loss": 87.6176, "step": 15760 }, { "epoch": 0.03185639774237729, "grad_norm": 221.1287078857422, "learning_rate": 3.154e-06, "loss": 25.268, "step": 15770 }, { "epoch": 0.031876598375061106, "grad_norm": 351.37347412109375, "learning_rate": 3.1560000000000004e-06, "loss": 35.132, "step": 15780 }, { "epoch": 0.03189679900774492, "grad_norm": 274.52935791015625, "learning_rate": 3.1580000000000005e-06, "loss": 32.246, "step": 15790 }, { "epoch": 0.03191699964042874, "grad_norm": 195.85987854003906, "learning_rate": 3.1600000000000002e-06, "loss": 20.6875, "step": 15800 }, { "epoch": 0.03193720027311255, "grad_norm": 162.9860382080078, "learning_rate": 3.162e-06, "loss": 48.9927, "step": 15810 }, { "epoch": 0.03195740090579637, "grad_norm": 211.91612243652344, "learning_rate": 3.1640000000000005e-06, "loss": 52.8758, "step": 15820 }, { "epoch": 0.031977601538480185, "grad_norm": 257.0917053222656, "learning_rate": 3.1660000000000003e-06, "loss": 30.9359, "step": 15830 }, { "epoch": 0.031997802171164, "grad_norm": 219.13868713378906, "learning_rate": 3.1680000000000004e-06, "loss": 66.0529, "step": 15840 }, { "epoch": 0.03201800280384782, "grad_norm": 396.2012023925781, "learning_rate": 3.17e-06, "loss": 41.4766, "step": 15850 }, { "epoch": 0.03203820343653163, "grad_norm": 219.76678466796875, "learning_rate": 3.172e-06, "loss": 45.5544, "step": 15860 }, { "epoch": 0.032058404069215445, "grad_norm": 174.1364288330078, "learning_rate": 3.1740000000000004e-06, "loss": 42.8552, "step": 15870 }, { "epoch": 0.032078604701899265, "grad_norm": 1696.34765625, "learning_rate": 3.176e-06, "loss": 46.3909, "step": 15880 }, { "epoch": 0.03209880533458308, "grad_norm": 1550.7750244140625, "learning_rate": 3.1780000000000003e-06, "loss": 57.4185, "step": 15890 }, { "epoch": 0.0321190059672669, "grad_norm": 209.40576171875, "learning_rate": 3.1800000000000005e-06, "loss": 59.7446, "step": 15900 }, { "epoch": 0.03213920659995071, "grad_norm": 327.4063415527344, "learning_rate": 3.182e-06, "loss": 77.1886, "step": 15910 }, { "epoch": 0.032159407232634524, "grad_norm": 285.25787353515625, "learning_rate": 3.1840000000000003e-06, "loss": 58.3573, "step": 15920 }, { "epoch": 0.032179607865318344, "grad_norm": 163.0392303466797, "learning_rate": 3.186e-06, "loss": 46.9996, "step": 15930 }, { "epoch": 0.03219980849800216, "grad_norm": 597.7408447265625, "learning_rate": 3.188e-06, "loss": 62.0551, "step": 15940 }, { "epoch": 0.03222000913068597, "grad_norm": 571.7894287109375, "learning_rate": 3.1900000000000004e-06, "loss": 36.5087, "step": 15950 }, { "epoch": 0.03224020976336979, "grad_norm": 191.1591339111328, "learning_rate": 3.192e-06, "loss": 60.5348, "step": 15960 }, { "epoch": 0.0322604103960536, "grad_norm": 187.69650268554688, "learning_rate": 3.1940000000000003e-06, "loss": 39.4925, "step": 15970 }, { "epoch": 0.03228061102873742, "grad_norm": 220.7599639892578, "learning_rate": 3.1960000000000004e-06, "loss": 38.1752, "step": 15980 }, { "epoch": 0.032300811661421236, "grad_norm": 248.64682006835938, "learning_rate": 3.198e-06, "loss": 61.2816, "step": 15990 }, { "epoch": 0.03232101229410505, "grad_norm": 169.95626831054688, "learning_rate": 3.2000000000000003e-06, "loss": 44.1116, "step": 16000 }, { "epoch": 0.03234121292678887, "grad_norm": 101.71955108642578, "learning_rate": 3.202e-06, "loss": 38.8841, "step": 16010 }, { "epoch": 0.03236141355947268, "grad_norm": 255.8649444580078, "learning_rate": 3.2040000000000006e-06, "loss": 60.4106, "step": 16020 }, { "epoch": 0.032381614192156495, "grad_norm": 0.0, "learning_rate": 3.2060000000000003e-06, "loss": 49.8229, "step": 16030 }, { "epoch": 0.032401814824840315, "grad_norm": 540.7418212890625, "learning_rate": 3.208e-06, "loss": 82.7604, "step": 16040 }, { "epoch": 0.03242201545752413, "grad_norm": 110.37312316894531, "learning_rate": 3.21e-06, "loss": 58.6589, "step": 16050 }, { "epoch": 0.03244221609020795, "grad_norm": 54.10726547241211, "learning_rate": 3.212e-06, "loss": 45.6225, "step": 16060 }, { "epoch": 0.03246241672289176, "grad_norm": 850.87158203125, "learning_rate": 3.2140000000000005e-06, "loss": 54.9133, "step": 16070 }, { "epoch": 0.032482617355575574, "grad_norm": 95.17237854003906, "learning_rate": 3.216e-06, "loss": 32.0136, "step": 16080 }, { "epoch": 0.032502817988259394, "grad_norm": 334.0519104003906, "learning_rate": 3.218e-06, "loss": 47.6611, "step": 16090 }, { "epoch": 0.03252301862094321, "grad_norm": 752.5732421875, "learning_rate": 3.2200000000000005e-06, "loss": 26.4173, "step": 16100 }, { "epoch": 0.03254321925362702, "grad_norm": 231.43211364746094, "learning_rate": 3.2220000000000002e-06, "loss": 53.5237, "step": 16110 }, { "epoch": 0.03256341988631084, "grad_norm": 359.0640869140625, "learning_rate": 3.2240000000000004e-06, "loss": 69.9289, "step": 16120 }, { "epoch": 0.03258362051899465, "grad_norm": 177.7603759765625, "learning_rate": 3.226e-06, "loss": 13.9175, "step": 16130 }, { "epoch": 0.03260382115167847, "grad_norm": 638.7717895507812, "learning_rate": 3.228e-06, "loss": 39.0148, "step": 16140 }, { "epoch": 0.032624021784362287, "grad_norm": 236.43675231933594, "learning_rate": 3.2300000000000004e-06, "loss": 52.6042, "step": 16150 }, { "epoch": 0.0326442224170461, "grad_norm": 212.41543579101562, "learning_rate": 3.232e-06, "loss": 36.9812, "step": 16160 }, { "epoch": 0.03266442304972992, "grad_norm": 0.0, "learning_rate": 3.2340000000000003e-06, "loss": 32.0287, "step": 16170 }, { "epoch": 0.03268462368241373, "grad_norm": 134.27476501464844, "learning_rate": 3.2360000000000004e-06, "loss": 26.6532, "step": 16180 }, { "epoch": 0.032704824315097546, "grad_norm": 361.854736328125, "learning_rate": 3.238e-06, "loss": 20.5622, "step": 16190 }, { "epoch": 0.032725024947781366, "grad_norm": 679.5897827148438, "learning_rate": 3.2400000000000003e-06, "loss": 40.6005, "step": 16200 }, { "epoch": 0.03274522558046518, "grad_norm": 289.02850341796875, "learning_rate": 3.242e-06, "loss": 50.7658, "step": 16210 }, { "epoch": 0.032765426213149, "grad_norm": 565.7950439453125, "learning_rate": 3.2440000000000006e-06, "loss": 41.4966, "step": 16220 }, { "epoch": 0.03278562684583281, "grad_norm": 301.0735168457031, "learning_rate": 3.2460000000000003e-06, "loss": 52.04, "step": 16230 }, { "epoch": 0.032805827478516625, "grad_norm": 315.11602783203125, "learning_rate": 3.248e-06, "loss": 33.1049, "step": 16240 }, { "epoch": 0.032826028111200445, "grad_norm": 535.5096435546875, "learning_rate": 3.2500000000000002e-06, "loss": 41.3681, "step": 16250 }, { "epoch": 0.03284622874388426, "grad_norm": 177.64854431152344, "learning_rate": 3.252e-06, "loss": 46.2062, "step": 16260 }, { "epoch": 0.03286642937656807, "grad_norm": 439.66912841796875, "learning_rate": 3.2540000000000005e-06, "loss": 25.0758, "step": 16270 }, { "epoch": 0.03288663000925189, "grad_norm": 83.80502319335938, "learning_rate": 3.2560000000000003e-06, "loss": 37.0161, "step": 16280 }, { "epoch": 0.032906830641935704, "grad_norm": 255.8190155029297, "learning_rate": 3.258e-06, "loss": 30.2858, "step": 16290 }, { "epoch": 0.032927031274619524, "grad_norm": 201.57406616210938, "learning_rate": 3.2600000000000006e-06, "loss": 26.3917, "step": 16300 }, { "epoch": 0.03294723190730334, "grad_norm": 391.9530944824219, "learning_rate": 3.2620000000000003e-06, "loss": 45.2382, "step": 16310 }, { "epoch": 0.03296743253998715, "grad_norm": 606.6541137695312, "learning_rate": 3.2640000000000004e-06, "loss": 38.3148, "step": 16320 }, { "epoch": 0.03298763317267097, "grad_norm": 360.1717834472656, "learning_rate": 3.266e-06, "loss": 34.8371, "step": 16330 }, { "epoch": 0.03300783380535478, "grad_norm": 272.30224609375, "learning_rate": 3.268e-06, "loss": 26.325, "step": 16340 }, { "epoch": 0.033028034438038596, "grad_norm": 331.2907409667969, "learning_rate": 3.2700000000000005e-06, "loss": 57.071, "step": 16350 }, { "epoch": 0.033048235070722416, "grad_norm": 229.40760803222656, "learning_rate": 3.272e-06, "loss": 34.362, "step": 16360 }, { "epoch": 0.03306843570340623, "grad_norm": 320.0472412109375, "learning_rate": 3.2740000000000003e-06, "loss": 44.5907, "step": 16370 }, { "epoch": 0.03308863633609005, "grad_norm": 221.1917266845703, "learning_rate": 3.2760000000000005e-06, "loss": 34.2471, "step": 16380 }, { "epoch": 0.03310883696877386, "grad_norm": 116.16138458251953, "learning_rate": 3.278e-06, "loss": 36.7874, "step": 16390 }, { "epoch": 0.033129037601457675, "grad_norm": 554.9132690429688, "learning_rate": 3.2800000000000004e-06, "loss": 80.2446, "step": 16400 }, { "epoch": 0.033149238234141495, "grad_norm": 280.5957946777344, "learning_rate": 3.282e-06, "loss": 47.3203, "step": 16410 }, { "epoch": 0.03316943886682531, "grad_norm": 163.01815795898438, "learning_rate": 3.2840000000000007e-06, "loss": 33.6053, "step": 16420 }, { "epoch": 0.03318963949950912, "grad_norm": 52.739620208740234, "learning_rate": 3.2860000000000004e-06, "loss": 26.1459, "step": 16430 }, { "epoch": 0.03320984013219294, "grad_norm": 239.11659240722656, "learning_rate": 3.288e-06, "loss": 30.3653, "step": 16440 }, { "epoch": 0.033230040764876755, "grad_norm": 275.11968994140625, "learning_rate": 3.2900000000000003e-06, "loss": 41.6632, "step": 16450 }, { "epoch": 0.033250241397560575, "grad_norm": 345.49859619140625, "learning_rate": 3.292e-06, "loss": 36.4217, "step": 16460 }, { "epoch": 0.03327044203024439, "grad_norm": 110.75786590576172, "learning_rate": 3.2940000000000006e-06, "loss": 24.8681, "step": 16470 }, { "epoch": 0.0332906426629282, "grad_norm": 128.3035430908203, "learning_rate": 3.2960000000000003e-06, "loss": 46.4255, "step": 16480 }, { "epoch": 0.03331084329561202, "grad_norm": 183.91575622558594, "learning_rate": 3.298e-06, "loss": 25.5651, "step": 16490 }, { "epoch": 0.033331043928295834, "grad_norm": 237.99591064453125, "learning_rate": 3.3000000000000006e-06, "loss": 54.5775, "step": 16500 }, { "epoch": 0.03335124456097965, "grad_norm": 201.19725036621094, "learning_rate": 3.3020000000000003e-06, "loss": 21.5429, "step": 16510 }, { "epoch": 0.03337144519366347, "grad_norm": 204.90586853027344, "learning_rate": 3.3040000000000005e-06, "loss": 24.6825, "step": 16520 }, { "epoch": 0.03339164582634728, "grad_norm": 432.8783874511719, "learning_rate": 3.306e-06, "loss": 35.1481, "step": 16530 }, { "epoch": 0.0334118464590311, "grad_norm": 608.1990356445312, "learning_rate": 3.308e-06, "loss": 71.6465, "step": 16540 }, { "epoch": 0.03343204709171491, "grad_norm": 454.04656982421875, "learning_rate": 3.3100000000000005e-06, "loss": 51.4599, "step": 16550 }, { "epoch": 0.033452247724398726, "grad_norm": 344.5522155761719, "learning_rate": 3.3120000000000002e-06, "loss": 69.415, "step": 16560 }, { "epoch": 0.033472448357082546, "grad_norm": 169.66287231445312, "learning_rate": 3.314e-06, "loss": 54.8354, "step": 16570 }, { "epoch": 0.03349264898976636, "grad_norm": 254.67308044433594, "learning_rate": 3.3160000000000005e-06, "loss": 44.4905, "step": 16580 }, { "epoch": 0.03351284962245017, "grad_norm": 291.4891662597656, "learning_rate": 3.3180000000000003e-06, "loss": 39.8378, "step": 16590 }, { "epoch": 0.03353305025513399, "grad_norm": 171.07656860351562, "learning_rate": 3.3200000000000004e-06, "loss": 47.5656, "step": 16600 }, { "epoch": 0.033553250887817805, "grad_norm": 246.68692016601562, "learning_rate": 3.322e-06, "loss": 24.6602, "step": 16610 }, { "epoch": 0.033573451520501625, "grad_norm": 180.96722412109375, "learning_rate": 3.324e-06, "loss": 46.0831, "step": 16620 }, { "epoch": 0.03359365215318544, "grad_norm": 140.794189453125, "learning_rate": 3.3260000000000004e-06, "loss": 39.4703, "step": 16630 }, { "epoch": 0.03361385278586925, "grad_norm": 545.4083251953125, "learning_rate": 3.328e-06, "loss": 50.4772, "step": 16640 }, { "epoch": 0.03363405341855307, "grad_norm": 330.9933776855469, "learning_rate": 3.3300000000000003e-06, "loss": 39.8815, "step": 16650 }, { "epoch": 0.033654254051236884, "grad_norm": 128.7200469970703, "learning_rate": 3.332e-06, "loss": 35.7967, "step": 16660 }, { "epoch": 0.0336744546839207, "grad_norm": 504.47491455078125, "learning_rate": 3.334e-06, "loss": 40.6771, "step": 16670 }, { "epoch": 0.03369465531660452, "grad_norm": 90.31434631347656, "learning_rate": 3.3360000000000003e-06, "loss": 34.5851, "step": 16680 }, { "epoch": 0.03371485594928833, "grad_norm": 331.6734619140625, "learning_rate": 3.338e-06, "loss": 38.0597, "step": 16690 }, { "epoch": 0.03373505658197215, "grad_norm": 667.7931518554688, "learning_rate": 3.3400000000000006e-06, "loss": 63.363, "step": 16700 }, { "epoch": 0.03375525721465596, "grad_norm": 124.91834259033203, "learning_rate": 3.3420000000000004e-06, "loss": 47.1453, "step": 16710 }, { "epoch": 0.033775457847339777, "grad_norm": 199.29672241210938, "learning_rate": 3.344e-06, "loss": 41.0515, "step": 16720 }, { "epoch": 0.033795658480023597, "grad_norm": 142.48536682128906, "learning_rate": 3.3460000000000002e-06, "loss": 27.9276, "step": 16730 }, { "epoch": 0.03381585911270741, "grad_norm": 473.9076843261719, "learning_rate": 3.348e-06, "loss": 42.4857, "step": 16740 }, { "epoch": 0.03383605974539122, "grad_norm": 142.67420959472656, "learning_rate": 3.3500000000000005e-06, "loss": 39.2613, "step": 16750 }, { "epoch": 0.03385626037807504, "grad_norm": 145.9368896484375, "learning_rate": 3.3520000000000003e-06, "loss": 24.6056, "step": 16760 }, { "epoch": 0.033876461010758856, "grad_norm": 465.4949035644531, "learning_rate": 3.354e-06, "loss": 34.2869, "step": 16770 }, { "epoch": 0.033896661643442676, "grad_norm": 848.554443359375, "learning_rate": 3.3560000000000006e-06, "loss": 38.7238, "step": 16780 }, { "epoch": 0.03391686227612649, "grad_norm": 298.4035339355469, "learning_rate": 3.3580000000000003e-06, "loss": 39.6302, "step": 16790 }, { "epoch": 0.0339370629088103, "grad_norm": 583.6204833984375, "learning_rate": 3.3600000000000004e-06, "loss": 23.1098, "step": 16800 }, { "epoch": 0.03395726354149412, "grad_norm": 333.96661376953125, "learning_rate": 3.362e-06, "loss": 54.6294, "step": 16810 }, { "epoch": 0.033977464174177935, "grad_norm": 162.26942443847656, "learning_rate": 3.364e-06, "loss": 42.0531, "step": 16820 }, { "epoch": 0.03399766480686175, "grad_norm": 725.6404418945312, "learning_rate": 3.3660000000000005e-06, "loss": 49.5222, "step": 16830 }, { "epoch": 0.03401786543954557, "grad_norm": 172.42709350585938, "learning_rate": 3.368e-06, "loss": 23.7073, "step": 16840 }, { "epoch": 0.03403806607222938, "grad_norm": 243.4844970703125, "learning_rate": 3.3700000000000003e-06, "loss": 46.36, "step": 16850 }, { "epoch": 0.0340582667049132, "grad_norm": 167.36048889160156, "learning_rate": 3.372e-06, "loss": 34.9312, "step": 16860 }, { "epoch": 0.034078467337597014, "grad_norm": 327.28363037109375, "learning_rate": 3.3740000000000002e-06, "loss": 47.2789, "step": 16870 }, { "epoch": 0.03409866797028083, "grad_norm": 202.80242919921875, "learning_rate": 3.3760000000000004e-06, "loss": 33.5107, "step": 16880 }, { "epoch": 0.03411886860296465, "grad_norm": 168.95278930664062, "learning_rate": 3.378e-06, "loss": 15.3987, "step": 16890 }, { "epoch": 0.03413906923564846, "grad_norm": 402.2037658691406, "learning_rate": 3.3800000000000007e-06, "loss": 34.0796, "step": 16900 }, { "epoch": 0.03415926986833227, "grad_norm": 215.1862335205078, "learning_rate": 3.3820000000000004e-06, "loss": 31.7497, "step": 16910 }, { "epoch": 0.03417947050101609, "grad_norm": 315.1021423339844, "learning_rate": 3.384e-06, "loss": 37.4485, "step": 16920 }, { "epoch": 0.034199671133699906, "grad_norm": 432.75323486328125, "learning_rate": 3.3860000000000003e-06, "loss": 45.0758, "step": 16930 }, { "epoch": 0.034219871766383726, "grad_norm": 295.7484436035156, "learning_rate": 3.388e-06, "loss": 29.9935, "step": 16940 }, { "epoch": 0.03424007239906754, "grad_norm": 208.1071319580078, "learning_rate": 3.3900000000000006e-06, "loss": 22.3504, "step": 16950 }, { "epoch": 0.03426027303175135, "grad_norm": 519.549072265625, "learning_rate": 3.3920000000000003e-06, "loss": 59.7024, "step": 16960 }, { "epoch": 0.03428047366443517, "grad_norm": 242.1359405517578, "learning_rate": 3.394e-06, "loss": 82.6781, "step": 16970 }, { "epoch": 0.034300674297118985, "grad_norm": 135.22019958496094, "learning_rate": 3.3960000000000006e-06, "loss": 17.6293, "step": 16980 }, { "epoch": 0.0343208749298028, "grad_norm": 231.86219787597656, "learning_rate": 3.3980000000000003e-06, "loss": 39.3439, "step": 16990 }, { "epoch": 0.03434107556248662, "grad_norm": 76.2967300415039, "learning_rate": 3.4000000000000005e-06, "loss": 20.6721, "step": 17000 }, { "epoch": 0.03436127619517043, "grad_norm": 1032.177734375, "learning_rate": 3.402e-06, "loss": 77.5976, "step": 17010 }, { "epoch": 0.03438147682785425, "grad_norm": 173.25665283203125, "learning_rate": 3.404e-06, "loss": 34.028, "step": 17020 }, { "epoch": 0.034401677460538065, "grad_norm": 418.9309997558594, "learning_rate": 3.4060000000000005e-06, "loss": 28.1858, "step": 17030 }, { "epoch": 0.03442187809322188, "grad_norm": 55.288516998291016, "learning_rate": 3.4080000000000002e-06, "loss": 49.1182, "step": 17040 }, { "epoch": 0.0344420787259057, "grad_norm": 512.934814453125, "learning_rate": 3.4100000000000004e-06, "loss": 45.6756, "step": 17050 }, { "epoch": 0.03446227935858951, "grad_norm": 161.65658569335938, "learning_rate": 3.412e-06, "loss": 41.0394, "step": 17060 }, { "epoch": 0.034482479991273324, "grad_norm": 243.16500854492188, "learning_rate": 3.4140000000000003e-06, "loss": 29.2915, "step": 17070 }, { "epoch": 0.034502680623957144, "grad_norm": 916.8419799804688, "learning_rate": 3.4160000000000004e-06, "loss": 39.2391, "step": 17080 }, { "epoch": 0.03452288125664096, "grad_norm": 517.4654541015625, "learning_rate": 3.418e-06, "loss": 60.4674, "step": 17090 }, { "epoch": 0.03454308188932478, "grad_norm": 518.3997192382812, "learning_rate": 3.4200000000000007e-06, "loss": 56.1459, "step": 17100 }, { "epoch": 0.03456328252200859, "grad_norm": 198.74916076660156, "learning_rate": 3.4220000000000004e-06, "loss": 36.9409, "step": 17110 }, { "epoch": 0.0345834831546924, "grad_norm": 357.126953125, "learning_rate": 3.424e-06, "loss": 26.4918, "step": 17120 }, { "epoch": 0.03460368378737622, "grad_norm": 357.0582275390625, "learning_rate": 3.4260000000000003e-06, "loss": 26.8618, "step": 17130 }, { "epoch": 0.034623884420060036, "grad_norm": 242.4788360595703, "learning_rate": 3.428e-06, "loss": 54.0453, "step": 17140 }, { "epoch": 0.03464408505274385, "grad_norm": 343.68231201171875, "learning_rate": 3.4300000000000006e-06, "loss": 43.7619, "step": 17150 }, { "epoch": 0.03466428568542767, "grad_norm": 768.6111450195312, "learning_rate": 3.4320000000000003e-06, "loss": 40.2745, "step": 17160 }, { "epoch": 0.03468448631811148, "grad_norm": 0.0, "learning_rate": 3.434e-06, "loss": 37.4576, "step": 17170 }, { "epoch": 0.0347046869507953, "grad_norm": 132.35975646972656, "learning_rate": 3.4360000000000006e-06, "loss": 31.2433, "step": 17180 }, { "epoch": 0.034724887583479115, "grad_norm": 479.4120178222656, "learning_rate": 3.4380000000000004e-06, "loss": 48.1209, "step": 17190 }, { "epoch": 0.03474508821616293, "grad_norm": 171.34060668945312, "learning_rate": 3.44e-06, "loss": 84.3219, "step": 17200 }, { "epoch": 0.03476528884884675, "grad_norm": 1929.4791259765625, "learning_rate": 3.4420000000000002e-06, "loss": 67.5582, "step": 17210 }, { "epoch": 0.03478548948153056, "grad_norm": 197.84034729003906, "learning_rate": 3.444e-06, "loss": 39.4922, "step": 17220 }, { "epoch": 0.034805690114214374, "grad_norm": 468.0546875, "learning_rate": 3.4460000000000005e-06, "loss": 29.5396, "step": 17230 }, { "epoch": 0.034825890746898194, "grad_norm": 153.83279418945312, "learning_rate": 3.4480000000000003e-06, "loss": 29.1, "step": 17240 }, { "epoch": 0.03484609137958201, "grad_norm": 277.006591796875, "learning_rate": 3.45e-06, "loss": 40.5101, "step": 17250 }, { "epoch": 0.03486629201226583, "grad_norm": 0.0, "learning_rate": 3.452e-06, "loss": 32.0646, "step": 17260 }, { "epoch": 0.03488649264494964, "grad_norm": 151.42449951171875, "learning_rate": 3.4540000000000003e-06, "loss": 48.2011, "step": 17270 }, { "epoch": 0.034906693277633453, "grad_norm": 410.3793640136719, "learning_rate": 3.4560000000000005e-06, "loss": 52.3885, "step": 17280 }, { "epoch": 0.034926893910317273, "grad_norm": 387.1470642089844, "learning_rate": 3.458e-06, "loss": 36.0569, "step": 17290 }, { "epoch": 0.03494709454300109, "grad_norm": 205.1016845703125, "learning_rate": 3.46e-06, "loss": 34.1463, "step": 17300 }, { "epoch": 0.0349672951756849, "grad_norm": 345.90338134765625, "learning_rate": 3.4620000000000005e-06, "loss": 47.2561, "step": 17310 }, { "epoch": 0.03498749580836872, "grad_norm": 290.951904296875, "learning_rate": 3.464e-06, "loss": 31.5661, "step": 17320 }, { "epoch": 0.03500769644105253, "grad_norm": 237.23367309570312, "learning_rate": 3.4660000000000004e-06, "loss": 43.0512, "step": 17330 }, { "epoch": 0.03502789707373635, "grad_norm": 419.7666320800781, "learning_rate": 3.468e-06, "loss": 27.4696, "step": 17340 }, { "epoch": 0.035048097706420166, "grad_norm": 598.3642578125, "learning_rate": 3.4700000000000002e-06, "loss": 25.1128, "step": 17350 }, { "epoch": 0.03506829833910398, "grad_norm": 516.69921875, "learning_rate": 3.4720000000000004e-06, "loss": 29.9179, "step": 17360 }, { "epoch": 0.0350884989717878, "grad_norm": 1076.3408203125, "learning_rate": 3.474e-06, "loss": 41.6534, "step": 17370 }, { "epoch": 0.03510869960447161, "grad_norm": 108.95550537109375, "learning_rate": 3.4760000000000007e-06, "loss": 13.1956, "step": 17380 }, { "epoch": 0.035128900237155425, "grad_norm": 326.40289306640625, "learning_rate": 3.4780000000000004e-06, "loss": 51.8279, "step": 17390 }, { "epoch": 0.035149100869839245, "grad_norm": 281.5604248046875, "learning_rate": 3.48e-06, "loss": 36.4133, "step": 17400 }, { "epoch": 0.03516930150252306, "grad_norm": 271.30865478515625, "learning_rate": 3.4820000000000003e-06, "loss": 27.5878, "step": 17410 }, { "epoch": 0.03518950213520688, "grad_norm": 580.0592651367188, "learning_rate": 3.484e-06, "loss": 60.9713, "step": 17420 }, { "epoch": 0.03520970276789069, "grad_norm": 392.9876708984375, "learning_rate": 3.4860000000000006e-06, "loss": 71.791, "step": 17430 }, { "epoch": 0.035229903400574504, "grad_norm": 274.8899841308594, "learning_rate": 3.4880000000000003e-06, "loss": 30.0427, "step": 17440 }, { "epoch": 0.035250104033258324, "grad_norm": 386.48516845703125, "learning_rate": 3.49e-06, "loss": 36.2768, "step": 17450 }, { "epoch": 0.03527030466594214, "grad_norm": 133.5108184814453, "learning_rate": 3.492e-06, "loss": 54.4602, "step": 17460 }, { "epoch": 0.03529050529862595, "grad_norm": 250.3053436279297, "learning_rate": 3.4940000000000003e-06, "loss": 61.0798, "step": 17470 }, { "epoch": 0.03531070593130977, "grad_norm": 529.8038330078125, "learning_rate": 3.4960000000000005e-06, "loss": 57.6145, "step": 17480 }, { "epoch": 0.03533090656399358, "grad_norm": 224.66844177246094, "learning_rate": 3.4980000000000002e-06, "loss": 70.1266, "step": 17490 }, { "epoch": 0.0353511071966774, "grad_norm": 360.2770080566406, "learning_rate": 3.5e-06, "loss": 29.7575, "step": 17500 }, { "epoch": 0.035371307829361216, "grad_norm": 338.8011474609375, "learning_rate": 3.5020000000000005e-06, "loss": 34.674, "step": 17510 }, { "epoch": 0.03539150846204503, "grad_norm": 96.25777435302734, "learning_rate": 3.5040000000000002e-06, "loss": 38.4156, "step": 17520 }, { "epoch": 0.03541170909472885, "grad_norm": 344.6164855957031, "learning_rate": 3.5060000000000004e-06, "loss": 29.9177, "step": 17530 }, { "epoch": 0.03543190972741266, "grad_norm": 179.86849975585938, "learning_rate": 3.508e-06, "loss": 41.1072, "step": 17540 }, { "epoch": 0.035452110360096475, "grad_norm": 198.28713989257812, "learning_rate": 3.5100000000000003e-06, "loss": 48.3304, "step": 17550 }, { "epoch": 0.035472310992780295, "grad_norm": 134.15838623046875, "learning_rate": 3.5120000000000004e-06, "loss": 20.6231, "step": 17560 }, { "epoch": 0.03549251162546411, "grad_norm": 172.27017211914062, "learning_rate": 3.514e-06, "loss": 36.7563, "step": 17570 }, { "epoch": 0.03551271225814793, "grad_norm": 379.590576171875, "learning_rate": 3.5160000000000007e-06, "loss": 43.2796, "step": 17580 }, { "epoch": 0.03553291289083174, "grad_norm": 375.34832763671875, "learning_rate": 3.5180000000000005e-06, "loss": 41.8047, "step": 17590 }, { "epoch": 0.035553113523515555, "grad_norm": 344.058837890625, "learning_rate": 3.52e-06, "loss": 38.5008, "step": 17600 }, { "epoch": 0.035573314156199375, "grad_norm": 1424.0748291015625, "learning_rate": 3.5220000000000003e-06, "loss": 74.2714, "step": 17610 }, { "epoch": 0.03559351478888319, "grad_norm": 591.5083618164062, "learning_rate": 3.524e-06, "loss": 46.3843, "step": 17620 }, { "epoch": 0.035613715421567, "grad_norm": 226.62847900390625, "learning_rate": 3.5260000000000006e-06, "loss": 29.5881, "step": 17630 }, { "epoch": 0.03563391605425082, "grad_norm": 182.4596710205078, "learning_rate": 3.5280000000000004e-06, "loss": 32.0225, "step": 17640 }, { "epoch": 0.035654116686934634, "grad_norm": 185.31124877929688, "learning_rate": 3.53e-06, "loss": 75.8514, "step": 17650 }, { "epoch": 0.035674317319618454, "grad_norm": 262.7769775390625, "learning_rate": 3.5320000000000002e-06, "loss": 15.7194, "step": 17660 }, { "epoch": 0.03569451795230227, "grad_norm": 173.9873504638672, "learning_rate": 3.5340000000000004e-06, "loss": 23.0087, "step": 17670 }, { "epoch": 0.03571471858498608, "grad_norm": 240.368408203125, "learning_rate": 3.5360000000000005e-06, "loss": 43.2808, "step": 17680 }, { "epoch": 0.0357349192176699, "grad_norm": 81.28392028808594, "learning_rate": 3.5380000000000003e-06, "loss": 33.6645, "step": 17690 }, { "epoch": 0.03575511985035371, "grad_norm": 42.356163024902344, "learning_rate": 3.54e-06, "loss": 39.296, "step": 17700 }, { "epoch": 0.035775320483037526, "grad_norm": 127.28237915039062, "learning_rate": 3.5420000000000006e-06, "loss": 40.4099, "step": 17710 }, { "epoch": 0.035795521115721346, "grad_norm": 197.6748504638672, "learning_rate": 3.5440000000000003e-06, "loss": 44.6826, "step": 17720 }, { "epoch": 0.03581572174840516, "grad_norm": 803.0872192382812, "learning_rate": 3.5460000000000004e-06, "loss": 49.538, "step": 17730 }, { "epoch": 0.03583592238108898, "grad_norm": 210.82821655273438, "learning_rate": 3.548e-06, "loss": 19.9511, "step": 17740 }, { "epoch": 0.03585612301377279, "grad_norm": 308.3215026855469, "learning_rate": 3.5500000000000003e-06, "loss": 52.9195, "step": 17750 }, { "epoch": 0.035876323646456605, "grad_norm": 180.1688232421875, "learning_rate": 3.5520000000000005e-06, "loss": 38.3887, "step": 17760 }, { "epoch": 0.035896524279140425, "grad_norm": 201.2501678466797, "learning_rate": 3.554e-06, "loss": 64.2966, "step": 17770 }, { "epoch": 0.03591672491182424, "grad_norm": 207.5270538330078, "learning_rate": 3.5560000000000008e-06, "loss": 34.003, "step": 17780 }, { "epoch": 0.03593692554450805, "grad_norm": 73.19796752929688, "learning_rate": 3.5580000000000005e-06, "loss": 20.9719, "step": 17790 }, { "epoch": 0.03595712617719187, "grad_norm": 296.2001037597656, "learning_rate": 3.5600000000000002e-06, "loss": 60.007, "step": 17800 }, { "epoch": 0.035977326809875684, "grad_norm": 141.45248413085938, "learning_rate": 3.5620000000000004e-06, "loss": 32.5284, "step": 17810 }, { "epoch": 0.035997527442559504, "grad_norm": 1157.9305419921875, "learning_rate": 3.564e-06, "loss": 49.5719, "step": 17820 }, { "epoch": 0.03601772807524332, "grad_norm": 334.35662841796875, "learning_rate": 3.566e-06, "loss": 38.4939, "step": 17830 }, { "epoch": 0.03603792870792713, "grad_norm": 325.7164611816406, "learning_rate": 3.5680000000000004e-06, "loss": 32.4559, "step": 17840 }, { "epoch": 0.03605812934061095, "grad_norm": 1370.3516845703125, "learning_rate": 3.57e-06, "loss": 70.2296, "step": 17850 }, { "epoch": 0.036078329973294763, "grad_norm": 216.684814453125, "learning_rate": 3.5720000000000003e-06, "loss": 27.2375, "step": 17860 }, { "epoch": 0.03609853060597858, "grad_norm": 298.2733459472656, "learning_rate": 3.5740000000000004e-06, "loss": 37.2959, "step": 17870 }, { "epoch": 0.0361187312386624, "grad_norm": 164.08706665039062, "learning_rate": 3.576e-06, "loss": 32.76, "step": 17880 }, { "epoch": 0.03613893187134621, "grad_norm": 392.2862854003906, "learning_rate": 3.5780000000000003e-06, "loss": 25.8495, "step": 17890 }, { "epoch": 0.03615913250403003, "grad_norm": 433.18853759765625, "learning_rate": 3.58e-06, "loss": 48.7175, "step": 17900 }, { "epoch": 0.03617933313671384, "grad_norm": 179.50746154785156, "learning_rate": 3.5820000000000006e-06, "loss": 32.113, "step": 17910 }, { "epoch": 0.036199533769397656, "grad_norm": 325.757568359375, "learning_rate": 3.5840000000000003e-06, "loss": 73.899, "step": 17920 }, { "epoch": 0.036219734402081476, "grad_norm": 260.6400146484375, "learning_rate": 3.586e-06, "loss": 39.0384, "step": 17930 }, { "epoch": 0.03623993503476529, "grad_norm": 263.32781982421875, "learning_rate": 3.588e-06, "loss": 47.6717, "step": 17940 }, { "epoch": 0.0362601356674491, "grad_norm": 277.36676025390625, "learning_rate": 3.5900000000000004e-06, "loss": 22.1398, "step": 17950 }, { "epoch": 0.03628033630013292, "grad_norm": 138.4691162109375, "learning_rate": 3.5920000000000005e-06, "loss": 31.8357, "step": 17960 }, { "epoch": 0.036300536932816735, "grad_norm": 1346.462158203125, "learning_rate": 3.5940000000000002e-06, "loss": 55.4747, "step": 17970 }, { "epoch": 0.036320737565500555, "grad_norm": 588.6998291015625, "learning_rate": 3.596e-06, "loss": 46.6048, "step": 17980 }, { "epoch": 0.03634093819818437, "grad_norm": 234.89425659179688, "learning_rate": 3.5980000000000005e-06, "loss": 47.1785, "step": 17990 }, { "epoch": 0.03636113883086818, "grad_norm": 705.8159790039062, "learning_rate": 3.6000000000000003e-06, "loss": 42.2352, "step": 18000 }, { "epoch": 0.036381339463552, "grad_norm": 297.0065612792969, "learning_rate": 3.6020000000000004e-06, "loss": 37.3573, "step": 18010 }, { "epoch": 0.036401540096235814, "grad_norm": 134.91839599609375, "learning_rate": 3.604e-06, "loss": 38.2756, "step": 18020 }, { "epoch": 0.03642174072891963, "grad_norm": 156.5696258544922, "learning_rate": 3.606e-06, "loss": 39.7929, "step": 18030 }, { "epoch": 0.03644194136160345, "grad_norm": 135.5779571533203, "learning_rate": 3.6080000000000004e-06, "loss": 28.3954, "step": 18040 }, { "epoch": 0.03646214199428726, "grad_norm": 330.52752685546875, "learning_rate": 3.61e-06, "loss": 19.5382, "step": 18050 }, { "epoch": 0.03648234262697108, "grad_norm": 236.32223510742188, "learning_rate": 3.6120000000000003e-06, "loss": 15.5008, "step": 18060 }, { "epoch": 0.03650254325965489, "grad_norm": 335.78741455078125, "learning_rate": 3.6140000000000005e-06, "loss": 37.0629, "step": 18070 }, { "epoch": 0.036522743892338706, "grad_norm": 323.3526916503906, "learning_rate": 3.616e-06, "loss": 41.3302, "step": 18080 }, { "epoch": 0.036542944525022526, "grad_norm": 233.77789306640625, "learning_rate": 3.6180000000000003e-06, "loss": 35.0574, "step": 18090 }, { "epoch": 0.03656314515770634, "grad_norm": 276.2250671386719, "learning_rate": 3.62e-06, "loss": 15.4496, "step": 18100 }, { "epoch": 0.03658334579039015, "grad_norm": 685.2402954101562, "learning_rate": 3.6220000000000006e-06, "loss": 93.1495, "step": 18110 }, { "epoch": 0.03660354642307397, "grad_norm": 795.9677734375, "learning_rate": 3.6240000000000004e-06, "loss": 46.3088, "step": 18120 }, { "epoch": 0.036623747055757785, "grad_norm": 173.06130981445312, "learning_rate": 3.626e-06, "loss": 54.6929, "step": 18130 }, { "epoch": 0.036643947688441605, "grad_norm": 653.5007934570312, "learning_rate": 3.6280000000000002e-06, "loss": 46.8532, "step": 18140 }, { "epoch": 0.03666414832112542, "grad_norm": 282.2478942871094, "learning_rate": 3.6300000000000004e-06, "loss": 40.0089, "step": 18150 }, { "epoch": 0.03668434895380923, "grad_norm": 331.37506103515625, "learning_rate": 3.6320000000000005e-06, "loss": 26.7366, "step": 18160 }, { "epoch": 0.03670454958649305, "grad_norm": 511.5520324707031, "learning_rate": 3.6340000000000003e-06, "loss": 31.3037, "step": 18170 }, { "epoch": 0.036724750219176865, "grad_norm": 44.16664505004883, "learning_rate": 3.636e-06, "loss": 10.1394, "step": 18180 }, { "epoch": 0.03674495085186068, "grad_norm": 114.39652252197266, "learning_rate": 3.6380000000000006e-06, "loss": 47.2661, "step": 18190 }, { "epoch": 0.0367651514845445, "grad_norm": 585.932373046875, "learning_rate": 3.6400000000000003e-06, "loss": 47.4173, "step": 18200 }, { "epoch": 0.03678535211722831, "grad_norm": 344.4446716308594, "learning_rate": 3.6420000000000005e-06, "loss": 43.2008, "step": 18210 }, { "epoch": 0.03680555274991213, "grad_norm": 193.8816375732422, "learning_rate": 3.644e-06, "loss": 16.2233, "step": 18220 }, { "epoch": 0.036825753382595944, "grad_norm": 372.8602294921875, "learning_rate": 3.646e-06, "loss": 28.3199, "step": 18230 }, { "epoch": 0.03684595401527976, "grad_norm": 395.961181640625, "learning_rate": 3.6480000000000005e-06, "loss": 32.6986, "step": 18240 }, { "epoch": 0.03686615464796358, "grad_norm": 179.2494659423828, "learning_rate": 3.65e-06, "loss": 33.1535, "step": 18250 }, { "epoch": 0.03688635528064739, "grad_norm": 140.24179077148438, "learning_rate": 3.6520000000000004e-06, "loss": 23.5548, "step": 18260 }, { "epoch": 0.0369065559133312, "grad_norm": 186.63511657714844, "learning_rate": 3.6540000000000005e-06, "loss": 43.1165, "step": 18270 }, { "epoch": 0.03692675654601502, "grad_norm": 361.3440246582031, "learning_rate": 3.6560000000000002e-06, "loss": 28.2619, "step": 18280 }, { "epoch": 0.036946957178698836, "grad_norm": 343.0387268066406, "learning_rate": 3.6580000000000004e-06, "loss": 48.4111, "step": 18290 }, { "epoch": 0.03696715781138265, "grad_norm": 113.34236145019531, "learning_rate": 3.66e-06, "loss": 29.8769, "step": 18300 }, { "epoch": 0.03698735844406647, "grad_norm": 809.7240600585938, "learning_rate": 3.6620000000000007e-06, "loss": 86.7366, "step": 18310 }, { "epoch": 0.03700755907675028, "grad_norm": 658.0784912109375, "learning_rate": 3.6640000000000004e-06, "loss": 49.2616, "step": 18320 }, { "epoch": 0.0370277597094341, "grad_norm": 373.86627197265625, "learning_rate": 3.666e-06, "loss": 39.0814, "step": 18330 }, { "epoch": 0.037047960342117915, "grad_norm": 615.6790161132812, "learning_rate": 3.6680000000000003e-06, "loss": 47.3284, "step": 18340 }, { "epoch": 0.03706816097480173, "grad_norm": 189.26522827148438, "learning_rate": 3.6700000000000004e-06, "loss": 25.9669, "step": 18350 }, { "epoch": 0.03708836160748555, "grad_norm": 463.74786376953125, "learning_rate": 3.6720000000000006e-06, "loss": 38.0474, "step": 18360 }, { "epoch": 0.03710856224016936, "grad_norm": 302.9651184082031, "learning_rate": 3.6740000000000003e-06, "loss": 34.4072, "step": 18370 }, { "epoch": 0.037128762872853174, "grad_norm": 354.4803161621094, "learning_rate": 3.676e-06, "loss": 34.9851, "step": 18380 }, { "epoch": 0.037148963505536994, "grad_norm": 258.1112365722656, "learning_rate": 3.6780000000000006e-06, "loss": 39.068, "step": 18390 }, { "epoch": 0.03716916413822081, "grad_norm": 469.5314636230469, "learning_rate": 3.6800000000000003e-06, "loss": 28.9282, "step": 18400 }, { "epoch": 0.03718936477090463, "grad_norm": 454.90411376953125, "learning_rate": 3.6820000000000005e-06, "loss": 28.2511, "step": 18410 }, { "epoch": 0.03720956540358844, "grad_norm": 1558.954345703125, "learning_rate": 3.6840000000000002e-06, "loss": 66.9195, "step": 18420 }, { "epoch": 0.037229766036272254, "grad_norm": 204.2699432373047, "learning_rate": 3.686e-06, "loss": 41.2852, "step": 18430 }, { "epoch": 0.037249966668956074, "grad_norm": 241.36293029785156, "learning_rate": 3.6880000000000005e-06, "loss": 25.3009, "step": 18440 }, { "epoch": 0.03727016730163989, "grad_norm": 360.6550598144531, "learning_rate": 3.6900000000000002e-06, "loss": 31.9733, "step": 18450 }, { "epoch": 0.0372903679343237, "grad_norm": 164.91815185546875, "learning_rate": 3.692e-06, "loss": 41.6198, "step": 18460 }, { "epoch": 0.03731056856700752, "grad_norm": 270.8958740234375, "learning_rate": 3.6940000000000005e-06, "loss": 34.7206, "step": 18470 }, { "epoch": 0.03733076919969133, "grad_norm": 142.66688537597656, "learning_rate": 3.6960000000000003e-06, "loss": 53.1157, "step": 18480 }, { "epoch": 0.03735096983237515, "grad_norm": 103.28597259521484, "learning_rate": 3.6980000000000004e-06, "loss": 36.4304, "step": 18490 }, { "epoch": 0.037371170465058966, "grad_norm": 283.4157409667969, "learning_rate": 3.7e-06, "loss": 49.9509, "step": 18500 }, { "epoch": 0.03739137109774278, "grad_norm": 236.04534912109375, "learning_rate": 3.702e-06, "loss": 25.3822, "step": 18510 }, { "epoch": 0.0374115717304266, "grad_norm": 605.6878051757812, "learning_rate": 3.7040000000000005e-06, "loss": 45.2703, "step": 18520 }, { "epoch": 0.03743177236311041, "grad_norm": 405.3072204589844, "learning_rate": 3.706e-06, "loss": 58.2901, "step": 18530 }, { "epoch": 0.037451972995794225, "grad_norm": 50.85472869873047, "learning_rate": 3.7080000000000003e-06, "loss": 34.8958, "step": 18540 }, { "epoch": 0.037472173628478045, "grad_norm": 141.93861389160156, "learning_rate": 3.7100000000000005e-06, "loss": 40.354, "step": 18550 }, { "epoch": 0.03749237426116186, "grad_norm": 161.23402404785156, "learning_rate": 3.712e-06, "loss": 39.1741, "step": 18560 }, { "epoch": 0.03751257489384568, "grad_norm": 369.1956787109375, "learning_rate": 3.7140000000000004e-06, "loss": 66.3072, "step": 18570 }, { "epoch": 0.03753277552652949, "grad_norm": 254.17889404296875, "learning_rate": 3.716e-06, "loss": 37.6095, "step": 18580 }, { "epoch": 0.037552976159213304, "grad_norm": 80.26202392578125, "learning_rate": 3.7180000000000007e-06, "loss": 36.8261, "step": 18590 }, { "epoch": 0.037573176791897124, "grad_norm": 220.4662322998047, "learning_rate": 3.7200000000000004e-06, "loss": 52.8877, "step": 18600 }, { "epoch": 0.03759337742458094, "grad_norm": 233.2047119140625, "learning_rate": 3.722e-06, "loss": 23.2824, "step": 18610 }, { "epoch": 0.03761357805726475, "grad_norm": 270.5653991699219, "learning_rate": 3.7240000000000003e-06, "loss": 31.3964, "step": 18620 }, { "epoch": 0.03763377868994857, "grad_norm": 267.2872314453125, "learning_rate": 3.726e-06, "loss": 42.1942, "step": 18630 }, { "epoch": 0.03765397932263238, "grad_norm": 108.20490264892578, "learning_rate": 3.7280000000000006e-06, "loss": 65.9927, "step": 18640 }, { "epoch": 0.0376741799553162, "grad_norm": 626.9059448242188, "learning_rate": 3.7300000000000003e-06, "loss": 53.3173, "step": 18650 }, { "epoch": 0.037694380588000016, "grad_norm": 302.4452819824219, "learning_rate": 3.732e-06, "loss": 50.0128, "step": 18660 }, { "epoch": 0.03771458122068383, "grad_norm": 431.94061279296875, "learning_rate": 3.7340000000000006e-06, "loss": 46.6632, "step": 18670 }, { "epoch": 0.03773478185336765, "grad_norm": 245.79000854492188, "learning_rate": 3.7360000000000003e-06, "loss": 32.4549, "step": 18680 }, { "epoch": 0.03775498248605146, "grad_norm": 111.78962707519531, "learning_rate": 3.7380000000000005e-06, "loss": 21.797, "step": 18690 }, { "epoch": 0.037775183118735275, "grad_norm": 243.8290557861328, "learning_rate": 3.74e-06, "loss": 19.0147, "step": 18700 }, { "epoch": 0.037795383751419095, "grad_norm": 132.75537109375, "learning_rate": 3.742e-06, "loss": 51.0273, "step": 18710 }, { "epoch": 0.03781558438410291, "grad_norm": 412.0173034667969, "learning_rate": 3.7440000000000005e-06, "loss": 58.325, "step": 18720 }, { "epoch": 0.03783578501678673, "grad_norm": 366.5070495605469, "learning_rate": 3.7460000000000002e-06, "loss": 29.5474, "step": 18730 }, { "epoch": 0.03785598564947054, "grad_norm": 338.2908020019531, "learning_rate": 3.7480000000000004e-06, "loss": 32.188, "step": 18740 }, { "epoch": 0.037876186282154355, "grad_norm": 180.07687377929688, "learning_rate": 3.7500000000000005e-06, "loss": 48.9227, "step": 18750 }, { "epoch": 0.037896386914838175, "grad_norm": 287.295654296875, "learning_rate": 3.7520000000000002e-06, "loss": 44.9424, "step": 18760 }, { "epoch": 0.03791658754752199, "grad_norm": 185.7970733642578, "learning_rate": 3.7540000000000004e-06, "loss": 20.7312, "step": 18770 }, { "epoch": 0.0379367881802058, "grad_norm": 232.04095458984375, "learning_rate": 3.756e-06, "loss": 35.24, "step": 18780 }, { "epoch": 0.03795698881288962, "grad_norm": 447.64984130859375, "learning_rate": 3.7580000000000007e-06, "loss": 33.0793, "step": 18790 }, { "epoch": 0.037977189445573434, "grad_norm": 252.61468505859375, "learning_rate": 3.7600000000000004e-06, "loss": 34.0651, "step": 18800 }, { "epoch": 0.037997390078257254, "grad_norm": 308.70257568359375, "learning_rate": 3.762e-06, "loss": 45.3956, "step": 18810 }, { "epoch": 0.03801759071094107, "grad_norm": 107.71540069580078, "learning_rate": 3.7640000000000003e-06, "loss": 48.4428, "step": 18820 }, { "epoch": 0.03803779134362488, "grad_norm": 96.10125732421875, "learning_rate": 3.766e-06, "loss": 23.3869, "step": 18830 }, { "epoch": 0.0380579919763087, "grad_norm": 525.4846801757812, "learning_rate": 3.7680000000000006e-06, "loss": 41.9808, "step": 18840 }, { "epoch": 0.03807819260899251, "grad_norm": 365.93792724609375, "learning_rate": 3.7700000000000003e-06, "loss": 35.5262, "step": 18850 }, { "epoch": 0.038098393241676326, "grad_norm": 52.49779510498047, "learning_rate": 3.772e-06, "loss": 22.7564, "step": 18860 }, { "epoch": 0.038118593874360146, "grad_norm": 230.902587890625, "learning_rate": 3.7740000000000006e-06, "loss": 44.9496, "step": 18870 }, { "epoch": 0.03813879450704396, "grad_norm": 283.4836120605469, "learning_rate": 3.7760000000000004e-06, "loss": 21.6097, "step": 18880 }, { "epoch": 0.03815899513972778, "grad_norm": 292.7654113769531, "learning_rate": 3.7780000000000005e-06, "loss": 41.9978, "step": 18890 }, { "epoch": 0.03817919577241159, "grad_norm": 609.5935668945312, "learning_rate": 3.7800000000000002e-06, "loss": 37.1636, "step": 18900 }, { "epoch": 0.038199396405095405, "grad_norm": 320.7429504394531, "learning_rate": 3.782e-06, "loss": 50.1103, "step": 18910 }, { "epoch": 0.038219597037779225, "grad_norm": 288.2169494628906, "learning_rate": 3.7840000000000005e-06, "loss": 44.5782, "step": 18920 }, { "epoch": 0.03823979767046304, "grad_norm": 626.2681884765625, "learning_rate": 3.7860000000000003e-06, "loss": 46.2624, "step": 18930 }, { "epoch": 0.03825999830314685, "grad_norm": 517.8096313476562, "learning_rate": 3.7880000000000004e-06, "loss": 30.11, "step": 18940 }, { "epoch": 0.03828019893583067, "grad_norm": 305.6544494628906, "learning_rate": 3.79e-06, "loss": 45.8447, "step": 18950 }, { "epoch": 0.038300399568514484, "grad_norm": 858.4501953125, "learning_rate": 3.7920000000000003e-06, "loss": 52.7129, "step": 18960 }, { "epoch": 0.038320600201198304, "grad_norm": 226.44557189941406, "learning_rate": 3.7940000000000004e-06, "loss": 41.7739, "step": 18970 }, { "epoch": 0.03834080083388212, "grad_norm": 164.94886779785156, "learning_rate": 3.796e-06, "loss": 33.123, "step": 18980 }, { "epoch": 0.03836100146656593, "grad_norm": 281.3467102050781, "learning_rate": 3.7980000000000007e-06, "loss": 40.324, "step": 18990 }, { "epoch": 0.03838120209924975, "grad_norm": 225.93795776367188, "learning_rate": 3.8000000000000005e-06, "loss": 41.4061, "step": 19000 }, { "epoch": 0.038401402731933564, "grad_norm": 244.1658172607422, "learning_rate": 3.802e-06, "loss": 11.2858, "step": 19010 }, { "epoch": 0.03842160336461738, "grad_norm": 226.78929138183594, "learning_rate": 3.8040000000000003e-06, "loss": 46.7068, "step": 19020 }, { "epoch": 0.0384418039973012, "grad_norm": 1077.2154541015625, "learning_rate": 3.806e-06, "loss": 53.4091, "step": 19030 }, { "epoch": 0.03846200462998501, "grad_norm": 291.53515625, "learning_rate": 3.8080000000000006e-06, "loss": 34.4839, "step": 19040 }, { "epoch": 0.03848220526266883, "grad_norm": 165.29818725585938, "learning_rate": 3.8100000000000004e-06, "loss": 26.9524, "step": 19050 }, { "epoch": 0.03850240589535264, "grad_norm": 1038.2501220703125, "learning_rate": 3.812e-06, "loss": 43.0698, "step": 19060 }, { "epoch": 0.038522606528036456, "grad_norm": 471.1520080566406, "learning_rate": 3.8140000000000007e-06, "loss": 35.13, "step": 19070 }, { "epoch": 0.038542807160720276, "grad_norm": 216.5349578857422, "learning_rate": 3.816e-06, "loss": 19.9845, "step": 19080 }, { "epoch": 0.03856300779340409, "grad_norm": 259.1549987792969, "learning_rate": 3.818e-06, "loss": 45.4686, "step": 19090 }, { "epoch": 0.0385832084260879, "grad_norm": 477.9991149902344, "learning_rate": 3.820000000000001e-06, "loss": 42.4486, "step": 19100 }, { "epoch": 0.03860340905877172, "grad_norm": 517.9622192382812, "learning_rate": 3.822e-06, "loss": 46.368, "step": 19110 }, { "epoch": 0.038623609691455535, "grad_norm": 248.01715087890625, "learning_rate": 3.824e-06, "loss": 33.1944, "step": 19120 }, { "epoch": 0.038643810324139355, "grad_norm": 464.5331726074219, "learning_rate": 3.826e-06, "loss": 28.8234, "step": 19130 }, { "epoch": 0.03866401095682317, "grad_norm": 316.1875305175781, "learning_rate": 3.8280000000000004e-06, "loss": 80.0554, "step": 19140 }, { "epoch": 0.03868421158950698, "grad_norm": 285.3381042480469, "learning_rate": 3.830000000000001e-06, "loss": 42.0916, "step": 19150 }, { "epoch": 0.0387044122221908, "grad_norm": 96.07527923583984, "learning_rate": 3.832e-06, "loss": 31.475, "step": 19160 }, { "epoch": 0.038724612854874614, "grad_norm": 209.89808654785156, "learning_rate": 3.834000000000001e-06, "loss": 18.6147, "step": 19170 }, { "epoch": 0.03874481348755843, "grad_norm": 112.06613159179688, "learning_rate": 3.836e-06, "loss": 41.1837, "step": 19180 }, { "epoch": 0.03876501412024225, "grad_norm": 317.4769287109375, "learning_rate": 3.838e-06, "loss": 24.4347, "step": 19190 }, { "epoch": 0.03878521475292606, "grad_norm": 225.33377075195312, "learning_rate": 3.8400000000000005e-06, "loss": 50.5251, "step": 19200 }, { "epoch": 0.03880541538560988, "grad_norm": 681.8043212890625, "learning_rate": 3.842e-06, "loss": 36.7092, "step": 19210 }, { "epoch": 0.03882561601829369, "grad_norm": 200.1357421875, "learning_rate": 3.844000000000001e-06, "loss": 27.7072, "step": 19220 }, { "epoch": 0.038845816650977506, "grad_norm": 232.6201934814453, "learning_rate": 3.846e-06, "loss": 26.3682, "step": 19230 }, { "epoch": 0.038866017283661326, "grad_norm": 284.7813720703125, "learning_rate": 3.848e-06, "loss": 47.8822, "step": 19240 }, { "epoch": 0.03888621791634514, "grad_norm": 383.19256591796875, "learning_rate": 3.85e-06, "loss": 59.0909, "step": 19250 }, { "epoch": 0.03890641854902895, "grad_norm": 0.0, "learning_rate": 3.8520000000000006e-06, "loss": 23.762, "step": 19260 }, { "epoch": 0.03892661918171277, "grad_norm": 593.6499633789062, "learning_rate": 3.854000000000001e-06, "loss": 37.7503, "step": 19270 }, { "epoch": 0.038946819814396585, "grad_norm": 264.9753112792969, "learning_rate": 3.856e-06, "loss": 36.3482, "step": 19280 }, { "epoch": 0.038967020447080405, "grad_norm": 91.10636138916016, "learning_rate": 3.858e-06, "loss": 24.6681, "step": 19290 }, { "epoch": 0.03898722107976422, "grad_norm": 320.2542724609375, "learning_rate": 3.86e-06, "loss": 54.1367, "step": 19300 }, { "epoch": 0.03900742171244803, "grad_norm": 239.6652069091797, "learning_rate": 3.8620000000000005e-06, "loss": 40.8294, "step": 19310 }, { "epoch": 0.03902762234513185, "grad_norm": 142.2595977783203, "learning_rate": 3.864000000000001e-06, "loss": 30.216, "step": 19320 }, { "epoch": 0.039047822977815665, "grad_norm": 303.9486083984375, "learning_rate": 3.866e-06, "loss": 38.39, "step": 19330 }, { "epoch": 0.03906802361049948, "grad_norm": 333.3627624511719, "learning_rate": 3.868e-06, "loss": 26.5589, "step": 19340 }, { "epoch": 0.0390882242431833, "grad_norm": 53.61131286621094, "learning_rate": 3.87e-06, "loss": 24.1422, "step": 19350 }, { "epoch": 0.03910842487586711, "grad_norm": 289.007080078125, "learning_rate": 3.872e-06, "loss": 37.2298, "step": 19360 }, { "epoch": 0.03912862550855093, "grad_norm": 287.193359375, "learning_rate": 3.8740000000000005e-06, "loss": 51.0063, "step": 19370 }, { "epoch": 0.039148826141234744, "grad_norm": 469.2152404785156, "learning_rate": 3.876000000000001e-06, "loss": 35.0327, "step": 19380 }, { "epoch": 0.03916902677391856, "grad_norm": 320.2119140625, "learning_rate": 3.878e-06, "loss": 39.3611, "step": 19390 }, { "epoch": 0.03918922740660238, "grad_norm": 195.95912170410156, "learning_rate": 3.88e-06, "loss": 49.972, "step": 19400 }, { "epoch": 0.03920942803928619, "grad_norm": 1175.9329833984375, "learning_rate": 3.882e-06, "loss": 54.6578, "step": 19410 }, { "epoch": 0.03922962867197, "grad_norm": 141.44297790527344, "learning_rate": 3.884e-06, "loss": 40.9149, "step": 19420 }, { "epoch": 0.03924982930465382, "grad_norm": 155.56942749023438, "learning_rate": 3.8860000000000006e-06, "loss": 30.3314, "step": 19430 }, { "epoch": 0.039270029937337636, "grad_norm": 519.1924438476562, "learning_rate": 3.888e-06, "loss": 56.5439, "step": 19440 }, { "epoch": 0.039290230570021456, "grad_norm": 239.65721130371094, "learning_rate": 3.89e-06, "loss": 18.4781, "step": 19450 }, { "epoch": 0.03931043120270527, "grad_norm": 151.64952087402344, "learning_rate": 3.892e-06, "loss": 32.0155, "step": 19460 }, { "epoch": 0.03933063183538908, "grad_norm": 422.05474853515625, "learning_rate": 3.894e-06, "loss": 40.1132, "step": 19470 }, { "epoch": 0.0393508324680729, "grad_norm": 191.34115600585938, "learning_rate": 3.8960000000000005e-06, "loss": 32.5095, "step": 19480 }, { "epoch": 0.039371033100756715, "grad_norm": 364.09027099609375, "learning_rate": 3.898e-06, "loss": 29.6174, "step": 19490 }, { "epoch": 0.03939123373344053, "grad_norm": 545.3656005859375, "learning_rate": 3.900000000000001e-06, "loss": 60.0512, "step": 19500 }, { "epoch": 0.03941143436612435, "grad_norm": 255.3214569091797, "learning_rate": 3.902e-06, "loss": 40.6808, "step": 19510 }, { "epoch": 0.03943163499880816, "grad_norm": 0.0, "learning_rate": 3.904e-06, "loss": 27.8718, "step": 19520 }, { "epoch": 0.03945183563149198, "grad_norm": 545.8305053710938, "learning_rate": 3.906e-06, "loss": 24.319, "step": 19530 }, { "epoch": 0.039472036264175794, "grad_norm": 407.67645263671875, "learning_rate": 3.9080000000000005e-06, "loss": 39.4998, "step": 19540 }, { "epoch": 0.03949223689685961, "grad_norm": 402.3353271484375, "learning_rate": 3.910000000000001e-06, "loss": 37.6578, "step": 19550 }, { "epoch": 0.03951243752954343, "grad_norm": 445.6011962890625, "learning_rate": 3.912e-06, "loss": 13.8341, "step": 19560 }, { "epoch": 0.03953263816222724, "grad_norm": 472.3326110839844, "learning_rate": 3.914000000000001e-06, "loss": 37.3775, "step": 19570 }, { "epoch": 0.039552838794911054, "grad_norm": 294.15313720703125, "learning_rate": 3.916e-06, "loss": 58.8191, "step": 19580 }, { "epoch": 0.039573039427594874, "grad_norm": 235.44285583496094, "learning_rate": 3.9180000000000004e-06, "loss": 25.5726, "step": 19590 }, { "epoch": 0.03959324006027869, "grad_norm": 177.14979553222656, "learning_rate": 3.920000000000001e-06, "loss": 30.3149, "step": 19600 }, { "epoch": 0.03961344069296251, "grad_norm": 402.8600769042969, "learning_rate": 3.922e-06, "loss": 73.4992, "step": 19610 }, { "epoch": 0.03963364132564632, "grad_norm": 366.7654724121094, "learning_rate": 3.924000000000001e-06, "loss": 32.7771, "step": 19620 }, { "epoch": 0.03965384195833013, "grad_norm": 274.6293029785156, "learning_rate": 3.926e-06, "loss": 30.0265, "step": 19630 }, { "epoch": 0.03967404259101395, "grad_norm": 284.4570007324219, "learning_rate": 3.928e-06, "loss": 45.13, "step": 19640 }, { "epoch": 0.039694243223697766, "grad_norm": 297.3571472167969, "learning_rate": 3.9300000000000005e-06, "loss": 42.0707, "step": 19650 }, { "epoch": 0.03971444385638158, "grad_norm": 138.06423950195312, "learning_rate": 3.932000000000001e-06, "loss": 26.141, "step": 19660 }, { "epoch": 0.0397346444890654, "grad_norm": 228.03729248046875, "learning_rate": 3.934000000000001e-06, "loss": 54.6825, "step": 19670 }, { "epoch": 0.03975484512174921, "grad_norm": 1055.2249755859375, "learning_rate": 3.936e-06, "loss": 42.8682, "step": 19680 }, { "epoch": 0.03977504575443303, "grad_norm": 754.5296630859375, "learning_rate": 3.938e-06, "loss": 31.198, "step": 19690 }, { "epoch": 0.039795246387116845, "grad_norm": 162.82290649414062, "learning_rate": 3.94e-06, "loss": 31.4654, "step": 19700 }, { "epoch": 0.03981544701980066, "grad_norm": 218.39324951171875, "learning_rate": 3.9420000000000005e-06, "loss": 44.2964, "step": 19710 }, { "epoch": 0.03983564765248448, "grad_norm": 477.2374572753906, "learning_rate": 3.944e-06, "loss": 37.4174, "step": 19720 }, { "epoch": 0.03985584828516829, "grad_norm": 147.75048828125, "learning_rate": 3.946e-06, "loss": 36.2973, "step": 19730 }, { "epoch": 0.039876048917852104, "grad_norm": 326.9672546386719, "learning_rate": 3.948e-06, "loss": 49.3826, "step": 19740 }, { "epoch": 0.039896249550535924, "grad_norm": 533.1397094726562, "learning_rate": 3.95e-06, "loss": 27.5203, "step": 19750 }, { "epoch": 0.03991645018321974, "grad_norm": 180.598876953125, "learning_rate": 3.9520000000000004e-06, "loss": 28.9483, "step": 19760 }, { "epoch": 0.03993665081590356, "grad_norm": 232.1848602294922, "learning_rate": 3.954e-06, "loss": 23.4742, "step": 19770 }, { "epoch": 0.03995685144858737, "grad_norm": 228.30003356933594, "learning_rate": 3.956000000000001e-06, "loss": 20.7568, "step": 19780 }, { "epoch": 0.03997705208127118, "grad_norm": 302.8138122558594, "learning_rate": 3.958e-06, "loss": 28.9807, "step": 19790 }, { "epoch": 0.039997252713955, "grad_norm": 259.61859130859375, "learning_rate": 3.96e-06, "loss": 35.4427, "step": 19800 }, { "epoch": 0.040017453346638816, "grad_norm": 313.1875, "learning_rate": 3.962e-06, "loss": 41.8897, "step": 19810 }, { "epoch": 0.04003765397932263, "grad_norm": 242.72906494140625, "learning_rate": 3.964e-06, "loss": 33.5848, "step": 19820 }, { "epoch": 0.04005785461200645, "grad_norm": 214.87872314453125, "learning_rate": 3.966000000000001e-06, "loss": 47.5657, "step": 19830 }, { "epoch": 0.04007805524469026, "grad_norm": 604.9654541015625, "learning_rate": 3.968e-06, "loss": 41.5319, "step": 19840 }, { "epoch": 0.04009825587737408, "grad_norm": 210.2468719482422, "learning_rate": 3.97e-06, "loss": 42.5415, "step": 19850 }, { "epoch": 0.040118456510057895, "grad_norm": 387.0686950683594, "learning_rate": 3.972e-06, "loss": 62.9873, "step": 19860 }, { "epoch": 0.04013865714274171, "grad_norm": 15.22915267944336, "learning_rate": 3.974e-06, "loss": 14.7855, "step": 19870 }, { "epoch": 0.04015885777542553, "grad_norm": 208.395263671875, "learning_rate": 3.9760000000000006e-06, "loss": 24.5231, "step": 19880 }, { "epoch": 0.04017905840810934, "grad_norm": 517.0321655273438, "learning_rate": 3.978e-06, "loss": 25.0212, "step": 19890 }, { "epoch": 0.040199259040793155, "grad_norm": 262.9371032714844, "learning_rate": 3.980000000000001e-06, "loss": 47.0449, "step": 19900 }, { "epoch": 0.040219459673476975, "grad_norm": 286.5919494628906, "learning_rate": 3.982e-06, "loss": 57.3217, "step": 19910 }, { "epoch": 0.04023966030616079, "grad_norm": 294.7496032714844, "learning_rate": 3.984e-06, "loss": 25.2327, "step": 19920 }, { "epoch": 0.04025986093884461, "grad_norm": 184.26539611816406, "learning_rate": 3.9860000000000005e-06, "loss": 59.7884, "step": 19930 }, { "epoch": 0.04028006157152842, "grad_norm": 470.822509765625, "learning_rate": 3.988000000000001e-06, "loss": 26.862, "step": 19940 }, { "epoch": 0.040300262204212234, "grad_norm": 194.01406860351562, "learning_rate": 3.990000000000001e-06, "loss": 37.1457, "step": 19950 }, { "epoch": 0.040320462836896054, "grad_norm": 187.3774871826172, "learning_rate": 3.992e-06, "loss": 24.7482, "step": 19960 }, { "epoch": 0.04034066346957987, "grad_norm": 1887.197265625, "learning_rate": 3.994e-06, "loss": 40.298, "step": 19970 }, { "epoch": 0.04036086410226368, "grad_norm": 173.4396514892578, "learning_rate": 3.996e-06, "loss": 43.1376, "step": 19980 }, { "epoch": 0.0403810647349475, "grad_norm": 233.09146118164062, "learning_rate": 3.9980000000000005e-06, "loss": 25.7777, "step": 19990 }, { "epoch": 0.04040126536763131, "grad_norm": 180.28233337402344, "learning_rate": 4.000000000000001e-06, "loss": 27.7832, "step": 20000 }, { "epoch": 0.04042146600031513, "grad_norm": 299.1110534667969, "learning_rate": 4.002e-06, "loss": 34.6171, "step": 20010 }, { "epoch": 0.040441666632998946, "grad_norm": 148.36790466308594, "learning_rate": 4.004e-06, "loss": 23.9772, "step": 20020 }, { "epoch": 0.04046186726568276, "grad_norm": 244.75901794433594, "learning_rate": 4.006e-06, "loss": 35.057, "step": 20030 }, { "epoch": 0.04048206789836658, "grad_norm": 373.9143981933594, "learning_rate": 4.008e-06, "loss": 43.9603, "step": 20040 }, { "epoch": 0.04050226853105039, "grad_norm": 181.92359924316406, "learning_rate": 4.0100000000000006e-06, "loss": 40.7404, "step": 20050 }, { "epoch": 0.040522469163734205, "grad_norm": 125.74781036376953, "learning_rate": 4.012000000000001e-06, "loss": 56.9312, "step": 20060 }, { "epoch": 0.040542669796418025, "grad_norm": 236.4709930419922, "learning_rate": 4.014e-06, "loss": 46.1195, "step": 20070 }, { "epoch": 0.04056287042910184, "grad_norm": 422.9813232421875, "learning_rate": 4.016e-06, "loss": 30.1763, "step": 20080 }, { "epoch": 0.04058307106178566, "grad_norm": 456.4159851074219, "learning_rate": 4.018e-06, "loss": 39.7835, "step": 20090 }, { "epoch": 0.04060327169446947, "grad_norm": 18.531490325927734, "learning_rate": 4.0200000000000005e-06, "loss": 20.453, "step": 20100 }, { "epoch": 0.040623472327153284, "grad_norm": 153.16604614257812, "learning_rate": 4.022000000000001e-06, "loss": 32.1924, "step": 20110 }, { "epoch": 0.040643672959837104, "grad_norm": 1214.3763427734375, "learning_rate": 4.024e-06, "loss": 41.2925, "step": 20120 }, { "epoch": 0.04066387359252092, "grad_norm": 230.2184295654297, "learning_rate": 4.026e-06, "loss": 39.5028, "step": 20130 }, { "epoch": 0.04068407422520473, "grad_norm": 389.66790771484375, "learning_rate": 4.028e-06, "loss": 42.375, "step": 20140 }, { "epoch": 0.04070427485788855, "grad_norm": 101.8477554321289, "learning_rate": 4.03e-06, "loss": 37.2737, "step": 20150 }, { "epoch": 0.040724475490572364, "grad_norm": 723.5213623046875, "learning_rate": 4.0320000000000005e-06, "loss": 56.4912, "step": 20160 }, { "epoch": 0.040744676123256184, "grad_norm": 85.57526397705078, "learning_rate": 4.034e-06, "loss": 36.6772, "step": 20170 }, { "epoch": 0.04076487675594, "grad_norm": 316.2557678222656, "learning_rate": 4.036000000000001e-06, "loss": 47.8334, "step": 20180 }, { "epoch": 0.04078507738862381, "grad_norm": 386.5885314941406, "learning_rate": 4.038e-06, "loss": 40.2421, "step": 20190 }, { "epoch": 0.04080527802130763, "grad_norm": 204.37364196777344, "learning_rate": 4.04e-06, "loss": 37.975, "step": 20200 }, { "epoch": 0.04082547865399144, "grad_norm": 338.7021789550781, "learning_rate": 4.0420000000000004e-06, "loss": 35.5396, "step": 20210 }, { "epoch": 0.040845679286675256, "grad_norm": 986.453857421875, "learning_rate": 4.044e-06, "loss": 43.4821, "step": 20220 }, { "epoch": 0.040865879919359076, "grad_norm": 68.52318572998047, "learning_rate": 4.046000000000001e-06, "loss": 18.0724, "step": 20230 }, { "epoch": 0.04088608055204289, "grad_norm": 468.6248474121094, "learning_rate": 4.048e-06, "loss": 28.5707, "step": 20240 }, { "epoch": 0.04090628118472671, "grad_norm": 644.6000366210938, "learning_rate": 4.05e-06, "loss": 39.6615, "step": 20250 }, { "epoch": 0.04092648181741052, "grad_norm": 287.26025390625, "learning_rate": 4.052e-06, "loss": 50.3263, "step": 20260 }, { "epoch": 0.040946682450094335, "grad_norm": 647.2701416015625, "learning_rate": 4.0540000000000005e-06, "loss": 30.8261, "step": 20270 }, { "epoch": 0.040966883082778155, "grad_norm": 458.3521423339844, "learning_rate": 4.056000000000001e-06, "loss": 50.49, "step": 20280 }, { "epoch": 0.04098708371546197, "grad_norm": 237.10055541992188, "learning_rate": 4.058e-06, "loss": 44.8675, "step": 20290 }, { "epoch": 0.04100728434814578, "grad_norm": 402.80694580078125, "learning_rate": 4.060000000000001e-06, "loss": 30.175, "step": 20300 }, { "epoch": 0.0410274849808296, "grad_norm": 393.222900390625, "learning_rate": 4.062e-06, "loss": 42.801, "step": 20310 }, { "epoch": 0.041047685613513414, "grad_norm": 556.2861328125, "learning_rate": 4.064e-06, "loss": 36.0398, "step": 20320 }, { "epoch": 0.041067886246197234, "grad_norm": 253.1947479248047, "learning_rate": 4.0660000000000005e-06, "loss": 41.7622, "step": 20330 }, { "epoch": 0.04108808687888105, "grad_norm": 388.42388916015625, "learning_rate": 4.068000000000001e-06, "loss": 41.8073, "step": 20340 }, { "epoch": 0.04110828751156486, "grad_norm": 107.13079071044922, "learning_rate": 4.07e-06, "loss": 42.0569, "step": 20350 }, { "epoch": 0.04112848814424868, "grad_norm": 231.18760681152344, "learning_rate": 4.072e-06, "loss": 30.3361, "step": 20360 }, { "epoch": 0.04114868877693249, "grad_norm": 170.3965606689453, "learning_rate": 4.074e-06, "loss": 51.8707, "step": 20370 }, { "epoch": 0.041168889409616306, "grad_norm": 620.3305053710938, "learning_rate": 4.0760000000000004e-06, "loss": 60.2367, "step": 20380 }, { "epoch": 0.041189090042300126, "grad_norm": 195.74493408203125, "learning_rate": 4.078000000000001e-06, "loss": 25.1182, "step": 20390 }, { "epoch": 0.04120929067498394, "grad_norm": 331.8030700683594, "learning_rate": 4.08e-06, "loss": 37.5133, "step": 20400 }, { "epoch": 0.04122949130766776, "grad_norm": 455.5628356933594, "learning_rate": 4.082e-06, "loss": 36.1817, "step": 20410 }, { "epoch": 0.04124969194035157, "grad_norm": 310.407958984375, "learning_rate": 4.084e-06, "loss": 27.2958, "step": 20420 }, { "epoch": 0.041269892573035385, "grad_norm": 335.69842529296875, "learning_rate": 4.086e-06, "loss": 47.8984, "step": 20430 }, { "epoch": 0.041290093205719205, "grad_norm": 379.6198425292969, "learning_rate": 4.0880000000000005e-06, "loss": 44.3755, "step": 20440 }, { "epoch": 0.04131029383840302, "grad_norm": 163.5937042236328, "learning_rate": 4.09e-06, "loss": 24.2145, "step": 20450 }, { "epoch": 0.04133049447108683, "grad_norm": 272.02838134765625, "learning_rate": 4.092000000000001e-06, "loss": 25.5929, "step": 20460 }, { "epoch": 0.04135069510377065, "grad_norm": 97.86607360839844, "learning_rate": 4.094e-06, "loss": 34.764, "step": 20470 }, { "epoch": 0.041370895736454465, "grad_norm": 679.9865112304688, "learning_rate": 4.096e-06, "loss": 44.7761, "step": 20480 }, { "epoch": 0.041391096369138285, "grad_norm": 208.69107055664062, "learning_rate": 4.098e-06, "loss": 54.0235, "step": 20490 }, { "epoch": 0.0414112970018221, "grad_norm": 361.1912841796875, "learning_rate": 4.1e-06, "loss": 67.8025, "step": 20500 }, { "epoch": 0.04143149763450591, "grad_norm": 134.15077209472656, "learning_rate": 4.102000000000001e-06, "loss": 17.8437, "step": 20510 }, { "epoch": 0.04145169826718973, "grad_norm": 405.97833251953125, "learning_rate": 4.104e-06, "loss": 32.6409, "step": 20520 }, { "epoch": 0.041471898899873544, "grad_norm": 265.6661682128906, "learning_rate": 4.106e-06, "loss": 35.7546, "step": 20530 }, { "epoch": 0.04149209953255736, "grad_norm": 149.30752563476562, "learning_rate": 4.108e-06, "loss": 30.4391, "step": 20540 }, { "epoch": 0.04151230016524118, "grad_norm": 705.0855712890625, "learning_rate": 4.1100000000000005e-06, "loss": 38.2185, "step": 20550 }, { "epoch": 0.04153250079792499, "grad_norm": 206.291748046875, "learning_rate": 4.112000000000001e-06, "loss": 51.6587, "step": 20560 }, { "epoch": 0.04155270143060881, "grad_norm": 219.1594696044922, "learning_rate": 4.114e-06, "loss": 26.1015, "step": 20570 }, { "epoch": 0.04157290206329262, "grad_norm": 230.81301879882812, "learning_rate": 4.116000000000001e-06, "loss": 23.8538, "step": 20580 }, { "epoch": 0.041593102695976436, "grad_norm": 557.5405883789062, "learning_rate": 4.118e-06, "loss": 42.3369, "step": 20590 }, { "epoch": 0.041613303328660256, "grad_norm": 212.0529022216797, "learning_rate": 4.12e-06, "loss": 50.0415, "step": 20600 }, { "epoch": 0.04163350396134407, "grad_norm": 275.3895263671875, "learning_rate": 4.1220000000000005e-06, "loss": 29.3245, "step": 20610 }, { "epoch": 0.04165370459402788, "grad_norm": 270.3549499511719, "learning_rate": 4.124e-06, "loss": 35.1507, "step": 20620 }, { "epoch": 0.0416739052267117, "grad_norm": 164.70761108398438, "learning_rate": 4.126000000000001e-06, "loss": 45.2891, "step": 20630 }, { "epoch": 0.041694105859395515, "grad_norm": 162.1063232421875, "learning_rate": 4.128e-06, "loss": 46.8911, "step": 20640 }, { "epoch": 0.041714306492079335, "grad_norm": 265.4299011230469, "learning_rate": 4.13e-06, "loss": 30.3315, "step": 20650 }, { "epoch": 0.04173450712476315, "grad_norm": 111.43751525878906, "learning_rate": 4.132e-06, "loss": 20.5942, "step": 20660 }, { "epoch": 0.04175470775744696, "grad_norm": 324.6489562988281, "learning_rate": 4.1340000000000006e-06, "loss": 24.4084, "step": 20670 }, { "epoch": 0.04177490839013078, "grad_norm": 273.3630065917969, "learning_rate": 4.136000000000001e-06, "loss": 31.9613, "step": 20680 }, { "epoch": 0.041795109022814594, "grad_norm": 263.85955810546875, "learning_rate": 4.138e-06, "loss": 31.6124, "step": 20690 }, { "epoch": 0.04181530965549841, "grad_norm": 346.3900451660156, "learning_rate": 4.14e-06, "loss": 37.1704, "step": 20700 }, { "epoch": 0.04183551028818223, "grad_norm": 366.6315002441406, "learning_rate": 4.142e-06, "loss": 28.5815, "step": 20710 }, { "epoch": 0.04185571092086604, "grad_norm": 206.1935272216797, "learning_rate": 4.1440000000000005e-06, "loss": 30.2259, "step": 20720 }, { "epoch": 0.04187591155354986, "grad_norm": 236.19627380371094, "learning_rate": 4.146000000000001e-06, "loss": 26.418, "step": 20730 }, { "epoch": 0.041896112186233674, "grad_norm": 804.4663696289062, "learning_rate": 4.148000000000001e-06, "loss": 61.5932, "step": 20740 }, { "epoch": 0.04191631281891749, "grad_norm": 399.11236572265625, "learning_rate": 4.15e-06, "loss": 47.6362, "step": 20750 }, { "epoch": 0.04193651345160131, "grad_norm": 271.60321044921875, "learning_rate": 4.152e-06, "loss": 42.8877, "step": 20760 }, { "epoch": 0.04195671408428512, "grad_norm": 190.75071716308594, "learning_rate": 4.154e-06, "loss": 38.686, "step": 20770 }, { "epoch": 0.04197691471696893, "grad_norm": 105.6722640991211, "learning_rate": 4.1560000000000005e-06, "loss": 31.446, "step": 20780 }, { "epoch": 0.04199711534965275, "grad_norm": 289.6357421875, "learning_rate": 4.158000000000001e-06, "loss": 39.2534, "step": 20790 }, { "epoch": 0.042017315982336566, "grad_norm": 147.4788818359375, "learning_rate": 4.16e-06, "loss": 36.1763, "step": 20800 }, { "epoch": 0.042037516615020386, "grad_norm": 233.79751586914062, "learning_rate": 4.162e-06, "loss": 42.5497, "step": 20810 }, { "epoch": 0.0420577172477042, "grad_norm": 256.3533630371094, "learning_rate": 4.164e-06, "loss": 22.9329, "step": 20820 }, { "epoch": 0.04207791788038801, "grad_norm": 216.10049438476562, "learning_rate": 4.1660000000000004e-06, "loss": 44.9921, "step": 20830 }, { "epoch": 0.04209811851307183, "grad_norm": 319.9030456542969, "learning_rate": 4.168000000000001e-06, "loss": 26.3658, "step": 20840 }, { "epoch": 0.042118319145755645, "grad_norm": 329.2654113769531, "learning_rate": 4.17e-06, "loss": 42.2898, "step": 20850 }, { "epoch": 0.04213851977843946, "grad_norm": 198.0332489013672, "learning_rate": 4.172000000000001e-06, "loss": 35.2678, "step": 20860 }, { "epoch": 0.04215872041112328, "grad_norm": 231.90435791015625, "learning_rate": 4.174e-06, "loss": 27.8649, "step": 20870 }, { "epoch": 0.04217892104380709, "grad_norm": 403.4598388671875, "learning_rate": 4.176e-06, "loss": 50.7998, "step": 20880 }, { "epoch": 0.04219912167649091, "grad_norm": 291.4205017089844, "learning_rate": 4.1780000000000005e-06, "loss": 46.2099, "step": 20890 }, { "epoch": 0.042219322309174724, "grad_norm": 306.63055419921875, "learning_rate": 4.18e-06, "loss": 30.5958, "step": 20900 }, { "epoch": 0.04223952294185854, "grad_norm": 555.853515625, "learning_rate": 4.182000000000001e-06, "loss": 35.6532, "step": 20910 }, { "epoch": 0.04225972357454236, "grad_norm": 183.94618225097656, "learning_rate": 4.184e-06, "loss": 31.8897, "step": 20920 }, { "epoch": 0.04227992420722617, "grad_norm": 149.83074951171875, "learning_rate": 4.186e-06, "loss": 38.0345, "step": 20930 }, { "epoch": 0.04230012483990998, "grad_norm": 118.59393310546875, "learning_rate": 4.188e-06, "loss": 16.5319, "step": 20940 }, { "epoch": 0.0423203254725938, "grad_norm": 175.36904907226562, "learning_rate": 4.1900000000000005e-06, "loss": 20.8217, "step": 20950 }, { "epoch": 0.042340526105277616, "grad_norm": 308.11614990234375, "learning_rate": 4.192000000000001e-06, "loss": 36.8492, "step": 20960 }, { "epoch": 0.042360726737961436, "grad_norm": 520.227294921875, "learning_rate": 4.194e-06, "loss": 24.6954, "step": 20970 }, { "epoch": 0.04238092737064525, "grad_norm": 168.68067932128906, "learning_rate": 4.196e-06, "loss": 52.7015, "step": 20980 }, { "epoch": 0.04240112800332906, "grad_norm": 223.79693603515625, "learning_rate": 4.198e-06, "loss": 39.5725, "step": 20990 }, { "epoch": 0.04242132863601288, "grad_norm": 495.4461975097656, "learning_rate": 4.2000000000000004e-06, "loss": 37.8309, "step": 21000 }, { "epoch": 0.042441529268696696, "grad_norm": 274.291259765625, "learning_rate": 4.202000000000001e-06, "loss": 29.7146, "step": 21010 }, { "epoch": 0.04246172990138051, "grad_norm": 109.29052734375, "learning_rate": 4.204e-06, "loss": 42.5909, "step": 21020 }, { "epoch": 0.04248193053406433, "grad_norm": 396.0324401855469, "learning_rate": 4.206e-06, "loss": 38.3959, "step": 21030 }, { "epoch": 0.04250213116674814, "grad_norm": 520.785400390625, "learning_rate": 4.208e-06, "loss": 38.8013, "step": 21040 }, { "epoch": 0.04252233179943196, "grad_norm": 302.38604736328125, "learning_rate": 4.21e-06, "loss": 38.9812, "step": 21050 }, { "epoch": 0.042542532432115775, "grad_norm": 134.0892333984375, "learning_rate": 4.2120000000000005e-06, "loss": 29.6942, "step": 21060 }, { "epoch": 0.04256273306479959, "grad_norm": 342.7864685058594, "learning_rate": 4.214000000000001e-06, "loss": 30.0566, "step": 21070 }, { "epoch": 0.04258293369748341, "grad_norm": 223.21559143066406, "learning_rate": 4.216e-06, "loss": 40.129, "step": 21080 }, { "epoch": 0.04260313433016722, "grad_norm": 116.55540466308594, "learning_rate": 4.218e-06, "loss": 31.4512, "step": 21090 }, { "epoch": 0.042623334962851034, "grad_norm": 98.7749252319336, "learning_rate": 4.22e-06, "loss": 19.5832, "step": 21100 }, { "epoch": 0.042643535595534854, "grad_norm": 640.8407592773438, "learning_rate": 4.222e-06, "loss": 29.6755, "step": 21110 }, { "epoch": 0.04266373622821867, "grad_norm": 403.4369812011719, "learning_rate": 4.2240000000000006e-06, "loss": 33.7782, "step": 21120 }, { "epoch": 0.04268393686090249, "grad_norm": 287.1921081542969, "learning_rate": 4.226e-06, "loss": 22.9665, "step": 21130 }, { "epoch": 0.0427041374935863, "grad_norm": 321.8796691894531, "learning_rate": 4.228000000000001e-06, "loss": 27.9206, "step": 21140 }, { "epoch": 0.04272433812627011, "grad_norm": 338.6152038574219, "learning_rate": 4.23e-06, "loss": 39.3508, "step": 21150 }, { "epoch": 0.04274453875895393, "grad_norm": 272.7464904785156, "learning_rate": 4.232e-06, "loss": 33.3479, "step": 21160 }, { "epoch": 0.042764739391637746, "grad_norm": 286.60662841796875, "learning_rate": 4.2340000000000005e-06, "loss": 32.9905, "step": 21170 }, { "epoch": 0.04278494002432156, "grad_norm": 349.53533935546875, "learning_rate": 4.236e-06, "loss": 28.7499, "step": 21180 }, { "epoch": 0.04280514065700538, "grad_norm": 142.68948364257812, "learning_rate": 4.238000000000001e-06, "loss": 21.5154, "step": 21190 }, { "epoch": 0.04282534128968919, "grad_norm": 251.5746307373047, "learning_rate": 4.24e-06, "loss": 46.1122, "step": 21200 }, { "epoch": 0.04284554192237301, "grad_norm": 70.00657653808594, "learning_rate": 4.242e-06, "loss": 32.7837, "step": 21210 }, { "epoch": 0.042865742555056825, "grad_norm": 280.9669494628906, "learning_rate": 4.244e-06, "loss": 67.4386, "step": 21220 }, { "epoch": 0.04288594318774064, "grad_norm": 388.27288818359375, "learning_rate": 4.2460000000000005e-06, "loss": 53.6686, "step": 21230 }, { "epoch": 0.04290614382042446, "grad_norm": 425.47283935546875, "learning_rate": 4.248000000000001e-06, "loss": 42.1568, "step": 21240 }, { "epoch": 0.04292634445310827, "grad_norm": 123.08135223388672, "learning_rate": 4.25e-06, "loss": 31.9323, "step": 21250 }, { "epoch": 0.042946545085792084, "grad_norm": 419.52117919921875, "learning_rate": 4.252000000000001e-06, "loss": 32.1854, "step": 21260 }, { "epoch": 0.042966745718475904, "grad_norm": 123.61571502685547, "learning_rate": 4.254e-06, "loss": 44.1942, "step": 21270 }, { "epoch": 0.04298694635115972, "grad_norm": 684.3426513671875, "learning_rate": 4.256e-06, "loss": 41.342, "step": 21280 }, { "epoch": 0.04300714698384353, "grad_norm": 292.04412841796875, "learning_rate": 4.2580000000000006e-06, "loss": 43.7548, "step": 21290 }, { "epoch": 0.04302734761652735, "grad_norm": 433.35906982421875, "learning_rate": 4.26e-06, "loss": 33.365, "step": 21300 }, { "epoch": 0.043047548249211164, "grad_norm": 497.30364990234375, "learning_rate": 4.262000000000001e-06, "loss": 38.0372, "step": 21310 }, { "epoch": 0.043067748881894984, "grad_norm": 220.22071838378906, "learning_rate": 4.264e-06, "loss": 41.565, "step": 21320 }, { "epoch": 0.0430879495145788, "grad_norm": 228.95925903320312, "learning_rate": 4.266e-06, "loss": 28.0936, "step": 21330 }, { "epoch": 0.04310815014726261, "grad_norm": 290.1015930175781, "learning_rate": 4.2680000000000005e-06, "loss": 39.5108, "step": 21340 }, { "epoch": 0.04312835077994643, "grad_norm": 156.27627563476562, "learning_rate": 4.270000000000001e-06, "loss": 25.5435, "step": 21350 }, { "epoch": 0.04314855141263024, "grad_norm": 278.43896484375, "learning_rate": 4.272000000000001e-06, "loss": 30.0249, "step": 21360 }, { "epoch": 0.043168752045314056, "grad_norm": 208.07400512695312, "learning_rate": 4.274e-06, "loss": 38.6718, "step": 21370 }, { "epoch": 0.043188952677997876, "grad_norm": 236.41712951660156, "learning_rate": 4.276e-06, "loss": 30.3445, "step": 21380 }, { "epoch": 0.04320915331068169, "grad_norm": 189.0663299560547, "learning_rate": 4.278e-06, "loss": 55.6279, "step": 21390 }, { "epoch": 0.04322935394336551, "grad_norm": 336.48468017578125, "learning_rate": 4.2800000000000005e-06, "loss": 29.7341, "step": 21400 }, { "epoch": 0.04324955457604932, "grad_norm": 248.12747192382812, "learning_rate": 4.282000000000001e-06, "loss": 37.267, "step": 21410 }, { "epoch": 0.043269755208733135, "grad_norm": 127.51718139648438, "learning_rate": 4.284e-06, "loss": 47.6077, "step": 21420 }, { "epoch": 0.043289955841416955, "grad_norm": 164.20303344726562, "learning_rate": 4.286e-06, "loss": 46.6643, "step": 21430 }, { "epoch": 0.04331015647410077, "grad_norm": 188.94960021972656, "learning_rate": 4.288e-06, "loss": 29.428, "step": 21440 }, { "epoch": 0.04333035710678458, "grad_norm": 419.154296875, "learning_rate": 4.2900000000000004e-06, "loss": 28.4682, "step": 21450 }, { "epoch": 0.0433505577394684, "grad_norm": 520.4407958984375, "learning_rate": 4.292000000000001e-06, "loss": 50.1902, "step": 21460 }, { "epoch": 0.043370758372152214, "grad_norm": 432.8997497558594, "learning_rate": 4.294000000000001e-06, "loss": 31.0926, "step": 21470 }, { "epoch": 0.043390959004836034, "grad_norm": 120.74382019042969, "learning_rate": 4.296e-06, "loss": 37.4484, "step": 21480 }, { "epoch": 0.04341115963751985, "grad_norm": 279.5553283691406, "learning_rate": 4.298e-06, "loss": 50.7712, "step": 21490 }, { "epoch": 0.04343136027020366, "grad_norm": 33.08868408203125, "learning_rate": 4.3e-06, "loss": 46.8983, "step": 21500 }, { "epoch": 0.04345156090288748, "grad_norm": 435.3478088378906, "learning_rate": 4.3020000000000005e-06, "loss": 25.8913, "step": 21510 }, { "epoch": 0.04347176153557129, "grad_norm": 842.780517578125, "learning_rate": 4.304000000000001e-06, "loss": 85.3646, "step": 21520 }, { "epoch": 0.043491962168255106, "grad_norm": 349.991455078125, "learning_rate": 4.306e-06, "loss": 31.8702, "step": 21530 }, { "epoch": 0.043512162800938926, "grad_norm": 271.3420104980469, "learning_rate": 4.308000000000001e-06, "loss": 45.9639, "step": 21540 }, { "epoch": 0.04353236343362274, "grad_norm": 260.9608154296875, "learning_rate": 4.31e-06, "loss": 31.7394, "step": 21550 }, { "epoch": 0.04355256406630656, "grad_norm": 207.44227600097656, "learning_rate": 4.312e-06, "loss": 19.6438, "step": 21560 }, { "epoch": 0.04357276469899037, "grad_norm": 57.47203063964844, "learning_rate": 4.3140000000000005e-06, "loss": 34.4854, "step": 21570 }, { "epoch": 0.043592965331674186, "grad_norm": 391.72833251953125, "learning_rate": 4.316e-06, "loss": 46.3629, "step": 21580 }, { "epoch": 0.043613165964358006, "grad_norm": 594.5343627929688, "learning_rate": 4.318000000000001e-06, "loss": 35.9651, "step": 21590 }, { "epoch": 0.04363336659704182, "grad_norm": 274.3625793457031, "learning_rate": 4.32e-06, "loss": 34.0643, "step": 21600 }, { "epoch": 0.04365356722972563, "grad_norm": 380.2254943847656, "learning_rate": 4.322e-06, "loss": 38.6491, "step": 21610 }, { "epoch": 0.04367376786240945, "grad_norm": 157.87857055664062, "learning_rate": 4.3240000000000004e-06, "loss": 56.405, "step": 21620 }, { "epoch": 0.043693968495093265, "grad_norm": 228.67489624023438, "learning_rate": 4.326000000000001e-06, "loss": 49.5288, "step": 21630 }, { "epoch": 0.043714169127777085, "grad_norm": 221.23074340820312, "learning_rate": 4.328000000000001e-06, "loss": 36.981, "step": 21640 }, { "epoch": 0.0437343697604609, "grad_norm": 90.14576721191406, "learning_rate": 4.33e-06, "loss": 41.8231, "step": 21650 }, { "epoch": 0.04375457039314471, "grad_norm": 211.10719299316406, "learning_rate": 4.332e-06, "loss": 44.7344, "step": 21660 }, { "epoch": 0.04377477102582853, "grad_norm": 0.0, "learning_rate": 4.334e-06, "loss": 23.928, "step": 21670 }, { "epoch": 0.043794971658512344, "grad_norm": 1181.165771484375, "learning_rate": 4.3360000000000005e-06, "loss": 48.592, "step": 21680 }, { "epoch": 0.04381517229119616, "grad_norm": 353.056884765625, "learning_rate": 4.338000000000001e-06, "loss": 43.0181, "step": 21690 }, { "epoch": 0.04383537292387998, "grad_norm": 395.79608154296875, "learning_rate": 4.34e-06, "loss": 53.2542, "step": 21700 }, { "epoch": 0.04385557355656379, "grad_norm": 294.8266296386719, "learning_rate": 4.342e-06, "loss": 38.2674, "step": 21710 }, { "epoch": 0.04387577418924761, "grad_norm": 188.2626953125, "learning_rate": 4.344e-06, "loss": 27.6681, "step": 21720 }, { "epoch": 0.04389597482193142, "grad_norm": 518.6006469726562, "learning_rate": 4.346e-06, "loss": 50.1186, "step": 21730 }, { "epoch": 0.043916175454615236, "grad_norm": 304.2127380371094, "learning_rate": 4.3480000000000006e-06, "loss": 44.4629, "step": 21740 }, { "epoch": 0.043936376087299056, "grad_norm": 539.2412109375, "learning_rate": 4.350000000000001e-06, "loss": 41.0213, "step": 21750 }, { "epoch": 0.04395657671998287, "grad_norm": 463.7889709472656, "learning_rate": 4.352e-06, "loss": 38.7556, "step": 21760 }, { "epoch": 0.04397677735266668, "grad_norm": 161.10955810546875, "learning_rate": 4.354e-06, "loss": 40.5037, "step": 21770 }, { "epoch": 0.0439969779853505, "grad_norm": 140.80426025390625, "learning_rate": 4.356e-06, "loss": 38.4569, "step": 21780 }, { "epoch": 0.044017178618034315, "grad_norm": 207.1370849609375, "learning_rate": 4.3580000000000005e-06, "loss": 34.5314, "step": 21790 }, { "epoch": 0.044037379250718135, "grad_norm": 253.5378875732422, "learning_rate": 4.360000000000001e-06, "loss": 39.3193, "step": 21800 }, { "epoch": 0.04405757988340195, "grad_norm": 108.22684478759766, "learning_rate": 4.362e-06, "loss": 23.1773, "step": 21810 }, { "epoch": 0.04407778051608576, "grad_norm": 425.2170104980469, "learning_rate": 4.364e-06, "loss": 29.889, "step": 21820 }, { "epoch": 0.04409798114876958, "grad_norm": 706.4791870117188, "learning_rate": 4.366e-06, "loss": 39.8734, "step": 21830 }, { "epoch": 0.044118181781453394, "grad_norm": 236.09535217285156, "learning_rate": 4.368e-06, "loss": 24.5879, "step": 21840 }, { "epoch": 0.04413838241413721, "grad_norm": 411.9450378417969, "learning_rate": 4.3700000000000005e-06, "loss": 39.2007, "step": 21850 }, { "epoch": 0.04415858304682103, "grad_norm": 348.68603515625, "learning_rate": 4.372e-06, "loss": 28.8362, "step": 21860 }, { "epoch": 0.04417878367950484, "grad_norm": 189.90162658691406, "learning_rate": 4.374000000000001e-06, "loss": 27.3795, "step": 21870 }, { "epoch": 0.04419898431218866, "grad_norm": 339.9339904785156, "learning_rate": 4.376e-06, "loss": 54.8887, "step": 21880 }, { "epoch": 0.044219184944872474, "grad_norm": 380.402099609375, "learning_rate": 4.378e-06, "loss": 25.3555, "step": 21890 }, { "epoch": 0.04423938557755629, "grad_norm": 377.67132568359375, "learning_rate": 4.38e-06, "loss": 25.6329, "step": 21900 }, { "epoch": 0.04425958621024011, "grad_norm": 246.21377563476562, "learning_rate": 4.382e-06, "loss": 19.7301, "step": 21910 }, { "epoch": 0.04427978684292392, "grad_norm": 190.0476531982422, "learning_rate": 4.384000000000001e-06, "loss": 28.2195, "step": 21920 }, { "epoch": 0.04429998747560773, "grad_norm": 166.9833984375, "learning_rate": 4.386e-06, "loss": 24.1486, "step": 21930 }, { "epoch": 0.04432018810829155, "grad_norm": 77.43944549560547, "learning_rate": 4.388e-06, "loss": 22.8599, "step": 21940 }, { "epoch": 0.044340388740975366, "grad_norm": 1095.1483154296875, "learning_rate": 4.39e-06, "loss": 53.9709, "step": 21950 }, { "epoch": 0.044360589373659186, "grad_norm": 165.46905517578125, "learning_rate": 4.3920000000000005e-06, "loss": 18.0595, "step": 21960 }, { "epoch": 0.044380790006343, "grad_norm": 486.6594543457031, "learning_rate": 4.394000000000001e-06, "loss": 71.8171, "step": 21970 }, { "epoch": 0.04440099063902681, "grad_norm": 72.8458023071289, "learning_rate": 4.396e-06, "loss": 65.0912, "step": 21980 }, { "epoch": 0.04442119127171063, "grad_norm": 237.4270477294922, "learning_rate": 4.398000000000001e-06, "loss": 44.7832, "step": 21990 }, { "epoch": 0.044441391904394445, "grad_norm": 73.35201263427734, "learning_rate": 4.4e-06, "loss": 23.8745, "step": 22000 }, { "epoch": 0.04446159253707826, "grad_norm": 401.0522766113281, "learning_rate": 4.402e-06, "loss": 39.8531, "step": 22010 }, { "epoch": 0.04448179316976208, "grad_norm": 1233.225830078125, "learning_rate": 4.4040000000000005e-06, "loss": 47.4299, "step": 22020 }, { "epoch": 0.04450199380244589, "grad_norm": 439.6102294921875, "learning_rate": 4.406000000000001e-06, "loss": 44.3661, "step": 22030 }, { "epoch": 0.04452219443512971, "grad_norm": 547.6974487304688, "learning_rate": 4.408000000000001e-06, "loss": 31.4482, "step": 22040 }, { "epoch": 0.044542395067813524, "grad_norm": 462.153076171875, "learning_rate": 4.41e-06, "loss": 46.3094, "step": 22050 }, { "epoch": 0.04456259570049734, "grad_norm": 514.1149291992188, "learning_rate": 4.412e-06, "loss": 33.3876, "step": 22060 }, { "epoch": 0.04458279633318116, "grad_norm": 239.8423309326172, "learning_rate": 4.4140000000000004e-06, "loss": 48.2995, "step": 22070 }, { "epoch": 0.04460299696586497, "grad_norm": 205.03448486328125, "learning_rate": 4.416000000000001e-06, "loss": 35.4499, "step": 22080 }, { "epoch": 0.04462319759854878, "grad_norm": 301.2530212402344, "learning_rate": 4.418000000000001e-06, "loss": 36.846, "step": 22090 }, { "epoch": 0.0446433982312326, "grad_norm": 161.7266082763672, "learning_rate": 4.42e-06, "loss": 26.2522, "step": 22100 }, { "epoch": 0.044663598863916416, "grad_norm": 326.98480224609375, "learning_rate": 4.422e-06, "loss": 31.52, "step": 22110 }, { "epoch": 0.044683799496600236, "grad_norm": 437.6254577636719, "learning_rate": 4.424e-06, "loss": 19.189, "step": 22120 }, { "epoch": 0.04470400012928405, "grad_norm": 61.288272857666016, "learning_rate": 4.4260000000000005e-06, "loss": 20.0697, "step": 22130 }, { "epoch": 0.04472420076196786, "grad_norm": 176.42001342773438, "learning_rate": 4.428000000000001e-06, "loss": 35.582, "step": 22140 }, { "epoch": 0.04474440139465168, "grad_norm": 538.8167724609375, "learning_rate": 4.430000000000001e-06, "loss": 37.036, "step": 22150 }, { "epoch": 0.044764602027335496, "grad_norm": 238.05960083007812, "learning_rate": 4.432e-06, "loss": 38.2989, "step": 22160 }, { "epoch": 0.04478480266001931, "grad_norm": 362.31927490234375, "learning_rate": 4.434e-06, "loss": 31.1769, "step": 22170 }, { "epoch": 0.04480500329270313, "grad_norm": 496.01898193359375, "learning_rate": 4.436e-06, "loss": 28.2128, "step": 22180 }, { "epoch": 0.04482520392538694, "grad_norm": 193.86314392089844, "learning_rate": 4.438e-06, "loss": 23.9492, "step": 22190 }, { "epoch": 0.04484540455807076, "grad_norm": 318.4892883300781, "learning_rate": 4.440000000000001e-06, "loss": 68.4978, "step": 22200 }, { "epoch": 0.044865605190754575, "grad_norm": 0.0, "learning_rate": 4.442e-06, "loss": 43.5876, "step": 22210 }, { "epoch": 0.04488580582343839, "grad_norm": 324.33563232421875, "learning_rate": 4.444e-06, "loss": 25.8551, "step": 22220 }, { "epoch": 0.04490600645612221, "grad_norm": 0.0, "learning_rate": 4.446e-06, "loss": 36.7645, "step": 22230 }, { "epoch": 0.04492620708880602, "grad_norm": 240.1009063720703, "learning_rate": 4.4480000000000004e-06, "loss": 26.5525, "step": 22240 }, { "epoch": 0.044946407721489834, "grad_norm": 107.0228500366211, "learning_rate": 4.450000000000001e-06, "loss": 15.311, "step": 22250 }, { "epoch": 0.044966608354173654, "grad_norm": 250.37899780273438, "learning_rate": 4.452e-06, "loss": 19.0226, "step": 22260 }, { "epoch": 0.04498680898685747, "grad_norm": 408.0428466796875, "learning_rate": 4.454000000000001e-06, "loss": 35.7738, "step": 22270 }, { "epoch": 0.04500700961954129, "grad_norm": 497.276611328125, "learning_rate": 4.456e-06, "loss": 50.8318, "step": 22280 }, { "epoch": 0.0450272102522251, "grad_norm": 828.6140747070312, "learning_rate": 4.458e-06, "loss": 64.4539, "step": 22290 }, { "epoch": 0.04504741088490891, "grad_norm": 229.49667358398438, "learning_rate": 4.4600000000000005e-06, "loss": 30.9743, "step": 22300 }, { "epoch": 0.04506761151759273, "grad_norm": 166.751953125, "learning_rate": 4.462e-06, "loss": 24.7609, "step": 22310 }, { "epoch": 0.045087812150276546, "grad_norm": 321.2261657714844, "learning_rate": 4.464000000000001e-06, "loss": 50.5056, "step": 22320 }, { "epoch": 0.04510801278296036, "grad_norm": 189.7879638671875, "learning_rate": 4.466e-06, "loss": 35.26, "step": 22330 }, { "epoch": 0.04512821341564418, "grad_norm": 265.6359558105469, "learning_rate": 4.468e-06, "loss": 29.7004, "step": 22340 }, { "epoch": 0.04514841404832799, "grad_norm": 202.7790069580078, "learning_rate": 4.47e-06, "loss": 29.928, "step": 22350 }, { "epoch": 0.04516861468101181, "grad_norm": 326.2535095214844, "learning_rate": 4.4720000000000006e-06, "loss": 26.8285, "step": 22360 }, { "epoch": 0.045188815313695625, "grad_norm": 251.0867462158203, "learning_rate": 4.474000000000001e-06, "loss": 28.2856, "step": 22370 }, { "epoch": 0.04520901594637944, "grad_norm": 463.17926025390625, "learning_rate": 4.476e-06, "loss": 37.4765, "step": 22380 }, { "epoch": 0.04522921657906326, "grad_norm": 163.80303955078125, "learning_rate": 4.478e-06, "loss": 54.5256, "step": 22390 }, { "epoch": 0.04524941721174707, "grad_norm": 276.209228515625, "learning_rate": 4.48e-06, "loss": 64.2981, "step": 22400 }, { "epoch": 0.045269617844430884, "grad_norm": 460.2864990234375, "learning_rate": 4.4820000000000005e-06, "loss": 35.9574, "step": 22410 }, { "epoch": 0.045289818477114704, "grad_norm": 284.27789306640625, "learning_rate": 4.484000000000001e-06, "loss": 37.4327, "step": 22420 }, { "epoch": 0.04531001910979852, "grad_norm": 414.3496398925781, "learning_rate": 4.486000000000001e-06, "loss": 46.6762, "step": 22430 }, { "epoch": 0.04533021974248234, "grad_norm": 368.27386474609375, "learning_rate": 4.488e-06, "loss": 49.7956, "step": 22440 }, { "epoch": 0.04535042037516615, "grad_norm": 172.6141815185547, "learning_rate": 4.49e-06, "loss": 33.3679, "step": 22450 }, { "epoch": 0.045370621007849964, "grad_norm": 346.7876892089844, "learning_rate": 4.492e-06, "loss": 31.3557, "step": 22460 }, { "epoch": 0.045390821640533784, "grad_norm": 860.9100341796875, "learning_rate": 4.4940000000000005e-06, "loss": 44.6085, "step": 22470 }, { "epoch": 0.0454110222732176, "grad_norm": 557.9060668945312, "learning_rate": 4.496000000000001e-06, "loss": 62.5046, "step": 22480 }, { "epoch": 0.04543122290590141, "grad_norm": 284.54559326171875, "learning_rate": 4.498e-06, "loss": 33.3931, "step": 22490 }, { "epoch": 0.04545142353858523, "grad_norm": 426.3665466308594, "learning_rate": 4.5e-06, "loss": 31.1432, "step": 22500 }, { "epoch": 0.04547162417126904, "grad_norm": 445.5400085449219, "learning_rate": 4.502e-06, "loss": 33.4698, "step": 22510 }, { "epoch": 0.04549182480395286, "grad_norm": 338.7800598144531, "learning_rate": 4.504e-06, "loss": 47.3292, "step": 22520 }, { "epoch": 0.045512025436636676, "grad_norm": 173.97119140625, "learning_rate": 4.5060000000000006e-06, "loss": 44.0834, "step": 22530 }, { "epoch": 0.04553222606932049, "grad_norm": 486.3160400390625, "learning_rate": 4.508e-06, "loss": 42.7409, "step": 22540 }, { "epoch": 0.04555242670200431, "grad_norm": 233.2928924560547, "learning_rate": 4.510000000000001e-06, "loss": 27.2495, "step": 22550 }, { "epoch": 0.04557262733468812, "grad_norm": 176.17970275878906, "learning_rate": 4.512e-06, "loss": 32.9823, "step": 22560 }, { "epoch": 0.045592827967371935, "grad_norm": 146.0012969970703, "learning_rate": 4.514e-06, "loss": 65.5993, "step": 22570 }, { "epoch": 0.045613028600055755, "grad_norm": 1499.24951171875, "learning_rate": 4.5160000000000005e-06, "loss": 41.9914, "step": 22580 }, { "epoch": 0.04563322923273957, "grad_norm": 288.2677917480469, "learning_rate": 4.518e-06, "loss": 36.5384, "step": 22590 }, { "epoch": 0.04565342986542339, "grad_norm": 125.41619873046875, "learning_rate": 4.520000000000001e-06, "loss": 17.3458, "step": 22600 }, { "epoch": 0.0456736304981072, "grad_norm": 190.7774658203125, "learning_rate": 4.522e-06, "loss": 33.4024, "step": 22610 }, { "epoch": 0.045693831130791014, "grad_norm": 141.61460876464844, "learning_rate": 4.524e-06, "loss": 41.1783, "step": 22620 }, { "epoch": 0.045714031763474834, "grad_norm": 381.7168884277344, "learning_rate": 4.526e-06, "loss": 42.1798, "step": 22630 }, { "epoch": 0.04573423239615865, "grad_norm": 215.71304321289062, "learning_rate": 4.5280000000000005e-06, "loss": 39.072, "step": 22640 }, { "epoch": 0.04575443302884246, "grad_norm": 262.1174011230469, "learning_rate": 4.530000000000001e-06, "loss": 28.3565, "step": 22650 }, { "epoch": 0.04577463366152628, "grad_norm": 820.0463256835938, "learning_rate": 4.532e-06, "loss": 20.1159, "step": 22660 }, { "epoch": 0.04579483429421009, "grad_norm": 133.69586181640625, "learning_rate": 4.534000000000001e-06, "loss": 27.6988, "step": 22670 }, { "epoch": 0.04581503492689391, "grad_norm": 500.9422912597656, "learning_rate": 4.536e-06, "loss": 43.5426, "step": 22680 }, { "epoch": 0.045835235559577726, "grad_norm": 231.44622802734375, "learning_rate": 4.5380000000000004e-06, "loss": 48.5581, "step": 22690 }, { "epoch": 0.04585543619226154, "grad_norm": 51.6878662109375, "learning_rate": 4.540000000000001e-06, "loss": 45.099, "step": 22700 }, { "epoch": 0.04587563682494536, "grad_norm": 428.0320739746094, "learning_rate": 4.542e-06, "loss": 46.1896, "step": 22710 }, { "epoch": 0.04589583745762917, "grad_norm": 276.8353271484375, "learning_rate": 4.544000000000001e-06, "loss": 53.6931, "step": 22720 }, { "epoch": 0.045916038090312986, "grad_norm": 207.6174774169922, "learning_rate": 4.546e-06, "loss": 38.4589, "step": 22730 }, { "epoch": 0.045936238722996806, "grad_norm": 288.7878112792969, "learning_rate": 4.548e-06, "loss": 20.045, "step": 22740 }, { "epoch": 0.04595643935568062, "grad_norm": 286.56524658203125, "learning_rate": 4.5500000000000005e-06, "loss": 31.5989, "step": 22750 }, { "epoch": 0.04597663998836444, "grad_norm": 162.22901916503906, "learning_rate": 4.552000000000001e-06, "loss": 39.6249, "step": 22760 }, { "epoch": 0.04599684062104825, "grad_norm": 80.63327026367188, "learning_rate": 4.554000000000001e-06, "loss": 26.3245, "step": 22770 }, { "epoch": 0.046017041253732065, "grad_norm": 214.0733642578125, "learning_rate": 4.556e-06, "loss": 32.5252, "step": 22780 }, { "epoch": 0.046037241886415885, "grad_norm": 284.2151184082031, "learning_rate": 4.558e-06, "loss": 27.7922, "step": 22790 }, { "epoch": 0.0460574425190997, "grad_norm": 321.2484130859375, "learning_rate": 4.56e-06, "loss": 44.2803, "step": 22800 }, { "epoch": 0.04607764315178351, "grad_norm": 273.5098876953125, "learning_rate": 4.5620000000000005e-06, "loss": 29.518, "step": 22810 }, { "epoch": 0.04609784378446733, "grad_norm": 347.76922607421875, "learning_rate": 4.564e-06, "loss": 31.0171, "step": 22820 }, { "epoch": 0.046118044417151144, "grad_norm": 159.8347930908203, "learning_rate": 4.566000000000001e-06, "loss": 42.2288, "step": 22830 }, { "epoch": 0.046138245049834964, "grad_norm": 556.943359375, "learning_rate": 4.568e-06, "loss": 44.7816, "step": 22840 }, { "epoch": 0.04615844568251878, "grad_norm": 98.52095794677734, "learning_rate": 4.57e-06, "loss": 31.4136, "step": 22850 }, { "epoch": 0.04617864631520259, "grad_norm": 495.7933654785156, "learning_rate": 4.5720000000000004e-06, "loss": 42.1625, "step": 22860 }, { "epoch": 0.04619884694788641, "grad_norm": 695.1187133789062, "learning_rate": 4.574e-06, "loss": 33.75, "step": 22870 }, { "epoch": 0.04621904758057022, "grad_norm": 390.0658874511719, "learning_rate": 4.576000000000001e-06, "loss": 41.6841, "step": 22880 }, { "epoch": 0.046239248213254036, "grad_norm": 176.88433837890625, "learning_rate": 4.578e-06, "loss": 35.6987, "step": 22890 }, { "epoch": 0.046259448845937856, "grad_norm": 300.9981689453125, "learning_rate": 4.58e-06, "loss": 28.2005, "step": 22900 }, { "epoch": 0.04627964947862167, "grad_norm": 331.2634582519531, "learning_rate": 4.582e-06, "loss": 46.8877, "step": 22910 }, { "epoch": 0.04629985011130549, "grad_norm": 222.0252685546875, "learning_rate": 4.5840000000000005e-06, "loss": 39.4623, "step": 22920 }, { "epoch": 0.0463200507439893, "grad_norm": 448.3998107910156, "learning_rate": 4.586000000000001e-06, "loss": 46.6825, "step": 22930 }, { "epoch": 0.046340251376673115, "grad_norm": 439.2770690917969, "learning_rate": 4.588e-06, "loss": 22.501, "step": 22940 }, { "epoch": 0.046360452009356935, "grad_norm": 316.9297790527344, "learning_rate": 4.590000000000001e-06, "loss": 32.2445, "step": 22950 }, { "epoch": 0.04638065264204075, "grad_norm": 340.693115234375, "learning_rate": 4.592e-06, "loss": 52.5817, "step": 22960 }, { "epoch": 0.04640085327472456, "grad_norm": 202.79469299316406, "learning_rate": 4.594e-06, "loss": 31.8135, "step": 22970 }, { "epoch": 0.04642105390740838, "grad_norm": 185.64169311523438, "learning_rate": 4.5960000000000006e-06, "loss": 33.0467, "step": 22980 }, { "epoch": 0.046441254540092194, "grad_norm": 204.25270080566406, "learning_rate": 4.598e-06, "loss": 34.204, "step": 22990 }, { "epoch": 0.046461455172776014, "grad_norm": 352.415771484375, "learning_rate": 4.600000000000001e-06, "loss": 40.1466, "step": 23000 }, { "epoch": 0.04648165580545983, "grad_norm": 327.9806213378906, "learning_rate": 4.602e-06, "loss": 38.3975, "step": 23010 }, { "epoch": 0.04650185643814364, "grad_norm": 184.09864807128906, "learning_rate": 4.604e-06, "loss": 28.5088, "step": 23020 }, { "epoch": 0.04652205707082746, "grad_norm": 130.41419982910156, "learning_rate": 4.6060000000000005e-06, "loss": 41.1854, "step": 23030 }, { "epoch": 0.046542257703511274, "grad_norm": 115.1039047241211, "learning_rate": 4.608000000000001e-06, "loss": 28.461, "step": 23040 }, { "epoch": 0.04656245833619509, "grad_norm": 230.45530700683594, "learning_rate": 4.610000000000001e-06, "loss": 36.4934, "step": 23050 }, { "epoch": 0.04658265896887891, "grad_norm": 317.94512939453125, "learning_rate": 4.612e-06, "loss": 27.899, "step": 23060 }, { "epoch": 0.04660285960156272, "grad_norm": 74.50390625, "learning_rate": 4.614e-06, "loss": 52.6528, "step": 23070 }, { "epoch": 0.04662306023424654, "grad_norm": 182.2034149169922, "learning_rate": 4.616e-06, "loss": 26.8433, "step": 23080 }, { "epoch": 0.04664326086693035, "grad_norm": 379.2212829589844, "learning_rate": 4.6180000000000005e-06, "loss": 28.4662, "step": 23090 }, { "epoch": 0.046663461499614166, "grad_norm": 420.8704528808594, "learning_rate": 4.620000000000001e-06, "loss": 52.9373, "step": 23100 }, { "epoch": 0.046683662132297986, "grad_norm": 327.8398742675781, "learning_rate": 4.622e-06, "loss": 68.1064, "step": 23110 }, { "epoch": 0.0467038627649818, "grad_norm": 269.2337646484375, "learning_rate": 4.624e-06, "loss": 36.871, "step": 23120 }, { "epoch": 0.04672406339766561, "grad_norm": 403.05230712890625, "learning_rate": 4.626e-06, "loss": 47.3688, "step": 23130 }, { "epoch": 0.04674426403034943, "grad_norm": 294.1803894042969, "learning_rate": 4.628e-06, "loss": 36.4654, "step": 23140 }, { "epoch": 0.046764464663033245, "grad_norm": 231.492431640625, "learning_rate": 4.6300000000000006e-06, "loss": 24.473, "step": 23150 }, { "epoch": 0.046784665295717065, "grad_norm": 206.8830108642578, "learning_rate": 4.632000000000001e-06, "loss": 16.1609, "step": 23160 }, { "epoch": 0.04680486592840088, "grad_norm": 96.38932800292969, "learning_rate": 4.634e-06, "loss": 19.4687, "step": 23170 }, { "epoch": 0.04682506656108469, "grad_norm": 355.86529541015625, "learning_rate": 4.636e-06, "loss": 40.5259, "step": 23180 }, { "epoch": 0.04684526719376851, "grad_norm": 146.88897705078125, "learning_rate": 4.638e-06, "loss": 31.2758, "step": 23190 }, { "epoch": 0.046865467826452324, "grad_norm": 359.35687255859375, "learning_rate": 4.6400000000000005e-06, "loss": 26.0358, "step": 23200 }, { "epoch": 0.04688566845913614, "grad_norm": 405.2716369628906, "learning_rate": 4.642000000000001e-06, "loss": 23.5433, "step": 23210 }, { "epoch": 0.04690586909181996, "grad_norm": 217.33326721191406, "learning_rate": 4.644e-06, "loss": 15.0379, "step": 23220 }, { "epoch": 0.04692606972450377, "grad_norm": 296.63568115234375, "learning_rate": 4.646000000000001e-06, "loss": 33.5255, "step": 23230 }, { "epoch": 0.04694627035718759, "grad_norm": 478.4248046875, "learning_rate": 4.648e-06, "loss": 32.1893, "step": 23240 }, { "epoch": 0.0469664709898714, "grad_norm": 618.0040893554688, "learning_rate": 4.65e-06, "loss": 50.2852, "step": 23250 }, { "epoch": 0.046986671622555216, "grad_norm": 283.837158203125, "learning_rate": 4.6520000000000005e-06, "loss": 35.7771, "step": 23260 }, { "epoch": 0.047006872255239036, "grad_norm": 168.71023559570312, "learning_rate": 4.654e-06, "loss": 53.302, "step": 23270 }, { "epoch": 0.04702707288792285, "grad_norm": 208.5245819091797, "learning_rate": 4.656000000000001e-06, "loss": 21.3794, "step": 23280 }, { "epoch": 0.04704727352060666, "grad_norm": 282.94671630859375, "learning_rate": 4.658e-06, "loss": 49.0335, "step": 23290 }, { "epoch": 0.04706747415329048, "grad_norm": 404.1675720214844, "learning_rate": 4.66e-06, "loss": 35.3006, "step": 23300 }, { "epoch": 0.047087674785974296, "grad_norm": 196.1449432373047, "learning_rate": 4.6620000000000004e-06, "loss": 17.8496, "step": 23310 }, { "epoch": 0.047107875418658116, "grad_norm": 84.58887481689453, "learning_rate": 4.664000000000001e-06, "loss": 25.4964, "step": 23320 }, { "epoch": 0.04712807605134193, "grad_norm": 243.4213104248047, "learning_rate": 4.666000000000001e-06, "loss": 21.0078, "step": 23330 }, { "epoch": 0.04714827668402574, "grad_norm": 146.38462829589844, "learning_rate": 4.668e-06, "loss": 34.3094, "step": 23340 }, { "epoch": 0.04716847731670956, "grad_norm": 229.5850372314453, "learning_rate": 4.670000000000001e-06, "loss": 55.6094, "step": 23350 }, { "epoch": 0.047188677949393375, "grad_norm": 302.9608459472656, "learning_rate": 4.672e-06, "loss": 72.3272, "step": 23360 }, { "epoch": 0.04720887858207719, "grad_norm": 294.19195556640625, "learning_rate": 4.6740000000000005e-06, "loss": 41.2477, "step": 23370 }, { "epoch": 0.04722907921476101, "grad_norm": 592.6897583007812, "learning_rate": 4.676000000000001e-06, "loss": 86.9821, "step": 23380 }, { "epoch": 0.04724927984744482, "grad_norm": 867.7962646484375, "learning_rate": 4.678e-06, "loss": 38.9236, "step": 23390 }, { "epoch": 0.04726948048012864, "grad_norm": 158.65023803710938, "learning_rate": 4.680000000000001e-06, "loss": 27.6499, "step": 23400 }, { "epoch": 0.047289681112812454, "grad_norm": 1215.8109130859375, "learning_rate": 4.682e-06, "loss": 31.3746, "step": 23410 }, { "epoch": 0.04730988174549627, "grad_norm": 180.79165649414062, "learning_rate": 4.684e-06, "loss": 27.6862, "step": 23420 }, { "epoch": 0.04733008237818009, "grad_norm": 157.53724670410156, "learning_rate": 4.6860000000000005e-06, "loss": 32.7872, "step": 23430 }, { "epoch": 0.0473502830108639, "grad_norm": 244.28480529785156, "learning_rate": 4.688000000000001e-06, "loss": 34.7033, "step": 23440 }, { "epoch": 0.04737048364354771, "grad_norm": 579.8212890625, "learning_rate": 4.69e-06, "loss": 51.4724, "step": 23450 }, { "epoch": 0.04739068427623153, "grad_norm": 153.45106506347656, "learning_rate": 4.692e-06, "loss": 23.1818, "step": 23460 }, { "epoch": 0.047410884908915346, "grad_norm": 422.50054931640625, "learning_rate": 4.694e-06, "loss": 29.7317, "step": 23470 }, { "epoch": 0.047431085541599166, "grad_norm": 139.63380432128906, "learning_rate": 4.6960000000000004e-06, "loss": 30.317, "step": 23480 }, { "epoch": 0.04745128617428298, "grad_norm": 100.78041076660156, "learning_rate": 4.698000000000001e-06, "loss": 36.1749, "step": 23490 }, { "epoch": 0.04747148680696679, "grad_norm": 207.262451171875, "learning_rate": 4.7e-06, "loss": 51.0102, "step": 23500 }, { "epoch": 0.04749168743965061, "grad_norm": 126.28245544433594, "learning_rate": 4.702e-06, "loss": 22.0329, "step": 23510 }, { "epoch": 0.047511888072334425, "grad_norm": 212.24520874023438, "learning_rate": 4.704e-06, "loss": 21.8121, "step": 23520 }, { "epoch": 0.04753208870501824, "grad_norm": 281.8320007324219, "learning_rate": 4.706e-06, "loss": 31.1365, "step": 23530 }, { "epoch": 0.04755228933770206, "grad_norm": 827.1615600585938, "learning_rate": 4.7080000000000005e-06, "loss": 52.7212, "step": 23540 }, { "epoch": 0.04757248997038587, "grad_norm": 113.65986633300781, "learning_rate": 4.71e-06, "loss": 13.2839, "step": 23550 }, { "epoch": 0.04759269060306969, "grad_norm": 307.02117919921875, "learning_rate": 4.712000000000001e-06, "loss": 51.0451, "step": 23560 }, { "epoch": 0.047612891235753504, "grad_norm": 174.39028930664062, "learning_rate": 4.714e-06, "loss": 34.7957, "step": 23570 }, { "epoch": 0.04763309186843732, "grad_norm": 319.22381591796875, "learning_rate": 4.716e-06, "loss": 36.6892, "step": 23580 }, { "epoch": 0.04765329250112114, "grad_norm": 254.41700744628906, "learning_rate": 4.718e-06, "loss": 39.8172, "step": 23590 }, { "epoch": 0.04767349313380495, "grad_norm": 111.16841125488281, "learning_rate": 4.7200000000000005e-06, "loss": 34.5332, "step": 23600 }, { "epoch": 0.047693693766488764, "grad_norm": 448.9316101074219, "learning_rate": 4.722000000000001e-06, "loss": 39.8942, "step": 23610 }, { "epoch": 0.047713894399172584, "grad_norm": 153.71954345703125, "learning_rate": 4.724e-06, "loss": 41.6392, "step": 23620 }, { "epoch": 0.0477340950318564, "grad_norm": 462.3153381347656, "learning_rate": 4.726000000000001e-06, "loss": 40.9412, "step": 23630 }, { "epoch": 0.04775429566454022, "grad_norm": 641.5802001953125, "learning_rate": 4.728e-06, "loss": 31.3092, "step": 23640 }, { "epoch": 0.04777449629722403, "grad_norm": 218.2231903076172, "learning_rate": 4.7300000000000005e-06, "loss": 25.8591, "step": 23650 }, { "epoch": 0.04779469692990784, "grad_norm": 214.20103454589844, "learning_rate": 4.732000000000001e-06, "loss": 54.9613, "step": 23660 }, { "epoch": 0.04781489756259166, "grad_norm": 378.3574523925781, "learning_rate": 4.734e-06, "loss": 48.9097, "step": 23670 }, { "epoch": 0.047835098195275476, "grad_norm": 457.8273010253906, "learning_rate": 4.736000000000001e-06, "loss": 28.7984, "step": 23680 }, { "epoch": 0.04785529882795929, "grad_norm": 349.72003173828125, "learning_rate": 4.738e-06, "loss": 24.9877, "step": 23690 }, { "epoch": 0.04787549946064311, "grad_norm": 246.29293823242188, "learning_rate": 4.74e-06, "loss": 38.0288, "step": 23700 }, { "epoch": 0.04789570009332692, "grad_norm": 395.36767578125, "learning_rate": 4.7420000000000005e-06, "loss": 28.9101, "step": 23710 }, { "epoch": 0.04791590072601074, "grad_norm": 389.5787353515625, "learning_rate": 4.744000000000001e-06, "loss": 36.7952, "step": 23720 }, { "epoch": 0.047936101358694555, "grad_norm": 132.60447692871094, "learning_rate": 4.746000000000001e-06, "loss": 36.2736, "step": 23730 }, { "epoch": 0.04795630199137837, "grad_norm": 308.73663330078125, "learning_rate": 4.748e-06, "loss": 46.022, "step": 23740 }, { "epoch": 0.04797650262406219, "grad_norm": 264.12005615234375, "learning_rate": 4.75e-06, "loss": 41.8519, "step": 23750 }, { "epoch": 0.047996703256746, "grad_norm": 318.1417236328125, "learning_rate": 4.752e-06, "loss": 52.5378, "step": 23760 }, { "epoch": 0.048016903889429814, "grad_norm": 1270.9510498046875, "learning_rate": 4.7540000000000006e-06, "loss": 33.6496, "step": 23770 }, { "epoch": 0.048037104522113634, "grad_norm": 382.4970703125, "learning_rate": 4.756000000000001e-06, "loss": 54.3435, "step": 23780 }, { "epoch": 0.04805730515479745, "grad_norm": 195.578369140625, "learning_rate": 4.758e-06, "loss": 31.6482, "step": 23790 }, { "epoch": 0.04807750578748127, "grad_norm": 2730.391357421875, "learning_rate": 4.76e-06, "loss": 65.4916, "step": 23800 }, { "epoch": 0.04809770642016508, "grad_norm": 317.44732666015625, "learning_rate": 4.762e-06, "loss": 39.0879, "step": 23810 }, { "epoch": 0.04811790705284889, "grad_norm": 333.9771423339844, "learning_rate": 4.7640000000000005e-06, "loss": 27.7276, "step": 23820 }, { "epoch": 0.04813810768553271, "grad_norm": 268.1827697753906, "learning_rate": 4.766000000000001e-06, "loss": 21.4827, "step": 23830 }, { "epoch": 0.048158308318216526, "grad_norm": 403.4129333496094, "learning_rate": 4.768000000000001e-06, "loss": 34.6014, "step": 23840 }, { "epoch": 0.04817850895090034, "grad_norm": 183.9833221435547, "learning_rate": 4.77e-06, "loss": 34.5106, "step": 23850 }, { "epoch": 0.04819870958358416, "grad_norm": 218.63064575195312, "learning_rate": 4.772e-06, "loss": 35.0868, "step": 23860 }, { "epoch": 0.04821891021626797, "grad_norm": 198.93484497070312, "learning_rate": 4.774e-06, "loss": 29.3855, "step": 23870 }, { "epoch": 0.04823911084895179, "grad_norm": 190.12913513183594, "learning_rate": 4.7760000000000005e-06, "loss": 38.2024, "step": 23880 }, { "epoch": 0.048259311481635606, "grad_norm": 455.5904235839844, "learning_rate": 4.778000000000001e-06, "loss": 62.9891, "step": 23890 }, { "epoch": 0.04827951211431942, "grad_norm": 462.7550964355469, "learning_rate": 4.78e-06, "loss": 35.2439, "step": 23900 }, { "epoch": 0.04829971274700324, "grad_norm": 349.5313720703125, "learning_rate": 4.782e-06, "loss": 31.416, "step": 23910 }, { "epoch": 0.04831991337968705, "grad_norm": 250.37452697753906, "learning_rate": 4.784e-06, "loss": 29.0089, "step": 23920 }, { "epoch": 0.048340114012370865, "grad_norm": 147.19610595703125, "learning_rate": 4.7860000000000004e-06, "loss": 20.3112, "step": 23930 }, { "epoch": 0.048360314645054685, "grad_norm": 201.39181518554688, "learning_rate": 4.7880000000000006e-06, "loss": 31.44, "step": 23940 }, { "epoch": 0.0483805152777385, "grad_norm": 361.5483703613281, "learning_rate": 4.79e-06, "loss": 37.8608, "step": 23950 }, { "epoch": 0.04840071591042232, "grad_norm": 206.08018493652344, "learning_rate": 4.792000000000001e-06, "loss": 39.1472, "step": 23960 }, { "epoch": 0.04842091654310613, "grad_norm": 700.2271728515625, "learning_rate": 4.794e-06, "loss": 35.026, "step": 23970 }, { "epoch": 0.048441117175789944, "grad_norm": 576.4944458007812, "learning_rate": 4.796e-06, "loss": 65.6605, "step": 23980 }, { "epoch": 0.048461317808473764, "grad_norm": 193.9888458251953, "learning_rate": 4.7980000000000005e-06, "loss": 31.1491, "step": 23990 }, { "epoch": 0.04848151844115758, "grad_norm": 119.54864501953125, "learning_rate": 4.800000000000001e-06, "loss": 30.3052, "step": 24000 }, { "epoch": 0.04850171907384139, "grad_norm": 323.5423583984375, "learning_rate": 4.802000000000001e-06, "loss": 33.0274, "step": 24010 }, { "epoch": 0.04852191970652521, "grad_norm": 318.2510070800781, "learning_rate": 4.804e-06, "loss": 24.0945, "step": 24020 }, { "epoch": 0.04854212033920902, "grad_norm": 179.5808868408203, "learning_rate": 4.806000000000001e-06, "loss": 24.4455, "step": 24030 }, { "epoch": 0.04856232097189284, "grad_norm": 364.0338134765625, "learning_rate": 4.808e-06, "loss": 21.3244, "step": 24040 }, { "epoch": 0.048582521604576656, "grad_norm": 151.34703063964844, "learning_rate": 4.8100000000000005e-06, "loss": 39.3955, "step": 24050 }, { "epoch": 0.04860272223726047, "grad_norm": 122.34613800048828, "learning_rate": 4.812000000000001e-06, "loss": 63.3171, "step": 24060 }, { "epoch": 0.04862292286994429, "grad_norm": 267.1424865722656, "learning_rate": 4.814e-06, "loss": 18.3931, "step": 24070 }, { "epoch": 0.0486431235026281, "grad_norm": 950.0606079101562, "learning_rate": 4.816e-06, "loss": 32.9106, "step": 24080 }, { "epoch": 0.048663324135311915, "grad_norm": 300.7304992675781, "learning_rate": 4.818e-06, "loss": 51.0602, "step": 24090 }, { "epoch": 0.048683524767995735, "grad_norm": 103.13603973388672, "learning_rate": 4.8200000000000004e-06, "loss": 24.1989, "step": 24100 }, { "epoch": 0.04870372540067955, "grad_norm": 270.5731201171875, "learning_rate": 4.822000000000001e-06, "loss": 44.562, "step": 24110 }, { "epoch": 0.04872392603336337, "grad_norm": 472.94952392578125, "learning_rate": 4.824000000000001e-06, "loss": 30.1158, "step": 24120 }, { "epoch": 0.04874412666604718, "grad_norm": 278.97955322265625, "learning_rate": 4.826e-06, "loss": 19.623, "step": 24130 }, { "epoch": 0.048764327298730994, "grad_norm": 309.7064208984375, "learning_rate": 4.828e-06, "loss": 46.6352, "step": 24140 }, { "epoch": 0.048784527931414814, "grad_norm": 470.1871032714844, "learning_rate": 4.83e-06, "loss": 30.1528, "step": 24150 }, { "epoch": 0.04880472856409863, "grad_norm": 327.0283203125, "learning_rate": 4.8320000000000005e-06, "loss": 29.6028, "step": 24160 }, { "epoch": 0.04882492919678244, "grad_norm": 229.8971710205078, "learning_rate": 4.834000000000001e-06, "loss": 53.3474, "step": 24170 }, { "epoch": 0.04884512982946626, "grad_norm": 467.5422668457031, "learning_rate": 4.836e-06, "loss": 59.9558, "step": 24180 }, { "epoch": 0.048865330462150074, "grad_norm": 429.14031982421875, "learning_rate": 4.838e-06, "loss": 44.1095, "step": 24190 }, { "epoch": 0.048885531094833894, "grad_norm": 257.4012451171875, "learning_rate": 4.84e-06, "loss": 47.1438, "step": 24200 }, { "epoch": 0.04890573172751771, "grad_norm": 777.3746948242188, "learning_rate": 4.842e-06, "loss": 48.5945, "step": 24210 }, { "epoch": 0.04892593236020152, "grad_norm": 0.0, "learning_rate": 4.8440000000000005e-06, "loss": 32.9124, "step": 24220 }, { "epoch": 0.04894613299288534, "grad_norm": 0.0, "learning_rate": 4.846e-06, "loss": 33.6002, "step": 24230 }, { "epoch": 0.04896633362556915, "grad_norm": 516.6734008789062, "learning_rate": 4.848000000000001e-06, "loss": 40.2189, "step": 24240 }, { "epoch": 0.048986534258252966, "grad_norm": 345.6383972167969, "learning_rate": 4.85e-06, "loss": 35.3943, "step": 24250 }, { "epoch": 0.049006734890936786, "grad_norm": 380.5145263671875, "learning_rate": 4.852e-06, "loss": 40.5262, "step": 24260 }, { "epoch": 0.0490269355236206, "grad_norm": 199.27227783203125, "learning_rate": 4.8540000000000005e-06, "loss": 37.8575, "step": 24270 }, { "epoch": 0.04904713615630442, "grad_norm": 102.84506225585938, "learning_rate": 4.856e-06, "loss": 37.8529, "step": 24280 }, { "epoch": 0.04906733678898823, "grad_norm": 63.01902770996094, "learning_rate": 4.858000000000001e-06, "loss": 46.2456, "step": 24290 }, { "epoch": 0.049087537421672045, "grad_norm": 436.06353759765625, "learning_rate": 4.86e-06, "loss": 43.021, "step": 24300 }, { "epoch": 0.049107738054355865, "grad_norm": 280.2841491699219, "learning_rate": 4.862e-06, "loss": 45.4342, "step": 24310 }, { "epoch": 0.04912793868703968, "grad_norm": 2215.090087890625, "learning_rate": 4.864e-06, "loss": 59.689, "step": 24320 }, { "epoch": 0.04914813931972349, "grad_norm": 717.96728515625, "learning_rate": 4.8660000000000005e-06, "loss": 51.3898, "step": 24330 }, { "epoch": 0.04916833995240731, "grad_norm": 51.60733413696289, "learning_rate": 4.868000000000001e-06, "loss": 39.6074, "step": 24340 }, { "epoch": 0.049188540585091124, "grad_norm": 257.23394775390625, "learning_rate": 4.87e-06, "loss": 50.6462, "step": 24350 }, { "epoch": 0.04920874121777494, "grad_norm": 543.4814453125, "learning_rate": 4.872000000000001e-06, "loss": 38.5462, "step": 24360 }, { "epoch": 0.04922894185045876, "grad_norm": 535.580322265625, "learning_rate": 4.874e-06, "loss": 60.0278, "step": 24370 }, { "epoch": 0.04924914248314257, "grad_norm": 315.3434753417969, "learning_rate": 4.876e-06, "loss": 30.5877, "step": 24380 }, { "epoch": 0.04926934311582639, "grad_norm": 246.52528381347656, "learning_rate": 4.8780000000000006e-06, "loss": 40.9422, "step": 24390 }, { "epoch": 0.0492895437485102, "grad_norm": 164.6389617919922, "learning_rate": 4.880000000000001e-06, "loss": 24.2486, "step": 24400 }, { "epoch": 0.049309744381194016, "grad_norm": 214.39744567871094, "learning_rate": 4.882000000000001e-06, "loss": 30.9379, "step": 24410 }, { "epoch": 0.049329945013877836, "grad_norm": 413.4559020996094, "learning_rate": 4.884e-06, "loss": 32.2273, "step": 24420 }, { "epoch": 0.04935014564656165, "grad_norm": 113.43142700195312, "learning_rate": 4.886e-06, "loss": 19.4128, "step": 24430 }, { "epoch": 0.04937034627924546, "grad_norm": 420.569580078125, "learning_rate": 4.8880000000000005e-06, "loss": 29.2555, "step": 24440 }, { "epoch": 0.04939054691192928, "grad_norm": 395.5831604003906, "learning_rate": 4.890000000000001e-06, "loss": 28.5215, "step": 24450 }, { "epoch": 0.049410747544613096, "grad_norm": 2232.471923828125, "learning_rate": 4.892000000000001e-06, "loss": 57.3816, "step": 24460 }, { "epoch": 0.049430948177296916, "grad_norm": 754.9512939453125, "learning_rate": 4.894e-06, "loss": 50.3239, "step": 24470 }, { "epoch": 0.04945114880998073, "grad_norm": 266.9847106933594, "learning_rate": 4.896e-06, "loss": 34.9834, "step": 24480 }, { "epoch": 0.04947134944266454, "grad_norm": 239.0906982421875, "learning_rate": 4.898e-06, "loss": 63.9679, "step": 24490 }, { "epoch": 0.04949155007534836, "grad_norm": 416.3080749511719, "learning_rate": 4.9000000000000005e-06, "loss": 33.8062, "step": 24500 }, { "epoch": 0.049511750708032175, "grad_norm": 422.41717529296875, "learning_rate": 4.902000000000001e-06, "loss": 48.0187, "step": 24510 }, { "epoch": 0.04953195134071599, "grad_norm": 47.665061950683594, "learning_rate": 4.904000000000001e-06, "loss": 28.1573, "step": 24520 }, { "epoch": 0.04955215197339981, "grad_norm": 1689.254638671875, "learning_rate": 4.906e-06, "loss": 30.9967, "step": 24530 }, { "epoch": 0.04957235260608362, "grad_norm": 294.19708251953125, "learning_rate": 4.908e-06, "loss": 30.892, "step": 24540 }, { "epoch": 0.04959255323876744, "grad_norm": 579.6477661132812, "learning_rate": 4.9100000000000004e-06, "loss": 25.8263, "step": 24550 }, { "epoch": 0.049612753871451254, "grad_norm": 206.0701904296875, "learning_rate": 4.9120000000000006e-06, "loss": 45.736, "step": 24560 }, { "epoch": 0.04963295450413507, "grad_norm": 135.27198791503906, "learning_rate": 4.914000000000001e-06, "loss": 25.2339, "step": 24570 }, { "epoch": 0.04965315513681889, "grad_norm": 182.81849670410156, "learning_rate": 4.916e-06, "loss": 21.7471, "step": 24580 }, { "epoch": 0.0496733557695027, "grad_norm": 298.253662109375, "learning_rate": 4.918e-06, "loss": 48.9007, "step": 24590 }, { "epoch": 0.04969355640218651, "grad_norm": 207.74354553222656, "learning_rate": 4.92e-06, "loss": 28.1884, "step": 24600 }, { "epoch": 0.04971375703487033, "grad_norm": 225.69656372070312, "learning_rate": 4.9220000000000005e-06, "loss": 26.7641, "step": 24610 }, { "epoch": 0.049733957667554146, "grad_norm": 586.1776733398438, "learning_rate": 4.924000000000001e-06, "loss": 28.939, "step": 24620 }, { "epoch": 0.049754158300237966, "grad_norm": 221.6424102783203, "learning_rate": 4.926e-06, "loss": 19.5308, "step": 24630 }, { "epoch": 0.04977435893292178, "grad_norm": 372.4377136230469, "learning_rate": 4.928000000000001e-06, "loss": 33.1445, "step": 24640 }, { "epoch": 0.04979455956560559, "grad_norm": 318.5388488769531, "learning_rate": 4.93e-06, "loss": 38.5788, "step": 24650 }, { "epoch": 0.04981476019828941, "grad_norm": 430.4581298828125, "learning_rate": 4.932e-06, "loss": 32.7186, "step": 24660 }, { "epoch": 0.049834960830973225, "grad_norm": 592.735107421875, "learning_rate": 4.9340000000000005e-06, "loss": 66.3138, "step": 24670 }, { "epoch": 0.04985516146365704, "grad_norm": 174.4611053466797, "learning_rate": 4.936e-06, "loss": 36.9905, "step": 24680 }, { "epoch": 0.04987536209634086, "grad_norm": 478.0341491699219, "learning_rate": 4.938000000000001e-06, "loss": 31.3537, "step": 24690 }, { "epoch": 0.04989556272902467, "grad_norm": 196.75765991210938, "learning_rate": 4.94e-06, "loss": 47.4222, "step": 24700 }, { "epoch": 0.04991576336170849, "grad_norm": 58.69709777832031, "learning_rate": 4.942e-06, "loss": 21.3752, "step": 24710 }, { "epoch": 0.049935963994392304, "grad_norm": 486.0718078613281, "learning_rate": 4.9440000000000004e-06, "loss": 54.6722, "step": 24720 }, { "epoch": 0.04995616462707612, "grad_norm": 129.3841552734375, "learning_rate": 4.946000000000001e-06, "loss": 45.3962, "step": 24730 }, { "epoch": 0.04997636525975994, "grad_norm": 278.1208801269531, "learning_rate": 4.948000000000001e-06, "loss": 26.8522, "step": 24740 }, { "epoch": 0.04999656589244375, "grad_norm": 145.47007751464844, "learning_rate": 4.95e-06, "loss": 30.2089, "step": 24750 }, { "epoch": 0.050016766525127564, "grad_norm": 226.84033203125, "learning_rate": 4.952e-06, "loss": 50.0605, "step": 24760 }, { "epoch": 0.050036967157811384, "grad_norm": 824.7215576171875, "learning_rate": 4.954e-06, "loss": 62.0493, "step": 24770 }, { "epoch": 0.0500571677904952, "grad_norm": 478.236083984375, "learning_rate": 4.9560000000000005e-06, "loss": 44.8555, "step": 24780 }, { "epoch": 0.05007736842317902, "grad_norm": 201.0460205078125, "learning_rate": 4.958000000000001e-06, "loss": 50.4313, "step": 24790 }, { "epoch": 0.05009756905586283, "grad_norm": 145.509521484375, "learning_rate": 4.960000000000001e-06, "loss": 36.7353, "step": 24800 }, { "epoch": 0.05011776968854664, "grad_norm": 402.31878662109375, "learning_rate": 4.962e-06, "loss": 32.9939, "step": 24810 }, { "epoch": 0.05013797032123046, "grad_norm": 234.0636444091797, "learning_rate": 4.964e-06, "loss": 61.5186, "step": 24820 }, { "epoch": 0.050158170953914276, "grad_norm": 607.6090698242188, "learning_rate": 4.966e-06, "loss": 37.3607, "step": 24830 }, { "epoch": 0.05017837158659809, "grad_norm": 349.5828552246094, "learning_rate": 4.9680000000000005e-06, "loss": 39.6627, "step": 24840 }, { "epoch": 0.05019857221928191, "grad_norm": 397.7245178222656, "learning_rate": 4.970000000000001e-06, "loss": 55.7625, "step": 24850 }, { "epoch": 0.05021877285196572, "grad_norm": 346.4983825683594, "learning_rate": 4.972e-06, "loss": 36.6427, "step": 24860 }, { "epoch": 0.05023897348464954, "grad_norm": 598.6224365234375, "learning_rate": 4.974e-06, "loss": 37.0437, "step": 24870 }, { "epoch": 0.050259174117333355, "grad_norm": 1130.189697265625, "learning_rate": 4.976e-06, "loss": 47.2999, "step": 24880 }, { "epoch": 0.05027937475001717, "grad_norm": 229.73851013183594, "learning_rate": 4.9780000000000005e-06, "loss": 41.8469, "step": 24890 }, { "epoch": 0.05029957538270099, "grad_norm": 271.1686706542969, "learning_rate": 4.980000000000001e-06, "loss": 25.6, "step": 24900 }, { "epoch": 0.0503197760153848, "grad_norm": 190.49305725097656, "learning_rate": 4.982e-06, "loss": 22.7171, "step": 24910 }, { "epoch": 0.050339976648068614, "grad_norm": 289.3872375488281, "learning_rate": 4.984000000000001e-06, "loss": 35.1021, "step": 24920 }, { "epoch": 0.050360177280752434, "grad_norm": 211.41136169433594, "learning_rate": 4.986e-06, "loss": 15.4786, "step": 24930 }, { "epoch": 0.05038037791343625, "grad_norm": 23.84859848022461, "learning_rate": 4.988e-06, "loss": 14.7752, "step": 24940 }, { "epoch": 0.05040057854612007, "grad_norm": 76.77351379394531, "learning_rate": 4.9900000000000005e-06, "loss": 39.287, "step": 24950 }, { "epoch": 0.05042077917880388, "grad_norm": 100.47135162353516, "learning_rate": 4.992e-06, "loss": 38.4203, "step": 24960 }, { "epoch": 0.05044097981148769, "grad_norm": 242.27645874023438, "learning_rate": 4.994000000000001e-06, "loss": 31.9942, "step": 24970 }, { "epoch": 0.05046118044417151, "grad_norm": 206.26193237304688, "learning_rate": 4.996e-06, "loss": 42.5401, "step": 24980 }, { "epoch": 0.050481381076855326, "grad_norm": 204.83905029296875, "learning_rate": 4.998e-06, "loss": 41.3591, "step": 24990 }, { "epoch": 0.05050158170953914, "grad_norm": 309.94384765625, "learning_rate": 5e-06, "loss": 20.4132, "step": 25000 }, { "epoch": 0.05052178234222296, "grad_norm": 168.29742431640625, "learning_rate": 5.0020000000000006e-06, "loss": 27.091, "step": 25010 }, { "epoch": 0.05054198297490677, "grad_norm": 172.75070190429688, "learning_rate": 5.004e-06, "loss": 37.192, "step": 25020 }, { "epoch": 0.05056218360759059, "grad_norm": 26.10698127746582, "learning_rate": 5.006000000000001e-06, "loss": 26.7612, "step": 25030 }, { "epoch": 0.050582384240274406, "grad_norm": 229.56671142578125, "learning_rate": 5.008000000000001e-06, "loss": 40.8294, "step": 25040 }, { "epoch": 0.05060258487295822, "grad_norm": 189.19723510742188, "learning_rate": 5.01e-06, "loss": 29.231, "step": 25050 }, { "epoch": 0.05062278550564204, "grad_norm": 141.651123046875, "learning_rate": 5.0120000000000005e-06, "loss": 26.9244, "step": 25060 }, { "epoch": 0.05064298613832585, "grad_norm": 221.5508270263672, "learning_rate": 5.014e-06, "loss": 25.417, "step": 25070 }, { "epoch": 0.050663186771009665, "grad_norm": 1631.3509521484375, "learning_rate": 5.016000000000001e-06, "loss": 66.5537, "step": 25080 }, { "epoch": 0.050683387403693485, "grad_norm": 1022.8755493164062, "learning_rate": 5.018000000000001e-06, "loss": 61.4052, "step": 25090 }, { "epoch": 0.0507035880363773, "grad_norm": 200.91578674316406, "learning_rate": 5.02e-06, "loss": 17.9103, "step": 25100 }, { "epoch": 0.05072378866906112, "grad_norm": 176.06973266601562, "learning_rate": 5.022e-06, "loss": 29.0015, "step": 25110 }, { "epoch": 0.05074398930174493, "grad_norm": 12.300407409667969, "learning_rate": 5.024e-06, "loss": 40.7127, "step": 25120 }, { "epoch": 0.050764189934428744, "grad_norm": 339.54718017578125, "learning_rate": 5.026000000000001e-06, "loss": 69.4734, "step": 25130 }, { "epoch": 0.050784390567112564, "grad_norm": 235.63682556152344, "learning_rate": 5.028000000000001e-06, "loss": 32.2105, "step": 25140 }, { "epoch": 0.05080459119979638, "grad_norm": 0.0, "learning_rate": 5.03e-06, "loss": 49.7788, "step": 25150 }, { "epoch": 0.05082479183248019, "grad_norm": 233.39610290527344, "learning_rate": 5.032e-06, "loss": 26.8807, "step": 25160 }, { "epoch": 0.05084499246516401, "grad_norm": 73.63380432128906, "learning_rate": 5.0339999999999996e-06, "loss": 77.4919, "step": 25170 }, { "epoch": 0.05086519309784782, "grad_norm": 91.75942993164062, "learning_rate": 5.0360000000000006e-06, "loss": 32.0141, "step": 25180 }, { "epoch": 0.05088539373053164, "grad_norm": 197.76980590820312, "learning_rate": 5.038000000000001e-06, "loss": 24.7526, "step": 25190 }, { "epoch": 0.050905594363215456, "grad_norm": 320.0301818847656, "learning_rate": 5.04e-06, "loss": 24.9324, "step": 25200 }, { "epoch": 0.05092579499589927, "grad_norm": 355.5118103027344, "learning_rate": 5.042e-06, "loss": 24.0708, "step": 25210 }, { "epoch": 0.05094599562858309, "grad_norm": 271.39019775390625, "learning_rate": 5.044e-06, "loss": 47.9224, "step": 25220 }, { "epoch": 0.0509661962612669, "grad_norm": 832.6948852539062, "learning_rate": 5.0460000000000005e-06, "loss": 55.8801, "step": 25230 }, { "epoch": 0.050986396893950715, "grad_norm": 267.4934997558594, "learning_rate": 5.048000000000001e-06, "loss": 43.9065, "step": 25240 }, { "epoch": 0.051006597526634535, "grad_norm": 336.28656005859375, "learning_rate": 5.050000000000001e-06, "loss": 37.7766, "step": 25250 }, { "epoch": 0.05102679815931835, "grad_norm": 299.2195739746094, "learning_rate": 5.052e-06, "loss": 28.921, "step": 25260 }, { "epoch": 0.05104699879200217, "grad_norm": 394.19537353515625, "learning_rate": 5.054e-06, "loss": 32.4268, "step": 25270 }, { "epoch": 0.05106719942468598, "grad_norm": 200.19863891601562, "learning_rate": 5.056000000000001e-06, "loss": 37.6336, "step": 25280 }, { "epoch": 0.051087400057369795, "grad_norm": 83.1452865600586, "learning_rate": 5.0580000000000005e-06, "loss": 43.7902, "step": 25290 }, { "epoch": 0.051107600690053615, "grad_norm": 0.0, "learning_rate": 5.060000000000001e-06, "loss": 45.6579, "step": 25300 }, { "epoch": 0.05112780132273743, "grad_norm": 0.0, "learning_rate": 5.062e-06, "loss": 20.0547, "step": 25310 }, { "epoch": 0.05114800195542124, "grad_norm": 127.34980010986328, "learning_rate": 5.064e-06, "loss": 32.569, "step": 25320 }, { "epoch": 0.05116820258810506, "grad_norm": 170.7129669189453, "learning_rate": 5.066000000000001e-06, "loss": 39.3602, "step": 25330 }, { "epoch": 0.051188403220788874, "grad_norm": 324.8763732910156, "learning_rate": 5.0680000000000004e-06, "loss": 36.0647, "step": 25340 }, { "epoch": 0.051208603853472694, "grad_norm": 316.6885986328125, "learning_rate": 5.070000000000001e-06, "loss": 36.0602, "step": 25350 }, { "epoch": 0.05122880448615651, "grad_norm": 406.1856994628906, "learning_rate": 5.072e-06, "loss": 32.2987, "step": 25360 }, { "epoch": 0.05124900511884032, "grad_norm": 147.37759399414062, "learning_rate": 5.074e-06, "loss": 24.2839, "step": 25370 }, { "epoch": 0.05126920575152414, "grad_norm": 298.5223693847656, "learning_rate": 5.076000000000001e-06, "loss": 50.2136, "step": 25380 }, { "epoch": 0.05128940638420795, "grad_norm": 258.162353515625, "learning_rate": 5.078e-06, "loss": 28.1227, "step": 25390 }, { "epoch": 0.051309607016891766, "grad_norm": 118.05210876464844, "learning_rate": 5.0800000000000005e-06, "loss": 35.7461, "step": 25400 }, { "epoch": 0.051329807649575586, "grad_norm": 137.1616668701172, "learning_rate": 5.082000000000001e-06, "loss": 21.1173, "step": 25410 }, { "epoch": 0.0513500082822594, "grad_norm": 280.388671875, "learning_rate": 5.084e-06, "loss": 33.4522, "step": 25420 }, { "epoch": 0.05137020891494322, "grad_norm": 270.8780212402344, "learning_rate": 5.086000000000001e-06, "loss": 17.8381, "step": 25430 }, { "epoch": 0.05139040954762703, "grad_norm": 330.8589782714844, "learning_rate": 5.088000000000001e-06, "loss": 23.7958, "step": 25440 }, { "epoch": 0.051410610180310845, "grad_norm": 679.8912353515625, "learning_rate": 5.09e-06, "loss": 36.8784, "step": 25450 }, { "epoch": 0.051430810812994665, "grad_norm": 321.5596923828125, "learning_rate": 5.0920000000000005e-06, "loss": 27.305, "step": 25460 }, { "epoch": 0.05145101144567848, "grad_norm": 392.6862487792969, "learning_rate": 5.094e-06, "loss": 27.0215, "step": 25470 }, { "epoch": 0.05147121207836229, "grad_norm": 397.2980651855469, "learning_rate": 5.096000000000001e-06, "loss": 33.796, "step": 25480 }, { "epoch": 0.05149141271104611, "grad_norm": 93.10610961914062, "learning_rate": 5.098000000000001e-06, "loss": 41.1551, "step": 25490 }, { "epoch": 0.051511613343729924, "grad_norm": 266.022216796875, "learning_rate": 5.1e-06, "loss": 20.5632, "step": 25500 }, { "epoch": 0.051531813976413744, "grad_norm": 311.3656311035156, "learning_rate": 5.1020000000000004e-06, "loss": 49.5471, "step": 25510 }, { "epoch": 0.05155201460909756, "grad_norm": 344.9988098144531, "learning_rate": 5.104e-06, "loss": 35.7951, "step": 25520 }, { "epoch": 0.05157221524178137, "grad_norm": 582.4403076171875, "learning_rate": 5.106000000000001e-06, "loss": 51.6275, "step": 25530 }, { "epoch": 0.05159241587446519, "grad_norm": 994.2134399414062, "learning_rate": 5.108000000000001e-06, "loss": 58.0603, "step": 25540 }, { "epoch": 0.051612616507149, "grad_norm": 182.7637176513672, "learning_rate": 5.11e-06, "loss": 38.8857, "step": 25550 }, { "epoch": 0.051632817139832816, "grad_norm": 122.5313491821289, "learning_rate": 5.112e-06, "loss": 40.5169, "step": 25560 }, { "epoch": 0.051653017772516636, "grad_norm": 575.2615966796875, "learning_rate": 5.114e-06, "loss": 51.9876, "step": 25570 }, { "epoch": 0.05167321840520045, "grad_norm": 208.16482543945312, "learning_rate": 5.116000000000001e-06, "loss": 40.4793, "step": 25580 }, { "epoch": 0.05169341903788427, "grad_norm": 381.1067199707031, "learning_rate": 5.118000000000001e-06, "loss": 58.526, "step": 25590 }, { "epoch": 0.05171361967056808, "grad_norm": 214.27349853515625, "learning_rate": 5.12e-06, "loss": 26.4821, "step": 25600 }, { "epoch": 0.051733820303251896, "grad_norm": 199.1859893798828, "learning_rate": 5.122e-06, "loss": 37.75, "step": 25610 }, { "epoch": 0.051754020935935716, "grad_norm": 128.3622283935547, "learning_rate": 5.124e-06, "loss": 62.8657, "step": 25620 }, { "epoch": 0.05177422156861953, "grad_norm": 716.8430786132812, "learning_rate": 5.126e-06, "loss": 44.1205, "step": 25630 }, { "epoch": 0.05179442220130334, "grad_norm": 255.53382873535156, "learning_rate": 5.128000000000001e-06, "loss": 28.9036, "step": 25640 }, { "epoch": 0.05181462283398716, "grad_norm": 240.45191955566406, "learning_rate": 5.130000000000001e-06, "loss": 54.3725, "step": 25650 }, { "epoch": 0.051834823466670975, "grad_norm": 275.36627197265625, "learning_rate": 5.132e-06, "loss": 48.693, "step": 25660 }, { "epoch": 0.051855024099354795, "grad_norm": 62.26723098754883, "learning_rate": 5.134e-06, "loss": 32.1962, "step": 25670 }, { "epoch": 0.05187522473203861, "grad_norm": 298.8677062988281, "learning_rate": 5.136e-06, "loss": 38.3734, "step": 25680 }, { "epoch": 0.05189542536472242, "grad_norm": 152.68917846679688, "learning_rate": 5.138000000000001e-06, "loss": 22.8874, "step": 25690 }, { "epoch": 0.05191562599740624, "grad_norm": 344.0263977050781, "learning_rate": 5.140000000000001e-06, "loss": 40.0395, "step": 25700 }, { "epoch": 0.051935826630090054, "grad_norm": 1125.92578125, "learning_rate": 5.142e-06, "loss": 32.1101, "step": 25710 }, { "epoch": 0.05195602726277387, "grad_norm": 189.13072204589844, "learning_rate": 5.144e-06, "loss": 22.5643, "step": 25720 }, { "epoch": 0.05197622789545769, "grad_norm": 251.3233184814453, "learning_rate": 5.1459999999999995e-06, "loss": 32.8446, "step": 25730 }, { "epoch": 0.0519964285281415, "grad_norm": 380.9353942871094, "learning_rate": 5.1480000000000005e-06, "loss": 39.3014, "step": 25740 }, { "epoch": 0.05201662916082532, "grad_norm": 276.3848876953125, "learning_rate": 5.150000000000001e-06, "loss": 38.3851, "step": 25750 }, { "epoch": 0.05203682979350913, "grad_norm": 24.855220794677734, "learning_rate": 5.152e-06, "loss": 35.0689, "step": 25760 }, { "epoch": 0.052057030426192946, "grad_norm": 223.92782592773438, "learning_rate": 5.154e-06, "loss": 34.6124, "step": 25770 }, { "epoch": 0.052077231058876766, "grad_norm": 0.0, "learning_rate": 5.156e-06, "loss": 24.43, "step": 25780 }, { "epoch": 0.05209743169156058, "grad_norm": 311.9063415527344, "learning_rate": 5.158e-06, "loss": 50.9617, "step": 25790 }, { "epoch": 0.05211763232424439, "grad_norm": 155.0714874267578, "learning_rate": 5.1600000000000006e-06, "loss": 29.0458, "step": 25800 }, { "epoch": 0.05213783295692821, "grad_norm": 345.9964294433594, "learning_rate": 5.162000000000001e-06, "loss": 42.5227, "step": 25810 }, { "epoch": 0.052158033589612025, "grad_norm": 50.573219299316406, "learning_rate": 5.164e-06, "loss": 38.1386, "step": 25820 }, { "epoch": 0.052178234222295845, "grad_norm": 182.77481079101562, "learning_rate": 5.166e-06, "loss": 42.2662, "step": 25830 }, { "epoch": 0.05219843485497966, "grad_norm": 518.151123046875, "learning_rate": 5.168000000000001e-06, "loss": 48.8751, "step": 25840 }, { "epoch": 0.05221863548766347, "grad_norm": 145.83743286132812, "learning_rate": 5.1700000000000005e-06, "loss": 40.3494, "step": 25850 }, { "epoch": 0.05223883612034729, "grad_norm": 130.442138671875, "learning_rate": 5.172000000000001e-06, "loss": 40.4268, "step": 25860 }, { "epoch": 0.052259036753031105, "grad_norm": 364.4100646972656, "learning_rate": 5.174e-06, "loss": 34.6093, "step": 25870 }, { "epoch": 0.05227923738571492, "grad_norm": 276.7270202636719, "learning_rate": 5.176e-06, "loss": 44.1146, "step": 25880 }, { "epoch": 0.05229943801839874, "grad_norm": 236.02598571777344, "learning_rate": 5.178000000000001e-06, "loss": 31.9908, "step": 25890 }, { "epoch": 0.05231963865108255, "grad_norm": 578.2430419921875, "learning_rate": 5.18e-06, "loss": 49.6647, "step": 25900 }, { "epoch": 0.05233983928376637, "grad_norm": 127.69097900390625, "learning_rate": 5.1820000000000005e-06, "loss": 57.4682, "step": 25910 }, { "epoch": 0.052360039916450184, "grad_norm": 445.4470520019531, "learning_rate": 5.184e-06, "loss": 41.7614, "step": 25920 }, { "epoch": 0.052380240549134, "grad_norm": 255.9884796142578, "learning_rate": 5.186e-06, "loss": 37.9165, "step": 25930 }, { "epoch": 0.05240044118181782, "grad_norm": 147.18260192871094, "learning_rate": 5.188000000000001e-06, "loss": 31.0442, "step": 25940 }, { "epoch": 0.05242064181450163, "grad_norm": 271.1940612792969, "learning_rate": 5.19e-06, "loss": 19.2495, "step": 25950 }, { "epoch": 0.05244084244718544, "grad_norm": 166.0475616455078, "learning_rate": 5.1920000000000004e-06, "loss": 29.3152, "step": 25960 }, { "epoch": 0.05246104307986926, "grad_norm": 21.80970001220703, "learning_rate": 5.194e-06, "loss": 14.9573, "step": 25970 }, { "epoch": 0.052481243712553076, "grad_norm": 205.1424560546875, "learning_rate": 5.196e-06, "loss": 29.5009, "step": 25980 }, { "epoch": 0.052501444345236896, "grad_norm": 46.37547302246094, "learning_rate": 5.198000000000001e-06, "loss": 22.006, "step": 25990 }, { "epoch": 0.05252164497792071, "grad_norm": 313.6761169433594, "learning_rate": 5.2e-06, "loss": 27.8893, "step": 26000 }, { "epoch": 0.05254184561060452, "grad_norm": 447.4788818359375, "learning_rate": 5.202e-06, "loss": 31.3305, "step": 26010 }, { "epoch": 0.05256204624328834, "grad_norm": 501.9764404296875, "learning_rate": 5.2040000000000005e-06, "loss": 43.5533, "step": 26020 }, { "epoch": 0.052582246875972155, "grad_norm": 222.5516357421875, "learning_rate": 5.206e-06, "loss": 26.3817, "step": 26030 }, { "epoch": 0.05260244750865597, "grad_norm": 301.2765197753906, "learning_rate": 5.208000000000001e-06, "loss": 32.4432, "step": 26040 }, { "epoch": 0.05262264814133979, "grad_norm": 418.61083984375, "learning_rate": 5.210000000000001e-06, "loss": 21.62, "step": 26050 }, { "epoch": 0.0526428487740236, "grad_norm": 100.27120208740234, "learning_rate": 5.212e-06, "loss": 46.033, "step": 26060 }, { "epoch": 0.05266304940670742, "grad_norm": 413.5134582519531, "learning_rate": 5.214e-06, "loss": 28.965, "step": 26070 }, { "epoch": 0.052683250039391234, "grad_norm": 511.71527099609375, "learning_rate": 5.216e-06, "loss": 37.3845, "step": 26080 }, { "epoch": 0.05270345067207505, "grad_norm": 473.4715270996094, "learning_rate": 5.218000000000001e-06, "loss": 35.7959, "step": 26090 }, { "epoch": 0.05272365130475887, "grad_norm": 95.87857818603516, "learning_rate": 5.220000000000001e-06, "loss": 14.787, "step": 26100 }, { "epoch": 0.05274385193744268, "grad_norm": 418.3697814941406, "learning_rate": 5.222e-06, "loss": 22.2829, "step": 26110 }, { "epoch": 0.05276405257012649, "grad_norm": 318.85186767578125, "learning_rate": 5.224e-06, "loss": 36.4422, "step": 26120 }, { "epoch": 0.05278425320281031, "grad_norm": 3006.05517578125, "learning_rate": 5.226e-06, "loss": 65.3184, "step": 26130 }, { "epoch": 0.052804453835494126, "grad_norm": 0.0, "learning_rate": 5.228000000000001e-06, "loss": 61.7301, "step": 26140 }, { "epoch": 0.052824654468177946, "grad_norm": 153.2739715576172, "learning_rate": 5.230000000000001e-06, "loss": 32.0589, "step": 26150 }, { "epoch": 0.05284485510086176, "grad_norm": 318.295654296875, "learning_rate": 5.232e-06, "loss": 32.5959, "step": 26160 }, { "epoch": 0.05286505573354557, "grad_norm": 278.9473876953125, "learning_rate": 5.234e-06, "loss": 44.3797, "step": 26170 }, { "epoch": 0.05288525636622939, "grad_norm": 51.19458770751953, "learning_rate": 5.236e-06, "loss": 15.9609, "step": 26180 }, { "epoch": 0.052905456998913206, "grad_norm": 256.02593994140625, "learning_rate": 5.2380000000000005e-06, "loss": 27.4539, "step": 26190 }, { "epoch": 0.05292565763159702, "grad_norm": 178.62783813476562, "learning_rate": 5.240000000000001e-06, "loss": 25.5177, "step": 26200 }, { "epoch": 0.05294585826428084, "grad_norm": 544.4048461914062, "learning_rate": 5.242000000000001e-06, "loss": 32.0198, "step": 26210 }, { "epoch": 0.05296605889696465, "grad_norm": 292.59735107421875, "learning_rate": 5.244e-06, "loss": 24.6876, "step": 26220 }, { "epoch": 0.05298625952964847, "grad_norm": 280.0207824707031, "learning_rate": 5.246e-06, "loss": 38.3856, "step": 26230 }, { "epoch": 0.053006460162332285, "grad_norm": 496.90130615234375, "learning_rate": 5.248000000000001e-06, "loss": 24.6253, "step": 26240 }, { "epoch": 0.0530266607950161, "grad_norm": 97.17536926269531, "learning_rate": 5.2500000000000006e-06, "loss": 53.5876, "step": 26250 }, { "epoch": 0.05304686142769992, "grad_norm": 33.68693161010742, "learning_rate": 5.252000000000001e-06, "loss": 39.8057, "step": 26260 }, { "epoch": 0.05306706206038373, "grad_norm": 477.8589782714844, "learning_rate": 5.254e-06, "loss": 52.2761, "step": 26270 }, { "epoch": 0.053087262693067544, "grad_norm": 80.3221664428711, "learning_rate": 5.256e-06, "loss": 24.1321, "step": 26280 }, { "epoch": 0.053107463325751364, "grad_norm": 232.54776000976562, "learning_rate": 5.258000000000001e-06, "loss": 29.3421, "step": 26290 }, { "epoch": 0.05312766395843518, "grad_norm": 324.77777099609375, "learning_rate": 5.2600000000000005e-06, "loss": 43.9612, "step": 26300 }, { "epoch": 0.053147864591119, "grad_norm": 122.5701675415039, "learning_rate": 5.262000000000001e-06, "loss": 44.4152, "step": 26310 }, { "epoch": 0.05316806522380281, "grad_norm": 682.695556640625, "learning_rate": 5.264e-06, "loss": 61.449, "step": 26320 }, { "epoch": 0.05318826585648662, "grad_norm": 293.5277099609375, "learning_rate": 5.266e-06, "loss": 33.1803, "step": 26330 }, { "epoch": 0.05320846648917044, "grad_norm": 268.4401550292969, "learning_rate": 5.268000000000001e-06, "loss": 30.8599, "step": 26340 }, { "epoch": 0.053228667121854256, "grad_norm": 649.7645874023438, "learning_rate": 5.27e-06, "loss": 54.7787, "step": 26350 }, { "epoch": 0.05324886775453807, "grad_norm": 704.7943115234375, "learning_rate": 5.2720000000000005e-06, "loss": 53.2063, "step": 26360 }, { "epoch": 0.05326906838722189, "grad_norm": 102.46438598632812, "learning_rate": 5.274e-06, "loss": 31.2985, "step": 26370 }, { "epoch": 0.0532892690199057, "grad_norm": 12.662109375, "learning_rate": 5.276e-06, "loss": 19.0728, "step": 26380 }, { "epoch": 0.05330946965258952, "grad_norm": 749.9951782226562, "learning_rate": 5.278000000000001e-06, "loss": 24.9564, "step": 26390 }, { "epoch": 0.053329670285273335, "grad_norm": 298.95306396484375, "learning_rate": 5.28e-06, "loss": 24.4522, "step": 26400 }, { "epoch": 0.05334987091795715, "grad_norm": 101.92510223388672, "learning_rate": 5.282e-06, "loss": 29.7092, "step": 26410 }, { "epoch": 0.05337007155064097, "grad_norm": 254.97119140625, "learning_rate": 5.2840000000000006e-06, "loss": 62.7614, "step": 26420 }, { "epoch": 0.05339027218332478, "grad_norm": 280.0879821777344, "learning_rate": 5.286e-06, "loss": 20.787, "step": 26430 }, { "epoch": 0.053410472816008595, "grad_norm": 187.2125244140625, "learning_rate": 5.288000000000001e-06, "loss": 21.367, "step": 26440 }, { "epoch": 0.053430673448692415, "grad_norm": 16.383705139160156, "learning_rate": 5.290000000000001e-06, "loss": 34.2538, "step": 26450 }, { "epoch": 0.05345087408137623, "grad_norm": 104.46593475341797, "learning_rate": 5.292e-06, "loss": 49.3187, "step": 26460 }, { "epoch": 0.05347107471406005, "grad_norm": 281.2092590332031, "learning_rate": 5.2940000000000005e-06, "loss": 56.4401, "step": 26470 }, { "epoch": 0.05349127534674386, "grad_norm": 359.28289794921875, "learning_rate": 5.296e-06, "loss": 17.9456, "step": 26480 }, { "epoch": 0.053511475979427674, "grad_norm": 278.7646179199219, "learning_rate": 5.298000000000001e-06, "loss": 37.337, "step": 26490 }, { "epoch": 0.053531676612111494, "grad_norm": 270.59771728515625, "learning_rate": 5.300000000000001e-06, "loss": 69.6157, "step": 26500 }, { "epoch": 0.05355187724479531, "grad_norm": 266.4070739746094, "learning_rate": 5.302e-06, "loss": 25.7739, "step": 26510 }, { "epoch": 0.05357207787747912, "grad_norm": 178.55990600585938, "learning_rate": 5.304e-06, "loss": 28.3638, "step": 26520 }, { "epoch": 0.05359227851016294, "grad_norm": 330.7003479003906, "learning_rate": 5.306e-06, "loss": 48.5015, "step": 26530 }, { "epoch": 0.05361247914284675, "grad_norm": 294.023193359375, "learning_rate": 5.308000000000001e-06, "loss": 13.1143, "step": 26540 }, { "epoch": 0.05363267977553057, "grad_norm": 479.3330383300781, "learning_rate": 5.310000000000001e-06, "loss": 45.4788, "step": 26550 }, { "epoch": 0.053652880408214386, "grad_norm": 32.162471771240234, "learning_rate": 5.312e-06, "loss": 39.8791, "step": 26560 }, { "epoch": 0.0536730810408982, "grad_norm": 276.650390625, "learning_rate": 5.314e-06, "loss": 28.8852, "step": 26570 }, { "epoch": 0.05369328167358202, "grad_norm": 329.8418884277344, "learning_rate": 5.3160000000000004e-06, "loss": 28.9081, "step": 26580 }, { "epoch": 0.05371348230626583, "grad_norm": 245.5834503173828, "learning_rate": 5.318000000000001e-06, "loss": 32.9793, "step": 26590 }, { "epoch": 0.053733682938949645, "grad_norm": 0.0, "learning_rate": 5.320000000000001e-06, "loss": 46.4772, "step": 26600 }, { "epoch": 0.053753883571633465, "grad_norm": 290.5738525390625, "learning_rate": 5.322000000000001e-06, "loss": 28.0701, "step": 26610 }, { "epoch": 0.05377408420431728, "grad_norm": 163.2632598876953, "learning_rate": 5.324e-06, "loss": 19.0528, "step": 26620 }, { "epoch": 0.0537942848370011, "grad_norm": 699.838134765625, "learning_rate": 5.326e-06, "loss": 35.6098, "step": 26630 }, { "epoch": 0.05381448546968491, "grad_norm": 168.70632934570312, "learning_rate": 5.328000000000001e-06, "loss": 22.2081, "step": 26640 }, { "epoch": 0.053834686102368724, "grad_norm": 351.04754638671875, "learning_rate": 5.330000000000001e-06, "loss": 52.3379, "step": 26650 }, { "epoch": 0.053854886735052544, "grad_norm": 158.24989318847656, "learning_rate": 5.332000000000001e-06, "loss": 63.6774, "step": 26660 }, { "epoch": 0.05387508736773636, "grad_norm": 271.58984375, "learning_rate": 5.334e-06, "loss": 36.8823, "step": 26670 }, { "epoch": 0.05389528800042017, "grad_norm": 213.49502563476562, "learning_rate": 5.336e-06, "loss": 41.1772, "step": 26680 }, { "epoch": 0.05391548863310399, "grad_norm": 292.98907470703125, "learning_rate": 5.338000000000001e-06, "loss": 41.6909, "step": 26690 }, { "epoch": 0.0539356892657878, "grad_norm": 144.72177124023438, "learning_rate": 5.3400000000000005e-06, "loss": 19.5633, "step": 26700 }, { "epoch": 0.05395588989847162, "grad_norm": 330.7893371582031, "learning_rate": 5.342000000000001e-06, "loss": 48.425, "step": 26710 }, { "epoch": 0.053976090531155436, "grad_norm": 401.33642578125, "learning_rate": 5.344e-06, "loss": 31.6166, "step": 26720 }, { "epoch": 0.05399629116383925, "grad_norm": 254.48562622070312, "learning_rate": 5.346e-06, "loss": 35.8798, "step": 26730 }, { "epoch": 0.05401649179652307, "grad_norm": 301.33349609375, "learning_rate": 5.348000000000001e-06, "loss": 47.6349, "step": 26740 }, { "epoch": 0.05403669242920688, "grad_norm": 591.2595825195312, "learning_rate": 5.3500000000000004e-06, "loss": 19.9916, "step": 26750 }, { "epoch": 0.054056893061890696, "grad_norm": 496.22747802734375, "learning_rate": 5.352000000000001e-06, "loss": 40.1107, "step": 26760 }, { "epoch": 0.054077093694574516, "grad_norm": 1382.7857666015625, "learning_rate": 5.354e-06, "loss": 32.3988, "step": 26770 }, { "epoch": 0.05409729432725833, "grad_norm": 283.477294921875, "learning_rate": 5.356e-06, "loss": 26.7485, "step": 26780 }, { "epoch": 0.05411749495994215, "grad_norm": 100.99266052246094, "learning_rate": 5.358000000000001e-06, "loss": 24.1203, "step": 26790 }, { "epoch": 0.05413769559262596, "grad_norm": 266.820068359375, "learning_rate": 5.36e-06, "loss": 26.8739, "step": 26800 }, { "epoch": 0.054157896225309775, "grad_norm": 827.0459594726562, "learning_rate": 5.3620000000000005e-06, "loss": 47.8409, "step": 26810 }, { "epoch": 0.054178096857993595, "grad_norm": 1373.1201171875, "learning_rate": 5.364000000000001e-06, "loss": 23.0138, "step": 26820 }, { "epoch": 0.05419829749067741, "grad_norm": 189.86802673339844, "learning_rate": 5.366e-06, "loss": 34.7973, "step": 26830 }, { "epoch": 0.05421849812336122, "grad_norm": 211.27232360839844, "learning_rate": 5.368000000000001e-06, "loss": 53.2872, "step": 26840 }, { "epoch": 0.05423869875604504, "grad_norm": 282.1607971191406, "learning_rate": 5.370000000000001e-06, "loss": 25.2158, "step": 26850 }, { "epoch": 0.054258899388728854, "grad_norm": 277.02081298828125, "learning_rate": 5.372e-06, "loss": 22.8109, "step": 26860 }, { "epoch": 0.054279100021412674, "grad_norm": 247.22166442871094, "learning_rate": 5.3740000000000006e-06, "loss": 29.4528, "step": 26870 }, { "epoch": 0.05429930065409649, "grad_norm": 268.8120422363281, "learning_rate": 5.376e-06, "loss": 32.1493, "step": 26880 }, { "epoch": 0.0543195012867803, "grad_norm": 317.6857604980469, "learning_rate": 5.378e-06, "loss": 53.8354, "step": 26890 }, { "epoch": 0.05433970191946412, "grad_norm": 131.99346923828125, "learning_rate": 5.380000000000001e-06, "loss": 33.4117, "step": 26900 }, { "epoch": 0.05435990255214793, "grad_norm": 480.4944763183594, "learning_rate": 5.382e-06, "loss": 28.9685, "step": 26910 }, { "epoch": 0.054380103184831746, "grad_norm": 142.2969207763672, "learning_rate": 5.3840000000000005e-06, "loss": 24.8345, "step": 26920 }, { "epoch": 0.054400303817515566, "grad_norm": 215.11526489257812, "learning_rate": 5.386e-06, "loss": 36.5683, "step": 26930 }, { "epoch": 0.05442050445019938, "grad_norm": 714.2116088867188, "learning_rate": 5.388e-06, "loss": 39.6467, "step": 26940 }, { "epoch": 0.0544407050828832, "grad_norm": 238.73333740234375, "learning_rate": 5.390000000000001e-06, "loss": 32.7786, "step": 26950 }, { "epoch": 0.05446090571556701, "grad_norm": 332.47503662109375, "learning_rate": 5.392e-06, "loss": 39.974, "step": 26960 }, { "epoch": 0.054481106348250825, "grad_norm": 634.87353515625, "learning_rate": 5.394e-06, "loss": 47.614, "step": 26970 }, { "epoch": 0.054501306980934645, "grad_norm": 95.62773895263672, "learning_rate": 5.3960000000000005e-06, "loss": 23.9315, "step": 26980 }, { "epoch": 0.05452150761361846, "grad_norm": 170.20867919921875, "learning_rate": 5.398e-06, "loss": 62.4917, "step": 26990 }, { "epoch": 0.05454170824630227, "grad_norm": 233.08860778808594, "learning_rate": 5.400000000000001e-06, "loss": 23.2774, "step": 27000 }, { "epoch": 0.05456190887898609, "grad_norm": 293.8749694824219, "learning_rate": 5.402000000000001e-06, "loss": 29.11, "step": 27010 }, { "epoch": 0.054582109511669905, "grad_norm": 278.05780029296875, "learning_rate": 5.404e-06, "loss": 41.1057, "step": 27020 }, { "epoch": 0.054602310144353725, "grad_norm": 533.8052368164062, "learning_rate": 5.406e-06, "loss": 34.572, "step": 27030 }, { "epoch": 0.05462251077703754, "grad_norm": 140.2530975341797, "learning_rate": 5.408e-06, "loss": 17.1894, "step": 27040 }, { "epoch": 0.05464271140972135, "grad_norm": 369.7204895019531, "learning_rate": 5.410000000000001e-06, "loss": 22.6965, "step": 27050 }, { "epoch": 0.05466291204240517, "grad_norm": 92.70873260498047, "learning_rate": 5.412000000000001e-06, "loss": 27.6275, "step": 27060 }, { "epoch": 0.054683112675088984, "grad_norm": 501.4557189941406, "learning_rate": 5.414e-06, "loss": 41.4441, "step": 27070 }, { "epoch": 0.0547033133077728, "grad_norm": 132.97772216796875, "learning_rate": 5.416e-06, "loss": 40.0948, "step": 27080 }, { "epoch": 0.05472351394045662, "grad_norm": 203.58592224121094, "learning_rate": 5.418e-06, "loss": 47.4659, "step": 27090 }, { "epoch": 0.05474371457314043, "grad_norm": 65.01156616210938, "learning_rate": 5.420000000000001e-06, "loss": 42.7345, "step": 27100 }, { "epoch": 0.05476391520582425, "grad_norm": 414.1986389160156, "learning_rate": 5.422000000000001e-06, "loss": 28.7471, "step": 27110 }, { "epoch": 0.05478411583850806, "grad_norm": 125.97358703613281, "learning_rate": 5.424e-06, "loss": 34.8441, "step": 27120 }, { "epoch": 0.054804316471191876, "grad_norm": 236.07020568847656, "learning_rate": 5.426e-06, "loss": 27.8279, "step": 27130 }, { "epoch": 0.054824517103875696, "grad_norm": 313.4919128417969, "learning_rate": 5.4279999999999995e-06, "loss": 41.6764, "step": 27140 }, { "epoch": 0.05484471773655951, "grad_norm": 678.3914184570312, "learning_rate": 5.4300000000000005e-06, "loss": 17.3617, "step": 27150 }, { "epoch": 0.05486491836924332, "grad_norm": 513.9019775390625, "learning_rate": 5.432000000000001e-06, "loss": 28.2987, "step": 27160 }, { "epoch": 0.05488511900192714, "grad_norm": 477.1949157714844, "learning_rate": 5.434e-06, "loss": 42.073, "step": 27170 }, { "epoch": 0.054905319634610955, "grad_norm": 237.6758575439453, "learning_rate": 5.436e-06, "loss": 27.9914, "step": 27180 }, { "epoch": 0.054925520267294775, "grad_norm": 216.21881103515625, "learning_rate": 5.438e-06, "loss": 20.9298, "step": 27190 }, { "epoch": 0.05494572089997859, "grad_norm": 261.8713073730469, "learning_rate": 5.4400000000000004e-06, "loss": 26.3971, "step": 27200 }, { "epoch": 0.0549659215326624, "grad_norm": 630.341796875, "learning_rate": 5.442000000000001e-06, "loss": 70.297, "step": 27210 }, { "epoch": 0.05498612216534622, "grad_norm": 200.7746124267578, "learning_rate": 5.444000000000001e-06, "loss": 46.5561, "step": 27220 }, { "epoch": 0.055006322798030034, "grad_norm": 218.84713745117188, "learning_rate": 5.446e-06, "loss": 31.2484, "step": 27230 }, { "epoch": 0.05502652343071385, "grad_norm": 215.00804138183594, "learning_rate": 5.448e-06, "loss": 20.0487, "step": 27240 }, { "epoch": 0.05504672406339767, "grad_norm": 290.6727294921875, "learning_rate": 5.450000000000001e-06, "loss": 49.8053, "step": 27250 }, { "epoch": 0.05506692469608148, "grad_norm": 701.814453125, "learning_rate": 5.4520000000000005e-06, "loss": 33.642, "step": 27260 }, { "epoch": 0.0550871253287653, "grad_norm": 189.5720977783203, "learning_rate": 5.454000000000001e-06, "loss": 14.2722, "step": 27270 }, { "epoch": 0.05510732596144911, "grad_norm": 334.04248046875, "learning_rate": 5.456e-06, "loss": 47.4323, "step": 27280 }, { "epoch": 0.055127526594132926, "grad_norm": 275.90533447265625, "learning_rate": 5.458e-06, "loss": 43.109, "step": 27290 }, { "epoch": 0.055147727226816746, "grad_norm": 229.8544158935547, "learning_rate": 5.460000000000001e-06, "loss": 50.9386, "step": 27300 }, { "epoch": 0.05516792785950056, "grad_norm": 70.80242156982422, "learning_rate": 5.462e-06, "loss": 16.4116, "step": 27310 }, { "epoch": 0.05518812849218437, "grad_norm": 202.52389526367188, "learning_rate": 5.4640000000000005e-06, "loss": 63.8582, "step": 27320 }, { "epoch": 0.05520832912486819, "grad_norm": 423.4708251953125, "learning_rate": 5.466e-06, "loss": 36.3824, "step": 27330 }, { "epoch": 0.055228529757552006, "grad_norm": 269.0473937988281, "learning_rate": 5.468e-06, "loss": 24.4635, "step": 27340 }, { "epoch": 0.05524873039023582, "grad_norm": 710.0853881835938, "learning_rate": 5.470000000000001e-06, "loss": 19.5535, "step": 27350 }, { "epoch": 0.05526893102291964, "grad_norm": 201.416259765625, "learning_rate": 5.472e-06, "loss": 19.9012, "step": 27360 }, { "epoch": 0.05528913165560345, "grad_norm": 217.0193634033203, "learning_rate": 5.4740000000000004e-06, "loss": 40.0359, "step": 27370 }, { "epoch": 0.05530933228828727, "grad_norm": 219.5017547607422, "learning_rate": 5.476000000000001e-06, "loss": 27.3678, "step": 27380 }, { "epoch": 0.055329532920971085, "grad_norm": 407.7970275878906, "learning_rate": 5.478e-06, "loss": 40.9125, "step": 27390 }, { "epoch": 0.0553497335536549, "grad_norm": 236.43528747558594, "learning_rate": 5.480000000000001e-06, "loss": 35.7535, "step": 27400 }, { "epoch": 0.05536993418633872, "grad_norm": 455.4217529296875, "learning_rate": 5.482000000000001e-06, "loss": 28.5454, "step": 27410 }, { "epoch": 0.05539013481902253, "grad_norm": 272.7969665527344, "learning_rate": 5.484e-06, "loss": 28.9281, "step": 27420 }, { "epoch": 0.055410335451706344, "grad_norm": 293.01141357421875, "learning_rate": 5.4860000000000005e-06, "loss": 32.8941, "step": 27430 }, { "epoch": 0.055430536084390164, "grad_norm": 271.666259765625, "learning_rate": 5.488e-06, "loss": 28.3948, "step": 27440 }, { "epoch": 0.05545073671707398, "grad_norm": 213.1638641357422, "learning_rate": 5.490000000000001e-06, "loss": 19.2447, "step": 27450 }, { "epoch": 0.0554709373497578, "grad_norm": 272.0662536621094, "learning_rate": 5.492000000000001e-06, "loss": 23.6043, "step": 27460 }, { "epoch": 0.05549113798244161, "grad_norm": 872.599365234375, "learning_rate": 5.494e-06, "loss": 34.8074, "step": 27470 }, { "epoch": 0.05551133861512542, "grad_norm": 368.00726318359375, "learning_rate": 5.496e-06, "loss": 36.6738, "step": 27480 }, { "epoch": 0.05553153924780924, "grad_norm": 167.2593536376953, "learning_rate": 5.498e-06, "loss": 16.0826, "step": 27490 }, { "epoch": 0.055551739880493056, "grad_norm": 121.56707763671875, "learning_rate": 5.500000000000001e-06, "loss": 44.1299, "step": 27500 }, { "epoch": 0.05557194051317687, "grad_norm": 250.26280212402344, "learning_rate": 5.502000000000001e-06, "loss": 28.2281, "step": 27510 }, { "epoch": 0.05559214114586069, "grad_norm": 809.2537231445312, "learning_rate": 5.504e-06, "loss": 38.4691, "step": 27520 }, { "epoch": 0.0556123417785445, "grad_norm": 118.10892486572266, "learning_rate": 5.506e-06, "loss": 16.6549, "step": 27530 }, { "epoch": 0.05563254241122832, "grad_norm": 311.76824951171875, "learning_rate": 5.508e-06, "loss": 12.46, "step": 27540 }, { "epoch": 0.055652743043912135, "grad_norm": 415.524169921875, "learning_rate": 5.510000000000001e-06, "loss": 30.7873, "step": 27550 }, { "epoch": 0.05567294367659595, "grad_norm": 308.05615234375, "learning_rate": 5.512000000000001e-06, "loss": 27.4349, "step": 27560 }, { "epoch": 0.05569314430927977, "grad_norm": 50.37138748168945, "learning_rate": 5.514e-06, "loss": 29.0309, "step": 27570 }, { "epoch": 0.05571334494196358, "grad_norm": 83.71073150634766, "learning_rate": 5.516e-06, "loss": 33.4112, "step": 27580 }, { "epoch": 0.055733545574647395, "grad_norm": 265.9757995605469, "learning_rate": 5.518e-06, "loss": 46.7522, "step": 27590 }, { "epoch": 0.055753746207331215, "grad_norm": 232.81224060058594, "learning_rate": 5.5200000000000005e-06, "loss": 35.8502, "step": 27600 }, { "epoch": 0.05577394684001503, "grad_norm": 319.1461486816406, "learning_rate": 5.522000000000001e-06, "loss": 25.4308, "step": 27610 }, { "epoch": 0.05579414747269885, "grad_norm": 169.481689453125, "learning_rate": 5.524000000000001e-06, "loss": 23.3878, "step": 27620 }, { "epoch": 0.05581434810538266, "grad_norm": 664.6361083984375, "learning_rate": 5.526e-06, "loss": 44.5176, "step": 27630 }, { "epoch": 0.055834548738066474, "grad_norm": 1140.2938232421875, "learning_rate": 5.528e-06, "loss": 30.452, "step": 27640 }, { "epoch": 0.055854749370750294, "grad_norm": 319.57135009765625, "learning_rate": 5.530000000000001e-06, "loss": 52.7124, "step": 27650 }, { "epoch": 0.05587495000343411, "grad_norm": 54.73879623413086, "learning_rate": 5.5320000000000006e-06, "loss": 24.1591, "step": 27660 }, { "epoch": 0.05589515063611792, "grad_norm": 785.5198364257812, "learning_rate": 5.534000000000001e-06, "loss": 27.0061, "step": 27670 }, { "epoch": 0.05591535126880174, "grad_norm": 267.6286926269531, "learning_rate": 5.536e-06, "loss": 41.1423, "step": 27680 }, { "epoch": 0.05593555190148555, "grad_norm": 417.5861511230469, "learning_rate": 5.538e-06, "loss": 30.7695, "step": 27690 }, { "epoch": 0.05595575253416937, "grad_norm": 187.53968811035156, "learning_rate": 5.540000000000001e-06, "loss": 41.2794, "step": 27700 }, { "epoch": 0.055975953166853186, "grad_norm": 73.38299560546875, "learning_rate": 5.5420000000000005e-06, "loss": 27.3123, "step": 27710 }, { "epoch": 0.055996153799537, "grad_norm": 241.59844970703125, "learning_rate": 5.544000000000001e-06, "loss": 34.9381, "step": 27720 }, { "epoch": 0.05601635443222082, "grad_norm": 237.23684692382812, "learning_rate": 5.546e-06, "loss": 34.615, "step": 27730 }, { "epoch": 0.05603655506490463, "grad_norm": 121.40789031982422, "learning_rate": 5.548e-06, "loss": 49.5217, "step": 27740 }, { "epoch": 0.056056755697588445, "grad_norm": 435.14739990234375, "learning_rate": 5.550000000000001e-06, "loss": 35.3092, "step": 27750 }, { "epoch": 0.056076956330272265, "grad_norm": 358.0445861816406, "learning_rate": 5.552e-06, "loss": 37.7498, "step": 27760 }, { "epoch": 0.05609715696295608, "grad_norm": 98.90144348144531, "learning_rate": 5.5540000000000005e-06, "loss": 16.4781, "step": 27770 }, { "epoch": 0.0561173575956399, "grad_norm": 363.83441162109375, "learning_rate": 5.556000000000001e-06, "loss": 26.2663, "step": 27780 }, { "epoch": 0.05613755822832371, "grad_norm": 129.157958984375, "learning_rate": 5.558e-06, "loss": 21.4152, "step": 27790 }, { "epoch": 0.056157758861007524, "grad_norm": 73.41030883789062, "learning_rate": 5.560000000000001e-06, "loss": 38.6229, "step": 27800 }, { "epoch": 0.056177959493691344, "grad_norm": 336.43450927734375, "learning_rate": 5.562000000000001e-06, "loss": 22.2797, "step": 27810 }, { "epoch": 0.05619816012637516, "grad_norm": 134.70677185058594, "learning_rate": 5.5640000000000004e-06, "loss": 37.4374, "step": 27820 }, { "epoch": 0.05621836075905897, "grad_norm": 1571.126708984375, "learning_rate": 5.566000000000001e-06, "loss": 47.6408, "step": 27830 }, { "epoch": 0.05623856139174279, "grad_norm": 207.42276000976562, "learning_rate": 5.568e-06, "loss": 47.119, "step": 27840 }, { "epoch": 0.0562587620244266, "grad_norm": 51.244422912597656, "learning_rate": 5.570000000000001e-06, "loss": 28.488, "step": 27850 }, { "epoch": 0.05627896265711042, "grad_norm": 310.72515869140625, "learning_rate": 5.572000000000001e-06, "loss": 33.9096, "step": 27860 }, { "epoch": 0.056299163289794237, "grad_norm": 281.1764221191406, "learning_rate": 5.574e-06, "loss": 37.2766, "step": 27870 }, { "epoch": 0.05631936392247805, "grad_norm": 209.36428833007812, "learning_rate": 5.5760000000000005e-06, "loss": 26.9244, "step": 27880 }, { "epoch": 0.05633956455516187, "grad_norm": 291.9185791015625, "learning_rate": 5.578e-06, "loss": 31.6012, "step": 27890 }, { "epoch": 0.05635976518784568, "grad_norm": 114.68229675292969, "learning_rate": 5.580000000000001e-06, "loss": 30.0874, "step": 27900 }, { "epoch": 0.056379965820529496, "grad_norm": 189.79425048828125, "learning_rate": 5.582000000000001e-06, "loss": 32.7017, "step": 27910 }, { "epoch": 0.056400166453213316, "grad_norm": 104.35430145263672, "learning_rate": 5.584e-06, "loss": 32.6785, "step": 27920 }, { "epoch": 0.05642036708589713, "grad_norm": 350.67803955078125, "learning_rate": 5.586e-06, "loss": 29.663, "step": 27930 }, { "epoch": 0.05644056771858095, "grad_norm": 228.2940216064453, "learning_rate": 5.588e-06, "loss": 34.4288, "step": 27940 }, { "epoch": 0.05646076835126476, "grad_norm": 179.36038208007812, "learning_rate": 5.590000000000001e-06, "loss": 45.1922, "step": 27950 }, { "epoch": 0.056480968983948575, "grad_norm": 489.2247619628906, "learning_rate": 5.592000000000001e-06, "loss": 22.3541, "step": 27960 }, { "epoch": 0.056501169616632395, "grad_norm": 477.16754150390625, "learning_rate": 5.594e-06, "loss": 50.7443, "step": 27970 }, { "epoch": 0.05652137024931621, "grad_norm": 48.09757614135742, "learning_rate": 5.596e-06, "loss": 33.0479, "step": 27980 }, { "epoch": 0.05654157088200002, "grad_norm": 164.65948486328125, "learning_rate": 5.5980000000000004e-06, "loss": 32.4233, "step": 27990 }, { "epoch": 0.05656177151468384, "grad_norm": 173.7892608642578, "learning_rate": 5.600000000000001e-06, "loss": 21.5362, "step": 28000 }, { "epoch": 0.056581972147367654, "grad_norm": 0.0, "learning_rate": 5.602000000000001e-06, "loss": 49.6719, "step": 28010 }, { "epoch": 0.056602172780051474, "grad_norm": 678.3466186523438, "learning_rate": 5.604000000000001e-06, "loss": 51.9068, "step": 28020 }, { "epoch": 0.05662237341273529, "grad_norm": 605.5780639648438, "learning_rate": 5.606e-06, "loss": 31.8107, "step": 28030 }, { "epoch": 0.0566425740454191, "grad_norm": 459.156494140625, "learning_rate": 5.608e-06, "loss": 38.9947, "step": 28040 }, { "epoch": 0.05666277467810292, "grad_norm": 226.96746826171875, "learning_rate": 5.610000000000001e-06, "loss": 34.4967, "step": 28050 }, { "epoch": 0.05668297531078673, "grad_norm": 288.8797607421875, "learning_rate": 5.612000000000001e-06, "loss": 30.0391, "step": 28060 }, { "epoch": 0.056703175943470546, "grad_norm": 271.6133728027344, "learning_rate": 5.614000000000001e-06, "loss": 40.521, "step": 28070 }, { "epoch": 0.056723376576154366, "grad_norm": 228.9929656982422, "learning_rate": 5.616e-06, "loss": 29.9447, "step": 28080 }, { "epoch": 0.05674357720883818, "grad_norm": 62.38369369506836, "learning_rate": 5.618e-06, "loss": 32.8618, "step": 28090 }, { "epoch": 0.056763777841522, "grad_norm": 350.6247253417969, "learning_rate": 5.620000000000001e-06, "loss": 33.0409, "step": 28100 }, { "epoch": 0.05678397847420581, "grad_norm": 110.48347473144531, "learning_rate": 5.6220000000000006e-06, "loss": 43.9248, "step": 28110 }, { "epoch": 0.056804179106889625, "grad_norm": 449.659423828125, "learning_rate": 5.624000000000001e-06, "loss": 36.8526, "step": 28120 }, { "epoch": 0.056824379739573445, "grad_norm": 196.14952087402344, "learning_rate": 5.626e-06, "loss": 47.3784, "step": 28130 }, { "epoch": 0.05684458037225726, "grad_norm": 429.9928283691406, "learning_rate": 5.628e-06, "loss": 39.7922, "step": 28140 }, { "epoch": 0.05686478100494107, "grad_norm": 403.69195556640625, "learning_rate": 5.63e-06, "loss": 26.7484, "step": 28150 }, { "epoch": 0.05688498163762489, "grad_norm": 131.8024139404297, "learning_rate": 5.6320000000000005e-06, "loss": 20.5494, "step": 28160 }, { "epoch": 0.056905182270308705, "grad_norm": 29.945541381835938, "learning_rate": 5.634000000000001e-06, "loss": 34.5065, "step": 28170 }, { "epoch": 0.056925382902992525, "grad_norm": 462.5126037597656, "learning_rate": 5.636000000000001e-06, "loss": 19.5062, "step": 28180 }, { "epoch": 0.05694558353567634, "grad_norm": 408.87646484375, "learning_rate": 5.638e-06, "loss": 42.0507, "step": 28190 }, { "epoch": 0.05696578416836015, "grad_norm": 431.03851318359375, "learning_rate": 5.64e-06, "loss": 58.8392, "step": 28200 }, { "epoch": 0.05698598480104397, "grad_norm": 179.0051727294922, "learning_rate": 5.642000000000001e-06, "loss": 29.5277, "step": 28210 }, { "epoch": 0.057006185433727784, "grad_norm": 1321.76904296875, "learning_rate": 5.6440000000000005e-06, "loss": 34.0521, "step": 28220 }, { "epoch": 0.0570263860664116, "grad_norm": 169.49526977539062, "learning_rate": 5.646000000000001e-06, "loss": 35.672, "step": 28230 }, { "epoch": 0.05704658669909542, "grad_norm": 193.328125, "learning_rate": 5.648e-06, "loss": 29.4588, "step": 28240 }, { "epoch": 0.05706678733177923, "grad_norm": 204.71014404296875, "learning_rate": 5.65e-06, "loss": 40.0932, "step": 28250 }, { "epoch": 0.05708698796446305, "grad_norm": 345.37451171875, "learning_rate": 5.652000000000001e-06, "loss": 34.0953, "step": 28260 }, { "epoch": 0.05710718859714686, "grad_norm": 737.114990234375, "learning_rate": 5.654e-06, "loss": 67.489, "step": 28270 }, { "epoch": 0.057127389229830676, "grad_norm": 617.7103881835938, "learning_rate": 5.6560000000000006e-06, "loss": 34.781, "step": 28280 }, { "epoch": 0.057147589862514496, "grad_norm": 165.49501037597656, "learning_rate": 5.658e-06, "loss": 29.7071, "step": 28290 }, { "epoch": 0.05716779049519831, "grad_norm": 39.62914276123047, "learning_rate": 5.66e-06, "loss": 31.455, "step": 28300 }, { "epoch": 0.05718799112788212, "grad_norm": 74.19182586669922, "learning_rate": 5.662000000000001e-06, "loss": 40.9167, "step": 28310 }, { "epoch": 0.05720819176056594, "grad_norm": 81.1659927368164, "learning_rate": 5.664e-06, "loss": 10.7764, "step": 28320 }, { "epoch": 0.057228392393249755, "grad_norm": 726.7647094726562, "learning_rate": 5.6660000000000005e-06, "loss": 31.8086, "step": 28330 }, { "epoch": 0.057248593025933575, "grad_norm": 653.2161865234375, "learning_rate": 5.668e-06, "loss": 33.9627, "step": 28340 }, { "epoch": 0.05726879365861739, "grad_norm": 179.55702209472656, "learning_rate": 5.67e-06, "loss": 28.1007, "step": 28350 }, { "epoch": 0.0572889942913012, "grad_norm": 215.1195831298828, "learning_rate": 5.672000000000001e-06, "loss": 29.7669, "step": 28360 }, { "epoch": 0.05730919492398502, "grad_norm": 1270.41943359375, "learning_rate": 5.674e-06, "loss": 44.9606, "step": 28370 }, { "epoch": 0.057329395556668834, "grad_norm": 164.34056091308594, "learning_rate": 5.676e-06, "loss": 22.5871, "step": 28380 }, { "epoch": 0.05734959618935265, "grad_norm": 269.60076904296875, "learning_rate": 5.6780000000000005e-06, "loss": 18.3009, "step": 28390 }, { "epoch": 0.05736979682203647, "grad_norm": 260.3559265136719, "learning_rate": 5.68e-06, "loss": 25.6771, "step": 28400 }, { "epoch": 0.05738999745472028, "grad_norm": 85.6640396118164, "learning_rate": 5.682000000000001e-06, "loss": 11.7355, "step": 28410 }, { "epoch": 0.0574101980874041, "grad_norm": 235.91586303710938, "learning_rate": 5.684000000000001e-06, "loss": 52.0014, "step": 28420 }, { "epoch": 0.05743039872008791, "grad_norm": 212.94891357421875, "learning_rate": 5.686e-06, "loss": 36.7373, "step": 28430 }, { "epoch": 0.057450599352771727, "grad_norm": 185.3372802734375, "learning_rate": 5.6880000000000004e-06, "loss": 19.8588, "step": 28440 }, { "epoch": 0.057470799985455547, "grad_norm": 123.33940124511719, "learning_rate": 5.69e-06, "loss": 22.2032, "step": 28450 }, { "epoch": 0.05749100061813936, "grad_norm": 296.97210693359375, "learning_rate": 5.692000000000001e-06, "loss": 35.2885, "step": 28460 }, { "epoch": 0.05751120125082317, "grad_norm": 407.0281066894531, "learning_rate": 5.694000000000001e-06, "loss": 50.2104, "step": 28470 }, { "epoch": 0.05753140188350699, "grad_norm": 277.5745544433594, "learning_rate": 5.696e-06, "loss": 23.568, "step": 28480 }, { "epoch": 0.057551602516190806, "grad_norm": 0.0, "learning_rate": 5.698e-06, "loss": 35.8239, "step": 28490 }, { "epoch": 0.057571803148874626, "grad_norm": 240.3690948486328, "learning_rate": 5.7e-06, "loss": 36.2666, "step": 28500 }, { "epoch": 0.05759200378155844, "grad_norm": 330.2105407714844, "learning_rate": 5.702000000000001e-06, "loss": 38.4249, "step": 28510 }, { "epoch": 0.05761220441424225, "grad_norm": 511.2622375488281, "learning_rate": 5.704000000000001e-06, "loss": 38.1614, "step": 28520 }, { "epoch": 0.05763240504692607, "grad_norm": 141.976806640625, "learning_rate": 5.706e-06, "loss": 28.2254, "step": 28530 }, { "epoch": 0.057652605679609885, "grad_norm": 52.43581771850586, "learning_rate": 5.708e-06, "loss": 31.7612, "step": 28540 }, { "epoch": 0.0576728063122937, "grad_norm": 175.9099578857422, "learning_rate": 5.71e-06, "loss": 39.151, "step": 28550 }, { "epoch": 0.05769300694497752, "grad_norm": 74.60289764404297, "learning_rate": 5.7120000000000005e-06, "loss": 34.9076, "step": 28560 }, { "epoch": 0.05771320757766133, "grad_norm": 172.93077087402344, "learning_rate": 5.714000000000001e-06, "loss": 34.3388, "step": 28570 }, { "epoch": 0.05773340821034515, "grad_norm": 116.9732437133789, "learning_rate": 5.716000000000001e-06, "loss": 60.8548, "step": 28580 }, { "epoch": 0.057753608843028964, "grad_norm": 168.976806640625, "learning_rate": 5.718e-06, "loss": 39.1669, "step": 28590 }, { "epoch": 0.05777380947571278, "grad_norm": 197.0828094482422, "learning_rate": 5.72e-06, "loss": 37.104, "step": 28600 }, { "epoch": 0.0577940101083966, "grad_norm": 741.955078125, "learning_rate": 5.722000000000001e-06, "loss": 28.1663, "step": 28610 }, { "epoch": 0.05781421074108041, "grad_norm": 1241.803466796875, "learning_rate": 5.724000000000001e-06, "loss": 39.2669, "step": 28620 }, { "epoch": 0.05783441137376422, "grad_norm": 211.02798461914062, "learning_rate": 5.726000000000001e-06, "loss": 32.6483, "step": 28630 }, { "epoch": 0.05785461200644804, "grad_norm": 352.0150451660156, "learning_rate": 5.728e-06, "loss": 33.1587, "step": 28640 }, { "epoch": 0.057874812639131856, "grad_norm": 120.12027740478516, "learning_rate": 5.73e-06, "loss": 22.8094, "step": 28650 }, { "epoch": 0.057895013271815676, "grad_norm": 551.2969970703125, "learning_rate": 5.732000000000001e-06, "loss": 22.3281, "step": 28660 }, { "epoch": 0.05791521390449949, "grad_norm": 643.1717529296875, "learning_rate": 5.7340000000000005e-06, "loss": 49.6617, "step": 28670 }, { "epoch": 0.0579354145371833, "grad_norm": 143.7513885498047, "learning_rate": 5.736000000000001e-06, "loss": 17.4144, "step": 28680 }, { "epoch": 0.05795561516986712, "grad_norm": 549.3804931640625, "learning_rate": 5.738e-06, "loss": 32.8441, "step": 28690 }, { "epoch": 0.057975815802550935, "grad_norm": 154.7832794189453, "learning_rate": 5.74e-06, "loss": 60.6092, "step": 28700 }, { "epoch": 0.05799601643523475, "grad_norm": 223.884765625, "learning_rate": 5.742000000000001e-06, "loss": 40.1144, "step": 28710 }, { "epoch": 0.05801621706791857, "grad_norm": 478.8924255371094, "learning_rate": 5.744e-06, "loss": 37.4116, "step": 28720 }, { "epoch": 0.05803641770060238, "grad_norm": 398.9019775390625, "learning_rate": 5.7460000000000006e-06, "loss": 26.208, "step": 28730 }, { "epoch": 0.0580566183332862, "grad_norm": 765.0367431640625, "learning_rate": 5.748e-06, "loss": 34.3935, "step": 28740 }, { "epoch": 0.058076818965970015, "grad_norm": 195.24693298339844, "learning_rate": 5.75e-06, "loss": 22.7524, "step": 28750 }, { "epoch": 0.05809701959865383, "grad_norm": 558.7227783203125, "learning_rate": 5.752000000000001e-06, "loss": 47.8686, "step": 28760 }, { "epoch": 0.05811722023133765, "grad_norm": 324.2857666015625, "learning_rate": 5.754e-06, "loss": 43.4827, "step": 28770 }, { "epoch": 0.05813742086402146, "grad_norm": 525.3078002929688, "learning_rate": 5.7560000000000005e-06, "loss": 38.2029, "step": 28780 }, { "epoch": 0.058157621496705274, "grad_norm": 335.6280517578125, "learning_rate": 5.758000000000001e-06, "loss": 30.1702, "step": 28790 }, { "epoch": 0.058177822129389094, "grad_norm": 159.77574157714844, "learning_rate": 5.76e-06, "loss": 26.1151, "step": 28800 }, { "epoch": 0.05819802276207291, "grad_norm": 220.54000854492188, "learning_rate": 5.762000000000001e-06, "loss": 45.9751, "step": 28810 }, { "epoch": 0.05821822339475673, "grad_norm": 698.6255493164062, "learning_rate": 5.764000000000001e-06, "loss": 40.2558, "step": 28820 }, { "epoch": 0.05823842402744054, "grad_norm": 132.11264038085938, "learning_rate": 5.766e-06, "loss": 38.9438, "step": 28830 }, { "epoch": 0.05825862466012435, "grad_norm": 289.2243347167969, "learning_rate": 5.7680000000000005e-06, "loss": 31.6742, "step": 28840 }, { "epoch": 0.05827882529280817, "grad_norm": 244.9074249267578, "learning_rate": 5.77e-06, "loss": 28.119, "step": 28850 }, { "epoch": 0.058299025925491986, "grad_norm": 185.5641326904297, "learning_rate": 5.772000000000001e-06, "loss": 21.0201, "step": 28860 }, { "epoch": 0.0583192265581758, "grad_norm": 353.79730224609375, "learning_rate": 5.774000000000001e-06, "loss": 37.7505, "step": 28870 }, { "epoch": 0.05833942719085962, "grad_norm": 234.84043884277344, "learning_rate": 5.776e-06, "loss": 21.7855, "step": 28880 }, { "epoch": 0.05835962782354343, "grad_norm": 635.247314453125, "learning_rate": 5.778e-06, "loss": 32.1038, "step": 28890 }, { "epoch": 0.05837982845622725, "grad_norm": 323.54229736328125, "learning_rate": 5.78e-06, "loss": 31.718, "step": 28900 }, { "epoch": 0.058400029088911065, "grad_norm": 369.57379150390625, "learning_rate": 5.782000000000001e-06, "loss": 42.7006, "step": 28910 }, { "epoch": 0.05842022972159488, "grad_norm": 451.6365051269531, "learning_rate": 5.784000000000001e-06, "loss": 26.5316, "step": 28920 }, { "epoch": 0.0584404303542787, "grad_norm": 212.52005004882812, "learning_rate": 5.786e-06, "loss": 25.2294, "step": 28930 }, { "epoch": 0.05846063098696251, "grad_norm": 108.28263092041016, "learning_rate": 5.788e-06, "loss": 26.0932, "step": 28940 }, { "epoch": 0.058480831619646324, "grad_norm": 445.3345031738281, "learning_rate": 5.7900000000000005e-06, "loss": 59.6575, "step": 28950 }, { "epoch": 0.058501032252330144, "grad_norm": 256.53936767578125, "learning_rate": 5.792000000000001e-06, "loss": 57.367, "step": 28960 }, { "epoch": 0.05852123288501396, "grad_norm": 147.56431579589844, "learning_rate": 5.794000000000001e-06, "loss": 26.8793, "step": 28970 }, { "epoch": 0.05854143351769778, "grad_norm": 123.61126708984375, "learning_rate": 5.796000000000001e-06, "loss": 42.0545, "step": 28980 }, { "epoch": 0.05856163415038159, "grad_norm": 268.7838134765625, "learning_rate": 5.798e-06, "loss": 36.3227, "step": 28990 }, { "epoch": 0.058581834783065403, "grad_norm": 292.09002685546875, "learning_rate": 5.8e-06, "loss": 19.2175, "step": 29000 }, { "epoch": 0.058602035415749223, "grad_norm": 200.91424560546875, "learning_rate": 5.802000000000001e-06, "loss": 20.4458, "step": 29010 }, { "epoch": 0.05862223604843304, "grad_norm": 357.77789306640625, "learning_rate": 5.804000000000001e-06, "loss": 24.6646, "step": 29020 }, { "epoch": 0.05864243668111685, "grad_norm": 248.0347900390625, "learning_rate": 5.806000000000001e-06, "loss": 44.2378, "step": 29030 }, { "epoch": 0.05866263731380067, "grad_norm": 348.2041320800781, "learning_rate": 5.808e-06, "loss": 30.943, "step": 29040 }, { "epoch": 0.05868283794648448, "grad_norm": 500.89837646484375, "learning_rate": 5.81e-06, "loss": 39.8576, "step": 29050 }, { "epoch": 0.0587030385791683, "grad_norm": 179.7689208984375, "learning_rate": 5.812000000000001e-06, "loss": 33.3345, "step": 29060 }, { "epoch": 0.058723239211852116, "grad_norm": 130.61712646484375, "learning_rate": 5.814000000000001e-06, "loss": 29.4376, "step": 29070 }, { "epoch": 0.05874343984453593, "grad_norm": 204.2139129638672, "learning_rate": 5.816000000000001e-06, "loss": 26.1031, "step": 29080 }, { "epoch": 0.05876364047721975, "grad_norm": 120.33717346191406, "learning_rate": 5.818e-06, "loss": 26.7508, "step": 29090 }, { "epoch": 0.05878384110990356, "grad_norm": 343.2328796386719, "learning_rate": 5.82e-06, "loss": 32.6627, "step": 29100 }, { "epoch": 0.058804041742587375, "grad_norm": 474.8799743652344, "learning_rate": 5.822000000000001e-06, "loss": 31.7774, "step": 29110 }, { "epoch": 0.058824242375271195, "grad_norm": 309.3840026855469, "learning_rate": 5.8240000000000005e-06, "loss": 39.5971, "step": 29120 }, { "epoch": 0.05884444300795501, "grad_norm": 342.31536865234375, "learning_rate": 5.826000000000001e-06, "loss": 32.9897, "step": 29130 }, { "epoch": 0.05886464364063883, "grad_norm": 226.64828491210938, "learning_rate": 5.828e-06, "loss": 19.2715, "step": 29140 }, { "epoch": 0.05888484427332264, "grad_norm": 461.4400329589844, "learning_rate": 5.83e-06, "loss": 32.8384, "step": 29150 }, { "epoch": 0.058905044906006454, "grad_norm": 258.3500671386719, "learning_rate": 5.832000000000001e-06, "loss": 21.5534, "step": 29160 }, { "epoch": 0.058925245538690274, "grad_norm": 89.9924545288086, "learning_rate": 5.834e-06, "loss": 24.3038, "step": 29170 }, { "epoch": 0.05894544617137409, "grad_norm": 367.8519592285156, "learning_rate": 5.8360000000000005e-06, "loss": 30.6568, "step": 29180 }, { "epoch": 0.0589656468040579, "grad_norm": 165.57937622070312, "learning_rate": 5.838000000000001e-06, "loss": 44.2925, "step": 29190 }, { "epoch": 0.05898584743674172, "grad_norm": 573.236328125, "learning_rate": 5.84e-06, "loss": 25.7111, "step": 29200 }, { "epoch": 0.05900604806942553, "grad_norm": 450.0783386230469, "learning_rate": 5.842000000000001e-06, "loss": 35.015, "step": 29210 }, { "epoch": 0.05902624870210935, "grad_norm": 205.9366455078125, "learning_rate": 5.844000000000001e-06, "loss": 25.1905, "step": 29220 }, { "epoch": 0.059046449334793166, "grad_norm": 350.72412109375, "learning_rate": 5.8460000000000004e-06, "loss": 38.9316, "step": 29230 }, { "epoch": 0.05906664996747698, "grad_norm": 337.3996887207031, "learning_rate": 5.848000000000001e-06, "loss": 22.1368, "step": 29240 }, { "epoch": 0.0590868506001608, "grad_norm": 385.4148864746094, "learning_rate": 5.85e-06, "loss": 20.0345, "step": 29250 }, { "epoch": 0.05910705123284461, "grad_norm": 95.8175277709961, "learning_rate": 5.852000000000001e-06, "loss": 19.8754, "step": 29260 }, { "epoch": 0.059127251865528425, "grad_norm": 223.6885986328125, "learning_rate": 5.854000000000001e-06, "loss": 39.7757, "step": 29270 }, { "epoch": 0.059147452498212245, "grad_norm": 160.87008666992188, "learning_rate": 5.856e-06, "loss": 21.9172, "step": 29280 }, { "epoch": 0.05916765313089606, "grad_norm": 271.30487060546875, "learning_rate": 5.8580000000000005e-06, "loss": 31.8076, "step": 29290 }, { "epoch": 0.05918785376357988, "grad_norm": 257.5179443359375, "learning_rate": 5.86e-06, "loss": 19.5301, "step": 29300 }, { "epoch": 0.05920805439626369, "grad_norm": 146.6499481201172, "learning_rate": 5.862000000000001e-06, "loss": 31.6518, "step": 29310 }, { "epoch": 0.059228255028947505, "grad_norm": 107.93689727783203, "learning_rate": 5.864000000000001e-06, "loss": 33.6268, "step": 29320 }, { "epoch": 0.059248455661631325, "grad_norm": 361.7021789550781, "learning_rate": 5.866e-06, "loss": 29.1773, "step": 29330 }, { "epoch": 0.05926865629431514, "grad_norm": 270.900390625, "learning_rate": 5.868e-06, "loss": 34.4935, "step": 29340 }, { "epoch": 0.05928885692699895, "grad_norm": 327.2626647949219, "learning_rate": 5.8700000000000005e-06, "loss": 28.6962, "step": 29350 }, { "epoch": 0.05930905755968277, "grad_norm": 115.41912078857422, "learning_rate": 5.872000000000001e-06, "loss": 46.249, "step": 29360 }, { "epoch": 0.059329258192366584, "grad_norm": 258.4614562988281, "learning_rate": 5.874000000000001e-06, "loss": 26.679, "step": 29370 }, { "epoch": 0.059349458825050404, "grad_norm": 437.05517578125, "learning_rate": 5.876000000000001e-06, "loss": 37.588, "step": 29380 }, { "epoch": 0.05936965945773422, "grad_norm": 190.70053100585938, "learning_rate": 5.878e-06, "loss": 39.1928, "step": 29390 }, { "epoch": 0.05938986009041803, "grad_norm": 158.3235321044922, "learning_rate": 5.8800000000000005e-06, "loss": 48.6115, "step": 29400 }, { "epoch": 0.05941006072310185, "grad_norm": 112.90347290039062, "learning_rate": 5.882e-06, "loss": 32.5309, "step": 29410 }, { "epoch": 0.05943026135578566, "grad_norm": 180.59510803222656, "learning_rate": 5.884000000000001e-06, "loss": 31.55, "step": 29420 }, { "epoch": 0.059450461988469476, "grad_norm": 619.6239013671875, "learning_rate": 5.886000000000001e-06, "loss": 31.7058, "step": 29430 }, { "epoch": 0.059470662621153296, "grad_norm": 703.5104370117188, "learning_rate": 5.888e-06, "loss": 32.703, "step": 29440 }, { "epoch": 0.05949086325383711, "grad_norm": 757.9566040039062, "learning_rate": 5.89e-06, "loss": 38.2091, "step": 29450 }, { "epoch": 0.05951106388652093, "grad_norm": 320.00457763671875, "learning_rate": 5.892e-06, "loss": 38.4453, "step": 29460 }, { "epoch": 0.05953126451920474, "grad_norm": 264.1900939941406, "learning_rate": 5.894000000000001e-06, "loss": 27.7647, "step": 29470 }, { "epoch": 0.059551465151888555, "grad_norm": 412.1574401855469, "learning_rate": 5.896000000000001e-06, "loss": 34.5328, "step": 29480 }, { "epoch": 0.059571665784572375, "grad_norm": 375.3290710449219, "learning_rate": 5.898e-06, "loss": 25.5949, "step": 29490 }, { "epoch": 0.05959186641725619, "grad_norm": 142.4037628173828, "learning_rate": 5.9e-06, "loss": 38.3004, "step": 29500 }, { "epoch": 0.05961206704994, "grad_norm": 44.930084228515625, "learning_rate": 5.9019999999999996e-06, "loss": 36.8279, "step": 29510 }, { "epoch": 0.05963226768262382, "grad_norm": 331.5057373046875, "learning_rate": 5.9040000000000006e-06, "loss": 38.1948, "step": 29520 }, { "epoch": 0.059652468315307634, "grad_norm": 649.745361328125, "learning_rate": 5.906000000000001e-06, "loss": 41.5221, "step": 29530 }, { "epoch": 0.059672668947991454, "grad_norm": 274.5308532714844, "learning_rate": 5.908e-06, "loss": 29.7708, "step": 29540 }, { "epoch": 0.05969286958067527, "grad_norm": 380.6266174316406, "learning_rate": 5.91e-06, "loss": 31.5874, "step": 29550 }, { "epoch": 0.05971307021335908, "grad_norm": 197.39340209960938, "learning_rate": 5.912e-06, "loss": 18.6248, "step": 29560 }, { "epoch": 0.0597332708460429, "grad_norm": 140.67674255371094, "learning_rate": 5.9140000000000005e-06, "loss": 23.7791, "step": 29570 }, { "epoch": 0.059753471478726713, "grad_norm": 672.4180908203125, "learning_rate": 5.916000000000001e-06, "loss": 41.9087, "step": 29580 }, { "epoch": 0.05977367211141053, "grad_norm": 911.4059448242188, "learning_rate": 5.918000000000001e-06, "loss": 30.6264, "step": 29590 }, { "epoch": 0.05979387274409435, "grad_norm": 365.6069030761719, "learning_rate": 5.92e-06, "loss": 35.3698, "step": 29600 }, { "epoch": 0.05981407337677816, "grad_norm": 341.8504333496094, "learning_rate": 5.922e-06, "loss": 28.0727, "step": 29610 }, { "epoch": 0.05983427400946198, "grad_norm": 143.74244689941406, "learning_rate": 5.924000000000001e-06, "loss": 42.1398, "step": 29620 }, { "epoch": 0.05985447464214579, "grad_norm": 418.2632141113281, "learning_rate": 5.9260000000000005e-06, "loss": 63.2239, "step": 29630 }, { "epoch": 0.059874675274829606, "grad_norm": 136.97076416015625, "learning_rate": 5.928000000000001e-06, "loss": 21.4157, "step": 29640 }, { "epoch": 0.059894875907513426, "grad_norm": 182.17835998535156, "learning_rate": 5.93e-06, "loss": 27.4692, "step": 29650 }, { "epoch": 0.05991507654019724, "grad_norm": 405.9251708984375, "learning_rate": 5.932e-06, "loss": 26.0449, "step": 29660 }, { "epoch": 0.05993527717288105, "grad_norm": 234.43829345703125, "learning_rate": 5.934000000000001e-06, "loss": 35.9235, "step": 29670 }, { "epoch": 0.05995547780556487, "grad_norm": 141.06016540527344, "learning_rate": 5.9360000000000004e-06, "loss": 27.0417, "step": 29680 }, { "epoch": 0.059975678438248685, "grad_norm": 157.56759643554688, "learning_rate": 5.9380000000000006e-06, "loss": 48.4821, "step": 29690 }, { "epoch": 0.059995879070932505, "grad_norm": 146.55078125, "learning_rate": 5.94e-06, "loss": 30.3019, "step": 29700 }, { "epoch": 0.06001607970361632, "grad_norm": 337.427490234375, "learning_rate": 5.942e-06, "loss": 26.6493, "step": 29710 }, { "epoch": 0.06003628033630013, "grad_norm": 0.0, "learning_rate": 5.944000000000001e-06, "loss": 20.7929, "step": 29720 }, { "epoch": 0.06005648096898395, "grad_norm": 646.2410888671875, "learning_rate": 5.946e-06, "loss": 33.8569, "step": 29730 }, { "epoch": 0.060076681601667764, "grad_norm": 164.72579956054688, "learning_rate": 5.9480000000000005e-06, "loss": 44.7932, "step": 29740 }, { "epoch": 0.06009688223435158, "grad_norm": 399.6468505859375, "learning_rate": 5.950000000000001e-06, "loss": 33.6226, "step": 29750 }, { "epoch": 0.0601170828670354, "grad_norm": 267.7900390625, "learning_rate": 5.952e-06, "loss": 22.5515, "step": 29760 }, { "epoch": 0.06013728349971921, "grad_norm": 222.00254821777344, "learning_rate": 5.954000000000001e-06, "loss": 15.104, "step": 29770 }, { "epoch": 0.06015748413240303, "grad_norm": 176.11509704589844, "learning_rate": 5.956000000000001e-06, "loss": 37.7277, "step": 29780 }, { "epoch": 0.06017768476508684, "grad_norm": 313.67169189453125, "learning_rate": 5.958e-06, "loss": 39.6555, "step": 29790 }, { "epoch": 0.060197885397770656, "grad_norm": 686.8892211914062, "learning_rate": 5.9600000000000005e-06, "loss": 36.0911, "step": 29800 }, { "epoch": 0.060218086030454476, "grad_norm": 1625.1280517578125, "learning_rate": 5.962e-06, "loss": 39.3501, "step": 29810 }, { "epoch": 0.06023828666313829, "grad_norm": 413.2889099121094, "learning_rate": 5.964000000000001e-06, "loss": 38.0857, "step": 29820 }, { "epoch": 0.0602584872958221, "grad_norm": 1452.6138916015625, "learning_rate": 5.966000000000001e-06, "loss": 50.7307, "step": 29830 }, { "epoch": 0.06027868792850592, "grad_norm": 146.70254516601562, "learning_rate": 5.968e-06, "loss": 21.3677, "step": 29840 }, { "epoch": 0.060298888561189735, "grad_norm": 277.4604797363281, "learning_rate": 5.9700000000000004e-06, "loss": 14.105, "step": 29850 }, { "epoch": 0.060319089193873555, "grad_norm": 413.4416809082031, "learning_rate": 5.972e-06, "loss": 56.9844, "step": 29860 }, { "epoch": 0.06033928982655737, "grad_norm": 941.7999267578125, "learning_rate": 5.974000000000001e-06, "loss": 45.5479, "step": 29870 }, { "epoch": 0.06035949045924118, "grad_norm": 236.96214294433594, "learning_rate": 5.976000000000001e-06, "loss": 21.664, "step": 29880 }, { "epoch": 0.060379691091925, "grad_norm": 190.88253784179688, "learning_rate": 5.978e-06, "loss": 26.9611, "step": 29890 }, { "epoch": 0.060399891724608815, "grad_norm": 395.4151916503906, "learning_rate": 5.98e-06, "loss": 42.275, "step": 29900 }, { "epoch": 0.06042009235729263, "grad_norm": 126.76368713378906, "learning_rate": 5.982e-06, "loss": 19.3393, "step": 29910 }, { "epoch": 0.06044029298997645, "grad_norm": 142.51112365722656, "learning_rate": 5.984000000000001e-06, "loss": 21.7192, "step": 29920 }, { "epoch": 0.06046049362266026, "grad_norm": 233.38385009765625, "learning_rate": 5.986000000000001e-06, "loss": 29.6551, "step": 29930 }, { "epoch": 0.06048069425534408, "grad_norm": 406.4140319824219, "learning_rate": 5.988e-06, "loss": 32.2254, "step": 29940 }, { "epoch": 0.060500894888027894, "grad_norm": 192.3277587890625, "learning_rate": 5.99e-06, "loss": 39.156, "step": 29950 }, { "epoch": 0.06052109552071171, "grad_norm": 187.3136749267578, "learning_rate": 5.992e-06, "loss": 25.4599, "step": 29960 }, { "epoch": 0.06054129615339553, "grad_norm": 132.228271484375, "learning_rate": 5.9940000000000005e-06, "loss": 30.0272, "step": 29970 }, { "epoch": 0.06056149678607934, "grad_norm": 327.5674133300781, "learning_rate": 5.996000000000001e-06, "loss": 24.4915, "step": 29980 }, { "epoch": 0.06058169741876315, "grad_norm": 621.1312866210938, "learning_rate": 5.998000000000001e-06, "loss": 32.4333, "step": 29990 }, { "epoch": 0.06060189805144697, "grad_norm": 273.5018310546875, "learning_rate": 6e-06, "loss": 34.5051, "step": 30000 }, { "epoch": 0.060622098684130786, "grad_norm": 101.80760192871094, "learning_rate": 6.002e-06, "loss": 34.653, "step": 30010 }, { "epoch": 0.060642299316814606, "grad_norm": 327.6451416015625, "learning_rate": 6.004000000000001e-06, "loss": 31.9465, "step": 30020 }, { "epoch": 0.06066249994949842, "grad_norm": 46.82477951049805, "learning_rate": 6.006000000000001e-06, "loss": 29.5885, "step": 30030 }, { "epoch": 0.06068270058218223, "grad_norm": 457.5846252441406, "learning_rate": 6.008000000000001e-06, "loss": 30.2508, "step": 30040 }, { "epoch": 0.06070290121486605, "grad_norm": 390.32666015625, "learning_rate": 6.01e-06, "loss": 27.8488, "step": 30050 }, { "epoch": 0.060723101847549865, "grad_norm": 226.388671875, "learning_rate": 6.012e-06, "loss": 37.2616, "step": 30060 }, { "epoch": 0.06074330248023368, "grad_norm": 182.182373046875, "learning_rate": 6.014000000000001e-06, "loss": 28.5383, "step": 30070 }, { "epoch": 0.0607635031129175, "grad_norm": 147.3685760498047, "learning_rate": 6.0160000000000005e-06, "loss": 26.1269, "step": 30080 }, { "epoch": 0.06078370374560131, "grad_norm": 387.9313659667969, "learning_rate": 6.018000000000001e-06, "loss": 32.0864, "step": 30090 }, { "epoch": 0.06080390437828513, "grad_norm": 278.3341369628906, "learning_rate": 6.02e-06, "loss": 34.2456, "step": 30100 }, { "epoch": 0.060824105010968944, "grad_norm": 742.5695190429688, "learning_rate": 6.022e-06, "loss": 36.5555, "step": 30110 }, { "epoch": 0.06084430564365276, "grad_norm": 136.59478759765625, "learning_rate": 6.024000000000001e-06, "loss": 21.64, "step": 30120 }, { "epoch": 0.06086450627633658, "grad_norm": 242.92019653320312, "learning_rate": 6.026e-06, "loss": 50.0677, "step": 30130 }, { "epoch": 0.06088470690902039, "grad_norm": 209.75247192382812, "learning_rate": 6.0280000000000006e-06, "loss": 27.8245, "step": 30140 }, { "epoch": 0.060904907541704204, "grad_norm": 280.1364440917969, "learning_rate": 6.030000000000001e-06, "loss": 39.3758, "step": 30150 }, { "epoch": 0.060925108174388024, "grad_norm": 387.22650146484375, "learning_rate": 6.032e-06, "loss": 43.3904, "step": 30160 }, { "epoch": 0.06094530880707184, "grad_norm": 217.00314331054688, "learning_rate": 6.034000000000001e-06, "loss": 25.1091, "step": 30170 }, { "epoch": 0.06096550943975566, "grad_norm": 767.8530883789062, "learning_rate": 6.036000000000001e-06, "loss": 44.334, "step": 30180 }, { "epoch": 0.06098571007243947, "grad_norm": 376.458251953125, "learning_rate": 6.0380000000000005e-06, "loss": 29.7355, "step": 30190 }, { "epoch": 0.06100591070512328, "grad_norm": 539.1009521484375, "learning_rate": 6.040000000000001e-06, "loss": 44.6731, "step": 30200 }, { "epoch": 0.0610261113378071, "grad_norm": 8.135963439941406, "learning_rate": 6.042e-06, "loss": 28.0779, "step": 30210 }, { "epoch": 0.061046311970490916, "grad_norm": 169.79579162597656, "learning_rate": 6.044000000000001e-06, "loss": 20.2681, "step": 30220 }, { "epoch": 0.06106651260317473, "grad_norm": 354.596435546875, "learning_rate": 6.046000000000001e-06, "loss": 37.5877, "step": 30230 }, { "epoch": 0.06108671323585855, "grad_norm": 176.68296813964844, "learning_rate": 6.048e-06, "loss": 32.9427, "step": 30240 }, { "epoch": 0.06110691386854236, "grad_norm": 382.47320556640625, "learning_rate": 6.0500000000000005e-06, "loss": 14.5175, "step": 30250 }, { "epoch": 0.06112711450122618, "grad_norm": 351.77276611328125, "learning_rate": 6.052e-06, "loss": 26.4403, "step": 30260 }, { "epoch": 0.061147315133909995, "grad_norm": 276.05780029296875, "learning_rate": 6.054000000000001e-06, "loss": 45.0718, "step": 30270 }, { "epoch": 0.06116751576659381, "grad_norm": 535.48046875, "learning_rate": 6.056000000000001e-06, "loss": 29.234, "step": 30280 }, { "epoch": 0.06118771639927763, "grad_norm": 349.74420166015625, "learning_rate": 6.058e-06, "loss": 21.8729, "step": 30290 }, { "epoch": 0.06120791703196144, "grad_norm": 214.1453399658203, "learning_rate": 6.0600000000000004e-06, "loss": 46.9653, "step": 30300 }, { "epoch": 0.061228117664645254, "grad_norm": 506.78753662109375, "learning_rate": 6.062e-06, "loss": 57.6653, "step": 30310 }, { "epoch": 0.061248318297329074, "grad_norm": 418.99456787109375, "learning_rate": 6.064000000000001e-06, "loss": 35.4212, "step": 30320 }, { "epoch": 0.06126851893001289, "grad_norm": 276.8995666503906, "learning_rate": 6.066000000000001e-06, "loss": 21.0864, "step": 30330 }, { "epoch": 0.06128871956269671, "grad_norm": 318.2470397949219, "learning_rate": 6.068e-06, "loss": 44.3292, "step": 30340 }, { "epoch": 0.06130892019538052, "grad_norm": 181.05960083007812, "learning_rate": 6.07e-06, "loss": 29.2028, "step": 30350 }, { "epoch": 0.06132912082806433, "grad_norm": 432.36029052734375, "learning_rate": 6.0720000000000005e-06, "loss": 33.3237, "step": 30360 }, { "epoch": 0.06134932146074815, "grad_norm": 281.9981689453125, "learning_rate": 6.074000000000001e-06, "loss": 37.4251, "step": 30370 }, { "epoch": 0.061369522093431966, "grad_norm": 261.13568115234375, "learning_rate": 6.076000000000001e-06, "loss": 24.9861, "step": 30380 }, { "epoch": 0.06138972272611578, "grad_norm": 446.2455139160156, "learning_rate": 6.078000000000001e-06, "loss": 33.9992, "step": 30390 }, { "epoch": 0.0614099233587996, "grad_norm": 235.94387817382812, "learning_rate": 6.08e-06, "loss": 43.7818, "step": 30400 }, { "epoch": 0.06143012399148341, "grad_norm": 311.6600341796875, "learning_rate": 6.082e-06, "loss": 32.9079, "step": 30410 }, { "epoch": 0.061450324624167225, "grad_norm": 164.4243927001953, "learning_rate": 6.084000000000001e-06, "loss": 42.889, "step": 30420 }, { "epoch": 0.061470525256851045, "grad_norm": 194.65325927734375, "learning_rate": 6.086000000000001e-06, "loss": 33.9423, "step": 30430 }, { "epoch": 0.06149072588953486, "grad_norm": 297.55780029296875, "learning_rate": 6.088000000000001e-06, "loss": 48.1404, "step": 30440 }, { "epoch": 0.06151092652221868, "grad_norm": 0.0, "learning_rate": 6.09e-06, "loss": 17.0046, "step": 30450 }, { "epoch": 0.06153112715490249, "grad_norm": 130.99853515625, "learning_rate": 6.092e-06, "loss": 16.8665, "step": 30460 }, { "epoch": 0.061551327787586305, "grad_norm": 255.55697631835938, "learning_rate": 6.094000000000001e-06, "loss": 25.4466, "step": 30470 }, { "epoch": 0.061571528420270125, "grad_norm": 472.2217712402344, "learning_rate": 6.096000000000001e-06, "loss": 26.564, "step": 30480 }, { "epoch": 0.06159172905295394, "grad_norm": 275.3928527832031, "learning_rate": 6.098000000000001e-06, "loss": 46.937, "step": 30490 }, { "epoch": 0.06161192968563775, "grad_norm": 251.95071411132812, "learning_rate": 6.1e-06, "loss": 37.0635, "step": 30500 }, { "epoch": 0.06163213031832157, "grad_norm": 216.62257385253906, "learning_rate": 6.102e-06, "loss": 17.3043, "step": 30510 }, { "epoch": 0.061652330951005384, "grad_norm": 69.45738220214844, "learning_rate": 6.104000000000001e-06, "loss": 23.1206, "step": 30520 }, { "epoch": 0.061672531583689204, "grad_norm": 203.24461364746094, "learning_rate": 6.1060000000000005e-06, "loss": 37.486, "step": 30530 }, { "epoch": 0.06169273221637302, "grad_norm": 476.6273498535156, "learning_rate": 6.108000000000001e-06, "loss": 31.8076, "step": 30540 }, { "epoch": 0.06171293284905683, "grad_norm": 437.2422790527344, "learning_rate": 6.110000000000001e-06, "loss": 27.1038, "step": 30550 }, { "epoch": 0.06173313348174065, "grad_norm": 68.24126434326172, "learning_rate": 6.112e-06, "loss": 26.6663, "step": 30560 }, { "epoch": 0.06175333411442446, "grad_norm": 22.674991607666016, "learning_rate": 6.114000000000001e-06, "loss": 18.3345, "step": 30570 }, { "epoch": 0.061773534747108276, "grad_norm": 90.71672058105469, "learning_rate": 6.116000000000001e-06, "loss": 27.8331, "step": 30580 }, { "epoch": 0.061793735379792096, "grad_norm": 361.9411315917969, "learning_rate": 6.1180000000000005e-06, "loss": 37.8184, "step": 30590 }, { "epoch": 0.06181393601247591, "grad_norm": 558.2506713867188, "learning_rate": 6.120000000000001e-06, "loss": 26.6005, "step": 30600 }, { "epoch": 0.06183413664515973, "grad_norm": 139.87620544433594, "learning_rate": 6.122e-06, "loss": 33.3035, "step": 30610 }, { "epoch": 0.06185433727784354, "grad_norm": 136.3228759765625, "learning_rate": 6.124000000000001e-06, "loss": 36.9319, "step": 30620 }, { "epoch": 0.061874537910527355, "grad_norm": 170.49241638183594, "learning_rate": 6.126000000000001e-06, "loss": 28.5941, "step": 30630 }, { "epoch": 0.061894738543211175, "grad_norm": 477.5794677734375, "learning_rate": 6.1280000000000005e-06, "loss": 35.1771, "step": 30640 }, { "epoch": 0.06191493917589499, "grad_norm": 349.8186340332031, "learning_rate": 6.130000000000001e-06, "loss": 19.9806, "step": 30650 }, { "epoch": 0.0619351398085788, "grad_norm": 514.4276733398438, "learning_rate": 6.132e-06, "loss": 22.9305, "step": 30660 }, { "epoch": 0.06195534044126262, "grad_norm": 267.6043395996094, "learning_rate": 6.134e-06, "loss": 22.0032, "step": 30670 }, { "epoch": 0.061975541073946434, "grad_norm": 278.1367492675781, "learning_rate": 6.136000000000001e-06, "loss": 55.7413, "step": 30680 }, { "epoch": 0.061995741706630254, "grad_norm": 1046.7203369140625, "learning_rate": 6.138e-06, "loss": 32.193, "step": 30690 }, { "epoch": 0.06201594233931407, "grad_norm": 191.1005401611328, "learning_rate": 6.1400000000000005e-06, "loss": 30.135, "step": 30700 }, { "epoch": 0.06203614297199788, "grad_norm": 605.7132568359375, "learning_rate": 6.142e-06, "loss": 31.9819, "step": 30710 }, { "epoch": 0.0620563436046817, "grad_norm": 248.0739288330078, "learning_rate": 6.144e-06, "loss": 18.6615, "step": 30720 }, { "epoch": 0.062076544237365514, "grad_norm": 619.3324584960938, "learning_rate": 6.146000000000001e-06, "loss": 23.0207, "step": 30730 }, { "epoch": 0.06209674487004933, "grad_norm": 468.2471618652344, "learning_rate": 6.148e-06, "loss": 35.1285, "step": 30740 }, { "epoch": 0.06211694550273315, "grad_norm": 163.19607543945312, "learning_rate": 6.15e-06, "loss": 30.7459, "step": 30750 }, { "epoch": 0.06213714613541696, "grad_norm": 80.8536148071289, "learning_rate": 6.1520000000000006e-06, "loss": 22.0217, "step": 30760 }, { "epoch": 0.06215734676810078, "grad_norm": 274.7729187011719, "learning_rate": 6.154e-06, "loss": 38.0904, "step": 30770 }, { "epoch": 0.06217754740078459, "grad_norm": 313.94451904296875, "learning_rate": 6.156000000000001e-06, "loss": 29.6225, "step": 30780 }, { "epoch": 0.062197748033468406, "grad_norm": 124.19025421142578, "learning_rate": 6.158000000000001e-06, "loss": 41.8659, "step": 30790 }, { "epoch": 0.062217948666152226, "grad_norm": 588.3652954101562, "learning_rate": 6.16e-06, "loss": 34.6823, "step": 30800 }, { "epoch": 0.06223814929883604, "grad_norm": 213.0446319580078, "learning_rate": 6.1620000000000005e-06, "loss": 33.2519, "step": 30810 }, { "epoch": 0.06225834993151985, "grad_norm": 74.74567413330078, "learning_rate": 6.164e-06, "loss": 39.3659, "step": 30820 }, { "epoch": 0.06227855056420367, "grad_norm": 715.4902954101562, "learning_rate": 6.166000000000001e-06, "loss": 37.7848, "step": 30830 }, { "epoch": 0.062298751196887485, "grad_norm": 350.371826171875, "learning_rate": 6.168000000000001e-06, "loss": 21.8584, "step": 30840 }, { "epoch": 0.062318951829571305, "grad_norm": 128.76576232910156, "learning_rate": 6.17e-06, "loss": 25.2108, "step": 30850 }, { "epoch": 0.06233915246225512, "grad_norm": 234.1318359375, "learning_rate": 6.172e-06, "loss": 21.6887, "step": 30860 }, { "epoch": 0.06235935309493893, "grad_norm": 248.8392333984375, "learning_rate": 6.174e-06, "loss": 25.499, "step": 30870 }, { "epoch": 0.06237955372762275, "grad_norm": 642.6930541992188, "learning_rate": 6.176000000000001e-06, "loss": 67.7303, "step": 30880 }, { "epoch": 0.062399754360306564, "grad_norm": 237.97731018066406, "learning_rate": 6.178000000000001e-06, "loss": 16.3025, "step": 30890 }, { "epoch": 0.06241995499299038, "grad_norm": 163.39462280273438, "learning_rate": 6.18e-06, "loss": 39.714, "step": 30900 }, { "epoch": 0.0624401556256742, "grad_norm": 883.2078857421875, "learning_rate": 6.182e-06, "loss": 29.9178, "step": 30910 }, { "epoch": 0.06246035625835801, "grad_norm": 309.5052490234375, "learning_rate": 6.184e-06, "loss": 16.0204, "step": 30920 }, { "epoch": 0.06248055689104183, "grad_norm": 345.2524719238281, "learning_rate": 6.1860000000000006e-06, "loss": 32.9796, "step": 30930 }, { "epoch": 0.06250075752372564, "grad_norm": 279.456298828125, "learning_rate": 6.188000000000001e-06, "loss": 31.8682, "step": 30940 }, { "epoch": 0.06252095815640946, "grad_norm": 108.81139373779297, "learning_rate": 6.190000000000001e-06, "loss": 31.8013, "step": 30950 }, { "epoch": 0.06254115878909328, "grad_norm": 355.4000244140625, "learning_rate": 6.192e-06, "loss": 22.9933, "step": 30960 }, { "epoch": 0.0625613594217771, "grad_norm": 116.89779663085938, "learning_rate": 6.194e-06, "loss": 34.6513, "step": 30970 }, { "epoch": 0.0625815600544609, "grad_norm": 386.8528137207031, "learning_rate": 6.196000000000001e-06, "loss": 41.3564, "step": 30980 }, { "epoch": 0.06260176068714472, "grad_norm": 1027.4593505859375, "learning_rate": 6.198000000000001e-06, "loss": 21.2024, "step": 30990 }, { "epoch": 0.06262196131982854, "grad_norm": 504.21697998046875, "learning_rate": 6.200000000000001e-06, "loss": 50.8168, "step": 31000 }, { "epoch": 0.06264216195251235, "grad_norm": 241.14385986328125, "learning_rate": 6.202e-06, "loss": 22.5473, "step": 31010 }, { "epoch": 0.06266236258519617, "grad_norm": 928.2057495117188, "learning_rate": 6.204e-06, "loss": 32.5549, "step": 31020 }, { "epoch": 0.06268256321787999, "grad_norm": 161.94825744628906, "learning_rate": 6.206000000000001e-06, "loss": 20.8841, "step": 31030 }, { "epoch": 0.0627027638505638, "grad_norm": 479.74334716796875, "learning_rate": 6.2080000000000005e-06, "loss": 35.738, "step": 31040 }, { "epoch": 0.06272296448324761, "grad_norm": 467.5372009277344, "learning_rate": 6.210000000000001e-06, "loss": 25.9807, "step": 31050 }, { "epoch": 0.06274316511593143, "grad_norm": 459.4605407714844, "learning_rate": 6.212e-06, "loss": 42.3993, "step": 31060 }, { "epoch": 0.06276336574861524, "grad_norm": 135.55148315429688, "learning_rate": 6.214e-06, "loss": 30.4692, "step": 31070 }, { "epoch": 0.06278356638129906, "grad_norm": 268.8945617675781, "learning_rate": 6.216000000000001e-06, "loss": 21.0544, "step": 31080 }, { "epoch": 0.06280376701398288, "grad_norm": 113.16512298583984, "learning_rate": 6.2180000000000004e-06, "loss": 28.2533, "step": 31090 }, { "epoch": 0.06282396764666669, "grad_norm": 599.943603515625, "learning_rate": 6.220000000000001e-06, "loss": 37.3994, "step": 31100 }, { "epoch": 0.0628441682793505, "grad_norm": 219.7691650390625, "learning_rate": 6.222e-06, "loss": 35.7652, "step": 31110 }, { "epoch": 0.06286436891203433, "grad_norm": 370.2511291503906, "learning_rate": 6.224e-06, "loss": 38.8161, "step": 31120 }, { "epoch": 0.06288456954471815, "grad_norm": 755.7047729492188, "learning_rate": 6.226000000000001e-06, "loss": 51.8621, "step": 31130 }, { "epoch": 0.06290477017740195, "grad_norm": 173.30239868164062, "learning_rate": 6.228e-06, "loss": 23.396, "step": 31140 }, { "epoch": 0.06292497081008577, "grad_norm": 108.97765350341797, "learning_rate": 6.2300000000000005e-06, "loss": 22.8525, "step": 31150 }, { "epoch": 0.06294517144276959, "grad_norm": 151.0491180419922, "learning_rate": 6.232000000000001e-06, "loss": 27.081, "step": 31160 }, { "epoch": 0.0629653720754534, "grad_norm": 464.9666748046875, "learning_rate": 6.234e-06, "loss": 51.5493, "step": 31170 }, { "epoch": 0.06298557270813722, "grad_norm": 216.05465698242188, "learning_rate": 6.236000000000001e-06, "loss": 41.577, "step": 31180 }, { "epoch": 0.06300577334082104, "grad_norm": 525.0491333007812, "learning_rate": 6.238000000000001e-06, "loss": 37.8237, "step": 31190 }, { "epoch": 0.06302597397350485, "grad_norm": 493.6592712402344, "learning_rate": 6.24e-06, "loss": 49.5139, "step": 31200 }, { "epoch": 0.06304617460618867, "grad_norm": 242.93325805664062, "learning_rate": 6.2420000000000005e-06, "loss": 16.9568, "step": 31210 }, { "epoch": 0.06306637523887249, "grad_norm": 312.7550048828125, "learning_rate": 6.244e-06, "loss": 44.0575, "step": 31220 }, { "epoch": 0.06308657587155629, "grad_norm": 104.01026153564453, "learning_rate": 6.246000000000001e-06, "loss": 33.0106, "step": 31230 }, { "epoch": 0.06310677650424011, "grad_norm": 683.4473876953125, "learning_rate": 6.248000000000001e-06, "loss": 37.1409, "step": 31240 }, { "epoch": 0.06312697713692393, "grad_norm": 302.6859436035156, "learning_rate": 6.25e-06, "loss": 35.7305, "step": 31250 }, { "epoch": 0.06314717776960774, "grad_norm": 149.64959716796875, "learning_rate": 6.2520000000000004e-06, "loss": 31.0736, "step": 31260 }, { "epoch": 0.06316737840229156, "grad_norm": 0.0, "learning_rate": 6.254e-06, "loss": 39.2854, "step": 31270 }, { "epoch": 0.06318757903497538, "grad_norm": 378.40081787109375, "learning_rate": 6.256000000000001e-06, "loss": 21.5534, "step": 31280 }, { "epoch": 0.0632077796676592, "grad_norm": 483.0423889160156, "learning_rate": 6.258000000000001e-06, "loss": 36.5317, "step": 31290 }, { "epoch": 0.063227980300343, "grad_norm": 287.85260009765625, "learning_rate": 6.26e-06, "loss": 50.7559, "step": 31300 }, { "epoch": 0.06324818093302682, "grad_norm": 21.486614227294922, "learning_rate": 6.262e-06, "loss": 39.1988, "step": 31310 }, { "epoch": 0.06326838156571064, "grad_norm": 331.4032287597656, "learning_rate": 6.264e-06, "loss": 43.859, "step": 31320 }, { "epoch": 0.06328858219839445, "grad_norm": 162.57525634765625, "learning_rate": 6.266000000000001e-06, "loss": 30.7219, "step": 31330 }, { "epoch": 0.06330878283107827, "grad_norm": 225.4456024169922, "learning_rate": 6.268000000000001e-06, "loss": 21.402, "step": 31340 }, { "epoch": 0.06332898346376209, "grad_norm": 245.62960815429688, "learning_rate": 6.27e-06, "loss": 50.0203, "step": 31350 }, { "epoch": 0.0633491840964459, "grad_norm": 370.8708190917969, "learning_rate": 6.272e-06, "loss": 26.6223, "step": 31360 }, { "epoch": 0.06336938472912972, "grad_norm": 535.5399169921875, "learning_rate": 6.274e-06, "loss": 30.886, "step": 31370 }, { "epoch": 0.06338958536181354, "grad_norm": 179.83529663085938, "learning_rate": 6.2760000000000006e-06, "loss": 37.2484, "step": 31380 }, { "epoch": 0.06340978599449734, "grad_norm": 177.88954162597656, "learning_rate": 6.278000000000001e-06, "loss": 36.631, "step": 31390 }, { "epoch": 0.06342998662718116, "grad_norm": 320.6230163574219, "learning_rate": 6.280000000000001e-06, "loss": 26.9575, "step": 31400 }, { "epoch": 0.06345018725986498, "grad_norm": 301.15283203125, "learning_rate": 6.282e-06, "loss": 28.8396, "step": 31410 }, { "epoch": 0.06347038789254879, "grad_norm": 161.06358337402344, "learning_rate": 6.284e-06, "loss": 25.2607, "step": 31420 }, { "epoch": 0.06349058852523261, "grad_norm": 453.5301513671875, "learning_rate": 6.286000000000001e-06, "loss": 31.7918, "step": 31430 }, { "epoch": 0.06351078915791643, "grad_norm": 236.03463745117188, "learning_rate": 6.288000000000001e-06, "loss": 46.3517, "step": 31440 }, { "epoch": 0.06353098979060025, "grad_norm": 174.88978576660156, "learning_rate": 6.290000000000001e-06, "loss": 27.5495, "step": 31450 }, { "epoch": 0.06355119042328405, "grad_norm": 604.4745483398438, "learning_rate": 6.292e-06, "loss": 29.4492, "step": 31460 }, { "epoch": 0.06357139105596787, "grad_norm": 399.3653259277344, "learning_rate": 6.294e-06, "loss": 42.497, "step": 31470 }, { "epoch": 0.0635915916886517, "grad_norm": 255.41188049316406, "learning_rate": 6.296000000000001e-06, "loss": 33.6953, "step": 31480 }, { "epoch": 0.0636117923213355, "grad_norm": 367.0548400878906, "learning_rate": 6.2980000000000005e-06, "loss": 37.5313, "step": 31490 }, { "epoch": 0.06363199295401932, "grad_norm": 523.047607421875, "learning_rate": 6.300000000000001e-06, "loss": 34.2881, "step": 31500 }, { "epoch": 0.06365219358670314, "grad_norm": 78.09286499023438, "learning_rate": 6.302e-06, "loss": 32.4923, "step": 31510 }, { "epoch": 0.06367239421938695, "grad_norm": 149.17787170410156, "learning_rate": 6.304e-06, "loss": 21.4322, "step": 31520 }, { "epoch": 0.06369259485207077, "grad_norm": 230.476318359375, "learning_rate": 6.306000000000001e-06, "loss": 41.6998, "step": 31530 }, { "epoch": 0.06371279548475459, "grad_norm": 170.48338317871094, "learning_rate": 6.308e-06, "loss": 22.8535, "step": 31540 }, { "epoch": 0.06373299611743839, "grad_norm": 216.60531616210938, "learning_rate": 6.3100000000000006e-06, "loss": 30.7322, "step": 31550 }, { "epoch": 0.06375319675012221, "grad_norm": 403.4285583496094, "learning_rate": 6.312000000000001e-06, "loss": 36.0672, "step": 31560 }, { "epoch": 0.06377339738280603, "grad_norm": 627.25341796875, "learning_rate": 6.314e-06, "loss": 15.5175, "step": 31570 }, { "epoch": 0.06379359801548984, "grad_norm": 505.1824035644531, "learning_rate": 6.316000000000001e-06, "loss": 35.7025, "step": 31580 }, { "epoch": 0.06381379864817366, "grad_norm": 193.27236938476562, "learning_rate": 6.318000000000001e-06, "loss": 22.3384, "step": 31590 }, { "epoch": 0.06383399928085748, "grad_norm": 72.33329772949219, "learning_rate": 6.3200000000000005e-06, "loss": 48.2999, "step": 31600 }, { "epoch": 0.0638541999135413, "grad_norm": 488.8270568847656, "learning_rate": 6.322000000000001e-06, "loss": 35.5087, "step": 31610 }, { "epoch": 0.0638744005462251, "grad_norm": 349.91180419921875, "learning_rate": 6.324e-06, "loss": 17.6751, "step": 31620 }, { "epoch": 0.06389460117890892, "grad_norm": 386.2161865234375, "learning_rate": 6.326000000000001e-06, "loss": 29.5315, "step": 31630 }, { "epoch": 0.06391480181159274, "grad_norm": 315.99072265625, "learning_rate": 6.328000000000001e-06, "loss": 23.8086, "step": 31640 }, { "epoch": 0.06393500244427655, "grad_norm": 429.48541259765625, "learning_rate": 6.33e-06, "loss": 28.3555, "step": 31650 }, { "epoch": 0.06395520307696037, "grad_norm": 361.6824951171875, "learning_rate": 6.3320000000000005e-06, "loss": 40.4723, "step": 31660 }, { "epoch": 0.06397540370964419, "grad_norm": 195.50439453125, "learning_rate": 6.334e-06, "loss": 7.5259, "step": 31670 }, { "epoch": 0.063995604342328, "grad_norm": 1015.4526977539062, "learning_rate": 6.336000000000001e-06, "loss": 38.0878, "step": 31680 }, { "epoch": 0.06401580497501182, "grad_norm": 258.359619140625, "learning_rate": 6.338000000000001e-06, "loss": 32.6677, "step": 31690 }, { "epoch": 0.06403600560769564, "grad_norm": 239.80706787109375, "learning_rate": 6.34e-06, "loss": 25.6251, "step": 31700 }, { "epoch": 0.06405620624037944, "grad_norm": 334.3829040527344, "learning_rate": 6.3420000000000004e-06, "loss": 50.864, "step": 31710 }, { "epoch": 0.06407640687306326, "grad_norm": 183.28131103515625, "learning_rate": 6.344e-06, "loss": 29.444, "step": 31720 }, { "epoch": 0.06409660750574708, "grad_norm": 30.78969955444336, "learning_rate": 6.346000000000001e-06, "loss": 30.965, "step": 31730 }, { "epoch": 0.06411680813843089, "grad_norm": 365.88946533203125, "learning_rate": 6.348000000000001e-06, "loss": 38.7027, "step": 31740 }, { "epoch": 0.06413700877111471, "grad_norm": 251.9341278076172, "learning_rate": 6.35e-06, "loss": 26.6599, "step": 31750 }, { "epoch": 0.06415720940379853, "grad_norm": 601.3215942382812, "learning_rate": 6.352e-06, "loss": 20.5381, "step": 31760 }, { "epoch": 0.06417741003648235, "grad_norm": 346.9103088378906, "learning_rate": 6.3540000000000005e-06, "loss": 56.2796, "step": 31770 }, { "epoch": 0.06419761066916616, "grad_norm": 273.9833068847656, "learning_rate": 6.356000000000001e-06, "loss": 54.1161, "step": 31780 }, { "epoch": 0.06421781130184998, "grad_norm": 235.94200134277344, "learning_rate": 6.358000000000001e-06, "loss": 22.5506, "step": 31790 }, { "epoch": 0.0642380119345338, "grad_norm": 217.29452514648438, "learning_rate": 6.360000000000001e-06, "loss": 37.0023, "step": 31800 }, { "epoch": 0.0642582125672176, "grad_norm": 42.5190544128418, "learning_rate": 6.362e-06, "loss": 26.3441, "step": 31810 }, { "epoch": 0.06427841319990142, "grad_norm": 633.7603759765625, "learning_rate": 6.364e-06, "loss": 27.003, "step": 31820 }, { "epoch": 0.06429861383258524, "grad_norm": 313.87615966796875, "learning_rate": 6.366000000000001e-06, "loss": 47.9475, "step": 31830 }, { "epoch": 0.06431881446526905, "grad_norm": 301.42657470703125, "learning_rate": 6.368000000000001e-06, "loss": 9.5569, "step": 31840 }, { "epoch": 0.06433901509795287, "grad_norm": 417.0672912597656, "learning_rate": 6.370000000000001e-06, "loss": 43.0948, "step": 31850 }, { "epoch": 0.06435921573063669, "grad_norm": 405.72381591796875, "learning_rate": 6.372e-06, "loss": 21.5961, "step": 31860 }, { "epoch": 0.0643794163633205, "grad_norm": 126.4342041015625, "learning_rate": 6.374e-06, "loss": 42.5767, "step": 31870 }, { "epoch": 0.06439961699600431, "grad_norm": 215.33985900878906, "learning_rate": 6.376e-06, "loss": 18.0165, "step": 31880 }, { "epoch": 0.06441981762868813, "grad_norm": 396.76251220703125, "learning_rate": 6.378000000000001e-06, "loss": 22.2276, "step": 31890 }, { "epoch": 0.06444001826137194, "grad_norm": 77.96216583251953, "learning_rate": 6.380000000000001e-06, "loss": 30.147, "step": 31900 }, { "epoch": 0.06446021889405576, "grad_norm": 81.74349975585938, "learning_rate": 6.382e-06, "loss": 31.5569, "step": 31910 }, { "epoch": 0.06448041952673958, "grad_norm": 187.0025634765625, "learning_rate": 6.384e-06, "loss": 30.6539, "step": 31920 }, { "epoch": 0.0645006201594234, "grad_norm": 277.3128662109375, "learning_rate": 6.386e-06, "loss": 32.4836, "step": 31930 }, { "epoch": 0.0645208207921072, "grad_norm": 278.8846130371094, "learning_rate": 6.3880000000000005e-06, "loss": 47.4009, "step": 31940 }, { "epoch": 0.06454102142479103, "grad_norm": 274.27716064453125, "learning_rate": 6.390000000000001e-06, "loss": 25.1324, "step": 31950 }, { "epoch": 0.06456122205747485, "grad_norm": 256.1378173828125, "learning_rate": 6.392000000000001e-06, "loss": 31.5127, "step": 31960 }, { "epoch": 0.06458142269015865, "grad_norm": 707.9869995117188, "learning_rate": 6.394e-06, "loss": 39.2091, "step": 31970 }, { "epoch": 0.06460162332284247, "grad_norm": 365.60479736328125, "learning_rate": 6.396e-06, "loss": 22.9649, "step": 31980 }, { "epoch": 0.06462182395552629, "grad_norm": 287.0870666503906, "learning_rate": 6.398000000000001e-06, "loss": 37.4186, "step": 31990 }, { "epoch": 0.0646420245882101, "grad_norm": 857.7494506835938, "learning_rate": 6.4000000000000006e-06, "loss": 35.1439, "step": 32000 }, { "epoch": 0.06466222522089392, "grad_norm": 230.5009002685547, "learning_rate": 6.402000000000001e-06, "loss": 32.6803, "step": 32010 }, { "epoch": 0.06468242585357774, "grad_norm": 0.0, "learning_rate": 6.404e-06, "loss": 42.9031, "step": 32020 }, { "epoch": 0.06470262648626154, "grad_norm": 888.7249145507812, "learning_rate": 6.406e-06, "loss": 40.7608, "step": 32030 }, { "epoch": 0.06472282711894536, "grad_norm": 168.53466796875, "learning_rate": 6.408000000000001e-06, "loss": 32.6981, "step": 32040 }, { "epoch": 0.06474302775162918, "grad_norm": 362.97369384765625, "learning_rate": 6.4100000000000005e-06, "loss": 39.7574, "step": 32050 }, { "epoch": 0.06476322838431299, "grad_norm": 614.9750366210938, "learning_rate": 6.412000000000001e-06, "loss": 38.8388, "step": 32060 }, { "epoch": 0.06478342901699681, "grad_norm": 768.3527221679688, "learning_rate": 6.414e-06, "loss": 49.6656, "step": 32070 }, { "epoch": 0.06480362964968063, "grad_norm": 301.6710510253906, "learning_rate": 6.416e-06, "loss": 44.5362, "step": 32080 }, { "epoch": 0.06482383028236445, "grad_norm": 352.0856018066406, "learning_rate": 6.418000000000001e-06, "loss": 23.2298, "step": 32090 }, { "epoch": 0.06484403091504826, "grad_norm": 325.6612854003906, "learning_rate": 6.42e-06, "loss": 36.4103, "step": 32100 }, { "epoch": 0.06486423154773208, "grad_norm": 246.91787719726562, "learning_rate": 6.4220000000000005e-06, "loss": 31.4514, "step": 32110 }, { "epoch": 0.0648844321804159, "grad_norm": 144.6194305419922, "learning_rate": 6.424e-06, "loss": 20.466, "step": 32120 }, { "epoch": 0.0649046328130997, "grad_norm": 513.7384643554688, "learning_rate": 6.426e-06, "loss": 30.9926, "step": 32130 }, { "epoch": 0.06492483344578352, "grad_norm": 284.63824462890625, "learning_rate": 6.428000000000001e-06, "loss": 28.3641, "step": 32140 }, { "epoch": 0.06494503407846734, "grad_norm": 315.3289489746094, "learning_rate": 6.43e-06, "loss": 26.0518, "step": 32150 }, { "epoch": 0.06496523471115115, "grad_norm": 370.83734130859375, "learning_rate": 6.432e-06, "loss": 26.6824, "step": 32160 }, { "epoch": 0.06498543534383497, "grad_norm": 280.5288391113281, "learning_rate": 6.4340000000000006e-06, "loss": 38.9883, "step": 32170 }, { "epoch": 0.06500563597651879, "grad_norm": 185.65391540527344, "learning_rate": 6.436e-06, "loss": 27.2957, "step": 32180 }, { "epoch": 0.0650258366092026, "grad_norm": 311.3098449707031, "learning_rate": 6.438000000000001e-06, "loss": 26.5012, "step": 32190 }, { "epoch": 0.06504603724188641, "grad_norm": 797.0654907226562, "learning_rate": 6.440000000000001e-06, "loss": 35.6305, "step": 32200 }, { "epoch": 0.06506623787457023, "grad_norm": 422.99603271484375, "learning_rate": 6.442e-06, "loss": 28.8308, "step": 32210 }, { "epoch": 0.06508643850725404, "grad_norm": 251.99163818359375, "learning_rate": 6.4440000000000005e-06, "loss": 28.949, "step": 32220 }, { "epoch": 0.06510663913993786, "grad_norm": 390.827392578125, "learning_rate": 6.446e-06, "loss": 30.2691, "step": 32230 }, { "epoch": 0.06512683977262168, "grad_norm": 596.1945190429688, "learning_rate": 6.448000000000001e-06, "loss": 25.4957, "step": 32240 }, { "epoch": 0.0651470404053055, "grad_norm": 728.2542724609375, "learning_rate": 6.450000000000001e-06, "loss": 39.0351, "step": 32250 }, { "epoch": 0.0651672410379893, "grad_norm": 0.0, "learning_rate": 6.452e-06, "loss": 25.1864, "step": 32260 }, { "epoch": 0.06518744167067313, "grad_norm": 219.91683959960938, "learning_rate": 6.454e-06, "loss": 38.9675, "step": 32270 }, { "epoch": 0.06520764230335695, "grad_norm": 166.8685760498047, "learning_rate": 6.456e-06, "loss": 48.1337, "step": 32280 }, { "epoch": 0.06522784293604075, "grad_norm": 282.66595458984375, "learning_rate": 6.458000000000001e-06, "loss": 41.6651, "step": 32290 }, { "epoch": 0.06524804356872457, "grad_norm": 502.9345397949219, "learning_rate": 6.460000000000001e-06, "loss": 27.2782, "step": 32300 }, { "epoch": 0.06526824420140839, "grad_norm": 226.3009490966797, "learning_rate": 6.462e-06, "loss": 25.7543, "step": 32310 }, { "epoch": 0.0652884448340922, "grad_norm": 631.1998291015625, "learning_rate": 6.464e-06, "loss": 29.3713, "step": 32320 }, { "epoch": 0.06530864546677602, "grad_norm": 144.0883331298828, "learning_rate": 6.4660000000000004e-06, "loss": 41.0915, "step": 32330 }, { "epoch": 0.06532884609945984, "grad_norm": 184.23727416992188, "learning_rate": 6.468000000000001e-06, "loss": 36.1916, "step": 32340 }, { "epoch": 0.06534904673214365, "grad_norm": 225.6820526123047, "learning_rate": 6.470000000000001e-06, "loss": 20.168, "step": 32350 }, { "epoch": 0.06536924736482747, "grad_norm": 117.45604705810547, "learning_rate": 6.472000000000001e-06, "loss": 23.8179, "step": 32360 }, { "epoch": 0.06538944799751129, "grad_norm": 146.49359130859375, "learning_rate": 6.474e-06, "loss": 23.6253, "step": 32370 }, { "epoch": 0.06540964863019509, "grad_norm": 301.24334716796875, "learning_rate": 6.476e-06, "loss": 38.1462, "step": 32380 }, { "epoch": 0.06542984926287891, "grad_norm": 316.0094909667969, "learning_rate": 6.478000000000001e-06, "loss": 29.6188, "step": 32390 }, { "epoch": 0.06545004989556273, "grad_norm": 124.8753662109375, "learning_rate": 6.480000000000001e-06, "loss": 20.0932, "step": 32400 }, { "epoch": 0.06547025052824655, "grad_norm": 499.7335510253906, "learning_rate": 6.482000000000001e-06, "loss": 46.8105, "step": 32410 }, { "epoch": 0.06549045116093036, "grad_norm": 231.91221618652344, "learning_rate": 6.484e-06, "loss": 37.2112, "step": 32420 }, { "epoch": 0.06551065179361418, "grad_norm": 46.266719818115234, "learning_rate": 6.486e-06, "loss": 31.7644, "step": 32430 }, { "epoch": 0.065530852426298, "grad_norm": 239.9111785888672, "learning_rate": 6.488000000000001e-06, "loss": 29.2133, "step": 32440 }, { "epoch": 0.0655510530589818, "grad_norm": 671.6369018554688, "learning_rate": 6.4900000000000005e-06, "loss": 46.1061, "step": 32450 }, { "epoch": 0.06557125369166562, "grad_norm": 236.44606018066406, "learning_rate": 6.492000000000001e-06, "loss": 37.9851, "step": 32460 }, { "epoch": 0.06559145432434944, "grad_norm": 506.38763427734375, "learning_rate": 6.494e-06, "loss": 22.3111, "step": 32470 }, { "epoch": 0.06561165495703325, "grad_norm": 447.9753112792969, "learning_rate": 6.496e-06, "loss": 39.3929, "step": 32480 }, { "epoch": 0.06563185558971707, "grad_norm": 207.03341674804688, "learning_rate": 6.498000000000001e-06, "loss": 35.3965, "step": 32490 }, { "epoch": 0.06565205622240089, "grad_norm": 121.53023529052734, "learning_rate": 6.5000000000000004e-06, "loss": 13.667, "step": 32500 }, { "epoch": 0.0656722568550847, "grad_norm": 223.34454345703125, "learning_rate": 6.502000000000001e-06, "loss": 56.5771, "step": 32510 }, { "epoch": 0.06569245748776852, "grad_norm": 142.69459533691406, "learning_rate": 6.504e-06, "loss": 27.5246, "step": 32520 }, { "epoch": 0.06571265812045234, "grad_norm": 299.4814453125, "learning_rate": 6.506e-06, "loss": 33.9023, "step": 32530 }, { "epoch": 0.06573285875313614, "grad_norm": 297.7349548339844, "learning_rate": 6.508000000000001e-06, "loss": 46.3944, "step": 32540 }, { "epoch": 0.06575305938581996, "grad_norm": 255.85806274414062, "learning_rate": 6.51e-06, "loss": 52.674, "step": 32550 }, { "epoch": 0.06577326001850378, "grad_norm": 0.0, "learning_rate": 6.5120000000000005e-06, "loss": 48.5663, "step": 32560 }, { "epoch": 0.0657934606511876, "grad_norm": 361.13751220703125, "learning_rate": 6.514000000000001e-06, "loss": 28.9672, "step": 32570 }, { "epoch": 0.06581366128387141, "grad_norm": 134.90330505371094, "learning_rate": 6.516e-06, "loss": 34.8168, "step": 32580 }, { "epoch": 0.06583386191655523, "grad_norm": 426.1916809082031, "learning_rate": 6.518000000000001e-06, "loss": 53.0908, "step": 32590 }, { "epoch": 0.06585406254923905, "grad_norm": 398.34185791015625, "learning_rate": 6.520000000000001e-06, "loss": 42.7177, "step": 32600 }, { "epoch": 0.06587426318192285, "grad_norm": 126.80687713623047, "learning_rate": 6.522e-06, "loss": 34.6576, "step": 32610 }, { "epoch": 0.06589446381460667, "grad_norm": 438.81744384765625, "learning_rate": 6.5240000000000006e-06, "loss": 24.524, "step": 32620 }, { "epoch": 0.0659146644472905, "grad_norm": 714.6431884765625, "learning_rate": 6.526e-06, "loss": 34.9471, "step": 32630 }, { "epoch": 0.0659348650799743, "grad_norm": 455.115234375, "learning_rate": 6.528000000000001e-06, "loss": 28.3848, "step": 32640 }, { "epoch": 0.06595506571265812, "grad_norm": 279.6618347167969, "learning_rate": 6.530000000000001e-06, "loss": 46.1341, "step": 32650 }, { "epoch": 0.06597526634534194, "grad_norm": 208.78688049316406, "learning_rate": 6.532e-06, "loss": 39.0124, "step": 32660 }, { "epoch": 0.06599546697802575, "grad_norm": 321.90948486328125, "learning_rate": 6.5340000000000005e-06, "loss": 26.6713, "step": 32670 }, { "epoch": 0.06601566761070957, "grad_norm": 497.13671875, "learning_rate": 6.536e-06, "loss": 44.0567, "step": 32680 }, { "epoch": 0.06603586824339339, "grad_norm": 239.60646057128906, "learning_rate": 6.538000000000001e-06, "loss": 47.399, "step": 32690 }, { "epoch": 0.06605606887607719, "grad_norm": 521.83935546875, "learning_rate": 6.540000000000001e-06, "loss": 49.8124, "step": 32700 }, { "epoch": 0.06607626950876101, "grad_norm": 92.99955749511719, "learning_rate": 6.542e-06, "loss": 33.817, "step": 32710 }, { "epoch": 0.06609647014144483, "grad_norm": 100.21802520751953, "learning_rate": 6.544e-06, "loss": 22.2892, "step": 32720 }, { "epoch": 0.06611667077412865, "grad_norm": 362.736572265625, "learning_rate": 6.5460000000000005e-06, "loss": 35.784, "step": 32730 }, { "epoch": 0.06613687140681246, "grad_norm": 119.27322387695312, "learning_rate": 6.548000000000001e-06, "loss": 21.0406, "step": 32740 }, { "epoch": 0.06615707203949628, "grad_norm": 278.6366271972656, "learning_rate": 6.550000000000001e-06, "loss": 33.9941, "step": 32750 }, { "epoch": 0.0661772726721801, "grad_norm": 298.3982849121094, "learning_rate": 6.552000000000001e-06, "loss": 35.6264, "step": 32760 }, { "epoch": 0.0661974733048639, "grad_norm": 151.81033325195312, "learning_rate": 6.554e-06, "loss": 30.9567, "step": 32770 }, { "epoch": 0.06621767393754772, "grad_norm": 67.10016632080078, "learning_rate": 6.556e-06, "loss": 33.6461, "step": 32780 }, { "epoch": 0.06623787457023154, "grad_norm": 351.99578857421875, "learning_rate": 6.558000000000001e-06, "loss": 18.9068, "step": 32790 }, { "epoch": 0.06625807520291535, "grad_norm": 374.946533203125, "learning_rate": 6.560000000000001e-06, "loss": 32.4607, "step": 32800 }, { "epoch": 0.06627827583559917, "grad_norm": 269.2948303222656, "learning_rate": 6.562000000000001e-06, "loss": 30.2373, "step": 32810 }, { "epoch": 0.06629847646828299, "grad_norm": 859.1407470703125, "learning_rate": 6.564e-06, "loss": 36.6777, "step": 32820 }, { "epoch": 0.0663186771009668, "grad_norm": 274.2523498535156, "learning_rate": 6.566e-06, "loss": 36.063, "step": 32830 }, { "epoch": 0.06633887773365062, "grad_norm": 597.974365234375, "learning_rate": 6.568000000000001e-06, "loss": 21.9826, "step": 32840 }, { "epoch": 0.06635907836633444, "grad_norm": 183.3679962158203, "learning_rate": 6.570000000000001e-06, "loss": 18.5156, "step": 32850 }, { "epoch": 0.06637927899901824, "grad_norm": 412.25689697265625, "learning_rate": 6.572000000000001e-06, "loss": 19.5776, "step": 32860 }, { "epoch": 0.06639947963170206, "grad_norm": 136.04270935058594, "learning_rate": 6.574e-06, "loss": 30.0052, "step": 32870 }, { "epoch": 0.06641968026438588, "grad_norm": 318.11456298828125, "learning_rate": 6.576e-06, "loss": 23.499, "step": 32880 }, { "epoch": 0.0664398808970697, "grad_norm": 169.84576416015625, "learning_rate": 6.578000000000001e-06, "loss": 31.3224, "step": 32890 }, { "epoch": 0.06646008152975351, "grad_norm": 88.23900604248047, "learning_rate": 6.5800000000000005e-06, "loss": 23.1529, "step": 32900 }, { "epoch": 0.06648028216243733, "grad_norm": 178.4117431640625, "learning_rate": 6.582000000000001e-06, "loss": 21.718, "step": 32910 }, { "epoch": 0.06650048279512115, "grad_norm": 26.720149993896484, "learning_rate": 6.584e-06, "loss": 30.7851, "step": 32920 }, { "epoch": 0.06652068342780496, "grad_norm": 348.2189025878906, "learning_rate": 6.586e-06, "loss": 27.1858, "step": 32930 }, { "epoch": 0.06654088406048878, "grad_norm": 680.848876953125, "learning_rate": 6.588000000000001e-06, "loss": 34.3593, "step": 32940 }, { "epoch": 0.0665610846931726, "grad_norm": 270.20550537109375, "learning_rate": 6.5900000000000004e-06, "loss": 19.6648, "step": 32950 }, { "epoch": 0.0665812853258564, "grad_norm": 288.2405700683594, "learning_rate": 6.592000000000001e-06, "loss": 30.9113, "step": 32960 }, { "epoch": 0.06660148595854022, "grad_norm": 205.9509735107422, "learning_rate": 6.594000000000001e-06, "loss": 24.7371, "step": 32970 }, { "epoch": 0.06662168659122404, "grad_norm": 360.29217529296875, "learning_rate": 6.596e-06, "loss": 25.1308, "step": 32980 }, { "epoch": 0.06664188722390785, "grad_norm": 194.65927124023438, "learning_rate": 6.598000000000001e-06, "loss": 23.4347, "step": 32990 }, { "epoch": 0.06666208785659167, "grad_norm": 288.93182373046875, "learning_rate": 6.600000000000001e-06, "loss": 47.0336, "step": 33000 }, { "epoch": 0.06668228848927549, "grad_norm": 188.95547485351562, "learning_rate": 6.6020000000000005e-06, "loss": 30.1425, "step": 33010 }, { "epoch": 0.0667024891219593, "grad_norm": 282.5049133300781, "learning_rate": 6.604000000000001e-06, "loss": 26.2927, "step": 33020 }, { "epoch": 0.06672268975464311, "grad_norm": 247.5895233154297, "learning_rate": 6.606e-06, "loss": 20.946, "step": 33030 }, { "epoch": 0.06674289038732693, "grad_norm": 73.73970794677734, "learning_rate": 6.608000000000001e-06, "loss": 37.6398, "step": 33040 }, { "epoch": 0.06676309102001075, "grad_norm": 603.597412109375, "learning_rate": 6.610000000000001e-06, "loss": 42.4184, "step": 33050 }, { "epoch": 0.06678329165269456, "grad_norm": 270.785400390625, "learning_rate": 6.612e-06, "loss": 19.0604, "step": 33060 }, { "epoch": 0.06680349228537838, "grad_norm": 177.11611938476562, "learning_rate": 6.6140000000000005e-06, "loss": 20.8227, "step": 33070 }, { "epoch": 0.0668236929180622, "grad_norm": 349.0059509277344, "learning_rate": 6.616e-06, "loss": 25.5373, "step": 33080 }, { "epoch": 0.066843893550746, "grad_norm": 422.9224548339844, "learning_rate": 6.618000000000001e-06, "loss": 39.4842, "step": 33090 }, { "epoch": 0.06686409418342983, "grad_norm": 330.58489990234375, "learning_rate": 6.620000000000001e-06, "loss": 21.5781, "step": 33100 }, { "epoch": 0.06688429481611365, "grad_norm": 92.37420654296875, "learning_rate": 6.622e-06, "loss": 40.4062, "step": 33110 }, { "epoch": 0.06690449544879745, "grad_norm": 404.5038146972656, "learning_rate": 6.6240000000000004e-06, "loss": 23.3225, "step": 33120 }, { "epoch": 0.06692469608148127, "grad_norm": 62.47111892700195, "learning_rate": 6.626000000000001e-06, "loss": 36.3375, "step": 33130 }, { "epoch": 0.06694489671416509, "grad_norm": 246.58319091796875, "learning_rate": 6.628e-06, "loss": 24.2121, "step": 33140 }, { "epoch": 0.0669650973468489, "grad_norm": 531.6671142578125, "learning_rate": 6.630000000000001e-06, "loss": 43.2375, "step": 33150 }, { "epoch": 0.06698529797953272, "grad_norm": 625.0757446289062, "learning_rate": 6.632000000000001e-06, "loss": 25.065, "step": 33160 }, { "epoch": 0.06700549861221654, "grad_norm": 262.08795166015625, "learning_rate": 6.634e-06, "loss": 32.6656, "step": 33170 }, { "epoch": 0.06702569924490034, "grad_norm": 553.5176391601562, "learning_rate": 6.6360000000000005e-06, "loss": 36.4868, "step": 33180 }, { "epoch": 0.06704589987758416, "grad_norm": 241.9178924560547, "learning_rate": 6.638e-06, "loss": 48.5743, "step": 33190 }, { "epoch": 0.06706610051026798, "grad_norm": 332.1084289550781, "learning_rate": 6.640000000000001e-06, "loss": 32.6446, "step": 33200 }, { "epoch": 0.0670863011429518, "grad_norm": 299.3291015625, "learning_rate": 6.642000000000001e-06, "loss": 35.5875, "step": 33210 }, { "epoch": 0.06710650177563561, "grad_norm": 104.87538146972656, "learning_rate": 6.644e-06, "loss": 29.3714, "step": 33220 }, { "epoch": 0.06712670240831943, "grad_norm": 242.60638427734375, "learning_rate": 6.646e-06, "loss": 25.3969, "step": 33230 }, { "epoch": 0.06714690304100325, "grad_norm": 422.2679748535156, "learning_rate": 6.648e-06, "loss": 38.9253, "step": 33240 }, { "epoch": 0.06716710367368706, "grad_norm": 412.9209289550781, "learning_rate": 6.650000000000001e-06, "loss": 42.5106, "step": 33250 }, { "epoch": 0.06718730430637088, "grad_norm": 367.3992614746094, "learning_rate": 6.652000000000001e-06, "loss": 39.1589, "step": 33260 }, { "epoch": 0.0672075049390547, "grad_norm": 365.3205261230469, "learning_rate": 6.654e-06, "loss": 35.5857, "step": 33270 }, { "epoch": 0.0672277055717385, "grad_norm": 378.38421630859375, "learning_rate": 6.656e-06, "loss": 55.893, "step": 33280 }, { "epoch": 0.06724790620442232, "grad_norm": 504.4052734375, "learning_rate": 6.658e-06, "loss": 35.7893, "step": 33290 }, { "epoch": 0.06726810683710614, "grad_norm": 108.6670913696289, "learning_rate": 6.660000000000001e-06, "loss": 23.6681, "step": 33300 }, { "epoch": 0.06728830746978995, "grad_norm": 271.6363830566406, "learning_rate": 6.662000000000001e-06, "loss": 16.8175, "step": 33310 }, { "epoch": 0.06730850810247377, "grad_norm": 203.37770080566406, "learning_rate": 6.664e-06, "loss": 34.6366, "step": 33320 }, { "epoch": 0.06732870873515759, "grad_norm": 173.56015014648438, "learning_rate": 6.666e-06, "loss": 30.8938, "step": 33330 }, { "epoch": 0.0673489093678414, "grad_norm": 83.42192840576172, "learning_rate": 6.668e-06, "loss": 33.6979, "step": 33340 }, { "epoch": 0.06736911000052521, "grad_norm": 262.2205810546875, "learning_rate": 6.6700000000000005e-06, "loss": 22.9726, "step": 33350 }, { "epoch": 0.06738931063320903, "grad_norm": 230.3037109375, "learning_rate": 6.672000000000001e-06, "loss": 32.0468, "step": 33360 }, { "epoch": 0.06740951126589285, "grad_norm": 532.1932373046875, "learning_rate": 6.674000000000001e-06, "loss": 24.6395, "step": 33370 }, { "epoch": 0.06742971189857666, "grad_norm": 148.4678192138672, "learning_rate": 6.676e-06, "loss": 39.7879, "step": 33380 }, { "epoch": 0.06744991253126048, "grad_norm": 141.72572326660156, "learning_rate": 6.678e-06, "loss": 30.3851, "step": 33390 }, { "epoch": 0.0674701131639443, "grad_norm": 114.70024871826172, "learning_rate": 6.680000000000001e-06, "loss": 33.2664, "step": 33400 }, { "epoch": 0.0674903137966281, "grad_norm": 57.21114730834961, "learning_rate": 6.6820000000000006e-06, "loss": 31.1881, "step": 33410 }, { "epoch": 0.06751051442931193, "grad_norm": 139.46958923339844, "learning_rate": 6.684000000000001e-06, "loss": 28.2396, "step": 33420 }, { "epoch": 0.06753071506199575, "grad_norm": 261.0444641113281, "learning_rate": 6.686e-06, "loss": 34.3243, "step": 33430 }, { "epoch": 0.06755091569467955, "grad_norm": 274.47125244140625, "learning_rate": 6.688e-06, "loss": 27.1973, "step": 33440 }, { "epoch": 0.06757111632736337, "grad_norm": 91.66280364990234, "learning_rate": 6.690000000000001e-06, "loss": 41.5254, "step": 33450 }, { "epoch": 0.06759131696004719, "grad_norm": 565.2498779296875, "learning_rate": 6.6920000000000005e-06, "loss": 28.3334, "step": 33460 }, { "epoch": 0.067611517592731, "grad_norm": 332.7196350097656, "learning_rate": 6.694000000000001e-06, "loss": 28.9104, "step": 33470 }, { "epoch": 0.06763171822541482, "grad_norm": 125.0504379272461, "learning_rate": 6.696e-06, "loss": 27.1868, "step": 33480 }, { "epoch": 0.06765191885809864, "grad_norm": 349.99664306640625, "learning_rate": 6.698e-06, "loss": 32.5222, "step": 33490 }, { "epoch": 0.06767211949078245, "grad_norm": 227.42576599121094, "learning_rate": 6.700000000000001e-06, "loss": 26.7263, "step": 33500 }, { "epoch": 0.06769232012346627, "grad_norm": 667.581787109375, "learning_rate": 6.702e-06, "loss": 32.7161, "step": 33510 }, { "epoch": 0.06771252075615009, "grad_norm": 359.27490234375, "learning_rate": 6.7040000000000005e-06, "loss": 21.117, "step": 33520 }, { "epoch": 0.06773272138883389, "grad_norm": 487.3028869628906, "learning_rate": 6.706000000000001e-06, "loss": 34.4992, "step": 33530 }, { "epoch": 0.06775292202151771, "grad_norm": 296.8011474609375, "learning_rate": 6.708e-06, "loss": 30.075, "step": 33540 }, { "epoch": 0.06777312265420153, "grad_norm": 287.0123291015625, "learning_rate": 6.710000000000001e-06, "loss": 23.3729, "step": 33550 }, { "epoch": 0.06779332328688535, "grad_norm": 189.02500915527344, "learning_rate": 6.712000000000001e-06, "loss": 46.273, "step": 33560 }, { "epoch": 0.06781352391956916, "grad_norm": 255.7252197265625, "learning_rate": 6.7140000000000004e-06, "loss": 26.6086, "step": 33570 }, { "epoch": 0.06783372455225298, "grad_norm": 304.9705810546875, "learning_rate": 6.716000000000001e-06, "loss": 29.9635, "step": 33580 }, { "epoch": 0.0678539251849368, "grad_norm": 170.6367645263672, "learning_rate": 6.718e-06, "loss": 35.7287, "step": 33590 }, { "epoch": 0.0678741258176206, "grad_norm": 489.3557434082031, "learning_rate": 6.720000000000001e-06, "loss": 32.5018, "step": 33600 }, { "epoch": 0.06789432645030442, "grad_norm": 198.56910705566406, "learning_rate": 6.722000000000001e-06, "loss": 67.4204, "step": 33610 }, { "epoch": 0.06791452708298824, "grad_norm": 609.1077880859375, "learning_rate": 6.724e-06, "loss": 47.5712, "step": 33620 }, { "epoch": 0.06793472771567205, "grad_norm": 624.0452880859375, "learning_rate": 6.7260000000000005e-06, "loss": 36.4219, "step": 33630 }, { "epoch": 0.06795492834835587, "grad_norm": 149.0255126953125, "learning_rate": 6.728e-06, "loss": 25.9751, "step": 33640 }, { "epoch": 0.06797512898103969, "grad_norm": 210.6195526123047, "learning_rate": 6.730000000000001e-06, "loss": 30.1722, "step": 33650 }, { "epoch": 0.0679953296137235, "grad_norm": 440.34173583984375, "learning_rate": 6.732000000000001e-06, "loss": 30.5773, "step": 33660 }, { "epoch": 0.06801553024640732, "grad_norm": 140.45077514648438, "learning_rate": 6.734e-06, "loss": 23.2591, "step": 33670 }, { "epoch": 0.06803573087909114, "grad_norm": 383.9178466796875, "learning_rate": 6.736e-06, "loss": 49.6295, "step": 33680 }, { "epoch": 0.06805593151177494, "grad_norm": 155.09527587890625, "learning_rate": 6.738e-06, "loss": 36.1396, "step": 33690 }, { "epoch": 0.06807613214445876, "grad_norm": 713.4521484375, "learning_rate": 6.740000000000001e-06, "loss": 38.7304, "step": 33700 }, { "epoch": 0.06809633277714258, "grad_norm": 217.56887817382812, "learning_rate": 6.742000000000001e-06, "loss": 22.794, "step": 33710 }, { "epoch": 0.0681165334098264, "grad_norm": 1133.9342041015625, "learning_rate": 6.744e-06, "loss": 49.7349, "step": 33720 }, { "epoch": 0.06813673404251021, "grad_norm": 432.6061706542969, "learning_rate": 6.746e-06, "loss": 54.5403, "step": 33730 }, { "epoch": 0.06815693467519403, "grad_norm": 241.99195861816406, "learning_rate": 6.7480000000000004e-06, "loss": 35.1401, "step": 33740 }, { "epoch": 0.06817713530787785, "grad_norm": 148.23487854003906, "learning_rate": 6.750000000000001e-06, "loss": 24.8467, "step": 33750 }, { "epoch": 0.06819733594056165, "grad_norm": 665.7949829101562, "learning_rate": 6.752000000000001e-06, "loss": 39.2982, "step": 33760 }, { "epoch": 0.06821753657324547, "grad_norm": 509.8280029296875, "learning_rate": 6.754000000000001e-06, "loss": 25.7277, "step": 33770 }, { "epoch": 0.0682377372059293, "grad_norm": 275.6345520019531, "learning_rate": 6.756e-06, "loss": 28.6231, "step": 33780 }, { "epoch": 0.0682579378386131, "grad_norm": 326.71014404296875, "learning_rate": 6.758e-06, "loss": 31.9501, "step": 33790 }, { "epoch": 0.06827813847129692, "grad_norm": 78.26876831054688, "learning_rate": 6.760000000000001e-06, "loss": 40.5902, "step": 33800 }, { "epoch": 0.06829833910398074, "grad_norm": 400.4212646484375, "learning_rate": 6.762000000000001e-06, "loss": 29.9435, "step": 33810 }, { "epoch": 0.06831853973666455, "grad_norm": 211.5572052001953, "learning_rate": 6.764000000000001e-06, "loss": 26.9268, "step": 33820 }, { "epoch": 0.06833874036934837, "grad_norm": 353.80517578125, "learning_rate": 6.766e-06, "loss": 35.1211, "step": 33830 }, { "epoch": 0.06835894100203219, "grad_norm": 279.5445556640625, "learning_rate": 6.768e-06, "loss": 35.38, "step": 33840 }, { "epoch": 0.06837914163471599, "grad_norm": 842.8280639648438, "learning_rate": 6.770000000000001e-06, "loss": 45.6965, "step": 33850 }, { "epoch": 0.06839934226739981, "grad_norm": 317.0405578613281, "learning_rate": 6.7720000000000006e-06, "loss": 21.8275, "step": 33860 }, { "epoch": 0.06841954290008363, "grad_norm": 347.109375, "learning_rate": 6.774000000000001e-06, "loss": 42.1279, "step": 33870 }, { "epoch": 0.06843974353276745, "grad_norm": 181.1343536376953, "learning_rate": 6.776e-06, "loss": 37.2308, "step": 33880 }, { "epoch": 0.06845994416545126, "grad_norm": 384.95751953125, "learning_rate": 6.778e-06, "loss": 24.2325, "step": 33890 }, { "epoch": 0.06848014479813508, "grad_norm": 198.47860717773438, "learning_rate": 6.780000000000001e-06, "loss": 24.9311, "step": 33900 }, { "epoch": 0.0685003454308189, "grad_norm": 294.48089599609375, "learning_rate": 6.7820000000000005e-06, "loss": 55.431, "step": 33910 }, { "epoch": 0.0685205460635027, "grad_norm": 389.7652587890625, "learning_rate": 6.784000000000001e-06, "loss": 29.4878, "step": 33920 }, { "epoch": 0.06854074669618652, "grad_norm": 270.4581604003906, "learning_rate": 6.786000000000001e-06, "loss": 43.2502, "step": 33930 }, { "epoch": 0.06856094732887034, "grad_norm": 570.0082397460938, "learning_rate": 6.788e-06, "loss": 40.6503, "step": 33940 }, { "epoch": 0.06858114796155415, "grad_norm": 258.30816650390625, "learning_rate": 6.790000000000001e-06, "loss": 29.3034, "step": 33950 }, { "epoch": 0.06860134859423797, "grad_norm": 137.6088104248047, "learning_rate": 6.792000000000001e-06, "loss": 50.9893, "step": 33960 }, { "epoch": 0.06862154922692179, "grad_norm": 315.00726318359375, "learning_rate": 6.7940000000000005e-06, "loss": 38.9181, "step": 33970 }, { "epoch": 0.0686417498596056, "grad_norm": 210.84100341796875, "learning_rate": 6.796000000000001e-06, "loss": 44.9176, "step": 33980 }, { "epoch": 0.06866195049228942, "grad_norm": 0.0, "learning_rate": 6.798e-06, "loss": 62.335, "step": 33990 }, { "epoch": 0.06868215112497324, "grad_norm": 187.07171630859375, "learning_rate": 6.800000000000001e-06, "loss": 31.0383, "step": 34000 }, { "epoch": 0.06870235175765704, "grad_norm": 235.85838317871094, "learning_rate": 6.802000000000001e-06, "loss": 30.1693, "step": 34010 }, { "epoch": 0.06872255239034086, "grad_norm": 218.34291076660156, "learning_rate": 6.804e-06, "loss": 34.4639, "step": 34020 }, { "epoch": 0.06874275302302468, "grad_norm": 313.0340881347656, "learning_rate": 6.8060000000000006e-06, "loss": 33.1673, "step": 34030 }, { "epoch": 0.0687629536557085, "grad_norm": 71.44450378417969, "learning_rate": 6.808e-06, "loss": 26.5414, "step": 34040 }, { "epoch": 0.06878315428839231, "grad_norm": 1108.696533203125, "learning_rate": 6.810000000000001e-06, "loss": 32.3472, "step": 34050 }, { "epoch": 0.06880335492107613, "grad_norm": 166.51649475097656, "learning_rate": 6.812000000000001e-06, "loss": 41.9269, "step": 34060 }, { "epoch": 0.06882355555375995, "grad_norm": 349.6643371582031, "learning_rate": 6.814e-06, "loss": 38.2226, "step": 34070 }, { "epoch": 0.06884375618644376, "grad_norm": 466.3902587890625, "learning_rate": 6.8160000000000005e-06, "loss": 39.0952, "step": 34080 }, { "epoch": 0.06886395681912758, "grad_norm": 159.2125701904297, "learning_rate": 6.818e-06, "loss": 26.2511, "step": 34090 }, { "epoch": 0.0688841574518114, "grad_norm": 52.105865478515625, "learning_rate": 6.820000000000001e-06, "loss": 48.5719, "step": 34100 }, { "epoch": 0.0689043580844952, "grad_norm": 134.4409942626953, "learning_rate": 6.822000000000001e-06, "loss": 28.5988, "step": 34110 }, { "epoch": 0.06892455871717902, "grad_norm": 343.80908203125, "learning_rate": 6.824e-06, "loss": 27.3933, "step": 34120 }, { "epoch": 0.06894475934986284, "grad_norm": 237.59693908691406, "learning_rate": 6.826e-06, "loss": 17.6016, "step": 34130 }, { "epoch": 0.06896495998254665, "grad_norm": 355.73590087890625, "learning_rate": 6.8280000000000005e-06, "loss": 40.2889, "step": 34140 }, { "epoch": 0.06898516061523047, "grad_norm": 373.65869140625, "learning_rate": 6.830000000000001e-06, "loss": 17.1657, "step": 34150 }, { "epoch": 0.06900536124791429, "grad_norm": 330.5454406738281, "learning_rate": 6.832000000000001e-06, "loss": 23.2348, "step": 34160 }, { "epoch": 0.0690255618805981, "grad_norm": 357.036376953125, "learning_rate": 6.834000000000001e-06, "loss": 25.8255, "step": 34170 }, { "epoch": 0.06904576251328191, "grad_norm": 335.3196716308594, "learning_rate": 6.836e-06, "loss": 20.1001, "step": 34180 }, { "epoch": 0.06906596314596573, "grad_norm": 277.5600891113281, "learning_rate": 6.8380000000000004e-06, "loss": 30.819, "step": 34190 }, { "epoch": 0.06908616377864955, "grad_norm": 131.50033569335938, "learning_rate": 6.8400000000000014e-06, "loss": 39.4422, "step": 34200 }, { "epoch": 0.06910636441133336, "grad_norm": 137.25634765625, "learning_rate": 6.842000000000001e-06, "loss": 42.1543, "step": 34210 }, { "epoch": 0.06912656504401718, "grad_norm": 216.78941345214844, "learning_rate": 6.844000000000001e-06, "loss": 31.9499, "step": 34220 }, { "epoch": 0.069146765676701, "grad_norm": 359.0471496582031, "learning_rate": 6.846e-06, "loss": 23.6016, "step": 34230 }, { "epoch": 0.0691669663093848, "grad_norm": 46.460201263427734, "learning_rate": 6.848e-06, "loss": 22.2552, "step": 34240 }, { "epoch": 0.06918716694206863, "grad_norm": 456.8470764160156, "learning_rate": 6.850000000000001e-06, "loss": 33.8211, "step": 34250 }, { "epoch": 0.06920736757475245, "grad_norm": 154.02618408203125, "learning_rate": 6.852000000000001e-06, "loss": 34.7319, "step": 34260 }, { "epoch": 0.06922756820743625, "grad_norm": 483.8904724121094, "learning_rate": 6.854000000000001e-06, "loss": 34.0068, "step": 34270 }, { "epoch": 0.06924776884012007, "grad_norm": 474.10205078125, "learning_rate": 6.856e-06, "loss": 31.9939, "step": 34280 }, { "epoch": 0.06926796947280389, "grad_norm": 220.82957458496094, "learning_rate": 6.858e-06, "loss": 38.9482, "step": 34290 }, { "epoch": 0.0692881701054877, "grad_norm": 1521.428466796875, "learning_rate": 6.860000000000001e-06, "loss": 34.3127, "step": 34300 }, { "epoch": 0.06930837073817152, "grad_norm": 437.8863525390625, "learning_rate": 6.8620000000000005e-06, "loss": 29.4293, "step": 34310 }, { "epoch": 0.06932857137085534, "grad_norm": 590.9774780273438, "learning_rate": 6.864000000000001e-06, "loss": 29.6548, "step": 34320 }, { "epoch": 0.06934877200353914, "grad_norm": 554.6246337890625, "learning_rate": 6.866000000000001e-06, "loss": 43.4738, "step": 34330 }, { "epoch": 0.06936897263622296, "grad_norm": 300.1055603027344, "learning_rate": 6.868e-06, "loss": 19.6097, "step": 34340 }, { "epoch": 0.06938917326890678, "grad_norm": 403.37371826171875, "learning_rate": 6.870000000000001e-06, "loss": 27.8304, "step": 34350 }, { "epoch": 0.0694093739015906, "grad_norm": 952.3486938476562, "learning_rate": 6.872000000000001e-06, "loss": 50.0311, "step": 34360 }, { "epoch": 0.06942957453427441, "grad_norm": 457.46136474609375, "learning_rate": 6.874000000000001e-06, "loss": 46.5268, "step": 34370 }, { "epoch": 0.06944977516695823, "grad_norm": 460.9429931640625, "learning_rate": 6.876000000000001e-06, "loss": 38.496, "step": 34380 }, { "epoch": 0.06946997579964205, "grad_norm": 312.8623046875, "learning_rate": 6.878e-06, "loss": 35.7279, "step": 34390 }, { "epoch": 0.06949017643232586, "grad_norm": 304.2538146972656, "learning_rate": 6.88e-06, "loss": 19.9553, "step": 34400 }, { "epoch": 0.06951037706500968, "grad_norm": 325.4416198730469, "learning_rate": 6.882000000000001e-06, "loss": 34.3668, "step": 34410 }, { "epoch": 0.0695305776976935, "grad_norm": 385.85260009765625, "learning_rate": 6.8840000000000005e-06, "loss": 21.0294, "step": 34420 }, { "epoch": 0.0695507783303773, "grad_norm": 124.8081283569336, "learning_rate": 6.886000000000001e-06, "loss": 30.4506, "step": 34430 }, { "epoch": 0.06957097896306112, "grad_norm": 420.5198059082031, "learning_rate": 6.888e-06, "loss": 48.1705, "step": 34440 }, { "epoch": 0.06959117959574494, "grad_norm": 412.04864501953125, "learning_rate": 6.89e-06, "loss": 33.6329, "step": 34450 }, { "epoch": 0.06961138022842875, "grad_norm": 220.5589599609375, "learning_rate": 6.892000000000001e-06, "loss": 30.0771, "step": 34460 }, { "epoch": 0.06963158086111257, "grad_norm": 590.5316162109375, "learning_rate": 6.894e-06, "loss": 32.4819, "step": 34470 }, { "epoch": 0.06965178149379639, "grad_norm": 172.81411743164062, "learning_rate": 6.8960000000000006e-06, "loss": 18.125, "step": 34480 }, { "epoch": 0.0696719821264802, "grad_norm": 225.4380645751953, "learning_rate": 6.898e-06, "loss": 40.0995, "step": 34490 }, { "epoch": 0.06969218275916401, "grad_norm": 67.72576904296875, "learning_rate": 6.9e-06, "loss": 22.1745, "step": 34500 }, { "epoch": 0.06971238339184783, "grad_norm": 319.0522155761719, "learning_rate": 6.902000000000001e-06, "loss": 16.5079, "step": 34510 }, { "epoch": 0.06973258402453165, "grad_norm": 130.47085571289062, "learning_rate": 6.904e-06, "loss": 16.1903, "step": 34520 }, { "epoch": 0.06975278465721546, "grad_norm": 246.37319946289062, "learning_rate": 6.9060000000000005e-06, "loss": 24.0866, "step": 34530 }, { "epoch": 0.06977298528989928, "grad_norm": 241.96591186523438, "learning_rate": 6.908000000000001e-06, "loss": 31.9853, "step": 34540 }, { "epoch": 0.0697931859225831, "grad_norm": 554.8908081054688, "learning_rate": 6.91e-06, "loss": 32.4843, "step": 34550 }, { "epoch": 0.06981338655526691, "grad_norm": 374.53179931640625, "learning_rate": 6.912000000000001e-06, "loss": 37.9353, "step": 34560 }, { "epoch": 0.06983358718795073, "grad_norm": 172.09381103515625, "learning_rate": 6.914000000000001e-06, "loss": 52.4013, "step": 34570 }, { "epoch": 0.06985378782063455, "grad_norm": 517.7269287109375, "learning_rate": 6.916e-06, "loss": 28.1022, "step": 34580 }, { "epoch": 0.06987398845331835, "grad_norm": 507.33935546875, "learning_rate": 6.9180000000000005e-06, "loss": 36.4014, "step": 34590 }, { "epoch": 0.06989418908600217, "grad_norm": 468.7700500488281, "learning_rate": 6.92e-06, "loss": 32.8819, "step": 34600 }, { "epoch": 0.06991438971868599, "grad_norm": 267.4627685546875, "learning_rate": 6.922000000000001e-06, "loss": 27.6147, "step": 34610 }, { "epoch": 0.0699345903513698, "grad_norm": 209.7281494140625, "learning_rate": 6.924000000000001e-06, "loss": 37.6682, "step": 34620 }, { "epoch": 0.06995479098405362, "grad_norm": 343.9045104980469, "learning_rate": 6.926e-06, "loss": 36.8786, "step": 34630 }, { "epoch": 0.06997499161673744, "grad_norm": 56.6214599609375, "learning_rate": 6.928e-06, "loss": 29.8932, "step": 34640 }, { "epoch": 0.06999519224942125, "grad_norm": 132.18646240234375, "learning_rate": 6.93e-06, "loss": 36.8503, "step": 34650 }, { "epoch": 0.07001539288210507, "grad_norm": 77.00069427490234, "learning_rate": 6.932000000000001e-06, "loss": 20.4764, "step": 34660 }, { "epoch": 0.07003559351478889, "grad_norm": 161.56048583984375, "learning_rate": 6.934000000000001e-06, "loss": 32.5958, "step": 34670 }, { "epoch": 0.0700557941474727, "grad_norm": 253.38754272460938, "learning_rate": 6.936e-06, "loss": 36.4599, "step": 34680 }, { "epoch": 0.07007599478015651, "grad_norm": 264.5137634277344, "learning_rate": 6.938e-06, "loss": 36.7512, "step": 34690 }, { "epoch": 0.07009619541284033, "grad_norm": 168.30471801757812, "learning_rate": 6.9400000000000005e-06, "loss": 34.0025, "step": 34700 }, { "epoch": 0.07011639604552415, "grad_norm": 414.605224609375, "learning_rate": 6.942000000000001e-06, "loss": 49.5055, "step": 34710 }, { "epoch": 0.07013659667820796, "grad_norm": 237.5443878173828, "learning_rate": 6.944000000000001e-06, "loss": 21.7798, "step": 34720 }, { "epoch": 0.07015679731089178, "grad_norm": 362.0570068359375, "learning_rate": 6.946000000000001e-06, "loss": 42.6096, "step": 34730 }, { "epoch": 0.0701769979435756, "grad_norm": 2925.27685546875, "learning_rate": 6.948e-06, "loss": 54.8819, "step": 34740 }, { "epoch": 0.0701971985762594, "grad_norm": 469.2974853515625, "learning_rate": 6.95e-06, "loss": 43.5039, "step": 34750 }, { "epoch": 0.07021739920894322, "grad_norm": 517.8648681640625, "learning_rate": 6.952000000000001e-06, "loss": 43.2592, "step": 34760 }, { "epoch": 0.07023759984162704, "grad_norm": 201.96371459960938, "learning_rate": 6.954000000000001e-06, "loss": 24.8031, "step": 34770 }, { "epoch": 0.07025780047431085, "grad_norm": 93.35232543945312, "learning_rate": 6.956000000000001e-06, "loss": 30.0516, "step": 34780 }, { "epoch": 0.07027800110699467, "grad_norm": 164.8859100341797, "learning_rate": 6.958e-06, "loss": 18.3808, "step": 34790 }, { "epoch": 0.07029820173967849, "grad_norm": 143.04205322265625, "learning_rate": 6.96e-06, "loss": 16.9089, "step": 34800 }, { "epoch": 0.0703184023723623, "grad_norm": 126.35950469970703, "learning_rate": 6.962000000000001e-06, "loss": 62.8902, "step": 34810 }, { "epoch": 0.07033860300504612, "grad_norm": 98.6904296875, "learning_rate": 6.964000000000001e-06, "loss": 40.0357, "step": 34820 }, { "epoch": 0.07035880363772994, "grad_norm": 319.1390380859375, "learning_rate": 6.966000000000001e-06, "loss": 38.6581, "step": 34830 }, { "epoch": 0.07037900427041376, "grad_norm": 398.7595520019531, "learning_rate": 6.968e-06, "loss": 35.9182, "step": 34840 }, { "epoch": 0.07039920490309756, "grad_norm": 449.69281005859375, "learning_rate": 6.97e-06, "loss": 36.4452, "step": 34850 }, { "epoch": 0.07041940553578138, "grad_norm": 321.99365234375, "learning_rate": 6.972000000000001e-06, "loss": 24.6509, "step": 34860 }, { "epoch": 0.0704396061684652, "grad_norm": 351.2120056152344, "learning_rate": 6.9740000000000005e-06, "loss": 23.6849, "step": 34870 }, { "epoch": 0.07045980680114901, "grad_norm": 404.4240417480469, "learning_rate": 6.976000000000001e-06, "loss": 31.0676, "step": 34880 }, { "epoch": 0.07048000743383283, "grad_norm": 217.9336395263672, "learning_rate": 6.978e-06, "loss": 19.2602, "step": 34890 }, { "epoch": 0.07050020806651665, "grad_norm": 199.38644409179688, "learning_rate": 6.98e-06, "loss": 25.631, "step": 34900 }, { "epoch": 0.07052040869920045, "grad_norm": 227.09054565429688, "learning_rate": 6.982000000000001e-06, "loss": 42.8131, "step": 34910 }, { "epoch": 0.07054060933188427, "grad_norm": 324.820556640625, "learning_rate": 6.984e-06, "loss": 21.7767, "step": 34920 }, { "epoch": 0.0705608099645681, "grad_norm": 156.3030548095703, "learning_rate": 6.9860000000000005e-06, "loss": 14.8754, "step": 34930 }, { "epoch": 0.0705810105972519, "grad_norm": 831.7560424804688, "learning_rate": 6.988000000000001e-06, "loss": 64.299, "step": 34940 }, { "epoch": 0.07060121122993572, "grad_norm": 207.90586853027344, "learning_rate": 6.99e-06, "loss": 18.5747, "step": 34950 }, { "epoch": 0.07062141186261954, "grad_norm": 232.10052490234375, "learning_rate": 6.992000000000001e-06, "loss": 43.5273, "step": 34960 }, { "epoch": 0.07064161249530335, "grad_norm": 169.20326232910156, "learning_rate": 6.994000000000001e-06, "loss": 34.1876, "step": 34970 }, { "epoch": 0.07066181312798717, "grad_norm": 89.45970916748047, "learning_rate": 6.9960000000000004e-06, "loss": 19.3803, "step": 34980 }, { "epoch": 0.07068201376067099, "grad_norm": 204.75509643554688, "learning_rate": 6.998000000000001e-06, "loss": 36.7757, "step": 34990 }, { "epoch": 0.0707022143933548, "grad_norm": 121.06097412109375, "learning_rate": 7e-06, "loss": 49.9149, "step": 35000 }, { "epoch": 0.07072241502603861, "grad_norm": 427.1971435546875, "learning_rate": 7.002000000000001e-06, "loss": 36.2474, "step": 35010 }, { "epoch": 0.07074261565872243, "grad_norm": 757.795166015625, "learning_rate": 7.004000000000001e-06, "loss": 47.5225, "step": 35020 }, { "epoch": 0.07076281629140625, "grad_norm": 506.86590576171875, "learning_rate": 7.006e-06, "loss": 27.3849, "step": 35030 }, { "epoch": 0.07078301692409006, "grad_norm": 56.06354904174805, "learning_rate": 7.0080000000000005e-06, "loss": 20.2224, "step": 35040 }, { "epoch": 0.07080321755677388, "grad_norm": 753.7413330078125, "learning_rate": 7.01e-06, "loss": 39.7816, "step": 35050 }, { "epoch": 0.0708234181894577, "grad_norm": 457.9371643066406, "learning_rate": 7.012000000000001e-06, "loss": 25.066, "step": 35060 }, { "epoch": 0.0708436188221415, "grad_norm": 421.9063415527344, "learning_rate": 7.014000000000001e-06, "loss": 29.595, "step": 35070 }, { "epoch": 0.07086381945482532, "grad_norm": 151.16021728515625, "learning_rate": 7.016e-06, "loss": 25.1056, "step": 35080 }, { "epoch": 0.07088402008750914, "grad_norm": 455.0291442871094, "learning_rate": 7.018e-06, "loss": 39.3701, "step": 35090 }, { "epoch": 0.07090422072019295, "grad_norm": 354.1529235839844, "learning_rate": 7.0200000000000006e-06, "loss": 48.788, "step": 35100 }, { "epoch": 0.07092442135287677, "grad_norm": 159.92800903320312, "learning_rate": 7.022000000000001e-06, "loss": 34.6131, "step": 35110 }, { "epoch": 0.07094462198556059, "grad_norm": 175.41734313964844, "learning_rate": 7.024000000000001e-06, "loss": 38.3396, "step": 35120 }, { "epoch": 0.0709648226182444, "grad_norm": 212.8771514892578, "learning_rate": 7.026000000000001e-06, "loss": 19.889, "step": 35130 }, { "epoch": 0.07098502325092822, "grad_norm": 209.4644317626953, "learning_rate": 7.028e-06, "loss": 26.7744, "step": 35140 }, { "epoch": 0.07100522388361204, "grad_norm": 248.4313201904297, "learning_rate": 7.0300000000000005e-06, "loss": 36.6498, "step": 35150 }, { "epoch": 0.07102542451629586, "grad_norm": 261.8291320800781, "learning_rate": 7.0320000000000015e-06, "loss": 33.2744, "step": 35160 }, { "epoch": 0.07104562514897966, "grad_norm": 364.89007568359375, "learning_rate": 7.034000000000001e-06, "loss": 19.8874, "step": 35170 }, { "epoch": 0.07106582578166348, "grad_norm": 126.75142669677734, "learning_rate": 7.036000000000001e-06, "loss": 44.0355, "step": 35180 }, { "epoch": 0.0710860264143473, "grad_norm": 345.26690673828125, "learning_rate": 7.038e-06, "loss": 42.2944, "step": 35190 }, { "epoch": 0.07110622704703111, "grad_norm": 204.852294921875, "learning_rate": 7.04e-06, "loss": 50.4622, "step": 35200 }, { "epoch": 0.07112642767971493, "grad_norm": 146.93386840820312, "learning_rate": 7.042000000000001e-06, "loss": 36.3933, "step": 35210 }, { "epoch": 0.07114662831239875, "grad_norm": 237.3188018798828, "learning_rate": 7.044000000000001e-06, "loss": 30.2315, "step": 35220 }, { "epoch": 0.07116682894508256, "grad_norm": 1404.38330078125, "learning_rate": 7.046000000000001e-06, "loss": 39.7198, "step": 35230 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 7.048e-06, "loss": 39.3675, "step": 35240 }, { "epoch": 0.0712072302104502, "grad_norm": 86.126708984375, "learning_rate": 7.05e-06, "loss": 33.8652, "step": 35250 }, { "epoch": 0.071227430843134, "grad_norm": 158.10472106933594, "learning_rate": 7.052000000000001e-06, "loss": 23.1999, "step": 35260 }, { "epoch": 0.07124763147581782, "grad_norm": 158.67898559570312, "learning_rate": 7.0540000000000006e-06, "loss": 28.4177, "step": 35270 }, { "epoch": 0.07126783210850164, "grad_norm": 145.89862060546875, "learning_rate": 7.056000000000001e-06, "loss": 20.1317, "step": 35280 }, { "epoch": 0.07128803274118545, "grad_norm": 1404.579833984375, "learning_rate": 7.058e-06, "loss": 33.2969, "step": 35290 }, { "epoch": 0.07130823337386927, "grad_norm": 179.96006774902344, "learning_rate": 7.06e-06, "loss": 41.6885, "step": 35300 }, { "epoch": 0.07132843400655309, "grad_norm": 342.3746337890625, "learning_rate": 7.062000000000001e-06, "loss": 20.0803, "step": 35310 }, { "epoch": 0.07134863463923691, "grad_norm": 169.6373748779297, "learning_rate": 7.0640000000000005e-06, "loss": 39.8404, "step": 35320 }, { "epoch": 0.07136883527192071, "grad_norm": 754.5001831054688, "learning_rate": 7.066000000000001e-06, "loss": 38.9786, "step": 35330 }, { "epoch": 0.07138903590460453, "grad_norm": 667.7453002929688, "learning_rate": 7.068000000000001e-06, "loss": 41.057, "step": 35340 }, { "epoch": 0.07140923653728835, "grad_norm": 189.60873413085938, "learning_rate": 7.07e-06, "loss": 66.6823, "step": 35350 }, { "epoch": 0.07142943716997216, "grad_norm": 619.8598022460938, "learning_rate": 7.072000000000001e-06, "loss": 26.6669, "step": 35360 }, { "epoch": 0.07144963780265598, "grad_norm": 444.90740966796875, "learning_rate": 7.074000000000001e-06, "loss": 34.8379, "step": 35370 }, { "epoch": 0.0714698384353398, "grad_norm": 129.306640625, "learning_rate": 7.0760000000000005e-06, "loss": 38.6515, "step": 35380 }, { "epoch": 0.0714900390680236, "grad_norm": 203.2989959716797, "learning_rate": 7.078000000000001e-06, "loss": 32.5827, "step": 35390 }, { "epoch": 0.07151023970070743, "grad_norm": 389.9929504394531, "learning_rate": 7.08e-06, "loss": 29.3143, "step": 35400 }, { "epoch": 0.07153044033339125, "grad_norm": 503.62615966796875, "learning_rate": 7.082000000000001e-06, "loss": 35.6175, "step": 35410 }, { "epoch": 0.07155064096607505, "grad_norm": 166.6268768310547, "learning_rate": 7.084000000000001e-06, "loss": 23.7046, "step": 35420 }, { "epoch": 0.07157084159875887, "grad_norm": 574.958251953125, "learning_rate": 7.0860000000000004e-06, "loss": 30.0085, "step": 35430 }, { "epoch": 0.07159104223144269, "grad_norm": 389.37750244140625, "learning_rate": 7.088000000000001e-06, "loss": 21.9367, "step": 35440 }, { "epoch": 0.0716112428641265, "grad_norm": 133.2786407470703, "learning_rate": 7.09e-06, "loss": 29.8151, "step": 35450 }, { "epoch": 0.07163144349681032, "grad_norm": 317.72625732421875, "learning_rate": 7.092000000000001e-06, "loss": 43.1836, "step": 35460 }, { "epoch": 0.07165164412949414, "grad_norm": 205.08111572265625, "learning_rate": 7.094000000000001e-06, "loss": 32.3441, "step": 35470 }, { "epoch": 0.07167184476217796, "grad_norm": 272.10723876953125, "learning_rate": 7.096e-06, "loss": 20.6606, "step": 35480 }, { "epoch": 0.07169204539486176, "grad_norm": 551.941162109375, "learning_rate": 7.0980000000000005e-06, "loss": 19.7589, "step": 35490 }, { "epoch": 0.07171224602754558, "grad_norm": 349.5267028808594, "learning_rate": 7.100000000000001e-06, "loss": 40.1324, "step": 35500 }, { "epoch": 0.0717324466602294, "grad_norm": 132.4320526123047, "learning_rate": 7.102000000000001e-06, "loss": 16.9375, "step": 35510 }, { "epoch": 0.07175264729291321, "grad_norm": 255.74122619628906, "learning_rate": 7.104000000000001e-06, "loss": 28.4028, "step": 35520 }, { "epoch": 0.07177284792559703, "grad_norm": 424.9040222167969, "learning_rate": 7.106000000000001e-06, "loss": 28.7946, "step": 35530 }, { "epoch": 0.07179304855828085, "grad_norm": 316.9095458984375, "learning_rate": 7.108e-06, "loss": 33.1519, "step": 35540 }, { "epoch": 0.07181324919096466, "grad_norm": 110.77220916748047, "learning_rate": 7.1100000000000005e-06, "loss": 46.4442, "step": 35550 }, { "epoch": 0.07183344982364848, "grad_norm": 205.39889526367188, "learning_rate": 7.1120000000000015e-06, "loss": 32.5679, "step": 35560 }, { "epoch": 0.0718536504563323, "grad_norm": 244.43821716308594, "learning_rate": 7.114000000000001e-06, "loss": 30.8648, "step": 35570 }, { "epoch": 0.0718738510890161, "grad_norm": 160.8514404296875, "learning_rate": 7.116000000000001e-06, "loss": 29.2878, "step": 35580 }, { "epoch": 0.07189405172169992, "grad_norm": 469.5392761230469, "learning_rate": 7.118e-06, "loss": 40.2692, "step": 35590 }, { "epoch": 0.07191425235438374, "grad_norm": 185.44644165039062, "learning_rate": 7.1200000000000004e-06, "loss": 13.3216, "step": 35600 }, { "epoch": 0.07193445298706755, "grad_norm": 156.66006469726562, "learning_rate": 7.1220000000000014e-06, "loss": 16.5548, "step": 35610 }, { "epoch": 0.07195465361975137, "grad_norm": 358.92999267578125, "learning_rate": 7.124000000000001e-06, "loss": 22.2153, "step": 35620 }, { "epoch": 0.07197485425243519, "grad_norm": 348.792236328125, "learning_rate": 7.126000000000001e-06, "loss": 28.6465, "step": 35630 }, { "epoch": 0.07199505488511901, "grad_norm": 39.33981704711914, "learning_rate": 7.128e-06, "loss": 39.4559, "step": 35640 }, { "epoch": 0.07201525551780281, "grad_norm": 273.9315185546875, "learning_rate": 7.13e-06, "loss": 25.6712, "step": 35650 }, { "epoch": 0.07203545615048663, "grad_norm": 204.81541442871094, "learning_rate": 7.132e-06, "loss": 15.7049, "step": 35660 }, { "epoch": 0.07205565678317045, "grad_norm": 286.1951904296875, "learning_rate": 7.134000000000001e-06, "loss": 24.5248, "step": 35670 }, { "epoch": 0.07207585741585426, "grad_norm": 366.5784912109375, "learning_rate": 7.136000000000001e-06, "loss": 37.0885, "step": 35680 }, { "epoch": 0.07209605804853808, "grad_norm": 176.7564239501953, "learning_rate": 7.138e-06, "loss": 35.4139, "step": 35690 }, { "epoch": 0.0721162586812219, "grad_norm": 454.78857421875, "learning_rate": 7.14e-06, "loss": 43.87, "step": 35700 }, { "epoch": 0.07213645931390571, "grad_norm": 145.7947998046875, "learning_rate": 7.142e-06, "loss": 30.3516, "step": 35710 }, { "epoch": 0.07215665994658953, "grad_norm": 270.06512451171875, "learning_rate": 7.1440000000000005e-06, "loss": 15.9939, "step": 35720 }, { "epoch": 0.07217686057927335, "grad_norm": 176.67660522460938, "learning_rate": 7.146000000000001e-06, "loss": 25.8761, "step": 35730 }, { "epoch": 0.07219706121195715, "grad_norm": 178.91583251953125, "learning_rate": 7.148000000000001e-06, "loss": 27.143, "step": 35740 }, { "epoch": 0.07221726184464097, "grad_norm": 35.9091796875, "learning_rate": 7.15e-06, "loss": 45.5767, "step": 35750 }, { "epoch": 0.0722374624773248, "grad_norm": 321.44866943359375, "learning_rate": 7.152e-06, "loss": 30.9835, "step": 35760 }, { "epoch": 0.0722576631100086, "grad_norm": 133.48434448242188, "learning_rate": 7.154000000000001e-06, "loss": 37.3391, "step": 35770 }, { "epoch": 0.07227786374269242, "grad_norm": 526.0672607421875, "learning_rate": 7.156000000000001e-06, "loss": 42.9168, "step": 35780 }, { "epoch": 0.07229806437537624, "grad_norm": 288.9527587890625, "learning_rate": 7.158000000000001e-06, "loss": 25.5186, "step": 35790 }, { "epoch": 0.07231826500806006, "grad_norm": 316.61944580078125, "learning_rate": 7.16e-06, "loss": 41.4972, "step": 35800 }, { "epoch": 0.07233846564074387, "grad_norm": 258.3380432128906, "learning_rate": 7.162e-06, "loss": 18.056, "step": 35810 }, { "epoch": 0.07235866627342769, "grad_norm": 395.2965087890625, "learning_rate": 7.164000000000001e-06, "loss": 39.9333, "step": 35820 }, { "epoch": 0.0723788669061115, "grad_norm": 326.03717041015625, "learning_rate": 7.1660000000000005e-06, "loss": 18.934, "step": 35830 }, { "epoch": 0.07239906753879531, "grad_norm": 456.2828674316406, "learning_rate": 7.168000000000001e-06, "loss": 22.9084, "step": 35840 }, { "epoch": 0.07241926817147913, "grad_norm": 268.70550537109375, "learning_rate": 7.17e-06, "loss": 29.9401, "step": 35850 }, { "epoch": 0.07243946880416295, "grad_norm": 181.02793884277344, "learning_rate": 7.172e-06, "loss": 25.7866, "step": 35860 }, { "epoch": 0.07245966943684676, "grad_norm": 313.85791015625, "learning_rate": 7.174000000000001e-06, "loss": 40.4792, "step": 35870 }, { "epoch": 0.07247987006953058, "grad_norm": 0.0, "learning_rate": 7.176e-06, "loss": 28.5235, "step": 35880 }, { "epoch": 0.0725000707022144, "grad_norm": 173.0997772216797, "learning_rate": 7.1780000000000006e-06, "loss": 17.9554, "step": 35890 }, { "epoch": 0.0725202713348982, "grad_norm": 494.1507568359375, "learning_rate": 7.180000000000001e-06, "loss": 43.1713, "step": 35900 }, { "epoch": 0.07254047196758202, "grad_norm": 587.50146484375, "learning_rate": 7.182e-06, "loss": 30.6655, "step": 35910 }, { "epoch": 0.07256067260026584, "grad_norm": 556.1171264648438, "learning_rate": 7.184000000000001e-06, "loss": 49.4425, "step": 35920 }, { "epoch": 0.07258087323294965, "grad_norm": 141.56065368652344, "learning_rate": 7.186000000000001e-06, "loss": 39.0699, "step": 35930 }, { "epoch": 0.07260107386563347, "grad_norm": 211.27560424804688, "learning_rate": 7.1880000000000005e-06, "loss": 27.5508, "step": 35940 }, { "epoch": 0.07262127449831729, "grad_norm": 337.15216064453125, "learning_rate": 7.190000000000001e-06, "loss": 19.8618, "step": 35950 }, { "epoch": 0.07264147513100111, "grad_norm": 410.3082580566406, "learning_rate": 7.192e-06, "loss": 28.8214, "step": 35960 }, { "epoch": 0.07266167576368492, "grad_norm": 141.10684204101562, "learning_rate": 7.194000000000001e-06, "loss": 21.9792, "step": 35970 }, { "epoch": 0.07268187639636874, "grad_norm": 233.47531127929688, "learning_rate": 7.196000000000001e-06, "loss": 35.2204, "step": 35980 }, { "epoch": 0.07270207702905256, "grad_norm": 203.5105438232422, "learning_rate": 7.198e-06, "loss": 18.1653, "step": 35990 }, { "epoch": 0.07272227766173636, "grad_norm": 735.6315307617188, "learning_rate": 7.2000000000000005e-06, "loss": 36.5236, "step": 36000 }, { "epoch": 0.07274247829442018, "grad_norm": 452.4701232910156, "learning_rate": 7.202e-06, "loss": 43.1764, "step": 36010 }, { "epoch": 0.072762678927104, "grad_norm": 547.6006469726562, "learning_rate": 7.204000000000001e-06, "loss": 29.6055, "step": 36020 }, { "epoch": 0.07278287955978781, "grad_norm": 261.2068786621094, "learning_rate": 7.206000000000001e-06, "loss": 30.5008, "step": 36030 }, { "epoch": 0.07280308019247163, "grad_norm": 254.03211975097656, "learning_rate": 7.208e-06, "loss": 23.1348, "step": 36040 }, { "epoch": 0.07282328082515545, "grad_norm": 198.9160919189453, "learning_rate": 7.2100000000000004e-06, "loss": 19.0444, "step": 36050 }, { "epoch": 0.07284348145783925, "grad_norm": 238.08937072753906, "learning_rate": 7.212e-06, "loss": 25.6352, "step": 36060 }, { "epoch": 0.07286368209052307, "grad_norm": 330.58935546875, "learning_rate": 7.214000000000001e-06, "loss": 24.7367, "step": 36070 }, { "epoch": 0.0728838827232069, "grad_norm": 230.72210693359375, "learning_rate": 7.216000000000001e-06, "loss": 17.4687, "step": 36080 }, { "epoch": 0.0729040833558907, "grad_norm": 313.8332214355469, "learning_rate": 7.218e-06, "loss": 29.0529, "step": 36090 }, { "epoch": 0.07292428398857452, "grad_norm": 130.81948852539062, "learning_rate": 7.22e-06, "loss": 40.0051, "step": 36100 }, { "epoch": 0.07294448462125834, "grad_norm": 175.20887756347656, "learning_rate": 7.2220000000000005e-06, "loss": 20.7462, "step": 36110 }, { "epoch": 0.07296468525394216, "grad_norm": 602.9423217773438, "learning_rate": 7.224000000000001e-06, "loss": 42.191, "step": 36120 }, { "epoch": 0.07298488588662597, "grad_norm": 258.12628173828125, "learning_rate": 7.226000000000001e-06, "loss": 17.287, "step": 36130 }, { "epoch": 0.07300508651930979, "grad_norm": 233.0981903076172, "learning_rate": 7.228000000000001e-06, "loss": 26.7039, "step": 36140 }, { "epoch": 0.0730252871519936, "grad_norm": 203.48635864257812, "learning_rate": 7.23e-06, "loss": 25.4507, "step": 36150 }, { "epoch": 0.07304548778467741, "grad_norm": 491.010986328125, "learning_rate": 7.232e-06, "loss": 22.1525, "step": 36160 }, { "epoch": 0.07306568841736123, "grad_norm": 358.2745056152344, "learning_rate": 7.234000000000001e-06, "loss": 27.5791, "step": 36170 }, { "epoch": 0.07308588905004505, "grad_norm": 249.3122100830078, "learning_rate": 7.236000000000001e-06, "loss": 36.7611, "step": 36180 }, { "epoch": 0.07310608968272886, "grad_norm": 120.18719482421875, "learning_rate": 7.238000000000001e-06, "loss": 23.0408, "step": 36190 }, { "epoch": 0.07312629031541268, "grad_norm": 193.0401153564453, "learning_rate": 7.24e-06, "loss": 13.1624, "step": 36200 }, { "epoch": 0.0731464909480965, "grad_norm": 191.195556640625, "learning_rate": 7.242e-06, "loss": 31.5979, "step": 36210 }, { "epoch": 0.0731666915807803, "grad_norm": 195.62342834472656, "learning_rate": 7.244000000000001e-06, "loss": 27.4461, "step": 36220 }, { "epoch": 0.07318689221346412, "grad_norm": 73.44220733642578, "learning_rate": 7.246000000000001e-06, "loss": 33.2908, "step": 36230 }, { "epoch": 0.07320709284614794, "grad_norm": 261.4033508300781, "learning_rate": 7.248000000000001e-06, "loss": 20.6873, "step": 36240 }, { "epoch": 0.07322729347883175, "grad_norm": 850.6642456054688, "learning_rate": 7.25e-06, "loss": 28.8736, "step": 36250 }, { "epoch": 0.07324749411151557, "grad_norm": 663.4951171875, "learning_rate": 7.252e-06, "loss": 42.89, "step": 36260 }, { "epoch": 0.07326769474419939, "grad_norm": 589.6317749023438, "learning_rate": 7.254000000000001e-06, "loss": 29.1498, "step": 36270 }, { "epoch": 0.07328789537688321, "grad_norm": 258.9990234375, "learning_rate": 7.2560000000000005e-06, "loss": 29.9007, "step": 36280 }, { "epoch": 0.07330809600956702, "grad_norm": 543.8953247070312, "learning_rate": 7.258000000000001e-06, "loss": 28.8293, "step": 36290 }, { "epoch": 0.07332829664225084, "grad_norm": 395.2126159667969, "learning_rate": 7.260000000000001e-06, "loss": 39.2529, "step": 36300 }, { "epoch": 0.07334849727493466, "grad_norm": 191.6095733642578, "learning_rate": 7.262e-06, "loss": 38.7575, "step": 36310 }, { "epoch": 0.07336869790761846, "grad_norm": 263.6365661621094, "learning_rate": 7.264000000000001e-06, "loss": 22.5126, "step": 36320 }, { "epoch": 0.07338889854030228, "grad_norm": 135.82113647460938, "learning_rate": 7.266000000000001e-06, "loss": 29.7597, "step": 36330 }, { "epoch": 0.0734090991729861, "grad_norm": 433.3985290527344, "learning_rate": 7.2680000000000005e-06, "loss": 38.3407, "step": 36340 }, { "epoch": 0.07342929980566991, "grad_norm": 428.9453125, "learning_rate": 7.270000000000001e-06, "loss": 26.7119, "step": 36350 }, { "epoch": 0.07344950043835373, "grad_norm": 229.5213623046875, "learning_rate": 7.272e-06, "loss": 29.5679, "step": 36360 }, { "epoch": 0.07346970107103755, "grad_norm": 335.07843017578125, "learning_rate": 7.274000000000001e-06, "loss": 33.7141, "step": 36370 }, { "epoch": 0.07348990170372136, "grad_norm": 182.32470703125, "learning_rate": 7.276000000000001e-06, "loss": 29.1863, "step": 36380 }, { "epoch": 0.07351010233640518, "grad_norm": 382.3972473144531, "learning_rate": 7.2780000000000005e-06, "loss": 38.5361, "step": 36390 }, { "epoch": 0.073530302969089, "grad_norm": 217.6338348388672, "learning_rate": 7.280000000000001e-06, "loss": 19.6521, "step": 36400 }, { "epoch": 0.0735505036017728, "grad_norm": 548.1901245117188, "learning_rate": 7.282e-06, "loss": 41.9048, "step": 36410 }, { "epoch": 0.07357070423445662, "grad_norm": 223.6837921142578, "learning_rate": 7.284000000000001e-06, "loss": 31.4085, "step": 36420 }, { "epoch": 0.07359090486714044, "grad_norm": 524.5628051757812, "learning_rate": 7.286000000000001e-06, "loss": 37.9711, "step": 36430 }, { "epoch": 0.07361110549982426, "grad_norm": 280.7080383300781, "learning_rate": 7.288e-06, "loss": 25.1712, "step": 36440 }, { "epoch": 0.07363130613250807, "grad_norm": 180.32369995117188, "learning_rate": 7.2900000000000005e-06, "loss": 17.5053, "step": 36450 }, { "epoch": 0.07365150676519189, "grad_norm": 558.983154296875, "learning_rate": 7.292e-06, "loss": 50.4983, "step": 36460 }, { "epoch": 0.07367170739787571, "grad_norm": 230.17555236816406, "learning_rate": 7.294000000000001e-06, "loss": 35.6801, "step": 36470 }, { "epoch": 0.07369190803055951, "grad_norm": 435.7958068847656, "learning_rate": 7.296000000000001e-06, "loss": 24.1494, "step": 36480 }, { "epoch": 0.07371210866324333, "grad_norm": 189.82095336914062, "learning_rate": 7.298e-06, "loss": 35.3752, "step": 36490 }, { "epoch": 0.07373230929592715, "grad_norm": 323.5209655761719, "learning_rate": 7.3e-06, "loss": 27.6517, "step": 36500 }, { "epoch": 0.07375250992861096, "grad_norm": 191.7749481201172, "learning_rate": 7.3020000000000006e-06, "loss": 23.3653, "step": 36510 }, { "epoch": 0.07377271056129478, "grad_norm": 238.01119995117188, "learning_rate": 7.304000000000001e-06, "loss": 27.0703, "step": 36520 }, { "epoch": 0.0737929111939786, "grad_norm": 351.7221374511719, "learning_rate": 7.306000000000001e-06, "loss": 29.5084, "step": 36530 }, { "epoch": 0.0738131118266624, "grad_norm": 332.8731689453125, "learning_rate": 7.308000000000001e-06, "loss": 40.6823, "step": 36540 }, { "epoch": 0.07383331245934623, "grad_norm": 81.83951568603516, "learning_rate": 7.31e-06, "loss": 34.5878, "step": 36550 }, { "epoch": 0.07385351309203005, "grad_norm": 484.3802185058594, "learning_rate": 7.3120000000000005e-06, "loss": 29.3434, "step": 36560 }, { "epoch": 0.07387371372471385, "grad_norm": 191.95716857910156, "learning_rate": 7.3140000000000015e-06, "loss": 56.769, "step": 36570 }, { "epoch": 0.07389391435739767, "grad_norm": 264.5630798339844, "learning_rate": 7.316000000000001e-06, "loss": 27.6115, "step": 36580 }, { "epoch": 0.07391411499008149, "grad_norm": 270.2891540527344, "learning_rate": 7.318000000000001e-06, "loss": 33.833, "step": 36590 }, { "epoch": 0.0739343156227653, "grad_norm": 869.6041259765625, "learning_rate": 7.32e-06, "loss": 64.153, "step": 36600 }, { "epoch": 0.07395451625544912, "grad_norm": 268.8966979980469, "learning_rate": 7.322e-06, "loss": 22.4746, "step": 36610 }, { "epoch": 0.07397471688813294, "grad_norm": 341.6988220214844, "learning_rate": 7.324000000000001e-06, "loss": 43.2121, "step": 36620 }, { "epoch": 0.07399491752081676, "grad_norm": 80.72615051269531, "learning_rate": 7.326000000000001e-06, "loss": 45.8597, "step": 36630 }, { "epoch": 0.07401511815350056, "grad_norm": 89.74231719970703, "learning_rate": 7.328000000000001e-06, "loss": 24.935, "step": 36640 }, { "epoch": 0.07403531878618438, "grad_norm": 529.3466796875, "learning_rate": 7.33e-06, "loss": 38.5412, "step": 36650 }, { "epoch": 0.0740555194188682, "grad_norm": 614.40869140625, "learning_rate": 7.332e-06, "loss": 49.5567, "step": 36660 }, { "epoch": 0.07407572005155201, "grad_norm": 405.7143859863281, "learning_rate": 7.334000000000001e-06, "loss": 23.8698, "step": 36670 }, { "epoch": 0.07409592068423583, "grad_norm": 385.6219787597656, "learning_rate": 7.3360000000000006e-06, "loss": 41.4413, "step": 36680 }, { "epoch": 0.07411612131691965, "grad_norm": 132.39012145996094, "learning_rate": 7.338000000000001e-06, "loss": 31.1189, "step": 36690 }, { "epoch": 0.07413632194960346, "grad_norm": 286.5935363769531, "learning_rate": 7.340000000000001e-06, "loss": 19.5979, "step": 36700 }, { "epoch": 0.07415652258228728, "grad_norm": 176.44528198242188, "learning_rate": 7.342e-06, "loss": 19.5919, "step": 36710 }, { "epoch": 0.0741767232149711, "grad_norm": 59.7669563293457, "learning_rate": 7.344000000000001e-06, "loss": 31.4608, "step": 36720 }, { "epoch": 0.0741969238476549, "grad_norm": 143.26528930664062, "learning_rate": 7.346000000000001e-06, "loss": 47.1855, "step": 36730 }, { "epoch": 0.07421712448033872, "grad_norm": 183.0047149658203, "learning_rate": 7.348000000000001e-06, "loss": 53.4816, "step": 36740 }, { "epoch": 0.07423732511302254, "grad_norm": 155.7193603515625, "learning_rate": 7.350000000000001e-06, "loss": 29.6226, "step": 36750 }, { "epoch": 0.07425752574570635, "grad_norm": 216.11434936523438, "learning_rate": 7.352e-06, "loss": 31.5426, "step": 36760 }, { "epoch": 0.07427772637839017, "grad_norm": 473.5368347167969, "learning_rate": 7.354000000000001e-06, "loss": 21.6675, "step": 36770 }, { "epoch": 0.07429792701107399, "grad_norm": 320.8930969238281, "learning_rate": 7.356000000000001e-06, "loss": 32.4083, "step": 36780 }, { "epoch": 0.07431812764375781, "grad_norm": 245.28684997558594, "learning_rate": 7.3580000000000005e-06, "loss": 42.2323, "step": 36790 }, { "epoch": 0.07433832827644161, "grad_norm": 89.3472671508789, "learning_rate": 7.360000000000001e-06, "loss": 19.9996, "step": 36800 }, { "epoch": 0.07435852890912543, "grad_norm": 114.49677276611328, "learning_rate": 7.362e-06, "loss": 48.1019, "step": 36810 }, { "epoch": 0.07437872954180925, "grad_norm": 206.83221435546875, "learning_rate": 7.364000000000001e-06, "loss": 53.2229, "step": 36820 }, { "epoch": 0.07439893017449306, "grad_norm": 108.27088928222656, "learning_rate": 7.366000000000001e-06, "loss": 23.8194, "step": 36830 }, { "epoch": 0.07441913080717688, "grad_norm": 153.98977661132812, "learning_rate": 7.3680000000000004e-06, "loss": 20.7451, "step": 36840 }, { "epoch": 0.0744393314398607, "grad_norm": 354.7840576171875, "learning_rate": 7.370000000000001e-06, "loss": 31.3028, "step": 36850 }, { "epoch": 0.07445953207254451, "grad_norm": 255.68814086914062, "learning_rate": 7.372e-06, "loss": 31.3395, "step": 36860 }, { "epoch": 0.07447973270522833, "grad_norm": 152.50454711914062, "learning_rate": 7.374000000000001e-06, "loss": 9.5246, "step": 36870 }, { "epoch": 0.07449993333791215, "grad_norm": 436.1549987792969, "learning_rate": 7.376000000000001e-06, "loss": 23.5639, "step": 36880 }, { "epoch": 0.07452013397059595, "grad_norm": 249.85977172851562, "learning_rate": 7.378e-06, "loss": 22.5474, "step": 36890 }, { "epoch": 0.07454033460327977, "grad_norm": 201.94671630859375, "learning_rate": 7.3800000000000005e-06, "loss": 28.7942, "step": 36900 }, { "epoch": 0.0745605352359636, "grad_norm": 189.82913208007812, "learning_rate": 7.382000000000001e-06, "loss": 30.4418, "step": 36910 }, { "epoch": 0.0745807358686474, "grad_norm": 288.5106201171875, "learning_rate": 7.384e-06, "loss": 36.8619, "step": 36920 }, { "epoch": 0.07460093650133122, "grad_norm": 255.6656036376953, "learning_rate": 7.386000000000001e-06, "loss": 28.3292, "step": 36930 }, { "epoch": 0.07462113713401504, "grad_norm": 375.67779541015625, "learning_rate": 7.388000000000001e-06, "loss": 33.4336, "step": 36940 }, { "epoch": 0.07464133776669886, "grad_norm": 166.8861846923828, "learning_rate": 7.39e-06, "loss": 34.9891, "step": 36950 }, { "epoch": 0.07466153839938267, "grad_norm": 191.3308868408203, "learning_rate": 7.3920000000000005e-06, "loss": 19.7515, "step": 36960 }, { "epoch": 0.07468173903206649, "grad_norm": 167.85284423828125, "learning_rate": 7.394e-06, "loss": 23.0447, "step": 36970 }, { "epoch": 0.0747019396647503, "grad_norm": 336.843994140625, "learning_rate": 7.396000000000001e-06, "loss": 32.6012, "step": 36980 }, { "epoch": 0.07472214029743411, "grad_norm": 90.44078826904297, "learning_rate": 7.398000000000001e-06, "loss": 22.6133, "step": 36990 }, { "epoch": 0.07474234093011793, "grad_norm": 75.26500701904297, "learning_rate": 7.4e-06, "loss": 30.9286, "step": 37000 }, { "epoch": 0.07476254156280175, "grad_norm": 261.5397033691406, "learning_rate": 7.4020000000000005e-06, "loss": 30.2495, "step": 37010 }, { "epoch": 0.07478274219548556, "grad_norm": 306.8426513671875, "learning_rate": 7.404e-06, "loss": 24.6559, "step": 37020 }, { "epoch": 0.07480294282816938, "grad_norm": 482.0, "learning_rate": 7.406000000000001e-06, "loss": 25.0202, "step": 37030 }, { "epoch": 0.0748231434608532, "grad_norm": 294.6149597167969, "learning_rate": 7.408000000000001e-06, "loss": 25.1587, "step": 37040 }, { "epoch": 0.074843344093537, "grad_norm": 87.03323364257812, "learning_rate": 7.41e-06, "loss": 35.6619, "step": 37050 }, { "epoch": 0.07486354472622082, "grad_norm": 175.4351043701172, "learning_rate": 7.412e-06, "loss": 18.4965, "step": 37060 }, { "epoch": 0.07488374535890464, "grad_norm": 302.5224304199219, "learning_rate": 7.4140000000000005e-06, "loss": 34.8364, "step": 37070 }, { "epoch": 0.07490394599158845, "grad_norm": 178.20249938964844, "learning_rate": 7.416000000000001e-06, "loss": 39.6743, "step": 37080 }, { "epoch": 0.07492414662427227, "grad_norm": 250.53619384765625, "learning_rate": 7.418000000000001e-06, "loss": 34.1762, "step": 37090 }, { "epoch": 0.07494434725695609, "grad_norm": 179.6147003173828, "learning_rate": 7.420000000000001e-06, "loss": 42.44, "step": 37100 }, { "epoch": 0.07496454788963991, "grad_norm": 83.07459259033203, "learning_rate": 7.422e-06, "loss": 28.5571, "step": 37110 }, { "epoch": 0.07498474852232372, "grad_norm": 395.6041564941406, "learning_rate": 7.424e-06, "loss": 16.7306, "step": 37120 }, { "epoch": 0.07500494915500754, "grad_norm": 347.8148498535156, "learning_rate": 7.426000000000001e-06, "loss": 29.2702, "step": 37130 }, { "epoch": 0.07502514978769136, "grad_norm": 223.30548095703125, "learning_rate": 7.428000000000001e-06, "loss": 26.2259, "step": 37140 }, { "epoch": 0.07504535042037516, "grad_norm": 216.8474578857422, "learning_rate": 7.430000000000001e-06, "loss": 45.1262, "step": 37150 }, { "epoch": 0.07506555105305898, "grad_norm": 258.45086669921875, "learning_rate": 7.432e-06, "loss": 23.6454, "step": 37160 }, { "epoch": 0.0750857516857428, "grad_norm": 217.2241973876953, "learning_rate": 7.434e-06, "loss": 24.0112, "step": 37170 }, { "epoch": 0.07510595231842661, "grad_norm": 192.0157928466797, "learning_rate": 7.436000000000001e-06, "loss": 13.5141, "step": 37180 }, { "epoch": 0.07512615295111043, "grad_norm": 722.029052734375, "learning_rate": 7.438000000000001e-06, "loss": 59.7819, "step": 37190 }, { "epoch": 0.07514635358379425, "grad_norm": 482.7039489746094, "learning_rate": 7.440000000000001e-06, "loss": 42.0754, "step": 37200 }, { "epoch": 0.07516655421647805, "grad_norm": 571.5675659179688, "learning_rate": 7.442e-06, "loss": 49.6428, "step": 37210 }, { "epoch": 0.07518675484916187, "grad_norm": 207.1539764404297, "learning_rate": 7.444e-06, "loss": 21.3692, "step": 37220 }, { "epoch": 0.0752069554818457, "grad_norm": 60.72225570678711, "learning_rate": 7.446000000000001e-06, "loss": 25.9848, "step": 37230 }, { "epoch": 0.0752271561145295, "grad_norm": 249.90875244140625, "learning_rate": 7.4480000000000005e-06, "loss": 41.3829, "step": 37240 }, { "epoch": 0.07524735674721332, "grad_norm": 173.2308807373047, "learning_rate": 7.450000000000001e-06, "loss": 28.5587, "step": 37250 }, { "epoch": 0.07526755737989714, "grad_norm": 213.67919921875, "learning_rate": 7.452e-06, "loss": 30.5814, "step": 37260 }, { "epoch": 0.07528775801258096, "grad_norm": 273.2736511230469, "learning_rate": 7.454e-06, "loss": 32.5555, "step": 37270 }, { "epoch": 0.07530795864526477, "grad_norm": 182.23330688476562, "learning_rate": 7.456000000000001e-06, "loss": 43.5554, "step": 37280 }, { "epoch": 0.07532815927794859, "grad_norm": 147.9858856201172, "learning_rate": 7.458e-06, "loss": 18.7032, "step": 37290 }, { "epoch": 0.0753483599106324, "grad_norm": 272.28851318359375, "learning_rate": 7.4600000000000006e-06, "loss": 19.1545, "step": 37300 }, { "epoch": 0.07536856054331621, "grad_norm": 363.4236755371094, "learning_rate": 7.462000000000001e-06, "loss": 27.2805, "step": 37310 }, { "epoch": 0.07538876117600003, "grad_norm": 289.0731201171875, "learning_rate": 7.464e-06, "loss": 38.4966, "step": 37320 }, { "epoch": 0.07540896180868385, "grad_norm": 97.92449951171875, "learning_rate": 7.466000000000001e-06, "loss": 32.4639, "step": 37330 }, { "epoch": 0.07542916244136766, "grad_norm": 375.31787109375, "learning_rate": 7.468000000000001e-06, "loss": 35.2377, "step": 37340 }, { "epoch": 0.07544936307405148, "grad_norm": 188.35894775390625, "learning_rate": 7.4700000000000005e-06, "loss": 31.1343, "step": 37350 }, { "epoch": 0.0754695637067353, "grad_norm": 208.91656494140625, "learning_rate": 7.472000000000001e-06, "loss": 22.7961, "step": 37360 }, { "epoch": 0.0754897643394191, "grad_norm": 167.84564208984375, "learning_rate": 7.474e-06, "loss": 20.9727, "step": 37370 }, { "epoch": 0.07550996497210292, "grad_norm": 362.0840759277344, "learning_rate": 7.476000000000001e-06, "loss": 21.6613, "step": 37380 }, { "epoch": 0.07553016560478674, "grad_norm": 477.2515563964844, "learning_rate": 7.478000000000001e-06, "loss": 34.0898, "step": 37390 }, { "epoch": 0.07555036623747055, "grad_norm": 318.6207580566406, "learning_rate": 7.48e-06, "loss": 28.9804, "step": 37400 }, { "epoch": 0.07557056687015437, "grad_norm": 667.9202270507812, "learning_rate": 7.4820000000000005e-06, "loss": 38.8118, "step": 37410 }, { "epoch": 0.07559076750283819, "grad_norm": 352.80755615234375, "learning_rate": 7.484e-06, "loss": 31.6918, "step": 37420 }, { "epoch": 0.07561096813552201, "grad_norm": 356.363525390625, "learning_rate": 7.486000000000001e-06, "loss": 30.3008, "step": 37430 }, { "epoch": 0.07563116876820582, "grad_norm": 191.04046630859375, "learning_rate": 7.488000000000001e-06, "loss": 39.7366, "step": 37440 }, { "epoch": 0.07565136940088964, "grad_norm": 243.32534790039062, "learning_rate": 7.49e-06, "loss": 37.3634, "step": 37450 }, { "epoch": 0.07567157003357346, "grad_norm": 116.10124206542969, "learning_rate": 7.4920000000000004e-06, "loss": 28.5891, "step": 37460 }, { "epoch": 0.07569177066625726, "grad_norm": 164.012451171875, "learning_rate": 7.494000000000001e-06, "loss": 30.3125, "step": 37470 }, { "epoch": 0.07571197129894108, "grad_norm": 231.16278076171875, "learning_rate": 7.496000000000001e-06, "loss": 27.0886, "step": 37480 }, { "epoch": 0.0757321719316249, "grad_norm": 90.63762664794922, "learning_rate": 7.498000000000001e-06, "loss": 17.6784, "step": 37490 }, { "epoch": 0.07575237256430871, "grad_norm": 274.13909912109375, "learning_rate": 7.500000000000001e-06, "loss": 36.5043, "step": 37500 }, { "epoch": 0.07577257319699253, "grad_norm": 177.11863708496094, "learning_rate": 7.502e-06, "loss": 22.2184, "step": 37510 }, { "epoch": 0.07579277382967635, "grad_norm": 319.9680480957031, "learning_rate": 7.5040000000000005e-06, "loss": 25.2852, "step": 37520 }, { "epoch": 0.07581297446236016, "grad_norm": 231.00592041015625, "learning_rate": 7.506000000000001e-06, "loss": 33.0575, "step": 37530 }, { "epoch": 0.07583317509504398, "grad_norm": 165.6636199951172, "learning_rate": 7.508000000000001e-06, "loss": 24.9959, "step": 37540 }, { "epoch": 0.0758533757277278, "grad_norm": 421.14990234375, "learning_rate": 7.510000000000001e-06, "loss": 17.8614, "step": 37550 }, { "epoch": 0.0758735763604116, "grad_norm": 580.3252563476562, "learning_rate": 7.512e-06, "loss": 32.6734, "step": 37560 }, { "epoch": 0.07589377699309542, "grad_norm": 155.6432647705078, "learning_rate": 7.514e-06, "loss": 21.4996, "step": 37570 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 7.516000000000001e-06, "loss": 20.1631, "step": 37580 }, { "epoch": 0.07593417825846306, "grad_norm": 655.6403198242188, "learning_rate": 7.518000000000001e-06, "loss": 31.996, "step": 37590 }, { "epoch": 0.07595437889114687, "grad_norm": 237.73785400390625, "learning_rate": 7.520000000000001e-06, "loss": 34.8398, "step": 37600 }, { "epoch": 0.07597457952383069, "grad_norm": 192.99000549316406, "learning_rate": 7.522e-06, "loss": 59.0217, "step": 37610 }, { "epoch": 0.07599478015651451, "grad_norm": 361.86322021484375, "learning_rate": 7.524e-06, "loss": 37.0161, "step": 37620 }, { "epoch": 0.07601498078919831, "grad_norm": 308.7842102050781, "learning_rate": 7.526000000000001e-06, "loss": 14.4074, "step": 37630 }, { "epoch": 0.07603518142188213, "grad_norm": 320.6971130371094, "learning_rate": 7.528000000000001e-06, "loss": 41.1966, "step": 37640 }, { "epoch": 0.07605538205456595, "grad_norm": 255.6307830810547, "learning_rate": 7.530000000000001e-06, "loss": 32.2554, "step": 37650 }, { "epoch": 0.07607558268724976, "grad_norm": 237.41720581054688, "learning_rate": 7.532e-06, "loss": 40.1886, "step": 37660 }, { "epoch": 0.07609578331993358, "grad_norm": 206.28013610839844, "learning_rate": 7.534e-06, "loss": 33.6421, "step": 37670 }, { "epoch": 0.0761159839526174, "grad_norm": 198.29176330566406, "learning_rate": 7.536000000000001e-06, "loss": 13.7155, "step": 37680 }, { "epoch": 0.0761361845853012, "grad_norm": 421.8310852050781, "learning_rate": 7.5380000000000005e-06, "loss": 32.6666, "step": 37690 }, { "epoch": 0.07615638521798503, "grad_norm": 89.13458251953125, "learning_rate": 7.540000000000001e-06, "loss": 23.2478, "step": 37700 }, { "epoch": 0.07617658585066885, "grad_norm": 481.56134033203125, "learning_rate": 7.542000000000001e-06, "loss": 38.0089, "step": 37710 }, { "epoch": 0.07619678648335265, "grad_norm": 477.40826416015625, "learning_rate": 7.544e-06, "loss": 35.8815, "step": 37720 }, { "epoch": 0.07621698711603647, "grad_norm": 342.3693542480469, "learning_rate": 7.546000000000001e-06, "loss": 28.092, "step": 37730 }, { "epoch": 0.07623718774872029, "grad_norm": 399.5074768066406, "learning_rate": 7.548000000000001e-06, "loss": 31.9544, "step": 37740 }, { "epoch": 0.07625738838140411, "grad_norm": 236.89337158203125, "learning_rate": 7.5500000000000006e-06, "loss": 29.6251, "step": 37750 }, { "epoch": 0.07627758901408792, "grad_norm": 217.6133575439453, "learning_rate": 7.552000000000001e-06, "loss": 27.4308, "step": 37760 }, { "epoch": 0.07629778964677174, "grad_norm": 296.9205627441406, "learning_rate": 7.554e-06, "loss": 32.2303, "step": 37770 }, { "epoch": 0.07631799027945556, "grad_norm": 490.7441711425781, "learning_rate": 7.556000000000001e-06, "loss": 28.4327, "step": 37780 }, { "epoch": 0.07633819091213936, "grad_norm": 1622.865478515625, "learning_rate": 7.558000000000001e-06, "loss": 37.7426, "step": 37790 }, { "epoch": 0.07635839154482318, "grad_norm": 195.11070251464844, "learning_rate": 7.5600000000000005e-06, "loss": 23.7224, "step": 37800 }, { "epoch": 0.076378592177507, "grad_norm": 74.11279296875, "learning_rate": 7.562000000000001e-06, "loss": 29.724, "step": 37810 }, { "epoch": 0.07639879281019081, "grad_norm": 170.7561492919922, "learning_rate": 7.564e-06, "loss": 35.1373, "step": 37820 }, { "epoch": 0.07641899344287463, "grad_norm": 147.35421752929688, "learning_rate": 7.566000000000001e-06, "loss": 51.2351, "step": 37830 }, { "epoch": 0.07643919407555845, "grad_norm": 101.01411437988281, "learning_rate": 7.568000000000001e-06, "loss": 54.9718, "step": 37840 }, { "epoch": 0.07645939470824226, "grad_norm": 316.4485168457031, "learning_rate": 7.57e-06, "loss": 23.4815, "step": 37850 }, { "epoch": 0.07647959534092608, "grad_norm": 500.4422302246094, "learning_rate": 7.5720000000000005e-06, "loss": 29.9861, "step": 37860 }, { "epoch": 0.0764997959736099, "grad_norm": 246.33541870117188, "learning_rate": 7.574e-06, "loss": 24.9734, "step": 37870 }, { "epoch": 0.0765199966062937, "grad_norm": 236.24459838867188, "learning_rate": 7.576000000000001e-06, "loss": 23.4552, "step": 37880 }, { "epoch": 0.07654019723897752, "grad_norm": 1214.046142578125, "learning_rate": 7.578000000000001e-06, "loss": 32.9602, "step": 37890 }, { "epoch": 0.07656039787166134, "grad_norm": 211.28492736816406, "learning_rate": 7.58e-06, "loss": 28.5644, "step": 37900 }, { "epoch": 0.07658059850434516, "grad_norm": 199.13990783691406, "learning_rate": 7.582e-06, "loss": 28.9542, "step": 37910 }, { "epoch": 0.07660079913702897, "grad_norm": 235.23980712890625, "learning_rate": 7.5840000000000006e-06, "loss": 38.0257, "step": 37920 }, { "epoch": 0.07662099976971279, "grad_norm": 155.21615600585938, "learning_rate": 7.586000000000001e-06, "loss": 29.3431, "step": 37930 }, { "epoch": 0.07664120040239661, "grad_norm": 84.38011932373047, "learning_rate": 7.588000000000001e-06, "loss": 19.5528, "step": 37940 }, { "epoch": 0.07666140103508041, "grad_norm": 450.3522644042969, "learning_rate": 7.590000000000001e-06, "loss": 38.1439, "step": 37950 }, { "epoch": 0.07668160166776423, "grad_norm": 532.288818359375, "learning_rate": 7.592e-06, "loss": 29.1103, "step": 37960 }, { "epoch": 0.07670180230044805, "grad_norm": 372.9450378417969, "learning_rate": 7.5940000000000005e-06, "loss": 20.2406, "step": 37970 }, { "epoch": 0.07672200293313186, "grad_norm": 863.7986450195312, "learning_rate": 7.5960000000000015e-06, "loss": 49.9165, "step": 37980 }, { "epoch": 0.07674220356581568, "grad_norm": 476.55377197265625, "learning_rate": 7.598000000000001e-06, "loss": 53.3683, "step": 37990 }, { "epoch": 0.0767624041984995, "grad_norm": 439.2981872558594, "learning_rate": 7.600000000000001e-06, "loss": 51.0978, "step": 38000 }, { "epoch": 0.07678260483118331, "grad_norm": 179.1730194091797, "learning_rate": 7.602e-06, "loss": 12.8497, "step": 38010 }, { "epoch": 0.07680280546386713, "grad_norm": 312.1766662597656, "learning_rate": 7.604e-06, "loss": 25.8815, "step": 38020 }, { "epoch": 0.07682300609655095, "grad_norm": 523.0887451171875, "learning_rate": 7.606000000000001e-06, "loss": 27.2018, "step": 38030 }, { "epoch": 0.07684320672923475, "grad_norm": 287.4948425292969, "learning_rate": 7.608000000000001e-06, "loss": 28.3378, "step": 38040 }, { "epoch": 0.07686340736191857, "grad_norm": 36.23078536987305, "learning_rate": 7.610000000000001e-06, "loss": 17.2869, "step": 38050 }, { "epoch": 0.0768836079946024, "grad_norm": 308.6112365722656, "learning_rate": 7.612e-06, "loss": 34.8253, "step": 38060 }, { "epoch": 0.07690380862728621, "grad_norm": 197.16746520996094, "learning_rate": 7.614e-06, "loss": 40.0932, "step": 38070 }, { "epoch": 0.07692400925997002, "grad_norm": 418.63104248046875, "learning_rate": 7.616000000000001e-06, "loss": 26.4699, "step": 38080 }, { "epoch": 0.07694420989265384, "grad_norm": 349.3082275390625, "learning_rate": 7.618000000000001e-06, "loss": 43.366, "step": 38090 }, { "epoch": 0.07696441052533766, "grad_norm": 365.4903259277344, "learning_rate": 7.620000000000001e-06, "loss": 30.0792, "step": 38100 }, { "epoch": 0.07698461115802147, "grad_norm": 116.9273910522461, "learning_rate": 7.622000000000001e-06, "loss": 61.813, "step": 38110 }, { "epoch": 0.07700481179070529, "grad_norm": 229.65188598632812, "learning_rate": 7.624e-06, "loss": 34.4135, "step": 38120 }, { "epoch": 0.0770250124233891, "grad_norm": 288.595947265625, "learning_rate": 7.626e-06, "loss": 23.117, "step": 38130 }, { "epoch": 0.07704521305607291, "grad_norm": 502.9750061035156, "learning_rate": 7.628000000000001e-06, "loss": 30.9003, "step": 38140 }, { "epoch": 0.07706541368875673, "grad_norm": 483.4169921875, "learning_rate": 7.630000000000001e-06, "loss": 28.5855, "step": 38150 }, { "epoch": 0.07708561432144055, "grad_norm": 118.48641967773438, "learning_rate": 7.632e-06, "loss": 28.8925, "step": 38160 }, { "epoch": 0.07710581495412436, "grad_norm": 193.31790161132812, "learning_rate": 7.634e-06, "loss": 35.0673, "step": 38170 }, { "epoch": 0.07712601558680818, "grad_norm": 354.1195373535156, "learning_rate": 7.636e-06, "loss": 28.2316, "step": 38180 }, { "epoch": 0.077146216219492, "grad_norm": 212.8251495361328, "learning_rate": 7.638e-06, "loss": 22.5711, "step": 38190 }, { "epoch": 0.0771664168521758, "grad_norm": 104.05445098876953, "learning_rate": 7.640000000000001e-06, "loss": 38.8466, "step": 38200 }, { "epoch": 0.07718661748485962, "grad_norm": 306.7193603515625, "learning_rate": 7.642e-06, "loss": 33.4127, "step": 38210 }, { "epoch": 0.07720681811754344, "grad_norm": 381.9257507324219, "learning_rate": 7.644e-06, "loss": 26.3448, "step": 38220 }, { "epoch": 0.07722701875022726, "grad_norm": 392.51611328125, "learning_rate": 7.646e-06, "loss": 21.2676, "step": 38230 }, { "epoch": 0.07724721938291107, "grad_norm": 279.4265441894531, "learning_rate": 7.648e-06, "loss": 20.9168, "step": 38240 }, { "epoch": 0.07726742001559489, "grad_norm": 493.9838562011719, "learning_rate": 7.650000000000001e-06, "loss": 22.7139, "step": 38250 }, { "epoch": 0.07728762064827871, "grad_norm": 199.35911560058594, "learning_rate": 7.652e-06, "loss": 47.0684, "step": 38260 }, { "epoch": 0.07730782128096252, "grad_norm": 250.8623504638672, "learning_rate": 7.654e-06, "loss": 20.4408, "step": 38270 }, { "epoch": 0.07732802191364634, "grad_norm": 334.54229736328125, "learning_rate": 7.656000000000001e-06, "loss": 18.4654, "step": 38280 }, { "epoch": 0.07734822254633016, "grad_norm": 284.93121337890625, "learning_rate": 7.658e-06, "loss": 15.5799, "step": 38290 }, { "epoch": 0.07736842317901396, "grad_norm": 282.11199951171875, "learning_rate": 7.660000000000001e-06, "loss": 23.7016, "step": 38300 }, { "epoch": 0.07738862381169778, "grad_norm": 90.72693634033203, "learning_rate": 7.662e-06, "loss": 15.7116, "step": 38310 }, { "epoch": 0.0774088244443816, "grad_norm": 250.9864044189453, "learning_rate": 7.664e-06, "loss": 24.6919, "step": 38320 }, { "epoch": 0.07742902507706541, "grad_norm": 193.32977294921875, "learning_rate": 7.666e-06, "loss": 20.7896, "step": 38330 }, { "epoch": 0.07744922570974923, "grad_norm": 283.58465576171875, "learning_rate": 7.668000000000002e-06, "loss": 15.6896, "step": 38340 }, { "epoch": 0.07746942634243305, "grad_norm": 250.5432891845703, "learning_rate": 7.670000000000001e-06, "loss": 31.925, "step": 38350 }, { "epoch": 0.07748962697511685, "grad_norm": 444.73907470703125, "learning_rate": 7.672e-06, "loss": 27.5278, "step": 38360 }, { "epoch": 0.07750982760780067, "grad_norm": 1004.3863525390625, "learning_rate": 7.674e-06, "loss": 45.7503, "step": 38370 }, { "epoch": 0.0775300282404845, "grad_norm": 409.1285400390625, "learning_rate": 7.676e-06, "loss": 44.6202, "step": 38380 }, { "epoch": 0.07755022887316831, "grad_norm": 856.62841796875, "learning_rate": 7.678000000000002e-06, "loss": 27.1218, "step": 38390 }, { "epoch": 0.07757042950585212, "grad_norm": 301.2478332519531, "learning_rate": 7.680000000000001e-06, "loss": 24.1744, "step": 38400 }, { "epoch": 0.07759063013853594, "grad_norm": 492.9810791015625, "learning_rate": 7.682e-06, "loss": 42.4862, "step": 38410 }, { "epoch": 0.07761083077121976, "grad_norm": 250.3079071044922, "learning_rate": 7.684e-06, "loss": 30.6978, "step": 38420 }, { "epoch": 0.07763103140390357, "grad_norm": 436.26019287109375, "learning_rate": 7.686e-06, "loss": 48.8635, "step": 38430 }, { "epoch": 0.07765123203658739, "grad_norm": 130.24365234375, "learning_rate": 7.688000000000002e-06, "loss": 37.9875, "step": 38440 }, { "epoch": 0.0776714326692712, "grad_norm": 283.32275390625, "learning_rate": 7.690000000000001e-06, "loss": 35.5894, "step": 38450 }, { "epoch": 0.07769163330195501, "grad_norm": 334.1865234375, "learning_rate": 7.692e-06, "loss": 22.2953, "step": 38460 }, { "epoch": 0.07771183393463883, "grad_norm": 104.75457763671875, "learning_rate": 7.694e-06, "loss": 50.6177, "step": 38470 }, { "epoch": 0.07773203456732265, "grad_norm": 434.3100891113281, "learning_rate": 7.696e-06, "loss": 37.0039, "step": 38480 }, { "epoch": 0.07775223520000646, "grad_norm": 420.5364074707031, "learning_rate": 7.698000000000002e-06, "loss": 31.4752, "step": 38490 }, { "epoch": 0.07777243583269028, "grad_norm": 465.9878234863281, "learning_rate": 7.7e-06, "loss": 24.6613, "step": 38500 }, { "epoch": 0.0777926364653741, "grad_norm": 570.0089111328125, "learning_rate": 7.702e-06, "loss": 30.2488, "step": 38510 }, { "epoch": 0.0778128370980579, "grad_norm": 283.9444885253906, "learning_rate": 7.704000000000001e-06, "loss": 40.7462, "step": 38520 }, { "epoch": 0.07783303773074172, "grad_norm": 630.6188354492188, "learning_rate": 7.706e-06, "loss": 27.6957, "step": 38530 }, { "epoch": 0.07785323836342554, "grad_norm": 403.64471435546875, "learning_rate": 7.708000000000001e-06, "loss": 33.9087, "step": 38540 }, { "epoch": 0.07787343899610936, "grad_norm": 106.56230926513672, "learning_rate": 7.71e-06, "loss": 34.0174, "step": 38550 }, { "epoch": 0.07789363962879317, "grad_norm": 181.42352294921875, "learning_rate": 7.712e-06, "loss": 12.7997, "step": 38560 }, { "epoch": 0.07791384026147699, "grad_norm": 102.48980712890625, "learning_rate": 7.714000000000001e-06, "loss": 20.7698, "step": 38570 }, { "epoch": 0.07793404089416081, "grad_norm": 156.4767303466797, "learning_rate": 7.716e-06, "loss": 13.9578, "step": 38580 }, { "epoch": 0.07795424152684462, "grad_norm": 250.74014282226562, "learning_rate": 7.718000000000001e-06, "loss": 42.3722, "step": 38590 }, { "epoch": 0.07797444215952844, "grad_norm": 26.536998748779297, "learning_rate": 7.72e-06, "loss": 20.4438, "step": 38600 }, { "epoch": 0.07799464279221226, "grad_norm": 265.4915771484375, "learning_rate": 7.722e-06, "loss": 23.7436, "step": 38610 }, { "epoch": 0.07801484342489606, "grad_norm": 266.903564453125, "learning_rate": 7.724000000000001e-06, "loss": 23.5531, "step": 38620 }, { "epoch": 0.07803504405757988, "grad_norm": 549.0919189453125, "learning_rate": 7.726e-06, "loss": 42.3911, "step": 38630 }, { "epoch": 0.0780552446902637, "grad_norm": 513.4551391601562, "learning_rate": 7.728000000000001e-06, "loss": 66.4977, "step": 38640 }, { "epoch": 0.07807544532294751, "grad_norm": 445.8760986328125, "learning_rate": 7.73e-06, "loss": 32.1178, "step": 38650 }, { "epoch": 0.07809564595563133, "grad_norm": 350.96405029296875, "learning_rate": 7.732e-06, "loss": 45.5947, "step": 38660 }, { "epoch": 0.07811584658831515, "grad_norm": 568.6697387695312, "learning_rate": 7.734e-06, "loss": 34.3551, "step": 38670 }, { "epoch": 0.07813604722099896, "grad_norm": 487.7399597167969, "learning_rate": 7.736e-06, "loss": 41.2422, "step": 38680 }, { "epoch": 0.07815624785368278, "grad_norm": 214.81137084960938, "learning_rate": 7.738000000000001e-06, "loss": 26.7334, "step": 38690 }, { "epoch": 0.0781764484863666, "grad_norm": 150.01593017578125, "learning_rate": 7.74e-06, "loss": 27.2484, "step": 38700 }, { "epoch": 0.07819664911905042, "grad_norm": 350.7374572753906, "learning_rate": 7.742000000000001e-06, "loss": 32.6605, "step": 38710 }, { "epoch": 0.07821684975173422, "grad_norm": 361.48638916015625, "learning_rate": 7.744e-06, "loss": 46.3375, "step": 38720 }, { "epoch": 0.07823705038441804, "grad_norm": 120.71048736572266, "learning_rate": 7.746e-06, "loss": 31.5285, "step": 38730 }, { "epoch": 0.07825725101710186, "grad_norm": 436.93212890625, "learning_rate": 7.748000000000001e-06, "loss": 27.044, "step": 38740 }, { "epoch": 0.07827745164978567, "grad_norm": 775.6504516601562, "learning_rate": 7.75e-06, "loss": 43.6234, "step": 38750 }, { "epoch": 0.07829765228246949, "grad_norm": 558.884521484375, "learning_rate": 7.752000000000001e-06, "loss": 43.6799, "step": 38760 }, { "epoch": 0.07831785291515331, "grad_norm": 186.13818359375, "learning_rate": 7.754e-06, "loss": 32.61, "step": 38770 }, { "epoch": 0.07833805354783711, "grad_norm": 115.69839477539062, "learning_rate": 7.756e-06, "loss": 45.8588, "step": 38780 }, { "epoch": 0.07835825418052093, "grad_norm": 0.0, "learning_rate": 7.758000000000001e-06, "loss": 25.6279, "step": 38790 }, { "epoch": 0.07837845481320475, "grad_norm": 845.0911254882812, "learning_rate": 7.76e-06, "loss": 56.0535, "step": 38800 }, { "epoch": 0.07839865544588856, "grad_norm": 92.69341278076172, "learning_rate": 7.762000000000001e-06, "loss": 31.0724, "step": 38810 }, { "epoch": 0.07841885607857238, "grad_norm": 484.225830078125, "learning_rate": 7.764e-06, "loss": 41.4143, "step": 38820 }, { "epoch": 0.0784390567112562, "grad_norm": 673.5256958007812, "learning_rate": 7.766e-06, "loss": 56.2218, "step": 38830 }, { "epoch": 0.07845925734394, "grad_norm": 274.1990661621094, "learning_rate": 7.768e-06, "loss": 21.4156, "step": 38840 }, { "epoch": 0.07847945797662383, "grad_norm": 180.1290283203125, "learning_rate": 7.77e-06, "loss": 22.0655, "step": 38850 }, { "epoch": 0.07849965860930765, "grad_norm": 253.1152801513672, "learning_rate": 7.772000000000001e-06, "loss": 44.1436, "step": 38860 }, { "epoch": 0.07851985924199147, "grad_norm": 529.6954956054688, "learning_rate": 7.774e-06, "loss": 36.1115, "step": 38870 }, { "epoch": 0.07854005987467527, "grad_norm": 158.56822204589844, "learning_rate": 7.776e-06, "loss": 19.5905, "step": 38880 }, { "epoch": 0.07856026050735909, "grad_norm": 250.98370361328125, "learning_rate": 7.778e-06, "loss": 22.0232, "step": 38890 }, { "epoch": 0.07858046114004291, "grad_norm": 0.0, "learning_rate": 7.78e-06, "loss": 31.0307, "step": 38900 }, { "epoch": 0.07860066177272672, "grad_norm": 411.3817138671875, "learning_rate": 7.782000000000001e-06, "loss": 35.6574, "step": 38910 }, { "epoch": 0.07862086240541054, "grad_norm": 91.82675170898438, "learning_rate": 7.784e-06, "loss": 22.0617, "step": 38920 }, { "epoch": 0.07864106303809436, "grad_norm": 281.73065185546875, "learning_rate": 7.786e-06, "loss": 23.0781, "step": 38930 }, { "epoch": 0.07866126367077816, "grad_norm": 173.93626403808594, "learning_rate": 7.788e-06, "loss": 32.6501, "step": 38940 }, { "epoch": 0.07868146430346198, "grad_norm": 624.3499145507812, "learning_rate": 7.790000000000002e-06, "loss": 24.9687, "step": 38950 }, { "epoch": 0.0787016649361458, "grad_norm": 613.9353637695312, "learning_rate": 7.792000000000001e-06, "loss": 20.4454, "step": 38960 }, { "epoch": 0.07872186556882961, "grad_norm": 162.20516967773438, "learning_rate": 7.794e-06, "loss": 32.4282, "step": 38970 }, { "epoch": 0.07874206620151343, "grad_norm": 156.30751037597656, "learning_rate": 7.796e-06, "loss": 50.4084, "step": 38980 }, { "epoch": 0.07876226683419725, "grad_norm": 713.5732421875, "learning_rate": 7.798e-06, "loss": 35.6498, "step": 38990 }, { "epoch": 0.07878246746688106, "grad_norm": 150.2173614501953, "learning_rate": 7.800000000000002e-06, "loss": 28.604, "step": 39000 }, { "epoch": 0.07880266809956488, "grad_norm": 872.6207275390625, "learning_rate": 7.802000000000001e-06, "loss": 33.7192, "step": 39010 }, { "epoch": 0.0788228687322487, "grad_norm": 231.05963134765625, "learning_rate": 7.804e-06, "loss": 15.9618, "step": 39020 }, { "epoch": 0.07884306936493252, "grad_norm": 394.56756591796875, "learning_rate": 7.806e-06, "loss": 26.5363, "step": 39030 }, { "epoch": 0.07886326999761632, "grad_norm": 422.15557861328125, "learning_rate": 7.808e-06, "loss": 28.8521, "step": 39040 }, { "epoch": 0.07888347063030014, "grad_norm": 423.39910888671875, "learning_rate": 7.810000000000001e-06, "loss": 26.4724, "step": 39050 }, { "epoch": 0.07890367126298396, "grad_norm": 541.9584350585938, "learning_rate": 7.812e-06, "loss": 40.4451, "step": 39060 }, { "epoch": 0.07892387189566777, "grad_norm": 267.0364990234375, "learning_rate": 7.814e-06, "loss": 19.7719, "step": 39070 }, { "epoch": 0.07894407252835159, "grad_norm": 518.2913208007812, "learning_rate": 7.816000000000001e-06, "loss": 39.5686, "step": 39080 }, { "epoch": 0.07896427316103541, "grad_norm": 447.22528076171875, "learning_rate": 7.818e-06, "loss": 29.7089, "step": 39090 }, { "epoch": 0.07898447379371921, "grad_norm": 508.5252380371094, "learning_rate": 7.820000000000001e-06, "loss": 31.3717, "step": 39100 }, { "epoch": 0.07900467442640303, "grad_norm": 30.512914657592773, "learning_rate": 7.822e-06, "loss": 42.3715, "step": 39110 }, { "epoch": 0.07902487505908685, "grad_norm": 160.32351684570312, "learning_rate": 7.824e-06, "loss": 29.866, "step": 39120 }, { "epoch": 0.07904507569177066, "grad_norm": 274.9743957519531, "learning_rate": 7.826000000000001e-06, "loss": 26.1579, "step": 39130 }, { "epoch": 0.07906527632445448, "grad_norm": 64.92930603027344, "learning_rate": 7.828000000000002e-06, "loss": 21.6843, "step": 39140 }, { "epoch": 0.0790854769571383, "grad_norm": 367.30499267578125, "learning_rate": 7.830000000000001e-06, "loss": 28.0127, "step": 39150 }, { "epoch": 0.07910567758982211, "grad_norm": 357.40155029296875, "learning_rate": 7.832e-06, "loss": 19.7759, "step": 39160 }, { "epoch": 0.07912587822250593, "grad_norm": 480.8984680175781, "learning_rate": 7.834e-06, "loss": 20.6722, "step": 39170 }, { "epoch": 0.07914607885518975, "grad_norm": 399.2489013671875, "learning_rate": 7.836000000000001e-06, "loss": 18.6696, "step": 39180 }, { "epoch": 0.07916627948787357, "grad_norm": 288.9344177246094, "learning_rate": 7.838000000000002e-06, "loss": 29.6439, "step": 39190 }, { "epoch": 0.07918648012055737, "grad_norm": 225.2371368408203, "learning_rate": 7.840000000000001e-06, "loss": 54.9371, "step": 39200 }, { "epoch": 0.0792066807532412, "grad_norm": 223.05824279785156, "learning_rate": 7.842e-06, "loss": 20.5463, "step": 39210 }, { "epoch": 0.07922688138592501, "grad_norm": 239.17227172851562, "learning_rate": 7.844e-06, "loss": 40.5121, "step": 39220 }, { "epoch": 0.07924708201860882, "grad_norm": 135.14833068847656, "learning_rate": 7.846e-06, "loss": 18.4658, "step": 39230 }, { "epoch": 0.07926728265129264, "grad_norm": 219.3106689453125, "learning_rate": 7.848000000000002e-06, "loss": 28.6128, "step": 39240 }, { "epoch": 0.07928748328397646, "grad_norm": 918.423828125, "learning_rate": 7.850000000000001e-06, "loss": 29.1178, "step": 39250 }, { "epoch": 0.07930768391666027, "grad_norm": 202.35873413085938, "learning_rate": 7.852e-06, "loss": 25.1345, "step": 39260 }, { "epoch": 0.07932788454934409, "grad_norm": 349.73077392578125, "learning_rate": 7.854e-06, "loss": 30.6748, "step": 39270 }, { "epoch": 0.0793480851820279, "grad_norm": 604.9017944335938, "learning_rate": 7.856e-06, "loss": 35.2519, "step": 39280 }, { "epoch": 0.07936828581471171, "grad_norm": 313.26983642578125, "learning_rate": 7.858000000000002e-06, "loss": 28.2664, "step": 39290 }, { "epoch": 0.07938848644739553, "grad_norm": 419.33795166015625, "learning_rate": 7.860000000000001e-06, "loss": 26.6464, "step": 39300 }, { "epoch": 0.07940868708007935, "grad_norm": 192.68238830566406, "learning_rate": 7.862e-06, "loss": 32.6142, "step": 39310 }, { "epoch": 0.07942888771276316, "grad_norm": 168.05323791503906, "learning_rate": 7.864000000000001e-06, "loss": 25.8059, "step": 39320 }, { "epoch": 0.07944908834544698, "grad_norm": 300.5409851074219, "learning_rate": 7.866e-06, "loss": 32.4831, "step": 39330 }, { "epoch": 0.0794692889781308, "grad_norm": 540.9428100585938, "learning_rate": 7.868000000000002e-06, "loss": 32.5384, "step": 39340 }, { "epoch": 0.07948948961081462, "grad_norm": 91.29859161376953, "learning_rate": 7.870000000000001e-06, "loss": 20.3322, "step": 39350 }, { "epoch": 0.07950969024349842, "grad_norm": 179.37200927734375, "learning_rate": 7.872e-06, "loss": 28.2527, "step": 39360 }, { "epoch": 0.07952989087618224, "grad_norm": 1025.3359375, "learning_rate": 7.874000000000001e-06, "loss": 54.2034, "step": 39370 }, { "epoch": 0.07955009150886606, "grad_norm": 204.60137939453125, "learning_rate": 7.876e-06, "loss": 29.0064, "step": 39380 }, { "epoch": 0.07957029214154987, "grad_norm": 62.05070114135742, "learning_rate": 7.878e-06, "loss": 28.4462, "step": 39390 }, { "epoch": 0.07959049277423369, "grad_norm": 107.2646484375, "learning_rate": 7.88e-06, "loss": 17.1708, "step": 39400 }, { "epoch": 0.07961069340691751, "grad_norm": 149.5799102783203, "learning_rate": 7.882e-06, "loss": 22.4472, "step": 39410 }, { "epoch": 0.07963089403960132, "grad_norm": 172.60009765625, "learning_rate": 7.884000000000001e-06, "loss": 25.0812, "step": 39420 }, { "epoch": 0.07965109467228514, "grad_norm": 332.5563049316406, "learning_rate": 7.886e-06, "loss": 31.6898, "step": 39430 }, { "epoch": 0.07967129530496896, "grad_norm": 411.9806823730469, "learning_rate": 7.888e-06, "loss": 20.4908, "step": 39440 }, { "epoch": 0.07969149593765276, "grad_norm": 209.25726318359375, "learning_rate": 7.89e-06, "loss": 24.3344, "step": 39450 }, { "epoch": 0.07971169657033658, "grad_norm": 379.9347839355469, "learning_rate": 7.892e-06, "loss": 42.9146, "step": 39460 }, { "epoch": 0.0797318972030204, "grad_norm": 186.62278747558594, "learning_rate": 7.894000000000001e-06, "loss": 35.9105, "step": 39470 }, { "epoch": 0.07975209783570421, "grad_norm": 245.76437377929688, "learning_rate": 7.896e-06, "loss": 14.2335, "step": 39480 }, { "epoch": 0.07977229846838803, "grad_norm": 146.6034393310547, "learning_rate": 7.898e-06, "loss": 22.7355, "step": 39490 }, { "epoch": 0.07979249910107185, "grad_norm": 410.4243469238281, "learning_rate": 7.9e-06, "loss": 24.1946, "step": 39500 }, { "epoch": 0.07981269973375565, "grad_norm": 248.1637725830078, "learning_rate": 7.902000000000002e-06, "loss": 26.9794, "step": 39510 }, { "epoch": 0.07983290036643947, "grad_norm": 150.55824279785156, "learning_rate": 7.904000000000001e-06, "loss": 39.6953, "step": 39520 }, { "epoch": 0.0798531009991233, "grad_norm": 254.12696838378906, "learning_rate": 7.906e-06, "loss": 38.9566, "step": 39530 }, { "epoch": 0.07987330163180711, "grad_norm": 415.8531494140625, "learning_rate": 7.908e-06, "loss": 29.3243, "step": 39540 }, { "epoch": 0.07989350226449092, "grad_norm": 210.81619262695312, "learning_rate": 7.91e-06, "loss": 24.6885, "step": 39550 }, { "epoch": 0.07991370289717474, "grad_norm": 252.2795867919922, "learning_rate": 7.912000000000001e-06, "loss": 20.2741, "step": 39560 }, { "epoch": 0.07993390352985856, "grad_norm": 198.05453491210938, "learning_rate": 7.914e-06, "loss": 21.0051, "step": 39570 }, { "epoch": 0.07995410416254237, "grad_norm": 343.16778564453125, "learning_rate": 7.916e-06, "loss": 22.7918, "step": 39580 }, { "epoch": 0.07997430479522619, "grad_norm": 292.4228210449219, "learning_rate": 7.918e-06, "loss": 21.4728, "step": 39590 }, { "epoch": 0.07999450542791, "grad_norm": 637.90087890625, "learning_rate": 7.92e-06, "loss": 33.7059, "step": 39600 }, { "epoch": 0.08001470606059381, "grad_norm": 126.41364288330078, "learning_rate": 7.922000000000001e-06, "loss": 24.0562, "step": 39610 }, { "epoch": 0.08003490669327763, "grad_norm": 244.18698120117188, "learning_rate": 7.924e-06, "loss": 49.3408, "step": 39620 }, { "epoch": 0.08005510732596145, "grad_norm": 182.91236877441406, "learning_rate": 7.926e-06, "loss": 17.9665, "step": 39630 }, { "epoch": 0.08007530795864526, "grad_norm": 144.26022338867188, "learning_rate": 7.928e-06, "loss": 42.2314, "step": 39640 }, { "epoch": 0.08009550859132908, "grad_norm": 491.58984375, "learning_rate": 7.93e-06, "loss": 35.5911, "step": 39650 }, { "epoch": 0.0801157092240129, "grad_norm": 1157.291259765625, "learning_rate": 7.932000000000001e-06, "loss": 75.9516, "step": 39660 }, { "epoch": 0.0801359098566967, "grad_norm": 279.23260498046875, "learning_rate": 7.934e-06, "loss": 37.2464, "step": 39670 }, { "epoch": 0.08015611048938052, "grad_norm": 213.79232788085938, "learning_rate": 7.936e-06, "loss": 57.0431, "step": 39680 }, { "epoch": 0.08017631112206434, "grad_norm": 59.882991790771484, "learning_rate": 7.938000000000001e-06, "loss": 35.9057, "step": 39690 }, { "epoch": 0.08019651175474816, "grad_norm": 260.17156982421875, "learning_rate": 7.94e-06, "loss": 39.7954, "step": 39700 }, { "epoch": 0.08021671238743197, "grad_norm": 172.5113067626953, "learning_rate": 7.942000000000001e-06, "loss": 11.3265, "step": 39710 }, { "epoch": 0.08023691302011579, "grad_norm": 336.946533203125, "learning_rate": 7.944e-06, "loss": 25.7131, "step": 39720 }, { "epoch": 0.08025711365279961, "grad_norm": 424.34173583984375, "learning_rate": 7.946e-06, "loss": 26.6149, "step": 39730 }, { "epoch": 0.08027731428548342, "grad_norm": 389.1899719238281, "learning_rate": 7.948e-06, "loss": 19.9098, "step": 39740 }, { "epoch": 0.08029751491816724, "grad_norm": 255.89874267578125, "learning_rate": 7.950000000000002e-06, "loss": 40.5864, "step": 39750 }, { "epoch": 0.08031771555085106, "grad_norm": 138.30908203125, "learning_rate": 7.952000000000001e-06, "loss": 31.5143, "step": 39760 }, { "epoch": 0.08033791618353486, "grad_norm": 82.79999542236328, "learning_rate": 7.954e-06, "loss": 28.5208, "step": 39770 }, { "epoch": 0.08035811681621868, "grad_norm": 179.16165161132812, "learning_rate": 7.956e-06, "loss": 43.5588, "step": 39780 }, { "epoch": 0.0803783174489025, "grad_norm": 140.02513122558594, "learning_rate": 7.958e-06, "loss": 42.9207, "step": 39790 }, { "epoch": 0.08039851808158631, "grad_norm": 573.3342895507812, "learning_rate": 7.960000000000002e-06, "loss": 14.4922, "step": 39800 }, { "epoch": 0.08041871871427013, "grad_norm": 389.9683532714844, "learning_rate": 7.962000000000001e-06, "loss": 43.1999, "step": 39810 }, { "epoch": 0.08043891934695395, "grad_norm": 468.8789978027344, "learning_rate": 7.964e-06, "loss": 64.1244, "step": 39820 }, { "epoch": 0.08045911997963776, "grad_norm": 140.41920471191406, "learning_rate": 7.966e-06, "loss": 20.7715, "step": 39830 }, { "epoch": 0.08047932061232158, "grad_norm": 374.7374267578125, "learning_rate": 7.968e-06, "loss": 21.7507, "step": 39840 }, { "epoch": 0.0804995212450054, "grad_norm": 107.04667663574219, "learning_rate": 7.970000000000002e-06, "loss": 23.0104, "step": 39850 }, { "epoch": 0.08051972187768922, "grad_norm": 577.4719848632812, "learning_rate": 7.972000000000001e-06, "loss": 21.1799, "step": 39860 }, { "epoch": 0.08053992251037302, "grad_norm": 391.03936767578125, "learning_rate": 7.974e-06, "loss": 24.7051, "step": 39870 }, { "epoch": 0.08056012314305684, "grad_norm": 408.8507080078125, "learning_rate": 7.976000000000001e-06, "loss": 22.4824, "step": 39880 }, { "epoch": 0.08058032377574066, "grad_norm": 205.51414489746094, "learning_rate": 7.978e-06, "loss": 26.1893, "step": 39890 }, { "epoch": 0.08060052440842447, "grad_norm": 287.0848083496094, "learning_rate": 7.980000000000002e-06, "loss": 27.1337, "step": 39900 }, { "epoch": 0.08062072504110829, "grad_norm": 142.0780487060547, "learning_rate": 7.982e-06, "loss": 47.5738, "step": 39910 }, { "epoch": 0.08064092567379211, "grad_norm": 258.84295654296875, "learning_rate": 7.984e-06, "loss": 17.7857, "step": 39920 }, { "epoch": 0.08066112630647591, "grad_norm": 440.9141845703125, "learning_rate": 7.986000000000001e-06, "loss": 50.331, "step": 39930 }, { "epoch": 0.08068132693915973, "grad_norm": 254.79446411132812, "learning_rate": 7.988e-06, "loss": 47.5054, "step": 39940 }, { "epoch": 0.08070152757184355, "grad_norm": 63.407222747802734, "learning_rate": 7.990000000000001e-06, "loss": 23.5639, "step": 39950 }, { "epoch": 0.08072172820452736, "grad_norm": 536.3187255859375, "learning_rate": 7.992e-06, "loss": 36.8076, "step": 39960 }, { "epoch": 0.08074192883721118, "grad_norm": 616.1565551757812, "learning_rate": 7.994e-06, "loss": 22.7586, "step": 39970 }, { "epoch": 0.080762129469895, "grad_norm": 211.81797790527344, "learning_rate": 7.996000000000001e-06, "loss": 24.2161, "step": 39980 }, { "epoch": 0.0807823301025788, "grad_norm": 477.96588134765625, "learning_rate": 7.998e-06, "loss": 45.9811, "step": 39990 }, { "epoch": 0.08080253073526263, "grad_norm": 359.7879943847656, "learning_rate": 8.000000000000001e-06, "loss": 16.3087, "step": 40000 }, { "epoch": 0.08082273136794645, "grad_norm": 863.2049560546875, "learning_rate": 8.002e-06, "loss": 37.9083, "step": 40010 }, { "epoch": 0.08084293200063027, "grad_norm": 165.74371337890625, "learning_rate": 8.004e-06, "loss": 43.3696, "step": 40020 }, { "epoch": 0.08086313263331407, "grad_norm": 157.85032653808594, "learning_rate": 8.006000000000001e-06, "loss": 21.7917, "step": 40030 }, { "epoch": 0.08088333326599789, "grad_norm": 168.14688110351562, "learning_rate": 8.008e-06, "loss": 31.49, "step": 40040 }, { "epoch": 0.08090353389868171, "grad_norm": 438.9236755371094, "learning_rate": 8.010000000000001e-06, "loss": 30.0219, "step": 40050 }, { "epoch": 0.08092373453136552, "grad_norm": 310.1937255859375, "learning_rate": 8.012e-06, "loss": 34.872, "step": 40060 }, { "epoch": 0.08094393516404934, "grad_norm": 263.5599060058594, "learning_rate": 8.014e-06, "loss": 45.1015, "step": 40070 }, { "epoch": 0.08096413579673316, "grad_norm": 257.67437744140625, "learning_rate": 8.016e-06, "loss": 32.7484, "step": 40080 }, { "epoch": 0.08098433642941696, "grad_norm": 692.6240844726562, "learning_rate": 8.018e-06, "loss": 48.4083, "step": 40090 }, { "epoch": 0.08100453706210078, "grad_norm": 167.19589233398438, "learning_rate": 8.020000000000001e-06, "loss": 17.4995, "step": 40100 }, { "epoch": 0.0810247376947846, "grad_norm": 560.2919921875, "learning_rate": 8.022e-06, "loss": 34.5696, "step": 40110 }, { "epoch": 0.08104493832746841, "grad_norm": 210.54278564453125, "learning_rate": 8.024000000000001e-06, "loss": 30.3846, "step": 40120 }, { "epoch": 0.08106513896015223, "grad_norm": 248.98561096191406, "learning_rate": 8.026e-06, "loss": 37.1009, "step": 40130 }, { "epoch": 0.08108533959283605, "grad_norm": 503.51629638671875, "learning_rate": 8.028e-06, "loss": 64.9032, "step": 40140 }, { "epoch": 0.08110554022551986, "grad_norm": 418.5016784667969, "learning_rate": 8.030000000000001e-06, "loss": 31.8185, "step": 40150 }, { "epoch": 0.08112574085820368, "grad_norm": 291.9407653808594, "learning_rate": 8.032e-06, "loss": 47.0525, "step": 40160 }, { "epoch": 0.0811459414908875, "grad_norm": 233.1592254638672, "learning_rate": 8.034000000000001e-06, "loss": 33.0006, "step": 40170 }, { "epoch": 0.08116614212357132, "grad_norm": 192.9244842529297, "learning_rate": 8.036e-06, "loss": 28.7474, "step": 40180 }, { "epoch": 0.08118634275625512, "grad_norm": 564.216796875, "learning_rate": 8.038e-06, "loss": 32.0133, "step": 40190 }, { "epoch": 0.08120654338893894, "grad_norm": 454.8736877441406, "learning_rate": 8.040000000000001e-06, "loss": 26.5801, "step": 40200 }, { "epoch": 0.08122674402162276, "grad_norm": 460.096435546875, "learning_rate": 8.042e-06, "loss": 35.8877, "step": 40210 }, { "epoch": 0.08124694465430657, "grad_norm": 353.7837219238281, "learning_rate": 8.044000000000001e-06, "loss": 27.0809, "step": 40220 }, { "epoch": 0.08126714528699039, "grad_norm": 637.493896484375, "learning_rate": 8.046e-06, "loss": 33.4177, "step": 40230 }, { "epoch": 0.08128734591967421, "grad_norm": 178.525390625, "learning_rate": 8.048e-06, "loss": 33.1798, "step": 40240 }, { "epoch": 0.08130754655235801, "grad_norm": 251.61866760253906, "learning_rate": 8.050000000000001e-06, "loss": 40.4518, "step": 40250 }, { "epoch": 0.08132774718504183, "grad_norm": 39.06891632080078, "learning_rate": 8.052e-06, "loss": 31.6461, "step": 40260 }, { "epoch": 0.08134794781772565, "grad_norm": 281.3625183105469, "learning_rate": 8.054000000000001e-06, "loss": 48.1097, "step": 40270 }, { "epoch": 0.08136814845040946, "grad_norm": 945.8479614257812, "learning_rate": 8.056e-06, "loss": 30.1889, "step": 40280 }, { "epoch": 0.08138834908309328, "grad_norm": 271.213623046875, "learning_rate": 8.058e-06, "loss": 39.2823, "step": 40290 }, { "epoch": 0.0814085497157771, "grad_norm": 154.48793029785156, "learning_rate": 8.06e-06, "loss": 28.972, "step": 40300 }, { "epoch": 0.08142875034846091, "grad_norm": 120.0969009399414, "learning_rate": 8.062000000000002e-06, "loss": 29.5174, "step": 40310 }, { "epoch": 0.08144895098114473, "grad_norm": 207.9576416015625, "learning_rate": 8.064000000000001e-06, "loss": 25.9545, "step": 40320 }, { "epoch": 0.08146915161382855, "grad_norm": 231.7963409423828, "learning_rate": 8.066e-06, "loss": 28.4391, "step": 40330 }, { "epoch": 0.08148935224651237, "grad_norm": 348.1459655761719, "learning_rate": 8.068e-06, "loss": 24.5878, "step": 40340 }, { "epoch": 0.08150955287919617, "grad_norm": 683.8905029296875, "learning_rate": 8.07e-06, "loss": 48.0969, "step": 40350 }, { "epoch": 0.08152975351188, "grad_norm": 184.5467987060547, "learning_rate": 8.072000000000002e-06, "loss": 27.6666, "step": 40360 }, { "epoch": 0.08154995414456381, "grad_norm": 49.814697265625, "learning_rate": 8.074000000000001e-06, "loss": 19.4061, "step": 40370 }, { "epoch": 0.08157015477724762, "grad_norm": 463.6975402832031, "learning_rate": 8.076e-06, "loss": 41.6708, "step": 40380 }, { "epoch": 0.08159035540993144, "grad_norm": 224.41197204589844, "learning_rate": 8.078e-06, "loss": 28.5051, "step": 40390 }, { "epoch": 0.08161055604261526, "grad_norm": 253.52926635742188, "learning_rate": 8.08e-06, "loss": 30.3742, "step": 40400 }, { "epoch": 0.08163075667529907, "grad_norm": 343.83209228515625, "learning_rate": 8.082000000000002e-06, "loss": 24.0533, "step": 40410 }, { "epoch": 0.08165095730798289, "grad_norm": 90.15672302246094, "learning_rate": 8.084000000000001e-06, "loss": 30.7446, "step": 40420 }, { "epoch": 0.0816711579406667, "grad_norm": 159.6481170654297, "learning_rate": 8.086e-06, "loss": 20.9314, "step": 40430 }, { "epoch": 0.08169135857335051, "grad_norm": 157.68934631347656, "learning_rate": 8.088e-06, "loss": 45.4913, "step": 40440 }, { "epoch": 0.08171155920603433, "grad_norm": 213.709716796875, "learning_rate": 8.09e-06, "loss": 43.9062, "step": 40450 }, { "epoch": 0.08173175983871815, "grad_norm": 0.0, "learning_rate": 8.092000000000001e-06, "loss": 23.6692, "step": 40460 }, { "epoch": 0.08175196047140196, "grad_norm": 0.0, "learning_rate": 8.094e-06, "loss": 25.6546, "step": 40470 }, { "epoch": 0.08177216110408578, "grad_norm": 405.41015625, "learning_rate": 8.096e-06, "loss": 50.8927, "step": 40480 }, { "epoch": 0.0817923617367696, "grad_norm": 403.33013916015625, "learning_rate": 8.098000000000001e-06, "loss": 24.6395, "step": 40490 }, { "epoch": 0.08181256236945342, "grad_norm": 304.68218994140625, "learning_rate": 8.1e-06, "loss": 29.1053, "step": 40500 }, { "epoch": 0.08183276300213722, "grad_norm": 355.4708251953125, "learning_rate": 8.102000000000001e-06, "loss": 27.8879, "step": 40510 }, { "epoch": 0.08185296363482104, "grad_norm": 479.08544921875, "learning_rate": 8.104e-06, "loss": 29.915, "step": 40520 }, { "epoch": 0.08187316426750486, "grad_norm": 637.172607421875, "learning_rate": 8.106e-06, "loss": 36.0089, "step": 40530 }, { "epoch": 0.08189336490018867, "grad_norm": 130.26646423339844, "learning_rate": 8.108000000000001e-06, "loss": 26.9828, "step": 40540 }, { "epoch": 0.08191356553287249, "grad_norm": 444.16094970703125, "learning_rate": 8.110000000000002e-06, "loss": 29.0736, "step": 40550 }, { "epoch": 0.08193376616555631, "grad_norm": 169.3374481201172, "learning_rate": 8.112000000000001e-06, "loss": 49.4036, "step": 40560 }, { "epoch": 0.08195396679824012, "grad_norm": 159.84176635742188, "learning_rate": 8.114e-06, "loss": 40.1323, "step": 40570 }, { "epoch": 0.08197416743092394, "grad_norm": 126.69480895996094, "learning_rate": 8.116e-06, "loss": 14.1003, "step": 40580 }, { "epoch": 0.08199436806360776, "grad_norm": 635.8216552734375, "learning_rate": 8.118000000000001e-06, "loss": 29.2911, "step": 40590 }, { "epoch": 0.08201456869629156, "grad_norm": 157.3028564453125, "learning_rate": 8.120000000000002e-06, "loss": 20.4374, "step": 40600 }, { "epoch": 0.08203476932897538, "grad_norm": 523.0020141601562, "learning_rate": 8.122000000000001e-06, "loss": 47.6064, "step": 40610 }, { "epoch": 0.0820549699616592, "grad_norm": 510.1506652832031, "learning_rate": 8.124e-06, "loss": 37.0499, "step": 40620 }, { "epoch": 0.08207517059434301, "grad_norm": 115.43280029296875, "learning_rate": 8.126e-06, "loss": 11.0177, "step": 40630 }, { "epoch": 0.08209537122702683, "grad_norm": 308.2386779785156, "learning_rate": 8.128e-06, "loss": 36.6576, "step": 40640 }, { "epoch": 0.08211557185971065, "grad_norm": 288.2535705566406, "learning_rate": 8.13e-06, "loss": 36.9718, "step": 40650 }, { "epoch": 0.08213577249239447, "grad_norm": 572.7259521484375, "learning_rate": 8.132000000000001e-06, "loss": 27.302, "step": 40660 }, { "epoch": 0.08215597312507827, "grad_norm": 278.70574951171875, "learning_rate": 8.134e-06, "loss": 24.7672, "step": 40670 }, { "epoch": 0.0821761737577621, "grad_norm": 312.2572021484375, "learning_rate": 8.136000000000001e-06, "loss": 28.5263, "step": 40680 }, { "epoch": 0.08219637439044591, "grad_norm": 270.0114440917969, "learning_rate": 8.138e-06, "loss": 26.2294, "step": 40690 }, { "epoch": 0.08221657502312972, "grad_norm": 256.81890869140625, "learning_rate": 8.14e-06, "loss": 29.7334, "step": 40700 }, { "epoch": 0.08223677565581354, "grad_norm": 127.7161865234375, "learning_rate": 8.142000000000001e-06, "loss": 20.9934, "step": 40710 }, { "epoch": 0.08225697628849736, "grad_norm": 163.72933959960938, "learning_rate": 8.144e-06, "loss": 27.977, "step": 40720 }, { "epoch": 0.08227717692118117, "grad_norm": 327.1219482421875, "learning_rate": 8.146000000000001e-06, "loss": 46.1607, "step": 40730 }, { "epoch": 0.08229737755386499, "grad_norm": 241.39405822753906, "learning_rate": 8.148e-06, "loss": 36.7972, "step": 40740 }, { "epoch": 0.0823175781865488, "grad_norm": 408.3983459472656, "learning_rate": 8.15e-06, "loss": 18.3151, "step": 40750 }, { "epoch": 0.08233777881923261, "grad_norm": 627.0402221679688, "learning_rate": 8.152000000000001e-06, "loss": 38.8091, "step": 40760 }, { "epoch": 0.08235797945191643, "grad_norm": 350.5205383300781, "learning_rate": 8.154e-06, "loss": 35.262, "step": 40770 }, { "epoch": 0.08237818008460025, "grad_norm": 218.96446228027344, "learning_rate": 8.156000000000001e-06, "loss": 28.8551, "step": 40780 }, { "epoch": 0.08239838071728406, "grad_norm": 424.80548095703125, "learning_rate": 8.158e-06, "loss": 36.3631, "step": 40790 }, { "epoch": 0.08241858134996788, "grad_norm": 100.89142608642578, "learning_rate": 8.16e-06, "loss": 28.5806, "step": 40800 }, { "epoch": 0.0824387819826517, "grad_norm": 173.55133056640625, "learning_rate": 8.162e-06, "loss": 19.854, "step": 40810 }, { "epoch": 0.08245898261533552, "grad_norm": 306.83343505859375, "learning_rate": 8.164e-06, "loss": 28.4752, "step": 40820 }, { "epoch": 0.08247918324801932, "grad_norm": 766.4083251953125, "learning_rate": 8.166000000000001e-06, "loss": 53.2717, "step": 40830 }, { "epoch": 0.08249938388070314, "grad_norm": 337.2882995605469, "learning_rate": 8.168e-06, "loss": 32.2042, "step": 40840 }, { "epoch": 0.08251958451338696, "grad_norm": 581.1080932617188, "learning_rate": 8.17e-06, "loss": 26.9153, "step": 40850 }, { "epoch": 0.08253978514607077, "grad_norm": 226.75962829589844, "learning_rate": 8.172e-06, "loss": 24.8457, "step": 40860 }, { "epoch": 0.08255998577875459, "grad_norm": 201.4370880126953, "learning_rate": 8.174e-06, "loss": 27.0182, "step": 40870 }, { "epoch": 0.08258018641143841, "grad_norm": 216.34823608398438, "learning_rate": 8.176000000000001e-06, "loss": 30.9278, "step": 40880 }, { "epoch": 0.08260038704412222, "grad_norm": 468.9625549316406, "learning_rate": 8.178e-06, "loss": 27.1487, "step": 40890 }, { "epoch": 0.08262058767680604, "grad_norm": 585.5101928710938, "learning_rate": 8.18e-06, "loss": 19.1534, "step": 40900 }, { "epoch": 0.08264078830948986, "grad_norm": 441.31878662109375, "learning_rate": 8.182e-06, "loss": 23.8832, "step": 40910 }, { "epoch": 0.08266098894217366, "grad_norm": 77.79655456542969, "learning_rate": 8.184000000000002e-06, "loss": 39.9275, "step": 40920 }, { "epoch": 0.08268118957485748, "grad_norm": 240.9159393310547, "learning_rate": 8.186000000000001e-06, "loss": 37.3558, "step": 40930 }, { "epoch": 0.0827013902075413, "grad_norm": 393.0770263671875, "learning_rate": 8.188e-06, "loss": 20.696, "step": 40940 }, { "epoch": 0.08272159084022511, "grad_norm": 222.16619873046875, "learning_rate": 8.19e-06, "loss": 27.7187, "step": 40950 }, { "epoch": 0.08274179147290893, "grad_norm": 438.2812805175781, "learning_rate": 8.192e-06, "loss": 24.0314, "step": 40960 }, { "epoch": 0.08276199210559275, "grad_norm": 664.20751953125, "learning_rate": 8.194000000000002e-06, "loss": 50.9502, "step": 40970 }, { "epoch": 0.08278219273827657, "grad_norm": 410.01531982421875, "learning_rate": 8.196e-06, "loss": 35.3512, "step": 40980 }, { "epoch": 0.08280239337096038, "grad_norm": 175.22756958007812, "learning_rate": 8.198e-06, "loss": 18.2625, "step": 40990 }, { "epoch": 0.0828225940036442, "grad_norm": 158.560302734375, "learning_rate": 8.2e-06, "loss": 64.3385, "step": 41000 }, { "epoch": 0.08284279463632802, "grad_norm": 168.2200927734375, "learning_rate": 8.202e-06, "loss": 64.4943, "step": 41010 }, { "epoch": 0.08286299526901182, "grad_norm": 226.4715576171875, "learning_rate": 8.204000000000001e-06, "loss": 23.171, "step": 41020 }, { "epoch": 0.08288319590169564, "grad_norm": 357.5988464355469, "learning_rate": 8.206e-06, "loss": 24.8901, "step": 41030 }, { "epoch": 0.08290339653437946, "grad_norm": 372.561767578125, "learning_rate": 8.208e-06, "loss": 20.6614, "step": 41040 }, { "epoch": 0.08292359716706327, "grad_norm": 392.785400390625, "learning_rate": 8.210000000000001e-06, "loss": 29.3886, "step": 41050 }, { "epoch": 0.08294379779974709, "grad_norm": 249.48855590820312, "learning_rate": 8.212e-06, "loss": 49.0068, "step": 41060 }, { "epoch": 0.08296399843243091, "grad_norm": 210.17108154296875, "learning_rate": 8.214000000000001e-06, "loss": 38.5541, "step": 41070 }, { "epoch": 0.08298419906511471, "grad_norm": 112.82111358642578, "learning_rate": 8.216e-06, "loss": 12.8448, "step": 41080 }, { "epoch": 0.08300439969779853, "grad_norm": 76.14142608642578, "learning_rate": 8.218e-06, "loss": 23.1569, "step": 41090 }, { "epoch": 0.08302460033048235, "grad_norm": 662.5321044921875, "learning_rate": 8.220000000000001e-06, "loss": 29.0148, "step": 41100 }, { "epoch": 0.08304480096316616, "grad_norm": 190.77471923828125, "learning_rate": 8.222000000000002e-06, "loss": 30.8724, "step": 41110 }, { "epoch": 0.08306500159584998, "grad_norm": 312.532958984375, "learning_rate": 8.224000000000001e-06, "loss": 28.3397, "step": 41120 }, { "epoch": 0.0830852022285338, "grad_norm": 196.30162048339844, "learning_rate": 8.226e-06, "loss": 19.1215, "step": 41130 }, { "epoch": 0.08310540286121762, "grad_norm": 261.78900146484375, "learning_rate": 8.228e-06, "loss": 11.2737, "step": 41140 }, { "epoch": 0.08312560349390143, "grad_norm": 56.07016372680664, "learning_rate": 8.23e-06, "loss": 46.9157, "step": 41150 }, { "epoch": 0.08314580412658525, "grad_norm": 329.784423828125, "learning_rate": 8.232000000000002e-06, "loss": 39.4573, "step": 41160 }, { "epoch": 0.08316600475926907, "grad_norm": 282.2011413574219, "learning_rate": 8.234000000000001e-06, "loss": 20.6273, "step": 41170 }, { "epoch": 0.08318620539195287, "grad_norm": 415.67633056640625, "learning_rate": 8.236e-06, "loss": 49.1074, "step": 41180 }, { "epoch": 0.08320640602463669, "grad_norm": 227.2619171142578, "learning_rate": 8.238e-06, "loss": 55.4592, "step": 41190 }, { "epoch": 0.08322660665732051, "grad_norm": 236.8892059326172, "learning_rate": 8.24e-06, "loss": 37.7061, "step": 41200 }, { "epoch": 0.08324680729000432, "grad_norm": 192.26300048828125, "learning_rate": 8.242000000000002e-06, "loss": 56.7623, "step": 41210 }, { "epoch": 0.08326700792268814, "grad_norm": 189.49490356445312, "learning_rate": 8.244000000000001e-06, "loss": 51.0876, "step": 41220 }, { "epoch": 0.08328720855537196, "grad_norm": 258.2380676269531, "learning_rate": 8.246e-06, "loss": 39.6859, "step": 41230 }, { "epoch": 0.08330740918805576, "grad_norm": 524.2737426757812, "learning_rate": 8.248e-06, "loss": 29.5771, "step": 41240 }, { "epoch": 0.08332760982073958, "grad_norm": 77.49710845947266, "learning_rate": 8.25e-06, "loss": 34.6435, "step": 41250 }, { "epoch": 0.0833478104534234, "grad_norm": 265.68145751953125, "learning_rate": 8.252000000000002e-06, "loss": 18.5736, "step": 41260 }, { "epoch": 0.08336801108610721, "grad_norm": 304.15155029296875, "learning_rate": 8.254000000000001e-06, "loss": 34.0947, "step": 41270 }, { "epoch": 0.08338821171879103, "grad_norm": 224.34307861328125, "learning_rate": 8.256e-06, "loss": 32.4563, "step": 41280 }, { "epoch": 0.08340841235147485, "grad_norm": 173.5712890625, "learning_rate": 8.258000000000001e-06, "loss": 19.3969, "step": 41290 }, { "epoch": 0.08342861298415867, "grad_norm": 283.6551513671875, "learning_rate": 8.26e-06, "loss": 34.7951, "step": 41300 }, { "epoch": 0.08344881361684248, "grad_norm": 269.00372314453125, "learning_rate": 8.262000000000002e-06, "loss": 38.4963, "step": 41310 }, { "epoch": 0.0834690142495263, "grad_norm": 353.7650451660156, "learning_rate": 8.264e-06, "loss": 48.9065, "step": 41320 }, { "epoch": 0.08348921488221012, "grad_norm": 527.9609375, "learning_rate": 8.266e-06, "loss": 46.4389, "step": 41330 }, { "epoch": 0.08350941551489392, "grad_norm": 483.041259765625, "learning_rate": 8.268000000000001e-06, "loss": 43.836, "step": 41340 }, { "epoch": 0.08352961614757774, "grad_norm": 286.0506896972656, "learning_rate": 8.27e-06, "loss": 29.8812, "step": 41350 }, { "epoch": 0.08354981678026156, "grad_norm": 372.75299072265625, "learning_rate": 8.272000000000001e-06, "loss": 36.4736, "step": 41360 }, { "epoch": 0.08357001741294537, "grad_norm": 386.266357421875, "learning_rate": 8.274e-06, "loss": 26.3645, "step": 41370 }, { "epoch": 0.08359021804562919, "grad_norm": 350.2783203125, "learning_rate": 8.276e-06, "loss": 42.382, "step": 41380 }, { "epoch": 0.08361041867831301, "grad_norm": 0.0, "learning_rate": 8.278000000000001e-06, "loss": 39.4846, "step": 41390 }, { "epoch": 0.08363061931099681, "grad_norm": 419.4817199707031, "learning_rate": 8.28e-06, "loss": 31.5819, "step": 41400 }, { "epoch": 0.08365081994368063, "grad_norm": 592.9572143554688, "learning_rate": 8.282000000000001e-06, "loss": 51.5094, "step": 41410 }, { "epoch": 0.08367102057636445, "grad_norm": 31.86620330810547, "learning_rate": 8.284e-06, "loss": 23.5829, "step": 41420 }, { "epoch": 0.08369122120904826, "grad_norm": 530.0918579101562, "learning_rate": 8.286e-06, "loss": 28.8136, "step": 41430 }, { "epoch": 0.08371142184173208, "grad_norm": 321.6744384765625, "learning_rate": 8.288000000000001e-06, "loss": 27.5551, "step": 41440 }, { "epoch": 0.0837316224744159, "grad_norm": 232.60191345214844, "learning_rate": 8.29e-06, "loss": 43.3869, "step": 41450 }, { "epoch": 0.08375182310709972, "grad_norm": 293.7187194824219, "learning_rate": 8.292000000000001e-06, "loss": 33.4211, "step": 41460 }, { "epoch": 0.08377202373978353, "grad_norm": 336.6203308105469, "learning_rate": 8.294e-06, "loss": 34.3253, "step": 41470 }, { "epoch": 0.08379222437246735, "grad_norm": 499.2196044921875, "learning_rate": 8.296000000000002e-06, "loss": 59.8273, "step": 41480 }, { "epoch": 0.08381242500515117, "grad_norm": 300.2536926269531, "learning_rate": 8.298000000000001e-06, "loss": 17.5322, "step": 41490 }, { "epoch": 0.08383262563783497, "grad_norm": 668.7669067382812, "learning_rate": 8.3e-06, "loss": 19.9508, "step": 41500 }, { "epoch": 0.0838528262705188, "grad_norm": 33.423954010009766, "learning_rate": 8.302000000000001e-06, "loss": 31.59, "step": 41510 }, { "epoch": 0.08387302690320261, "grad_norm": 517.7073364257812, "learning_rate": 8.304e-06, "loss": 35.2468, "step": 41520 }, { "epoch": 0.08389322753588642, "grad_norm": 159.40745544433594, "learning_rate": 8.306000000000001e-06, "loss": 14.8161, "step": 41530 }, { "epoch": 0.08391342816857024, "grad_norm": 274.1006774902344, "learning_rate": 8.308e-06, "loss": 38.3693, "step": 41540 }, { "epoch": 0.08393362880125406, "grad_norm": 4513.33251953125, "learning_rate": 8.31e-06, "loss": 45.6495, "step": 41550 }, { "epoch": 0.08395382943393787, "grad_norm": 165.20193481445312, "learning_rate": 8.312000000000001e-06, "loss": 15.1101, "step": 41560 }, { "epoch": 0.08397403006662169, "grad_norm": 644.0301513671875, "learning_rate": 8.314e-06, "loss": 32.3317, "step": 41570 }, { "epoch": 0.0839942306993055, "grad_norm": 375.2214050292969, "learning_rate": 8.316000000000001e-06, "loss": 28.8861, "step": 41580 }, { "epoch": 0.08401443133198931, "grad_norm": 215.7982940673828, "learning_rate": 8.318e-06, "loss": 30.0036, "step": 41590 }, { "epoch": 0.08403463196467313, "grad_norm": 199.40289306640625, "learning_rate": 8.32e-06, "loss": 52.6695, "step": 41600 }, { "epoch": 0.08405483259735695, "grad_norm": 264.20306396484375, "learning_rate": 8.322000000000001e-06, "loss": 41.5143, "step": 41610 }, { "epoch": 0.08407503323004077, "grad_norm": 479.6063232421875, "learning_rate": 8.324e-06, "loss": 25.9131, "step": 41620 }, { "epoch": 0.08409523386272458, "grad_norm": 259.01995849609375, "learning_rate": 8.326000000000001e-06, "loss": 19.0429, "step": 41630 }, { "epoch": 0.0841154344954084, "grad_norm": 268.679931640625, "learning_rate": 8.328e-06, "loss": 35.3796, "step": 41640 }, { "epoch": 0.08413563512809222, "grad_norm": 245.01026916503906, "learning_rate": 8.33e-06, "loss": 35.9912, "step": 41650 }, { "epoch": 0.08415583576077602, "grad_norm": 175.18490600585938, "learning_rate": 8.332000000000001e-06, "loss": 25.7014, "step": 41660 }, { "epoch": 0.08417603639345984, "grad_norm": 110.11150360107422, "learning_rate": 8.334e-06, "loss": 15.9875, "step": 41670 }, { "epoch": 0.08419623702614366, "grad_norm": 243.1925506591797, "learning_rate": 8.336000000000001e-06, "loss": 54.028, "step": 41680 }, { "epoch": 0.08421643765882747, "grad_norm": 511.2884216308594, "learning_rate": 8.338e-06, "loss": 22.2955, "step": 41690 }, { "epoch": 0.08423663829151129, "grad_norm": 188.66065979003906, "learning_rate": 8.34e-06, "loss": 17.948, "step": 41700 }, { "epoch": 0.08425683892419511, "grad_norm": 242.79151916503906, "learning_rate": 8.342e-06, "loss": 30.1877, "step": 41710 }, { "epoch": 0.08427703955687892, "grad_norm": 295.4762268066406, "learning_rate": 8.344000000000002e-06, "loss": 20.6829, "step": 41720 }, { "epoch": 0.08429724018956274, "grad_norm": 52.62208557128906, "learning_rate": 8.346000000000001e-06, "loss": 20.0833, "step": 41730 }, { "epoch": 0.08431744082224656, "grad_norm": 301.21337890625, "learning_rate": 8.348e-06, "loss": 26.0083, "step": 41740 }, { "epoch": 0.08433764145493036, "grad_norm": 2658.359375, "learning_rate": 8.35e-06, "loss": 49.5686, "step": 41750 }, { "epoch": 0.08435784208761418, "grad_norm": 375.2143249511719, "learning_rate": 8.352e-06, "loss": 33.8862, "step": 41760 }, { "epoch": 0.084378042720298, "grad_norm": 221.1530303955078, "learning_rate": 8.354000000000002e-06, "loss": 35.8105, "step": 41770 }, { "epoch": 0.08439824335298182, "grad_norm": 317.4464111328125, "learning_rate": 8.356000000000001e-06, "loss": 21.6561, "step": 41780 }, { "epoch": 0.08441844398566563, "grad_norm": 456.3763427734375, "learning_rate": 8.358e-06, "loss": 31.3938, "step": 41790 }, { "epoch": 0.08443864461834945, "grad_norm": 952.3814697265625, "learning_rate": 8.36e-06, "loss": 34.7157, "step": 41800 }, { "epoch": 0.08445884525103327, "grad_norm": 133.3919677734375, "learning_rate": 8.362e-06, "loss": 24.6247, "step": 41810 }, { "epoch": 0.08447904588371707, "grad_norm": 305.6374206542969, "learning_rate": 8.364000000000002e-06, "loss": 52.421, "step": 41820 }, { "epoch": 0.0844992465164009, "grad_norm": 235.7846221923828, "learning_rate": 8.366000000000001e-06, "loss": 33.5586, "step": 41830 }, { "epoch": 0.08451944714908471, "grad_norm": 178.06602478027344, "learning_rate": 8.368e-06, "loss": 22.454, "step": 41840 }, { "epoch": 0.08453964778176852, "grad_norm": 247.74459838867188, "learning_rate": 8.370000000000001e-06, "loss": 29.7215, "step": 41850 }, { "epoch": 0.08455984841445234, "grad_norm": 314.3917236328125, "learning_rate": 8.372e-06, "loss": 30.1896, "step": 41860 }, { "epoch": 0.08458004904713616, "grad_norm": 394.4649963378906, "learning_rate": 8.374000000000001e-06, "loss": 37.3609, "step": 41870 }, { "epoch": 0.08460024967981997, "grad_norm": 224.7773895263672, "learning_rate": 8.376e-06, "loss": 30.5429, "step": 41880 }, { "epoch": 0.08462045031250379, "grad_norm": 191.2155303955078, "learning_rate": 8.378e-06, "loss": 38.9542, "step": 41890 }, { "epoch": 0.0846406509451876, "grad_norm": 378.46441650390625, "learning_rate": 8.380000000000001e-06, "loss": 29.1265, "step": 41900 }, { "epoch": 0.08466085157787141, "grad_norm": 350.72320556640625, "learning_rate": 8.382e-06, "loss": 25.5901, "step": 41910 }, { "epoch": 0.08468105221055523, "grad_norm": 231.08656311035156, "learning_rate": 8.384000000000001e-06, "loss": 22.1115, "step": 41920 }, { "epoch": 0.08470125284323905, "grad_norm": 307.6070861816406, "learning_rate": 8.386e-06, "loss": 38.3375, "step": 41930 }, { "epoch": 0.08472145347592287, "grad_norm": 282.542724609375, "learning_rate": 8.388e-06, "loss": 28.1582, "step": 41940 }, { "epoch": 0.08474165410860668, "grad_norm": 167.2634735107422, "learning_rate": 8.390000000000001e-06, "loss": 15.3848, "step": 41950 }, { "epoch": 0.0847618547412905, "grad_norm": 259.5269775390625, "learning_rate": 8.392e-06, "loss": 27.6436, "step": 41960 }, { "epoch": 0.08478205537397432, "grad_norm": 419.9631652832031, "learning_rate": 8.394000000000001e-06, "loss": 34.1695, "step": 41970 }, { "epoch": 0.08480225600665812, "grad_norm": 93.49170684814453, "learning_rate": 8.396e-06, "loss": 34.9861, "step": 41980 }, { "epoch": 0.08482245663934194, "grad_norm": 319.0379638671875, "learning_rate": 8.398e-06, "loss": 25.2961, "step": 41990 }, { "epoch": 0.08484265727202576, "grad_norm": 178.8649139404297, "learning_rate": 8.400000000000001e-06, "loss": 23.2323, "step": 42000 }, { "epoch": 0.08486285790470957, "grad_norm": 420.569580078125, "learning_rate": 8.402e-06, "loss": 29.0994, "step": 42010 }, { "epoch": 0.08488305853739339, "grad_norm": 133.65672302246094, "learning_rate": 8.404000000000001e-06, "loss": 75.1066, "step": 42020 }, { "epoch": 0.08490325917007721, "grad_norm": 427.2525939941406, "learning_rate": 8.406e-06, "loss": 33.7187, "step": 42030 }, { "epoch": 0.08492345980276102, "grad_norm": 509.57177734375, "learning_rate": 8.408e-06, "loss": 56.1802, "step": 42040 }, { "epoch": 0.08494366043544484, "grad_norm": 333.47821044921875, "learning_rate": 8.41e-06, "loss": 28.756, "step": 42050 }, { "epoch": 0.08496386106812866, "grad_norm": 157.85980224609375, "learning_rate": 8.412e-06, "loss": 22.9473, "step": 42060 }, { "epoch": 0.08498406170081246, "grad_norm": 324.76995849609375, "learning_rate": 8.414000000000001e-06, "loss": 22.5294, "step": 42070 }, { "epoch": 0.08500426233349628, "grad_norm": 298.1239318847656, "learning_rate": 8.416e-06, "loss": 22.5299, "step": 42080 }, { "epoch": 0.0850244629661801, "grad_norm": 276.1764831542969, "learning_rate": 8.418000000000001e-06, "loss": 23.2805, "step": 42090 }, { "epoch": 0.08504466359886392, "grad_norm": 104.5836410522461, "learning_rate": 8.42e-06, "loss": 35.1485, "step": 42100 }, { "epoch": 0.08506486423154773, "grad_norm": 150.0564422607422, "learning_rate": 8.422e-06, "loss": 28.03, "step": 42110 }, { "epoch": 0.08508506486423155, "grad_norm": 191.0790557861328, "learning_rate": 8.424000000000001e-06, "loss": 14.4712, "step": 42120 }, { "epoch": 0.08510526549691537, "grad_norm": 148.47979736328125, "learning_rate": 8.426e-06, "loss": 48.8822, "step": 42130 }, { "epoch": 0.08512546612959918, "grad_norm": 85.35737609863281, "learning_rate": 8.428000000000001e-06, "loss": 36.4277, "step": 42140 }, { "epoch": 0.085145666762283, "grad_norm": 424.6553039550781, "learning_rate": 8.43e-06, "loss": 25.3304, "step": 42150 }, { "epoch": 0.08516586739496682, "grad_norm": 212.98960876464844, "learning_rate": 8.432e-06, "loss": 42.1157, "step": 42160 }, { "epoch": 0.08518606802765062, "grad_norm": 157.29359436035156, "learning_rate": 8.434000000000001e-06, "loss": 34.9971, "step": 42170 }, { "epoch": 0.08520626866033444, "grad_norm": 558.0282592773438, "learning_rate": 8.436e-06, "loss": 31.2307, "step": 42180 }, { "epoch": 0.08522646929301826, "grad_norm": 180.7625274658203, "learning_rate": 8.438000000000001e-06, "loss": 34.8854, "step": 42190 }, { "epoch": 0.08524666992570207, "grad_norm": 52.43059158325195, "learning_rate": 8.44e-06, "loss": 23.0003, "step": 42200 }, { "epoch": 0.08526687055838589, "grad_norm": 481.37432861328125, "learning_rate": 8.442e-06, "loss": 36.8225, "step": 42210 }, { "epoch": 0.08528707119106971, "grad_norm": 398.3457946777344, "learning_rate": 8.444e-06, "loss": 32.4871, "step": 42220 }, { "epoch": 0.08530727182375351, "grad_norm": 872.3755493164062, "learning_rate": 8.446e-06, "loss": 44.333, "step": 42230 }, { "epoch": 0.08532747245643733, "grad_norm": 209.79222106933594, "learning_rate": 8.448000000000001e-06, "loss": 21.6167, "step": 42240 }, { "epoch": 0.08534767308912115, "grad_norm": 324.35980224609375, "learning_rate": 8.45e-06, "loss": 46.3984, "step": 42250 }, { "epoch": 0.08536787372180497, "grad_norm": 164.54129028320312, "learning_rate": 8.452e-06, "loss": 19.7064, "step": 42260 }, { "epoch": 0.08538807435448878, "grad_norm": 154.22244262695312, "learning_rate": 8.454e-06, "loss": 24.3104, "step": 42270 }, { "epoch": 0.0854082749871726, "grad_norm": 351.8018798828125, "learning_rate": 8.456000000000002e-06, "loss": 39.6021, "step": 42280 }, { "epoch": 0.08542847561985642, "grad_norm": 77.59127044677734, "learning_rate": 8.458000000000001e-06, "loss": 29.2288, "step": 42290 }, { "epoch": 0.08544867625254023, "grad_norm": 294.09991455078125, "learning_rate": 8.46e-06, "loss": 19.8423, "step": 42300 }, { "epoch": 0.08546887688522405, "grad_norm": 257.42919921875, "learning_rate": 8.462e-06, "loss": 31.9114, "step": 42310 }, { "epoch": 0.08548907751790787, "grad_norm": 171.97219848632812, "learning_rate": 8.464e-06, "loss": 19.2819, "step": 42320 }, { "epoch": 0.08550927815059167, "grad_norm": 89.53369903564453, "learning_rate": 8.466000000000002e-06, "loss": 34.8391, "step": 42330 }, { "epoch": 0.08552947878327549, "grad_norm": 235.73695373535156, "learning_rate": 8.468000000000001e-06, "loss": 21.7473, "step": 42340 }, { "epoch": 0.08554967941595931, "grad_norm": 206.36239624023438, "learning_rate": 8.47e-06, "loss": 34.7355, "step": 42350 }, { "epoch": 0.08556988004864312, "grad_norm": 257.4617004394531, "learning_rate": 8.472e-06, "loss": 16.6864, "step": 42360 }, { "epoch": 0.08559008068132694, "grad_norm": 345.4248962402344, "learning_rate": 8.474e-06, "loss": 39.4804, "step": 42370 }, { "epoch": 0.08561028131401076, "grad_norm": 308.76263427734375, "learning_rate": 8.476000000000002e-06, "loss": 40.1388, "step": 42380 }, { "epoch": 0.08563048194669456, "grad_norm": 170.83843994140625, "learning_rate": 8.478e-06, "loss": 40.357, "step": 42390 }, { "epoch": 0.08565068257937838, "grad_norm": 631.047119140625, "learning_rate": 8.48e-06, "loss": 25.3222, "step": 42400 }, { "epoch": 0.0856708832120622, "grad_norm": 426.3690185546875, "learning_rate": 8.482e-06, "loss": 52.927, "step": 42410 }, { "epoch": 0.08569108384474602, "grad_norm": 30.795799255371094, "learning_rate": 8.484e-06, "loss": 25.8796, "step": 42420 }, { "epoch": 0.08571128447742983, "grad_norm": 430.88421630859375, "learning_rate": 8.486000000000001e-06, "loss": 35.4998, "step": 42430 }, { "epoch": 0.08573148511011365, "grad_norm": 1433.7452392578125, "learning_rate": 8.488e-06, "loss": 52.4891, "step": 42440 }, { "epoch": 0.08575168574279747, "grad_norm": 254.577880859375, "learning_rate": 8.49e-06, "loss": 33.805, "step": 42450 }, { "epoch": 0.08577188637548128, "grad_norm": 459.60638427734375, "learning_rate": 8.492000000000001e-06, "loss": 34.5536, "step": 42460 }, { "epoch": 0.0857920870081651, "grad_norm": 274.1844482421875, "learning_rate": 8.494e-06, "loss": 18.0228, "step": 42470 }, { "epoch": 0.08581228764084892, "grad_norm": 300.3509826660156, "learning_rate": 8.496000000000001e-06, "loss": 34.386, "step": 42480 }, { "epoch": 0.08583248827353272, "grad_norm": 44.45258331298828, "learning_rate": 8.498e-06, "loss": 25.8344, "step": 42490 }, { "epoch": 0.08585268890621654, "grad_norm": 240.37379455566406, "learning_rate": 8.5e-06, "loss": 26.4031, "step": 42500 }, { "epoch": 0.08587288953890036, "grad_norm": 14.943487167358398, "learning_rate": 8.502000000000001e-06, "loss": 29.8204, "step": 42510 }, { "epoch": 0.08589309017158417, "grad_norm": 293.4966125488281, "learning_rate": 8.504000000000002e-06, "loss": 20.5903, "step": 42520 }, { "epoch": 0.08591329080426799, "grad_norm": 285.6411437988281, "learning_rate": 8.506000000000001e-06, "loss": 25.664, "step": 42530 }, { "epoch": 0.08593349143695181, "grad_norm": 462.6854553222656, "learning_rate": 8.508e-06, "loss": 30.487, "step": 42540 }, { "epoch": 0.08595369206963561, "grad_norm": 305.1309814453125, "learning_rate": 8.51e-06, "loss": 47.7367, "step": 42550 }, { "epoch": 0.08597389270231943, "grad_norm": 572.1591186523438, "learning_rate": 8.512e-06, "loss": 26.6352, "step": 42560 }, { "epoch": 0.08599409333500325, "grad_norm": 109.34073638916016, "learning_rate": 8.514000000000002e-06, "loss": 51.7328, "step": 42570 }, { "epoch": 0.08601429396768706, "grad_norm": 167.21900939941406, "learning_rate": 8.516000000000001e-06, "loss": 36.8221, "step": 42580 }, { "epoch": 0.08603449460037088, "grad_norm": 0.0, "learning_rate": 8.518e-06, "loss": 17.7282, "step": 42590 }, { "epoch": 0.0860546952330547, "grad_norm": 214.46209716796875, "learning_rate": 8.52e-06, "loss": 23.4998, "step": 42600 }, { "epoch": 0.08607489586573852, "grad_norm": 121.43507385253906, "learning_rate": 8.522e-06, "loss": 24.3943, "step": 42610 }, { "epoch": 0.08609509649842233, "grad_norm": 541.6785278320312, "learning_rate": 8.524000000000002e-06, "loss": 35.5568, "step": 42620 }, { "epoch": 0.08611529713110615, "grad_norm": 488.5171203613281, "learning_rate": 8.526000000000001e-06, "loss": 26.4984, "step": 42630 }, { "epoch": 0.08613549776378997, "grad_norm": 275.2954406738281, "learning_rate": 8.528e-06, "loss": 29.7917, "step": 42640 }, { "epoch": 0.08615569839647377, "grad_norm": 138.8548126220703, "learning_rate": 8.530000000000001e-06, "loss": 24.5588, "step": 42650 }, { "epoch": 0.0861758990291576, "grad_norm": 305.92816162109375, "learning_rate": 8.532e-06, "loss": 25.8591, "step": 42660 }, { "epoch": 0.08619609966184141, "grad_norm": 233.24847412109375, "learning_rate": 8.534000000000002e-06, "loss": 30.9056, "step": 42670 }, { "epoch": 0.08621630029452522, "grad_norm": 342.1689453125, "learning_rate": 8.536000000000001e-06, "loss": 41.2376, "step": 42680 }, { "epoch": 0.08623650092720904, "grad_norm": 206.1613006591797, "learning_rate": 8.538e-06, "loss": 25.8778, "step": 42690 }, { "epoch": 0.08625670155989286, "grad_norm": 334.8594055175781, "learning_rate": 8.540000000000001e-06, "loss": 21.6407, "step": 42700 }, { "epoch": 0.08627690219257667, "grad_norm": 112.96905517578125, "learning_rate": 8.542e-06, "loss": 21.9702, "step": 42710 }, { "epoch": 0.08629710282526049, "grad_norm": 362.480224609375, "learning_rate": 8.544000000000002e-06, "loss": 19.9175, "step": 42720 }, { "epoch": 0.0863173034579443, "grad_norm": 268.5098876953125, "learning_rate": 8.546000000000001e-06, "loss": 20.5704, "step": 42730 }, { "epoch": 0.08633750409062811, "grad_norm": 362.4500427246094, "learning_rate": 8.548e-06, "loss": 13.8568, "step": 42740 }, { "epoch": 0.08635770472331193, "grad_norm": 177.8553924560547, "learning_rate": 8.550000000000001e-06, "loss": 20.5585, "step": 42750 }, { "epoch": 0.08637790535599575, "grad_norm": 210.55909729003906, "learning_rate": 8.552e-06, "loss": 30.1487, "step": 42760 }, { "epoch": 0.08639810598867957, "grad_norm": 388.76531982421875, "learning_rate": 8.554000000000001e-06, "loss": 23.6628, "step": 42770 }, { "epoch": 0.08641830662136338, "grad_norm": 357.1836853027344, "learning_rate": 8.556e-06, "loss": 35.3295, "step": 42780 }, { "epoch": 0.0864385072540472, "grad_norm": 172.72671508789062, "learning_rate": 8.558e-06, "loss": 35.478, "step": 42790 }, { "epoch": 0.08645870788673102, "grad_norm": 688.3428344726562, "learning_rate": 8.560000000000001e-06, "loss": 52.281, "step": 42800 }, { "epoch": 0.08647890851941482, "grad_norm": 521.0298461914062, "learning_rate": 8.562e-06, "loss": 34.707, "step": 42810 }, { "epoch": 0.08649910915209864, "grad_norm": 227.07034301757812, "learning_rate": 8.564000000000001e-06, "loss": 29.1736, "step": 42820 }, { "epoch": 0.08651930978478246, "grad_norm": 239.42547607421875, "learning_rate": 8.566e-06, "loss": 37.0392, "step": 42830 }, { "epoch": 0.08653951041746627, "grad_norm": 143.53134155273438, "learning_rate": 8.568e-06, "loss": 23.6397, "step": 42840 }, { "epoch": 0.08655971105015009, "grad_norm": 379.3684997558594, "learning_rate": 8.570000000000001e-06, "loss": 34.1899, "step": 42850 }, { "epoch": 0.08657991168283391, "grad_norm": 359.375, "learning_rate": 8.572e-06, "loss": 19.6014, "step": 42860 }, { "epoch": 0.08660011231551772, "grad_norm": 305.4334411621094, "learning_rate": 8.574000000000001e-06, "loss": 37.524, "step": 42870 }, { "epoch": 0.08662031294820154, "grad_norm": 526.5411987304688, "learning_rate": 8.576e-06, "loss": 44.0992, "step": 42880 }, { "epoch": 0.08664051358088536, "grad_norm": 269.4141540527344, "learning_rate": 8.578000000000002e-06, "loss": 20.1755, "step": 42890 }, { "epoch": 0.08666071421356916, "grad_norm": 268.38604736328125, "learning_rate": 8.580000000000001e-06, "loss": 31.3924, "step": 42900 }, { "epoch": 0.08668091484625298, "grad_norm": 85.12313079833984, "learning_rate": 8.582e-06, "loss": 22.3348, "step": 42910 }, { "epoch": 0.0867011154789368, "grad_norm": 719.7230224609375, "learning_rate": 8.584000000000001e-06, "loss": 28.9957, "step": 42920 }, { "epoch": 0.08672131611162062, "grad_norm": 326.01397705078125, "learning_rate": 8.586e-06, "loss": 19.4053, "step": 42930 }, { "epoch": 0.08674151674430443, "grad_norm": 816.2281494140625, "learning_rate": 8.588000000000001e-06, "loss": 45.1098, "step": 42940 }, { "epoch": 0.08676171737698825, "grad_norm": 389.2813720703125, "learning_rate": 8.59e-06, "loss": 21.4716, "step": 42950 }, { "epoch": 0.08678191800967207, "grad_norm": 117.62506866455078, "learning_rate": 8.592e-06, "loss": 14.4609, "step": 42960 }, { "epoch": 0.08680211864235587, "grad_norm": 185.10391235351562, "learning_rate": 8.594000000000001e-06, "loss": 32.0074, "step": 42970 }, { "epoch": 0.0868223192750397, "grad_norm": 90.70287322998047, "learning_rate": 8.596e-06, "loss": 59.9193, "step": 42980 }, { "epoch": 0.08684251990772351, "grad_norm": 319.0557556152344, "learning_rate": 8.598000000000001e-06, "loss": 27.5591, "step": 42990 }, { "epoch": 0.08686272054040732, "grad_norm": 1605.0791015625, "learning_rate": 8.6e-06, "loss": 24.3427, "step": 43000 }, { "epoch": 0.08688292117309114, "grad_norm": 160.68763732910156, "learning_rate": 8.602e-06, "loss": 18.491, "step": 43010 }, { "epoch": 0.08690312180577496, "grad_norm": 190.15452575683594, "learning_rate": 8.604000000000001e-06, "loss": 28.2721, "step": 43020 }, { "epoch": 0.08692332243845877, "grad_norm": 120.46871185302734, "learning_rate": 8.606e-06, "loss": 29.6521, "step": 43030 }, { "epoch": 0.08694352307114259, "grad_norm": 231.99745178222656, "learning_rate": 8.608000000000001e-06, "loss": 25.8387, "step": 43040 }, { "epoch": 0.0869637237038264, "grad_norm": 241.53402709960938, "learning_rate": 8.61e-06, "loss": 29.9424, "step": 43050 }, { "epoch": 0.08698392433651021, "grad_norm": 280.4883728027344, "learning_rate": 8.612e-06, "loss": 35.0889, "step": 43060 }, { "epoch": 0.08700412496919403, "grad_norm": 257.79052734375, "learning_rate": 8.614000000000001e-06, "loss": 18.1445, "step": 43070 }, { "epoch": 0.08702432560187785, "grad_norm": 542.74609375, "learning_rate": 8.616000000000002e-06, "loss": 23.3069, "step": 43080 }, { "epoch": 0.08704452623456167, "grad_norm": 80.36368560791016, "learning_rate": 8.618000000000001e-06, "loss": 33.4549, "step": 43090 }, { "epoch": 0.08706472686724548, "grad_norm": 255.57667541503906, "learning_rate": 8.62e-06, "loss": 31.8864, "step": 43100 }, { "epoch": 0.0870849274999293, "grad_norm": 81.74248504638672, "learning_rate": 8.622e-06, "loss": 42.916, "step": 43110 }, { "epoch": 0.08710512813261312, "grad_norm": 420.43988037109375, "learning_rate": 8.624e-06, "loss": 25.6018, "step": 43120 }, { "epoch": 0.08712532876529692, "grad_norm": 278.19940185546875, "learning_rate": 8.626000000000002e-06, "loss": 20.4954, "step": 43130 }, { "epoch": 0.08714552939798074, "grad_norm": 345.0391845703125, "learning_rate": 8.628000000000001e-06, "loss": 11.9522, "step": 43140 }, { "epoch": 0.08716573003066456, "grad_norm": 415.4322814941406, "learning_rate": 8.63e-06, "loss": 37.3526, "step": 43150 }, { "epoch": 0.08718593066334837, "grad_norm": 16.312620162963867, "learning_rate": 8.632e-06, "loss": 33.451, "step": 43160 }, { "epoch": 0.08720613129603219, "grad_norm": 432.6830749511719, "learning_rate": 8.634e-06, "loss": 22.2926, "step": 43170 }, { "epoch": 0.08722633192871601, "grad_norm": 188.8334503173828, "learning_rate": 8.636000000000002e-06, "loss": 39.5638, "step": 43180 }, { "epoch": 0.08724653256139982, "grad_norm": 258.3606872558594, "learning_rate": 8.638000000000001e-06, "loss": 31.5929, "step": 43190 }, { "epoch": 0.08726673319408364, "grad_norm": 775.7046508789062, "learning_rate": 8.64e-06, "loss": 45.0896, "step": 43200 }, { "epoch": 0.08728693382676746, "grad_norm": 457.9463195800781, "learning_rate": 8.642e-06, "loss": 29.4912, "step": 43210 }, { "epoch": 0.08730713445945126, "grad_norm": 131.72274780273438, "learning_rate": 8.644e-06, "loss": 20.4987, "step": 43220 }, { "epoch": 0.08732733509213508, "grad_norm": 114.04011535644531, "learning_rate": 8.646000000000002e-06, "loss": 28.0899, "step": 43230 }, { "epoch": 0.0873475357248189, "grad_norm": 474.2365417480469, "learning_rate": 8.648000000000001e-06, "loss": 38.2425, "step": 43240 }, { "epoch": 0.08736773635750272, "grad_norm": 352.0413513183594, "learning_rate": 8.65e-06, "loss": 21.0791, "step": 43250 }, { "epoch": 0.08738793699018653, "grad_norm": 112.28624725341797, "learning_rate": 8.652000000000001e-06, "loss": 72.1697, "step": 43260 }, { "epoch": 0.08740813762287035, "grad_norm": 147.17440795898438, "learning_rate": 8.654e-06, "loss": 22.0941, "step": 43270 }, { "epoch": 0.08742833825555417, "grad_norm": 281.201904296875, "learning_rate": 8.656000000000001e-06, "loss": 23.792, "step": 43280 }, { "epoch": 0.08744853888823798, "grad_norm": 450.3614807128906, "learning_rate": 8.658e-06, "loss": 32.2011, "step": 43290 }, { "epoch": 0.0874687395209218, "grad_norm": 728.5098876953125, "learning_rate": 8.66e-06, "loss": 31.5085, "step": 43300 }, { "epoch": 0.08748894015360562, "grad_norm": 396.8482360839844, "learning_rate": 8.662000000000001e-06, "loss": 24.7274, "step": 43310 }, { "epoch": 0.08750914078628942, "grad_norm": 294.6155700683594, "learning_rate": 8.664e-06, "loss": 22.6699, "step": 43320 }, { "epoch": 0.08752934141897324, "grad_norm": 189.0536651611328, "learning_rate": 8.666000000000001e-06, "loss": 28.3745, "step": 43330 }, { "epoch": 0.08754954205165706, "grad_norm": 185.00338745117188, "learning_rate": 8.668e-06, "loss": 37.1693, "step": 43340 }, { "epoch": 0.08756974268434087, "grad_norm": 165.09756469726562, "learning_rate": 8.67e-06, "loss": 23.2971, "step": 43350 }, { "epoch": 0.08758994331702469, "grad_norm": 126.29808044433594, "learning_rate": 8.672000000000001e-06, "loss": 21.2976, "step": 43360 }, { "epoch": 0.08761014394970851, "grad_norm": 344.4629211425781, "learning_rate": 8.674e-06, "loss": 27.5838, "step": 43370 }, { "epoch": 0.08763034458239231, "grad_norm": 146.5240936279297, "learning_rate": 8.676000000000001e-06, "loss": 32.791, "step": 43380 }, { "epoch": 0.08765054521507613, "grad_norm": 239.6765594482422, "learning_rate": 8.678e-06, "loss": 26.68, "step": 43390 }, { "epoch": 0.08767074584775995, "grad_norm": 91.46793365478516, "learning_rate": 8.68e-06, "loss": 37.275, "step": 43400 }, { "epoch": 0.08769094648044377, "grad_norm": 249.57054138183594, "learning_rate": 8.682000000000001e-06, "loss": 26.041, "step": 43410 }, { "epoch": 0.08771114711312758, "grad_norm": 412.2458190917969, "learning_rate": 8.684e-06, "loss": 41.8173, "step": 43420 }, { "epoch": 0.0877313477458114, "grad_norm": 332.7708435058594, "learning_rate": 8.686000000000001e-06, "loss": 24.3211, "step": 43430 }, { "epoch": 0.08775154837849522, "grad_norm": 557.77587890625, "learning_rate": 8.688e-06, "loss": 38.0592, "step": 43440 }, { "epoch": 0.08777174901117903, "grad_norm": 301.3050537109375, "learning_rate": 8.690000000000002e-06, "loss": 33.7214, "step": 43450 }, { "epoch": 0.08779194964386285, "grad_norm": 118.56238555908203, "learning_rate": 8.692e-06, "loss": 23.6909, "step": 43460 }, { "epoch": 0.08781215027654667, "grad_norm": 393.8979797363281, "learning_rate": 8.694e-06, "loss": 14.0555, "step": 43470 }, { "epoch": 0.08783235090923047, "grad_norm": 447.10772705078125, "learning_rate": 8.696000000000001e-06, "loss": 21.3402, "step": 43480 }, { "epoch": 0.08785255154191429, "grad_norm": 401.24627685546875, "learning_rate": 8.698e-06, "loss": 26.676, "step": 43490 }, { "epoch": 0.08787275217459811, "grad_norm": 259.7637023925781, "learning_rate": 8.700000000000001e-06, "loss": 20.5141, "step": 43500 }, { "epoch": 0.08789295280728192, "grad_norm": 273.2626647949219, "learning_rate": 8.702e-06, "loss": 29.7172, "step": 43510 }, { "epoch": 0.08791315343996574, "grad_norm": 301.2015075683594, "learning_rate": 8.704e-06, "loss": 25.2262, "step": 43520 }, { "epoch": 0.08793335407264956, "grad_norm": 239.5045166015625, "learning_rate": 8.706000000000001e-06, "loss": 31.8363, "step": 43530 }, { "epoch": 0.08795355470533336, "grad_norm": 245.7670440673828, "learning_rate": 8.708e-06, "loss": 23.8348, "step": 43540 }, { "epoch": 0.08797375533801718, "grad_norm": 216.952880859375, "learning_rate": 8.710000000000001e-06, "loss": 48.5257, "step": 43550 }, { "epoch": 0.087993955970701, "grad_norm": 103.68922424316406, "learning_rate": 8.712e-06, "loss": 21.9983, "step": 43560 }, { "epoch": 0.08801415660338482, "grad_norm": 380.9727478027344, "learning_rate": 8.714e-06, "loss": 22.2832, "step": 43570 }, { "epoch": 0.08803435723606863, "grad_norm": 82.02774810791016, "learning_rate": 8.716000000000001e-06, "loss": 19.9277, "step": 43580 }, { "epoch": 0.08805455786875245, "grad_norm": 604.9141235351562, "learning_rate": 8.718e-06, "loss": 36.7878, "step": 43590 }, { "epoch": 0.08807475850143627, "grad_norm": 251.89004516601562, "learning_rate": 8.720000000000001e-06, "loss": 39.7867, "step": 43600 }, { "epoch": 0.08809495913412008, "grad_norm": 687.6893920898438, "learning_rate": 8.722e-06, "loss": 27.467, "step": 43610 }, { "epoch": 0.0881151597668039, "grad_norm": 266.224853515625, "learning_rate": 8.724e-06, "loss": 38.6423, "step": 43620 }, { "epoch": 0.08813536039948772, "grad_norm": 584.7669067382812, "learning_rate": 8.726e-06, "loss": 21.8989, "step": 43630 }, { "epoch": 0.08815556103217152, "grad_norm": 130.69248962402344, "learning_rate": 8.728e-06, "loss": 20.8488, "step": 43640 }, { "epoch": 0.08817576166485534, "grad_norm": 828.7688598632812, "learning_rate": 8.730000000000001e-06, "loss": 26.2175, "step": 43650 }, { "epoch": 0.08819596229753916, "grad_norm": 660.9241943359375, "learning_rate": 8.732e-06, "loss": 43.9075, "step": 43660 }, { "epoch": 0.08821616293022297, "grad_norm": 193.8571014404297, "learning_rate": 8.734e-06, "loss": 34.5534, "step": 43670 }, { "epoch": 0.08823636356290679, "grad_norm": 303.8436279296875, "learning_rate": 8.736e-06, "loss": 27.6308, "step": 43680 }, { "epoch": 0.08825656419559061, "grad_norm": 332.6838073730469, "learning_rate": 8.738000000000002e-06, "loss": 13.308, "step": 43690 }, { "epoch": 0.08827676482827441, "grad_norm": 97.66356658935547, "learning_rate": 8.740000000000001e-06, "loss": 17.1152, "step": 43700 }, { "epoch": 0.08829696546095823, "grad_norm": 459.4017028808594, "learning_rate": 8.742e-06, "loss": 36.8421, "step": 43710 }, { "epoch": 0.08831716609364205, "grad_norm": 668.2135620117188, "learning_rate": 8.744e-06, "loss": 27.5472, "step": 43720 }, { "epoch": 0.08833736672632587, "grad_norm": 24.42778778076172, "learning_rate": 8.746e-06, "loss": 26.5021, "step": 43730 }, { "epoch": 0.08835756735900968, "grad_norm": 140.9199981689453, "learning_rate": 8.748000000000002e-06, "loss": 31.507, "step": 43740 }, { "epoch": 0.0883777679916935, "grad_norm": 49.494415283203125, "learning_rate": 8.750000000000001e-06, "loss": 31.445, "step": 43750 }, { "epoch": 0.08839796862437732, "grad_norm": 326.2047424316406, "learning_rate": 8.752e-06, "loss": 31.5424, "step": 43760 }, { "epoch": 0.08841816925706113, "grad_norm": 144.2211456298828, "learning_rate": 8.754e-06, "loss": 14.9317, "step": 43770 }, { "epoch": 0.08843836988974495, "grad_norm": 313.7799377441406, "learning_rate": 8.756e-06, "loss": 31.2671, "step": 43780 }, { "epoch": 0.08845857052242877, "grad_norm": 238.54342651367188, "learning_rate": 8.758000000000002e-06, "loss": 23.1446, "step": 43790 }, { "epoch": 0.08847877115511257, "grad_norm": 109.89862823486328, "learning_rate": 8.76e-06, "loss": 22.8843, "step": 43800 }, { "epoch": 0.0884989717877964, "grad_norm": 436.75677490234375, "learning_rate": 8.762e-06, "loss": 49.1318, "step": 43810 }, { "epoch": 0.08851917242048021, "grad_norm": 354.0926208496094, "learning_rate": 8.764e-06, "loss": 33.1589, "step": 43820 }, { "epoch": 0.08853937305316402, "grad_norm": 228.75650024414062, "learning_rate": 8.766e-06, "loss": 17.434, "step": 43830 }, { "epoch": 0.08855957368584784, "grad_norm": 356.40936279296875, "learning_rate": 8.768000000000001e-06, "loss": 35.5096, "step": 43840 }, { "epoch": 0.08857977431853166, "grad_norm": 502.86541748046875, "learning_rate": 8.77e-06, "loss": 24.5161, "step": 43850 }, { "epoch": 0.08859997495121547, "grad_norm": 586.56689453125, "learning_rate": 8.772e-06, "loss": 32.1147, "step": 43860 }, { "epoch": 0.08862017558389929, "grad_norm": 345.24951171875, "learning_rate": 8.774000000000001e-06, "loss": 43.0371, "step": 43870 }, { "epoch": 0.0886403762165831, "grad_norm": 53.20797348022461, "learning_rate": 8.776e-06, "loss": 16.307, "step": 43880 }, { "epoch": 0.08866057684926693, "grad_norm": 426.59735107421875, "learning_rate": 8.778000000000001e-06, "loss": 18.8423, "step": 43890 }, { "epoch": 0.08868077748195073, "grad_norm": 168.8759765625, "learning_rate": 8.78e-06, "loss": 33.9336, "step": 43900 }, { "epoch": 0.08870097811463455, "grad_norm": 367.9001159667969, "learning_rate": 8.782e-06, "loss": 44.6825, "step": 43910 }, { "epoch": 0.08872117874731837, "grad_norm": 480.07666015625, "learning_rate": 8.784000000000001e-06, "loss": 34.7406, "step": 43920 }, { "epoch": 0.08874137938000218, "grad_norm": 142.4234161376953, "learning_rate": 8.786000000000002e-06, "loss": 33.244, "step": 43930 }, { "epoch": 0.088761580012686, "grad_norm": 122.31173706054688, "learning_rate": 8.788000000000001e-06, "loss": 35.1773, "step": 43940 }, { "epoch": 0.08878178064536982, "grad_norm": 261.9033203125, "learning_rate": 8.79e-06, "loss": 33.5556, "step": 43950 }, { "epoch": 0.08880198127805362, "grad_norm": 320.999267578125, "learning_rate": 8.792e-06, "loss": 19.5853, "step": 43960 }, { "epoch": 0.08882218191073744, "grad_norm": 799.8468017578125, "learning_rate": 8.794e-06, "loss": 25.7037, "step": 43970 }, { "epoch": 0.08884238254342126, "grad_norm": 623.6458129882812, "learning_rate": 8.796000000000002e-06, "loss": 35.6103, "step": 43980 }, { "epoch": 0.08886258317610507, "grad_norm": 56.706993103027344, "learning_rate": 8.798000000000001e-06, "loss": 23.6134, "step": 43990 }, { "epoch": 0.08888278380878889, "grad_norm": 152.29844665527344, "learning_rate": 8.8e-06, "loss": 35.8613, "step": 44000 }, { "epoch": 0.08890298444147271, "grad_norm": 176.3098602294922, "learning_rate": 8.802e-06, "loss": 36.9491, "step": 44010 }, { "epoch": 0.08892318507415652, "grad_norm": 121.60172271728516, "learning_rate": 8.804e-06, "loss": 22.9304, "step": 44020 }, { "epoch": 0.08894338570684034, "grad_norm": 282.4427490234375, "learning_rate": 8.806000000000002e-06, "loss": 14.3868, "step": 44030 }, { "epoch": 0.08896358633952416, "grad_norm": 375.6902770996094, "learning_rate": 8.808000000000001e-06, "loss": 16.5813, "step": 44040 }, { "epoch": 0.08898378697220798, "grad_norm": 240.91685485839844, "learning_rate": 8.81e-06, "loss": 28.5082, "step": 44050 }, { "epoch": 0.08900398760489178, "grad_norm": 340.7265625, "learning_rate": 8.812000000000001e-06, "loss": 50.1663, "step": 44060 }, { "epoch": 0.0890241882375756, "grad_norm": 317.5110778808594, "learning_rate": 8.814e-06, "loss": 25.4112, "step": 44070 }, { "epoch": 0.08904438887025942, "grad_norm": 181.19049072265625, "learning_rate": 8.816000000000002e-06, "loss": 27.7551, "step": 44080 }, { "epoch": 0.08906458950294323, "grad_norm": 278.0091247558594, "learning_rate": 8.818000000000001e-06, "loss": 18.9264, "step": 44090 }, { "epoch": 0.08908479013562705, "grad_norm": 394.2637023925781, "learning_rate": 8.82e-06, "loss": 24.4046, "step": 44100 }, { "epoch": 0.08910499076831087, "grad_norm": 292.78350830078125, "learning_rate": 8.822000000000001e-06, "loss": 33.9718, "step": 44110 }, { "epoch": 0.08912519140099467, "grad_norm": 269.312744140625, "learning_rate": 8.824e-06, "loss": 31.1621, "step": 44120 }, { "epoch": 0.0891453920336785, "grad_norm": 288.2286682128906, "learning_rate": 8.826000000000002e-06, "loss": 19.5916, "step": 44130 }, { "epoch": 0.08916559266636231, "grad_norm": 299.10894775390625, "learning_rate": 8.828000000000001e-06, "loss": 27.2105, "step": 44140 }, { "epoch": 0.08918579329904612, "grad_norm": 184.63409423828125, "learning_rate": 8.83e-06, "loss": 14.5964, "step": 44150 }, { "epoch": 0.08920599393172994, "grad_norm": 896.6502685546875, "learning_rate": 8.832000000000001e-06, "loss": 59.9696, "step": 44160 }, { "epoch": 0.08922619456441376, "grad_norm": 453.1307373046875, "learning_rate": 8.834e-06, "loss": 48.6163, "step": 44170 }, { "epoch": 0.08924639519709757, "grad_norm": 1160.077392578125, "learning_rate": 8.836000000000001e-06, "loss": 42.9799, "step": 44180 }, { "epoch": 0.08926659582978139, "grad_norm": 431.4505920410156, "learning_rate": 8.838e-06, "loss": 44.8626, "step": 44190 }, { "epoch": 0.0892867964624652, "grad_norm": 752.236572265625, "learning_rate": 8.84e-06, "loss": 25.891, "step": 44200 }, { "epoch": 0.08930699709514903, "grad_norm": 214.89564514160156, "learning_rate": 8.842000000000001e-06, "loss": 26.8191, "step": 44210 }, { "epoch": 0.08932719772783283, "grad_norm": 896.2755737304688, "learning_rate": 8.844e-06, "loss": 38.9298, "step": 44220 }, { "epoch": 0.08934739836051665, "grad_norm": 755.916259765625, "learning_rate": 8.846000000000001e-06, "loss": 29.9312, "step": 44230 }, { "epoch": 0.08936759899320047, "grad_norm": 192.81129455566406, "learning_rate": 8.848e-06, "loss": 26.8022, "step": 44240 }, { "epoch": 0.08938779962588428, "grad_norm": 210.9685516357422, "learning_rate": 8.85e-06, "loss": 34.2972, "step": 44250 }, { "epoch": 0.0894080002585681, "grad_norm": 446.32708740234375, "learning_rate": 8.852000000000001e-06, "loss": 34.9849, "step": 44260 }, { "epoch": 0.08942820089125192, "grad_norm": 34.32825469970703, "learning_rate": 8.854e-06, "loss": 24.8629, "step": 44270 }, { "epoch": 0.08944840152393572, "grad_norm": 281.3403625488281, "learning_rate": 8.856000000000001e-06, "loss": 21.3949, "step": 44280 }, { "epoch": 0.08946860215661954, "grad_norm": 612.6090698242188, "learning_rate": 8.858e-06, "loss": 28.2288, "step": 44290 }, { "epoch": 0.08948880278930336, "grad_norm": 588.2706909179688, "learning_rate": 8.860000000000002e-06, "loss": 38.0148, "step": 44300 }, { "epoch": 0.08950900342198717, "grad_norm": 28.243799209594727, "learning_rate": 8.862000000000001e-06, "loss": 15.7022, "step": 44310 }, { "epoch": 0.08952920405467099, "grad_norm": 457.6310729980469, "learning_rate": 8.864e-06, "loss": 24.5033, "step": 44320 }, { "epoch": 0.08954940468735481, "grad_norm": 339.8095397949219, "learning_rate": 8.866000000000001e-06, "loss": 33.6726, "step": 44330 }, { "epoch": 0.08956960532003862, "grad_norm": 381.6207275390625, "learning_rate": 8.868e-06, "loss": 25.8208, "step": 44340 }, { "epoch": 0.08958980595272244, "grad_norm": 605.2793579101562, "learning_rate": 8.870000000000001e-06, "loss": 26.5523, "step": 44350 }, { "epoch": 0.08961000658540626, "grad_norm": 317.77703857421875, "learning_rate": 8.872e-06, "loss": 25.0913, "step": 44360 }, { "epoch": 0.08963020721809008, "grad_norm": 110.2572250366211, "learning_rate": 8.874e-06, "loss": 22.8066, "step": 44370 }, { "epoch": 0.08965040785077388, "grad_norm": 368.8611145019531, "learning_rate": 8.876e-06, "loss": 48.6731, "step": 44380 }, { "epoch": 0.0896706084834577, "grad_norm": 333.0453186035156, "learning_rate": 8.878e-06, "loss": 39.4059, "step": 44390 }, { "epoch": 0.08969080911614152, "grad_norm": 318.3411865234375, "learning_rate": 8.880000000000001e-06, "loss": 30.9673, "step": 44400 }, { "epoch": 0.08971100974882533, "grad_norm": 221.188720703125, "learning_rate": 8.882e-06, "loss": 22.5801, "step": 44410 }, { "epoch": 0.08973121038150915, "grad_norm": 267.1985168457031, "learning_rate": 8.884e-06, "loss": 27.8721, "step": 44420 }, { "epoch": 0.08975141101419297, "grad_norm": 1057.8614501953125, "learning_rate": 8.886000000000001e-06, "loss": 52.9497, "step": 44430 }, { "epoch": 0.08977161164687678, "grad_norm": 72.54844665527344, "learning_rate": 8.888e-06, "loss": 41.9565, "step": 44440 }, { "epoch": 0.0897918122795606, "grad_norm": 253.47872924804688, "learning_rate": 8.890000000000001e-06, "loss": 25.6141, "step": 44450 }, { "epoch": 0.08981201291224442, "grad_norm": 86.93907928466797, "learning_rate": 8.892e-06, "loss": 14.3134, "step": 44460 }, { "epoch": 0.08983221354492822, "grad_norm": 411.9892578125, "learning_rate": 8.894e-06, "loss": 35.8159, "step": 44470 }, { "epoch": 0.08985241417761204, "grad_norm": 140.03704833984375, "learning_rate": 8.896000000000001e-06, "loss": 45.6774, "step": 44480 }, { "epoch": 0.08987261481029586, "grad_norm": 575.7564086914062, "learning_rate": 8.898000000000002e-06, "loss": 34.4929, "step": 44490 }, { "epoch": 0.08989281544297967, "grad_norm": 99.04944610595703, "learning_rate": 8.900000000000001e-06, "loss": 33.2199, "step": 44500 }, { "epoch": 0.08991301607566349, "grad_norm": 119.77254486083984, "learning_rate": 8.902e-06, "loss": 20.4454, "step": 44510 }, { "epoch": 0.08993321670834731, "grad_norm": 180.38006591796875, "learning_rate": 8.904e-06, "loss": 28.0068, "step": 44520 }, { "epoch": 0.08995341734103113, "grad_norm": 527.8374633789062, "learning_rate": 8.906e-06, "loss": 27.2499, "step": 44530 }, { "epoch": 0.08997361797371493, "grad_norm": 154.22067260742188, "learning_rate": 8.908000000000002e-06, "loss": 30.0185, "step": 44540 }, { "epoch": 0.08999381860639875, "grad_norm": 293.45343017578125, "learning_rate": 8.910000000000001e-06, "loss": 30.2363, "step": 44550 }, { "epoch": 0.09001401923908257, "grad_norm": 250.1461944580078, "learning_rate": 8.912e-06, "loss": 13.1879, "step": 44560 }, { "epoch": 0.09003421987176638, "grad_norm": 78.75881958007812, "learning_rate": 8.914e-06, "loss": 27.6514, "step": 44570 }, { "epoch": 0.0900544205044502, "grad_norm": 151.12237548828125, "learning_rate": 8.916e-06, "loss": 52.0958, "step": 44580 }, { "epoch": 0.09007462113713402, "grad_norm": 93.19563293457031, "learning_rate": 8.918000000000002e-06, "loss": 37.8502, "step": 44590 }, { "epoch": 0.09009482176981783, "grad_norm": 269.7446594238281, "learning_rate": 8.920000000000001e-06, "loss": 43.9331, "step": 44600 }, { "epoch": 0.09011502240250165, "grad_norm": 880.7884521484375, "learning_rate": 8.922e-06, "loss": 32.9319, "step": 44610 }, { "epoch": 0.09013522303518547, "grad_norm": 329.60833740234375, "learning_rate": 8.924e-06, "loss": 17.723, "step": 44620 }, { "epoch": 0.09015542366786927, "grad_norm": 438.1557312011719, "learning_rate": 8.926e-06, "loss": 41.5125, "step": 44630 }, { "epoch": 0.09017562430055309, "grad_norm": 285.39154052734375, "learning_rate": 8.928000000000002e-06, "loss": 43.4447, "step": 44640 }, { "epoch": 0.09019582493323691, "grad_norm": 453.1195068359375, "learning_rate": 8.930000000000001e-06, "loss": 34.8414, "step": 44650 }, { "epoch": 0.09021602556592072, "grad_norm": 80.10704803466797, "learning_rate": 8.932e-06, "loss": 36.2065, "step": 44660 }, { "epoch": 0.09023622619860454, "grad_norm": 452.99725341796875, "learning_rate": 8.934000000000001e-06, "loss": 21.6434, "step": 44670 }, { "epoch": 0.09025642683128836, "grad_norm": 515.354736328125, "learning_rate": 8.936e-06, "loss": 35.193, "step": 44680 }, { "epoch": 0.09027662746397218, "grad_norm": 444.4808044433594, "learning_rate": 8.938000000000001e-06, "loss": 28.7902, "step": 44690 }, { "epoch": 0.09029682809665598, "grad_norm": 226.79052734375, "learning_rate": 8.94e-06, "loss": 29.4399, "step": 44700 }, { "epoch": 0.0903170287293398, "grad_norm": 175.136474609375, "learning_rate": 8.942e-06, "loss": 31.7087, "step": 44710 }, { "epoch": 0.09033722936202362, "grad_norm": 212.49583435058594, "learning_rate": 8.944000000000001e-06, "loss": 20.8032, "step": 44720 }, { "epoch": 0.09035742999470743, "grad_norm": 642.6939697265625, "learning_rate": 8.946e-06, "loss": 34.045, "step": 44730 }, { "epoch": 0.09037763062739125, "grad_norm": 339.7453308105469, "learning_rate": 8.948000000000001e-06, "loss": 29.6102, "step": 44740 }, { "epoch": 0.09039783126007507, "grad_norm": 142.92942810058594, "learning_rate": 8.95e-06, "loss": 32.2397, "step": 44750 }, { "epoch": 0.09041803189275888, "grad_norm": 218.3876495361328, "learning_rate": 8.952e-06, "loss": 25.6413, "step": 44760 }, { "epoch": 0.0904382325254427, "grad_norm": 216.1447296142578, "learning_rate": 8.954000000000001e-06, "loss": 28.4335, "step": 44770 }, { "epoch": 0.09045843315812652, "grad_norm": 377.4523620605469, "learning_rate": 8.956e-06, "loss": 46.6937, "step": 44780 }, { "epoch": 0.09047863379081032, "grad_norm": 101.82817077636719, "learning_rate": 8.958000000000001e-06, "loss": 18.7022, "step": 44790 }, { "epoch": 0.09049883442349414, "grad_norm": 311.8998718261719, "learning_rate": 8.96e-06, "loss": 25.3086, "step": 44800 }, { "epoch": 0.09051903505617796, "grad_norm": 339.15887451171875, "learning_rate": 8.962e-06, "loss": 26.1719, "step": 44810 }, { "epoch": 0.09053923568886177, "grad_norm": 98.6194839477539, "learning_rate": 8.964000000000001e-06, "loss": 29.6965, "step": 44820 }, { "epoch": 0.09055943632154559, "grad_norm": 145.718017578125, "learning_rate": 8.966e-06, "loss": 35.0742, "step": 44830 }, { "epoch": 0.09057963695422941, "grad_norm": 99.06491088867188, "learning_rate": 8.968000000000001e-06, "loss": 19.8541, "step": 44840 }, { "epoch": 0.09059983758691323, "grad_norm": 447.5253601074219, "learning_rate": 8.97e-06, "loss": 29.3785, "step": 44850 }, { "epoch": 0.09062003821959703, "grad_norm": 156.33859252929688, "learning_rate": 8.972000000000002e-06, "loss": 22.8707, "step": 44860 }, { "epoch": 0.09064023885228085, "grad_norm": 23.149415969848633, "learning_rate": 8.974e-06, "loss": 31.5113, "step": 44870 }, { "epoch": 0.09066043948496467, "grad_norm": 0.0, "learning_rate": 8.976e-06, "loss": 24.2602, "step": 44880 }, { "epoch": 0.09068064011764848, "grad_norm": 897.9341430664062, "learning_rate": 8.978000000000001e-06, "loss": 40.0263, "step": 44890 }, { "epoch": 0.0907008407503323, "grad_norm": 463.1059265136719, "learning_rate": 8.98e-06, "loss": 34.5467, "step": 44900 }, { "epoch": 0.09072104138301612, "grad_norm": 181.9649200439453, "learning_rate": 8.982000000000001e-06, "loss": 36.1558, "step": 44910 }, { "epoch": 0.09074124201569993, "grad_norm": 603.5677490234375, "learning_rate": 8.984e-06, "loss": 53.6853, "step": 44920 }, { "epoch": 0.09076144264838375, "grad_norm": 156.77557373046875, "learning_rate": 8.986e-06, "loss": 34.3136, "step": 44930 }, { "epoch": 0.09078164328106757, "grad_norm": 260.46429443359375, "learning_rate": 8.988000000000001e-06, "loss": 36.3128, "step": 44940 }, { "epoch": 0.09080184391375137, "grad_norm": 403.0807800292969, "learning_rate": 8.99e-06, "loss": 34.9704, "step": 44950 }, { "epoch": 0.0908220445464352, "grad_norm": 39.43014907836914, "learning_rate": 8.992000000000001e-06, "loss": 18.9711, "step": 44960 }, { "epoch": 0.09084224517911901, "grad_norm": 462.73785400390625, "learning_rate": 8.994e-06, "loss": 34.4086, "step": 44970 }, { "epoch": 0.09086244581180282, "grad_norm": 242.1519775390625, "learning_rate": 8.996e-06, "loss": 33.3202, "step": 44980 }, { "epoch": 0.09088264644448664, "grad_norm": 72.20600891113281, "learning_rate": 8.998000000000001e-06, "loss": 19.1827, "step": 44990 }, { "epoch": 0.09090284707717046, "grad_norm": 546.0796508789062, "learning_rate": 9e-06, "loss": 19.4579, "step": 45000 }, { "epoch": 0.09092304770985428, "grad_norm": 466.9305725097656, "learning_rate": 9.002000000000001e-06, "loss": 19.2954, "step": 45010 }, { "epoch": 0.09094324834253809, "grad_norm": 308.1265563964844, "learning_rate": 9.004e-06, "loss": 29.9295, "step": 45020 }, { "epoch": 0.0909634489752219, "grad_norm": 78.5440902709961, "learning_rate": 9.006e-06, "loss": 21.1052, "step": 45030 }, { "epoch": 0.09098364960790573, "grad_norm": 159.1719512939453, "learning_rate": 9.008e-06, "loss": 23.3582, "step": 45040 }, { "epoch": 0.09100385024058953, "grad_norm": 200.64991760253906, "learning_rate": 9.01e-06, "loss": 28.5952, "step": 45050 }, { "epoch": 0.09102405087327335, "grad_norm": 82.9343032836914, "learning_rate": 9.012000000000001e-06, "loss": 17.7191, "step": 45060 }, { "epoch": 0.09104425150595717, "grad_norm": 725.3158569335938, "learning_rate": 9.014e-06, "loss": 33.8439, "step": 45070 }, { "epoch": 0.09106445213864098, "grad_norm": 615.6925659179688, "learning_rate": 9.016e-06, "loss": 39.5029, "step": 45080 }, { "epoch": 0.0910846527713248, "grad_norm": 209.6754150390625, "learning_rate": 9.018e-06, "loss": 23.1886, "step": 45090 }, { "epoch": 0.09110485340400862, "grad_norm": 309.28070068359375, "learning_rate": 9.020000000000002e-06, "loss": 19.6506, "step": 45100 }, { "epoch": 0.09112505403669242, "grad_norm": 0.0, "learning_rate": 9.022000000000001e-06, "loss": 14.6931, "step": 45110 }, { "epoch": 0.09114525466937624, "grad_norm": 413.8370056152344, "learning_rate": 9.024e-06, "loss": 51.6613, "step": 45120 }, { "epoch": 0.09116545530206006, "grad_norm": 254.11924743652344, "learning_rate": 9.026e-06, "loss": 46.1803, "step": 45130 }, { "epoch": 0.09118565593474387, "grad_norm": 94.26102447509766, "learning_rate": 9.028e-06, "loss": 30.6714, "step": 45140 }, { "epoch": 0.09120585656742769, "grad_norm": 338.0152282714844, "learning_rate": 9.030000000000002e-06, "loss": 36.0554, "step": 45150 }, { "epoch": 0.09122605720011151, "grad_norm": 51.32347869873047, "learning_rate": 9.032000000000001e-06, "loss": 32.7614, "step": 45160 }, { "epoch": 0.09124625783279533, "grad_norm": 204.2504425048828, "learning_rate": 9.034e-06, "loss": 25.1966, "step": 45170 }, { "epoch": 0.09126645846547914, "grad_norm": 209.50502014160156, "learning_rate": 9.036e-06, "loss": 15.9054, "step": 45180 }, { "epoch": 0.09128665909816296, "grad_norm": 0.0, "learning_rate": 9.038e-06, "loss": 26.9298, "step": 45190 }, { "epoch": 0.09130685973084678, "grad_norm": 188.56651306152344, "learning_rate": 9.040000000000002e-06, "loss": 26.2713, "step": 45200 }, { "epoch": 0.09132706036353058, "grad_norm": 1024.7669677734375, "learning_rate": 9.042e-06, "loss": 20.8682, "step": 45210 }, { "epoch": 0.0913472609962144, "grad_norm": 214.8667449951172, "learning_rate": 9.044e-06, "loss": 35.3718, "step": 45220 }, { "epoch": 0.09136746162889822, "grad_norm": 121.48190307617188, "learning_rate": 9.046000000000001e-06, "loss": 31.5103, "step": 45230 }, { "epoch": 0.09138766226158203, "grad_norm": 345.91253662109375, "learning_rate": 9.048e-06, "loss": 37.65, "step": 45240 }, { "epoch": 0.09140786289426585, "grad_norm": 228.71878051757812, "learning_rate": 9.050000000000001e-06, "loss": 37.4669, "step": 45250 }, { "epoch": 0.09142806352694967, "grad_norm": 268.6900634765625, "learning_rate": 9.052e-06, "loss": 20.3024, "step": 45260 }, { "epoch": 0.09144826415963347, "grad_norm": 98.0002212524414, "learning_rate": 9.054e-06, "loss": 41.5486, "step": 45270 }, { "epoch": 0.0914684647923173, "grad_norm": 113.31243896484375, "learning_rate": 9.056000000000001e-06, "loss": 18.5772, "step": 45280 }, { "epoch": 0.09148866542500111, "grad_norm": 237.8240509033203, "learning_rate": 9.058000000000002e-06, "loss": 29.6599, "step": 45290 }, { "epoch": 0.09150886605768492, "grad_norm": 88.89898681640625, "learning_rate": 9.060000000000001e-06, "loss": 20.4503, "step": 45300 }, { "epoch": 0.09152906669036874, "grad_norm": 383.4280700683594, "learning_rate": 9.062e-06, "loss": 49.2337, "step": 45310 }, { "epoch": 0.09154926732305256, "grad_norm": 0.0, "learning_rate": 9.064e-06, "loss": 19.6159, "step": 45320 }, { "epoch": 0.09156946795573638, "grad_norm": 201.0872802734375, "learning_rate": 9.066000000000001e-06, "loss": 24.1952, "step": 45330 }, { "epoch": 0.09158966858842019, "grad_norm": 205.6769256591797, "learning_rate": 9.068000000000002e-06, "loss": 40.0135, "step": 45340 }, { "epoch": 0.091609869221104, "grad_norm": 344.142578125, "learning_rate": 9.070000000000001e-06, "loss": 36.2188, "step": 45350 }, { "epoch": 0.09163006985378783, "grad_norm": 260.41717529296875, "learning_rate": 9.072e-06, "loss": 21.0162, "step": 45360 }, { "epoch": 0.09165027048647163, "grad_norm": 113.44036865234375, "learning_rate": 9.074e-06, "loss": 27.6821, "step": 45370 }, { "epoch": 0.09167047111915545, "grad_norm": 308.5354919433594, "learning_rate": 9.076000000000001e-06, "loss": 18.9794, "step": 45380 }, { "epoch": 0.09169067175183927, "grad_norm": 218.33511352539062, "learning_rate": 9.078000000000002e-06, "loss": 49.4744, "step": 45390 }, { "epoch": 0.09171087238452308, "grad_norm": 304.7184143066406, "learning_rate": 9.080000000000001e-06, "loss": 25.491, "step": 45400 }, { "epoch": 0.0917310730172069, "grad_norm": 455.8787841796875, "learning_rate": 9.082e-06, "loss": 34.3024, "step": 45410 }, { "epoch": 0.09175127364989072, "grad_norm": 666.511474609375, "learning_rate": 9.084e-06, "loss": 32.709, "step": 45420 }, { "epoch": 0.09177147428257452, "grad_norm": 280.88848876953125, "learning_rate": 9.086e-06, "loss": 22.1083, "step": 45430 }, { "epoch": 0.09179167491525834, "grad_norm": 111.59747314453125, "learning_rate": 9.088000000000002e-06, "loss": 28.3981, "step": 45440 }, { "epoch": 0.09181187554794216, "grad_norm": 293.6383056640625, "learning_rate": 9.090000000000001e-06, "loss": 42.2958, "step": 45450 }, { "epoch": 0.09183207618062597, "grad_norm": 0.0, "learning_rate": 9.092e-06, "loss": 26.8012, "step": 45460 }, { "epoch": 0.09185227681330979, "grad_norm": 196.1160125732422, "learning_rate": 9.094000000000001e-06, "loss": 34.0597, "step": 45470 }, { "epoch": 0.09187247744599361, "grad_norm": 306.42962646484375, "learning_rate": 9.096e-06, "loss": 20.4899, "step": 45480 }, { "epoch": 0.09189267807867743, "grad_norm": 94.22349548339844, "learning_rate": 9.098000000000002e-06, "loss": 12.2727, "step": 45490 }, { "epoch": 0.09191287871136124, "grad_norm": 151.02691650390625, "learning_rate": 9.100000000000001e-06, "loss": 18.8706, "step": 45500 }, { "epoch": 0.09193307934404506, "grad_norm": 518.2387084960938, "learning_rate": 9.102e-06, "loss": 32.4138, "step": 45510 }, { "epoch": 0.09195327997672888, "grad_norm": 268.1435241699219, "learning_rate": 9.104000000000001e-06, "loss": 33.0072, "step": 45520 }, { "epoch": 0.09197348060941268, "grad_norm": 360.8044738769531, "learning_rate": 9.106e-06, "loss": 35.7828, "step": 45530 }, { "epoch": 0.0919936812420965, "grad_norm": 180.61915588378906, "learning_rate": 9.108000000000002e-06, "loss": 25.6874, "step": 45540 }, { "epoch": 0.09201388187478032, "grad_norm": 401.39898681640625, "learning_rate": 9.110000000000001e-06, "loss": 29.3808, "step": 45550 }, { "epoch": 0.09203408250746413, "grad_norm": 745.9600219726562, "learning_rate": 9.112e-06, "loss": 31.4711, "step": 45560 }, { "epoch": 0.09205428314014795, "grad_norm": 210.75648498535156, "learning_rate": 9.114000000000001e-06, "loss": 20.4864, "step": 45570 }, { "epoch": 0.09207448377283177, "grad_norm": 244.63116455078125, "learning_rate": 9.116e-06, "loss": 15.5425, "step": 45580 }, { "epoch": 0.09209468440551558, "grad_norm": 324.91180419921875, "learning_rate": 9.118000000000001e-06, "loss": 31.7958, "step": 45590 }, { "epoch": 0.0921148850381994, "grad_norm": 368.211669921875, "learning_rate": 9.12e-06, "loss": 27.7676, "step": 45600 }, { "epoch": 0.09213508567088322, "grad_norm": 264.7908935546875, "learning_rate": 9.122e-06, "loss": 46.8034, "step": 45610 }, { "epoch": 0.09215528630356702, "grad_norm": 248.51397705078125, "learning_rate": 9.124000000000001e-06, "loss": 24.1754, "step": 45620 }, { "epoch": 0.09217548693625084, "grad_norm": 254.4674835205078, "learning_rate": 9.126e-06, "loss": 30.4968, "step": 45630 }, { "epoch": 0.09219568756893466, "grad_norm": 200.9159393310547, "learning_rate": 9.128e-06, "loss": 22.9383, "step": 45640 }, { "epoch": 0.09221588820161847, "grad_norm": 329.0419006347656, "learning_rate": 9.13e-06, "loss": 22.9019, "step": 45650 }, { "epoch": 0.09223608883430229, "grad_norm": 497.4212646484375, "learning_rate": 9.132000000000002e-06, "loss": 26.0021, "step": 45660 }, { "epoch": 0.09225628946698611, "grad_norm": 1260.4561767578125, "learning_rate": 9.134000000000001e-06, "loss": 33.3642, "step": 45670 }, { "epoch": 0.09227649009966993, "grad_norm": 243.73631286621094, "learning_rate": 9.136e-06, "loss": 24.4885, "step": 45680 }, { "epoch": 0.09229669073235373, "grad_norm": 341.9969482421875, "learning_rate": 9.138e-06, "loss": 25.2685, "step": 45690 }, { "epoch": 0.09231689136503755, "grad_norm": 125.28470611572266, "learning_rate": 9.14e-06, "loss": 27.5011, "step": 45700 }, { "epoch": 0.09233709199772137, "grad_norm": 263.9452209472656, "learning_rate": 9.142000000000002e-06, "loss": 31.127, "step": 45710 }, { "epoch": 0.09235729263040518, "grad_norm": 171.90745544433594, "learning_rate": 9.144000000000001e-06, "loss": 22.2849, "step": 45720 }, { "epoch": 0.092377493263089, "grad_norm": 204.85107421875, "learning_rate": 9.146e-06, "loss": 16.2026, "step": 45730 }, { "epoch": 0.09239769389577282, "grad_norm": 129.3535614013672, "learning_rate": 9.148e-06, "loss": 31.9694, "step": 45740 }, { "epoch": 0.09241789452845663, "grad_norm": 341.0097961425781, "learning_rate": 9.15e-06, "loss": 23.2592, "step": 45750 }, { "epoch": 0.09243809516114045, "grad_norm": 238.45262145996094, "learning_rate": 9.152000000000001e-06, "loss": 20.5036, "step": 45760 }, { "epoch": 0.09245829579382427, "grad_norm": 112.69478607177734, "learning_rate": 9.154e-06, "loss": 25.247, "step": 45770 }, { "epoch": 0.09247849642650807, "grad_norm": 342.8255310058594, "learning_rate": 9.156e-06, "loss": 46.3407, "step": 45780 }, { "epoch": 0.09249869705919189, "grad_norm": 263.7348937988281, "learning_rate": 9.158e-06, "loss": 26.5618, "step": 45790 }, { "epoch": 0.09251889769187571, "grad_norm": 331.3025207519531, "learning_rate": 9.16e-06, "loss": 26.4323, "step": 45800 }, { "epoch": 0.09253909832455952, "grad_norm": 382.53021240234375, "learning_rate": 9.162000000000001e-06, "loss": 47.2401, "step": 45810 }, { "epoch": 0.09255929895724334, "grad_norm": 242.24581909179688, "learning_rate": 9.164e-06, "loss": 34.1963, "step": 45820 }, { "epoch": 0.09257949958992716, "grad_norm": 190.64939880371094, "learning_rate": 9.166e-06, "loss": 29.8224, "step": 45830 }, { "epoch": 0.09259970022261098, "grad_norm": 259.488037109375, "learning_rate": 9.168000000000001e-06, "loss": 44.4343, "step": 45840 }, { "epoch": 0.09261990085529478, "grad_norm": 390.8870849609375, "learning_rate": 9.17e-06, "loss": 41.4994, "step": 45850 }, { "epoch": 0.0926401014879786, "grad_norm": 275.4360656738281, "learning_rate": 9.172000000000001e-06, "loss": 34.6979, "step": 45860 }, { "epoch": 0.09266030212066242, "grad_norm": 178.65843200683594, "learning_rate": 9.174e-06, "loss": 32.5448, "step": 45870 }, { "epoch": 0.09268050275334623, "grad_norm": 149.93983459472656, "learning_rate": 9.176e-06, "loss": 17.7622, "step": 45880 }, { "epoch": 0.09270070338603005, "grad_norm": 344.4548645019531, "learning_rate": 9.178000000000001e-06, "loss": 23.7696, "step": 45890 }, { "epoch": 0.09272090401871387, "grad_norm": 150.549072265625, "learning_rate": 9.180000000000002e-06, "loss": 22.8689, "step": 45900 }, { "epoch": 0.09274110465139768, "grad_norm": 177.60374450683594, "learning_rate": 9.182000000000001e-06, "loss": 25.8094, "step": 45910 }, { "epoch": 0.0927613052840815, "grad_norm": 300.2453918457031, "learning_rate": 9.184e-06, "loss": 35.0513, "step": 45920 }, { "epoch": 0.09278150591676532, "grad_norm": 414.835693359375, "learning_rate": 9.186e-06, "loss": 35.2172, "step": 45930 }, { "epoch": 0.09280170654944912, "grad_norm": 310.05487060546875, "learning_rate": 9.188e-06, "loss": 25.8454, "step": 45940 }, { "epoch": 0.09282190718213294, "grad_norm": 299.5995178222656, "learning_rate": 9.190000000000002e-06, "loss": 23.1495, "step": 45950 }, { "epoch": 0.09284210781481676, "grad_norm": 462.22125244140625, "learning_rate": 9.192000000000001e-06, "loss": 20.2255, "step": 45960 }, { "epoch": 0.09286230844750057, "grad_norm": 343.907958984375, "learning_rate": 9.194e-06, "loss": 17.4941, "step": 45970 }, { "epoch": 0.09288250908018439, "grad_norm": 279.1583557128906, "learning_rate": 9.196e-06, "loss": 33.785, "step": 45980 }, { "epoch": 0.09290270971286821, "grad_norm": 354.1332702636719, "learning_rate": 9.198e-06, "loss": 28.0464, "step": 45990 }, { "epoch": 0.09292291034555203, "grad_norm": 392.66558837890625, "learning_rate": 9.200000000000002e-06, "loss": 20.3079, "step": 46000 }, { "epoch": 0.09294311097823584, "grad_norm": 196.61886596679688, "learning_rate": 9.202000000000001e-06, "loss": 21.2111, "step": 46010 }, { "epoch": 0.09296331161091966, "grad_norm": 512.6605834960938, "learning_rate": 9.204e-06, "loss": 26.6114, "step": 46020 }, { "epoch": 0.09298351224360348, "grad_norm": 226.9892120361328, "learning_rate": 9.206000000000001e-06, "loss": 17.5706, "step": 46030 }, { "epoch": 0.09300371287628728, "grad_norm": 466.7447204589844, "learning_rate": 9.208e-06, "loss": 19.2912, "step": 46040 }, { "epoch": 0.0930239135089711, "grad_norm": 111.75578308105469, "learning_rate": 9.210000000000002e-06, "loss": 26.1449, "step": 46050 }, { "epoch": 0.09304411414165492, "grad_norm": 205.64398193359375, "learning_rate": 9.212000000000001e-06, "loss": 41.9457, "step": 46060 }, { "epoch": 0.09306431477433873, "grad_norm": 699.5403442382812, "learning_rate": 9.214e-06, "loss": 38.3786, "step": 46070 }, { "epoch": 0.09308451540702255, "grad_norm": 355.754150390625, "learning_rate": 9.216000000000001e-06, "loss": 25.631, "step": 46080 }, { "epoch": 0.09310471603970637, "grad_norm": 220.89688110351562, "learning_rate": 9.218e-06, "loss": 108.2437, "step": 46090 }, { "epoch": 0.09312491667239017, "grad_norm": 257.9708557128906, "learning_rate": 9.220000000000002e-06, "loss": 39.5461, "step": 46100 }, { "epoch": 0.093145117305074, "grad_norm": 205.7578887939453, "learning_rate": 9.222e-06, "loss": 22.0845, "step": 46110 }, { "epoch": 0.09316531793775781, "grad_norm": 160.69703674316406, "learning_rate": 9.224e-06, "loss": 32.2559, "step": 46120 }, { "epoch": 0.09318551857044162, "grad_norm": 342.7028503417969, "learning_rate": 9.226000000000001e-06, "loss": 26.7098, "step": 46130 }, { "epoch": 0.09320571920312544, "grad_norm": 514.9971313476562, "learning_rate": 9.228e-06, "loss": 22.4883, "step": 46140 }, { "epoch": 0.09322591983580926, "grad_norm": 498.7698059082031, "learning_rate": 9.230000000000001e-06, "loss": 32.9935, "step": 46150 }, { "epoch": 0.09324612046849308, "grad_norm": 185.79342651367188, "learning_rate": 9.232e-06, "loss": 36.0566, "step": 46160 }, { "epoch": 0.09326632110117689, "grad_norm": 374.2249755859375, "learning_rate": 9.234e-06, "loss": 42.7387, "step": 46170 }, { "epoch": 0.0932865217338607, "grad_norm": 484.03936767578125, "learning_rate": 9.236000000000001e-06, "loss": 34.8401, "step": 46180 }, { "epoch": 0.09330672236654453, "grad_norm": 443.19073486328125, "learning_rate": 9.238e-06, "loss": 41.23, "step": 46190 }, { "epoch": 0.09332692299922833, "grad_norm": 69.16180419921875, "learning_rate": 9.240000000000001e-06, "loss": 14.7336, "step": 46200 }, { "epoch": 0.09334712363191215, "grad_norm": 256.017578125, "learning_rate": 9.242e-06, "loss": 32.4395, "step": 46210 }, { "epoch": 0.09336732426459597, "grad_norm": 171.83221435546875, "learning_rate": 9.244e-06, "loss": 28.0523, "step": 46220 }, { "epoch": 0.09338752489727978, "grad_norm": 375.6372985839844, "learning_rate": 9.246000000000001e-06, "loss": 20.6037, "step": 46230 }, { "epoch": 0.0934077255299636, "grad_norm": 42.199371337890625, "learning_rate": 9.248e-06, "loss": 18.4211, "step": 46240 }, { "epoch": 0.09342792616264742, "grad_norm": 15.212872505187988, "learning_rate": 9.250000000000001e-06, "loss": 22.778, "step": 46250 }, { "epoch": 0.09344812679533122, "grad_norm": 508.84991455078125, "learning_rate": 9.252e-06, "loss": 38.8366, "step": 46260 }, { "epoch": 0.09346832742801504, "grad_norm": 246.87570190429688, "learning_rate": 9.254000000000002e-06, "loss": 28.2511, "step": 46270 }, { "epoch": 0.09348852806069886, "grad_norm": 259.32354736328125, "learning_rate": 9.256e-06, "loss": 21.8547, "step": 46280 }, { "epoch": 0.09350872869338267, "grad_norm": 480.2353210449219, "learning_rate": 9.258e-06, "loss": 42.0136, "step": 46290 }, { "epoch": 0.09352892932606649, "grad_norm": 767.52392578125, "learning_rate": 9.260000000000001e-06, "loss": 31.8907, "step": 46300 }, { "epoch": 0.09354912995875031, "grad_norm": 117.1618881225586, "learning_rate": 9.262e-06, "loss": 27.5934, "step": 46310 }, { "epoch": 0.09356933059143413, "grad_norm": 308.9576416015625, "learning_rate": 9.264000000000001e-06, "loss": 16.238, "step": 46320 }, { "epoch": 0.09358953122411794, "grad_norm": 392.2482604980469, "learning_rate": 9.266e-06, "loss": 23.5379, "step": 46330 }, { "epoch": 0.09360973185680176, "grad_norm": 252.03379821777344, "learning_rate": 9.268e-06, "loss": 17.4141, "step": 46340 }, { "epoch": 0.09362993248948558, "grad_norm": 182.81300354003906, "learning_rate": 9.270000000000001e-06, "loss": 31.7994, "step": 46350 }, { "epoch": 0.09365013312216938, "grad_norm": 206.65316772460938, "learning_rate": 9.272e-06, "loss": 37.9354, "step": 46360 }, { "epoch": 0.0936703337548532, "grad_norm": 173.16893005371094, "learning_rate": 9.274000000000001e-06, "loss": 19.3361, "step": 46370 }, { "epoch": 0.09369053438753702, "grad_norm": 230.43125915527344, "learning_rate": 9.276e-06, "loss": 49.8127, "step": 46380 }, { "epoch": 0.09371073502022083, "grad_norm": 405.7583312988281, "learning_rate": 9.278e-06, "loss": 30.0313, "step": 46390 }, { "epoch": 0.09373093565290465, "grad_norm": 498.750732421875, "learning_rate": 9.280000000000001e-06, "loss": 41.0712, "step": 46400 }, { "epoch": 0.09375113628558847, "grad_norm": 299.1022644042969, "learning_rate": 9.282e-06, "loss": 26.8364, "step": 46410 }, { "epoch": 0.09377133691827227, "grad_norm": 488.5164489746094, "learning_rate": 9.284000000000001e-06, "loss": 44.3081, "step": 46420 }, { "epoch": 0.0937915375509561, "grad_norm": 614.4946899414062, "learning_rate": 9.286e-06, "loss": 39.014, "step": 46430 }, { "epoch": 0.09381173818363991, "grad_norm": 262.28125, "learning_rate": 9.288e-06, "loss": 30.4426, "step": 46440 }, { "epoch": 0.09383193881632372, "grad_norm": 142.2065887451172, "learning_rate": 9.29e-06, "loss": 61.3889, "step": 46450 }, { "epoch": 0.09385213944900754, "grad_norm": 216.53396606445312, "learning_rate": 9.292000000000002e-06, "loss": 37.8996, "step": 46460 }, { "epoch": 0.09387234008169136, "grad_norm": 179.7064208984375, "learning_rate": 9.294000000000001e-06, "loss": 30.5631, "step": 46470 }, { "epoch": 0.09389254071437518, "grad_norm": 115.9587631225586, "learning_rate": 9.296e-06, "loss": 8.736, "step": 46480 }, { "epoch": 0.09391274134705899, "grad_norm": 41.4525032043457, "learning_rate": 9.298e-06, "loss": 8.5796, "step": 46490 }, { "epoch": 0.0939329419797428, "grad_norm": 327.2312927246094, "learning_rate": 9.3e-06, "loss": 18.7261, "step": 46500 }, { "epoch": 0.09395314261242663, "grad_norm": 161.3704071044922, "learning_rate": 9.302000000000002e-06, "loss": 28.4808, "step": 46510 }, { "epoch": 0.09397334324511043, "grad_norm": 273.9179382324219, "learning_rate": 9.304000000000001e-06, "loss": 25.7428, "step": 46520 }, { "epoch": 0.09399354387779425, "grad_norm": 115.5491943359375, "learning_rate": 9.306e-06, "loss": 13.5796, "step": 46530 }, { "epoch": 0.09401374451047807, "grad_norm": 202.24899291992188, "learning_rate": 9.308e-06, "loss": 16.3043, "step": 46540 }, { "epoch": 0.09403394514316188, "grad_norm": 345.0452880859375, "learning_rate": 9.31e-06, "loss": 39.5702, "step": 46550 }, { "epoch": 0.0940541457758457, "grad_norm": 297.7521667480469, "learning_rate": 9.312000000000002e-06, "loss": 21.0327, "step": 46560 }, { "epoch": 0.09407434640852952, "grad_norm": 284.87689208984375, "learning_rate": 9.314000000000001e-06, "loss": 20.9661, "step": 46570 }, { "epoch": 0.09409454704121333, "grad_norm": 144.26654052734375, "learning_rate": 9.316e-06, "loss": 14.2522, "step": 46580 }, { "epoch": 0.09411474767389715, "grad_norm": 395.47515869140625, "learning_rate": 9.318e-06, "loss": 35.5239, "step": 46590 }, { "epoch": 0.09413494830658097, "grad_norm": 277.9868469238281, "learning_rate": 9.32e-06, "loss": 17.8679, "step": 46600 }, { "epoch": 0.09415514893926477, "grad_norm": 147.97491455078125, "learning_rate": 9.322000000000002e-06, "loss": 17.2787, "step": 46610 }, { "epoch": 0.09417534957194859, "grad_norm": 136.29014587402344, "learning_rate": 9.324000000000001e-06, "loss": 20.8164, "step": 46620 }, { "epoch": 0.09419555020463241, "grad_norm": 325.43695068359375, "learning_rate": 9.326e-06, "loss": 39.6291, "step": 46630 }, { "epoch": 0.09421575083731623, "grad_norm": 515.9306640625, "learning_rate": 9.328000000000001e-06, "loss": 29.0055, "step": 46640 }, { "epoch": 0.09423595147000004, "grad_norm": 136.9405059814453, "learning_rate": 9.33e-06, "loss": 36.0736, "step": 46650 }, { "epoch": 0.09425615210268386, "grad_norm": 167.56654357910156, "learning_rate": 9.332000000000001e-06, "loss": 19.506, "step": 46660 }, { "epoch": 0.09427635273536768, "grad_norm": 509.81536865234375, "learning_rate": 9.334e-06, "loss": 40.1764, "step": 46670 }, { "epoch": 0.09429655336805148, "grad_norm": 175.9873809814453, "learning_rate": 9.336e-06, "loss": 14.0671, "step": 46680 }, { "epoch": 0.0943167540007353, "grad_norm": 209.4967041015625, "learning_rate": 9.338000000000001e-06, "loss": 19.5968, "step": 46690 }, { "epoch": 0.09433695463341912, "grad_norm": 398.7560729980469, "learning_rate": 9.340000000000002e-06, "loss": 18.8386, "step": 46700 }, { "epoch": 0.09435715526610293, "grad_norm": 0.0, "learning_rate": 9.342000000000001e-06, "loss": 22.1879, "step": 46710 }, { "epoch": 0.09437735589878675, "grad_norm": 383.53955078125, "learning_rate": 9.344e-06, "loss": 15.3881, "step": 46720 }, { "epoch": 0.09439755653147057, "grad_norm": 73.78945922851562, "learning_rate": 9.346e-06, "loss": 31.332, "step": 46730 }, { "epoch": 0.09441775716415438, "grad_norm": 174.1156005859375, "learning_rate": 9.348000000000001e-06, "loss": 15.3515, "step": 46740 }, { "epoch": 0.0944379577968382, "grad_norm": 312.28387451171875, "learning_rate": 9.350000000000002e-06, "loss": 25.5055, "step": 46750 }, { "epoch": 0.09445815842952202, "grad_norm": 283.5758972167969, "learning_rate": 9.352000000000001e-06, "loss": 40.2705, "step": 46760 }, { "epoch": 0.09447835906220582, "grad_norm": 211.5045623779297, "learning_rate": 9.354e-06, "loss": 16.7056, "step": 46770 }, { "epoch": 0.09449855969488964, "grad_norm": 286.8927917480469, "learning_rate": 9.356e-06, "loss": 15.8403, "step": 46780 }, { "epoch": 0.09451876032757346, "grad_norm": 175.50860595703125, "learning_rate": 9.358000000000001e-06, "loss": 29.1351, "step": 46790 }, { "epoch": 0.09453896096025728, "grad_norm": 450.1950988769531, "learning_rate": 9.360000000000002e-06, "loss": 45.8493, "step": 46800 }, { "epoch": 0.09455916159294109, "grad_norm": 160.00094604492188, "learning_rate": 9.362000000000001e-06, "loss": 22.1217, "step": 46810 }, { "epoch": 0.09457936222562491, "grad_norm": 379.1471252441406, "learning_rate": 9.364e-06, "loss": 36.5822, "step": 46820 }, { "epoch": 0.09459956285830873, "grad_norm": 146.80996704101562, "learning_rate": 9.366000000000001e-06, "loss": 15.8828, "step": 46830 }, { "epoch": 0.09461976349099253, "grad_norm": 156.9616241455078, "learning_rate": 9.368e-06, "loss": 22.1853, "step": 46840 }, { "epoch": 0.09463996412367635, "grad_norm": 259.34136962890625, "learning_rate": 9.370000000000002e-06, "loss": 16.7545, "step": 46850 }, { "epoch": 0.09466016475636017, "grad_norm": 136.08917236328125, "learning_rate": 9.372000000000001e-06, "loss": 19.4068, "step": 46860 }, { "epoch": 0.09468036538904398, "grad_norm": 0.0, "learning_rate": 9.374e-06, "loss": 12.6939, "step": 46870 }, { "epoch": 0.0947005660217278, "grad_norm": 193.8932342529297, "learning_rate": 9.376000000000001e-06, "loss": 25.0969, "step": 46880 }, { "epoch": 0.09472076665441162, "grad_norm": 366.0346984863281, "learning_rate": 9.378e-06, "loss": 25.531, "step": 46890 }, { "epoch": 0.09474096728709543, "grad_norm": 294.6965637207031, "learning_rate": 9.38e-06, "loss": 46.0427, "step": 46900 }, { "epoch": 0.09476116791977925, "grad_norm": 151.49635314941406, "learning_rate": 9.382000000000001e-06, "loss": 26.556, "step": 46910 }, { "epoch": 0.09478136855246307, "grad_norm": 33.83079147338867, "learning_rate": 9.384e-06, "loss": 12.5296, "step": 46920 }, { "epoch": 0.09480156918514687, "grad_norm": 174.05621337890625, "learning_rate": 9.386000000000001e-06, "loss": 17.9229, "step": 46930 }, { "epoch": 0.09482176981783069, "grad_norm": 990.734619140625, "learning_rate": 9.388e-06, "loss": 34.1208, "step": 46940 }, { "epoch": 0.09484197045051451, "grad_norm": 162.53610229492188, "learning_rate": 9.39e-06, "loss": 18.6538, "step": 46950 }, { "epoch": 0.09486217108319833, "grad_norm": 283.63641357421875, "learning_rate": 9.392000000000001e-06, "loss": 23.7475, "step": 46960 }, { "epoch": 0.09488237171588214, "grad_norm": 861.7537841796875, "learning_rate": 9.394e-06, "loss": 37.3668, "step": 46970 }, { "epoch": 0.09490257234856596, "grad_norm": 620.2133178710938, "learning_rate": 9.396000000000001e-06, "loss": 44.9794, "step": 46980 }, { "epoch": 0.09492277298124978, "grad_norm": 342.7024841308594, "learning_rate": 9.398e-06, "loss": 35.2573, "step": 46990 }, { "epoch": 0.09494297361393358, "grad_norm": 488.1629638671875, "learning_rate": 9.4e-06, "loss": 42.4338, "step": 47000 }, { "epoch": 0.0949631742466174, "grad_norm": 487.81591796875, "learning_rate": 9.402e-06, "loss": 23.7289, "step": 47010 }, { "epoch": 0.09498337487930122, "grad_norm": 260.9259338378906, "learning_rate": 9.404e-06, "loss": 26.5113, "step": 47020 }, { "epoch": 0.09500357551198503, "grad_norm": 234.31756591796875, "learning_rate": 9.406000000000001e-06, "loss": 35.6993, "step": 47030 }, { "epoch": 0.09502377614466885, "grad_norm": 214.07005310058594, "learning_rate": 9.408e-06, "loss": 29.3348, "step": 47040 }, { "epoch": 0.09504397677735267, "grad_norm": 285.12481689453125, "learning_rate": 9.41e-06, "loss": 28.4724, "step": 47050 }, { "epoch": 0.09506417741003648, "grad_norm": 102.60460662841797, "learning_rate": 9.412e-06, "loss": 13.0767, "step": 47060 }, { "epoch": 0.0950843780427203, "grad_norm": 364.4623718261719, "learning_rate": 9.414000000000002e-06, "loss": 28.5524, "step": 47070 }, { "epoch": 0.09510457867540412, "grad_norm": 105.84059143066406, "learning_rate": 9.416000000000001e-06, "loss": 14.806, "step": 47080 }, { "epoch": 0.09512477930808792, "grad_norm": 473.8843688964844, "learning_rate": 9.418e-06, "loss": 42.4885, "step": 47090 }, { "epoch": 0.09514497994077174, "grad_norm": 501.0219421386719, "learning_rate": 9.42e-06, "loss": 20.5563, "step": 47100 }, { "epoch": 0.09516518057345556, "grad_norm": 245.79226684570312, "learning_rate": 9.422e-06, "loss": 30.8523, "step": 47110 }, { "epoch": 0.09518538120613938, "grad_norm": 440.6980285644531, "learning_rate": 9.424000000000002e-06, "loss": 20.3458, "step": 47120 }, { "epoch": 0.09520558183882319, "grad_norm": 134.84454345703125, "learning_rate": 9.426000000000001e-06, "loss": 22.9754, "step": 47130 }, { "epoch": 0.09522578247150701, "grad_norm": 253.13475036621094, "learning_rate": 9.428e-06, "loss": 24.166, "step": 47140 }, { "epoch": 0.09524598310419083, "grad_norm": 552.6319580078125, "learning_rate": 9.43e-06, "loss": 30.454, "step": 47150 }, { "epoch": 0.09526618373687464, "grad_norm": 192.63673400878906, "learning_rate": 9.432e-06, "loss": 15.3704, "step": 47160 }, { "epoch": 0.09528638436955846, "grad_norm": 282.004638671875, "learning_rate": 9.434000000000001e-06, "loss": 14.1634, "step": 47170 }, { "epoch": 0.09530658500224228, "grad_norm": 4.800084590911865, "learning_rate": 9.436e-06, "loss": 20.9173, "step": 47180 }, { "epoch": 0.09532678563492608, "grad_norm": 215.919189453125, "learning_rate": 9.438e-06, "loss": 28.422, "step": 47190 }, { "epoch": 0.0953469862676099, "grad_norm": 145.40113830566406, "learning_rate": 9.440000000000001e-06, "loss": 41.2476, "step": 47200 }, { "epoch": 0.09536718690029372, "grad_norm": 238.4381866455078, "learning_rate": 9.442e-06, "loss": 12.3229, "step": 47210 }, { "epoch": 0.09538738753297753, "grad_norm": 108.76630401611328, "learning_rate": 9.444000000000001e-06, "loss": 28.2775, "step": 47220 }, { "epoch": 0.09540758816566135, "grad_norm": 967.0126953125, "learning_rate": 9.446e-06, "loss": 35.584, "step": 47230 }, { "epoch": 0.09542778879834517, "grad_norm": 149.7557830810547, "learning_rate": 9.448e-06, "loss": 32.1452, "step": 47240 }, { "epoch": 0.09544798943102897, "grad_norm": 389.9820556640625, "learning_rate": 9.450000000000001e-06, "loss": 55.3542, "step": 47250 }, { "epoch": 0.0954681900637128, "grad_norm": 353.1036682128906, "learning_rate": 9.452000000000002e-06, "loss": 28.5349, "step": 47260 }, { "epoch": 0.09548839069639661, "grad_norm": 343.2425842285156, "learning_rate": 9.454000000000001e-06, "loss": 24.9427, "step": 47270 }, { "epoch": 0.09550859132908043, "grad_norm": 164.5740203857422, "learning_rate": 9.456e-06, "loss": 30.4049, "step": 47280 }, { "epoch": 0.09552879196176424, "grad_norm": 408.0051574707031, "learning_rate": 9.458e-06, "loss": 46.2949, "step": 47290 }, { "epoch": 0.09554899259444806, "grad_norm": 482.9880065917969, "learning_rate": 9.460000000000001e-06, "loss": 45.5242, "step": 47300 }, { "epoch": 0.09556919322713188, "grad_norm": 119.9643783569336, "learning_rate": 9.462000000000002e-06, "loss": 25.7254, "step": 47310 }, { "epoch": 0.09558939385981569, "grad_norm": 224.818603515625, "learning_rate": 9.464000000000001e-06, "loss": 24.7989, "step": 47320 }, { "epoch": 0.0956095944924995, "grad_norm": 179.49264526367188, "learning_rate": 9.466e-06, "loss": 14.824, "step": 47330 }, { "epoch": 0.09562979512518333, "grad_norm": 249.2217559814453, "learning_rate": 9.468e-06, "loss": 42.176, "step": 47340 }, { "epoch": 0.09564999575786713, "grad_norm": 298.93292236328125, "learning_rate": 9.47e-06, "loss": 46.4277, "step": 47350 }, { "epoch": 0.09567019639055095, "grad_norm": 141.67662048339844, "learning_rate": 9.472000000000002e-06, "loss": 34.9843, "step": 47360 }, { "epoch": 0.09569039702323477, "grad_norm": 398.0690002441406, "learning_rate": 9.474000000000001e-06, "loss": 30.6515, "step": 47370 }, { "epoch": 0.09571059765591858, "grad_norm": 371.044677734375, "learning_rate": 9.476e-06, "loss": 34.9853, "step": 47380 }, { "epoch": 0.0957307982886024, "grad_norm": 133.7108154296875, "learning_rate": 9.478e-06, "loss": 28.2317, "step": 47390 }, { "epoch": 0.09575099892128622, "grad_norm": 344.5661315917969, "learning_rate": 9.48e-06, "loss": 34.9745, "step": 47400 }, { "epoch": 0.09577119955397002, "grad_norm": 464.4530334472656, "learning_rate": 9.482000000000002e-06, "loss": 42.0046, "step": 47410 }, { "epoch": 0.09579140018665384, "grad_norm": 85.56758117675781, "learning_rate": 9.484000000000001e-06, "loss": 23.7937, "step": 47420 }, { "epoch": 0.09581160081933766, "grad_norm": 297.1073303222656, "learning_rate": 9.486e-06, "loss": 40.2882, "step": 47430 }, { "epoch": 0.09583180145202148, "grad_norm": 530.075927734375, "learning_rate": 9.488000000000001e-06, "loss": 27.4558, "step": 47440 }, { "epoch": 0.09585200208470529, "grad_norm": 368.1951904296875, "learning_rate": 9.49e-06, "loss": 36.2635, "step": 47450 }, { "epoch": 0.09587220271738911, "grad_norm": 7.486334323883057, "learning_rate": 9.492000000000002e-06, "loss": 21.1109, "step": 47460 }, { "epoch": 0.09589240335007293, "grad_norm": 212.6814727783203, "learning_rate": 9.494000000000001e-06, "loss": 44.6286, "step": 47470 }, { "epoch": 0.09591260398275674, "grad_norm": 441.1130676269531, "learning_rate": 9.496e-06, "loss": 35.2669, "step": 47480 }, { "epoch": 0.09593280461544056, "grad_norm": 1021.3592529296875, "learning_rate": 9.498000000000001e-06, "loss": 30.2061, "step": 47490 }, { "epoch": 0.09595300524812438, "grad_norm": 484.38470458984375, "learning_rate": 9.5e-06, "loss": 27.3102, "step": 47500 }, { "epoch": 0.09597320588080818, "grad_norm": 93.83991241455078, "learning_rate": 9.502000000000002e-06, "loss": 22.2169, "step": 47510 }, { "epoch": 0.095993406513492, "grad_norm": 207.8339080810547, "learning_rate": 9.504e-06, "loss": 30.1165, "step": 47520 }, { "epoch": 0.09601360714617582, "grad_norm": 310.22607421875, "learning_rate": 9.506e-06, "loss": 46.088, "step": 47530 }, { "epoch": 0.09603380777885963, "grad_norm": 234.8119354248047, "learning_rate": 9.508000000000001e-06, "loss": 31.9612, "step": 47540 }, { "epoch": 0.09605400841154345, "grad_norm": 276.117431640625, "learning_rate": 9.51e-06, "loss": 28.9469, "step": 47550 }, { "epoch": 0.09607420904422727, "grad_norm": 248.8937530517578, "learning_rate": 9.512000000000001e-06, "loss": 23.3061, "step": 47560 }, { "epoch": 0.09609440967691107, "grad_norm": 452.4816589355469, "learning_rate": 9.514e-06, "loss": 39.5645, "step": 47570 }, { "epoch": 0.0961146103095949, "grad_norm": 358.3267517089844, "learning_rate": 9.516e-06, "loss": 34.1317, "step": 47580 }, { "epoch": 0.09613481094227871, "grad_norm": 224.7090606689453, "learning_rate": 9.518000000000001e-06, "loss": 23.2155, "step": 47590 }, { "epoch": 0.09615501157496253, "grad_norm": 88.47576904296875, "learning_rate": 9.52e-06, "loss": 37.8515, "step": 47600 }, { "epoch": 0.09617521220764634, "grad_norm": 599.2704467773438, "learning_rate": 9.522000000000001e-06, "loss": 25.5475, "step": 47610 }, { "epoch": 0.09619541284033016, "grad_norm": 206.11366271972656, "learning_rate": 9.524e-06, "loss": 24.6448, "step": 47620 }, { "epoch": 0.09621561347301398, "grad_norm": 191.48468017578125, "learning_rate": 9.526000000000002e-06, "loss": 28.373, "step": 47630 }, { "epoch": 0.09623581410569779, "grad_norm": 184.4026336669922, "learning_rate": 9.528000000000001e-06, "loss": 33.8116, "step": 47640 }, { "epoch": 0.0962560147383816, "grad_norm": 413.20556640625, "learning_rate": 9.53e-06, "loss": 31.2827, "step": 47650 }, { "epoch": 0.09627621537106543, "grad_norm": 279.68231201171875, "learning_rate": 9.532000000000001e-06, "loss": 50.3798, "step": 47660 }, { "epoch": 0.09629641600374923, "grad_norm": 397.31622314453125, "learning_rate": 9.534e-06, "loss": 33.5925, "step": 47670 }, { "epoch": 0.09631661663643305, "grad_norm": 104.85977935791016, "learning_rate": 9.536000000000002e-06, "loss": 37.729, "step": 47680 }, { "epoch": 0.09633681726911687, "grad_norm": 238.70516967773438, "learning_rate": 9.538e-06, "loss": 14.1088, "step": 47690 }, { "epoch": 0.09635701790180068, "grad_norm": 296.948974609375, "learning_rate": 9.54e-06, "loss": 22.2157, "step": 47700 }, { "epoch": 0.0963772185344845, "grad_norm": 274.18646240234375, "learning_rate": 9.542000000000001e-06, "loss": 27.6657, "step": 47710 }, { "epoch": 0.09639741916716832, "grad_norm": 320.31134033203125, "learning_rate": 9.544e-06, "loss": 36.0598, "step": 47720 }, { "epoch": 0.09641761979985213, "grad_norm": 320.7585754394531, "learning_rate": 9.546000000000001e-06, "loss": 29.9983, "step": 47730 }, { "epoch": 0.09643782043253595, "grad_norm": 83.4265365600586, "learning_rate": 9.548e-06, "loss": 20.6847, "step": 47740 }, { "epoch": 0.09645802106521977, "grad_norm": 10.389191627502441, "learning_rate": 9.55e-06, "loss": 25.2331, "step": 47750 }, { "epoch": 0.09647822169790359, "grad_norm": 251.98333740234375, "learning_rate": 9.552000000000001e-06, "loss": 15.0803, "step": 47760 }, { "epoch": 0.09649842233058739, "grad_norm": 406.4750671386719, "learning_rate": 9.554e-06, "loss": 32.0222, "step": 47770 }, { "epoch": 0.09651862296327121, "grad_norm": 204.90992736816406, "learning_rate": 9.556000000000001e-06, "loss": 26.125, "step": 47780 }, { "epoch": 0.09653882359595503, "grad_norm": 644.4906616210938, "learning_rate": 9.558e-06, "loss": 46.7493, "step": 47790 }, { "epoch": 0.09655902422863884, "grad_norm": 61.69281768798828, "learning_rate": 9.56e-06, "loss": 34.1308, "step": 47800 }, { "epoch": 0.09657922486132266, "grad_norm": 224.52615356445312, "learning_rate": 9.562000000000001e-06, "loss": 34.1622, "step": 47810 }, { "epoch": 0.09659942549400648, "grad_norm": 179.5339813232422, "learning_rate": 9.564e-06, "loss": 52.3749, "step": 47820 }, { "epoch": 0.09661962612669028, "grad_norm": 226.17970275878906, "learning_rate": 9.566000000000001e-06, "loss": 20.9835, "step": 47830 }, { "epoch": 0.0966398267593741, "grad_norm": 371.74407958984375, "learning_rate": 9.568e-06, "loss": 20.822, "step": 47840 }, { "epoch": 0.09666002739205792, "grad_norm": 633.26318359375, "learning_rate": 9.57e-06, "loss": 36.2172, "step": 47850 }, { "epoch": 0.09668022802474173, "grad_norm": 214.9342498779297, "learning_rate": 9.572000000000001e-06, "loss": 30.5562, "step": 47860 }, { "epoch": 0.09670042865742555, "grad_norm": 270.5494689941406, "learning_rate": 9.574000000000002e-06, "loss": 33.3513, "step": 47870 }, { "epoch": 0.09672062929010937, "grad_norm": 193.7895965576172, "learning_rate": 9.576000000000001e-06, "loss": 33.9909, "step": 47880 }, { "epoch": 0.09674082992279318, "grad_norm": 243.3131561279297, "learning_rate": 9.578e-06, "loss": 24.7807, "step": 47890 }, { "epoch": 0.096761030555477, "grad_norm": 142.13450622558594, "learning_rate": 9.58e-06, "loss": 25.0281, "step": 47900 }, { "epoch": 0.09678123118816082, "grad_norm": 332.16943359375, "learning_rate": 9.582e-06, "loss": 15.0823, "step": 47910 }, { "epoch": 0.09680143182084464, "grad_norm": 230.81385803222656, "learning_rate": 9.584000000000002e-06, "loss": 16.607, "step": 47920 }, { "epoch": 0.09682163245352844, "grad_norm": 51.80336380004883, "learning_rate": 9.586000000000001e-06, "loss": 34.93, "step": 47930 }, { "epoch": 0.09684183308621226, "grad_norm": 240.3956756591797, "learning_rate": 9.588e-06, "loss": 36.5367, "step": 47940 }, { "epoch": 0.09686203371889608, "grad_norm": 282.3229064941406, "learning_rate": 9.59e-06, "loss": 28.0469, "step": 47950 }, { "epoch": 0.09688223435157989, "grad_norm": 102.88674926757812, "learning_rate": 9.592e-06, "loss": 11.9609, "step": 47960 }, { "epoch": 0.09690243498426371, "grad_norm": 166.9150390625, "learning_rate": 9.594000000000002e-06, "loss": 30.0205, "step": 47970 }, { "epoch": 0.09692263561694753, "grad_norm": 244.00332641601562, "learning_rate": 9.596000000000001e-06, "loss": 23.0701, "step": 47980 }, { "epoch": 0.09694283624963133, "grad_norm": 171.54745483398438, "learning_rate": 9.598e-06, "loss": 39.789, "step": 47990 }, { "epoch": 0.09696303688231515, "grad_norm": 663.2396850585938, "learning_rate": 9.600000000000001e-06, "loss": 35.9569, "step": 48000 }, { "epoch": 0.09698323751499897, "grad_norm": 181.33450317382812, "learning_rate": 9.602e-06, "loss": 35.9819, "step": 48010 }, { "epoch": 0.09700343814768278, "grad_norm": 182.11563110351562, "learning_rate": 9.604000000000002e-06, "loss": 37.579, "step": 48020 }, { "epoch": 0.0970236387803666, "grad_norm": 240.76393127441406, "learning_rate": 9.606000000000001e-06, "loss": 48.8861, "step": 48030 }, { "epoch": 0.09704383941305042, "grad_norm": 296.1114807128906, "learning_rate": 9.608e-06, "loss": 17.3549, "step": 48040 }, { "epoch": 0.09706404004573423, "grad_norm": 203.3651123046875, "learning_rate": 9.610000000000001e-06, "loss": 31.2498, "step": 48050 }, { "epoch": 0.09708424067841805, "grad_norm": 278.7127685546875, "learning_rate": 9.612000000000002e-06, "loss": 30.8443, "step": 48060 }, { "epoch": 0.09710444131110187, "grad_norm": 74.41629028320312, "learning_rate": 9.614000000000001e-06, "loss": 24.6337, "step": 48070 }, { "epoch": 0.09712464194378569, "grad_norm": 117.17208099365234, "learning_rate": 9.616e-06, "loss": 34.0065, "step": 48080 }, { "epoch": 0.09714484257646949, "grad_norm": 60.72517013549805, "learning_rate": 9.618e-06, "loss": 22.252, "step": 48090 }, { "epoch": 0.09716504320915331, "grad_norm": 1368.06396484375, "learning_rate": 9.620000000000001e-06, "loss": 34.2581, "step": 48100 }, { "epoch": 0.09718524384183713, "grad_norm": 275.4927673339844, "learning_rate": 9.622000000000002e-06, "loss": 30.9936, "step": 48110 }, { "epoch": 0.09720544447452094, "grad_norm": 386.2610778808594, "learning_rate": 9.624000000000001e-06, "loss": 18.8651, "step": 48120 }, { "epoch": 0.09722564510720476, "grad_norm": 330.3645935058594, "learning_rate": 9.626e-06, "loss": 15.014, "step": 48130 }, { "epoch": 0.09724584573988858, "grad_norm": 106.3940658569336, "learning_rate": 9.628e-06, "loss": 12.9681, "step": 48140 }, { "epoch": 0.09726604637257238, "grad_norm": 0.0, "learning_rate": 9.630000000000001e-06, "loss": 38.0537, "step": 48150 }, { "epoch": 0.0972862470052562, "grad_norm": 333.6963806152344, "learning_rate": 9.632e-06, "loss": 28.5315, "step": 48160 }, { "epoch": 0.09730644763794002, "grad_norm": 513.44140625, "learning_rate": 9.634000000000001e-06, "loss": 29.244, "step": 48170 }, { "epoch": 0.09732664827062383, "grad_norm": 248.2833709716797, "learning_rate": 9.636e-06, "loss": 17.3875, "step": 48180 }, { "epoch": 0.09734684890330765, "grad_norm": 322.15643310546875, "learning_rate": 9.638e-06, "loss": 19.5762, "step": 48190 }, { "epoch": 0.09736704953599147, "grad_norm": 295.4443359375, "learning_rate": 9.640000000000001e-06, "loss": 55.6807, "step": 48200 }, { "epoch": 0.09738725016867528, "grad_norm": 357.3920593261719, "learning_rate": 9.642e-06, "loss": 30.4318, "step": 48210 }, { "epoch": 0.0974074508013591, "grad_norm": 274.4111022949219, "learning_rate": 9.644000000000001e-06, "loss": 23.1488, "step": 48220 }, { "epoch": 0.09742765143404292, "grad_norm": 633.9121704101562, "learning_rate": 9.646e-06, "loss": 32.3785, "step": 48230 }, { "epoch": 0.09744785206672674, "grad_norm": 265.9201965332031, "learning_rate": 9.648000000000001e-06, "loss": 20.7568, "step": 48240 }, { "epoch": 0.09746805269941054, "grad_norm": 48.80295181274414, "learning_rate": 9.65e-06, "loss": 28.3308, "step": 48250 }, { "epoch": 0.09748825333209436, "grad_norm": 152.09278869628906, "learning_rate": 9.652e-06, "loss": 31.1182, "step": 48260 }, { "epoch": 0.09750845396477818, "grad_norm": 132.50619506835938, "learning_rate": 9.654000000000001e-06, "loss": 20.2228, "step": 48270 }, { "epoch": 0.09752865459746199, "grad_norm": 376.0819091796875, "learning_rate": 9.656e-06, "loss": 40.3744, "step": 48280 }, { "epoch": 0.09754885523014581, "grad_norm": 584.2837524414062, "learning_rate": 9.658000000000001e-06, "loss": 31.7879, "step": 48290 }, { "epoch": 0.09756905586282963, "grad_norm": 499.9570617675781, "learning_rate": 9.66e-06, "loss": 23.5869, "step": 48300 }, { "epoch": 0.09758925649551344, "grad_norm": 159.17916870117188, "learning_rate": 9.662e-06, "loss": 23.0782, "step": 48310 }, { "epoch": 0.09760945712819726, "grad_norm": 117.46460723876953, "learning_rate": 9.664000000000001e-06, "loss": 26.3456, "step": 48320 }, { "epoch": 0.09762965776088108, "grad_norm": 64.19599914550781, "learning_rate": 9.666e-06, "loss": 25.6575, "step": 48330 }, { "epoch": 0.09764985839356488, "grad_norm": 289.7062072753906, "learning_rate": 9.668000000000001e-06, "loss": 21.0112, "step": 48340 }, { "epoch": 0.0976700590262487, "grad_norm": 492.8553466796875, "learning_rate": 9.67e-06, "loss": 41.5334, "step": 48350 }, { "epoch": 0.09769025965893252, "grad_norm": 542.4243774414062, "learning_rate": 9.672e-06, "loss": 37.145, "step": 48360 }, { "epoch": 0.09771046029161633, "grad_norm": 332.63787841796875, "learning_rate": 9.674000000000001e-06, "loss": 25.038, "step": 48370 }, { "epoch": 0.09773066092430015, "grad_norm": 224.95941162109375, "learning_rate": 9.676e-06, "loss": 30.3335, "step": 48380 }, { "epoch": 0.09775086155698397, "grad_norm": 158.5520477294922, "learning_rate": 9.678000000000001e-06, "loss": 30.1593, "step": 48390 }, { "epoch": 0.09777106218966779, "grad_norm": 700.5819091796875, "learning_rate": 9.68e-06, "loss": 22.3392, "step": 48400 }, { "epoch": 0.0977912628223516, "grad_norm": 363.53155517578125, "learning_rate": 9.682e-06, "loss": 42.9607, "step": 48410 }, { "epoch": 0.09781146345503541, "grad_norm": 212.304443359375, "learning_rate": 9.684e-06, "loss": 32.2258, "step": 48420 }, { "epoch": 0.09783166408771923, "grad_norm": 333.85693359375, "learning_rate": 9.686000000000002e-06, "loss": 37.6916, "step": 48430 }, { "epoch": 0.09785186472040304, "grad_norm": 275.5306396484375, "learning_rate": 9.688000000000001e-06, "loss": 23.432, "step": 48440 }, { "epoch": 0.09787206535308686, "grad_norm": 387.5707702636719, "learning_rate": 9.69e-06, "loss": 35.2525, "step": 48450 }, { "epoch": 0.09789226598577068, "grad_norm": 17.26321029663086, "learning_rate": 9.692e-06, "loss": 43.5601, "step": 48460 }, { "epoch": 0.09791246661845449, "grad_norm": 195.82852172851562, "learning_rate": 9.694e-06, "loss": 17.4614, "step": 48470 }, { "epoch": 0.0979326672511383, "grad_norm": 270.9240417480469, "learning_rate": 9.696000000000002e-06, "loss": 18.5254, "step": 48480 }, { "epoch": 0.09795286788382213, "grad_norm": 132.8568878173828, "learning_rate": 9.698000000000001e-06, "loss": 19.2739, "step": 48490 }, { "epoch": 0.09797306851650593, "grad_norm": 61.91486740112305, "learning_rate": 9.7e-06, "loss": 81.6742, "step": 48500 }, { "epoch": 0.09799326914918975, "grad_norm": 553.7900390625, "learning_rate": 9.702e-06, "loss": 45.9665, "step": 48510 }, { "epoch": 0.09801346978187357, "grad_norm": 212.63873291015625, "learning_rate": 9.704e-06, "loss": 42.6666, "step": 48520 }, { "epoch": 0.09803367041455738, "grad_norm": 289.29937744140625, "learning_rate": 9.706000000000002e-06, "loss": 27.9769, "step": 48530 }, { "epoch": 0.0980538710472412, "grad_norm": 128.13623046875, "learning_rate": 9.708000000000001e-06, "loss": 25.3855, "step": 48540 }, { "epoch": 0.09807407167992502, "grad_norm": 214.09823608398438, "learning_rate": 9.71e-06, "loss": 29.7313, "step": 48550 }, { "epoch": 0.09809427231260884, "grad_norm": 0.0, "learning_rate": 9.712e-06, "loss": 14.0571, "step": 48560 }, { "epoch": 0.09811447294529264, "grad_norm": 205.05311584472656, "learning_rate": 9.714e-06, "loss": 28.3344, "step": 48570 }, { "epoch": 0.09813467357797646, "grad_norm": 348.35455322265625, "learning_rate": 9.716000000000002e-06, "loss": 24.4622, "step": 48580 }, { "epoch": 0.09815487421066028, "grad_norm": 115.58353424072266, "learning_rate": 9.718e-06, "loss": 12.4728, "step": 48590 }, { "epoch": 0.09817507484334409, "grad_norm": 170.3902587890625, "learning_rate": 9.72e-06, "loss": 20.6357, "step": 48600 }, { "epoch": 0.09819527547602791, "grad_norm": 306.58453369140625, "learning_rate": 9.722000000000001e-06, "loss": 23.1536, "step": 48610 }, { "epoch": 0.09821547610871173, "grad_norm": 162.218505859375, "learning_rate": 9.724e-06, "loss": 34.9795, "step": 48620 }, { "epoch": 0.09823567674139554, "grad_norm": 151.15281677246094, "learning_rate": 9.726000000000001e-06, "loss": 27.0086, "step": 48630 }, { "epoch": 0.09825587737407936, "grad_norm": 218.60882568359375, "learning_rate": 9.728e-06, "loss": 23.5312, "step": 48640 }, { "epoch": 0.09827607800676318, "grad_norm": 411.7706298828125, "learning_rate": 9.73e-06, "loss": 17.409, "step": 48650 }, { "epoch": 0.09829627863944698, "grad_norm": 411.3281555175781, "learning_rate": 9.732000000000001e-06, "loss": 28.5197, "step": 48660 }, { "epoch": 0.0983164792721308, "grad_norm": 487.44158935546875, "learning_rate": 9.734000000000002e-06, "loss": 18.7795, "step": 48670 }, { "epoch": 0.09833667990481462, "grad_norm": 96.8828353881836, "learning_rate": 9.736000000000001e-06, "loss": 14.2251, "step": 48680 }, { "epoch": 0.09835688053749843, "grad_norm": 369.0585021972656, "learning_rate": 9.738e-06, "loss": 23.9811, "step": 48690 }, { "epoch": 0.09837708117018225, "grad_norm": 211.08279418945312, "learning_rate": 9.74e-06, "loss": 16.9726, "step": 48700 }, { "epoch": 0.09839728180286607, "grad_norm": 497.3608703613281, "learning_rate": 9.742000000000001e-06, "loss": 50.4403, "step": 48710 }, { "epoch": 0.09841748243554987, "grad_norm": 154.86351013183594, "learning_rate": 9.744000000000002e-06, "loss": 32.5661, "step": 48720 }, { "epoch": 0.0984376830682337, "grad_norm": 243.56246948242188, "learning_rate": 9.746000000000001e-06, "loss": 20.2901, "step": 48730 }, { "epoch": 0.09845788370091751, "grad_norm": 268.6463317871094, "learning_rate": 9.748e-06, "loss": 30.2848, "step": 48740 }, { "epoch": 0.09847808433360133, "grad_norm": 721.9420776367188, "learning_rate": 9.75e-06, "loss": 41.5502, "step": 48750 }, { "epoch": 0.09849828496628514, "grad_norm": 403.8372497558594, "learning_rate": 9.752e-06, "loss": 31.6006, "step": 48760 }, { "epoch": 0.09851848559896896, "grad_norm": 238.09739685058594, "learning_rate": 9.754000000000002e-06, "loss": 36.8688, "step": 48770 }, { "epoch": 0.09853868623165278, "grad_norm": 552.0816650390625, "learning_rate": 9.756000000000001e-06, "loss": 41.3344, "step": 48780 }, { "epoch": 0.09855888686433659, "grad_norm": 386.64678955078125, "learning_rate": 9.758e-06, "loss": 38.9197, "step": 48790 }, { "epoch": 0.0985790874970204, "grad_norm": 379.62451171875, "learning_rate": 9.760000000000001e-06, "loss": 27.8786, "step": 48800 }, { "epoch": 0.09859928812970423, "grad_norm": 209.05154418945312, "learning_rate": 9.762e-06, "loss": 23.7208, "step": 48810 }, { "epoch": 0.09861948876238803, "grad_norm": 787.5438232421875, "learning_rate": 9.764000000000002e-06, "loss": 56.193, "step": 48820 }, { "epoch": 0.09863968939507185, "grad_norm": 130.70431518554688, "learning_rate": 9.766000000000001e-06, "loss": 22.1296, "step": 48830 }, { "epoch": 0.09865989002775567, "grad_norm": 163.56263732910156, "learning_rate": 9.768e-06, "loss": 41.2987, "step": 48840 }, { "epoch": 0.09868009066043948, "grad_norm": 101.71668243408203, "learning_rate": 9.770000000000001e-06, "loss": 15.8774, "step": 48850 }, { "epoch": 0.0987002912931233, "grad_norm": 193.52305603027344, "learning_rate": 9.772e-06, "loss": 16.2699, "step": 48860 }, { "epoch": 0.09872049192580712, "grad_norm": 617.1961059570312, "learning_rate": 9.774000000000002e-06, "loss": 33.2527, "step": 48870 }, { "epoch": 0.09874069255849093, "grad_norm": 81.0466079711914, "learning_rate": 9.776000000000001e-06, "loss": 37.9588, "step": 48880 }, { "epoch": 0.09876089319117475, "grad_norm": 1044.55712890625, "learning_rate": 9.778e-06, "loss": 29.393, "step": 48890 }, { "epoch": 0.09878109382385857, "grad_norm": 334.58795166015625, "learning_rate": 9.780000000000001e-06, "loss": 45.8269, "step": 48900 }, { "epoch": 0.09880129445654239, "grad_norm": 525.953857421875, "learning_rate": 9.782e-06, "loss": 32.2243, "step": 48910 }, { "epoch": 0.09882149508922619, "grad_norm": 71.38017272949219, "learning_rate": 9.784000000000002e-06, "loss": 36.192, "step": 48920 }, { "epoch": 0.09884169572191001, "grad_norm": 96.96369171142578, "learning_rate": 9.786e-06, "loss": 35.3399, "step": 48930 }, { "epoch": 0.09886189635459383, "grad_norm": 441.6774597167969, "learning_rate": 9.788e-06, "loss": 27.5023, "step": 48940 }, { "epoch": 0.09888209698727764, "grad_norm": 60.7429313659668, "learning_rate": 9.790000000000001e-06, "loss": 24.0334, "step": 48950 }, { "epoch": 0.09890229761996146, "grad_norm": 213.49717712402344, "learning_rate": 9.792e-06, "loss": 17.2507, "step": 48960 }, { "epoch": 0.09892249825264528, "grad_norm": 486.7066955566406, "learning_rate": 9.794000000000001e-06, "loss": 24.4355, "step": 48970 }, { "epoch": 0.09894269888532908, "grad_norm": 371.7529602050781, "learning_rate": 9.796e-06, "loss": 59.7438, "step": 48980 }, { "epoch": 0.0989628995180129, "grad_norm": 454.64593505859375, "learning_rate": 9.798e-06, "loss": 41.6025, "step": 48990 }, { "epoch": 0.09898310015069672, "grad_norm": 238.39202880859375, "learning_rate": 9.800000000000001e-06, "loss": 39.6442, "step": 49000 }, { "epoch": 0.09900330078338053, "grad_norm": 269.2257080078125, "learning_rate": 9.802e-06, "loss": 29.4997, "step": 49010 }, { "epoch": 0.09902350141606435, "grad_norm": 320.70294189453125, "learning_rate": 9.804000000000001e-06, "loss": 29.6055, "step": 49020 }, { "epoch": 0.09904370204874817, "grad_norm": 678.3585205078125, "learning_rate": 9.806e-06, "loss": 36.273, "step": 49030 }, { "epoch": 0.09906390268143198, "grad_norm": 130.6610565185547, "learning_rate": 9.808000000000002e-06, "loss": 17.6018, "step": 49040 }, { "epoch": 0.0990841033141158, "grad_norm": 478.6215515136719, "learning_rate": 9.810000000000001e-06, "loss": 35.1446, "step": 49050 }, { "epoch": 0.09910430394679962, "grad_norm": 151.55528259277344, "learning_rate": 9.812e-06, "loss": 19.7562, "step": 49060 }, { "epoch": 0.09912450457948344, "grad_norm": 378.3835754394531, "learning_rate": 9.814000000000001e-06, "loss": 19.8534, "step": 49070 }, { "epoch": 0.09914470521216724, "grad_norm": 284.2619934082031, "learning_rate": 9.816e-06, "loss": 34.9392, "step": 49080 }, { "epoch": 0.09916490584485106, "grad_norm": 237.59681701660156, "learning_rate": 9.818000000000002e-06, "loss": 26.6921, "step": 49090 }, { "epoch": 0.09918510647753488, "grad_norm": 225.83067321777344, "learning_rate": 9.820000000000001e-06, "loss": 26.8983, "step": 49100 }, { "epoch": 0.09920530711021869, "grad_norm": 399.5172424316406, "learning_rate": 9.822e-06, "loss": 33.0025, "step": 49110 }, { "epoch": 0.09922550774290251, "grad_norm": 399.3423767089844, "learning_rate": 9.824000000000001e-06, "loss": 32.6224, "step": 49120 }, { "epoch": 0.09924570837558633, "grad_norm": 671.6337890625, "learning_rate": 9.826e-06, "loss": 56.4198, "step": 49130 }, { "epoch": 0.09926590900827013, "grad_norm": 365.015380859375, "learning_rate": 9.828000000000001e-06, "loss": 36.155, "step": 49140 }, { "epoch": 0.09928610964095395, "grad_norm": 235.26443481445312, "learning_rate": 9.83e-06, "loss": 25.9957, "step": 49150 }, { "epoch": 0.09930631027363777, "grad_norm": 174.57179260253906, "learning_rate": 9.832e-06, "loss": 19.219, "step": 49160 }, { "epoch": 0.09932651090632158, "grad_norm": 300.6072082519531, "learning_rate": 9.834000000000001e-06, "loss": 41.7614, "step": 49170 }, { "epoch": 0.0993467115390054, "grad_norm": 304.1320495605469, "learning_rate": 9.836e-06, "loss": 31.4001, "step": 49180 }, { "epoch": 0.09936691217168922, "grad_norm": 151.56703186035156, "learning_rate": 9.838000000000001e-06, "loss": 47.1372, "step": 49190 }, { "epoch": 0.09938711280437303, "grad_norm": 537.6617431640625, "learning_rate": 9.84e-06, "loss": 29.1027, "step": 49200 }, { "epoch": 0.09940731343705685, "grad_norm": 238.98143005371094, "learning_rate": 9.842e-06, "loss": 16.4828, "step": 49210 }, { "epoch": 0.09942751406974067, "grad_norm": 240.89715576171875, "learning_rate": 9.844000000000001e-06, "loss": 30.6301, "step": 49220 }, { "epoch": 0.09944771470242449, "grad_norm": 342.465087890625, "learning_rate": 9.846000000000002e-06, "loss": 18.4708, "step": 49230 }, { "epoch": 0.09946791533510829, "grad_norm": 292.0984802246094, "learning_rate": 9.848000000000001e-06, "loss": 23.0303, "step": 49240 }, { "epoch": 0.09948811596779211, "grad_norm": 289.4315490722656, "learning_rate": 9.85e-06, "loss": 21.7864, "step": 49250 }, { "epoch": 0.09950831660047593, "grad_norm": 24.86438751220703, "learning_rate": 9.852e-06, "loss": 18.6277, "step": 49260 }, { "epoch": 0.09952851723315974, "grad_norm": 485.41912841796875, "learning_rate": 9.854000000000001e-06, "loss": 29.882, "step": 49270 }, { "epoch": 0.09954871786584356, "grad_norm": 249.60585021972656, "learning_rate": 9.856000000000002e-06, "loss": 28.5069, "step": 49280 }, { "epoch": 0.09956891849852738, "grad_norm": 92.90636444091797, "learning_rate": 9.858000000000001e-06, "loss": 24.8964, "step": 49290 }, { "epoch": 0.09958911913121118, "grad_norm": 241.29672241210938, "learning_rate": 9.86e-06, "loss": 16.9046, "step": 49300 }, { "epoch": 0.099609319763895, "grad_norm": 297.7762145996094, "learning_rate": 9.862e-06, "loss": 15.2971, "step": 49310 }, { "epoch": 0.09962952039657882, "grad_norm": 285.7322998046875, "learning_rate": 9.864e-06, "loss": 34.0328, "step": 49320 }, { "epoch": 0.09964972102926263, "grad_norm": 75.5140151977539, "learning_rate": 9.866000000000002e-06, "loss": 27.2674, "step": 49330 }, { "epoch": 0.09966992166194645, "grad_norm": 271.3094787597656, "learning_rate": 9.868000000000001e-06, "loss": 49.1859, "step": 49340 }, { "epoch": 0.09969012229463027, "grad_norm": 114.27145385742188, "learning_rate": 9.87e-06, "loss": 31.3537, "step": 49350 }, { "epoch": 0.09971032292731408, "grad_norm": 620.1126098632812, "learning_rate": 9.872e-06, "loss": 31.5118, "step": 49360 }, { "epoch": 0.0997305235599979, "grad_norm": 346.7948913574219, "learning_rate": 9.874e-06, "loss": 38.7183, "step": 49370 }, { "epoch": 0.09975072419268172, "grad_norm": 89.07073211669922, "learning_rate": 9.876000000000002e-06, "loss": 31.1577, "step": 49380 }, { "epoch": 0.09977092482536554, "grad_norm": 244.71463012695312, "learning_rate": 9.878000000000001e-06, "loss": 23.9022, "step": 49390 }, { "epoch": 0.09979112545804934, "grad_norm": 364.9339904785156, "learning_rate": 9.88e-06, "loss": 14.9031, "step": 49400 }, { "epoch": 0.09981132609073316, "grad_norm": 193.1050567626953, "learning_rate": 9.882000000000001e-06, "loss": 30.017, "step": 49410 }, { "epoch": 0.09983152672341698, "grad_norm": 184.03233337402344, "learning_rate": 9.884e-06, "loss": 27.4826, "step": 49420 }, { "epoch": 0.09985172735610079, "grad_norm": 565.5440673828125, "learning_rate": 9.886000000000002e-06, "loss": 41.4609, "step": 49430 }, { "epoch": 0.09987192798878461, "grad_norm": 276.92926025390625, "learning_rate": 9.888000000000001e-06, "loss": 23.3332, "step": 49440 }, { "epoch": 0.09989212862146843, "grad_norm": 163.4635467529297, "learning_rate": 9.89e-06, "loss": 30.5689, "step": 49450 }, { "epoch": 0.09991232925415224, "grad_norm": 102.8772964477539, "learning_rate": 9.892000000000001e-06, "loss": 20.0673, "step": 49460 }, { "epoch": 0.09993252988683606, "grad_norm": 523.088134765625, "learning_rate": 9.894e-06, "loss": 37.1003, "step": 49470 }, { "epoch": 0.09995273051951988, "grad_norm": 369.327880859375, "learning_rate": 9.896000000000001e-06, "loss": 27.1317, "step": 49480 }, { "epoch": 0.09997293115220368, "grad_norm": 839.7412719726562, "learning_rate": 9.898e-06, "loss": 32.4058, "step": 49490 }, { "epoch": 0.0999931317848875, "grad_norm": 526.5969848632812, "learning_rate": 9.9e-06, "loss": 41.0066, "step": 49500 }, { "epoch": 0.10001333241757132, "grad_norm": 537.4699096679688, "learning_rate": 9.902000000000001e-06, "loss": 28.4742, "step": 49510 }, { "epoch": 0.10003353305025513, "grad_norm": 227.6514129638672, "learning_rate": 9.904e-06, "loss": 16.1466, "step": 49520 }, { "epoch": 0.10005373368293895, "grad_norm": 74.63841247558594, "learning_rate": 9.906000000000001e-06, "loss": 21.0938, "step": 49530 }, { "epoch": 0.10007393431562277, "grad_norm": 298.52032470703125, "learning_rate": 9.908e-06, "loss": 24.204, "step": 49540 }, { "epoch": 0.10009413494830659, "grad_norm": 177.2586669921875, "learning_rate": 9.91e-06, "loss": 16.3952, "step": 49550 }, { "epoch": 0.1001143355809904, "grad_norm": 0.0, "learning_rate": 9.912000000000001e-06, "loss": 18.158, "step": 49560 }, { "epoch": 0.10013453621367421, "grad_norm": 193.31202697753906, "learning_rate": 9.914e-06, "loss": 38.8835, "step": 49570 }, { "epoch": 0.10015473684635803, "grad_norm": 602.8677978515625, "learning_rate": 9.916000000000001e-06, "loss": 31.0423, "step": 49580 }, { "epoch": 0.10017493747904184, "grad_norm": 200.57896423339844, "learning_rate": 9.918e-06, "loss": 23.06, "step": 49590 }, { "epoch": 0.10019513811172566, "grad_norm": 462.3005676269531, "learning_rate": 9.920000000000002e-06, "loss": 37.7004, "step": 49600 }, { "epoch": 0.10021533874440948, "grad_norm": 328.4309997558594, "learning_rate": 9.922000000000001e-06, "loss": 36.4841, "step": 49610 }, { "epoch": 0.10023553937709329, "grad_norm": 488.2724304199219, "learning_rate": 9.924e-06, "loss": 31.5873, "step": 49620 }, { "epoch": 0.1002557400097771, "grad_norm": 209.86187744140625, "learning_rate": 9.926000000000001e-06, "loss": 32.5883, "step": 49630 }, { "epoch": 0.10027594064246093, "grad_norm": 237.76564025878906, "learning_rate": 9.928e-06, "loss": 36.1294, "step": 49640 }, { "epoch": 0.10029614127514473, "grad_norm": 374.63824462890625, "learning_rate": 9.930000000000001e-06, "loss": 38.9708, "step": 49650 }, { "epoch": 0.10031634190782855, "grad_norm": 359.27252197265625, "learning_rate": 9.932e-06, "loss": 53.2856, "step": 49660 }, { "epoch": 0.10033654254051237, "grad_norm": 411.49365234375, "learning_rate": 9.934e-06, "loss": 28.3803, "step": 49670 }, { "epoch": 0.10035674317319618, "grad_norm": 236.98284912109375, "learning_rate": 9.936000000000001e-06, "loss": 44.0789, "step": 49680 }, { "epoch": 0.10037694380588, "grad_norm": 233.22879028320312, "learning_rate": 9.938e-06, "loss": 27.7855, "step": 49690 }, { "epoch": 0.10039714443856382, "grad_norm": 167.821044921875, "learning_rate": 9.940000000000001e-06, "loss": 19.2479, "step": 49700 }, { "epoch": 0.10041734507124764, "grad_norm": 74.50845336914062, "learning_rate": 9.942e-06, "loss": 61.4135, "step": 49710 }, { "epoch": 0.10043754570393144, "grad_norm": 343.1907043457031, "learning_rate": 9.944e-06, "loss": 24.3024, "step": 49720 }, { "epoch": 0.10045774633661526, "grad_norm": 261.58734130859375, "learning_rate": 9.946000000000001e-06, "loss": 34.6121, "step": 49730 }, { "epoch": 0.10047794696929908, "grad_norm": 284.1789855957031, "learning_rate": 9.948e-06, "loss": 38.767, "step": 49740 }, { "epoch": 0.10049814760198289, "grad_norm": 258.99444580078125, "learning_rate": 9.950000000000001e-06, "loss": 30.548, "step": 49750 }, { "epoch": 0.10051834823466671, "grad_norm": 338.01824951171875, "learning_rate": 9.952e-06, "loss": 30.1462, "step": 49760 }, { "epoch": 0.10053854886735053, "grad_norm": 110.8593521118164, "learning_rate": 9.954e-06, "loss": 20.5546, "step": 49770 }, { "epoch": 0.10055874950003434, "grad_norm": 551.5993041992188, "learning_rate": 9.956000000000001e-06, "loss": 26.6706, "step": 49780 }, { "epoch": 0.10057895013271816, "grad_norm": 211.20059204101562, "learning_rate": 9.958e-06, "loss": 38.2057, "step": 49790 }, { "epoch": 0.10059915076540198, "grad_norm": 278.45709228515625, "learning_rate": 9.960000000000001e-06, "loss": 24.5288, "step": 49800 }, { "epoch": 0.10061935139808578, "grad_norm": 317.9666748046875, "learning_rate": 9.962e-06, "loss": 19.0307, "step": 49810 }, { "epoch": 0.1006395520307696, "grad_norm": 133.56689453125, "learning_rate": 9.964e-06, "loss": 26.7845, "step": 49820 }, { "epoch": 0.10065975266345342, "grad_norm": 211.31565856933594, "learning_rate": 9.966e-06, "loss": 20.4976, "step": 49830 }, { "epoch": 0.10067995329613723, "grad_norm": 285.4066467285156, "learning_rate": 9.968000000000002e-06, "loss": 34.0572, "step": 49840 }, { "epoch": 0.10070015392882105, "grad_norm": 336.01519775390625, "learning_rate": 9.970000000000001e-06, "loss": 23.3422, "step": 49850 }, { "epoch": 0.10072035456150487, "grad_norm": 243.3843994140625, "learning_rate": 9.972e-06, "loss": 29.7194, "step": 49860 }, { "epoch": 0.10074055519418869, "grad_norm": 292.5046081542969, "learning_rate": 9.974e-06, "loss": 28.8181, "step": 49870 }, { "epoch": 0.1007607558268725, "grad_norm": 398.6467590332031, "learning_rate": 9.976e-06, "loss": 30.6082, "step": 49880 }, { "epoch": 0.10078095645955631, "grad_norm": 203.60562133789062, "learning_rate": 9.978000000000002e-06, "loss": 21.856, "step": 49890 }, { "epoch": 0.10080115709224013, "grad_norm": 507.09100341796875, "learning_rate": 9.980000000000001e-06, "loss": 27.0159, "step": 49900 }, { "epoch": 0.10082135772492394, "grad_norm": 318.009765625, "learning_rate": 9.982e-06, "loss": 35.3112, "step": 49910 }, { "epoch": 0.10084155835760776, "grad_norm": 125.39407348632812, "learning_rate": 9.984e-06, "loss": 22.5182, "step": 49920 }, { "epoch": 0.10086175899029158, "grad_norm": 131.48182678222656, "learning_rate": 9.986e-06, "loss": 17.5017, "step": 49930 }, { "epoch": 0.10088195962297539, "grad_norm": 356.50506591796875, "learning_rate": 9.988000000000002e-06, "loss": 23.632, "step": 49940 }, { "epoch": 0.1009021602556592, "grad_norm": 115.21746063232422, "learning_rate": 9.990000000000001e-06, "loss": 37.4513, "step": 49950 }, { "epoch": 0.10092236088834303, "grad_norm": 39.12056350708008, "learning_rate": 9.992e-06, "loss": 43.4576, "step": 49960 }, { "epoch": 0.10094256152102683, "grad_norm": 62.628761291503906, "learning_rate": 9.994000000000001e-06, "loss": 15.0839, "step": 49970 }, { "epoch": 0.10096276215371065, "grad_norm": 308.5050048828125, "learning_rate": 9.996e-06, "loss": 42.9877, "step": 49980 }, { "epoch": 0.10098296278639447, "grad_norm": 275.00091552734375, "learning_rate": 9.998000000000002e-06, "loss": 28.232, "step": 49990 }, { "epoch": 0.10100316341907828, "grad_norm": 139.22975158691406, "learning_rate": 1e-05, "loss": 30.6326, "step": 50000 }, { "epoch": 0.1010233640517621, "grad_norm": 471.0130310058594, "learning_rate": 9.999999987815305e-06, "loss": 23.1219, "step": 50010 }, { "epoch": 0.10104356468444592, "grad_norm": 1806.28466796875, "learning_rate": 9.999999951261215e-06, "loss": 59.6046, "step": 50020 }, { "epoch": 0.10106376531712974, "grad_norm": 288.6343688964844, "learning_rate": 9.99999989033773e-06, "loss": 44.0059, "step": 50030 }, { "epoch": 0.10108396594981355, "grad_norm": 439.6029052734375, "learning_rate": 9.999999805044853e-06, "loss": 48.1023, "step": 50040 }, { "epoch": 0.10110416658249737, "grad_norm": 142.65753173828125, "learning_rate": 9.999999695382584e-06, "loss": 31.8775, "step": 50050 }, { "epoch": 0.10112436721518119, "grad_norm": 378.08984375, "learning_rate": 9.999999561350923e-06, "loss": 19.3112, "step": 50060 }, { "epoch": 0.10114456784786499, "grad_norm": 530.3013305664062, "learning_rate": 9.99999940294987e-06, "loss": 21.2745, "step": 50070 }, { "epoch": 0.10116476848054881, "grad_norm": 257.8508605957031, "learning_rate": 9.999999220179426e-06, "loss": 21.481, "step": 50080 }, { "epoch": 0.10118496911323263, "grad_norm": 171.88690185546875, "learning_rate": 9.999999013039593e-06, "loss": 20.1977, "step": 50090 }, { "epoch": 0.10120516974591644, "grad_norm": 478.6723327636719, "learning_rate": 9.999998781530372e-06, "loss": 42.7311, "step": 50100 }, { "epoch": 0.10122537037860026, "grad_norm": 246.24720764160156, "learning_rate": 9.999998525651761e-06, "loss": 32.6225, "step": 50110 }, { "epoch": 0.10124557101128408, "grad_norm": 416.1274108886719, "learning_rate": 9.999998245403766e-06, "loss": 31.2341, "step": 50120 }, { "epoch": 0.10126577164396788, "grad_norm": 368.7269592285156, "learning_rate": 9.999997940786385e-06, "loss": 26.3761, "step": 50130 }, { "epoch": 0.1012859722766517, "grad_norm": 132.03887939453125, "learning_rate": 9.99999761179962e-06, "loss": 30.188, "step": 50140 }, { "epoch": 0.10130617290933552, "grad_norm": 241.72500610351562, "learning_rate": 9.999997258443473e-06, "loss": 17.5732, "step": 50150 }, { "epoch": 0.10132637354201933, "grad_norm": 164.09193420410156, "learning_rate": 9.999996880717946e-06, "loss": 30.8421, "step": 50160 }, { "epoch": 0.10134657417470315, "grad_norm": 122.90593719482422, "learning_rate": 9.999996478623041e-06, "loss": 42.6231, "step": 50170 }, { "epoch": 0.10136677480738697, "grad_norm": 991.2377319335938, "learning_rate": 9.99999605215876e-06, "loss": 25.8944, "step": 50180 }, { "epoch": 0.10138697544007079, "grad_norm": 108.53369140625, "learning_rate": 9.999995601325104e-06, "loss": 18.4249, "step": 50190 }, { "epoch": 0.1014071760727546, "grad_norm": 916.2879638671875, "learning_rate": 9.999995126122076e-06, "loss": 33.0909, "step": 50200 }, { "epoch": 0.10142737670543842, "grad_norm": 816.096435546875, "learning_rate": 9.999994626549678e-06, "loss": 50.3456, "step": 50210 }, { "epoch": 0.10144757733812224, "grad_norm": 95.27555847167969, "learning_rate": 9.999994102607912e-06, "loss": 19.8343, "step": 50220 }, { "epoch": 0.10146777797080604, "grad_norm": 265.289794921875, "learning_rate": 9.999993554296783e-06, "loss": 26.0474, "step": 50230 }, { "epoch": 0.10148797860348986, "grad_norm": 117.16630554199219, "learning_rate": 9.999992981616292e-06, "loss": 33.6441, "step": 50240 }, { "epoch": 0.10150817923617368, "grad_norm": 128.14080810546875, "learning_rate": 9.99999238456644e-06, "loss": 25.5422, "step": 50250 }, { "epoch": 0.10152837986885749, "grad_norm": 41.98344039916992, "learning_rate": 9.999991763147232e-06, "loss": 25.2422, "step": 50260 }, { "epoch": 0.10154858050154131, "grad_norm": 556.4345092773438, "learning_rate": 9.99999111735867e-06, "loss": 62.7839, "step": 50270 }, { "epoch": 0.10156878113422513, "grad_norm": 214.891845703125, "learning_rate": 9.999990447200758e-06, "loss": 39.6728, "step": 50280 }, { "epoch": 0.10158898176690893, "grad_norm": 188.03500366210938, "learning_rate": 9.9999897526735e-06, "loss": 28.9581, "step": 50290 }, { "epoch": 0.10160918239959275, "grad_norm": 266.3095397949219, "learning_rate": 9.999989033776898e-06, "loss": 48.701, "step": 50300 }, { "epoch": 0.10162938303227657, "grad_norm": 0.0, "learning_rate": 9.999988290510955e-06, "loss": 34.5775, "step": 50310 }, { "epoch": 0.10164958366496038, "grad_norm": 396.36395263671875, "learning_rate": 9.999987522875676e-06, "loss": 16.2926, "step": 50320 }, { "epoch": 0.1016697842976442, "grad_norm": 296.4020690917969, "learning_rate": 9.999986730871065e-06, "loss": 32.8171, "step": 50330 }, { "epoch": 0.10168998493032802, "grad_norm": 607.1825561523438, "learning_rate": 9.999985914497124e-06, "loss": 42.0191, "step": 50340 }, { "epoch": 0.10171018556301184, "grad_norm": 307.2294616699219, "learning_rate": 9.999985073753857e-06, "loss": 22.0844, "step": 50350 }, { "epoch": 0.10173038619569565, "grad_norm": 801.8738403320312, "learning_rate": 9.999984208641271e-06, "loss": 31.9706, "step": 50360 }, { "epoch": 0.10175058682837947, "grad_norm": 320.78997802734375, "learning_rate": 9.999983319159368e-06, "loss": 28.8287, "step": 50370 }, { "epoch": 0.10177078746106329, "grad_norm": 351.9096984863281, "learning_rate": 9.999982405308154e-06, "loss": 36.6011, "step": 50380 }, { "epoch": 0.10179098809374709, "grad_norm": 684.657470703125, "learning_rate": 9.999981467087629e-06, "loss": 40.7873, "step": 50390 }, { "epoch": 0.10181118872643091, "grad_norm": 187.62615966796875, "learning_rate": 9.999980504497803e-06, "loss": 20.1668, "step": 50400 }, { "epoch": 0.10183138935911473, "grad_norm": 257.9358215332031, "learning_rate": 9.999979517538677e-06, "loss": 23.1528, "step": 50410 }, { "epoch": 0.10185158999179854, "grad_norm": 278.9385070800781, "learning_rate": 9.99997850621026e-06, "loss": 33.8999, "step": 50420 }, { "epoch": 0.10187179062448236, "grad_norm": 223.14247131347656, "learning_rate": 9.999977470512551e-06, "loss": 17.4874, "step": 50430 }, { "epoch": 0.10189199125716618, "grad_norm": 382.02825927734375, "learning_rate": 9.999976410445563e-06, "loss": 14.3687, "step": 50440 }, { "epoch": 0.10191219188984998, "grad_norm": 355.7935791015625, "learning_rate": 9.999975326009292e-06, "loss": 27.5968, "step": 50450 }, { "epoch": 0.1019323925225338, "grad_norm": 420.86944580078125, "learning_rate": 9.999974217203749e-06, "loss": 34.6843, "step": 50460 }, { "epoch": 0.10195259315521762, "grad_norm": 515.8864135742188, "learning_rate": 9.999973084028938e-06, "loss": 48.7909, "step": 50470 }, { "epoch": 0.10197279378790143, "grad_norm": 1683.2763671875, "learning_rate": 9.999971926484865e-06, "loss": 65.2516, "step": 50480 }, { "epoch": 0.10199299442058525, "grad_norm": 169.98770141601562, "learning_rate": 9.999970744571534e-06, "loss": 26.0087, "step": 50490 }, { "epoch": 0.10201319505326907, "grad_norm": 435.380615234375, "learning_rate": 9.999969538288953e-06, "loss": 19.055, "step": 50500 }, { "epoch": 0.10203339568595289, "grad_norm": 265.7920837402344, "learning_rate": 9.999968307637127e-06, "loss": 23.0871, "step": 50510 }, { "epoch": 0.1020535963186367, "grad_norm": 202.9770050048828, "learning_rate": 9.999967052616061e-06, "loss": 27.4185, "step": 50520 }, { "epoch": 0.10207379695132052, "grad_norm": 228.78289794921875, "learning_rate": 9.999965773225762e-06, "loss": 54.9186, "step": 50530 }, { "epoch": 0.10209399758400434, "grad_norm": 220.6419219970703, "learning_rate": 9.999964469466236e-06, "loss": 20.843, "step": 50540 }, { "epoch": 0.10211419821668814, "grad_norm": 226.0003662109375, "learning_rate": 9.999963141337493e-06, "loss": 26.7992, "step": 50550 }, { "epoch": 0.10213439884937196, "grad_norm": 113.45968627929688, "learning_rate": 9.999961788839533e-06, "loss": 17.1383, "step": 50560 }, { "epoch": 0.10215459948205578, "grad_norm": 294.0021057128906, "learning_rate": 9.999960411972366e-06, "loss": 30.5648, "step": 50570 }, { "epoch": 0.10217480011473959, "grad_norm": 163.34014892578125, "learning_rate": 9.999959010735997e-06, "loss": 18.981, "step": 50580 }, { "epoch": 0.10219500074742341, "grad_norm": 261.8861999511719, "learning_rate": 9.999957585130438e-06, "loss": 26.4652, "step": 50590 }, { "epoch": 0.10221520138010723, "grad_norm": 72.05908966064453, "learning_rate": 9.999956135155688e-06, "loss": 14.7182, "step": 50600 }, { "epoch": 0.10223540201279104, "grad_norm": 334.1035461425781, "learning_rate": 9.999954660811761e-06, "loss": 30.3572, "step": 50610 }, { "epoch": 0.10225560264547486, "grad_norm": 164.53323364257812, "learning_rate": 9.99995316209866e-06, "loss": 29.3539, "step": 50620 }, { "epoch": 0.10227580327815868, "grad_norm": 493.40057373046875, "learning_rate": 9.999951639016396e-06, "loss": 48.8005, "step": 50630 }, { "epoch": 0.10229600391084248, "grad_norm": 264.8002624511719, "learning_rate": 9.999950091564972e-06, "loss": 29.7895, "step": 50640 }, { "epoch": 0.1023162045435263, "grad_norm": 0.0, "learning_rate": 9.999948519744397e-06, "loss": 19.5758, "step": 50650 }, { "epoch": 0.10233640517621012, "grad_norm": 398.1997375488281, "learning_rate": 9.999946923554681e-06, "loss": 36.8041, "step": 50660 }, { "epoch": 0.10235660580889394, "grad_norm": 850.4107055664062, "learning_rate": 9.99994530299583e-06, "loss": 49.9445, "step": 50670 }, { "epoch": 0.10237680644157775, "grad_norm": 143.2680206298828, "learning_rate": 9.99994365806785e-06, "loss": 30.118, "step": 50680 }, { "epoch": 0.10239700707426157, "grad_norm": 0.0, "learning_rate": 9.999941988770754e-06, "loss": 30.5931, "step": 50690 }, { "epoch": 0.10241720770694539, "grad_norm": 176.04603576660156, "learning_rate": 9.999940295104546e-06, "loss": 26.0548, "step": 50700 }, { "epoch": 0.1024374083396292, "grad_norm": 842.947509765625, "learning_rate": 9.999938577069235e-06, "loss": 39.8273, "step": 50710 }, { "epoch": 0.10245760897231301, "grad_norm": 193.65521240234375, "learning_rate": 9.99993683466483e-06, "loss": 17.9434, "step": 50720 }, { "epoch": 0.10247780960499683, "grad_norm": 421.017333984375, "learning_rate": 9.999935067891339e-06, "loss": 39.8136, "step": 50730 }, { "epoch": 0.10249801023768064, "grad_norm": 233.7952423095703, "learning_rate": 9.999933276748772e-06, "loss": 34.9791, "step": 50740 }, { "epoch": 0.10251821087036446, "grad_norm": 214.416748046875, "learning_rate": 9.999931461237135e-06, "loss": 49.5014, "step": 50750 }, { "epoch": 0.10253841150304828, "grad_norm": 0.0, "learning_rate": 9.99992962135644e-06, "loss": 23.6579, "step": 50760 }, { "epoch": 0.10255861213573209, "grad_norm": 360.38043212890625, "learning_rate": 9.999927757106693e-06, "loss": 24.3242, "step": 50770 }, { "epoch": 0.1025788127684159, "grad_norm": 237.60289001464844, "learning_rate": 9.999925868487905e-06, "loss": 17.9011, "step": 50780 }, { "epoch": 0.10259901340109973, "grad_norm": 196.97837829589844, "learning_rate": 9.999923955500085e-06, "loss": 47.13, "step": 50790 }, { "epoch": 0.10261921403378353, "grad_norm": 24.678083419799805, "learning_rate": 9.999922018143242e-06, "loss": 36.1751, "step": 50800 }, { "epoch": 0.10263941466646735, "grad_norm": 325.7549133300781, "learning_rate": 9.999920056417385e-06, "loss": 25.5162, "step": 50810 }, { "epoch": 0.10265961529915117, "grad_norm": 132.64601135253906, "learning_rate": 9.999918070322525e-06, "loss": 30.3848, "step": 50820 }, { "epoch": 0.10267981593183499, "grad_norm": 456.0604553222656, "learning_rate": 9.999916059858669e-06, "loss": 22.6282, "step": 50830 }, { "epoch": 0.1027000165645188, "grad_norm": 433.6499328613281, "learning_rate": 9.999914025025831e-06, "loss": 32.5785, "step": 50840 }, { "epoch": 0.10272021719720262, "grad_norm": 250.8369140625, "learning_rate": 9.999911965824018e-06, "loss": 23.2193, "step": 50850 }, { "epoch": 0.10274041782988644, "grad_norm": 206.635986328125, "learning_rate": 9.99990988225324e-06, "loss": 16.0045, "step": 50860 }, { "epoch": 0.10276061846257024, "grad_norm": 4.674332141876221, "learning_rate": 9.999907774313507e-06, "loss": 38.8995, "step": 50870 }, { "epoch": 0.10278081909525406, "grad_norm": 313.18328857421875, "learning_rate": 9.99990564200483e-06, "loss": 29.5193, "step": 50880 }, { "epoch": 0.10280101972793788, "grad_norm": 0.0, "learning_rate": 9.999903485327221e-06, "loss": 30.8923, "step": 50890 }, { "epoch": 0.10282122036062169, "grad_norm": 270.57373046875, "learning_rate": 9.999901304280686e-06, "loss": 33.3304, "step": 50900 }, { "epoch": 0.10284142099330551, "grad_norm": 228.66285705566406, "learning_rate": 9.99989909886524e-06, "loss": 23.2465, "step": 50910 }, { "epoch": 0.10286162162598933, "grad_norm": 260.7828063964844, "learning_rate": 9.999896869080893e-06, "loss": 33.4242, "step": 50920 }, { "epoch": 0.10288182225867314, "grad_norm": 329.14532470703125, "learning_rate": 9.999894614927655e-06, "loss": 42.0958, "step": 50930 }, { "epoch": 0.10290202289135696, "grad_norm": 198.07843017578125, "learning_rate": 9.999892336405534e-06, "loss": 27.7986, "step": 50940 }, { "epoch": 0.10292222352404078, "grad_norm": 301.6722106933594, "learning_rate": 9.999890033514547e-06, "loss": 17.6956, "step": 50950 }, { "epoch": 0.10294242415672458, "grad_norm": 437.3856506347656, "learning_rate": 9.999887706254703e-06, "loss": 25.4234, "step": 50960 }, { "epoch": 0.1029626247894084, "grad_norm": 284.0990905761719, "learning_rate": 9.999885354626011e-06, "loss": 22.3443, "step": 50970 }, { "epoch": 0.10298282542209222, "grad_norm": 174.0272979736328, "learning_rate": 9.999882978628485e-06, "loss": 20.16, "step": 50980 }, { "epoch": 0.10300302605477604, "grad_norm": 1048.8843994140625, "learning_rate": 9.999880578262135e-06, "loss": 39.4205, "step": 50990 }, { "epoch": 0.10302322668745985, "grad_norm": 468.3249206542969, "learning_rate": 9.999878153526974e-06, "loss": 50.4241, "step": 51000 }, { "epoch": 0.10304342732014367, "grad_norm": 190.69573974609375, "learning_rate": 9.999875704423015e-06, "loss": 21.9767, "step": 51010 }, { "epoch": 0.10306362795282749, "grad_norm": 46.21951675415039, "learning_rate": 9.999873230950265e-06, "loss": 25.4578, "step": 51020 }, { "epoch": 0.1030838285855113, "grad_norm": 74.81370544433594, "learning_rate": 9.99987073310874e-06, "loss": 44.2168, "step": 51030 }, { "epoch": 0.10310402921819511, "grad_norm": 200.86611938476562, "learning_rate": 9.999868210898454e-06, "loss": 22.0225, "step": 51040 }, { "epoch": 0.10312422985087893, "grad_norm": 198.10902404785156, "learning_rate": 9.999865664319414e-06, "loss": 54.9293, "step": 51050 }, { "epoch": 0.10314443048356274, "grad_norm": 166.8558807373047, "learning_rate": 9.999863093371638e-06, "loss": 35.8511, "step": 51060 }, { "epoch": 0.10316463111624656, "grad_norm": 117.50363159179688, "learning_rate": 9.999860498055134e-06, "loss": 25.3519, "step": 51070 }, { "epoch": 0.10318483174893038, "grad_norm": 494.44097900390625, "learning_rate": 9.999857878369917e-06, "loss": 52.0224, "step": 51080 }, { "epoch": 0.10320503238161419, "grad_norm": 329.0636291503906, "learning_rate": 9.999855234315997e-06, "loss": 27.5441, "step": 51090 }, { "epoch": 0.103225233014298, "grad_norm": 298.48773193359375, "learning_rate": 9.99985256589339e-06, "loss": 48.5755, "step": 51100 }, { "epoch": 0.10324543364698183, "grad_norm": 70.40919494628906, "learning_rate": 9.999849873102108e-06, "loss": 18.4243, "step": 51110 }, { "epoch": 0.10326563427966563, "grad_norm": 258.7916564941406, "learning_rate": 9.999847155942165e-06, "loss": 17.6642, "step": 51120 }, { "epoch": 0.10328583491234945, "grad_norm": 462.6172180175781, "learning_rate": 9.999844414413574e-06, "loss": 25.2386, "step": 51130 }, { "epoch": 0.10330603554503327, "grad_norm": 0.0, "learning_rate": 9.999841648516347e-06, "loss": 14.8295, "step": 51140 }, { "epoch": 0.10332623617771709, "grad_norm": 249.71768188476562, "learning_rate": 9.999838858250497e-06, "loss": 23.2043, "step": 51150 }, { "epoch": 0.1033464368104009, "grad_norm": 304.3594055175781, "learning_rate": 9.99983604361604e-06, "loss": 26.7364, "step": 51160 }, { "epoch": 0.10336663744308472, "grad_norm": 470.1942138671875, "learning_rate": 9.999833204612988e-06, "loss": 33.3088, "step": 51170 }, { "epoch": 0.10338683807576854, "grad_norm": 388.824462890625, "learning_rate": 9.999830341241354e-06, "loss": 25.6821, "step": 51180 }, { "epoch": 0.10340703870845235, "grad_norm": 350.3390808105469, "learning_rate": 9.999827453501156e-06, "loss": 32.7862, "step": 51190 }, { "epoch": 0.10342723934113617, "grad_norm": 423.8389587402344, "learning_rate": 9.999824541392404e-06, "loss": 22.668, "step": 51200 }, { "epoch": 0.10344743997381999, "grad_norm": 301.91351318359375, "learning_rate": 9.999821604915114e-06, "loss": 31.6929, "step": 51210 }, { "epoch": 0.10346764060650379, "grad_norm": 170.9901580810547, "learning_rate": 9.999818644069299e-06, "loss": 19.7407, "step": 51220 }, { "epoch": 0.10348784123918761, "grad_norm": 267.2139587402344, "learning_rate": 9.999815658854976e-06, "loss": 20.7972, "step": 51230 }, { "epoch": 0.10350804187187143, "grad_norm": 447.11505126953125, "learning_rate": 9.999812649272157e-06, "loss": 26.804, "step": 51240 }, { "epoch": 0.10352824250455524, "grad_norm": 280.6558837890625, "learning_rate": 9.999809615320857e-06, "loss": 29.0669, "step": 51250 }, { "epoch": 0.10354844313723906, "grad_norm": 403.64471435546875, "learning_rate": 9.999806557001092e-06, "loss": 34.4389, "step": 51260 }, { "epoch": 0.10356864376992288, "grad_norm": 158.74339294433594, "learning_rate": 9.999803474312877e-06, "loss": 45.7292, "step": 51270 }, { "epoch": 0.10358884440260668, "grad_norm": 1125.2117919921875, "learning_rate": 9.999800367256225e-06, "loss": 47.5907, "step": 51280 }, { "epoch": 0.1036090450352905, "grad_norm": 196.57582092285156, "learning_rate": 9.999797235831153e-06, "loss": 25.6336, "step": 51290 }, { "epoch": 0.10362924566797432, "grad_norm": 47.85155487060547, "learning_rate": 9.999794080037675e-06, "loss": 27.5896, "step": 51300 }, { "epoch": 0.10364944630065814, "grad_norm": 637.5588989257812, "learning_rate": 9.999790899875807e-06, "loss": 33.3999, "step": 51310 }, { "epoch": 0.10366964693334195, "grad_norm": 411.1919250488281, "learning_rate": 9.999787695345565e-06, "loss": 29.2971, "step": 51320 }, { "epoch": 0.10368984756602577, "grad_norm": 406.6997375488281, "learning_rate": 9.999784466446965e-06, "loss": 30.1459, "step": 51330 }, { "epoch": 0.10371004819870959, "grad_norm": 341.4475402832031, "learning_rate": 9.99978121318002e-06, "loss": 20.1846, "step": 51340 }, { "epoch": 0.1037302488313934, "grad_norm": 539.531982421875, "learning_rate": 9.99977793554475e-06, "loss": 14.1709, "step": 51350 }, { "epoch": 0.10375044946407722, "grad_norm": 110.14012908935547, "learning_rate": 9.999774633541169e-06, "loss": 20.2737, "step": 51360 }, { "epoch": 0.10377065009676104, "grad_norm": 376.2015075683594, "learning_rate": 9.999771307169291e-06, "loss": 30.2862, "step": 51370 }, { "epoch": 0.10379085072944484, "grad_norm": 310.2348327636719, "learning_rate": 9.999767956429135e-06, "loss": 28.0031, "step": 51380 }, { "epoch": 0.10381105136212866, "grad_norm": 151.3242645263672, "learning_rate": 9.999764581320714e-06, "loss": 35.0257, "step": 51390 }, { "epoch": 0.10383125199481248, "grad_norm": 0.0, "learning_rate": 9.99976118184405e-06, "loss": 24.3201, "step": 51400 }, { "epoch": 0.10385145262749629, "grad_norm": 412.5196838378906, "learning_rate": 9.999757757999155e-06, "loss": 24.0532, "step": 51410 }, { "epoch": 0.10387165326018011, "grad_norm": 276.8865051269531, "learning_rate": 9.999754309786047e-06, "loss": 26.966, "step": 51420 }, { "epoch": 0.10389185389286393, "grad_norm": 473.754638671875, "learning_rate": 9.999750837204743e-06, "loss": 24.6135, "step": 51430 }, { "epoch": 0.10391205452554773, "grad_norm": 15.540340423583984, "learning_rate": 9.99974734025526e-06, "loss": 15.8732, "step": 51440 }, { "epoch": 0.10393225515823155, "grad_norm": 576.4054565429688, "learning_rate": 9.999743818937614e-06, "loss": 37.8242, "step": 51450 }, { "epoch": 0.10395245579091537, "grad_norm": 136.3487548828125, "learning_rate": 9.999740273251824e-06, "loss": 51.881, "step": 51460 }, { "epoch": 0.1039726564235992, "grad_norm": 533.147216796875, "learning_rate": 9.999736703197907e-06, "loss": 25.8817, "step": 51470 }, { "epoch": 0.103992857056283, "grad_norm": 352.6027526855469, "learning_rate": 9.999733108775878e-06, "loss": 28.3331, "step": 51480 }, { "epoch": 0.10401305768896682, "grad_norm": 253.3265380859375, "learning_rate": 9.999729489985757e-06, "loss": 19.8205, "step": 51490 }, { "epoch": 0.10403325832165064, "grad_norm": 531.1339111328125, "learning_rate": 9.999725846827562e-06, "loss": 37.5408, "step": 51500 }, { "epoch": 0.10405345895433445, "grad_norm": 645.356689453125, "learning_rate": 9.999722179301309e-06, "loss": 32.6811, "step": 51510 }, { "epoch": 0.10407365958701827, "grad_norm": 829.6427612304688, "learning_rate": 9.999718487407015e-06, "loss": 39.8614, "step": 51520 }, { "epoch": 0.10409386021970209, "grad_norm": 229.8744659423828, "learning_rate": 9.9997147711447e-06, "loss": 22.0126, "step": 51530 }, { "epoch": 0.10411406085238589, "grad_norm": 495.3528747558594, "learning_rate": 9.999711030514383e-06, "loss": 37.0498, "step": 51540 }, { "epoch": 0.10413426148506971, "grad_norm": 214.57911682128906, "learning_rate": 9.99970726551608e-06, "loss": 14.6295, "step": 51550 }, { "epoch": 0.10415446211775353, "grad_norm": 334.7769470214844, "learning_rate": 9.999703476149808e-06, "loss": 25.496, "step": 51560 }, { "epoch": 0.10417466275043734, "grad_norm": 512.3464965820312, "learning_rate": 9.999699662415592e-06, "loss": 29.9426, "step": 51570 }, { "epoch": 0.10419486338312116, "grad_norm": 850.2612915039062, "learning_rate": 9.999695824313443e-06, "loss": 25.0559, "step": 51580 }, { "epoch": 0.10421506401580498, "grad_norm": 210.4770050048828, "learning_rate": 9.999691961843385e-06, "loss": 31.2893, "step": 51590 }, { "epoch": 0.10423526464848878, "grad_norm": 220.5719451904297, "learning_rate": 9.999688075005434e-06, "loss": 38.7006, "step": 51600 }, { "epoch": 0.1042554652811726, "grad_norm": 279.3293151855469, "learning_rate": 9.999684163799609e-06, "loss": 23.7144, "step": 51610 }, { "epoch": 0.10427566591385642, "grad_norm": 244.04415893554688, "learning_rate": 9.99968022822593e-06, "loss": 21.0691, "step": 51620 }, { "epoch": 0.10429586654654023, "grad_norm": 205.00442504882812, "learning_rate": 9.999676268284416e-06, "loss": 19.3602, "step": 51630 }, { "epoch": 0.10431606717922405, "grad_norm": 176.70343017578125, "learning_rate": 9.999672283975085e-06, "loss": 53.0338, "step": 51640 }, { "epoch": 0.10433626781190787, "grad_norm": 194.0509490966797, "learning_rate": 9.99966827529796e-06, "loss": 50.3281, "step": 51650 }, { "epoch": 0.10435646844459169, "grad_norm": 92.40589141845703, "learning_rate": 9.999664242253058e-06, "loss": 22.219, "step": 51660 }, { "epoch": 0.1043766690772755, "grad_norm": 512.8795166015625, "learning_rate": 9.999660184840398e-06, "loss": 52.871, "step": 51670 }, { "epoch": 0.10439686970995932, "grad_norm": 223.3532257080078, "learning_rate": 9.999656103060001e-06, "loss": 23.4778, "step": 51680 }, { "epoch": 0.10441707034264314, "grad_norm": 189.29037475585938, "learning_rate": 9.999651996911886e-06, "loss": 26.703, "step": 51690 }, { "epoch": 0.10443727097532694, "grad_norm": 253.76043701171875, "learning_rate": 9.999647866396073e-06, "loss": 19.2253, "step": 51700 }, { "epoch": 0.10445747160801076, "grad_norm": 215.17721557617188, "learning_rate": 9.999643711512586e-06, "loss": 29.436, "step": 51710 }, { "epoch": 0.10447767224069458, "grad_norm": 67.32207489013672, "learning_rate": 9.999639532261438e-06, "loss": 20.8944, "step": 51720 }, { "epoch": 0.10449787287337839, "grad_norm": 745.7921752929688, "learning_rate": 9.999635328642655e-06, "loss": 34.7835, "step": 51730 }, { "epoch": 0.10451807350606221, "grad_norm": 0.0, "learning_rate": 9.999631100656255e-06, "loss": 25.8605, "step": 51740 }, { "epoch": 0.10453827413874603, "grad_norm": 487.88037109375, "learning_rate": 9.999626848302261e-06, "loss": 28.454, "step": 51750 }, { "epoch": 0.10455847477142984, "grad_norm": 257.8740539550781, "learning_rate": 9.99962257158069e-06, "loss": 40.0581, "step": 51760 }, { "epoch": 0.10457867540411366, "grad_norm": 631.4549560546875, "learning_rate": 9.999618270491567e-06, "loss": 23.5193, "step": 51770 }, { "epoch": 0.10459887603679748, "grad_norm": 359.64056396484375, "learning_rate": 9.999613945034909e-06, "loss": 42.8022, "step": 51780 }, { "epoch": 0.10461907666948128, "grad_norm": 386.89959716796875, "learning_rate": 9.999609595210743e-06, "loss": 12.5305, "step": 51790 }, { "epoch": 0.1046392773021651, "grad_norm": 388.4532775878906, "learning_rate": 9.999605221019082e-06, "loss": 51.0667, "step": 51800 }, { "epoch": 0.10465947793484892, "grad_norm": 86.2433853149414, "learning_rate": 9.999600822459952e-06, "loss": 18.3662, "step": 51810 }, { "epoch": 0.10467967856753274, "grad_norm": 25.467668533325195, "learning_rate": 9.999596399533375e-06, "loss": 40.2428, "step": 51820 }, { "epoch": 0.10469987920021655, "grad_norm": 168.73619079589844, "learning_rate": 9.999591952239371e-06, "loss": 17.2677, "step": 51830 }, { "epoch": 0.10472007983290037, "grad_norm": 581.0770874023438, "learning_rate": 9.999587480577964e-06, "loss": 23.5776, "step": 51840 }, { "epoch": 0.10474028046558419, "grad_norm": 332.0441589355469, "learning_rate": 9.999582984549172e-06, "loss": 27.7383, "step": 51850 }, { "epoch": 0.104760481098268, "grad_norm": 116.67573547363281, "learning_rate": 9.99957846415302e-06, "loss": 18.4031, "step": 51860 }, { "epoch": 0.10478068173095181, "grad_norm": 632.2677612304688, "learning_rate": 9.999573919389527e-06, "loss": 32.0135, "step": 51870 }, { "epoch": 0.10480088236363563, "grad_norm": 238.19703674316406, "learning_rate": 9.999569350258717e-06, "loss": 42.3347, "step": 51880 }, { "epoch": 0.10482108299631944, "grad_norm": 2599.101806640625, "learning_rate": 9.999564756760616e-06, "loss": 44.25, "step": 51890 }, { "epoch": 0.10484128362900326, "grad_norm": 234.98065185546875, "learning_rate": 9.999560138895238e-06, "loss": 29.4408, "step": 51900 }, { "epoch": 0.10486148426168708, "grad_norm": 287.81109619140625, "learning_rate": 9.999555496662614e-06, "loss": 26.2337, "step": 51910 }, { "epoch": 0.10488168489437089, "grad_norm": 248.37091064453125, "learning_rate": 9.999550830062762e-06, "loss": 34.4246, "step": 51920 }, { "epoch": 0.1049018855270547, "grad_norm": 275.33648681640625, "learning_rate": 9.999546139095706e-06, "loss": 18.09, "step": 51930 }, { "epoch": 0.10492208615973853, "grad_norm": 253.08572387695312, "learning_rate": 9.999541423761468e-06, "loss": 29.6582, "step": 51940 }, { "epoch": 0.10494228679242233, "grad_norm": 208.5484161376953, "learning_rate": 9.999536684060071e-06, "loss": 36.9299, "step": 51950 }, { "epoch": 0.10496248742510615, "grad_norm": 603.1412353515625, "learning_rate": 9.999531919991538e-06, "loss": 32.8705, "step": 51960 }, { "epoch": 0.10498268805778997, "grad_norm": 343.6145324707031, "learning_rate": 9.999527131555894e-06, "loss": 26.0192, "step": 51970 }, { "epoch": 0.10500288869047379, "grad_norm": 324.5255126953125, "learning_rate": 9.99952231875316e-06, "loss": 33.7823, "step": 51980 }, { "epoch": 0.1050230893231576, "grad_norm": 203.3389892578125, "learning_rate": 9.999517481583363e-06, "loss": 17.2607, "step": 51990 }, { "epoch": 0.10504328995584142, "grad_norm": 328.50091552734375, "learning_rate": 9.999512620046523e-06, "loss": 18.514, "step": 52000 }, { "epoch": 0.10506349058852524, "grad_norm": 5.304311275482178, "learning_rate": 9.999507734142663e-06, "loss": 24.9925, "step": 52010 }, { "epoch": 0.10508369122120904, "grad_norm": 188.5509033203125, "learning_rate": 9.999502823871809e-06, "loss": 15.0527, "step": 52020 }, { "epoch": 0.10510389185389286, "grad_norm": 325.5667419433594, "learning_rate": 9.999497889233987e-06, "loss": 14.9754, "step": 52030 }, { "epoch": 0.10512409248657668, "grad_norm": 147.8154754638672, "learning_rate": 9.999492930229217e-06, "loss": 22.9097, "step": 52040 }, { "epoch": 0.10514429311926049, "grad_norm": 100.01856231689453, "learning_rate": 9.999487946857526e-06, "loss": 29.5962, "step": 52050 }, { "epoch": 0.10516449375194431, "grad_norm": 314.3624267578125, "learning_rate": 9.999482939118936e-06, "loss": 21.121, "step": 52060 }, { "epoch": 0.10518469438462813, "grad_norm": 504.4967041015625, "learning_rate": 9.999477907013473e-06, "loss": 41.1061, "step": 52070 }, { "epoch": 0.10520489501731194, "grad_norm": 363.31439208984375, "learning_rate": 9.999472850541161e-06, "loss": 31.1386, "step": 52080 }, { "epoch": 0.10522509564999576, "grad_norm": 341.00506591796875, "learning_rate": 9.999467769702023e-06, "loss": 25.7745, "step": 52090 }, { "epoch": 0.10524529628267958, "grad_norm": 325.4628601074219, "learning_rate": 9.999462664496088e-06, "loss": 17.0715, "step": 52100 }, { "epoch": 0.10526549691536338, "grad_norm": 440.8080749511719, "learning_rate": 9.999457534923377e-06, "loss": 29.039, "step": 52110 }, { "epoch": 0.1052856975480472, "grad_norm": 257.4093933105469, "learning_rate": 9.999452380983915e-06, "loss": 29.5857, "step": 52120 }, { "epoch": 0.10530589818073102, "grad_norm": 199.72740173339844, "learning_rate": 9.999447202677732e-06, "loss": 26.7195, "step": 52130 }, { "epoch": 0.10532609881341484, "grad_norm": 266.7873840332031, "learning_rate": 9.999442000004848e-06, "loss": 24.3691, "step": 52140 }, { "epoch": 0.10534629944609865, "grad_norm": 363.8753356933594, "learning_rate": 9.99943677296529e-06, "loss": 34.7164, "step": 52150 }, { "epoch": 0.10536650007878247, "grad_norm": 203.3870849609375, "learning_rate": 9.999431521559081e-06, "loss": 33.541, "step": 52160 }, { "epoch": 0.10538670071146629, "grad_norm": 349.9403381347656, "learning_rate": 9.999426245786253e-06, "loss": 14.5201, "step": 52170 }, { "epoch": 0.1054069013441501, "grad_norm": 817.1277465820312, "learning_rate": 9.999420945646828e-06, "loss": 38.6919, "step": 52180 }, { "epoch": 0.10542710197683391, "grad_norm": 324.4822692871094, "learning_rate": 9.99941562114083e-06, "loss": 13.4006, "step": 52190 }, { "epoch": 0.10544730260951773, "grad_norm": 176.04063415527344, "learning_rate": 9.999410272268285e-06, "loss": 20.2245, "step": 52200 }, { "epoch": 0.10546750324220154, "grad_norm": 164.1830596923828, "learning_rate": 9.999404899029222e-06, "loss": 27.652, "step": 52210 }, { "epoch": 0.10548770387488536, "grad_norm": 391.5329284667969, "learning_rate": 9.999399501423667e-06, "loss": 27.9447, "step": 52220 }, { "epoch": 0.10550790450756918, "grad_norm": 118.47169494628906, "learning_rate": 9.999394079451643e-06, "loss": 20.1754, "step": 52230 }, { "epoch": 0.10552810514025299, "grad_norm": 623.3313598632812, "learning_rate": 9.99938863311318e-06, "loss": 16.3596, "step": 52240 }, { "epoch": 0.1055483057729368, "grad_norm": 316.72119140625, "learning_rate": 9.999383162408303e-06, "loss": 21.6291, "step": 52250 }, { "epoch": 0.10556850640562063, "grad_norm": 316.44580078125, "learning_rate": 9.99937766733704e-06, "loss": 23.7353, "step": 52260 }, { "epoch": 0.10558870703830443, "grad_norm": 243.25796508789062, "learning_rate": 9.999372147899416e-06, "loss": 13.5009, "step": 52270 }, { "epoch": 0.10560890767098825, "grad_norm": 102.94891357421875, "learning_rate": 9.999366604095458e-06, "loss": 45.5807, "step": 52280 }, { "epoch": 0.10562910830367207, "grad_norm": 1227.4219970703125, "learning_rate": 9.999361035925193e-06, "loss": 49.065, "step": 52290 }, { "epoch": 0.10564930893635589, "grad_norm": 419.87139892578125, "learning_rate": 9.999355443388649e-06, "loss": 40.9676, "step": 52300 }, { "epoch": 0.1056695095690397, "grad_norm": 418.601806640625, "learning_rate": 9.999349826485854e-06, "loss": 26.3758, "step": 52310 }, { "epoch": 0.10568971020172352, "grad_norm": 253.9137725830078, "learning_rate": 9.999344185216833e-06, "loss": 33.5302, "step": 52320 }, { "epoch": 0.10570991083440734, "grad_norm": 644.6627807617188, "learning_rate": 9.999338519581616e-06, "loss": 35.0133, "step": 52330 }, { "epoch": 0.10573011146709115, "grad_norm": 275.2368469238281, "learning_rate": 9.999332829580227e-06, "loss": 29.1556, "step": 52340 }, { "epoch": 0.10575031209977497, "grad_norm": 198.24954223632812, "learning_rate": 9.999327115212698e-06, "loss": 19.2075, "step": 52350 }, { "epoch": 0.10577051273245879, "grad_norm": 168.063720703125, "learning_rate": 9.999321376479054e-06, "loss": 23.2625, "step": 52360 }, { "epoch": 0.10579071336514259, "grad_norm": 202.45333862304688, "learning_rate": 9.999315613379326e-06, "loss": 21.0214, "step": 52370 }, { "epoch": 0.10581091399782641, "grad_norm": 184.0305633544922, "learning_rate": 9.999309825913538e-06, "loss": 23.3164, "step": 52380 }, { "epoch": 0.10583111463051023, "grad_norm": 528.8081665039062, "learning_rate": 9.999304014081721e-06, "loss": 29.0037, "step": 52390 }, { "epoch": 0.10585131526319404, "grad_norm": 93.98223876953125, "learning_rate": 9.999298177883902e-06, "loss": 22.6967, "step": 52400 }, { "epoch": 0.10587151589587786, "grad_norm": 1776.77587890625, "learning_rate": 9.999292317320112e-06, "loss": 14.8197, "step": 52410 }, { "epoch": 0.10589171652856168, "grad_norm": 465.3815612792969, "learning_rate": 9.999286432390376e-06, "loss": 42.1874, "step": 52420 }, { "epoch": 0.10591191716124548, "grad_norm": 564.4176025390625, "learning_rate": 9.999280523094724e-06, "loss": 23.6701, "step": 52430 }, { "epoch": 0.1059321177939293, "grad_norm": 97.61070251464844, "learning_rate": 9.999274589433186e-06, "loss": 35.1113, "step": 52440 }, { "epoch": 0.10595231842661312, "grad_norm": 402.47259521484375, "learning_rate": 9.99926863140579e-06, "loss": 29.0945, "step": 52450 }, { "epoch": 0.10597251905929694, "grad_norm": 349.5125732421875, "learning_rate": 9.999262649012564e-06, "loss": 24.3517, "step": 52460 }, { "epoch": 0.10599271969198075, "grad_norm": 180.43035888671875, "learning_rate": 9.99925664225354e-06, "loss": 10.7036, "step": 52470 }, { "epoch": 0.10601292032466457, "grad_norm": 401.3384094238281, "learning_rate": 9.999250611128743e-06, "loss": 30.3671, "step": 52480 }, { "epoch": 0.10603312095734839, "grad_norm": 198.52703857421875, "learning_rate": 9.999244555638205e-06, "loss": 13.332, "step": 52490 }, { "epoch": 0.1060533215900322, "grad_norm": 367.86077880859375, "learning_rate": 9.999238475781957e-06, "loss": 11.3917, "step": 52500 }, { "epoch": 0.10607352222271602, "grad_norm": 100.33647155761719, "learning_rate": 9.999232371560027e-06, "loss": 14.9774, "step": 52510 }, { "epoch": 0.10609372285539984, "grad_norm": 179.84249877929688, "learning_rate": 9.999226242972445e-06, "loss": 12.0446, "step": 52520 }, { "epoch": 0.10611392348808364, "grad_norm": 286.1056213378906, "learning_rate": 9.999220090019238e-06, "loss": 31.4821, "step": 52530 }, { "epoch": 0.10613412412076746, "grad_norm": 555.4896850585938, "learning_rate": 9.99921391270044e-06, "loss": 39.6218, "step": 52540 }, { "epoch": 0.10615432475345128, "grad_norm": 488.5726013183594, "learning_rate": 9.999207711016081e-06, "loss": 22.1534, "step": 52550 }, { "epoch": 0.10617452538613509, "grad_norm": 220.59344482421875, "learning_rate": 9.999201484966188e-06, "loss": 17.941, "step": 52560 }, { "epoch": 0.10619472601881891, "grad_norm": 234.64988708496094, "learning_rate": 9.999195234550796e-06, "loss": 21.7613, "step": 52570 }, { "epoch": 0.10621492665150273, "grad_norm": 404.691162109375, "learning_rate": 9.99918895976993e-06, "loss": 38.7944, "step": 52580 }, { "epoch": 0.10623512728418653, "grad_norm": 301.55743408203125, "learning_rate": 9.999182660623625e-06, "loss": 27.4092, "step": 52590 }, { "epoch": 0.10625532791687035, "grad_norm": 534.3307495117188, "learning_rate": 9.999176337111908e-06, "loss": 31.8385, "step": 52600 }, { "epoch": 0.10627552854955417, "grad_norm": 344.15435791015625, "learning_rate": 9.999169989234815e-06, "loss": 20.1863, "step": 52610 }, { "epoch": 0.106295729182238, "grad_norm": 377.4220886230469, "learning_rate": 9.999163616992371e-06, "loss": 28.109, "step": 52620 }, { "epoch": 0.1063159298149218, "grad_norm": 29.229249954223633, "learning_rate": 9.999157220384612e-06, "loss": 12.606, "step": 52630 }, { "epoch": 0.10633613044760562, "grad_norm": 637.4224243164062, "learning_rate": 9.999150799411565e-06, "loss": 28.3209, "step": 52640 }, { "epoch": 0.10635633108028944, "grad_norm": 700.8707275390625, "learning_rate": 9.999144354073264e-06, "loss": 46.3826, "step": 52650 }, { "epoch": 0.10637653171297325, "grad_norm": 437.7762451171875, "learning_rate": 9.999137884369741e-06, "loss": 44.2207, "step": 52660 }, { "epoch": 0.10639673234565707, "grad_norm": 331.6518859863281, "learning_rate": 9.999131390301027e-06, "loss": 24.0271, "step": 52670 }, { "epoch": 0.10641693297834089, "grad_norm": 439.0265197753906, "learning_rate": 9.99912487186715e-06, "loss": 32.7539, "step": 52680 }, { "epoch": 0.10643713361102469, "grad_norm": 473.2354431152344, "learning_rate": 9.999118329068148e-06, "loss": 28.3734, "step": 52690 }, { "epoch": 0.10645733424370851, "grad_norm": 290.6955261230469, "learning_rate": 9.999111761904046e-06, "loss": 46.8339, "step": 52700 }, { "epoch": 0.10647753487639233, "grad_norm": 196.3876190185547, "learning_rate": 9.999105170374881e-06, "loss": 18.8935, "step": 52710 }, { "epoch": 0.10649773550907614, "grad_norm": 515.7637329101562, "learning_rate": 9.999098554480685e-06, "loss": 17.2101, "step": 52720 }, { "epoch": 0.10651793614175996, "grad_norm": 0.0, "learning_rate": 9.999091914221487e-06, "loss": 21.925, "step": 52730 }, { "epoch": 0.10653813677444378, "grad_norm": 582.2947387695312, "learning_rate": 9.999085249597322e-06, "loss": 20.8303, "step": 52740 }, { "epoch": 0.10655833740712758, "grad_norm": 75.26732635498047, "learning_rate": 9.999078560608221e-06, "loss": 26.7147, "step": 52750 }, { "epoch": 0.1065785380398114, "grad_norm": 131.88365173339844, "learning_rate": 9.999071847254219e-06, "loss": 21.8251, "step": 52760 }, { "epoch": 0.10659873867249522, "grad_norm": 717.3924560546875, "learning_rate": 9.999065109535346e-06, "loss": 38.4199, "step": 52770 }, { "epoch": 0.10661893930517904, "grad_norm": 165.60885620117188, "learning_rate": 9.999058347451638e-06, "loss": 38.605, "step": 52780 }, { "epoch": 0.10663913993786285, "grad_norm": 116.25701904296875, "learning_rate": 9.999051561003124e-06, "loss": 29.3475, "step": 52790 }, { "epoch": 0.10665934057054667, "grad_norm": 269.235107421875, "learning_rate": 9.99904475018984e-06, "loss": 18.2689, "step": 52800 }, { "epoch": 0.10667954120323049, "grad_norm": 26.602397918701172, "learning_rate": 9.999037915011819e-06, "loss": 27.1056, "step": 52810 }, { "epoch": 0.1066997418359143, "grad_norm": 202.05877685546875, "learning_rate": 9.999031055469091e-06, "loss": 29.2349, "step": 52820 }, { "epoch": 0.10671994246859812, "grad_norm": 139.73046875, "learning_rate": 9.999024171561693e-06, "loss": 32.1607, "step": 52830 }, { "epoch": 0.10674014310128194, "grad_norm": 217.9608917236328, "learning_rate": 9.999017263289656e-06, "loss": 30.1523, "step": 52840 }, { "epoch": 0.10676034373396574, "grad_norm": 611.6710205078125, "learning_rate": 9.999010330653019e-06, "loss": 27.6382, "step": 52850 }, { "epoch": 0.10678054436664956, "grad_norm": 170.50779724121094, "learning_rate": 9.999003373651809e-06, "loss": 23.5909, "step": 52860 }, { "epoch": 0.10680074499933338, "grad_norm": 114.28993225097656, "learning_rate": 9.998996392286062e-06, "loss": 26.2649, "step": 52870 }, { "epoch": 0.10682094563201719, "grad_norm": 278.091064453125, "learning_rate": 9.998989386555815e-06, "loss": 44.7148, "step": 52880 }, { "epoch": 0.10684114626470101, "grad_norm": 300.4612121582031, "learning_rate": 9.9989823564611e-06, "loss": 28.6721, "step": 52890 }, { "epoch": 0.10686134689738483, "grad_norm": 0.0, "learning_rate": 9.99897530200195e-06, "loss": 21.801, "step": 52900 }, { "epoch": 0.10688154753006864, "grad_norm": 474.3299255371094, "learning_rate": 9.998968223178402e-06, "loss": 28.6565, "step": 52910 }, { "epoch": 0.10690174816275246, "grad_norm": 565.7369384765625, "learning_rate": 9.99896111999049e-06, "loss": 29.6718, "step": 52920 }, { "epoch": 0.10692194879543628, "grad_norm": 316.7675476074219, "learning_rate": 9.998953992438245e-06, "loss": 18.5716, "step": 52930 }, { "epoch": 0.1069421494281201, "grad_norm": 42.29882049560547, "learning_rate": 9.998946840521706e-06, "loss": 21.9433, "step": 52940 }, { "epoch": 0.1069623500608039, "grad_norm": 165.08815002441406, "learning_rate": 9.998939664240908e-06, "loss": 28.7829, "step": 52950 }, { "epoch": 0.10698255069348772, "grad_norm": 617.2383422851562, "learning_rate": 9.998932463595882e-06, "loss": 29.7643, "step": 52960 }, { "epoch": 0.10700275132617154, "grad_norm": 156.27337646484375, "learning_rate": 9.998925238586666e-06, "loss": 33.9777, "step": 52970 }, { "epoch": 0.10702295195885535, "grad_norm": 876.9364013671875, "learning_rate": 9.998917989213296e-06, "loss": 39.8205, "step": 52980 }, { "epoch": 0.10704315259153917, "grad_norm": 79.51915740966797, "learning_rate": 9.998910715475804e-06, "loss": 21.5833, "step": 52990 }, { "epoch": 0.10706335322422299, "grad_norm": 189.0352020263672, "learning_rate": 9.998903417374228e-06, "loss": 21.2155, "step": 53000 }, { "epoch": 0.1070835538569068, "grad_norm": 335.4963684082031, "learning_rate": 9.998896094908603e-06, "loss": 31.1107, "step": 53010 }, { "epoch": 0.10710375448959061, "grad_norm": 871.9072875976562, "learning_rate": 9.998888748078966e-06, "loss": 31.8085, "step": 53020 }, { "epoch": 0.10712395512227443, "grad_norm": 110.77191162109375, "learning_rate": 9.99888137688535e-06, "loss": 24.3797, "step": 53030 }, { "epoch": 0.10714415575495824, "grad_norm": 98.8787612915039, "learning_rate": 9.998873981327795e-06, "loss": 16.4739, "step": 53040 }, { "epoch": 0.10716435638764206, "grad_norm": 240.804443359375, "learning_rate": 9.998866561406331e-06, "loss": 45.8461, "step": 53050 }, { "epoch": 0.10718455702032588, "grad_norm": 529.602294921875, "learning_rate": 9.998859117121e-06, "loss": 22.9661, "step": 53060 }, { "epoch": 0.10720475765300969, "grad_norm": 318.4818115234375, "learning_rate": 9.998851648471834e-06, "loss": 24.5641, "step": 53070 }, { "epoch": 0.1072249582856935, "grad_norm": 1589.8702392578125, "learning_rate": 9.998844155458873e-06, "loss": 34.3553, "step": 53080 }, { "epoch": 0.10724515891837733, "grad_norm": 376.7889709472656, "learning_rate": 9.998836638082152e-06, "loss": 20.9843, "step": 53090 }, { "epoch": 0.10726535955106115, "grad_norm": 172.7795867919922, "learning_rate": 9.998829096341706e-06, "loss": 27.1686, "step": 53100 }, { "epoch": 0.10728556018374495, "grad_norm": 301.62896728515625, "learning_rate": 9.998821530237576e-06, "loss": 14.9831, "step": 53110 }, { "epoch": 0.10730576081642877, "grad_norm": 403.9078674316406, "learning_rate": 9.998813939769794e-06, "loss": 23.4815, "step": 53120 }, { "epoch": 0.10732596144911259, "grad_norm": 333.2471618652344, "learning_rate": 9.9988063249384e-06, "loss": 30.8573, "step": 53130 }, { "epoch": 0.1073461620817964, "grad_norm": 475.81292724609375, "learning_rate": 9.99879868574343e-06, "loss": 24.5294, "step": 53140 }, { "epoch": 0.10736636271448022, "grad_norm": 578.8433227539062, "learning_rate": 9.998791022184921e-06, "loss": 51.5362, "step": 53150 }, { "epoch": 0.10738656334716404, "grad_norm": 312.8624267578125, "learning_rate": 9.998783334262911e-06, "loss": 36.1632, "step": 53160 }, { "epoch": 0.10740676397984784, "grad_norm": 385.38446044921875, "learning_rate": 9.998775621977438e-06, "loss": 21.2316, "step": 53170 }, { "epoch": 0.10742696461253166, "grad_norm": 134.56028747558594, "learning_rate": 9.998767885328538e-06, "loss": 53.6637, "step": 53180 }, { "epoch": 0.10744716524521548, "grad_norm": 175.1334228515625, "learning_rate": 9.99876012431625e-06, "loss": 32.2523, "step": 53190 }, { "epoch": 0.10746736587789929, "grad_norm": 806.820556640625, "learning_rate": 9.998752338940612e-06, "loss": 36.0072, "step": 53200 }, { "epoch": 0.10748756651058311, "grad_norm": 360.48345947265625, "learning_rate": 9.99874452920166e-06, "loss": 26.6062, "step": 53210 }, { "epoch": 0.10750776714326693, "grad_norm": 510.0466613769531, "learning_rate": 9.998736695099434e-06, "loss": 32.4799, "step": 53220 }, { "epoch": 0.10752796777595074, "grad_norm": 43.029850006103516, "learning_rate": 9.998728836633972e-06, "loss": 28.0297, "step": 53230 }, { "epoch": 0.10754816840863456, "grad_norm": 215.483642578125, "learning_rate": 9.998720953805312e-06, "loss": 10.1328, "step": 53240 }, { "epoch": 0.10756836904131838, "grad_norm": 352.0667419433594, "learning_rate": 9.998713046613492e-06, "loss": 25.4543, "step": 53250 }, { "epoch": 0.1075885696740022, "grad_norm": 209.58932495117188, "learning_rate": 9.998705115058552e-06, "loss": 25.7006, "step": 53260 }, { "epoch": 0.107608770306686, "grad_norm": 771.2131958007812, "learning_rate": 9.998697159140528e-06, "loss": 24.7904, "step": 53270 }, { "epoch": 0.10762897093936982, "grad_norm": 418.80230712890625, "learning_rate": 9.998689178859461e-06, "loss": 42.0414, "step": 53280 }, { "epoch": 0.10764917157205364, "grad_norm": 195.22291564941406, "learning_rate": 9.99868117421539e-06, "loss": 13.4321, "step": 53290 }, { "epoch": 0.10766937220473745, "grad_norm": 850.9882202148438, "learning_rate": 9.998673145208351e-06, "loss": 28.6134, "step": 53300 }, { "epoch": 0.10768957283742127, "grad_norm": 281.3316955566406, "learning_rate": 9.998665091838386e-06, "loss": 22.1758, "step": 53310 }, { "epoch": 0.10770977347010509, "grad_norm": 515.4241333007812, "learning_rate": 9.998657014105535e-06, "loss": 39.2342, "step": 53320 }, { "epoch": 0.1077299741027889, "grad_norm": 353.08331298828125, "learning_rate": 9.998648912009835e-06, "loss": 34.8498, "step": 53330 }, { "epoch": 0.10775017473547271, "grad_norm": 716.5982055664062, "learning_rate": 9.998640785551327e-06, "loss": 33.9811, "step": 53340 }, { "epoch": 0.10777037536815653, "grad_norm": 300.8550720214844, "learning_rate": 9.99863263473005e-06, "loss": 25.2244, "step": 53350 }, { "epoch": 0.10779057600084034, "grad_norm": 416.7822570800781, "learning_rate": 9.998624459546043e-06, "loss": 15.9481, "step": 53360 }, { "epoch": 0.10781077663352416, "grad_norm": 312.3878173828125, "learning_rate": 9.998616259999348e-06, "loss": 23.67, "step": 53370 }, { "epoch": 0.10783097726620798, "grad_norm": 368.4659118652344, "learning_rate": 9.998608036090003e-06, "loss": 26.2956, "step": 53380 }, { "epoch": 0.10785117789889179, "grad_norm": 217.9113006591797, "learning_rate": 9.998599787818048e-06, "loss": 33.0332, "step": 53390 }, { "epoch": 0.1078713785315756, "grad_norm": 271.96673583984375, "learning_rate": 9.998591515183524e-06, "loss": 25.4211, "step": 53400 }, { "epoch": 0.10789157916425943, "grad_norm": 228.54415893554688, "learning_rate": 9.998583218186471e-06, "loss": 16.5485, "step": 53410 }, { "epoch": 0.10791177979694325, "grad_norm": 337.5104675292969, "learning_rate": 9.998574896826931e-06, "loss": 32.334, "step": 53420 }, { "epoch": 0.10793198042962705, "grad_norm": 456.17266845703125, "learning_rate": 9.998566551104943e-06, "loss": 20.8827, "step": 53430 }, { "epoch": 0.10795218106231087, "grad_norm": 262.7278137207031, "learning_rate": 9.998558181020547e-06, "loss": 42.3757, "step": 53440 }, { "epoch": 0.10797238169499469, "grad_norm": 50.50201416015625, "learning_rate": 9.998549786573785e-06, "loss": 28.6598, "step": 53450 }, { "epoch": 0.1079925823276785, "grad_norm": 324.3455505371094, "learning_rate": 9.998541367764699e-06, "loss": 33.99, "step": 53460 }, { "epoch": 0.10801278296036232, "grad_norm": 496.0831298828125, "learning_rate": 9.998532924593327e-06, "loss": 32.4324, "step": 53470 }, { "epoch": 0.10803298359304614, "grad_norm": 274.8130187988281, "learning_rate": 9.99852445705971e-06, "loss": 27.7603, "step": 53480 }, { "epoch": 0.10805318422572995, "grad_norm": 42.96908187866211, "learning_rate": 9.998515965163894e-06, "loss": 13.1927, "step": 53490 }, { "epoch": 0.10807338485841377, "grad_norm": 80.90930938720703, "learning_rate": 9.998507448905917e-06, "loss": 44.1911, "step": 53500 }, { "epoch": 0.10809358549109759, "grad_norm": 291.58660888671875, "learning_rate": 9.99849890828582e-06, "loss": 22.7009, "step": 53510 }, { "epoch": 0.10811378612378139, "grad_norm": 305.44732666015625, "learning_rate": 9.998490343303646e-06, "loss": 51.4827, "step": 53520 }, { "epoch": 0.10813398675646521, "grad_norm": 222.2078094482422, "learning_rate": 9.998481753959436e-06, "loss": 39.2673, "step": 53530 }, { "epoch": 0.10815418738914903, "grad_norm": 101.01174926757812, "learning_rate": 9.998473140253234e-06, "loss": 30.6629, "step": 53540 }, { "epoch": 0.10817438802183284, "grad_norm": 163.60247802734375, "learning_rate": 9.998464502185076e-06, "loss": 50.5303, "step": 53550 }, { "epoch": 0.10819458865451666, "grad_norm": 250.6178436279297, "learning_rate": 9.998455839755013e-06, "loss": 27.708, "step": 53560 }, { "epoch": 0.10821478928720048, "grad_norm": 296.7221984863281, "learning_rate": 9.99844715296308e-06, "loss": 27.6463, "step": 53570 }, { "epoch": 0.1082349899198843, "grad_norm": 131.57839965820312, "learning_rate": 9.998438441809322e-06, "loss": 37.0361, "step": 53580 }, { "epoch": 0.1082551905525681, "grad_norm": 162.1026611328125, "learning_rate": 9.998429706293781e-06, "loss": 29.839, "step": 53590 }, { "epoch": 0.10827539118525192, "grad_norm": 85.38102722167969, "learning_rate": 9.9984209464165e-06, "loss": 24.9544, "step": 53600 }, { "epoch": 0.10829559181793574, "grad_norm": 148.11148071289062, "learning_rate": 9.998412162177523e-06, "loss": 24.2626, "step": 53610 }, { "epoch": 0.10831579245061955, "grad_norm": 352.221435546875, "learning_rate": 9.99840335357689e-06, "loss": 30.1198, "step": 53620 }, { "epoch": 0.10833599308330337, "grad_norm": 208.3675994873047, "learning_rate": 9.998394520614645e-06, "loss": 24.2047, "step": 53630 }, { "epoch": 0.10835619371598719, "grad_norm": 166.61154174804688, "learning_rate": 9.998385663290833e-06, "loss": 25.3655, "step": 53640 }, { "epoch": 0.108376394348671, "grad_norm": 274.33837890625, "learning_rate": 9.998376781605493e-06, "loss": 31.7415, "step": 53650 }, { "epoch": 0.10839659498135482, "grad_norm": 606.1007080078125, "learning_rate": 9.998367875558673e-06, "loss": 38.1207, "step": 53660 }, { "epoch": 0.10841679561403864, "grad_norm": 203.2119598388672, "learning_rate": 9.998358945150412e-06, "loss": 16.2503, "step": 53670 }, { "epoch": 0.10843699624672244, "grad_norm": 390.416015625, "learning_rate": 9.998349990380757e-06, "loss": 26.9997, "step": 53680 }, { "epoch": 0.10845719687940626, "grad_norm": 128.118408203125, "learning_rate": 9.998341011249752e-06, "loss": 31.2511, "step": 53690 }, { "epoch": 0.10847739751209008, "grad_norm": 64.0647201538086, "learning_rate": 9.998332007757436e-06, "loss": 23.9766, "step": 53700 }, { "epoch": 0.10849759814477389, "grad_norm": 288.9873962402344, "learning_rate": 9.998322979903859e-06, "loss": 18.0814, "step": 53710 }, { "epoch": 0.10851779877745771, "grad_norm": 120.61299133300781, "learning_rate": 9.99831392768906e-06, "loss": 34.8651, "step": 53720 }, { "epoch": 0.10853799941014153, "grad_norm": 380.6361999511719, "learning_rate": 9.998304851113086e-06, "loss": 20.3351, "step": 53730 }, { "epoch": 0.10855820004282535, "grad_norm": 226.6094207763672, "learning_rate": 9.99829575017598e-06, "loss": 21.8826, "step": 53740 }, { "epoch": 0.10857840067550915, "grad_norm": 437.5682373046875, "learning_rate": 9.998286624877786e-06, "loss": 30.2184, "step": 53750 }, { "epoch": 0.10859860130819297, "grad_norm": 697.1895751953125, "learning_rate": 9.998277475218552e-06, "loss": 23.8082, "step": 53760 }, { "epoch": 0.1086188019408768, "grad_norm": 55.12375259399414, "learning_rate": 9.998268301198317e-06, "loss": 30.2272, "step": 53770 }, { "epoch": 0.1086390025735606, "grad_norm": 251.8342742919922, "learning_rate": 9.99825910281713e-06, "loss": 20.1038, "step": 53780 }, { "epoch": 0.10865920320624442, "grad_norm": 355.8857116699219, "learning_rate": 9.998249880075033e-06, "loss": 37.7643, "step": 53790 }, { "epoch": 0.10867940383892824, "grad_norm": 401.7855529785156, "learning_rate": 9.998240632972073e-06, "loss": 44.8027, "step": 53800 }, { "epoch": 0.10869960447161205, "grad_norm": 170.02149963378906, "learning_rate": 9.998231361508295e-06, "loss": 51.3691, "step": 53810 }, { "epoch": 0.10871980510429587, "grad_norm": 149.75357055664062, "learning_rate": 9.998222065683743e-06, "loss": 36.4286, "step": 53820 }, { "epoch": 0.10874000573697969, "grad_norm": 346.4633483886719, "learning_rate": 9.998212745498464e-06, "loss": 24.7053, "step": 53830 }, { "epoch": 0.10876020636966349, "grad_norm": 6.9385504722595215, "learning_rate": 9.9982034009525e-06, "loss": 24.3322, "step": 53840 }, { "epoch": 0.10878040700234731, "grad_norm": 329.6048583984375, "learning_rate": 9.9981940320459e-06, "loss": 39.7074, "step": 53850 }, { "epoch": 0.10880060763503113, "grad_norm": 95.94493865966797, "learning_rate": 9.998184638778708e-06, "loss": 17.5897, "step": 53860 }, { "epoch": 0.10882080826771494, "grad_norm": 367.76513671875, "learning_rate": 9.99817522115097e-06, "loss": 26.9395, "step": 53870 }, { "epoch": 0.10884100890039876, "grad_norm": 437.0582275390625, "learning_rate": 9.998165779162734e-06, "loss": 15.9037, "step": 53880 }, { "epoch": 0.10886120953308258, "grad_norm": 584.129150390625, "learning_rate": 9.998156312814043e-06, "loss": 37.4188, "step": 53890 }, { "epoch": 0.1088814101657664, "grad_norm": 146.9011688232422, "learning_rate": 9.998146822104943e-06, "loss": 19.7329, "step": 53900 }, { "epoch": 0.1089016107984502, "grad_norm": 364.1485900878906, "learning_rate": 9.998137307035486e-06, "loss": 21.3229, "step": 53910 }, { "epoch": 0.10892181143113402, "grad_norm": 173.67649841308594, "learning_rate": 9.99812776760571e-06, "loss": 17.9368, "step": 53920 }, { "epoch": 0.10894201206381784, "grad_norm": 444.15557861328125, "learning_rate": 9.998118203815666e-06, "loss": 33.8437, "step": 53930 }, { "epoch": 0.10896221269650165, "grad_norm": 117.7065200805664, "learning_rate": 9.9981086156654e-06, "loss": 25.0378, "step": 53940 }, { "epoch": 0.10898241332918547, "grad_norm": 301.90509033203125, "learning_rate": 9.99809900315496e-06, "loss": 26.0238, "step": 53950 }, { "epoch": 0.10900261396186929, "grad_norm": 37.849876403808594, "learning_rate": 9.998089366284392e-06, "loss": 19.6138, "step": 53960 }, { "epoch": 0.1090228145945531, "grad_norm": 52.86341857910156, "learning_rate": 9.99807970505374e-06, "loss": 43.3096, "step": 53970 }, { "epoch": 0.10904301522723692, "grad_norm": 99.61842346191406, "learning_rate": 9.998070019463055e-06, "loss": 20.9459, "step": 53980 }, { "epoch": 0.10906321585992074, "grad_norm": 347.1647033691406, "learning_rate": 9.998060309512384e-06, "loss": 16.2718, "step": 53990 }, { "epoch": 0.10908341649260454, "grad_norm": 167.6675567626953, "learning_rate": 9.998050575201772e-06, "loss": 25.5626, "step": 54000 }, { "epoch": 0.10910361712528836, "grad_norm": 269.9803161621094, "learning_rate": 9.99804081653127e-06, "loss": 27.568, "step": 54010 }, { "epoch": 0.10912381775797218, "grad_norm": 187.9766845703125, "learning_rate": 9.99803103350092e-06, "loss": 35.8174, "step": 54020 }, { "epoch": 0.10914401839065599, "grad_norm": 310.7139587402344, "learning_rate": 9.998021226110775e-06, "loss": 26.5961, "step": 54030 }, { "epoch": 0.10916421902333981, "grad_norm": 410.5540466308594, "learning_rate": 9.99801139436088e-06, "loss": 34.1806, "step": 54040 }, { "epoch": 0.10918441965602363, "grad_norm": 77.53673553466797, "learning_rate": 9.998001538251283e-06, "loss": 41.9684, "step": 54050 }, { "epoch": 0.10920462028870745, "grad_norm": 175.5196533203125, "learning_rate": 9.997991657782033e-06, "loss": 32.7055, "step": 54060 }, { "epoch": 0.10922482092139126, "grad_norm": 161.60763549804688, "learning_rate": 9.997981752953179e-06, "loss": 27.7577, "step": 54070 }, { "epoch": 0.10924502155407508, "grad_norm": 233.81199645996094, "learning_rate": 9.997971823764766e-06, "loss": 19.4837, "step": 54080 }, { "epoch": 0.1092652221867589, "grad_norm": 517.9832153320312, "learning_rate": 9.997961870216849e-06, "loss": 45.0446, "step": 54090 }, { "epoch": 0.1092854228194427, "grad_norm": 204.83140563964844, "learning_rate": 9.997951892309468e-06, "loss": 28.5982, "step": 54100 }, { "epoch": 0.10930562345212652, "grad_norm": 236.12742614746094, "learning_rate": 9.997941890042677e-06, "loss": 20.0582, "step": 54110 }, { "epoch": 0.10932582408481034, "grad_norm": 62.931976318359375, "learning_rate": 9.997931863416522e-06, "loss": 28.9499, "step": 54120 }, { "epoch": 0.10934602471749415, "grad_norm": 320.4612731933594, "learning_rate": 9.997921812431055e-06, "loss": 36.2257, "step": 54130 }, { "epoch": 0.10936622535017797, "grad_norm": 247.424072265625, "learning_rate": 9.997911737086322e-06, "loss": 30.0979, "step": 54140 }, { "epoch": 0.10938642598286179, "grad_norm": 655.7802734375, "learning_rate": 9.997901637382375e-06, "loss": 35.9455, "step": 54150 }, { "epoch": 0.1094066266155456, "grad_norm": 429.4595031738281, "learning_rate": 9.99789151331926e-06, "loss": 25.1089, "step": 54160 }, { "epoch": 0.10942682724822941, "grad_norm": 575.7311401367188, "learning_rate": 9.997881364897028e-06, "loss": 33.6836, "step": 54170 }, { "epoch": 0.10944702788091323, "grad_norm": 404.14031982421875, "learning_rate": 9.99787119211573e-06, "loss": 15.9472, "step": 54180 }, { "epoch": 0.10946722851359704, "grad_norm": 451.4541015625, "learning_rate": 9.997860994975412e-06, "loss": 48.5605, "step": 54190 }, { "epoch": 0.10948742914628086, "grad_norm": 315.8075866699219, "learning_rate": 9.997850773476126e-06, "loss": 54.3026, "step": 54200 }, { "epoch": 0.10950762977896468, "grad_norm": 418.5802001953125, "learning_rate": 9.997840527617921e-06, "loss": 21.5466, "step": 54210 }, { "epoch": 0.1095278304116485, "grad_norm": 408.77374267578125, "learning_rate": 9.99783025740085e-06, "loss": 20.2147, "step": 54220 }, { "epoch": 0.1095480310443323, "grad_norm": 163.56582641601562, "learning_rate": 9.997819962824958e-06, "loss": 10.339, "step": 54230 }, { "epoch": 0.10956823167701613, "grad_norm": 559.4049682617188, "learning_rate": 9.9978096438903e-06, "loss": 25.6497, "step": 54240 }, { "epoch": 0.10958843230969995, "grad_norm": 107.73455047607422, "learning_rate": 9.997799300596921e-06, "loss": 14.6805, "step": 54250 }, { "epoch": 0.10960863294238375, "grad_norm": 291.5948791503906, "learning_rate": 9.997788932944877e-06, "loss": 24.7233, "step": 54260 }, { "epoch": 0.10962883357506757, "grad_norm": 393.5211181640625, "learning_rate": 9.997778540934213e-06, "loss": 17.3494, "step": 54270 }, { "epoch": 0.10964903420775139, "grad_norm": 356.5329895019531, "learning_rate": 9.997768124564986e-06, "loss": 38.2552, "step": 54280 }, { "epoch": 0.1096692348404352, "grad_norm": 164.11915588378906, "learning_rate": 9.997757683837242e-06, "loss": 23.5119, "step": 54290 }, { "epoch": 0.10968943547311902, "grad_norm": 194.57708740234375, "learning_rate": 9.997747218751032e-06, "loss": 30.3619, "step": 54300 }, { "epoch": 0.10970963610580284, "grad_norm": 287.5960693359375, "learning_rate": 9.997736729306409e-06, "loss": 20.927, "step": 54310 }, { "epoch": 0.10972983673848664, "grad_norm": 170.546630859375, "learning_rate": 9.997726215503422e-06, "loss": 34.8744, "step": 54320 }, { "epoch": 0.10975003737117046, "grad_norm": 208.91253662109375, "learning_rate": 9.997715677342126e-06, "loss": 42.6922, "step": 54330 }, { "epoch": 0.10977023800385428, "grad_norm": 646.8623046875, "learning_rate": 9.99770511482257e-06, "loss": 26.4852, "step": 54340 }, { "epoch": 0.10979043863653809, "grad_norm": 371.97119140625, "learning_rate": 9.997694527944804e-06, "loss": 26.7902, "step": 54350 }, { "epoch": 0.10981063926922191, "grad_norm": 337.8583068847656, "learning_rate": 9.99768391670888e-06, "loss": 14.0327, "step": 54360 }, { "epoch": 0.10983083990190573, "grad_norm": 254.46575927734375, "learning_rate": 9.997673281114852e-06, "loss": 22.4593, "step": 54370 }, { "epoch": 0.10985104053458955, "grad_norm": 420.8552551269531, "learning_rate": 9.99766262116277e-06, "loss": 32.51, "step": 54380 }, { "epoch": 0.10987124116727336, "grad_norm": 316.763427734375, "learning_rate": 9.997651936852689e-06, "loss": 20.3859, "step": 54390 }, { "epoch": 0.10989144179995718, "grad_norm": 344.2328186035156, "learning_rate": 9.997641228184656e-06, "loss": 20.2038, "step": 54400 }, { "epoch": 0.109911642432641, "grad_norm": 324.0479736328125, "learning_rate": 9.997630495158728e-06, "loss": 30.0895, "step": 54410 }, { "epoch": 0.1099318430653248, "grad_norm": 235.1464080810547, "learning_rate": 9.997619737774953e-06, "loss": 25.2305, "step": 54420 }, { "epoch": 0.10995204369800862, "grad_norm": 15.469261169433594, "learning_rate": 9.997608956033386e-06, "loss": 16.4893, "step": 54430 }, { "epoch": 0.10997224433069244, "grad_norm": 850.8834228515625, "learning_rate": 9.99759814993408e-06, "loss": 52.103, "step": 54440 }, { "epoch": 0.10999244496337625, "grad_norm": 575.6388549804688, "learning_rate": 9.997587319477084e-06, "loss": 27.7807, "step": 54450 }, { "epoch": 0.11001264559606007, "grad_norm": 227.6542510986328, "learning_rate": 9.997576464662458e-06, "loss": 17.8235, "step": 54460 }, { "epoch": 0.11003284622874389, "grad_norm": 231.88462829589844, "learning_rate": 9.997565585490247e-06, "loss": 20.3408, "step": 54470 }, { "epoch": 0.1100530468614277, "grad_norm": 399.1966857910156, "learning_rate": 9.997554681960508e-06, "loss": 23.3814, "step": 54480 }, { "epoch": 0.11007324749411151, "grad_norm": 43.860084533691406, "learning_rate": 9.997543754073295e-06, "loss": 16.9079, "step": 54490 }, { "epoch": 0.11009344812679533, "grad_norm": 307.2585754394531, "learning_rate": 9.997532801828659e-06, "loss": 21.7701, "step": 54500 }, { "epoch": 0.11011364875947914, "grad_norm": 202.39834594726562, "learning_rate": 9.997521825226654e-06, "loss": 35.6813, "step": 54510 }, { "epoch": 0.11013384939216296, "grad_norm": 528.8931274414062, "learning_rate": 9.997510824267334e-06, "loss": 23.6914, "step": 54520 }, { "epoch": 0.11015405002484678, "grad_norm": 129.41329956054688, "learning_rate": 9.997499798950752e-06, "loss": 28.1611, "step": 54530 }, { "epoch": 0.1101742506575306, "grad_norm": 220.817138671875, "learning_rate": 9.997488749276962e-06, "loss": 27.0997, "step": 54540 }, { "epoch": 0.1101944512902144, "grad_norm": 190.77639770507812, "learning_rate": 9.997477675246018e-06, "loss": 16.2595, "step": 54550 }, { "epoch": 0.11021465192289823, "grad_norm": 199.07130432128906, "learning_rate": 9.997466576857974e-06, "loss": 32.5664, "step": 54560 }, { "epoch": 0.11023485255558205, "grad_norm": 129.1767120361328, "learning_rate": 9.997455454112885e-06, "loss": 20.869, "step": 54570 }, { "epoch": 0.11025505318826585, "grad_norm": 443.89776611328125, "learning_rate": 9.997444307010804e-06, "loss": 31.0807, "step": 54580 }, { "epoch": 0.11027525382094967, "grad_norm": 267.8390808105469, "learning_rate": 9.997433135551786e-06, "loss": 44.8408, "step": 54590 }, { "epoch": 0.11029545445363349, "grad_norm": 342.8444519042969, "learning_rate": 9.997421939735885e-06, "loss": 19.8933, "step": 54600 }, { "epoch": 0.1103156550863173, "grad_norm": 133.0485076904297, "learning_rate": 9.997410719563155e-06, "loss": 18.7641, "step": 54610 }, { "epoch": 0.11033585571900112, "grad_norm": 158.60877990722656, "learning_rate": 9.997399475033648e-06, "loss": 19.6392, "step": 54620 }, { "epoch": 0.11035605635168494, "grad_norm": 289.0514831542969, "learning_rate": 9.997388206147427e-06, "loss": 54.8414, "step": 54630 }, { "epoch": 0.11037625698436875, "grad_norm": 324.3666076660156, "learning_rate": 9.99737691290454e-06, "loss": 27.6285, "step": 54640 }, { "epoch": 0.11039645761705257, "grad_norm": 0.0, "learning_rate": 9.997365595305045e-06, "loss": 23.9169, "step": 54650 }, { "epoch": 0.11041665824973639, "grad_norm": 383.123046875, "learning_rate": 9.997354253348994e-06, "loss": 20.2683, "step": 54660 }, { "epoch": 0.11043685888242019, "grad_norm": 907.8447875976562, "learning_rate": 9.997342887036446e-06, "loss": 30.8266, "step": 54670 }, { "epoch": 0.11045705951510401, "grad_norm": 659.7605590820312, "learning_rate": 9.997331496367455e-06, "loss": 50.102, "step": 54680 }, { "epoch": 0.11047726014778783, "grad_norm": 550.5804443359375, "learning_rate": 9.997320081342076e-06, "loss": 32.0709, "step": 54690 }, { "epoch": 0.11049746078047164, "grad_norm": 89.91242218017578, "learning_rate": 9.997308641960365e-06, "loss": 46.9984, "step": 54700 }, { "epoch": 0.11051766141315546, "grad_norm": 361.9211120605469, "learning_rate": 9.997297178222378e-06, "loss": 40.8677, "step": 54710 }, { "epoch": 0.11053786204583928, "grad_norm": 147.31106567382812, "learning_rate": 9.997285690128172e-06, "loss": 24.5855, "step": 54720 }, { "epoch": 0.1105580626785231, "grad_norm": 184.7095489501953, "learning_rate": 9.997274177677799e-06, "loss": 24.0306, "step": 54730 }, { "epoch": 0.1105782633112069, "grad_norm": 268.792724609375, "learning_rate": 9.997262640871319e-06, "loss": 31.485, "step": 54740 }, { "epoch": 0.11059846394389072, "grad_norm": 104.79292297363281, "learning_rate": 9.997251079708788e-06, "loss": 15.6083, "step": 54750 }, { "epoch": 0.11061866457657454, "grad_norm": 112.54867553710938, "learning_rate": 9.997239494190258e-06, "loss": 33.9944, "step": 54760 }, { "epoch": 0.11063886520925835, "grad_norm": 233.14279174804688, "learning_rate": 9.997227884315792e-06, "loss": 30.5499, "step": 54770 }, { "epoch": 0.11065906584194217, "grad_norm": 224.94532775878906, "learning_rate": 9.997216250085441e-06, "loss": 26.4267, "step": 54780 }, { "epoch": 0.11067926647462599, "grad_norm": 469.75164794921875, "learning_rate": 9.997204591499266e-06, "loss": 22.3007, "step": 54790 }, { "epoch": 0.1106994671073098, "grad_norm": 293.3824157714844, "learning_rate": 9.997192908557322e-06, "loss": 34.2513, "step": 54800 }, { "epoch": 0.11071966773999362, "grad_norm": 188.20541381835938, "learning_rate": 9.997181201259664e-06, "loss": 32.0187, "step": 54810 }, { "epoch": 0.11073986837267744, "grad_norm": 737.5634765625, "learning_rate": 9.997169469606353e-06, "loss": 34.757, "step": 54820 }, { "epoch": 0.11076006900536124, "grad_norm": 181.03126525878906, "learning_rate": 9.997157713597444e-06, "loss": 23.7193, "step": 54830 }, { "epoch": 0.11078026963804506, "grad_norm": 44.8083610534668, "learning_rate": 9.997145933232994e-06, "loss": 52.1436, "step": 54840 }, { "epoch": 0.11080047027072888, "grad_norm": 741.776611328125, "learning_rate": 9.99713412851306e-06, "loss": 24.3121, "step": 54850 }, { "epoch": 0.11082067090341269, "grad_norm": 272.800537109375, "learning_rate": 9.9971222994377e-06, "loss": 28.4178, "step": 54860 }, { "epoch": 0.11084087153609651, "grad_norm": 289.96734619140625, "learning_rate": 9.997110446006974e-06, "loss": 21.7965, "step": 54870 }, { "epoch": 0.11086107216878033, "grad_norm": 30.921451568603516, "learning_rate": 9.997098568220937e-06, "loss": 24.858, "step": 54880 }, { "epoch": 0.11088127280146415, "grad_norm": 324.41192626953125, "learning_rate": 9.997086666079647e-06, "loss": 43.3221, "step": 54890 }, { "epoch": 0.11090147343414795, "grad_norm": 162.92955017089844, "learning_rate": 9.997074739583162e-06, "loss": 28.729, "step": 54900 }, { "epoch": 0.11092167406683177, "grad_norm": 168.18646240234375, "learning_rate": 9.997062788731541e-06, "loss": 39.803, "step": 54910 }, { "epoch": 0.1109418746995156, "grad_norm": 179.87942504882812, "learning_rate": 9.997050813524843e-06, "loss": 18.9566, "step": 54920 }, { "epoch": 0.1109620753321994, "grad_norm": 181.29588317871094, "learning_rate": 9.997038813963126e-06, "loss": 36.7798, "step": 54930 }, { "epoch": 0.11098227596488322, "grad_norm": 196.99169921875, "learning_rate": 9.997026790046446e-06, "loss": 26.9748, "step": 54940 }, { "epoch": 0.11100247659756704, "grad_norm": 177.3017578125, "learning_rate": 9.997014741774866e-06, "loss": 35.5617, "step": 54950 }, { "epoch": 0.11102267723025085, "grad_norm": 72.29965209960938, "learning_rate": 9.99700266914844e-06, "loss": 26.9579, "step": 54960 }, { "epoch": 0.11104287786293467, "grad_norm": 144.84507751464844, "learning_rate": 9.996990572167229e-06, "loss": 27.0188, "step": 54970 }, { "epoch": 0.11106307849561849, "grad_norm": 94.6026611328125, "learning_rate": 9.996978450831293e-06, "loss": 20.8574, "step": 54980 }, { "epoch": 0.11108327912830229, "grad_norm": 817.5119018554688, "learning_rate": 9.99696630514069e-06, "loss": 57.3325, "step": 54990 }, { "epoch": 0.11110347976098611, "grad_norm": 166.598388671875, "learning_rate": 9.99695413509548e-06, "loss": 12.2504, "step": 55000 }, { "epoch": 0.11112368039366993, "grad_norm": 247.50985717773438, "learning_rate": 9.99694194069572e-06, "loss": 15.9198, "step": 55010 }, { "epoch": 0.11114388102635374, "grad_norm": 174.9697723388672, "learning_rate": 9.996929721941472e-06, "loss": 17.9002, "step": 55020 }, { "epoch": 0.11116408165903756, "grad_norm": 306.4726867675781, "learning_rate": 9.996917478832794e-06, "loss": 28.1601, "step": 55030 }, { "epoch": 0.11118428229172138, "grad_norm": 7.0872015953063965, "learning_rate": 9.996905211369748e-06, "loss": 25.0305, "step": 55040 }, { "epoch": 0.1112044829244052, "grad_norm": 203.99505615234375, "learning_rate": 9.99689291955239e-06, "loss": 18.0373, "step": 55050 }, { "epoch": 0.111224683557089, "grad_norm": 178.87757873535156, "learning_rate": 9.996880603380784e-06, "loss": 30.7529, "step": 55060 }, { "epoch": 0.11124488418977282, "grad_norm": 117.42859649658203, "learning_rate": 9.996868262854986e-06, "loss": 19.3749, "step": 55070 }, { "epoch": 0.11126508482245664, "grad_norm": 50.27333068847656, "learning_rate": 9.996855897975058e-06, "loss": 35.7943, "step": 55080 }, { "epoch": 0.11128528545514045, "grad_norm": 112.12095642089844, "learning_rate": 9.996843508741061e-06, "loss": 39.9017, "step": 55090 }, { "epoch": 0.11130548608782427, "grad_norm": 144.43429565429688, "learning_rate": 9.996831095153054e-06, "loss": 26.6877, "step": 55100 }, { "epoch": 0.11132568672050809, "grad_norm": 438.1100158691406, "learning_rate": 9.9968186572111e-06, "loss": 17.5252, "step": 55110 }, { "epoch": 0.1113458873531919, "grad_norm": 363.10107421875, "learning_rate": 9.996806194915258e-06, "loss": 18.0201, "step": 55120 }, { "epoch": 0.11136608798587572, "grad_norm": 164.59405517578125, "learning_rate": 9.996793708265586e-06, "loss": 20.2864, "step": 55130 }, { "epoch": 0.11138628861855954, "grad_norm": 503.38519287109375, "learning_rate": 9.99678119726215e-06, "loss": 48.9766, "step": 55140 }, { "epoch": 0.11140648925124334, "grad_norm": 287.0119323730469, "learning_rate": 9.996768661905008e-06, "loss": 26.8062, "step": 55150 }, { "epoch": 0.11142668988392716, "grad_norm": 170.13784790039062, "learning_rate": 9.996756102194222e-06, "loss": 25.2835, "step": 55160 }, { "epoch": 0.11144689051661098, "grad_norm": 231.0880126953125, "learning_rate": 9.996743518129852e-06, "loss": 30.2916, "step": 55170 }, { "epoch": 0.11146709114929479, "grad_norm": 380.8134765625, "learning_rate": 9.99673090971196e-06, "loss": 20.0888, "step": 55180 }, { "epoch": 0.11148729178197861, "grad_norm": 208.29835510253906, "learning_rate": 9.996718276940608e-06, "loss": 51.5115, "step": 55190 }, { "epoch": 0.11150749241466243, "grad_norm": 110.6097640991211, "learning_rate": 9.996705619815857e-06, "loss": 34.0922, "step": 55200 }, { "epoch": 0.11152769304734625, "grad_norm": 134.5955810546875, "learning_rate": 9.996692938337768e-06, "loss": 31.0729, "step": 55210 }, { "epoch": 0.11154789368003006, "grad_norm": 552.3612060546875, "learning_rate": 9.996680232506404e-06, "loss": 29.4318, "step": 55220 }, { "epoch": 0.11156809431271388, "grad_norm": 140.12771606445312, "learning_rate": 9.996667502321829e-06, "loss": 38.3503, "step": 55230 }, { "epoch": 0.1115882949453977, "grad_norm": 370.1923828125, "learning_rate": 9.9966547477841e-06, "loss": 42.0921, "step": 55240 }, { "epoch": 0.1116084955780815, "grad_norm": 215.5733642578125, "learning_rate": 9.996641968893281e-06, "loss": 30.2461, "step": 55250 }, { "epoch": 0.11162869621076532, "grad_norm": 252.74423217773438, "learning_rate": 9.996629165649437e-06, "loss": 20.2094, "step": 55260 }, { "epoch": 0.11164889684344914, "grad_norm": 673.462890625, "learning_rate": 9.996616338052629e-06, "loss": 22.1481, "step": 55270 }, { "epoch": 0.11166909747613295, "grad_norm": 174.24172973632812, "learning_rate": 9.996603486102918e-06, "loss": 17.2121, "step": 55280 }, { "epoch": 0.11168929810881677, "grad_norm": 136.31153869628906, "learning_rate": 9.996590609800367e-06, "loss": 36.5455, "step": 55290 }, { "epoch": 0.11170949874150059, "grad_norm": 106.99838256835938, "learning_rate": 9.99657770914504e-06, "loss": 23.8077, "step": 55300 }, { "epoch": 0.1117296993741844, "grad_norm": 116.03511810302734, "learning_rate": 9.996564784137e-06, "loss": 23.0754, "step": 55310 }, { "epoch": 0.11174990000686821, "grad_norm": 392.85186767578125, "learning_rate": 9.996551834776309e-06, "loss": 32.1545, "step": 55320 }, { "epoch": 0.11177010063955203, "grad_norm": 517.2945556640625, "learning_rate": 9.996538861063029e-06, "loss": 24.5557, "step": 55330 }, { "epoch": 0.11179030127223584, "grad_norm": 238.66897583007812, "learning_rate": 9.996525862997225e-06, "loss": 16.3867, "step": 55340 }, { "epoch": 0.11181050190491966, "grad_norm": 272.9686584472656, "learning_rate": 9.99651284057896e-06, "loss": 17.9789, "step": 55350 }, { "epoch": 0.11183070253760348, "grad_norm": 115.64042663574219, "learning_rate": 9.9964997938083e-06, "loss": 26.1221, "step": 55360 }, { "epoch": 0.1118509031702873, "grad_norm": 149.3016815185547, "learning_rate": 9.996486722685302e-06, "loss": 30.9704, "step": 55370 }, { "epoch": 0.1118711038029711, "grad_norm": 347.9624938964844, "learning_rate": 9.996473627210035e-06, "loss": 17.0274, "step": 55380 }, { "epoch": 0.11189130443565493, "grad_norm": 291.4383239746094, "learning_rate": 9.996460507382563e-06, "loss": 22.6698, "step": 55390 }, { "epoch": 0.11191150506833875, "grad_norm": 461.1695251464844, "learning_rate": 9.996447363202947e-06, "loss": 31.1687, "step": 55400 }, { "epoch": 0.11193170570102255, "grad_norm": 97.66959381103516, "learning_rate": 9.996434194671254e-06, "loss": 23.3234, "step": 55410 }, { "epoch": 0.11195190633370637, "grad_norm": 317.62646484375, "learning_rate": 9.996421001787545e-06, "loss": 27.4706, "step": 55420 }, { "epoch": 0.11197210696639019, "grad_norm": 214.2448272705078, "learning_rate": 9.996407784551888e-06, "loss": 21.1284, "step": 55430 }, { "epoch": 0.111992307599074, "grad_norm": 524.8641967773438, "learning_rate": 9.996394542964343e-06, "loss": 15.9067, "step": 55440 }, { "epoch": 0.11201250823175782, "grad_norm": 239.1848602294922, "learning_rate": 9.996381277024978e-06, "loss": 26.0277, "step": 55450 }, { "epoch": 0.11203270886444164, "grad_norm": 895.6574096679688, "learning_rate": 9.996367986733857e-06, "loss": 46.5001, "step": 55460 }, { "epoch": 0.11205290949712544, "grad_norm": 200.08340454101562, "learning_rate": 9.996354672091044e-06, "loss": 31.4413, "step": 55470 }, { "epoch": 0.11207311012980926, "grad_norm": 233.74139404296875, "learning_rate": 9.996341333096606e-06, "loss": 19.4758, "step": 55480 }, { "epoch": 0.11209331076249308, "grad_norm": 324.208251953125, "learning_rate": 9.996327969750605e-06, "loss": 39.5084, "step": 55490 }, { "epoch": 0.11211351139517689, "grad_norm": 101.8046646118164, "learning_rate": 9.996314582053106e-06, "loss": 23.3805, "step": 55500 }, { "epoch": 0.11213371202786071, "grad_norm": 420.5274963378906, "learning_rate": 9.996301170004179e-06, "loss": 28.7951, "step": 55510 }, { "epoch": 0.11215391266054453, "grad_norm": 170.65696716308594, "learning_rate": 9.996287733603883e-06, "loss": 15.1907, "step": 55520 }, { "epoch": 0.11217411329322835, "grad_norm": 117.08451080322266, "learning_rate": 9.996274272852289e-06, "loss": 24.9213, "step": 55530 }, { "epoch": 0.11219431392591216, "grad_norm": 234.3174285888672, "learning_rate": 9.996260787749457e-06, "loss": 16.7729, "step": 55540 }, { "epoch": 0.11221451455859598, "grad_norm": 510.7735900878906, "learning_rate": 9.99624727829546e-06, "loss": 24.8112, "step": 55550 }, { "epoch": 0.1122347151912798, "grad_norm": 226.36012268066406, "learning_rate": 9.996233744490356e-06, "loss": 22.1939, "step": 55560 }, { "epoch": 0.1122549158239636, "grad_norm": 246.5647430419922, "learning_rate": 9.996220186334217e-06, "loss": 10.5261, "step": 55570 }, { "epoch": 0.11227511645664742, "grad_norm": 212.084716796875, "learning_rate": 9.996206603827105e-06, "loss": 20.7141, "step": 55580 }, { "epoch": 0.11229531708933124, "grad_norm": 587.3858642578125, "learning_rate": 9.99619299696909e-06, "loss": 50.841, "step": 55590 }, { "epoch": 0.11231551772201505, "grad_norm": 402.7237243652344, "learning_rate": 9.996179365760235e-06, "loss": 28.4183, "step": 55600 }, { "epoch": 0.11233571835469887, "grad_norm": 238.6761932373047, "learning_rate": 9.996165710200607e-06, "loss": 42.3101, "step": 55610 }, { "epoch": 0.11235591898738269, "grad_norm": 294.3119201660156, "learning_rate": 9.996152030290276e-06, "loss": 28.6469, "step": 55620 }, { "epoch": 0.1123761196200665, "grad_norm": 177.66180419921875, "learning_rate": 9.996138326029303e-06, "loss": 38.8774, "step": 55630 }, { "epoch": 0.11239632025275031, "grad_norm": 354.15087890625, "learning_rate": 9.99612459741776e-06, "loss": 12.4942, "step": 55640 }, { "epoch": 0.11241652088543413, "grad_norm": 566.0038452148438, "learning_rate": 9.99611084445571e-06, "loss": 38.1022, "step": 55650 }, { "epoch": 0.11243672151811794, "grad_norm": 297.788818359375, "learning_rate": 9.996097067143223e-06, "loss": 20.9869, "step": 55660 }, { "epoch": 0.11245692215080176, "grad_norm": 348.9312438964844, "learning_rate": 9.996083265480366e-06, "loss": 16.3948, "step": 55670 }, { "epoch": 0.11247712278348558, "grad_norm": 486.8305969238281, "learning_rate": 9.996069439467203e-06, "loss": 16.4553, "step": 55680 }, { "epoch": 0.1124973234161694, "grad_norm": 260.0607604980469, "learning_rate": 9.996055589103804e-06, "loss": 46.7094, "step": 55690 }, { "epoch": 0.1125175240488532, "grad_norm": 151.2454376220703, "learning_rate": 9.996041714390235e-06, "loss": 25.3334, "step": 55700 }, { "epoch": 0.11253772468153703, "grad_norm": 114.37619018554688, "learning_rate": 9.996027815326565e-06, "loss": 29.8307, "step": 55710 }, { "epoch": 0.11255792531422085, "grad_norm": 256.87957763671875, "learning_rate": 9.996013891912862e-06, "loss": 23.291, "step": 55720 }, { "epoch": 0.11257812594690465, "grad_norm": 66.24264526367188, "learning_rate": 9.995999944149192e-06, "loss": 28.0676, "step": 55730 }, { "epoch": 0.11259832657958847, "grad_norm": 312.0833740234375, "learning_rate": 9.995985972035626e-06, "loss": 21.1501, "step": 55740 }, { "epoch": 0.11261852721227229, "grad_norm": 78.77447509765625, "learning_rate": 9.995971975572231e-06, "loss": 19.401, "step": 55750 }, { "epoch": 0.1126387278449561, "grad_norm": 353.7744140625, "learning_rate": 9.995957954759073e-06, "loss": 29.008, "step": 55760 }, { "epoch": 0.11265892847763992, "grad_norm": 236.60470581054688, "learning_rate": 9.995943909596222e-06, "loss": 26.8386, "step": 55770 }, { "epoch": 0.11267912911032374, "grad_norm": 116.1494369506836, "learning_rate": 9.995929840083746e-06, "loss": 16.9905, "step": 55780 }, { "epoch": 0.11269932974300755, "grad_norm": 299.65655517578125, "learning_rate": 9.995915746221715e-06, "loss": 41.4537, "step": 55790 }, { "epoch": 0.11271953037569137, "grad_norm": 298.9288635253906, "learning_rate": 9.995901628010196e-06, "loss": 19.4023, "step": 55800 }, { "epoch": 0.11273973100837519, "grad_norm": 571.8767700195312, "learning_rate": 9.995887485449257e-06, "loss": 25.1349, "step": 55810 }, { "epoch": 0.11275993164105899, "grad_norm": 285.6886291503906, "learning_rate": 9.99587331853897e-06, "loss": 13.3089, "step": 55820 }, { "epoch": 0.11278013227374281, "grad_norm": 209.84771728515625, "learning_rate": 9.995859127279402e-06, "loss": 29.0732, "step": 55830 }, { "epoch": 0.11280033290642663, "grad_norm": 859.5326538085938, "learning_rate": 9.995844911670623e-06, "loss": 31.0665, "step": 55840 }, { "epoch": 0.11282053353911045, "grad_norm": 257.53594970703125, "learning_rate": 9.995830671712701e-06, "loss": 31.5598, "step": 55850 }, { "epoch": 0.11284073417179426, "grad_norm": 149.15843200683594, "learning_rate": 9.995816407405708e-06, "loss": 18.4485, "step": 55860 }, { "epoch": 0.11286093480447808, "grad_norm": 299.5854797363281, "learning_rate": 9.995802118749708e-06, "loss": 15.4007, "step": 55870 }, { "epoch": 0.1128811354371619, "grad_norm": 566.4382934570312, "learning_rate": 9.995787805744778e-06, "loss": 40.2616, "step": 55880 }, { "epoch": 0.1129013360698457, "grad_norm": 218.10276794433594, "learning_rate": 9.995773468390983e-06, "loss": 56.1534, "step": 55890 }, { "epoch": 0.11292153670252952, "grad_norm": 293.6954345703125, "learning_rate": 9.995759106688394e-06, "loss": 23.4557, "step": 55900 }, { "epoch": 0.11294173733521334, "grad_norm": 240.96194458007812, "learning_rate": 9.99574472063708e-06, "loss": 25.4571, "step": 55910 }, { "epoch": 0.11296193796789715, "grad_norm": 350.2251892089844, "learning_rate": 9.995730310237113e-06, "loss": 19.1962, "step": 55920 }, { "epoch": 0.11298213860058097, "grad_norm": 415.62481689453125, "learning_rate": 9.995715875488563e-06, "loss": 32.9966, "step": 55930 }, { "epoch": 0.11300233923326479, "grad_norm": 331.7501220703125, "learning_rate": 9.9957014163915e-06, "loss": 23.0287, "step": 55940 }, { "epoch": 0.1130225398659486, "grad_norm": 269.7156677246094, "learning_rate": 9.995686932945993e-06, "loss": 22.6468, "step": 55950 }, { "epoch": 0.11304274049863242, "grad_norm": 516.4306030273438, "learning_rate": 9.995672425152115e-06, "loss": 21.4522, "step": 55960 }, { "epoch": 0.11306294113131624, "grad_norm": 244.93951416015625, "learning_rate": 9.995657893009933e-06, "loss": 20.3903, "step": 55970 }, { "epoch": 0.11308314176400004, "grad_norm": 268.0805358886719, "learning_rate": 9.995643336519523e-06, "loss": 71.4901, "step": 55980 }, { "epoch": 0.11310334239668386, "grad_norm": 13.511696815490723, "learning_rate": 9.995628755680952e-06, "loss": 15.6134, "step": 55990 }, { "epoch": 0.11312354302936768, "grad_norm": 625.4601440429688, "learning_rate": 9.995614150494293e-06, "loss": 36.4829, "step": 56000 }, { "epoch": 0.1131437436620515, "grad_norm": 330.9189758300781, "learning_rate": 9.995599520959615e-06, "loss": 22.9784, "step": 56010 }, { "epoch": 0.11316394429473531, "grad_norm": 195.6566925048828, "learning_rate": 9.995584867076994e-06, "loss": 31.8342, "step": 56020 }, { "epoch": 0.11318414492741913, "grad_norm": 114.97940826416016, "learning_rate": 9.995570188846495e-06, "loss": 21.0331, "step": 56030 }, { "epoch": 0.11320434556010295, "grad_norm": 403.75421142578125, "learning_rate": 9.995555486268193e-06, "loss": 24.9634, "step": 56040 }, { "epoch": 0.11322454619278675, "grad_norm": 283.7743835449219, "learning_rate": 9.995540759342161e-06, "loss": 25.5978, "step": 56050 }, { "epoch": 0.11324474682547057, "grad_norm": 143.7545928955078, "learning_rate": 9.995526008068469e-06, "loss": 35.9499, "step": 56060 }, { "epoch": 0.1132649474581544, "grad_norm": 619.2141723632812, "learning_rate": 9.995511232447189e-06, "loss": 42.4843, "step": 56070 }, { "epoch": 0.1132851480908382, "grad_norm": 387.1339111328125, "learning_rate": 9.995496432478392e-06, "loss": 26.6714, "step": 56080 }, { "epoch": 0.11330534872352202, "grad_norm": 111.55400085449219, "learning_rate": 9.995481608162154e-06, "loss": 34.8469, "step": 56090 }, { "epoch": 0.11332554935620584, "grad_norm": 692.3009033203125, "learning_rate": 9.995466759498543e-06, "loss": 42.4702, "step": 56100 }, { "epoch": 0.11334574998888965, "grad_norm": 135.44497680664062, "learning_rate": 9.995451886487632e-06, "loss": 15.3559, "step": 56110 }, { "epoch": 0.11336595062157347, "grad_norm": 150.6249237060547, "learning_rate": 9.995436989129495e-06, "loss": 13.8931, "step": 56120 }, { "epoch": 0.11338615125425729, "grad_norm": 253.2174530029297, "learning_rate": 9.995422067424203e-06, "loss": 20.5135, "step": 56130 }, { "epoch": 0.11340635188694109, "grad_norm": 253.5105438232422, "learning_rate": 9.995407121371832e-06, "loss": 67.3374, "step": 56140 }, { "epoch": 0.11342655251962491, "grad_norm": 344.0068664550781, "learning_rate": 9.995392150972451e-06, "loss": 22.4025, "step": 56150 }, { "epoch": 0.11344675315230873, "grad_norm": 110.14144897460938, "learning_rate": 9.995377156226133e-06, "loss": 16.1749, "step": 56160 }, { "epoch": 0.11346695378499255, "grad_norm": 311.9841613769531, "learning_rate": 9.995362137132956e-06, "loss": 20.6258, "step": 56170 }, { "epoch": 0.11348715441767636, "grad_norm": 280.3816223144531, "learning_rate": 9.995347093692987e-06, "loss": 44.1587, "step": 56180 }, { "epoch": 0.11350735505036018, "grad_norm": 141.09727478027344, "learning_rate": 9.995332025906304e-06, "loss": 19.154, "step": 56190 }, { "epoch": 0.113527555683044, "grad_norm": 388.6886291503906, "learning_rate": 9.995316933772978e-06, "loss": 29.2458, "step": 56200 }, { "epoch": 0.1135477563157278, "grad_norm": 140.4736328125, "learning_rate": 9.995301817293084e-06, "loss": 25.7489, "step": 56210 }, { "epoch": 0.11356795694841162, "grad_norm": 239.93907165527344, "learning_rate": 9.995286676466694e-06, "loss": 24.0478, "step": 56220 }, { "epoch": 0.11358815758109544, "grad_norm": 453.95379638671875, "learning_rate": 9.995271511293881e-06, "loss": 30.1414, "step": 56230 }, { "epoch": 0.11360835821377925, "grad_norm": 331.7467956542969, "learning_rate": 9.995256321774722e-06, "loss": 26.402, "step": 56240 }, { "epoch": 0.11362855884646307, "grad_norm": 899.1552734375, "learning_rate": 9.99524110790929e-06, "loss": 30.2267, "step": 56250 }, { "epoch": 0.11364875947914689, "grad_norm": 153.71461486816406, "learning_rate": 9.995225869697657e-06, "loss": 19.3431, "step": 56260 }, { "epoch": 0.1136689601118307, "grad_norm": 507.67333984375, "learning_rate": 9.9952106071399e-06, "loss": 33.8425, "step": 56270 }, { "epoch": 0.11368916074451452, "grad_norm": 153.02943420410156, "learning_rate": 9.995195320236093e-06, "loss": 15.2612, "step": 56280 }, { "epoch": 0.11370936137719834, "grad_norm": 177.84576416015625, "learning_rate": 9.995180008986309e-06, "loss": 26.5994, "step": 56290 }, { "epoch": 0.11372956200988214, "grad_norm": 78.94143676757812, "learning_rate": 9.995164673390624e-06, "loss": 15.5149, "step": 56300 }, { "epoch": 0.11374976264256596, "grad_norm": 410.3663330078125, "learning_rate": 9.995149313449114e-06, "loss": 32.3468, "step": 56310 }, { "epoch": 0.11376996327524978, "grad_norm": 398.2605285644531, "learning_rate": 9.995133929161848e-06, "loss": 39.5699, "step": 56320 }, { "epoch": 0.1137901639079336, "grad_norm": 60.753662109375, "learning_rate": 9.995118520528908e-06, "loss": 23.7165, "step": 56330 }, { "epoch": 0.11381036454061741, "grad_norm": 202.7401885986328, "learning_rate": 9.995103087550366e-06, "loss": 34.9136, "step": 56340 }, { "epoch": 0.11383056517330123, "grad_norm": 496.8358459472656, "learning_rate": 9.995087630226295e-06, "loss": 22.2819, "step": 56350 }, { "epoch": 0.11385076580598505, "grad_norm": 583.8392333984375, "learning_rate": 9.995072148556776e-06, "loss": 38.2525, "step": 56360 }, { "epoch": 0.11387096643866886, "grad_norm": 210.76284790039062, "learning_rate": 9.995056642541879e-06, "loss": 32.7455, "step": 56370 }, { "epoch": 0.11389116707135268, "grad_norm": 349.40887451171875, "learning_rate": 9.995041112181683e-06, "loss": 23.4268, "step": 56380 }, { "epoch": 0.1139113677040365, "grad_norm": 331.050537109375, "learning_rate": 9.99502555747626e-06, "loss": 22.3393, "step": 56390 }, { "epoch": 0.1139315683367203, "grad_norm": 126.42340850830078, "learning_rate": 9.995009978425692e-06, "loss": 24.1087, "step": 56400 }, { "epoch": 0.11395176896940412, "grad_norm": 203.77980041503906, "learning_rate": 9.994994375030048e-06, "loss": 18.141, "step": 56410 }, { "epoch": 0.11397196960208794, "grad_norm": 219.4334259033203, "learning_rate": 9.994978747289408e-06, "loss": 24.9965, "step": 56420 }, { "epoch": 0.11399217023477175, "grad_norm": 206.49668884277344, "learning_rate": 9.994963095203849e-06, "loss": 21.7502, "step": 56430 }, { "epoch": 0.11401237086745557, "grad_norm": 404.4913330078125, "learning_rate": 9.994947418773445e-06, "loss": 19.8698, "step": 56440 }, { "epoch": 0.11403257150013939, "grad_norm": 22.792964935302734, "learning_rate": 9.994931717998272e-06, "loss": 24.2364, "step": 56450 }, { "epoch": 0.1140527721328232, "grad_norm": 435.1682434082031, "learning_rate": 9.99491599287841e-06, "loss": 27.604, "step": 56460 }, { "epoch": 0.11407297276550701, "grad_norm": 632.3375244140625, "learning_rate": 9.99490024341393e-06, "loss": 36.4259, "step": 56470 }, { "epoch": 0.11409317339819083, "grad_norm": 392.5815734863281, "learning_rate": 9.994884469604913e-06, "loss": 30.5421, "step": 56480 }, { "epoch": 0.11411337403087465, "grad_norm": 369.9122619628906, "learning_rate": 9.994868671451436e-06, "loss": 20.9246, "step": 56490 }, { "epoch": 0.11413357466355846, "grad_norm": 270.46820068359375, "learning_rate": 9.994852848953574e-06, "loss": 28.2486, "step": 56500 }, { "epoch": 0.11415377529624228, "grad_norm": 162.85911560058594, "learning_rate": 9.994837002111407e-06, "loss": 28.6467, "step": 56510 }, { "epoch": 0.1141739759289261, "grad_norm": 482.07562255859375, "learning_rate": 9.994821130925007e-06, "loss": 22.8006, "step": 56520 }, { "epoch": 0.1141941765616099, "grad_norm": 330.98114013671875, "learning_rate": 9.994805235394456e-06, "loss": 26.1827, "step": 56530 }, { "epoch": 0.11421437719429373, "grad_norm": 153.094482421875, "learning_rate": 9.99478931551983e-06, "loss": 36.259, "step": 56540 }, { "epoch": 0.11423457782697755, "grad_norm": 484.9561767578125, "learning_rate": 9.994773371301208e-06, "loss": 40.0326, "step": 56550 }, { "epoch": 0.11425477845966135, "grad_norm": 624.584716796875, "learning_rate": 9.994757402738666e-06, "loss": 47.2495, "step": 56560 }, { "epoch": 0.11427497909234517, "grad_norm": 147.15438842773438, "learning_rate": 9.99474140983228e-06, "loss": 29.1344, "step": 56570 }, { "epoch": 0.11429517972502899, "grad_norm": 247.71958923339844, "learning_rate": 9.994725392582132e-06, "loss": 22.5732, "step": 56580 }, { "epoch": 0.1143153803577128, "grad_norm": 335.0296630859375, "learning_rate": 9.994709350988299e-06, "loss": 13.0115, "step": 56590 }, { "epoch": 0.11433558099039662, "grad_norm": 161.0751190185547, "learning_rate": 9.994693285050858e-06, "loss": 22.6273, "step": 56600 }, { "epoch": 0.11435578162308044, "grad_norm": 157.8527374267578, "learning_rate": 9.994677194769886e-06, "loss": 24.5205, "step": 56610 }, { "epoch": 0.11437598225576424, "grad_norm": 163.2815704345703, "learning_rate": 9.994661080145464e-06, "loss": 31.7056, "step": 56620 }, { "epoch": 0.11439618288844806, "grad_norm": 247.09072875976562, "learning_rate": 9.99464494117767e-06, "loss": 25.3225, "step": 56630 }, { "epoch": 0.11441638352113188, "grad_norm": 110.21215057373047, "learning_rate": 9.994628777866582e-06, "loss": 25.684, "step": 56640 }, { "epoch": 0.1144365841538157, "grad_norm": 291.3282775878906, "learning_rate": 9.99461259021228e-06, "loss": 37.3551, "step": 56650 }, { "epoch": 0.11445678478649951, "grad_norm": 136.4427032470703, "learning_rate": 9.99459637821484e-06, "loss": 24.9859, "step": 56660 }, { "epoch": 0.11447698541918333, "grad_norm": 97.61874389648438, "learning_rate": 9.994580141874345e-06, "loss": 21.286, "step": 56670 }, { "epoch": 0.11449718605186715, "grad_norm": 441.978271484375, "learning_rate": 9.994563881190874e-06, "loss": 19.3143, "step": 56680 }, { "epoch": 0.11451738668455096, "grad_norm": 119.06640625, "learning_rate": 9.9945475961645e-06, "loss": 39.3967, "step": 56690 }, { "epoch": 0.11453758731723478, "grad_norm": 505.91131591796875, "learning_rate": 9.994531286795309e-06, "loss": 38.7248, "step": 56700 }, { "epoch": 0.1145577879499186, "grad_norm": 128.2786407470703, "learning_rate": 9.994514953083379e-06, "loss": 19.6084, "step": 56710 }, { "epoch": 0.1145779885826024, "grad_norm": 242.7487335205078, "learning_rate": 9.994498595028787e-06, "loss": 17.1823, "step": 56720 }, { "epoch": 0.11459818921528622, "grad_norm": 271.3013610839844, "learning_rate": 9.994482212631616e-06, "loss": 23.3666, "step": 56730 }, { "epoch": 0.11461838984797004, "grad_norm": 323.2547607421875, "learning_rate": 9.994465805891944e-06, "loss": 36.61, "step": 56740 }, { "epoch": 0.11463859048065385, "grad_norm": 357.11175537109375, "learning_rate": 9.994449374809851e-06, "loss": 28.6268, "step": 56750 }, { "epoch": 0.11465879111333767, "grad_norm": 342.7884826660156, "learning_rate": 9.994432919385417e-06, "loss": 24.6382, "step": 56760 }, { "epoch": 0.11467899174602149, "grad_norm": 916.4397583007812, "learning_rate": 9.994416439618723e-06, "loss": 33.1571, "step": 56770 }, { "epoch": 0.1146991923787053, "grad_norm": 298.98992919921875, "learning_rate": 9.994399935509851e-06, "loss": 27.189, "step": 56780 }, { "epoch": 0.11471939301138911, "grad_norm": 405.8646545410156, "learning_rate": 9.994383407058878e-06, "loss": 22.565, "step": 56790 }, { "epoch": 0.11473959364407293, "grad_norm": 61.12555694580078, "learning_rate": 9.994366854265886e-06, "loss": 35.7585, "step": 56800 }, { "epoch": 0.11475979427675675, "grad_norm": 251.25845336914062, "learning_rate": 9.994350277130956e-06, "loss": 29.1007, "step": 56810 }, { "epoch": 0.11477999490944056, "grad_norm": 276.25433349609375, "learning_rate": 9.994333675654169e-06, "loss": 32.9132, "step": 56820 }, { "epoch": 0.11480019554212438, "grad_norm": 609.2211303710938, "learning_rate": 9.994317049835604e-06, "loss": 35.6791, "step": 56830 }, { "epoch": 0.1148203961748082, "grad_norm": 586.57763671875, "learning_rate": 9.994300399675342e-06, "loss": 19.8828, "step": 56840 }, { "epoch": 0.114840596807492, "grad_norm": 54.14316940307617, "learning_rate": 9.994283725173468e-06, "loss": 16.7143, "step": 56850 }, { "epoch": 0.11486079744017583, "grad_norm": 277.7856750488281, "learning_rate": 9.994267026330063e-06, "loss": 29.47, "step": 56860 }, { "epoch": 0.11488099807285965, "grad_norm": 258.54620361328125, "learning_rate": 9.994250303145203e-06, "loss": 32.8883, "step": 56870 }, { "epoch": 0.11490119870554345, "grad_norm": 279.69146728515625, "learning_rate": 9.994233555618973e-06, "loss": 31.5662, "step": 56880 }, { "epoch": 0.11492139933822727, "grad_norm": 339.6410217285156, "learning_rate": 9.994216783751457e-06, "loss": 19.5512, "step": 56890 }, { "epoch": 0.11494159997091109, "grad_norm": 400.9548645019531, "learning_rate": 9.99419998754273e-06, "loss": 19.7477, "step": 56900 }, { "epoch": 0.1149618006035949, "grad_norm": 257.39825439453125, "learning_rate": 9.99418316699288e-06, "loss": 17.816, "step": 56910 }, { "epoch": 0.11498200123627872, "grad_norm": 497.29071044921875, "learning_rate": 9.994166322101988e-06, "loss": 22.5001, "step": 56920 }, { "epoch": 0.11500220186896254, "grad_norm": 102.57048034667969, "learning_rate": 9.994149452870133e-06, "loss": 18.8315, "step": 56930 }, { "epoch": 0.11502240250164635, "grad_norm": 65.16831970214844, "learning_rate": 9.9941325592974e-06, "loss": 27.4809, "step": 56940 }, { "epoch": 0.11504260313433017, "grad_norm": 314.6603698730469, "learning_rate": 9.994115641383872e-06, "loss": 16.6416, "step": 56950 }, { "epoch": 0.11506280376701399, "grad_norm": 392.1504821777344, "learning_rate": 9.994098699129628e-06, "loss": 26.1088, "step": 56960 }, { "epoch": 0.1150830043996978, "grad_norm": 362.40936279296875, "learning_rate": 9.994081732534755e-06, "loss": 34.9431, "step": 56970 }, { "epoch": 0.11510320503238161, "grad_norm": 320.8494567871094, "learning_rate": 9.994064741599332e-06, "loss": 37.1496, "step": 56980 }, { "epoch": 0.11512340566506543, "grad_norm": 114.26335144042969, "learning_rate": 9.994047726323442e-06, "loss": 36.6114, "step": 56990 }, { "epoch": 0.11514360629774925, "grad_norm": 96.81778717041016, "learning_rate": 9.994030686707171e-06, "loss": 23.9523, "step": 57000 }, { "epoch": 0.11516380693043306, "grad_norm": 420.2095031738281, "learning_rate": 9.9940136227506e-06, "loss": 24.9919, "step": 57010 }, { "epoch": 0.11518400756311688, "grad_norm": 336.4881286621094, "learning_rate": 9.993996534453812e-06, "loss": 35.2103, "step": 57020 }, { "epoch": 0.1152042081958007, "grad_norm": 399.7489929199219, "learning_rate": 9.993979421816889e-06, "loss": 26.0697, "step": 57030 }, { "epoch": 0.1152244088284845, "grad_norm": 264.2169189453125, "learning_rate": 9.993962284839918e-06, "loss": 12.1086, "step": 57040 }, { "epoch": 0.11524460946116832, "grad_norm": 384.7521057128906, "learning_rate": 9.99394512352298e-06, "loss": 19.9703, "step": 57050 }, { "epoch": 0.11526481009385214, "grad_norm": 47.6986083984375, "learning_rate": 9.993927937866158e-06, "loss": 18.5779, "step": 57060 }, { "epoch": 0.11528501072653595, "grad_norm": 370.5284729003906, "learning_rate": 9.993910727869538e-06, "loss": 16.0233, "step": 57070 }, { "epoch": 0.11530521135921977, "grad_norm": 847.4443359375, "learning_rate": 9.993893493533203e-06, "loss": 31.5699, "step": 57080 }, { "epoch": 0.11532541199190359, "grad_norm": 237.04823303222656, "learning_rate": 9.993876234857236e-06, "loss": 27.4908, "step": 57090 }, { "epoch": 0.1153456126245874, "grad_norm": 462.86871337890625, "learning_rate": 9.993858951841724e-06, "loss": 32.9984, "step": 57100 }, { "epoch": 0.11536581325727122, "grad_norm": 258.5721740722656, "learning_rate": 9.993841644486747e-06, "loss": 24.6506, "step": 57110 }, { "epoch": 0.11538601388995504, "grad_norm": 237.86985778808594, "learning_rate": 9.993824312792393e-06, "loss": 21.8508, "step": 57120 }, { "epoch": 0.11540621452263886, "grad_norm": 386.3475341796875, "learning_rate": 9.993806956758743e-06, "loss": 35.1498, "step": 57130 }, { "epoch": 0.11542641515532266, "grad_norm": 268.44921875, "learning_rate": 9.993789576385884e-06, "loss": 29.8847, "step": 57140 }, { "epoch": 0.11544661578800648, "grad_norm": 241.96156311035156, "learning_rate": 9.993772171673901e-06, "loss": 27.2748, "step": 57150 }, { "epoch": 0.1154668164206903, "grad_norm": 355.00787353515625, "learning_rate": 9.993754742622879e-06, "loss": 21.6485, "step": 57160 }, { "epoch": 0.11548701705337411, "grad_norm": 269.3025207519531, "learning_rate": 9.993737289232902e-06, "loss": 29.8238, "step": 57170 }, { "epoch": 0.11550721768605793, "grad_norm": 274.2128601074219, "learning_rate": 9.993719811504053e-06, "loss": 47.0738, "step": 57180 }, { "epoch": 0.11552741831874175, "grad_norm": 447.0065002441406, "learning_rate": 9.993702309436419e-06, "loss": 33.02, "step": 57190 }, { "epoch": 0.11554761895142555, "grad_norm": 228.51173400878906, "learning_rate": 9.99368478303009e-06, "loss": 33.7036, "step": 57200 }, { "epoch": 0.11556781958410937, "grad_norm": 398.73663330078125, "learning_rate": 9.993667232285142e-06, "loss": 29.3223, "step": 57210 }, { "epoch": 0.1155880202167932, "grad_norm": 266.3036193847656, "learning_rate": 9.993649657201669e-06, "loss": 37.8779, "step": 57220 }, { "epoch": 0.115608220849477, "grad_norm": 479.6033630371094, "learning_rate": 9.993632057779752e-06, "loss": 28.9317, "step": 57230 }, { "epoch": 0.11562842148216082, "grad_norm": 211.12905883789062, "learning_rate": 9.993614434019476e-06, "loss": 24.459, "step": 57240 }, { "epoch": 0.11564862211484464, "grad_norm": 234.18621826171875, "learning_rate": 9.993596785920932e-06, "loss": 22.2034, "step": 57250 }, { "epoch": 0.11566882274752845, "grad_norm": 421.3722229003906, "learning_rate": 9.993579113484202e-06, "loss": 17.2644, "step": 57260 }, { "epoch": 0.11568902338021227, "grad_norm": 419.07708740234375, "learning_rate": 9.993561416709372e-06, "loss": 20.3429, "step": 57270 }, { "epoch": 0.11570922401289609, "grad_norm": 537.0961303710938, "learning_rate": 9.99354369559653e-06, "loss": 17.7134, "step": 57280 }, { "epoch": 0.1157294246455799, "grad_norm": 299.83758544921875, "learning_rate": 9.993525950145761e-06, "loss": 32.1745, "step": 57290 }, { "epoch": 0.11574962527826371, "grad_norm": 232.31634521484375, "learning_rate": 9.993508180357154e-06, "loss": 24.0154, "step": 57300 }, { "epoch": 0.11576982591094753, "grad_norm": 234.57496643066406, "learning_rate": 9.993490386230793e-06, "loss": 17.5588, "step": 57310 }, { "epoch": 0.11579002654363135, "grad_norm": 106.5687484741211, "learning_rate": 9.993472567766764e-06, "loss": 42.1031, "step": 57320 }, { "epoch": 0.11581022717631516, "grad_norm": 196.64889526367188, "learning_rate": 9.993454724965157e-06, "loss": 40.0228, "step": 57330 }, { "epoch": 0.11583042780899898, "grad_norm": 369.8841552734375, "learning_rate": 9.993436857826058e-06, "loss": 20.4496, "step": 57340 }, { "epoch": 0.1158506284416828, "grad_norm": 818.25537109375, "learning_rate": 9.993418966349551e-06, "loss": 42.6559, "step": 57350 }, { "epoch": 0.1158708290743666, "grad_norm": 105.40882110595703, "learning_rate": 9.993401050535726e-06, "loss": 24.6452, "step": 57360 }, { "epoch": 0.11589102970705042, "grad_norm": 264.6867370605469, "learning_rate": 9.993383110384673e-06, "loss": 25.2028, "step": 57370 }, { "epoch": 0.11591123033973424, "grad_norm": 486.58428955078125, "learning_rate": 9.993365145896473e-06, "loss": 34.0316, "step": 57380 }, { "epoch": 0.11593143097241805, "grad_norm": 172.103515625, "learning_rate": 9.993347157071218e-06, "loss": 39.0784, "step": 57390 }, { "epoch": 0.11595163160510187, "grad_norm": 321.31170654296875, "learning_rate": 9.993329143908994e-06, "loss": 17.4031, "step": 57400 }, { "epoch": 0.11597183223778569, "grad_norm": 398.4848327636719, "learning_rate": 9.993311106409891e-06, "loss": 37.2044, "step": 57410 }, { "epoch": 0.1159920328704695, "grad_norm": 313.95977783203125, "learning_rate": 9.993293044573995e-06, "loss": 26.3, "step": 57420 }, { "epoch": 0.11601223350315332, "grad_norm": 194.95750427246094, "learning_rate": 9.993274958401392e-06, "loss": 16.3248, "step": 57430 }, { "epoch": 0.11603243413583714, "grad_norm": 604.9672241210938, "learning_rate": 9.993256847892175e-06, "loss": 33.4554, "step": 57440 }, { "epoch": 0.11605263476852096, "grad_norm": 158.6489715576172, "learning_rate": 9.993238713046428e-06, "loss": 23.8083, "step": 57450 }, { "epoch": 0.11607283540120476, "grad_norm": 467.1813659667969, "learning_rate": 9.993220553864242e-06, "loss": 25.1391, "step": 57460 }, { "epoch": 0.11609303603388858, "grad_norm": 325.73822021484375, "learning_rate": 9.993202370345705e-06, "loss": 35.3902, "step": 57470 }, { "epoch": 0.1161132366665724, "grad_norm": 360.4731750488281, "learning_rate": 9.993184162490903e-06, "loss": 31.605, "step": 57480 }, { "epoch": 0.11613343729925621, "grad_norm": 3013.0234375, "learning_rate": 9.99316593029993e-06, "loss": 19.4119, "step": 57490 }, { "epoch": 0.11615363793194003, "grad_norm": 167.80923461914062, "learning_rate": 9.993147673772869e-06, "loss": 11.4998, "step": 57500 }, { "epoch": 0.11617383856462385, "grad_norm": 43.22615051269531, "learning_rate": 9.993129392909814e-06, "loss": 14.3428, "step": 57510 }, { "epoch": 0.11619403919730766, "grad_norm": 370.9212341308594, "learning_rate": 9.993111087710852e-06, "loss": 30.0697, "step": 57520 }, { "epoch": 0.11621423982999148, "grad_norm": 200.44580078125, "learning_rate": 9.993092758176071e-06, "loss": 27.21, "step": 57530 }, { "epoch": 0.1162344404626753, "grad_norm": 439.9917907714844, "learning_rate": 9.993074404305563e-06, "loss": 45.7154, "step": 57540 }, { "epoch": 0.1162546410953591, "grad_norm": 224.69369506835938, "learning_rate": 9.993056026099415e-06, "loss": 21.3037, "step": 57550 }, { "epoch": 0.11627484172804292, "grad_norm": 717.5725708007812, "learning_rate": 9.993037623557716e-06, "loss": 26.365, "step": 57560 }, { "epoch": 0.11629504236072674, "grad_norm": 329.2342834472656, "learning_rate": 9.993019196680558e-06, "loss": 32.1588, "step": 57570 }, { "epoch": 0.11631524299341055, "grad_norm": 142.0894775390625, "learning_rate": 9.993000745468031e-06, "loss": 33.7871, "step": 57580 }, { "epoch": 0.11633544362609437, "grad_norm": 205.23207092285156, "learning_rate": 9.992982269920223e-06, "loss": 35.2067, "step": 57590 }, { "epoch": 0.11635564425877819, "grad_norm": 277.56072998046875, "learning_rate": 9.992963770037227e-06, "loss": 32.7253, "step": 57600 }, { "epoch": 0.11637584489146201, "grad_norm": 389.6510314941406, "learning_rate": 9.99294524581913e-06, "loss": 27.7489, "step": 57610 }, { "epoch": 0.11639604552414581, "grad_norm": 400.34222412109375, "learning_rate": 9.992926697266023e-06, "loss": 32.1503, "step": 57620 }, { "epoch": 0.11641624615682963, "grad_norm": 158.33193969726562, "learning_rate": 9.992908124377997e-06, "loss": 28.6788, "step": 57630 }, { "epoch": 0.11643644678951345, "grad_norm": 86.4566879272461, "learning_rate": 9.992889527155143e-06, "loss": 15.907, "step": 57640 }, { "epoch": 0.11645664742219726, "grad_norm": 182.49945068359375, "learning_rate": 9.992870905597549e-06, "loss": 23.3574, "step": 57650 }, { "epoch": 0.11647684805488108, "grad_norm": 237.85972595214844, "learning_rate": 9.99285225970531e-06, "loss": 17.4379, "step": 57660 }, { "epoch": 0.1164970486875649, "grad_norm": 558.4737548828125, "learning_rate": 9.992833589478513e-06, "loss": 33.7654, "step": 57670 }, { "epoch": 0.1165172493202487, "grad_norm": 153.9173583984375, "learning_rate": 9.992814894917251e-06, "loss": 32.458, "step": 57680 }, { "epoch": 0.11653744995293253, "grad_norm": 245.08447265625, "learning_rate": 9.992796176021616e-06, "loss": 33.5103, "step": 57690 }, { "epoch": 0.11655765058561635, "grad_norm": 400.8786315917969, "learning_rate": 9.992777432791697e-06, "loss": 31.5172, "step": 57700 }, { "epoch": 0.11657785121830015, "grad_norm": 177.2507781982422, "learning_rate": 9.992758665227586e-06, "loss": 22.173, "step": 57710 }, { "epoch": 0.11659805185098397, "grad_norm": 230.39956665039062, "learning_rate": 9.992739873329375e-06, "loss": 20.745, "step": 57720 }, { "epoch": 0.11661825248366779, "grad_norm": 295.79986572265625, "learning_rate": 9.992721057097157e-06, "loss": 24.4786, "step": 57730 }, { "epoch": 0.1166384531163516, "grad_norm": 0.0, "learning_rate": 9.99270221653102e-06, "loss": 29.5727, "step": 57740 }, { "epoch": 0.11665865374903542, "grad_norm": 148.14878845214844, "learning_rate": 9.99268335163106e-06, "loss": 14.8937, "step": 57750 }, { "epoch": 0.11667885438171924, "grad_norm": 266.6999206542969, "learning_rate": 9.992664462397366e-06, "loss": 25.5763, "step": 57760 }, { "epoch": 0.11669905501440304, "grad_norm": 221.40333557128906, "learning_rate": 9.99264554883003e-06, "loss": 38.4859, "step": 57770 }, { "epoch": 0.11671925564708686, "grad_norm": 1092.9656982421875, "learning_rate": 9.992626610929146e-06, "loss": 40.8613, "step": 57780 }, { "epoch": 0.11673945627977068, "grad_norm": 413.777587890625, "learning_rate": 9.992607648694805e-06, "loss": 44.9294, "step": 57790 }, { "epoch": 0.1167596569124545, "grad_norm": 593.9998779296875, "learning_rate": 9.9925886621271e-06, "loss": 43.0056, "step": 57800 }, { "epoch": 0.11677985754513831, "grad_norm": 475.001708984375, "learning_rate": 9.992569651226123e-06, "loss": 26.6464, "step": 57810 }, { "epoch": 0.11680005817782213, "grad_norm": 323.5718688964844, "learning_rate": 9.992550615991968e-06, "loss": 24.8182, "step": 57820 }, { "epoch": 0.11682025881050595, "grad_norm": 428.949951171875, "learning_rate": 9.992531556424726e-06, "loss": 23.3145, "step": 57830 }, { "epoch": 0.11684045944318976, "grad_norm": 499.9779357910156, "learning_rate": 9.992512472524491e-06, "loss": 28.5719, "step": 57840 }, { "epoch": 0.11686066007587358, "grad_norm": 461.8590087890625, "learning_rate": 9.992493364291356e-06, "loss": 19.2011, "step": 57850 }, { "epoch": 0.1168808607085574, "grad_norm": 442.87249755859375, "learning_rate": 9.992474231725412e-06, "loss": 11.0193, "step": 57860 }, { "epoch": 0.1169010613412412, "grad_norm": 372.9396057128906, "learning_rate": 9.992455074826757e-06, "loss": 41.8805, "step": 57870 }, { "epoch": 0.11692126197392502, "grad_norm": 225.491455078125, "learning_rate": 9.99243589359548e-06, "loss": 23.5409, "step": 57880 }, { "epoch": 0.11694146260660884, "grad_norm": 367.5392761230469, "learning_rate": 9.992416688031676e-06, "loss": 61.0873, "step": 57890 }, { "epoch": 0.11696166323929265, "grad_norm": 325.21466064453125, "learning_rate": 9.992397458135438e-06, "loss": 27.7964, "step": 57900 }, { "epoch": 0.11698186387197647, "grad_norm": 332.7278137207031, "learning_rate": 9.992378203906862e-06, "loss": 44.3652, "step": 57910 }, { "epoch": 0.11700206450466029, "grad_norm": 294.8116760253906, "learning_rate": 9.99235892534604e-06, "loss": 20.2365, "step": 57920 }, { "epoch": 0.1170222651373441, "grad_norm": 209.17709350585938, "learning_rate": 9.992339622453065e-06, "loss": 18.9584, "step": 57930 }, { "epoch": 0.11704246577002791, "grad_norm": 369.8221130371094, "learning_rate": 9.992320295228032e-06, "loss": 19.3343, "step": 57940 }, { "epoch": 0.11706266640271173, "grad_norm": 902.5731201171875, "learning_rate": 9.992300943671035e-06, "loss": 30.804, "step": 57950 }, { "epoch": 0.11708286703539555, "grad_norm": 378.2359924316406, "learning_rate": 9.99228156778217e-06, "loss": 37.8588, "step": 57960 }, { "epoch": 0.11710306766807936, "grad_norm": 905.7559204101562, "learning_rate": 9.99226216756153e-06, "loss": 37.6372, "step": 57970 }, { "epoch": 0.11712326830076318, "grad_norm": 267.1894226074219, "learning_rate": 9.99224274300921e-06, "loss": 27.692, "step": 57980 }, { "epoch": 0.117143468933447, "grad_norm": 132.7151336669922, "learning_rate": 9.992223294125303e-06, "loss": 31.0942, "step": 57990 }, { "epoch": 0.11716366956613081, "grad_norm": 256.0967102050781, "learning_rate": 9.992203820909906e-06, "loss": 16.1999, "step": 58000 }, { "epoch": 0.11718387019881463, "grad_norm": 1014.3612060546875, "learning_rate": 9.992184323363112e-06, "loss": 35.2244, "step": 58010 }, { "epoch": 0.11720407083149845, "grad_norm": 100.53959655761719, "learning_rate": 9.992164801485018e-06, "loss": 16.6405, "step": 58020 }, { "epoch": 0.11722427146418225, "grad_norm": 389.44049072265625, "learning_rate": 9.992145255275718e-06, "loss": 45.6165, "step": 58030 }, { "epoch": 0.11724447209686607, "grad_norm": 174.82296752929688, "learning_rate": 9.99212568473531e-06, "loss": 41.0026, "step": 58040 }, { "epoch": 0.1172646727295499, "grad_norm": 140.51516723632812, "learning_rate": 9.992106089863884e-06, "loss": 20.5486, "step": 58050 }, { "epoch": 0.1172848733622337, "grad_norm": 434.0888977050781, "learning_rate": 9.992086470661537e-06, "loss": 22.0969, "step": 58060 }, { "epoch": 0.11730507399491752, "grad_norm": 200.20823669433594, "learning_rate": 9.992066827128368e-06, "loss": 30.4417, "step": 58070 }, { "epoch": 0.11732527462760134, "grad_norm": 212.73338317871094, "learning_rate": 9.992047159264472e-06, "loss": 32.0729, "step": 58080 }, { "epoch": 0.11734547526028515, "grad_norm": 1332.333740234375, "learning_rate": 9.992027467069943e-06, "loss": 50.3281, "step": 58090 }, { "epoch": 0.11736567589296897, "grad_norm": 216.3925018310547, "learning_rate": 9.992007750544876e-06, "loss": 14.4566, "step": 58100 }, { "epoch": 0.11738587652565279, "grad_norm": 107.63963317871094, "learning_rate": 9.99198800968937e-06, "loss": 16.9797, "step": 58110 }, { "epoch": 0.1174060771583366, "grad_norm": 214.14712524414062, "learning_rate": 9.991968244503519e-06, "loss": 16.324, "step": 58120 }, { "epoch": 0.11742627779102041, "grad_norm": 88.31385803222656, "learning_rate": 9.991948454987422e-06, "loss": 34.5001, "step": 58130 }, { "epoch": 0.11744647842370423, "grad_norm": 247.99636840820312, "learning_rate": 9.99192864114117e-06, "loss": 24.787, "step": 58140 }, { "epoch": 0.11746667905638805, "grad_norm": 297.07244873046875, "learning_rate": 9.991908802964867e-06, "loss": 24.9379, "step": 58150 }, { "epoch": 0.11748687968907186, "grad_norm": 301.9154052734375, "learning_rate": 9.991888940458605e-06, "loss": 44.6129, "step": 58160 }, { "epoch": 0.11750708032175568, "grad_norm": 0.0, "learning_rate": 9.99186905362248e-06, "loss": 19.5961, "step": 58170 }, { "epoch": 0.1175272809544395, "grad_norm": 411.6956787109375, "learning_rate": 9.991849142456593e-06, "loss": 30.2058, "step": 58180 }, { "epoch": 0.1175474815871233, "grad_norm": 352.0191650390625, "learning_rate": 9.991829206961038e-06, "loss": 20.0372, "step": 58190 }, { "epoch": 0.11756768221980712, "grad_norm": 236.75027465820312, "learning_rate": 9.991809247135912e-06, "loss": 18.2385, "step": 58200 }, { "epoch": 0.11758788285249094, "grad_norm": 207.8545379638672, "learning_rate": 9.991789262981314e-06, "loss": 22.352, "step": 58210 }, { "epoch": 0.11760808348517475, "grad_norm": 487.16143798828125, "learning_rate": 9.99176925449734e-06, "loss": 29.4012, "step": 58220 }, { "epoch": 0.11762828411785857, "grad_norm": 460.7721862792969, "learning_rate": 9.991749221684088e-06, "loss": 37.8211, "step": 58230 }, { "epoch": 0.11764848475054239, "grad_norm": 142.8541717529297, "learning_rate": 9.991729164541656e-06, "loss": 15.3445, "step": 58240 }, { "epoch": 0.1176686853832262, "grad_norm": 319.8880310058594, "learning_rate": 9.991709083070143e-06, "loss": 27.1557, "step": 58250 }, { "epoch": 0.11768888601591002, "grad_norm": 195.1028289794922, "learning_rate": 9.991688977269643e-06, "loss": 13.4403, "step": 58260 }, { "epoch": 0.11770908664859384, "grad_norm": 359.15716552734375, "learning_rate": 9.991668847140258e-06, "loss": 25.8943, "step": 58270 }, { "epoch": 0.11772928728127766, "grad_norm": 267.1024169921875, "learning_rate": 9.991648692682083e-06, "loss": 23.9114, "step": 58280 }, { "epoch": 0.11774948791396146, "grad_norm": 641.1744384765625, "learning_rate": 9.99162851389522e-06, "loss": 42.8395, "step": 58290 }, { "epoch": 0.11776968854664528, "grad_norm": 190.07516479492188, "learning_rate": 9.991608310779762e-06, "loss": 18.9374, "step": 58300 }, { "epoch": 0.1177898891793291, "grad_norm": 130.28907775878906, "learning_rate": 9.991588083335812e-06, "loss": 19.5276, "step": 58310 }, { "epoch": 0.11781008981201291, "grad_norm": 44.220584869384766, "learning_rate": 9.991567831563468e-06, "loss": 19.9829, "step": 58320 }, { "epoch": 0.11783029044469673, "grad_norm": 330.2967529296875, "learning_rate": 9.991547555462825e-06, "loss": 51.2223, "step": 58330 }, { "epoch": 0.11785049107738055, "grad_norm": 491.4270324707031, "learning_rate": 9.991527255033988e-06, "loss": 30.2171, "step": 58340 }, { "epoch": 0.11787069171006435, "grad_norm": 267.8139953613281, "learning_rate": 9.99150693027705e-06, "loss": 25.7192, "step": 58350 }, { "epoch": 0.11789089234274817, "grad_norm": 365.2885437011719, "learning_rate": 9.991486581192115e-06, "loss": 25.0827, "step": 58360 }, { "epoch": 0.117911092975432, "grad_norm": 196.19619750976562, "learning_rate": 9.991466207779279e-06, "loss": 39.206, "step": 58370 }, { "epoch": 0.1179312936081158, "grad_norm": 258.1605224609375, "learning_rate": 9.99144581003864e-06, "loss": 60.3787, "step": 58380 }, { "epoch": 0.11795149424079962, "grad_norm": 228.70217895507812, "learning_rate": 9.991425387970301e-06, "loss": 18.2715, "step": 58390 }, { "epoch": 0.11797169487348344, "grad_norm": 190.49481201171875, "learning_rate": 9.99140494157436e-06, "loss": 20.3801, "step": 58400 }, { "epoch": 0.11799189550616725, "grad_norm": 358.5969543457031, "learning_rate": 9.991384470850918e-06, "loss": 33.1349, "step": 58410 }, { "epoch": 0.11801209613885107, "grad_norm": 709.0108642578125, "learning_rate": 9.991363975800073e-06, "loss": 41.2618, "step": 58420 }, { "epoch": 0.11803229677153489, "grad_norm": 67.12505340576172, "learning_rate": 9.991343456421923e-06, "loss": 22.8281, "step": 58430 }, { "epoch": 0.1180524974042187, "grad_norm": 389.284912109375, "learning_rate": 9.991322912716572e-06, "loss": 24.4255, "step": 58440 }, { "epoch": 0.11807269803690251, "grad_norm": 138.83786010742188, "learning_rate": 9.99130234468412e-06, "loss": 33.4322, "step": 58450 }, { "epoch": 0.11809289866958633, "grad_norm": 265.3197021484375, "learning_rate": 9.991281752324664e-06, "loss": 37.2237, "step": 58460 }, { "epoch": 0.11811309930227015, "grad_norm": 429.0298767089844, "learning_rate": 9.991261135638307e-06, "loss": 33.8706, "step": 58470 }, { "epoch": 0.11813329993495396, "grad_norm": 0.0, "learning_rate": 9.991240494625147e-06, "loss": 25.4353, "step": 58480 }, { "epoch": 0.11815350056763778, "grad_norm": 279.2240905761719, "learning_rate": 9.991219829285287e-06, "loss": 25.4403, "step": 58490 }, { "epoch": 0.1181737012003216, "grad_norm": 199.76661682128906, "learning_rate": 9.991199139618828e-06, "loss": 27.4733, "step": 58500 }, { "epoch": 0.1181939018330054, "grad_norm": 339.84600830078125, "learning_rate": 9.991178425625869e-06, "loss": 20.5974, "step": 58510 }, { "epoch": 0.11821410246568922, "grad_norm": 343.9501953125, "learning_rate": 9.99115768730651e-06, "loss": 19.338, "step": 58520 }, { "epoch": 0.11823430309837304, "grad_norm": 263.9930114746094, "learning_rate": 9.991136924660856e-06, "loss": 29.1788, "step": 58530 }, { "epoch": 0.11825450373105685, "grad_norm": 203.44667053222656, "learning_rate": 9.991116137689006e-06, "loss": 29.7544, "step": 58540 }, { "epoch": 0.11827470436374067, "grad_norm": 81.28977966308594, "learning_rate": 9.991095326391061e-06, "loss": 17.4274, "step": 58550 }, { "epoch": 0.11829490499642449, "grad_norm": 192.9645538330078, "learning_rate": 9.99107449076712e-06, "loss": 40.86, "step": 58560 }, { "epoch": 0.1183151056291083, "grad_norm": 117.95115661621094, "learning_rate": 9.99105363081729e-06, "loss": 33.3824, "step": 58570 }, { "epoch": 0.11833530626179212, "grad_norm": 433.0579528808594, "learning_rate": 9.99103274654167e-06, "loss": 39.1083, "step": 58580 }, { "epoch": 0.11835550689447594, "grad_norm": 362.3523254394531, "learning_rate": 9.99101183794036e-06, "loss": 28.0534, "step": 58590 }, { "epoch": 0.11837570752715976, "grad_norm": 365.1864013671875, "learning_rate": 9.990990905013466e-06, "loss": 39.9877, "step": 58600 }, { "epoch": 0.11839590815984356, "grad_norm": 327.9810485839844, "learning_rate": 9.990969947761087e-06, "loss": 40.2006, "step": 58610 }, { "epoch": 0.11841610879252738, "grad_norm": 254.641845703125, "learning_rate": 9.990948966183324e-06, "loss": 19.2787, "step": 58620 }, { "epoch": 0.1184363094252112, "grad_norm": 413.41278076171875, "learning_rate": 9.990927960280283e-06, "loss": 20.449, "step": 58630 }, { "epoch": 0.11845651005789501, "grad_norm": 82.84187316894531, "learning_rate": 9.990906930052065e-06, "loss": 26.1055, "step": 58640 }, { "epoch": 0.11847671069057883, "grad_norm": 318.16278076171875, "learning_rate": 9.99088587549877e-06, "loss": 22.8113, "step": 58650 }, { "epoch": 0.11849691132326265, "grad_norm": 572.5297241210938, "learning_rate": 9.990864796620503e-06, "loss": 29.0794, "step": 58660 }, { "epoch": 0.11851711195594646, "grad_norm": 103.0421371459961, "learning_rate": 9.990843693417366e-06, "loss": 40.2071, "step": 58670 }, { "epoch": 0.11853731258863028, "grad_norm": 0.0, "learning_rate": 9.990822565889464e-06, "loss": 23.9123, "step": 58680 }, { "epoch": 0.1185575132213141, "grad_norm": 287.948974609375, "learning_rate": 9.990801414036896e-06, "loss": 30.1356, "step": 58690 }, { "epoch": 0.1185777138539979, "grad_norm": 218.6237030029297, "learning_rate": 9.99078023785977e-06, "loss": 34.2425, "step": 58700 }, { "epoch": 0.11859791448668172, "grad_norm": 57.53432846069336, "learning_rate": 9.990759037358184e-06, "loss": 22.3636, "step": 58710 }, { "epoch": 0.11861811511936554, "grad_norm": 280.69781494140625, "learning_rate": 9.990737812532245e-06, "loss": 33.4645, "step": 58720 }, { "epoch": 0.11863831575204935, "grad_norm": 136.65992736816406, "learning_rate": 9.990716563382055e-06, "loss": 27.349, "step": 58730 }, { "epoch": 0.11865851638473317, "grad_norm": 363.1716003417969, "learning_rate": 9.990695289907716e-06, "loss": 26.3577, "step": 58740 }, { "epoch": 0.11867871701741699, "grad_norm": 309.2392272949219, "learning_rate": 9.990673992109335e-06, "loss": 22.5706, "step": 58750 }, { "epoch": 0.11869891765010081, "grad_norm": 392.3059997558594, "learning_rate": 9.990652669987016e-06, "loss": 29.6912, "step": 58760 }, { "epoch": 0.11871911828278461, "grad_norm": 568.1610717773438, "learning_rate": 9.990631323540858e-06, "loss": 34.9218, "step": 58770 }, { "epoch": 0.11873931891546843, "grad_norm": 126.25199127197266, "learning_rate": 9.990609952770969e-06, "loss": 24.707, "step": 58780 }, { "epoch": 0.11875951954815225, "grad_norm": 304.19549560546875, "learning_rate": 9.990588557677454e-06, "loss": 21.3605, "step": 58790 }, { "epoch": 0.11877972018083606, "grad_norm": 119.35803985595703, "learning_rate": 9.990567138260414e-06, "loss": 25.7605, "step": 58800 }, { "epoch": 0.11879992081351988, "grad_norm": 347.06561279296875, "learning_rate": 9.990545694519956e-06, "loss": 21.9407, "step": 58810 }, { "epoch": 0.1188201214462037, "grad_norm": 197.2572021484375, "learning_rate": 9.990524226456182e-06, "loss": 21.9094, "step": 58820 }, { "epoch": 0.1188403220788875, "grad_norm": 139.63136291503906, "learning_rate": 9.9905027340692e-06, "loss": 23.6743, "step": 58830 }, { "epoch": 0.11886052271157133, "grad_norm": 485.2525939941406, "learning_rate": 9.990481217359112e-06, "loss": 32.4401, "step": 58840 }, { "epoch": 0.11888072334425515, "grad_norm": 412.3610534667969, "learning_rate": 9.990459676326025e-06, "loss": 39.5501, "step": 58850 }, { "epoch": 0.11890092397693895, "grad_norm": 235.75559997558594, "learning_rate": 9.990438110970043e-06, "loss": 31.2497, "step": 58860 }, { "epoch": 0.11892112460962277, "grad_norm": 238.3065948486328, "learning_rate": 9.990416521291268e-06, "loss": 26.2383, "step": 58870 }, { "epoch": 0.11894132524230659, "grad_norm": 341.9708557128906, "learning_rate": 9.990394907289811e-06, "loss": 18.2421, "step": 58880 }, { "epoch": 0.1189615258749904, "grad_norm": 148.23580932617188, "learning_rate": 9.990373268965773e-06, "loss": 26.4456, "step": 58890 }, { "epoch": 0.11898172650767422, "grad_norm": 294.7439270019531, "learning_rate": 9.990351606319261e-06, "loss": 24.7929, "step": 58900 }, { "epoch": 0.11900192714035804, "grad_norm": 230.58665466308594, "learning_rate": 9.990329919350382e-06, "loss": 28.2516, "step": 58910 }, { "epoch": 0.11902212777304186, "grad_norm": 275.0360107421875, "learning_rate": 9.990308208059239e-06, "loss": 29.9438, "step": 58920 }, { "epoch": 0.11904232840572566, "grad_norm": 330.99285888671875, "learning_rate": 9.990286472445938e-06, "loss": 34.7986, "step": 58930 }, { "epoch": 0.11906252903840948, "grad_norm": 197.58221435546875, "learning_rate": 9.990264712510586e-06, "loss": 22.248, "step": 58940 }, { "epoch": 0.1190827296710933, "grad_norm": 317.3708801269531, "learning_rate": 9.990242928253291e-06, "loss": 28.0511, "step": 58950 }, { "epoch": 0.11910293030377711, "grad_norm": 387.6944885253906, "learning_rate": 9.990221119674157e-06, "loss": 24.7064, "step": 58960 }, { "epoch": 0.11912313093646093, "grad_norm": 157.29299926757812, "learning_rate": 9.99019928677329e-06, "loss": 31.1675, "step": 58970 }, { "epoch": 0.11914333156914475, "grad_norm": 593.2474975585938, "learning_rate": 9.990177429550797e-06, "loss": 39.3722, "step": 58980 }, { "epoch": 0.11916353220182856, "grad_norm": 900.3164672851562, "learning_rate": 9.990155548006783e-06, "loss": 42.2055, "step": 58990 }, { "epoch": 0.11918373283451238, "grad_norm": 276.26763916015625, "learning_rate": 9.990133642141359e-06, "loss": 22.7152, "step": 59000 }, { "epoch": 0.1192039334671962, "grad_norm": 1213.490478515625, "learning_rate": 9.990111711954626e-06, "loss": 30.5302, "step": 59010 }, { "epoch": 0.11922413409988, "grad_norm": 384.2647705078125, "learning_rate": 9.990089757446697e-06, "loss": 30.7955, "step": 59020 }, { "epoch": 0.11924433473256382, "grad_norm": 341.4183044433594, "learning_rate": 9.990067778617672e-06, "loss": 24.7691, "step": 59030 }, { "epoch": 0.11926453536524764, "grad_norm": 265.82891845703125, "learning_rate": 9.990045775467664e-06, "loss": 19.2245, "step": 59040 }, { "epoch": 0.11928473599793145, "grad_norm": 240.06341552734375, "learning_rate": 9.990023747996778e-06, "loss": 35.3912, "step": 59050 }, { "epoch": 0.11930493663061527, "grad_norm": 202.43338012695312, "learning_rate": 9.990001696205121e-06, "loss": 29.5483, "step": 59060 }, { "epoch": 0.11932513726329909, "grad_norm": 132.75758361816406, "learning_rate": 9.989979620092802e-06, "loss": 39.4405, "step": 59070 }, { "epoch": 0.11934533789598291, "grad_norm": 1723.5029296875, "learning_rate": 9.989957519659926e-06, "loss": 24.5439, "step": 59080 }, { "epoch": 0.11936553852866671, "grad_norm": 84.77678680419922, "learning_rate": 9.989935394906602e-06, "loss": 14.8543, "step": 59090 }, { "epoch": 0.11938573916135053, "grad_norm": 340.624267578125, "learning_rate": 9.98991324583294e-06, "loss": 28.8317, "step": 59100 }, { "epoch": 0.11940593979403435, "grad_norm": 315.0569763183594, "learning_rate": 9.989891072439045e-06, "loss": 18.065, "step": 59110 }, { "epoch": 0.11942614042671816, "grad_norm": 217.311279296875, "learning_rate": 9.989868874725026e-06, "loss": 31.2181, "step": 59120 }, { "epoch": 0.11944634105940198, "grad_norm": 187.72659301757812, "learning_rate": 9.989846652690992e-06, "loss": 40.0783, "step": 59130 }, { "epoch": 0.1194665416920858, "grad_norm": 186.47926330566406, "learning_rate": 9.989824406337049e-06, "loss": 18.5357, "step": 59140 }, { "epoch": 0.11948674232476961, "grad_norm": 538.4887084960938, "learning_rate": 9.989802135663308e-06, "loss": 36.5036, "step": 59150 }, { "epoch": 0.11950694295745343, "grad_norm": 257.734130859375, "learning_rate": 9.989779840669878e-06, "loss": 16.7241, "step": 59160 }, { "epoch": 0.11952714359013725, "grad_norm": 221.60848999023438, "learning_rate": 9.989757521356864e-06, "loss": 32.0586, "step": 59170 }, { "epoch": 0.11954734422282105, "grad_norm": 294.94683837890625, "learning_rate": 9.989735177724378e-06, "loss": 27.4445, "step": 59180 }, { "epoch": 0.11956754485550487, "grad_norm": 350.7003173828125, "learning_rate": 9.989712809772528e-06, "loss": 14.2509, "step": 59190 }, { "epoch": 0.1195877454881887, "grad_norm": 360.4586486816406, "learning_rate": 9.989690417501423e-06, "loss": 22.8196, "step": 59200 }, { "epoch": 0.1196079461208725, "grad_norm": 464.4029541015625, "learning_rate": 9.989668000911173e-06, "loss": 39.4513, "step": 59210 }, { "epoch": 0.11962814675355632, "grad_norm": 748.686767578125, "learning_rate": 9.989645560001884e-06, "loss": 36.4539, "step": 59220 }, { "epoch": 0.11964834738624014, "grad_norm": 578.754638671875, "learning_rate": 9.989623094773669e-06, "loss": 35.823, "step": 59230 }, { "epoch": 0.11966854801892396, "grad_norm": 216.44956970214844, "learning_rate": 9.989600605226637e-06, "loss": 16.1519, "step": 59240 }, { "epoch": 0.11968874865160777, "grad_norm": 311.21331787109375, "learning_rate": 9.989578091360896e-06, "loss": 29.3005, "step": 59250 }, { "epoch": 0.11970894928429159, "grad_norm": 311.48431396484375, "learning_rate": 9.989555553176556e-06, "loss": 30.9478, "step": 59260 }, { "epoch": 0.1197291499169754, "grad_norm": 173.99407958984375, "learning_rate": 9.989532990673729e-06, "loss": 19.4687, "step": 59270 }, { "epoch": 0.11974935054965921, "grad_norm": 294.3829650878906, "learning_rate": 9.989510403852521e-06, "loss": 16.6344, "step": 59280 }, { "epoch": 0.11976955118234303, "grad_norm": 260.50799560546875, "learning_rate": 9.989487792713045e-06, "loss": 35.0627, "step": 59290 }, { "epoch": 0.11978975181502685, "grad_norm": 324.9258728027344, "learning_rate": 9.989465157255413e-06, "loss": 58.3039, "step": 59300 }, { "epoch": 0.11980995244771066, "grad_norm": 505.1606140136719, "learning_rate": 9.98944249747973e-06, "loss": 23.9907, "step": 59310 }, { "epoch": 0.11983015308039448, "grad_norm": 379.2328796386719, "learning_rate": 9.989419813386112e-06, "loss": 16.7696, "step": 59320 }, { "epoch": 0.1198503537130783, "grad_norm": 475.6560974121094, "learning_rate": 9.989397104974665e-06, "loss": 34.6081, "step": 59330 }, { "epoch": 0.1198705543457621, "grad_norm": 219.25741577148438, "learning_rate": 9.989374372245503e-06, "loss": 17.7446, "step": 59340 }, { "epoch": 0.11989075497844592, "grad_norm": 71.01065063476562, "learning_rate": 9.989351615198734e-06, "loss": 24.608, "step": 59350 }, { "epoch": 0.11991095561112974, "grad_norm": 218.5639190673828, "learning_rate": 9.989328833834472e-06, "loss": 43.7207, "step": 59360 }, { "epoch": 0.11993115624381355, "grad_norm": 40.77179718017578, "learning_rate": 9.989306028152825e-06, "loss": 33.2193, "step": 59370 }, { "epoch": 0.11995135687649737, "grad_norm": 239.82217407226562, "learning_rate": 9.989283198153908e-06, "loss": 21.6847, "step": 59380 }, { "epoch": 0.11997155750918119, "grad_norm": 285.28387451171875, "learning_rate": 9.989260343837827e-06, "loss": 36.9324, "step": 59390 }, { "epoch": 0.11999175814186501, "grad_norm": 418.0511474609375, "learning_rate": 9.989237465204698e-06, "loss": 14.7271, "step": 59400 }, { "epoch": 0.12001195877454882, "grad_norm": 137.37930297851562, "learning_rate": 9.989214562254628e-06, "loss": 23.6483, "step": 59410 }, { "epoch": 0.12003215940723264, "grad_norm": 0.0, "learning_rate": 9.989191634987734e-06, "loss": 44.699, "step": 59420 }, { "epoch": 0.12005236003991646, "grad_norm": 446.92120361328125, "learning_rate": 9.989168683404125e-06, "loss": 32.4799, "step": 59430 }, { "epoch": 0.12007256067260026, "grad_norm": 473.528564453125, "learning_rate": 9.98914570750391e-06, "loss": 31.9431, "step": 59440 }, { "epoch": 0.12009276130528408, "grad_norm": 330.35858154296875, "learning_rate": 9.98912270728721e-06, "loss": 27.4563, "step": 59450 }, { "epoch": 0.1201129619379679, "grad_norm": 448.4079895019531, "learning_rate": 9.989099682754125e-06, "loss": 34.3141, "step": 59460 }, { "epoch": 0.12013316257065171, "grad_norm": 217.46304321289062, "learning_rate": 9.989076633904775e-06, "loss": 21.8818, "step": 59470 }, { "epoch": 0.12015336320333553, "grad_norm": 359.943603515625, "learning_rate": 9.989053560739272e-06, "loss": 29.2842, "step": 59480 }, { "epoch": 0.12017356383601935, "grad_norm": 194.5058135986328, "learning_rate": 9.989030463257726e-06, "loss": 28.5674, "step": 59490 }, { "epoch": 0.12019376446870315, "grad_norm": 274.4166259765625, "learning_rate": 9.989007341460251e-06, "loss": 21.2931, "step": 59500 }, { "epoch": 0.12021396510138697, "grad_norm": 118.69823455810547, "learning_rate": 9.98898419534696e-06, "loss": 32.7995, "step": 59510 }, { "epoch": 0.1202341657340708, "grad_norm": 167.2899627685547, "learning_rate": 9.988961024917963e-06, "loss": 18.1551, "step": 59520 }, { "epoch": 0.1202543663667546, "grad_norm": 440.3486633300781, "learning_rate": 9.988937830173376e-06, "loss": 27.5128, "step": 59530 }, { "epoch": 0.12027456699943842, "grad_norm": 195.29103088378906, "learning_rate": 9.988914611113311e-06, "loss": 35.7536, "step": 59540 }, { "epoch": 0.12029476763212224, "grad_norm": 239.5924072265625, "learning_rate": 9.988891367737882e-06, "loss": 29.2614, "step": 59550 }, { "epoch": 0.12031496826480606, "grad_norm": 178.24607849121094, "learning_rate": 9.988868100047203e-06, "loss": 28.9585, "step": 59560 }, { "epoch": 0.12033516889748987, "grad_norm": 283.34503173828125, "learning_rate": 9.988844808041382e-06, "loss": 26.9437, "step": 59570 }, { "epoch": 0.12035536953017369, "grad_norm": 107.28218078613281, "learning_rate": 9.98882149172054e-06, "loss": 14.8791, "step": 59580 }, { "epoch": 0.1203755701628575, "grad_norm": 189.91212463378906, "learning_rate": 9.988798151084783e-06, "loss": 18.2831, "step": 59590 }, { "epoch": 0.12039577079554131, "grad_norm": 106.02507019042969, "learning_rate": 9.988774786134235e-06, "loss": 28.012, "step": 59600 }, { "epoch": 0.12041597142822513, "grad_norm": 307.8448791503906, "learning_rate": 9.988751396869e-06, "loss": 22.1152, "step": 59610 }, { "epoch": 0.12043617206090895, "grad_norm": 187.6307830810547, "learning_rate": 9.988727983289195e-06, "loss": 16.0499, "step": 59620 }, { "epoch": 0.12045637269359276, "grad_norm": 536.5588989257812, "learning_rate": 9.988704545394936e-06, "loss": 40.6949, "step": 59630 }, { "epoch": 0.12047657332627658, "grad_norm": 226.49505615234375, "learning_rate": 9.988681083186336e-06, "loss": 20.5225, "step": 59640 }, { "epoch": 0.1204967739589604, "grad_norm": 204.02984619140625, "learning_rate": 9.988657596663509e-06, "loss": 32.4295, "step": 59650 }, { "epoch": 0.1205169745916442, "grad_norm": 166.40013122558594, "learning_rate": 9.988634085826571e-06, "loss": 21.1233, "step": 59660 }, { "epoch": 0.12053717522432802, "grad_norm": 560.6005859375, "learning_rate": 9.988610550675635e-06, "loss": 23.223, "step": 59670 }, { "epoch": 0.12055737585701184, "grad_norm": 544.5245361328125, "learning_rate": 9.988586991210816e-06, "loss": 27.7855, "step": 59680 }, { "epoch": 0.12057757648969565, "grad_norm": 133.1433868408203, "learning_rate": 9.98856340743223e-06, "loss": 21.818, "step": 59690 }, { "epoch": 0.12059777712237947, "grad_norm": 601.857177734375, "learning_rate": 9.988539799339989e-06, "loss": 23.2205, "step": 59700 }, { "epoch": 0.12061797775506329, "grad_norm": 297.4815368652344, "learning_rate": 9.988516166934212e-06, "loss": 21.3721, "step": 59710 }, { "epoch": 0.12063817838774711, "grad_norm": 127.99574279785156, "learning_rate": 9.988492510215011e-06, "loss": 15.4862, "step": 59720 }, { "epoch": 0.12065837902043092, "grad_norm": 320.6650085449219, "learning_rate": 9.988468829182504e-06, "loss": 18.4138, "step": 59730 }, { "epoch": 0.12067857965311474, "grad_norm": 255.63742065429688, "learning_rate": 9.988445123836804e-06, "loss": 25.0304, "step": 59740 }, { "epoch": 0.12069878028579856, "grad_norm": 336.26898193359375, "learning_rate": 9.988421394178027e-06, "loss": 26.7984, "step": 59750 }, { "epoch": 0.12071898091848236, "grad_norm": 200.19830322265625, "learning_rate": 9.98839764020629e-06, "loss": 21.6649, "step": 59760 }, { "epoch": 0.12073918155116618, "grad_norm": 261.0440368652344, "learning_rate": 9.988373861921708e-06, "loss": 18.1451, "step": 59770 }, { "epoch": 0.12075938218385, "grad_norm": 594.2536010742188, "learning_rate": 9.988350059324396e-06, "loss": 38.899, "step": 59780 }, { "epoch": 0.12077958281653381, "grad_norm": 309.52386474609375, "learning_rate": 9.988326232414472e-06, "loss": 30.3308, "step": 59790 }, { "epoch": 0.12079978344921763, "grad_norm": 203.76773071289062, "learning_rate": 9.98830238119205e-06, "loss": 17.2265, "step": 59800 }, { "epoch": 0.12081998408190145, "grad_norm": 236.49383544921875, "learning_rate": 9.988278505657247e-06, "loss": 38.5244, "step": 59810 }, { "epoch": 0.12084018471458526, "grad_norm": 490.745849609375, "learning_rate": 9.98825460581018e-06, "loss": 30.519, "step": 59820 }, { "epoch": 0.12086038534726908, "grad_norm": 605.2842407226562, "learning_rate": 9.988230681650964e-06, "loss": 42.1935, "step": 59830 }, { "epoch": 0.1208805859799529, "grad_norm": 261.0123291015625, "learning_rate": 9.988206733179718e-06, "loss": 19.6674, "step": 59840 }, { "epoch": 0.1209007866126367, "grad_norm": 386.28656005859375, "learning_rate": 9.988182760396557e-06, "loss": 37.11, "step": 59850 }, { "epoch": 0.12092098724532052, "grad_norm": 196.51904296875, "learning_rate": 9.988158763301598e-06, "loss": 34.9214, "step": 59860 }, { "epoch": 0.12094118787800434, "grad_norm": 259.41644287109375, "learning_rate": 9.988134741894959e-06, "loss": 24.1099, "step": 59870 }, { "epoch": 0.12096138851068816, "grad_norm": 374.3772277832031, "learning_rate": 9.988110696176756e-06, "loss": 30.7525, "step": 59880 }, { "epoch": 0.12098158914337197, "grad_norm": 724.1673583984375, "learning_rate": 9.988086626147107e-06, "loss": 49.616, "step": 59890 }, { "epoch": 0.12100178977605579, "grad_norm": 351.2283935546875, "learning_rate": 9.988062531806127e-06, "loss": 26.285, "step": 59900 }, { "epoch": 0.12102199040873961, "grad_norm": 105.59093475341797, "learning_rate": 9.988038413153936e-06, "loss": 35.1523, "step": 59910 }, { "epoch": 0.12104219104142341, "grad_norm": 538.7321166992188, "learning_rate": 9.988014270190652e-06, "loss": 35.769, "step": 59920 }, { "epoch": 0.12106239167410723, "grad_norm": 279.2707214355469, "learning_rate": 9.98799010291639e-06, "loss": 30.6418, "step": 59930 }, { "epoch": 0.12108259230679105, "grad_norm": 51.66685104370117, "learning_rate": 9.987965911331268e-06, "loss": 23.7141, "step": 59940 }, { "epoch": 0.12110279293947486, "grad_norm": 326.884521484375, "learning_rate": 9.987941695435409e-06, "loss": 26.8639, "step": 59950 }, { "epoch": 0.12112299357215868, "grad_norm": 379.22772216796875, "learning_rate": 9.987917455228924e-06, "loss": 22.8028, "step": 59960 }, { "epoch": 0.1211431942048425, "grad_norm": 405.9208068847656, "learning_rate": 9.987893190711935e-06, "loss": 12.8865, "step": 59970 }, { "epoch": 0.1211633948375263, "grad_norm": 268.1274108886719, "learning_rate": 9.987868901884558e-06, "loss": 31.0988, "step": 59980 }, { "epoch": 0.12118359547021013, "grad_norm": 11.895827293395996, "learning_rate": 9.987844588746916e-06, "loss": 25.934, "step": 59990 }, { "epoch": 0.12120379610289395, "grad_norm": 764.9451904296875, "learning_rate": 9.987820251299121e-06, "loss": 51.915, "step": 60000 }, { "epoch": 0.12122399673557775, "grad_norm": 282.6944274902344, "learning_rate": 9.987795889541298e-06, "loss": 15.3029, "step": 60010 }, { "epoch": 0.12124419736826157, "grad_norm": 173.5716552734375, "learning_rate": 9.987771503473562e-06, "loss": 29.4922, "step": 60020 }, { "epoch": 0.12126439800094539, "grad_norm": 326.033935546875, "learning_rate": 9.987747093096032e-06, "loss": 34.3642, "step": 60030 }, { "epoch": 0.12128459863362921, "grad_norm": 382.7037048339844, "learning_rate": 9.987722658408828e-06, "loss": 30.1912, "step": 60040 }, { "epoch": 0.12130479926631302, "grad_norm": 185.5147247314453, "learning_rate": 9.98769819941207e-06, "loss": 19.1287, "step": 60050 }, { "epoch": 0.12132499989899684, "grad_norm": 434.92138671875, "learning_rate": 9.987673716105874e-06, "loss": 47.0762, "step": 60060 }, { "epoch": 0.12134520053168066, "grad_norm": 158.16134643554688, "learning_rate": 9.987649208490361e-06, "loss": 26.5726, "step": 60070 }, { "epoch": 0.12136540116436446, "grad_norm": 269.7251892089844, "learning_rate": 9.987624676565652e-06, "loss": 24.2643, "step": 60080 }, { "epoch": 0.12138560179704828, "grad_norm": 402.23431396484375, "learning_rate": 9.987600120331864e-06, "loss": 33.727, "step": 60090 }, { "epoch": 0.1214058024297321, "grad_norm": 273.03143310546875, "learning_rate": 9.987575539789119e-06, "loss": 32.9726, "step": 60100 }, { "epoch": 0.12142600306241591, "grad_norm": 252.29310607910156, "learning_rate": 9.987550934937536e-06, "loss": 23.3976, "step": 60110 }, { "epoch": 0.12144620369509973, "grad_norm": 302.5552978515625, "learning_rate": 9.987526305777234e-06, "loss": 44.3652, "step": 60120 }, { "epoch": 0.12146640432778355, "grad_norm": 272.68572998046875, "learning_rate": 9.987501652308333e-06, "loss": 26.2318, "step": 60130 }, { "epoch": 0.12148660496046736, "grad_norm": 296.3598937988281, "learning_rate": 9.987476974530957e-06, "loss": 42.0439, "step": 60140 }, { "epoch": 0.12150680559315118, "grad_norm": 218.77667236328125, "learning_rate": 9.98745227244522e-06, "loss": 27.6341, "step": 60150 }, { "epoch": 0.121527006225835, "grad_norm": 30.236309051513672, "learning_rate": 9.987427546051246e-06, "loss": 42.9174, "step": 60160 }, { "epoch": 0.1215472068585188, "grad_norm": 61.908241271972656, "learning_rate": 9.987402795349154e-06, "loss": 21.5761, "step": 60170 }, { "epoch": 0.12156740749120262, "grad_norm": 0.0, "learning_rate": 9.987378020339069e-06, "loss": 24.6969, "step": 60180 }, { "epoch": 0.12158760812388644, "grad_norm": 464.0487365722656, "learning_rate": 9.987353221021106e-06, "loss": 23.4154, "step": 60190 }, { "epoch": 0.12160780875657026, "grad_norm": 222.47549438476562, "learning_rate": 9.987328397395389e-06, "loss": 16.3298, "step": 60200 }, { "epoch": 0.12162800938925407, "grad_norm": 267.14990234375, "learning_rate": 9.987303549462038e-06, "loss": 30.7131, "step": 60210 }, { "epoch": 0.12164821002193789, "grad_norm": 631.212158203125, "learning_rate": 9.987278677221174e-06, "loss": 20.172, "step": 60220 }, { "epoch": 0.12166841065462171, "grad_norm": 151.0872802734375, "learning_rate": 9.987253780672918e-06, "loss": 27.4328, "step": 60230 }, { "epoch": 0.12168861128730551, "grad_norm": 241.087158203125, "learning_rate": 9.987228859817395e-06, "loss": 37.0193, "step": 60240 }, { "epoch": 0.12170881191998933, "grad_norm": 256.2538757324219, "learning_rate": 9.987203914654721e-06, "loss": 22.7232, "step": 60250 }, { "epoch": 0.12172901255267315, "grad_norm": 0.0, "learning_rate": 9.987178945185019e-06, "loss": 17.8103, "step": 60260 }, { "epoch": 0.12174921318535696, "grad_norm": 121.62736511230469, "learning_rate": 9.987153951408414e-06, "loss": 24.9505, "step": 60270 }, { "epoch": 0.12176941381804078, "grad_norm": 210.45187377929688, "learning_rate": 9.987128933325025e-06, "loss": 21.9299, "step": 60280 }, { "epoch": 0.1217896144507246, "grad_norm": 106.97030639648438, "learning_rate": 9.987103890934974e-06, "loss": 26.2843, "step": 60290 }, { "epoch": 0.12180981508340841, "grad_norm": 459.6731262207031, "learning_rate": 9.987078824238384e-06, "loss": 28.5275, "step": 60300 }, { "epoch": 0.12183001571609223, "grad_norm": 667.1466064453125, "learning_rate": 9.987053733235376e-06, "loss": 40.8763, "step": 60310 }, { "epoch": 0.12185021634877605, "grad_norm": 421.1333923339844, "learning_rate": 9.987028617926074e-06, "loss": 33.6678, "step": 60320 }, { "epoch": 0.12187041698145985, "grad_norm": 27.695507049560547, "learning_rate": 9.987003478310597e-06, "loss": 35.3024, "step": 60330 }, { "epoch": 0.12189061761414367, "grad_norm": 181.45399475097656, "learning_rate": 9.986978314389071e-06, "loss": 22.4713, "step": 60340 }, { "epoch": 0.1219108182468275, "grad_norm": 294.19146728515625, "learning_rate": 9.98695312616162e-06, "loss": 21.5112, "step": 60350 }, { "epoch": 0.12193101887951131, "grad_norm": 274.27410888671875, "learning_rate": 9.986927913628361e-06, "loss": 29.578, "step": 60360 }, { "epoch": 0.12195121951219512, "grad_norm": 252.2141571044922, "learning_rate": 9.986902676789421e-06, "loss": 24.2965, "step": 60370 }, { "epoch": 0.12197142014487894, "grad_norm": 227.00238037109375, "learning_rate": 9.986877415644925e-06, "loss": 39.2414, "step": 60380 }, { "epoch": 0.12199162077756276, "grad_norm": 335.25439453125, "learning_rate": 9.98685213019499e-06, "loss": 21.6304, "step": 60390 }, { "epoch": 0.12201182141024657, "grad_norm": 326.0909118652344, "learning_rate": 9.986826820439743e-06, "loss": 23.1163, "step": 60400 }, { "epoch": 0.12203202204293039, "grad_norm": 181.29342651367188, "learning_rate": 9.986801486379307e-06, "loss": 23.0673, "step": 60410 }, { "epoch": 0.1220522226756142, "grad_norm": 225.30909729003906, "learning_rate": 9.986776128013807e-06, "loss": 17.5025, "step": 60420 }, { "epoch": 0.12207242330829801, "grad_norm": 230.95016479492188, "learning_rate": 9.986750745343363e-06, "loss": 23.0456, "step": 60430 }, { "epoch": 0.12209262394098183, "grad_norm": 385.0942687988281, "learning_rate": 9.986725338368103e-06, "loss": 23.0927, "step": 60440 }, { "epoch": 0.12211282457366565, "grad_norm": 290.6153869628906, "learning_rate": 9.986699907088147e-06, "loss": 19.4606, "step": 60450 }, { "epoch": 0.12213302520634946, "grad_norm": 4115.587890625, "learning_rate": 9.986674451503619e-06, "loss": 51.2636, "step": 60460 }, { "epoch": 0.12215322583903328, "grad_norm": 168.30946350097656, "learning_rate": 9.986648971614646e-06, "loss": 26.8772, "step": 60470 }, { "epoch": 0.1221734264717171, "grad_norm": 488.22943115234375, "learning_rate": 9.98662346742135e-06, "loss": 17.9627, "step": 60480 }, { "epoch": 0.1221936271044009, "grad_norm": 733.807861328125, "learning_rate": 9.986597938923859e-06, "loss": 26.9987, "step": 60490 }, { "epoch": 0.12221382773708472, "grad_norm": 46.22412872314453, "learning_rate": 9.98657238612229e-06, "loss": 12.6389, "step": 60500 }, { "epoch": 0.12223402836976854, "grad_norm": 160.84274291992188, "learning_rate": 9.986546809016775e-06, "loss": 26.1893, "step": 60510 }, { "epoch": 0.12225422900245236, "grad_norm": 195.6310272216797, "learning_rate": 9.986521207607436e-06, "loss": 27.3586, "step": 60520 }, { "epoch": 0.12227442963513617, "grad_norm": 128.07095336914062, "learning_rate": 9.986495581894396e-06, "loss": 18.2827, "step": 60530 }, { "epoch": 0.12229463026781999, "grad_norm": 157.44161987304688, "learning_rate": 9.986469931877781e-06, "loss": 13.1667, "step": 60540 }, { "epoch": 0.12231483090050381, "grad_norm": 222.19703674316406, "learning_rate": 9.986444257557717e-06, "loss": 17.4932, "step": 60550 }, { "epoch": 0.12233503153318762, "grad_norm": 200.06387329101562, "learning_rate": 9.986418558934329e-06, "loss": 19.8279, "step": 60560 }, { "epoch": 0.12235523216587144, "grad_norm": 103.75589752197266, "learning_rate": 9.98639283600774e-06, "loss": 27.5003, "step": 60570 }, { "epoch": 0.12237543279855526, "grad_norm": 145.72769165039062, "learning_rate": 9.98636708877808e-06, "loss": 16.9009, "step": 60580 }, { "epoch": 0.12239563343123906, "grad_norm": 476.1783142089844, "learning_rate": 9.986341317245469e-06, "loss": 21.8403, "step": 60590 }, { "epoch": 0.12241583406392288, "grad_norm": 277.4079895019531, "learning_rate": 9.986315521410035e-06, "loss": 25.5001, "step": 60600 }, { "epoch": 0.1224360346966067, "grad_norm": 1634.525634765625, "learning_rate": 9.986289701271905e-06, "loss": 46.6916, "step": 60610 }, { "epoch": 0.12245623532929051, "grad_norm": 0.0, "learning_rate": 9.986263856831204e-06, "loss": 27.2311, "step": 60620 }, { "epoch": 0.12247643596197433, "grad_norm": 127.3109130859375, "learning_rate": 9.986237988088059e-06, "loss": 21.043, "step": 60630 }, { "epoch": 0.12249663659465815, "grad_norm": 212.68101501464844, "learning_rate": 9.986212095042593e-06, "loss": 28.275, "step": 60640 }, { "epoch": 0.12251683722734195, "grad_norm": 549.1353759765625, "learning_rate": 9.986186177694935e-06, "loss": 34.4481, "step": 60650 }, { "epoch": 0.12253703786002577, "grad_norm": 272.1681823730469, "learning_rate": 9.986160236045207e-06, "loss": 27.5508, "step": 60660 }, { "epoch": 0.1225572384927096, "grad_norm": 234.8720245361328, "learning_rate": 9.986134270093542e-06, "loss": 34.1048, "step": 60670 }, { "epoch": 0.12257743912539341, "grad_norm": 472.5223388671875, "learning_rate": 9.986108279840063e-06, "loss": 29.2052, "step": 60680 }, { "epoch": 0.12259763975807722, "grad_norm": 1123.423828125, "learning_rate": 9.986082265284896e-06, "loss": 30.0506, "step": 60690 }, { "epoch": 0.12261784039076104, "grad_norm": 512.6643676757812, "learning_rate": 9.98605622642817e-06, "loss": 19.4037, "step": 60700 }, { "epoch": 0.12263804102344486, "grad_norm": 177.73329162597656, "learning_rate": 9.986030163270011e-06, "loss": 28.8787, "step": 60710 }, { "epoch": 0.12265824165612867, "grad_norm": 505.284423828125, "learning_rate": 9.986004075810543e-06, "loss": 40.0967, "step": 60720 }, { "epoch": 0.12267844228881249, "grad_norm": 445.5740051269531, "learning_rate": 9.985977964049898e-06, "loss": 28.4993, "step": 60730 }, { "epoch": 0.1226986429214963, "grad_norm": 359.6636657714844, "learning_rate": 9.9859518279882e-06, "loss": 35.1754, "step": 60740 }, { "epoch": 0.12271884355418011, "grad_norm": 478.0423889160156, "learning_rate": 9.985925667625581e-06, "loss": 24.6532, "step": 60750 }, { "epoch": 0.12273904418686393, "grad_norm": 206.1904296875, "learning_rate": 9.98589948296216e-06, "loss": 21.933, "step": 60760 }, { "epoch": 0.12275924481954775, "grad_norm": 475.64599609375, "learning_rate": 9.985873273998072e-06, "loss": 28.1711, "step": 60770 }, { "epoch": 0.12277944545223156, "grad_norm": 187.2813262939453, "learning_rate": 9.985847040733442e-06, "loss": 20.4963, "step": 60780 }, { "epoch": 0.12279964608491538, "grad_norm": 123.9382553100586, "learning_rate": 9.9858207831684e-06, "loss": 35.0747, "step": 60790 }, { "epoch": 0.1228198467175992, "grad_norm": 234.50006103515625, "learning_rate": 9.98579450130307e-06, "loss": 22.6662, "step": 60800 }, { "epoch": 0.122840047350283, "grad_norm": 437.0867004394531, "learning_rate": 9.985768195137585e-06, "loss": 25.8529, "step": 60810 }, { "epoch": 0.12286024798296682, "grad_norm": 260.9064636230469, "learning_rate": 9.985741864672067e-06, "loss": 25.1511, "step": 60820 }, { "epoch": 0.12288044861565064, "grad_norm": 258.2399597167969, "learning_rate": 9.985715509906649e-06, "loss": 20.4926, "step": 60830 }, { "epoch": 0.12290064924833445, "grad_norm": 339.950927734375, "learning_rate": 9.985689130841459e-06, "loss": 14.3598, "step": 60840 }, { "epoch": 0.12292084988101827, "grad_norm": 297.86846923828125, "learning_rate": 9.985662727476625e-06, "loss": 18.5229, "step": 60850 }, { "epoch": 0.12294105051370209, "grad_norm": 339.5301818847656, "learning_rate": 9.985636299812275e-06, "loss": 17.6491, "step": 60860 }, { "epoch": 0.12296125114638591, "grad_norm": 169.86158752441406, "learning_rate": 9.98560984784854e-06, "loss": 17.3937, "step": 60870 }, { "epoch": 0.12298145177906972, "grad_norm": 361.6994323730469, "learning_rate": 9.985583371585544e-06, "loss": 23.9774, "step": 60880 }, { "epoch": 0.12300165241175354, "grad_norm": 31.77123260498047, "learning_rate": 9.98555687102342e-06, "loss": 12.4806, "step": 60890 }, { "epoch": 0.12302185304443736, "grad_norm": 121.24239349365234, "learning_rate": 9.9855303461623e-06, "loss": 12.7972, "step": 60900 }, { "epoch": 0.12304205367712116, "grad_norm": 322.94635009765625, "learning_rate": 9.985503797002307e-06, "loss": 24.3212, "step": 60910 }, { "epoch": 0.12306225430980498, "grad_norm": 275.1046447753906, "learning_rate": 9.985477223543574e-06, "loss": 27.4177, "step": 60920 }, { "epoch": 0.1230824549424888, "grad_norm": 241.40489196777344, "learning_rate": 9.985450625786228e-06, "loss": 39.5365, "step": 60930 }, { "epoch": 0.12310265557517261, "grad_norm": 380.85809326171875, "learning_rate": 9.985424003730403e-06, "loss": 22.6556, "step": 60940 }, { "epoch": 0.12312285620785643, "grad_norm": 57.18506622314453, "learning_rate": 9.985397357376224e-06, "loss": 29.328, "step": 60950 }, { "epoch": 0.12314305684054025, "grad_norm": 70.07464599609375, "learning_rate": 9.985370686723823e-06, "loss": 38.2441, "step": 60960 }, { "epoch": 0.12316325747322406, "grad_norm": 272.14202880859375, "learning_rate": 9.985343991773331e-06, "loss": 55.0635, "step": 60970 }, { "epoch": 0.12318345810590788, "grad_norm": 227.6904754638672, "learning_rate": 9.985317272524876e-06, "loss": 20.8465, "step": 60980 }, { "epoch": 0.1232036587385917, "grad_norm": 224.39450073242188, "learning_rate": 9.98529052897859e-06, "loss": 42.1046, "step": 60990 }, { "epoch": 0.1232238593712755, "grad_norm": 116.63621520996094, "learning_rate": 9.985263761134602e-06, "loss": 20.1003, "step": 61000 }, { "epoch": 0.12324406000395932, "grad_norm": 308.9950256347656, "learning_rate": 9.985236968993044e-06, "loss": 25.1898, "step": 61010 }, { "epoch": 0.12326426063664314, "grad_norm": 702.830322265625, "learning_rate": 9.985210152554045e-06, "loss": 38.6982, "step": 61020 }, { "epoch": 0.12328446126932696, "grad_norm": 234.2677764892578, "learning_rate": 9.985183311817736e-06, "loss": 31.807, "step": 61030 }, { "epoch": 0.12330466190201077, "grad_norm": 276.5049743652344, "learning_rate": 9.985156446784249e-06, "loss": 28.1678, "step": 61040 }, { "epoch": 0.12332486253469459, "grad_norm": 210.75494384765625, "learning_rate": 9.985129557453714e-06, "loss": 14.4817, "step": 61050 }, { "epoch": 0.12334506316737841, "grad_norm": 569.867431640625, "learning_rate": 9.985102643826261e-06, "loss": 29.1477, "step": 61060 }, { "epoch": 0.12336526380006221, "grad_norm": 165.848876953125, "learning_rate": 9.985075705902024e-06, "loss": 33.1873, "step": 61070 }, { "epoch": 0.12338546443274603, "grad_norm": 386.47894287109375, "learning_rate": 9.985048743681131e-06, "loss": 21.8881, "step": 61080 }, { "epoch": 0.12340566506542985, "grad_norm": 180.79150390625, "learning_rate": 9.985021757163715e-06, "loss": 38.1918, "step": 61090 }, { "epoch": 0.12342586569811366, "grad_norm": 286.1990661621094, "learning_rate": 9.98499474634991e-06, "loss": 28.5467, "step": 61100 }, { "epoch": 0.12344606633079748, "grad_norm": 102.48365783691406, "learning_rate": 9.984967711239844e-06, "loss": 20.9693, "step": 61110 }, { "epoch": 0.1234662669634813, "grad_norm": 427.01104736328125, "learning_rate": 9.984940651833648e-06, "loss": 21.5862, "step": 61120 }, { "epoch": 0.1234864675961651, "grad_norm": 214.21153259277344, "learning_rate": 9.984913568131458e-06, "loss": 30.7306, "step": 61130 }, { "epoch": 0.12350666822884893, "grad_norm": 373.0658874511719, "learning_rate": 9.984886460133403e-06, "loss": 22.4512, "step": 61140 }, { "epoch": 0.12352686886153275, "grad_norm": 93.59442901611328, "learning_rate": 9.984859327839617e-06, "loss": 27.5429, "step": 61150 }, { "epoch": 0.12354706949421655, "grad_norm": 41.76962661743164, "learning_rate": 9.98483217125023e-06, "loss": 26.0265, "step": 61160 }, { "epoch": 0.12356727012690037, "grad_norm": 126.17752075195312, "learning_rate": 9.984804990365376e-06, "loss": 14.894, "step": 61170 }, { "epoch": 0.12358747075958419, "grad_norm": 487.9088134765625, "learning_rate": 9.984777785185188e-06, "loss": 24.2045, "step": 61180 }, { "epoch": 0.12360767139226801, "grad_norm": 35.57856750488281, "learning_rate": 9.984750555709797e-06, "loss": 27.1633, "step": 61190 }, { "epoch": 0.12362787202495182, "grad_norm": 9.89406967163086, "learning_rate": 9.984723301939337e-06, "loss": 17.1971, "step": 61200 }, { "epoch": 0.12364807265763564, "grad_norm": 475.89886474609375, "learning_rate": 9.984696023873939e-06, "loss": 25.6139, "step": 61210 }, { "epoch": 0.12366827329031946, "grad_norm": 256.8410949707031, "learning_rate": 9.984668721513737e-06, "loss": 26.9873, "step": 61220 }, { "epoch": 0.12368847392300326, "grad_norm": 151.1160430908203, "learning_rate": 9.984641394858865e-06, "loss": 36.8086, "step": 61230 }, { "epoch": 0.12370867455568708, "grad_norm": 594.8713989257812, "learning_rate": 9.984614043909455e-06, "loss": 23.4229, "step": 61240 }, { "epoch": 0.1237288751883709, "grad_norm": 297.4693603515625, "learning_rate": 9.984586668665641e-06, "loss": 25.1546, "step": 61250 }, { "epoch": 0.12374907582105471, "grad_norm": 266.6569519042969, "learning_rate": 9.984559269127557e-06, "loss": 33.2932, "step": 61260 }, { "epoch": 0.12376927645373853, "grad_norm": 275.5949401855469, "learning_rate": 9.984531845295333e-06, "loss": 47.204, "step": 61270 }, { "epoch": 0.12378947708642235, "grad_norm": 174.76980590820312, "learning_rate": 9.984504397169107e-06, "loss": 36.7881, "step": 61280 }, { "epoch": 0.12380967771910616, "grad_norm": 250.24725341796875, "learning_rate": 9.984476924749011e-06, "loss": 26.7879, "step": 61290 }, { "epoch": 0.12382987835178998, "grad_norm": 452.5152587890625, "learning_rate": 9.98444942803518e-06, "loss": 32.8186, "step": 61300 }, { "epoch": 0.1238500789844738, "grad_norm": 183.15037536621094, "learning_rate": 9.984421907027747e-06, "loss": 17.9172, "step": 61310 }, { "epoch": 0.1238702796171576, "grad_norm": 356.1861267089844, "learning_rate": 9.984394361726844e-06, "loss": 27.4079, "step": 61320 }, { "epoch": 0.12389048024984142, "grad_norm": 222.74778747558594, "learning_rate": 9.98436679213261e-06, "loss": 15.6972, "step": 61330 }, { "epoch": 0.12391068088252524, "grad_norm": 205.38417053222656, "learning_rate": 9.984339198245175e-06, "loss": 22.1212, "step": 61340 }, { "epoch": 0.12393088151520906, "grad_norm": 394.88067626953125, "learning_rate": 9.984311580064676e-06, "loss": 17.8067, "step": 61350 }, { "epoch": 0.12395108214789287, "grad_norm": 272.67877197265625, "learning_rate": 9.984283937591246e-06, "loss": 20.4595, "step": 61360 }, { "epoch": 0.12397128278057669, "grad_norm": 140.41188049316406, "learning_rate": 9.98425627082502e-06, "loss": 42.2016, "step": 61370 }, { "epoch": 0.12399148341326051, "grad_norm": 147.97874450683594, "learning_rate": 9.984228579766136e-06, "loss": 27.862, "step": 61380 }, { "epoch": 0.12401168404594431, "grad_norm": 371.3487243652344, "learning_rate": 9.984200864414726e-06, "loss": 16.7283, "step": 61390 }, { "epoch": 0.12403188467862813, "grad_norm": 178.35923767089844, "learning_rate": 9.984173124770924e-06, "loss": 26.8529, "step": 61400 }, { "epoch": 0.12405208531131195, "grad_norm": 363.9888610839844, "learning_rate": 9.984145360834868e-06, "loss": 10.7061, "step": 61410 }, { "epoch": 0.12407228594399576, "grad_norm": 367.28424072265625, "learning_rate": 9.984117572606691e-06, "loss": 18.9381, "step": 61420 }, { "epoch": 0.12409248657667958, "grad_norm": 73.9620590209961, "learning_rate": 9.984089760086531e-06, "loss": 22.1716, "step": 61430 }, { "epoch": 0.1241126872093634, "grad_norm": 417.3023986816406, "learning_rate": 9.98406192327452e-06, "loss": 23.5798, "step": 61440 }, { "epoch": 0.12413288784204721, "grad_norm": 232.07496643066406, "learning_rate": 9.984034062170796e-06, "loss": 17.1938, "step": 61450 }, { "epoch": 0.12415308847473103, "grad_norm": 68.66152954101562, "learning_rate": 9.984006176775496e-06, "loss": 10.0207, "step": 61460 }, { "epoch": 0.12417328910741485, "grad_norm": 285.78533935546875, "learning_rate": 9.983978267088753e-06, "loss": 32.1717, "step": 61470 }, { "epoch": 0.12419348974009865, "grad_norm": 285.52099609375, "learning_rate": 9.983950333110705e-06, "loss": 36.3772, "step": 61480 }, { "epoch": 0.12421369037278247, "grad_norm": 470.8903503417969, "learning_rate": 9.983922374841488e-06, "loss": 20.8264, "step": 61490 }, { "epoch": 0.1242338910054663, "grad_norm": 491.67742919921875, "learning_rate": 9.983894392281237e-06, "loss": 16.5751, "step": 61500 }, { "epoch": 0.12425409163815011, "grad_norm": 320.20867919921875, "learning_rate": 9.98386638543009e-06, "loss": 20.3969, "step": 61510 }, { "epoch": 0.12427429227083392, "grad_norm": 279.6925048828125, "learning_rate": 9.983838354288181e-06, "loss": 17.3553, "step": 61520 }, { "epoch": 0.12429449290351774, "grad_norm": 198.5729217529297, "learning_rate": 9.98381029885565e-06, "loss": 16.2848, "step": 61530 }, { "epoch": 0.12431469353620156, "grad_norm": 293.65960693359375, "learning_rate": 9.983782219132631e-06, "loss": 33.016, "step": 61540 }, { "epoch": 0.12433489416888537, "grad_norm": 326.31573486328125, "learning_rate": 9.983754115119262e-06, "loss": 18.3799, "step": 61550 }, { "epoch": 0.12435509480156919, "grad_norm": 167.3172149658203, "learning_rate": 9.983725986815682e-06, "loss": 27.4014, "step": 61560 }, { "epoch": 0.124375295434253, "grad_norm": 546.3370361328125, "learning_rate": 9.983697834222024e-06, "loss": 23.2046, "step": 61570 }, { "epoch": 0.12439549606693681, "grad_norm": 450.7239685058594, "learning_rate": 9.983669657338425e-06, "loss": 32.8493, "step": 61580 }, { "epoch": 0.12441569669962063, "grad_norm": 292.5492248535156, "learning_rate": 9.98364145616503e-06, "loss": 30.1777, "step": 61590 }, { "epoch": 0.12443589733230445, "grad_norm": 752.1747436523438, "learning_rate": 9.983613230701967e-06, "loss": 35.6876, "step": 61600 }, { "epoch": 0.12445609796498826, "grad_norm": 6.191934585571289, "learning_rate": 9.98358498094938e-06, "loss": 10.0093, "step": 61610 }, { "epoch": 0.12447629859767208, "grad_norm": 296.04583740234375, "learning_rate": 9.983556706907401e-06, "loss": 30.3772, "step": 61620 }, { "epoch": 0.1244964992303559, "grad_norm": 120.18108367919922, "learning_rate": 9.983528408576173e-06, "loss": 23.4639, "step": 61630 }, { "epoch": 0.1245166998630397, "grad_norm": 167.8453826904297, "learning_rate": 9.983500085955833e-06, "loss": 22.2626, "step": 61640 }, { "epoch": 0.12453690049572352, "grad_norm": 103.60237884521484, "learning_rate": 9.983471739046515e-06, "loss": 29.013, "step": 61650 }, { "epoch": 0.12455710112840734, "grad_norm": 180.2743377685547, "learning_rate": 9.983443367848363e-06, "loss": 27.9434, "step": 61660 }, { "epoch": 0.12457730176109116, "grad_norm": 323.1442565917969, "learning_rate": 9.98341497236151e-06, "loss": 31.463, "step": 61670 }, { "epoch": 0.12459750239377497, "grad_norm": 195.96817016601562, "learning_rate": 9.9833865525861e-06, "loss": 26.5358, "step": 61680 }, { "epoch": 0.12461770302645879, "grad_norm": 269.8993225097656, "learning_rate": 9.983358108522266e-06, "loss": 18.0417, "step": 61690 }, { "epoch": 0.12463790365914261, "grad_norm": 306.0559997558594, "learning_rate": 9.98332964017015e-06, "loss": 19.7895, "step": 61700 }, { "epoch": 0.12465810429182642, "grad_norm": 298.81744384765625, "learning_rate": 9.98330114752989e-06, "loss": 38.4523, "step": 61710 }, { "epoch": 0.12467830492451024, "grad_norm": 38.49551773071289, "learning_rate": 9.983272630601624e-06, "loss": 24.3665, "step": 61720 }, { "epoch": 0.12469850555719406, "grad_norm": 423.6549377441406, "learning_rate": 9.983244089385491e-06, "loss": 31.9667, "step": 61730 }, { "epoch": 0.12471870618987786, "grad_norm": 127.2133560180664, "learning_rate": 9.98321552388163e-06, "loss": 20.2159, "step": 61740 }, { "epoch": 0.12473890682256168, "grad_norm": 434.6546630859375, "learning_rate": 9.983186934090183e-06, "loss": 23.602, "step": 61750 }, { "epoch": 0.1247591074552455, "grad_norm": 445.6485290527344, "learning_rate": 9.983158320011288e-06, "loss": 19.8051, "step": 61760 }, { "epoch": 0.12477930808792931, "grad_norm": 458.8792419433594, "learning_rate": 9.983129681645082e-06, "loss": 42.2353, "step": 61770 }, { "epoch": 0.12479950872061313, "grad_norm": 279.5656433105469, "learning_rate": 9.983101018991706e-06, "loss": 54.3891, "step": 61780 }, { "epoch": 0.12481970935329695, "grad_norm": 316.1136779785156, "learning_rate": 9.9830723320513e-06, "loss": 18.6613, "step": 61790 }, { "epoch": 0.12483990998598075, "grad_norm": 441.9535827636719, "learning_rate": 9.983043620824005e-06, "loss": 17.448, "step": 61800 }, { "epoch": 0.12486011061866457, "grad_norm": 485.8661804199219, "learning_rate": 9.983014885309959e-06, "loss": 27.8328, "step": 61810 }, { "epoch": 0.1248803112513484, "grad_norm": 63.746150970458984, "learning_rate": 9.982986125509303e-06, "loss": 24.7939, "step": 61820 }, { "epoch": 0.12490051188403221, "grad_norm": 255.59254455566406, "learning_rate": 9.982957341422177e-06, "loss": 21.4609, "step": 61830 }, { "epoch": 0.12492071251671602, "grad_norm": 126.40507507324219, "learning_rate": 9.982928533048722e-06, "loss": 22.8032, "step": 61840 }, { "epoch": 0.12494091314939984, "grad_norm": 186.78550720214844, "learning_rate": 9.982899700389077e-06, "loss": 25.4894, "step": 61850 }, { "epoch": 0.12496111378208366, "grad_norm": 346.2216491699219, "learning_rate": 9.982870843443381e-06, "loss": 17.3761, "step": 61860 }, { "epoch": 0.12498131441476747, "grad_norm": 485.47064208984375, "learning_rate": 9.98284196221178e-06, "loss": 39.9328, "step": 61870 }, { "epoch": 0.12500151504745127, "grad_norm": 126.49263000488281, "learning_rate": 9.982813056694411e-06, "loss": 19.7279, "step": 61880 }, { "epoch": 0.1250217156801351, "grad_norm": 174.61590576171875, "learning_rate": 9.982784126891416e-06, "loss": 26.8052, "step": 61890 }, { "epoch": 0.1250419163128189, "grad_norm": 305.6413269042969, "learning_rate": 9.982755172802933e-06, "loss": 15.7629, "step": 61900 }, { "epoch": 0.12506211694550273, "grad_norm": 406.178466796875, "learning_rate": 9.98272619442911e-06, "loss": 80.8792, "step": 61910 }, { "epoch": 0.12508231757818655, "grad_norm": 310.6302795410156, "learning_rate": 9.982697191770079e-06, "loss": 41.0908, "step": 61920 }, { "epoch": 0.12510251821087037, "grad_norm": 232.52313232421875, "learning_rate": 9.982668164825989e-06, "loss": 36.1328, "step": 61930 }, { "epoch": 0.1251227188435542, "grad_norm": 189.96353149414062, "learning_rate": 9.982639113596978e-06, "loss": 20.4358, "step": 61940 }, { "epoch": 0.12514291947623798, "grad_norm": 59.16046905517578, "learning_rate": 9.982610038083188e-06, "loss": 21.2593, "step": 61950 }, { "epoch": 0.1251631201089218, "grad_norm": 328.79217529296875, "learning_rate": 9.98258093828476e-06, "loss": 29.7983, "step": 61960 }, { "epoch": 0.12518332074160562, "grad_norm": 256.0841369628906, "learning_rate": 9.98255181420184e-06, "loss": 20.8212, "step": 61970 }, { "epoch": 0.12520352137428944, "grad_norm": 135.9645233154297, "learning_rate": 9.982522665834565e-06, "loss": 28.8157, "step": 61980 }, { "epoch": 0.12522372200697326, "grad_norm": 321.89593505859375, "learning_rate": 9.982493493183079e-06, "loss": 34.9681, "step": 61990 }, { "epoch": 0.12524392263965708, "grad_norm": 305.0898742675781, "learning_rate": 9.982464296247523e-06, "loss": 22.568, "step": 62000 }, { "epoch": 0.12526412327234088, "grad_norm": 243.3053741455078, "learning_rate": 9.98243507502804e-06, "loss": 25.2522, "step": 62010 }, { "epoch": 0.1252843239050247, "grad_norm": 74.99102020263672, "learning_rate": 9.982405829524774e-06, "loss": 15.6583, "step": 62020 }, { "epoch": 0.12530452453770852, "grad_norm": 350.07940673828125, "learning_rate": 9.982376559737866e-06, "loss": 22.7865, "step": 62030 }, { "epoch": 0.12532472517039234, "grad_norm": 419.59918212890625, "learning_rate": 9.982347265667459e-06, "loss": 45.2326, "step": 62040 }, { "epoch": 0.12534492580307616, "grad_norm": 135.07754516601562, "learning_rate": 9.982317947313695e-06, "loss": 21.4109, "step": 62050 }, { "epoch": 0.12536512643575998, "grad_norm": 211.19972229003906, "learning_rate": 9.982288604676719e-06, "loss": 32.7408, "step": 62060 }, { "epoch": 0.1253853270684438, "grad_norm": 407.17437744140625, "learning_rate": 9.982259237756674e-06, "loss": 21.276, "step": 62070 }, { "epoch": 0.1254055277011276, "grad_norm": 709.5663452148438, "learning_rate": 9.982229846553698e-06, "loss": 52.4265, "step": 62080 }, { "epoch": 0.1254257283338114, "grad_norm": 260.7980651855469, "learning_rate": 9.982200431067939e-06, "loss": 20.2943, "step": 62090 }, { "epoch": 0.12544592896649523, "grad_norm": 353.387451171875, "learning_rate": 9.98217099129954e-06, "loss": 21.4215, "step": 62100 }, { "epoch": 0.12546612959917905, "grad_norm": 343.1249084472656, "learning_rate": 9.982141527248646e-06, "loss": 18.0451, "step": 62110 }, { "epoch": 0.12548633023186287, "grad_norm": 155.58248901367188, "learning_rate": 9.982112038915394e-06, "loss": 34.6753, "step": 62120 }, { "epoch": 0.1255065308645467, "grad_norm": 381.0648193359375, "learning_rate": 9.982082526299935e-06, "loss": 20.989, "step": 62130 }, { "epoch": 0.12552673149723048, "grad_norm": 436.151611328125, "learning_rate": 9.98205298940241e-06, "loss": 22.887, "step": 62140 }, { "epoch": 0.1255469321299143, "grad_norm": 52.38493728637695, "learning_rate": 9.982023428222963e-06, "loss": 19.1371, "step": 62150 }, { "epoch": 0.12556713276259812, "grad_norm": 579.5265502929688, "learning_rate": 9.981993842761737e-06, "loss": 35.3967, "step": 62160 }, { "epoch": 0.12558733339528194, "grad_norm": 456.53875732421875, "learning_rate": 9.981964233018877e-06, "loss": 36.4644, "step": 62170 }, { "epoch": 0.12560753402796576, "grad_norm": 249.47103881835938, "learning_rate": 9.981934598994529e-06, "loss": 32.0929, "step": 62180 }, { "epoch": 0.12562773466064958, "grad_norm": 415.7689514160156, "learning_rate": 9.981904940688836e-06, "loss": 16.695, "step": 62190 }, { "epoch": 0.12564793529333337, "grad_norm": 310.6134948730469, "learning_rate": 9.981875258101944e-06, "loss": 41.0399, "step": 62200 }, { "epoch": 0.1256681359260172, "grad_norm": 327.4742126464844, "learning_rate": 9.981845551233993e-06, "loss": 27.0866, "step": 62210 }, { "epoch": 0.125688336558701, "grad_norm": 394.2832336425781, "learning_rate": 9.981815820085132e-06, "loss": 21.4833, "step": 62220 }, { "epoch": 0.12570853719138483, "grad_norm": 234.36106872558594, "learning_rate": 9.981786064655505e-06, "loss": 24.7272, "step": 62230 }, { "epoch": 0.12572873782406865, "grad_norm": 362.79949951171875, "learning_rate": 9.981756284945256e-06, "loss": 23.0963, "step": 62240 }, { "epoch": 0.12574893845675247, "grad_norm": 1106.55126953125, "learning_rate": 9.981726480954532e-06, "loss": 33.8362, "step": 62250 }, { "epoch": 0.1257691390894363, "grad_norm": 9.426780700683594, "learning_rate": 9.981696652683479e-06, "loss": 23.5031, "step": 62260 }, { "epoch": 0.12578933972212009, "grad_norm": 415.88067626953125, "learning_rate": 9.98166680013224e-06, "loss": 29.3916, "step": 62270 }, { "epoch": 0.1258095403548039, "grad_norm": 284.00390625, "learning_rate": 9.981636923300959e-06, "loss": 23.9921, "step": 62280 }, { "epoch": 0.12582974098748773, "grad_norm": 253.13829040527344, "learning_rate": 9.981607022189785e-06, "loss": 20.2359, "step": 62290 }, { "epoch": 0.12584994162017155, "grad_norm": 78.62439727783203, "learning_rate": 9.981577096798864e-06, "loss": 26.3576, "step": 62300 }, { "epoch": 0.12587014225285537, "grad_norm": 178.4411163330078, "learning_rate": 9.981547147128338e-06, "loss": 29.74, "step": 62310 }, { "epoch": 0.12589034288553919, "grad_norm": 357.2162170410156, "learning_rate": 9.981517173178357e-06, "loss": 28.2385, "step": 62320 }, { "epoch": 0.12591054351822298, "grad_norm": 1505.00634765625, "learning_rate": 9.981487174949065e-06, "loss": 33.9281, "step": 62330 }, { "epoch": 0.1259307441509068, "grad_norm": 445.2160949707031, "learning_rate": 9.98145715244061e-06, "loss": 25.5891, "step": 62340 }, { "epoch": 0.12595094478359062, "grad_norm": 258.805908203125, "learning_rate": 9.981427105653135e-06, "loss": 12.529, "step": 62350 }, { "epoch": 0.12597114541627444, "grad_norm": 130.9609375, "learning_rate": 9.981397034586789e-06, "loss": 17.3215, "step": 62360 }, { "epoch": 0.12599134604895826, "grad_norm": 219.597900390625, "learning_rate": 9.981366939241719e-06, "loss": 19.1847, "step": 62370 }, { "epoch": 0.12601154668164208, "grad_norm": 111.48919677734375, "learning_rate": 9.98133681961807e-06, "loss": 14.4382, "step": 62380 }, { "epoch": 0.1260317473143259, "grad_norm": 282.3527526855469, "learning_rate": 9.981306675715989e-06, "loss": 31.2238, "step": 62390 }, { "epoch": 0.1260519479470097, "grad_norm": 500.3333740234375, "learning_rate": 9.981276507535625e-06, "loss": 30.8165, "step": 62400 }, { "epoch": 0.1260721485796935, "grad_norm": 340.5166931152344, "learning_rate": 9.981246315077123e-06, "loss": 23.6784, "step": 62410 }, { "epoch": 0.12609234921237733, "grad_norm": 198.2367401123047, "learning_rate": 9.98121609834063e-06, "loss": 22.7156, "step": 62420 }, { "epoch": 0.12611254984506115, "grad_norm": 238.6802978515625, "learning_rate": 9.981185857326292e-06, "loss": 26.808, "step": 62430 }, { "epoch": 0.12613275047774497, "grad_norm": 848.0789794921875, "learning_rate": 9.98115559203426e-06, "loss": 50.9823, "step": 62440 }, { "epoch": 0.1261529511104288, "grad_norm": 43.00599670410156, "learning_rate": 9.981125302464681e-06, "loss": 42.4073, "step": 62450 }, { "epoch": 0.12617315174311258, "grad_norm": 218.25152587890625, "learning_rate": 9.9810949886177e-06, "loss": 16.707, "step": 62460 }, { "epoch": 0.1261933523757964, "grad_norm": 365.4873046875, "learning_rate": 9.981064650493466e-06, "loss": 20.097, "step": 62470 }, { "epoch": 0.12621355300848022, "grad_norm": 268.1619567871094, "learning_rate": 9.981034288092129e-06, "loss": 13.7754, "step": 62480 }, { "epoch": 0.12623375364116404, "grad_norm": 214.0181427001953, "learning_rate": 9.981003901413833e-06, "loss": 29.1083, "step": 62490 }, { "epoch": 0.12625395427384786, "grad_norm": 495.5767822265625, "learning_rate": 9.980973490458728e-06, "loss": 27.0592, "step": 62500 }, { "epoch": 0.12627415490653168, "grad_norm": 226.7865447998047, "learning_rate": 9.980943055226964e-06, "loss": 39.1147, "step": 62510 }, { "epoch": 0.12629435553921547, "grad_norm": 280.8135681152344, "learning_rate": 9.980912595718686e-06, "loss": 52.186, "step": 62520 }, { "epoch": 0.1263145561718993, "grad_norm": 338.8858337402344, "learning_rate": 9.980882111934046e-06, "loss": 24.3076, "step": 62530 }, { "epoch": 0.12633475680458311, "grad_norm": 253.2738037109375, "learning_rate": 9.980851603873189e-06, "loss": 12.2504, "step": 62540 }, { "epoch": 0.12635495743726693, "grad_norm": 290.3886413574219, "learning_rate": 9.980821071536266e-06, "loss": 17.6416, "step": 62550 }, { "epoch": 0.12637515806995075, "grad_norm": 222.4193572998047, "learning_rate": 9.980790514923425e-06, "loss": 18.0189, "step": 62560 }, { "epoch": 0.12639535870263457, "grad_norm": 338.96575927734375, "learning_rate": 9.980759934034816e-06, "loss": 33.2201, "step": 62570 }, { "epoch": 0.1264155593353184, "grad_norm": 248.20677185058594, "learning_rate": 9.980729328870586e-06, "loss": 27.8087, "step": 62580 }, { "epoch": 0.1264357599680022, "grad_norm": 454.62457275390625, "learning_rate": 9.980698699430884e-06, "loss": 30.1609, "step": 62590 }, { "epoch": 0.126455960600686, "grad_norm": 160.72463989257812, "learning_rate": 9.980668045715864e-06, "loss": 15.0084, "step": 62600 }, { "epoch": 0.12647616123336983, "grad_norm": 303.220947265625, "learning_rate": 9.98063736772567e-06, "loss": 26.6503, "step": 62610 }, { "epoch": 0.12649636186605365, "grad_norm": 313.7117614746094, "learning_rate": 9.980606665460453e-06, "loss": 26.235, "step": 62620 }, { "epoch": 0.12651656249873747, "grad_norm": 381.4930725097656, "learning_rate": 9.980575938920364e-06, "loss": 15.6129, "step": 62630 }, { "epoch": 0.1265367631314213, "grad_norm": 85.91773986816406, "learning_rate": 9.980545188105553e-06, "loss": 33.0775, "step": 62640 }, { "epoch": 0.12655696376410508, "grad_norm": 237.16001892089844, "learning_rate": 9.980514413016167e-06, "loss": 14.195, "step": 62650 }, { "epoch": 0.1265771643967889, "grad_norm": 340.02978515625, "learning_rate": 9.980483613652359e-06, "loss": 15.8053, "step": 62660 }, { "epoch": 0.12659736502947272, "grad_norm": 407.2967224121094, "learning_rate": 9.980452790014278e-06, "loss": 16.7743, "step": 62670 }, { "epoch": 0.12661756566215654, "grad_norm": 376.3329162597656, "learning_rate": 9.980421942102075e-06, "loss": 28.763, "step": 62680 }, { "epoch": 0.12663776629484036, "grad_norm": 451.1468505859375, "learning_rate": 9.980391069915897e-06, "loss": 13.2703, "step": 62690 }, { "epoch": 0.12665796692752418, "grad_norm": 48.2034797668457, "learning_rate": 9.980360173455899e-06, "loss": 25.7024, "step": 62700 }, { "epoch": 0.126678167560208, "grad_norm": 236.39694213867188, "learning_rate": 9.980329252722227e-06, "loss": 14.4805, "step": 62710 }, { "epoch": 0.1266983681928918, "grad_norm": 320.87896728515625, "learning_rate": 9.980298307715038e-06, "loss": 28.5063, "step": 62720 }, { "epoch": 0.1267185688255756, "grad_norm": 377.0420227050781, "learning_rate": 9.980267338434477e-06, "loss": 34.5518, "step": 62730 }, { "epoch": 0.12673876945825943, "grad_norm": 48.53511428833008, "learning_rate": 9.980236344880696e-06, "loss": 15.0901, "step": 62740 }, { "epoch": 0.12675897009094325, "grad_norm": 595.3807373046875, "learning_rate": 9.98020532705385e-06, "loss": 36.0624, "step": 62750 }, { "epoch": 0.12677917072362707, "grad_norm": 351.7922668457031, "learning_rate": 9.980174284954084e-06, "loss": 46.5122, "step": 62760 }, { "epoch": 0.1267993713563109, "grad_norm": 185.01815795898438, "learning_rate": 9.980143218581555e-06, "loss": 34.1635, "step": 62770 }, { "epoch": 0.12681957198899468, "grad_norm": 698.515380859375, "learning_rate": 9.98011212793641e-06, "loss": 23.1485, "step": 62780 }, { "epoch": 0.1268397726216785, "grad_norm": 59.31246566772461, "learning_rate": 9.980081013018804e-06, "loss": 18.085, "step": 62790 }, { "epoch": 0.12685997325436232, "grad_norm": 42.891815185546875, "learning_rate": 9.980049873828887e-06, "loss": 15.0156, "step": 62800 }, { "epoch": 0.12688017388704614, "grad_norm": 398.5591125488281, "learning_rate": 9.98001871036681e-06, "loss": 36.735, "step": 62810 }, { "epoch": 0.12690037451972996, "grad_norm": 664.0990600585938, "learning_rate": 9.979987522632727e-06, "loss": 36.419, "step": 62820 }, { "epoch": 0.12692057515241378, "grad_norm": 506.08123779296875, "learning_rate": 9.979956310626788e-06, "loss": 25.1162, "step": 62830 }, { "epoch": 0.12694077578509758, "grad_norm": 63.963768005371094, "learning_rate": 9.979925074349146e-06, "loss": 28.0062, "step": 62840 }, { "epoch": 0.1269609764177814, "grad_norm": 221.45730590820312, "learning_rate": 9.979893813799953e-06, "loss": 14.4822, "step": 62850 }, { "epoch": 0.12698117705046522, "grad_norm": 91.53536224365234, "learning_rate": 9.979862528979362e-06, "loss": 16.4623, "step": 62860 }, { "epoch": 0.12700137768314904, "grad_norm": 264.53985595703125, "learning_rate": 9.979831219887526e-06, "loss": 19.6664, "step": 62870 }, { "epoch": 0.12702157831583286, "grad_norm": 247.8815155029297, "learning_rate": 9.979799886524594e-06, "loss": 16.2956, "step": 62880 }, { "epoch": 0.12704177894851668, "grad_norm": 577.5094604492188, "learning_rate": 9.979768528890725e-06, "loss": 35.9145, "step": 62890 }, { "epoch": 0.1270619795812005, "grad_norm": 905.8956298828125, "learning_rate": 9.979737146986064e-06, "loss": 41.5068, "step": 62900 }, { "epoch": 0.1270821802138843, "grad_norm": 273.90301513671875, "learning_rate": 9.979705740810771e-06, "loss": 25.8719, "step": 62910 }, { "epoch": 0.1271023808465681, "grad_norm": 539.7093505859375, "learning_rate": 9.979674310364996e-06, "loss": 26.2896, "step": 62920 }, { "epoch": 0.12712258147925193, "grad_norm": 526.2796020507812, "learning_rate": 9.979642855648892e-06, "loss": 26.1018, "step": 62930 }, { "epoch": 0.12714278211193575, "grad_norm": 203.54661560058594, "learning_rate": 9.979611376662613e-06, "loss": 26.8522, "step": 62940 }, { "epoch": 0.12716298274461957, "grad_norm": 251.9454345703125, "learning_rate": 9.97957987340631e-06, "loss": 21.4644, "step": 62950 }, { "epoch": 0.1271831833773034, "grad_norm": 500.6198425292969, "learning_rate": 9.979548345880142e-06, "loss": 27.4901, "step": 62960 }, { "epoch": 0.12720338400998718, "grad_norm": 574.1717529296875, "learning_rate": 9.979516794084256e-06, "loss": 25.4944, "step": 62970 }, { "epoch": 0.127223584642671, "grad_norm": 379.51153564453125, "learning_rate": 9.97948521801881e-06, "loss": 20.6563, "step": 62980 }, { "epoch": 0.12724378527535482, "grad_norm": 259.02734375, "learning_rate": 9.979453617683958e-06, "loss": 16.3362, "step": 62990 }, { "epoch": 0.12726398590803864, "grad_norm": 329.0652160644531, "learning_rate": 9.979421993079853e-06, "loss": 27.8089, "step": 63000 }, { "epoch": 0.12728418654072246, "grad_norm": 447.4747314453125, "learning_rate": 9.979390344206648e-06, "loss": 50.0129, "step": 63010 }, { "epoch": 0.12730438717340628, "grad_norm": 229.65965270996094, "learning_rate": 9.9793586710645e-06, "loss": 23.5742, "step": 63020 }, { "epoch": 0.1273245878060901, "grad_norm": 650.58447265625, "learning_rate": 9.97932697365356e-06, "loss": 22.8997, "step": 63030 }, { "epoch": 0.1273447884387739, "grad_norm": 462.25189208984375, "learning_rate": 9.979295251973986e-06, "loss": 20.6243, "step": 63040 }, { "epoch": 0.1273649890714577, "grad_norm": 47.0612907409668, "learning_rate": 9.97926350602593e-06, "loss": 37.5332, "step": 63050 }, { "epoch": 0.12738518970414153, "grad_norm": 397.7591857910156, "learning_rate": 9.979231735809546e-06, "loss": 16.7489, "step": 63060 }, { "epoch": 0.12740539033682535, "grad_norm": 190.6727752685547, "learning_rate": 9.979199941324994e-06, "loss": 25.7037, "step": 63070 }, { "epoch": 0.12742559096950917, "grad_norm": 173.66542053222656, "learning_rate": 9.979168122572422e-06, "loss": 47.5062, "step": 63080 }, { "epoch": 0.127445791602193, "grad_norm": 326.77392578125, "learning_rate": 9.97913627955199e-06, "loss": 29.4594, "step": 63090 }, { "epoch": 0.12746599223487678, "grad_norm": 341.8619079589844, "learning_rate": 9.979104412263851e-06, "loss": 14.0283, "step": 63100 }, { "epoch": 0.1274861928675606, "grad_norm": 621.6937255859375, "learning_rate": 9.979072520708162e-06, "loss": 52.5706, "step": 63110 }, { "epoch": 0.12750639350024442, "grad_norm": 142.89964294433594, "learning_rate": 9.979040604885077e-06, "loss": 17.2266, "step": 63120 }, { "epoch": 0.12752659413292824, "grad_norm": 247.5001983642578, "learning_rate": 9.979008664794751e-06, "loss": 25.3151, "step": 63130 }, { "epoch": 0.12754679476561206, "grad_norm": 366.8077087402344, "learning_rate": 9.978976700437341e-06, "loss": 30.0513, "step": 63140 }, { "epoch": 0.12756699539829588, "grad_norm": 587.1327514648438, "learning_rate": 9.978944711813003e-06, "loss": 24.3117, "step": 63150 }, { "epoch": 0.12758719603097968, "grad_norm": 248.87257385253906, "learning_rate": 9.978912698921892e-06, "loss": 23.8519, "step": 63160 }, { "epoch": 0.1276073966636635, "grad_norm": 286.37371826171875, "learning_rate": 9.978880661764166e-06, "loss": 27.7605, "step": 63170 }, { "epoch": 0.12762759729634732, "grad_norm": 598.4779052734375, "learning_rate": 9.978848600339978e-06, "loss": 27.565, "step": 63180 }, { "epoch": 0.12764779792903114, "grad_norm": 184.56690979003906, "learning_rate": 9.978816514649486e-06, "loss": 22.0815, "step": 63190 }, { "epoch": 0.12766799856171496, "grad_norm": 389.5472106933594, "learning_rate": 9.978784404692847e-06, "loss": 16.2597, "step": 63200 }, { "epoch": 0.12768819919439878, "grad_norm": 362.3110656738281, "learning_rate": 9.978752270470216e-06, "loss": 29.5277, "step": 63210 }, { "epoch": 0.1277083998270826, "grad_norm": 218.22373962402344, "learning_rate": 9.97872011198175e-06, "loss": 22.49, "step": 63220 }, { "epoch": 0.1277286004597664, "grad_norm": 320.49365234375, "learning_rate": 9.978687929227606e-06, "loss": 12.4034, "step": 63230 }, { "epoch": 0.1277488010924502, "grad_norm": 121.07633209228516, "learning_rate": 9.97865572220794e-06, "loss": 32.1143, "step": 63240 }, { "epoch": 0.12776900172513403, "grad_norm": 167.04417419433594, "learning_rate": 9.978623490922913e-06, "loss": 23.4526, "step": 63250 }, { "epoch": 0.12778920235781785, "grad_norm": 415.9090270996094, "learning_rate": 9.978591235372675e-06, "loss": 27.3229, "step": 63260 }, { "epoch": 0.12780940299050167, "grad_norm": 302.4593200683594, "learning_rate": 9.97855895555739e-06, "loss": 39.9013, "step": 63270 }, { "epoch": 0.1278296036231855, "grad_norm": 437.546630859375, "learning_rate": 9.978526651477211e-06, "loss": 23.2822, "step": 63280 }, { "epoch": 0.12784980425586928, "grad_norm": 617.0231323242188, "learning_rate": 9.978494323132296e-06, "loss": 36.5621, "step": 63290 }, { "epoch": 0.1278700048885531, "grad_norm": 322.4091796875, "learning_rate": 9.978461970522807e-06, "loss": 41.6354, "step": 63300 }, { "epoch": 0.12789020552123692, "grad_norm": 512.1978149414062, "learning_rate": 9.978429593648894e-06, "loss": 35.2399, "step": 63310 }, { "epoch": 0.12791040615392074, "grad_norm": 577.8993530273438, "learning_rate": 9.978397192510722e-06, "loss": 48.182, "step": 63320 }, { "epoch": 0.12793060678660456, "grad_norm": 425.5824890136719, "learning_rate": 9.978364767108444e-06, "loss": 24.1838, "step": 63330 }, { "epoch": 0.12795080741928838, "grad_norm": 267.6136474609375, "learning_rate": 9.97833231744222e-06, "loss": 34.8634, "step": 63340 }, { "epoch": 0.1279710080519722, "grad_norm": 365.8782958984375, "learning_rate": 9.97829984351221e-06, "loss": 15.3873, "step": 63350 }, { "epoch": 0.127991208684656, "grad_norm": 301.28277587890625, "learning_rate": 9.978267345318569e-06, "loss": 24.4274, "step": 63360 }, { "epoch": 0.1280114093173398, "grad_norm": 222.0595703125, "learning_rate": 9.978234822861456e-06, "loss": 15.197, "step": 63370 }, { "epoch": 0.12803160995002363, "grad_norm": 506.0646667480469, "learning_rate": 9.978202276141032e-06, "loss": 44.2865, "step": 63380 }, { "epoch": 0.12805181058270745, "grad_norm": 206.04562377929688, "learning_rate": 9.978169705157455e-06, "loss": 30.7349, "step": 63390 }, { "epoch": 0.12807201121539127, "grad_norm": 42.935367584228516, "learning_rate": 9.97813710991088e-06, "loss": 14.3837, "step": 63400 }, { "epoch": 0.1280922118480751, "grad_norm": 362.5654296875, "learning_rate": 9.978104490401468e-06, "loss": 30.5187, "step": 63410 }, { "epoch": 0.12811241248075889, "grad_norm": 473.4012451171875, "learning_rate": 9.978071846629381e-06, "loss": 17.8472, "step": 63420 }, { "epoch": 0.1281326131134427, "grad_norm": 157.42947387695312, "learning_rate": 9.978039178594774e-06, "loss": 26.9455, "step": 63430 }, { "epoch": 0.12815281374612653, "grad_norm": 181.87811279296875, "learning_rate": 9.978006486297808e-06, "loss": 18.9794, "step": 63440 }, { "epoch": 0.12817301437881035, "grad_norm": 318.9611511230469, "learning_rate": 9.977973769738642e-06, "loss": 17.4012, "step": 63450 }, { "epoch": 0.12819321501149417, "grad_norm": 243.36875915527344, "learning_rate": 9.977941028917436e-06, "loss": 31.9373, "step": 63460 }, { "epoch": 0.12821341564417799, "grad_norm": 283.6794738769531, "learning_rate": 9.977908263834348e-06, "loss": 46.1038, "step": 63470 }, { "epoch": 0.12823361627686178, "grad_norm": 2777.658203125, "learning_rate": 9.97787547448954e-06, "loss": 50.0659, "step": 63480 }, { "epoch": 0.1282538169095456, "grad_norm": 350.630615234375, "learning_rate": 9.977842660883172e-06, "loss": 16.3656, "step": 63490 }, { "epoch": 0.12827401754222942, "grad_norm": 687.9367065429688, "learning_rate": 9.9778098230154e-06, "loss": 37.2254, "step": 63500 }, { "epoch": 0.12829421817491324, "grad_norm": 416.64556884765625, "learning_rate": 9.97777696088639e-06, "loss": 22.424, "step": 63510 }, { "epoch": 0.12831441880759706, "grad_norm": 257.024169921875, "learning_rate": 9.977744074496297e-06, "loss": 32.6555, "step": 63520 }, { "epoch": 0.12833461944028088, "grad_norm": 86.00170135498047, "learning_rate": 9.97771116384528e-06, "loss": 15.5293, "step": 63530 }, { "epoch": 0.1283548200729647, "grad_norm": 795.768310546875, "learning_rate": 9.977678228933508e-06, "loss": 41.5156, "step": 63540 }, { "epoch": 0.1283750207056485, "grad_norm": 204.63885498046875, "learning_rate": 9.977645269761131e-06, "loss": 14.8551, "step": 63550 }, { "epoch": 0.1283952213383323, "grad_norm": 283.9824523925781, "learning_rate": 9.977612286328317e-06, "loss": 17.9131, "step": 63560 }, { "epoch": 0.12841542197101613, "grad_norm": 275.2733154296875, "learning_rate": 9.977579278635225e-06, "loss": 22.1619, "step": 63570 }, { "epoch": 0.12843562260369995, "grad_norm": 135.99635314941406, "learning_rate": 9.977546246682015e-06, "loss": 17.6926, "step": 63580 }, { "epoch": 0.12845582323638377, "grad_norm": 432.4513244628906, "learning_rate": 9.977513190468848e-06, "loss": 32.5469, "step": 63590 }, { "epoch": 0.1284760238690676, "grad_norm": 401.6201477050781, "learning_rate": 9.977480109995886e-06, "loss": 18.0901, "step": 63600 }, { "epoch": 0.12849622450175138, "grad_norm": 38.47280502319336, "learning_rate": 9.977447005263289e-06, "loss": 24.3205, "step": 63610 }, { "epoch": 0.1285164251344352, "grad_norm": 0.0, "learning_rate": 9.97741387627122e-06, "loss": 26.4725, "step": 63620 }, { "epoch": 0.12853662576711902, "grad_norm": 316.2154235839844, "learning_rate": 9.977380723019838e-06, "loss": 24.2403, "step": 63630 }, { "epoch": 0.12855682639980284, "grad_norm": 174.98487854003906, "learning_rate": 9.977347545509307e-06, "loss": 32.8193, "step": 63640 }, { "epoch": 0.12857702703248666, "grad_norm": 242.50994873046875, "learning_rate": 9.977314343739785e-06, "loss": 29.025, "step": 63650 }, { "epoch": 0.12859722766517048, "grad_norm": 336.4835510253906, "learning_rate": 9.97728111771144e-06, "loss": 19.1214, "step": 63660 }, { "epoch": 0.1286174282978543, "grad_norm": 977.3390502929688, "learning_rate": 9.97724786742443e-06, "loss": 38.1071, "step": 63670 }, { "epoch": 0.1286376289305381, "grad_norm": 461.77398681640625, "learning_rate": 9.977214592878917e-06, "loss": 23.8822, "step": 63680 }, { "epoch": 0.12865782956322191, "grad_norm": 298.1845703125, "learning_rate": 9.977181294075063e-06, "loss": 27.8384, "step": 63690 }, { "epoch": 0.12867803019590573, "grad_norm": 219.61505126953125, "learning_rate": 9.977147971013033e-06, "loss": 23.5517, "step": 63700 }, { "epoch": 0.12869823082858955, "grad_norm": 149.32620239257812, "learning_rate": 9.977114623692985e-06, "loss": 21.2692, "step": 63710 }, { "epoch": 0.12871843146127337, "grad_norm": 184.06231689453125, "learning_rate": 9.977081252115085e-06, "loss": 23.1091, "step": 63720 }, { "epoch": 0.1287386320939572, "grad_norm": 177.11691284179688, "learning_rate": 9.977047856279496e-06, "loss": 18.9699, "step": 63730 }, { "epoch": 0.128758832726641, "grad_norm": 264.3474426269531, "learning_rate": 9.977014436186377e-06, "loss": 16.6388, "step": 63740 }, { "epoch": 0.1287790333593248, "grad_norm": 167.65147399902344, "learning_rate": 9.976980991835896e-06, "loss": 33.1763, "step": 63750 }, { "epoch": 0.12879923399200863, "grad_norm": 488.4218444824219, "learning_rate": 9.97694752322821e-06, "loss": 14.6142, "step": 63760 }, { "epoch": 0.12881943462469245, "grad_norm": 237.71029663085938, "learning_rate": 9.976914030363488e-06, "loss": 19.8845, "step": 63770 }, { "epoch": 0.12883963525737627, "grad_norm": 354.8117370605469, "learning_rate": 9.976880513241889e-06, "loss": 22.3282, "step": 63780 }, { "epoch": 0.1288598358900601, "grad_norm": 21.65936279296875, "learning_rate": 9.976846971863579e-06, "loss": 25.6418, "step": 63790 }, { "epoch": 0.12888003652274388, "grad_norm": 317.18560791015625, "learning_rate": 9.97681340622872e-06, "loss": 21.0391, "step": 63800 }, { "epoch": 0.1289002371554277, "grad_norm": 101.81526184082031, "learning_rate": 9.976779816337476e-06, "loss": 14.7204, "step": 63810 }, { "epoch": 0.12892043778811152, "grad_norm": 509.6060791015625, "learning_rate": 9.976746202190012e-06, "loss": 24.7291, "step": 63820 }, { "epoch": 0.12894063842079534, "grad_norm": 126.45651245117188, "learning_rate": 9.97671256378649e-06, "loss": 15.4054, "step": 63830 }, { "epoch": 0.12896083905347916, "grad_norm": 292.7330322265625, "learning_rate": 9.976678901127074e-06, "loss": 25.5496, "step": 63840 }, { "epoch": 0.12898103968616298, "grad_norm": 73.06707763671875, "learning_rate": 9.976645214211929e-06, "loss": 25.7349, "step": 63850 }, { "epoch": 0.1290012403188468, "grad_norm": 253.2226104736328, "learning_rate": 9.976611503041218e-06, "loss": 39.7116, "step": 63860 }, { "epoch": 0.1290214409515306, "grad_norm": 510.30322265625, "learning_rate": 9.976577767615108e-06, "loss": 22.2725, "step": 63870 }, { "epoch": 0.1290416415842144, "grad_norm": 157.97813415527344, "learning_rate": 9.97654400793376e-06, "loss": 15.3295, "step": 63880 }, { "epoch": 0.12906184221689823, "grad_norm": 302.39630126953125, "learning_rate": 9.97651022399734e-06, "loss": 25.0676, "step": 63890 }, { "epoch": 0.12908204284958205, "grad_norm": 327.44097900390625, "learning_rate": 9.976476415806013e-06, "loss": 23.1702, "step": 63900 }, { "epoch": 0.12910224348226587, "grad_norm": 178.8892364501953, "learning_rate": 9.976442583359944e-06, "loss": 17.2219, "step": 63910 }, { "epoch": 0.1291224441149497, "grad_norm": 215.63726806640625, "learning_rate": 9.976408726659296e-06, "loss": 20.211, "step": 63920 }, { "epoch": 0.12914264474763348, "grad_norm": 143.2929229736328, "learning_rate": 9.976374845704238e-06, "loss": 30.4832, "step": 63930 }, { "epoch": 0.1291628453803173, "grad_norm": 219.46473693847656, "learning_rate": 9.976340940494931e-06, "loss": 15.2566, "step": 63940 }, { "epoch": 0.12918304601300112, "grad_norm": 247.83139038085938, "learning_rate": 9.976307011031542e-06, "loss": 17.5296, "step": 63950 }, { "epoch": 0.12920324664568494, "grad_norm": 8.385363578796387, "learning_rate": 9.976273057314236e-06, "loss": 15.107, "step": 63960 }, { "epoch": 0.12922344727836876, "grad_norm": 318.6415100097656, "learning_rate": 9.97623907934318e-06, "loss": 17.4182, "step": 63970 }, { "epoch": 0.12924364791105258, "grad_norm": 446.9435729980469, "learning_rate": 9.976205077118536e-06, "loss": 35.0835, "step": 63980 }, { "epoch": 0.12926384854373638, "grad_norm": 319.917236328125, "learning_rate": 9.976171050640473e-06, "loss": 15.2701, "step": 63990 }, { "epoch": 0.1292840491764202, "grad_norm": 594.6360473632812, "learning_rate": 9.976136999909156e-06, "loss": 29.3561, "step": 64000 }, { "epoch": 0.12930424980910402, "grad_norm": 209.76527404785156, "learning_rate": 9.976102924924752e-06, "loss": 21.4054, "step": 64010 }, { "epoch": 0.12932445044178784, "grad_norm": 290.1529846191406, "learning_rate": 9.976068825687424e-06, "loss": 59.4728, "step": 64020 }, { "epoch": 0.12934465107447166, "grad_norm": 770.0337524414062, "learning_rate": 9.97603470219734e-06, "loss": 35.107, "step": 64030 }, { "epoch": 0.12936485170715548, "grad_norm": 583.91748046875, "learning_rate": 9.976000554454668e-06, "loss": 33.0495, "step": 64040 }, { "epoch": 0.1293850523398393, "grad_norm": 292.8315124511719, "learning_rate": 9.975966382459571e-06, "loss": 27.9579, "step": 64050 }, { "epoch": 0.1294052529725231, "grad_norm": 494.964111328125, "learning_rate": 9.975932186212217e-06, "loss": 25.2093, "step": 64060 }, { "epoch": 0.1294254536052069, "grad_norm": 188.4208221435547, "learning_rate": 9.975897965712777e-06, "loss": 31.9076, "step": 64070 }, { "epoch": 0.12944565423789073, "grad_norm": 119.8970947265625, "learning_rate": 9.975863720961411e-06, "loss": 24.8472, "step": 64080 }, { "epoch": 0.12946585487057455, "grad_norm": 339.3464660644531, "learning_rate": 9.975829451958288e-06, "loss": 17.2565, "step": 64090 }, { "epoch": 0.12948605550325837, "grad_norm": 704.3992309570312, "learning_rate": 9.975795158703576e-06, "loss": 19.024, "step": 64100 }, { "epoch": 0.1295062561359422, "grad_norm": 324.55352783203125, "learning_rate": 9.975760841197443e-06, "loss": 18.1377, "step": 64110 }, { "epoch": 0.12952645676862598, "grad_norm": 669.3160400390625, "learning_rate": 9.975726499440055e-06, "loss": 36.9507, "step": 64120 }, { "epoch": 0.1295466574013098, "grad_norm": 361.9762268066406, "learning_rate": 9.975692133431579e-06, "loss": 21.2675, "step": 64130 }, { "epoch": 0.12956685803399362, "grad_norm": 438.4812927246094, "learning_rate": 9.975657743172182e-06, "loss": 34.4257, "step": 64140 }, { "epoch": 0.12958705866667744, "grad_norm": 394.0048522949219, "learning_rate": 9.975623328662036e-06, "loss": 25.7155, "step": 64150 }, { "epoch": 0.12960725929936126, "grad_norm": 146.80307006835938, "learning_rate": 9.975588889901302e-06, "loss": 44.6393, "step": 64160 }, { "epoch": 0.12962745993204508, "grad_norm": 280.2718811035156, "learning_rate": 9.975554426890152e-06, "loss": 31.7724, "step": 64170 }, { "epoch": 0.1296476605647289, "grad_norm": 192.1943359375, "learning_rate": 9.975519939628754e-06, "loss": 20.6603, "step": 64180 }, { "epoch": 0.1296678611974127, "grad_norm": 625.4708251953125, "learning_rate": 9.975485428117276e-06, "loss": 30.0805, "step": 64190 }, { "epoch": 0.1296880618300965, "grad_norm": 493.0346374511719, "learning_rate": 9.975450892355882e-06, "loss": 23.5884, "step": 64200 }, { "epoch": 0.12970826246278033, "grad_norm": 241.29238891601562, "learning_rate": 9.975416332344747e-06, "loss": 15.5798, "step": 64210 }, { "epoch": 0.12972846309546415, "grad_norm": 212.4602508544922, "learning_rate": 9.975381748084035e-06, "loss": 22.2691, "step": 64220 }, { "epoch": 0.12974866372814797, "grad_norm": 392.69781494140625, "learning_rate": 9.975347139573917e-06, "loss": 24.9543, "step": 64230 }, { "epoch": 0.1297688643608318, "grad_norm": 853.1611938476562, "learning_rate": 9.97531250681456e-06, "loss": 32.1381, "step": 64240 }, { "epoch": 0.12978906499351558, "grad_norm": 6.127750873565674, "learning_rate": 9.975277849806133e-06, "loss": 19.7468, "step": 64250 }, { "epoch": 0.1298092656261994, "grad_norm": 277.82958984375, "learning_rate": 9.975243168548804e-06, "loss": 22.047, "step": 64260 }, { "epoch": 0.12982946625888322, "grad_norm": 158.65493774414062, "learning_rate": 9.975208463042745e-06, "loss": 17.1629, "step": 64270 }, { "epoch": 0.12984966689156704, "grad_norm": 110.14351654052734, "learning_rate": 9.975173733288122e-06, "loss": 11.7213, "step": 64280 }, { "epoch": 0.12986986752425086, "grad_norm": 320.3935852050781, "learning_rate": 9.975138979285107e-06, "loss": 13.096, "step": 64290 }, { "epoch": 0.12989006815693468, "grad_norm": 321.2043151855469, "learning_rate": 9.975104201033868e-06, "loss": 26.2436, "step": 64300 }, { "epoch": 0.12991026878961848, "grad_norm": 492.3165588378906, "learning_rate": 9.975069398534574e-06, "loss": 20.0438, "step": 64310 }, { "epoch": 0.1299304694223023, "grad_norm": 208.0177764892578, "learning_rate": 9.975034571787394e-06, "loss": 19.2609, "step": 64320 }, { "epoch": 0.12995067005498612, "grad_norm": 605.1170043945312, "learning_rate": 9.9749997207925e-06, "loss": 55.1152, "step": 64330 }, { "epoch": 0.12997087068766994, "grad_norm": 687.5513305664062, "learning_rate": 9.974964845550062e-06, "loss": 35.4598, "step": 64340 }, { "epoch": 0.12999107132035376, "grad_norm": 277.40069580078125, "learning_rate": 9.974929946060246e-06, "loss": 27.7061, "step": 64350 }, { "epoch": 0.13001127195303758, "grad_norm": 271.25048828125, "learning_rate": 9.974895022323226e-06, "loss": 22.1908, "step": 64360 }, { "epoch": 0.1300314725857214, "grad_norm": 209.47959899902344, "learning_rate": 9.974860074339173e-06, "loss": 26.1927, "step": 64370 }, { "epoch": 0.1300516732184052, "grad_norm": 687.7922973632812, "learning_rate": 9.974825102108251e-06, "loss": 33.5689, "step": 64380 }, { "epoch": 0.130071873851089, "grad_norm": 389.5356140136719, "learning_rate": 9.974790105630639e-06, "loss": 24.8389, "step": 64390 }, { "epoch": 0.13009207448377283, "grad_norm": 403.7509460449219, "learning_rate": 9.974755084906503e-06, "loss": 21.8665, "step": 64400 }, { "epoch": 0.13011227511645665, "grad_norm": 251.35894775390625, "learning_rate": 9.974720039936012e-06, "loss": 30.2783, "step": 64410 }, { "epoch": 0.13013247574914047, "grad_norm": 297.5974426269531, "learning_rate": 9.97468497071934e-06, "loss": 17.8066, "step": 64420 }, { "epoch": 0.1301526763818243, "grad_norm": 378.1520080566406, "learning_rate": 9.974649877256657e-06, "loss": 15.5342, "step": 64430 }, { "epoch": 0.13017287701450808, "grad_norm": 389.9529724121094, "learning_rate": 9.974614759548133e-06, "loss": 24.9331, "step": 64440 }, { "epoch": 0.1301930776471919, "grad_norm": 799.7656860351562, "learning_rate": 9.97457961759394e-06, "loss": 31.2876, "step": 64450 }, { "epoch": 0.13021327827987572, "grad_norm": 211.61444091796875, "learning_rate": 9.97454445139425e-06, "loss": 28.814, "step": 64460 }, { "epoch": 0.13023347891255954, "grad_norm": 207.8016357421875, "learning_rate": 9.974509260949233e-06, "loss": 24.164, "step": 64470 }, { "epoch": 0.13025367954524336, "grad_norm": 898.6503295898438, "learning_rate": 9.97447404625906e-06, "loss": 26.6493, "step": 64480 }, { "epoch": 0.13027388017792718, "grad_norm": 358.8136901855469, "learning_rate": 9.974438807323907e-06, "loss": 34.0798, "step": 64490 }, { "epoch": 0.130294080810611, "grad_norm": 155.9094696044922, "learning_rate": 9.974403544143942e-06, "loss": 25.6251, "step": 64500 }, { "epoch": 0.1303142814432948, "grad_norm": 546.9095458984375, "learning_rate": 9.974368256719335e-06, "loss": 16.9452, "step": 64510 }, { "epoch": 0.1303344820759786, "grad_norm": 448.4377136230469, "learning_rate": 9.974332945050263e-06, "loss": 17.5975, "step": 64520 }, { "epoch": 0.13035468270866243, "grad_norm": 192.56895446777344, "learning_rate": 9.974297609136895e-06, "loss": 12.8582, "step": 64530 }, { "epoch": 0.13037488334134625, "grad_norm": 393.2430114746094, "learning_rate": 9.974262248979402e-06, "loss": 22.1513, "step": 64540 }, { "epoch": 0.13039508397403007, "grad_norm": 352.6806640625, "learning_rate": 9.97422686457796e-06, "loss": 27.5908, "step": 64550 }, { "epoch": 0.1304152846067139, "grad_norm": 189.65972900390625, "learning_rate": 9.97419145593274e-06, "loss": 28.6044, "step": 64560 }, { "epoch": 0.13043548523939769, "grad_norm": 197.1866912841797, "learning_rate": 9.974156023043912e-06, "loss": 40.7048, "step": 64570 }, { "epoch": 0.1304556858720815, "grad_norm": 534.4985961914062, "learning_rate": 9.974120565911653e-06, "loss": 33.4775, "step": 64580 }, { "epoch": 0.13047588650476533, "grad_norm": 307.1629943847656, "learning_rate": 9.974085084536132e-06, "loss": 21.6635, "step": 64590 }, { "epoch": 0.13049608713744915, "grad_norm": 118.1446762084961, "learning_rate": 9.974049578917524e-06, "loss": 10.0092, "step": 64600 }, { "epoch": 0.13051628777013297, "grad_norm": 337.5904846191406, "learning_rate": 9.974014049056003e-06, "loss": 38.2477, "step": 64610 }, { "epoch": 0.13053648840281679, "grad_norm": 308.88690185546875, "learning_rate": 9.973978494951739e-06, "loss": 20.9281, "step": 64620 }, { "epoch": 0.13055668903550058, "grad_norm": 340.42901611328125, "learning_rate": 9.973942916604907e-06, "loss": 19.4033, "step": 64630 }, { "epoch": 0.1305768896681844, "grad_norm": 123.02615356445312, "learning_rate": 9.973907314015682e-06, "loss": 38.994, "step": 64640 }, { "epoch": 0.13059709030086822, "grad_norm": 304.5625, "learning_rate": 9.973871687184234e-06, "loss": 28.7663, "step": 64650 }, { "epoch": 0.13061729093355204, "grad_norm": 212.9185028076172, "learning_rate": 9.97383603611074e-06, "loss": 36.5426, "step": 64660 }, { "epoch": 0.13063749156623586, "grad_norm": 423.26544189453125, "learning_rate": 9.973800360795372e-06, "loss": 22.121, "step": 64670 }, { "epoch": 0.13065769219891968, "grad_norm": 238.12973022460938, "learning_rate": 9.973764661238306e-06, "loss": 15.6144, "step": 64680 }, { "epoch": 0.1306778928316035, "grad_norm": 516.7333984375, "learning_rate": 9.973728937439714e-06, "loss": 21.1768, "step": 64690 }, { "epoch": 0.1306980934642873, "grad_norm": 1335.4205322265625, "learning_rate": 9.973693189399767e-06, "loss": 32.0524, "step": 64700 }, { "epoch": 0.1307182940969711, "grad_norm": 307.717041015625, "learning_rate": 9.973657417118646e-06, "loss": 32.8303, "step": 64710 }, { "epoch": 0.13073849472965493, "grad_norm": 0.0, "learning_rate": 9.97362162059652e-06, "loss": 43.8404, "step": 64720 }, { "epoch": 0.13075869536233875, "grad_norm": 204.90121459960938, "learning_rate": 9.973585799833567e-06, "loss": 18.245, "step": 64730 }, { "epoch": 0.13077889599502257, "grad_norm": 414.0755615234375, "learning_rate": 9.97354995482996e-06, "loss": 15.9035, "step": 64740 }, { "epoch": 0.1307990966277064, "grad_norm": 201.51736450195312, "learning_rate": 9.973514085585871e-06, "loss": 26.6521, "step": 64750 }, { "epoch": 0.13081929726039018, "grad_norm": 79.15742492675781, "learning_rate": 9.97347819210148e-06, "loss": 23.628, "step": 64760 }, { "epoch": 0.130839497893074, "grad_norm": 348.0669860839844, "learning_rate": 9.973442274376958e-06, "loss": 18.0536, "step": 64770 }, { "epoch": 0.13085969852575782, "grad_norm": 363.5284423828125, "learning_rate": 9.973406332412484e-06, "loss": 29.9268, "step": 64780 }, { "epoch": 0.13087989915844164, "grad_norm": 357.0269470214844, "learning_rate": 9.97337036620823e-06, "loss": 38.7089, "step": 64790 }, { "epoch": 0.13090009979112546, "grad_norm": 267.7585754394531, "learning_rate": 9.973334375764372e-06, "loss": 28.6819, "step": 64800 }, { "epoch": 0.13092030042380928, "grad_norm": 342.959228515625, "learning_rate": 9.973298361081083e-06, "loss": 31.0178, "step": 64810 }, { "epoch": 0.1309405010564931, "grad_norm": 561.10205078125, "learning_rate": 9.973262322158544e-06, "loss": 43.5515, "step": 64820 }, { "epoch": 0.1309607016891769, "grad_norm": 512.0466918945312, "learning_rate": 9.973226258996926e-06, "loss": 19.549, "step": 64830 }, { "epoch": 0.13098090232186071, "grad_norm": 278.545166015625, "learning_rate": 9.973190171596407e-06, "loss": 31.2749, "step": 64840 }, { "epoch": 0.13100110295454453, "grad_norm": 252.36727905273438, "learning_rate": 9.973154059957162e-06, "loss": 20.1389, "step": 64850 }, { "epoch": 0.13102130358722835, "grad_norm": 346.197021484375, "learning_rate": 9.973117924079367e-06, "loss": 26.5477, "step": 64860 }, { "epoch": 0.13104150421991217, "grad_norm": 350.5322265625, "learning_rate": 9.973081763963199e-06, "loss": 20.6617, "step": 64870 }, { "epoch": 0.131061704852596, "grad_norm": 242.1079864501953, "learning_rate": 9.973045579608834e-06, "loss": 26.9958, "step": 64880 }, { "epoch": 0.1310819054852798, "grad_norm": 10.063560485839844, "learning_rate": 9.973009371016447e-06, "loss": 19.931, "step": 64890 }, { "epoch": 0.1311021061179636, "grad_norm": 273.2104797363281, "learning_rate": 9.972973138186217e-06, "loss": 34.4457, "step": 64900 }, { "epoch": 0.13112230675064743, "grad_norm": 721.6179809570312, "learning_rate": 9.972936881118318e-06, "loss": 44.8669, "step": 64910 }, { "epoch": 0.13114250738333125, "grad_norm": 340.2410583496094, "learning_rate": 9.972900599812928e-06, "loss": 23.7927, "step": 64920 }, { "epoch": 0.13116270801601507, "grad_norm": 355.6837158203125, "learning_rate": 9.972864294270224e-06, "loss": 35.254, "step": 64930 }, { "epoch": 0.1311829086486989, "grad_norm": 165.6086883544922, "learning_rate": 9.972827964490382e-06, "loss": 29.1851, "step": 64940 }, { "epoch": 0.13120310928138268, "grad_norm": 116.39570617675781, "learning_rate": 9.972791610473578e-06, "loss": 17.6891, "step": 64950 }, { "epoch": 0.1312233099140665, "grad_norm": 528.39990234375, "learning_rate": 9.972755232219992e-06, "loss": 22.8427, "step": 64960 }, { "epoch": 0.13124351054675032, "grad_norm": 92.90364074707031, "learning_rate": 9.972718829729802e-06, "loss": 26.1292, "step": 64970 }, { "epoch": 0.13126371117943414, "grad_norm": 354.4408874511719, "learning_rate": 9.972682403003182e-06, "loss": 32.0834, "step": 64980 }, { "epoch": 0.13128391181211796, "grad_norm": 0.0, "learning_rate": 9.972645952040311e-06, "loss": 26.1783, "step": 64990 }, { "epoch": 0.13130411244480178, "grad_norm": 718.4530639648438, "learning_rate": 9.972609476841368e-06, "loss": 29.5087, "step": 65000 }, { "epoch": 0.1313243130774856, "grad_norm": 324.8057861328125, "learning_rate": 9.972572977406527e-06, "loss": 18.0231, "step": 65010 }, { "epoch": 0.1313445137101694, "grad_norm": 407.2243957519531, "learning_rate": 9.97253645373597e-06, "loss": 44.0193, "step": 65020 }, { "epoch": 0.1313647143428532, "grad_norm": 174.5597686767578, "learning_rate": 9.972499905829874e-06, "loss": 24.9597, "step": 65030 }, { "epoch": 0.13138491497553703, "grad_norm": 165.49142456054688, "learning_rate": 9.972463333688416e-06, "loss": 22.6526, "step": 65040 }, { "epoch": 0.13140511560822085, "grad_norm": 336.7120666503906, "learning_rate": 9.972426737311775e-06, "loss": 60.6148, "step": 65050 }, { "epoch": 0.13142531624090467, "grad_norm": 177.76097106933594, "learning_rate": 9.972390116700128e-06, "loss": 27.7514, "step": 65060 }, { "epoch": 0.1314455168735885, "grad_norm": 324.6535339355469, "learning_rate": 9.972353471853655e-06, "loss": 32.9613, "step": 65070 }, { "epoch": 0.13146571750627228, "grad_norm": 622.7302856445312, "learning_rate": 9.972316802772536e-06, "loss": 36.7279, "step": 65080 }, { "epoch": 0.1314859181389561, "grad_norm": 121.54441833496094, "learning_rate": 9.972280109456946e-06, "loss": 16.9497, "step": 65090 }, { "epoch": 0.13150611877163992, "grad_norm": 218.82803344726562, "learning_rate": 9.972243391907068e-06, "loss": 21.1595, "step": 65100 }, { "epoch": 0.13152631940432374, "grad_norm": 142.96670532226562, "learning_rate": 9.972206650123077e-06, "loss": 24.9789, "step": 65110 }, { "epoch": 0.13154652003700756, "grad_norm": 700.6417846679688, "learning_rate": 9.972169884105155e-06, "loss": 57.8679, "step": 65120 }, { "epoch": 0.13156672066969138, "grad_norm": 249.9122772216797, "learning_rate": 9.972133093853477e-06, "loss": 21.6158, "step": 65130 }, { "epoch": 0.1315869213023752, "grad_norm": 11.959528923034668, "learning_rate": 9.972096279368228e-06, "loss": 24.1135, "step": 65140 }, { "epoch": 0.131607121935059, "grad_norm": 403.01873779296875, "learning_rate": 9.972059440649584e-06, "loss": 15.9828, "step": 65150 }, { "epoch": 0.13162732256774282, "grad_norm": 231.2027587890625, "learning_rate": 9.972022577697726e-06, "loss": 24.3007, "step": 65160 }, { "epoch": 0.13164752320042664, "grad_norm": 144.72796630859375, "learning_rate": 9.971985690512834e-06, "loss": 22.4748, "step": 65170 }, { "epoch": 0.13166772383311046, "grad_norm": 324.7207946777344, "learning_rate": 9.971948779095084e-06, "loss": 25.0189, "step": 65180 }, { "epoch": 0.13168792446579428, "grad_norm": 208.02517700195312, "learning_rate": 9.97191184344466e-06, "loss": 17.7094, "step": 65190 }, { "epoch": 0.1317081250984781, "grad_norm": 247.1680145263672, "learning_rate": 9.97187488356174e-06, "loss": 31.4401, "step": 65200 }, { "epoch": 0.1317283257311619, "grad_norm": 407.8448791503906, "learning_rate": 9.971837899446505e-06, "loss": 24.5998, "step": 65210 }, { "epoch": 0.1317485263638457, "grad_norm": 267.63116455078125, "learning_rate": 9.971800891099137e-06, "loss": 33.9922, "step": 65220 }, { "epoch": 0.13176872699652953, "grad_norm": 167.32179260253906, "learning_rate": 9.971763858519812e-06, "loss": 22.5728, "step": 65230 }, { "epoch": 0.13178892762921335, "grad_norm": 352.41851806640625, "learning_rate": 9.971726801708715e-06, "loss": 23.2781, "step": 65240 }, { "epoch": 0.13180912826189717, "grad_norm": 37.505035400390625, "learning_rate": 9.971689720666024e-06, "loss": 28.2679, "step": 65250 }, { "epoch": 0.131829328894581, "grad_norm": 336.8055114746094, "learning_rate": 9.97165261539192e-06, "loss": 38.4466, "step": 65260 }, { "epoch": 0.13184952952726478, "grad_norm": 314.8041687011719, "learning_rate": 9.971615485886583e-06, "loss": 21.6764, "step": 65270 }, { "epoch": 0.1318697301599486, "grad_norm": 99.35336303710938, "learning_rate": 9.971578332150197e-06, "loss": 28.1127, "step": 65280 }, { "epoch": 0.13188993079263242, "grad_norm": 990.6474609375, "learning_rate": 9.97154115418294e-06, "loss": 32.6564, "step": 65290 }, { "epoch": 0.13191013142531624, "grad_norm": 242.9526824951172, "learning_rate": 9.971503951984996e-06, "loss": 26.4081, "step": 65300 }, { "epoch": 0.13193033205800006, "grad_norm": 504.33282470703125, "learning_rate": 9.971466725556542e-06, "loss": 29.3214, "step": 65310 }, { "epoch": 0.13195053269068388, "grad_norm": 633.6258544921875, "learning_rate": 9.971429474897765e-06, "loss": 34.2679, "step": 65320 }, { "epoch": 0.1319707333233677, "grad_norm": 239.8502960205078, "learning_rate": 9.971392200008842e-06, "loss": 19.3762, "step": 65330 }, { "epoch": 0.1319909339560515, "grad_norm": 175.69712829589844, "learning_rate": 9.971354900889955e-06, "loss": 25.2866, "step": 65340 }, { "epoch": 0.1320111345887353, "grad_norm": 978.638427734375, "learning_rate": 9.97131757754129e-06, "loss": 21.5277, "step": 65350 }, { "epoch": 0.13203133522141913, "grad_norm": 380.2742004394531, "learning_rate": 9.971280229963026e-06, "loss": 21.6805, "step": 65360 }, { "epoch": 0.13205153585410295, "grad_norm": 615.7775268554688, "learning_rate": 9.971242858155344e-06, "loss": 21.3816, "step": 65370 }, { "epoch": 0.13207173648678677, "grad_norm": 208.02012634277344, "learning_rate": 9.971205462118427e-06, "loss": 28.2956, "step": 65380 }, { "epoch": 0.1320919371194706, "grad_norm": 945.4078369140625, "learning_rate": 9.971168041852456e-06, "loss": 25.2252, "step": 65390 }, { "epoch": 0.13211213775215438, "grad_norm": 184.8759307861328, "learning_rate": 9.971130597357618e-06, "loss": 20.1279, "step": 65400 }, { "epoch": 0.1321323383848382, "grad_norm": 184.51284790039062, "learning_rate": 9.97109312863409e-06, "loss": 18.1415, "step": 65410 }, { "epoch": 0.13215253901752202, "grad_norm": 504.8053894042969, "learning_rate": 9.971055635682059e-06, "loss": 35.2129, "step": 65420 }, { "epoch": 0.13217273965020584, "grad_norm": 291.87847900390625, "learning_rate": 9.971018118501706e-06, "loss": 33.9664, "step": 65430 }, { "epoch": 0.13219294028288966, "grad_norm": 493.4468688964844, "learning_rate": 9.970980577093212e-06, "loss": 36.655, "step": 65440 }, { "epoch": 0.13221314091557348, "grad_norm": 78.00147247314453, "learning_rate": 9.970943011456762e-06, "loss": 20.8333, "step": 65450 }, { "epoch": 0.1322333415482573, "grad_norm": 164.45790100097656, "learning_rate": 9.970905421592538e-06, "loss": 25.6599, "step": 65460 }, { "epoch": 0.1322535421809411, "grad_norm": 55.489219665527344, "learning_rate": 9.970867807500725e-06, "loss": 16.4873, "step": 65470 }, { "epoch": 0.13227374281362492, "grad_norm": 172.2893524169922, "learning_rate": 9.970830169181504e-06, "loss": 25.0292, "step": 65480 }, { "epoch": 0.13229394344630874, "grad_norm": 288.478759765625, "learning_rate": 9.97079250663506e-06, "loss": 20.4401, "step": 65490 }, { "epoch": 0.13231414407899256, "grad_norm": 446.6086730957031, "learning_rate": 9.970754819861577e-06, "loss": 39.3905, "step": 65500 }, { "epoch": 0.13233434471167638, "grad_norm": 215.77325439453125, "learning_rate": 9.97071710886124e-06, "loss": 45.3087, "step": 65510 }, { "epoch": 0.1323545453443602, "grad_norm": 190.48316955566406, "learning_rate": 9.970679373634227e-06, "loss": 18.0428, "step": 65520 }, { "epoch": 0.132374745977044, "grad_norm": 401.0550231933594, "learning_rate": 9.970641614180727e-06, "loss": 32.1985, "step": 65530 }, { "epoch": 0.1323949466097278, "grad_norm": 493.65899658203125, "learning_rate": 9.970603830500923e-06, "loss": 24.9609, "step": 65540 }, { "epoch": 0.13241514724241163, "grad_norm": 171.84735107421875, "learning_rate": 9.970566022594996e-06, "loss": 15.9085, "step": 65550 }, { "epoch": 0.13243534787509545, "grad_norm": 420.3487854003906, "learning_rate": 9.970528190463136e-06, "loss": 18.6109, "step": 65560 }, { "epoch": 0.13245554850777927, "grad_norm": 40.914981842041016, "learning_rate": 9.970490334105525e-06, "loss": 25.9298, "step": 65570 }, { "epoch": 0.1324757491404631, "grad_norm": 413.63714599609375, "learning_rate": 9.970452453522344e-06, "loss": 40.6461, "step": 65580 }, { "epoch": 0.13249594977314688, "grad_norm": 169.36326599121094, "learning_rate": 9.970414548713783e-06, "loss": 20.1856, "step": 65590 }, { "epoch": 0.1325161504058307, "grad_norm": 151.5867919921875, "learning_rate": 9.970376619680024e-06, "loss": 27.218, "step": 65600 }, { "epoch": 0.13253635103851452, "grad_norm": 399.72998046875, "learning_rate": 9.970338666421251e-06, "loss": 23.8861, "step": 65610 }, { "epoch": 0.13255655167119834, "grad_norm": 863.8671264648438, "learning_rate": 9.970300688937651e-06, "loss": 47.0243, "step": 65620 }, { "epoch": 0.13257675230388216, "grad_norm": 311.4657897949219, "learning_rate": 9.970262687229409e-06, "loss": 30.7246, "step": 65630 }, { "epoch": 0.13259695293656598, "grad_norm": 67.76972198486328, "learning_rate": 9.970224661296708e-06, "loss": 14.1752, "step": 65640 }, { "epoch": 0.1326171535692498, "grad_norm": 452.4075927734375, "learning_rate": 9.970186611139736e-06, "loss": 16.5598, "step": 65650 }, { "epoch": 0.1326373542019336, "grad_norm": 48.75602340698242, "learning_rate": 9.970148536758678e-06, "loss": 32.5159, "step": 65660 }, { "epoch": 0.1326575548346174, "grad_norm": 205.81130981445312, "learning_rate": 9.970110438153717e-06, "loss": 52.2047, "step": 65670 }, { "epoch": 0.13267775546730123, "grad_norm": 684.9628295898438, "learning_rate": 9.970072315325041e-06, "loss": 40.1993, "step": 65680 }, { "epoch": 0.13269795609998505, "grad_norm": 308.8397216796875, "learning_rate": 9.970034168272835e-06, "loss": 30.3255, "step": 65690 }, { "epoch": 0.13271815673266887, "grad_norm": 173.79136657714844, "learning_rate": 9.969995996997285e-06, "loss": 47.3552, "step": 65700 }, { "epoch": 0.1327383573653527, "grad_norm": 239.71485900878906, "learning_rate": 9.96995780149858e-06, "loss": 17.3421, "step": 65710 }, { "epoch": 0.13275855799803649, "grad_norm": 199.15304565429688, "learning_rate": 9.969919581776902e-06, "loss": 58.029, "step": 65720 }, { "epoch": 0.1327787586307203, "grad_norm": 433.0157470703125, "learning_rate": 9.969881337832437e-06, "loss": 33.3278, "step": 65730 }, { "epoch": 0.13279895926340413, "grad_norm": 103.13777160644531, "learning_rate": 9.969843069665375e-06, "loss": 20.6378, "step": 65740 }, { "epoch": 0.13281915989608795, "grad_norm": 306.76385498046875, "learning_rate": 9.9698047772759e-06, "loss": 20.0396, "step": 65750 }, { "epoch": 0.13283936052877177, "grad_norm": 478.3727111816406, "learning_rate": 9.969766460664199e-06, "loss": 24.2344, "step": 65760 }, { "epoch": 0.13285956116145559, "grad_norm": 299.32220458984375, "learning_rate": 9.96972811983046e-06, "loss": 28.5114, "step": 65770 }, { "epoch": 0.1328797617941394, "grad_norm": 612.5861206054688, "learning_rate": 9.969689754774868e-06, "loss": 43.1939, "step": 65780 }, { "epoch": 0.1328999624268232, "grad_norm": 112.5262680053711, "learning_rate": 9.96965136549761e-06, "loss": 26.5018, "step": 65790 }, { "epoch": 0.13292016305950702, "grad_norm": 585.4520263671875, "learning_rate": 9.969612951998874e-06, "loss": 18.9396, "step": 65800 }, { "epoch": 0.13294036369219084, "grad_norm": 117.73979949951172, "learning_rate": 9.96957451427885e-06, "loss": 14.5127, "step": 65810 }, { "epoch": 0.13296056432487466, "grad_norm": 473.32904052734375, "learning_rate": 9.96953605233772e-06, "loss": 27.4782, "step": 65820 }, { "epoch": 0.13298076495755848, "grad_norm": 193.32872009277344, "learning_rate": 9.969497566175675e-06, "loss": 16.2935, "step": 65830 }, { "epoch": 0.1330009655902423, "grad_norm": 227.9578094482422, "learning_rate": 9.969459055792903e-06, "loss": 28.2305, "step": 65840 }, { "epoch": 0.1330211662229261, "grad_norm": 398.1967468261719, "learning_rate": 9.969420521189587e-06, "loss": 52.17, "step": 65850 }, { "epoch": 0.1330413668556099, "grad_norm": 500.2559509277344, "learning_rate": 9.96938196236592e-06, "loss": 25.51, "step": 65860 }, { "epoch": 0.13306156748829373, "grad_norm": 659.76318359375, "learning_rate": 9.96934337932209e-06, "loss": 33.5757, "step": 65870 }, { "epoch": 0.13308176812097755, "grad_norm": 196.15061950683594, "learning_rate": 9.969304772058279e-06, "loss": 35.1348, "step": 65880 }, { "epoch": 0.13310196875366137, "grad_norm": 559.1372680664062, "learning_rate": 9.969266140574682e-06, "loss": 20.2096, "step": 65890 }, { "epoch": 0.1331221693863452, "grad_norm": 316.5423889160156, "learning_rate": 9.969227484871485e-06, "loss": 17.8212, "step": 65900 }, { "epoch": 0.13314237001902898, "grad_norm": 54.37018585205078, "learning_rate": 9.969188804948872e-06, "loss": 24.5448, "step": 65910 }, { "epoch": 0.1331625706517128, "grad_norm": 5.289597511291504, "learning_rate": 9.969150100807039e-06, "loss": 23.1942, "step": 65920 }, { "epoch": 0.13318277128439662, "grad_norm": 164.17440795898438, "learning_rate": 9.969111372446171e-06, "loss": 27.4981, "step": 65930 }, { "epoch": 0.13320297191708044, "grad_norm": 221.612060546875, "learning_rate": 9.969072619866455e-06, "loss": 27.7909, "step": 65940 }, { "epoch": 0.13322317254976426, "grad_norm": 325.1167907714844, "learning_rate": 9.969033843068083e-06, "loss": 21.2689, "step": 65950 }, { "epoch": 0.13324337318244808, "grad_norm": 8.657360076904297, "learning_rate": 9.968995042051244e-06, "loss": 29.4628, "step": 65960 }, { "epoch": 0.1332635738151319, "grad_norm": 19.7578182220459, "learning_rate": 9.968956216816123e-06, "loss": 25.9155, "step": 65970 }, { "epoch": 0.1332837744478157, "grad_norm": 237.20579528808594, "learning_rate": 9.968917367362914e-06, "loss": 19.6638, "step": 65980 }, { "epoch": 0.13330397508049951, "grad_norm": 290.063720703125, "learning_rate": 9.968878493691803e-06, "loss": 20.9118, "step": 65990 }, { "epoch": 0.13332417571318333, "grad_norm": 459.3388977050781, "learning_rate": 9.968839595802982e-06, "loss": 40.7767, "step": 66000 }, { "epoch": 0.13334437634586715, "grad_norm": 57.77425765991211, "learning_rate": 9.968800673696638e-06, "loss": 30.4635, "step": 66010 }, { "epoch": 0.13336457697855097, "grad_norm": 69.5876693725586, "learning_rate": 9.968761727372965e-06, "loss": 19.9259, "step": 66020 }, { "epoch": 0.1333847776112348, "grad_norm": 414.38568115234375, "learning_rate": 9.968722756832148e-06, "loss": 25.9187, "step": 66030 }, { "epoch": 0.1334049782439186, "grad_norm": 315.5315856933594, "learning_rate": 9.96868376207438e-06, "loss": 23.2683, "step": 66040 }, { "epoch": 0.1334251788766024, "grad_norm": 371.28253173828125, "learning_rate": 9.968644743099848e-06, "loss": 18.7097, "step": 66050 }, { "epoch": 0.13344537950928623, "grad_norm": 459.3819580078125, "learning_rate": 9.968605699908747e-06, "loss": 18.676, "step": 66060 }, { "epoch": 0.13346558014197005, "grad_norm": 776.4884643554688, "learning_rate": 9.968566632501262e-06, "loss": 39.8058, "step": 66070 }, { "epoch": 0.13348578077465387, "grad_norm": 208.42019653320312, "learning_rate": 9.968527540877586e-06, "loss": 31.5618, "step": 66080 }, { "epoch": 0.1335059814073377, "grad_norm": 350.4022521972656, "learning_rate": 9.96848842503791e-06, "loss": 19.8004, "step": 66090 }, { "epoch": 0.1335261820400215, "grad_norm": 265.1343078613281, "learning_rate": 9.968449284982424e-06, "loss": 23.2017, "step": 66100 }, { "epoch": 0.1335463826727053, "grad_norm": 316.6374816894531, "learning_rate": 9.968410120711321e-06, "loss": 25.0555, "step": 66110 }, { "epoch": 0.13356658330538912, "grad_norm": 288.9773864746094, "learning_rate": 9.968370932224787e-06, "loss": 36.4133, "step": 66120 }, { "epoch": 0.13358678393807294, "grad_norm": 462.33856201171875, "learning_rate": 9.968331719523015e-06, "loss": 21.5502, "step": 66130 }, { "epoch": 0.13360698457075676, "grad_norm": 203.50596618652344, "learning_rate": 9.968292482606199e-06, "loss": 25.9663, "step": 66140 }, { "epoch": 0.13362718520344058, "grad_norm": 736.464111328125, "learning_rate": 9.968253221474527e-06, "loss": 47.49, "step": 66150 }, { "epoch": 0.1336473858361244, "grad_norm": 241.01231384277344, "learning_rate": 9.96821393612819e-06, "loss": 20.8539, "step": 66160 }, { "epoch": 0.1336675864688082, "grad_norm": 1.526489496231079, "learning_rate": 9.968174626567382e-06, "loss": 15.0666, "step": 66170 }, { "epoch": 0.133687787101492, "grad_norm": 158.87147521972656, "learning_rate": 9.968135292792294e-06, "loss": 44.0366, "step": 66180 }, { "epoch": 0.13370798773417583, "grad_norm": 529.40869140625, "learning_rate": 9.968095934803116e-06, "loss": 38.2544, "step": 66190 }, { "epoch": 0.13372818836685965, "grad_norm": 200.14126586914062, "learning_rate": 9.968056552600043e-06, "loss": 34.446, "step": 66200 }, { "epoch": 0.13374838899954347, "grad_norm": 220.0067901611328, "learning_rate": 9.968017146183263e-06, "loss": 28.4433, "step": 66210 }, { "epoch": 0.1337685896322273, "grad_norm": 49.626373291015625, "learning_rate": 9.967977715552972e-06, "loss": 19.4243, "step": 66220 }, { "epoch": 0.13378879026491108, "grad_norm": 354.77044677734375, "learning_rate": 9.967938260709357e-06, "loss": 35.2822, "step": 66230 }, { "epoch": 0.1338089908975949, "grad_norm": 271.1885070800781, "learning_rate": 9.967898781652616e-06, "loss": 17.2958, "step": 66240 }, { "epoch": 0.13382919153027872, "grad_norm": 398.65966796875, "learning_rate": 9.967859278382939e-06, "loss": 18.941, "step": 66250 }, { "epoch": 0.13384939216296254, "grad_norm": 4.969280242919922, "learning_rate": 9.967819750900517e-06, "loss": 28.9954, "step": 66260 }, { "epoch": 0.13386959279564636, "grad_norm": 200.7358856201172, "learning_rate": 9.967780199205544e-06, "loss": 26.5483, "step": 66270 }, { "epoch": 0.13388979342833018, "grad_norm": 258.5098571777344, "learning_rate": 9.967740623298214e-06, "loss": 28.5525, "step": 66280 }, { "epoch": 0.133909994061014, "grad_norm": 396.1499938964844, "learning_rate": 9.967701023178717e-06, "loss": 24.657, "step": 66290 }, { "epoch": 0.1339301946936978, "grad_norm": 365.5580139160156, "learning_rate": 9.96766139884725e-06, "loss": 24.8753, "step": 66300 }, { "epoch": 0.13395039532638162, "grad_norm": 44.32944869995117, "learning_rate": 9.967621750304002e-06, "loss": 28.2681, "step": 66310 }, { "epoch": 0.13397059595906544, "grad_norm": 145.4029541015625, "learning_rate": 9.96758207754917e-06, "loss": 20.5242, "step": 66320 }, { "epoch": 0.13399079659174926, "grad_norm": 276.45074462890625, "learning_rate": 9.967542380582944e-06, "loss": 33.4589, "step": 66330 }, { "epoch": 0.13401099722443308, "grad_norm": 580.4900512695312, "learning_rate": 9.96750265940552e-06, "loss": 22.2097, "step": 66340 }, { "epoch": 0.1340311978571169, "grad_norm": 490.7298889160156, "learning_rate": 9.967462914017087e-06, "loss": 24.8151, "step": 66350 }, { "epoch": 0.1340513984898007, "grad_norm": 300.88189697265625, "learning_rate": 9.967423144417847e-06, "loss": 17.8846, "step": 66360 }, { "epoch": 0.1340715991224845, "grad_norm": 247.90757751464844, "learning_rate": 9.967383350607986e-06, "loss": 22.3368, "step": 66370 }, { "epoch": 0.13409179975516833, "grad_norm": 596.3983764648438, "learning_rate": 9.967343532587701e-06, "loss": 25.654, "step": 66380 }, { "epoch": 0.13411200038785215, "grad_norm": 238.70260620117188, "learning_rate": 9.967303690357189e-06, "loss": 23.3272, "step": 66390 }, { "epoch": 0.13413220102053597, "grad_norm": 230.6807861328125, "learning_rate": 9.967263823916638e-06, "loss": 22.6918, "step": 66400 }, { "epoch": 0.1341524016532198, "grad_norm": 320.2897033691406, "learning_rate": 9.967223933266247e-06, "loss": 16.8531, "step": 66410 }, { "epoch": 0.1341726022859036, "grad_norm": 306.5101013183594, "learning_rate": 9.96718401840621e-06, "loss": 21.4082, "step": 66420 }, { "epoch": 0.1341928029185874, "grad_norm": 161.35494995117188, "learning_rate": 9.96714407933672e-06, "loss": 21.7046, "step": 66430 }, { "epoch": 0.13421300355127122, "grad_norm": 233.5550537109375, "learning_rate": 9.96710411605797e-06, "loss": 12.8578, "step": 66440 }, { "epoch": 0.13423320418395504, "grad_norm": 476.2647705078125, "learning_rate": 9.96706412857016e-06, "loss": 31.3925, "step": 66450 }, { "epoch": 0.13425340481663886, "grad_norm": 130.6685028076172, "learning_rate": 9.967024116873481e-06, "loss": 13.1677, "step": 66460 }, { "epoch": 0.13427360544932268, "grad_norm": 467.4010009765625, "learning_rate": 9.966984080968128e-06, "loss": 23.9967, "step": 66470 }, { "epoch": 0.1342938060820065, "grad_norm": 62.86116409301758, "learning_rate": 9.966944020854297e-06, "loss": 23.9043, "step": 66480 }, { "epoch": 0.1343140067146903, "grad_norm": 41.653892517089844, "learning_rate": 9.966903936532184e-06, "loss": 26.3673, "step": 66490 }, { "epoch": 0.1343342073473741, "grad_norm": 557.43701171875, "learning_rate": 9.966863828001982e-06, "loss": 27.1654, "step": 66500 }, { "epoch": 0.13435440798005793, "grad_norm": 143.7139892578125, "learning_rate": 9.96682369526389e-06, "loss": 18.6233, "step": 66510 }, { "epoch": 0.13437460861274175, "grad_norm": 412.89178466796875, "learning_rate": 9.966783538318101e-06, "loss": 23.7237, "step": 66520 }, { "epoch": 0.13439480924542557, "grad_norm": 503.3326721191406, "learning_rate": 9.966743357164812e-06, "loss": 41.8913, "step": 66530 }, { "epoch": 0.1344150098781094, "grad_norm": 407.052978515625, "learning_rate": 9.966703151804219e-06, "loss": 21.7692, "step": 66540 }, { "epoch": 0.13443521051079318, "grad_norm": 146.9783477783203, "learning_rate": 9.966662922236515e-06, "loss": 26.3066, "step": 66550 }, { "epoch": 0.134455411143477, "grad_norm": 208.23899841308594, "learning_rate": 9.966622668461899e-06, "loss": 26.7268, "step": 66560 }, { "epoch": 0.13447561177616082, "grad_norm": 417.6758728027344, "learning_rate": 9.966582390480567e-06, "loss": 20.748, "step": 66570 }, { "epoch": 0.13449581240884464, "grad_norm": 307.28546142578125, "learning_rate": 9.966542088292714e-06, "loss": 30.1261, "step": 66580 }, { "epoch": 0.13451601304152846, "grad_norm": 238.5474395751953, "learning_rate": 9.96650176189854e-06, "loss": 23.2692, "step": 66590 }, { "epoch": 0.13453621367421228, "grad_norm": 246.79116821289062, "learning_rate": 9.966461411298235e-06, "loss": 20.633, "step": 66600 }, { "epoch": 0.1345564143068961, "grad_norm": 156.6832733154297, "learning_rate": 9.966421036492003e-06, "loss": 10.535, "step": 66610 }, { "epoch": 0.1345766149395799, "grad_norm": 570.5266723632812, "learning_rate": 9.966380637480034e-06, "loss": 32.9075, "step": 66620 }, { "epoch": 0.13459681557226372, "grad_norm": 82.65400695800781, "learning_rate": 9.96634021426253e-06, "loss": 25.4304, "step": 66630 }, { "epoch": 0.13461701620494754, "grad_norm": 151.8350830078125, "learning_rate": 9.966299766839685e-06, "loss": 45.611, "step": 66640 }, { "epoch": 0.13463721683763136, "grad_norm": 302.1487121582031, "learning_rate": 9.966259295211698e-06, "loss": 16.5965, "step": 66650 }, { "epoch": 0.13465741747031518, "grad_norm": 166.64117431640625, "learning_rate": 9.966218799378766e-06, "loss": 37.2303, "step": 66660 }, { "epoch": 0.134677618102999, "grad_norm": 309.0213623046875, "learning_rate": 9.966178279341084e-06, "loss": 33.9259, "step": 66670 }, { "epoch": 0.1346978187356828, "grad_norm": 372.9971008300781, "learning_rate": 9.966137735098853e-06, "loss": 39.5996, "step": 66680 }, { "epoch": 0.1347180193683666, "grad_norm": 2.437394857406616, "learning_rate": 9.966097166652268e-06, "loss": 29.7834, "step": 66690 }, { "epoch": 0.13473822000105043, "grad_norm": 209.9769744873047, "learning_rate": 9.966056574001528e-06, "loss": 21.8949, "step": 66700 }, { "epoch": 0.13475842063373425, "grad_norm": 312.7691345214844, "learning_rate": 9.966015957146832e-06, "loss": 28.2982, "step": 66710 }, { "epoch": 0.13477862126641807, "grad_norm": 155.2775421142578, "learning_rate": 9.965975316088377e-06, "loss": 18.2688, "step": 66720 }, { "epoch": 0.1347988218991019, "grad_norm": 205.48687744140625, "learning_rate": 9.96593465082636e-06, "loss": 28.3465, "step": 66730 }, { "epoch": 0.1348190225317857, "grad_norm": 129.78016662597656, "learning_rate": 9.965893961360977e-06, "loss": 18.6096, "step": 66740 }, { "epoch": 0.1348392231644695, "grad_norm": 418.7332763671875, "learning_rate": 9.965853247692433e-06, "loss": 27.0274, "step": 66750 }, { "epoch": 0.13485942379715332, "grad_norm": 244.0142822265625, "learning_rate": 9.965812509820918e-06, "loss": 14.9762, "step": 66760 }, { "epoch": 0.13487962442983714, "grad_norm": 208.21958923339844, "learning_rate": 9.965771747746638e-06, "loss": 27.9421, "step": 66770 }, { "epoch": 0.13489982506252096, "grad_norm": 168.46929931640625, "learning_rate": 9.96573096146979e-06, "loss": 12.1444, "step": 66780 }, { "epoch": 0.13492002569520478, "grad_norm": 203.46258544921875, "learning_rate": 9.96569015099057e-06, "loss": 25.1111, "step": 66790 }, { "epoch": 0.1349402263278886, "grad_norm": 362.4871520996094, "learning_rate": 9.965649316309178e-06, "loss": 8.0727, "step": 66800 }, { "epoch": 0.1349604269605724, "grad_norm": 151.9606170654297, "learning_rate": 9.965608457425813e-06, "loss": 23.3724, "step": 66810 }, { "epoch": 0.1349806275932562, "grad_norm": 215.24185180664062, "learning_rate": 9.965567574340676e-06, "loss": 21.4178, "step": 66820 }, { "epoch": 0.13500082822594003, "grad_norm": 566.1832885742188, "learning_rate": 9.965526667053964e-06, "loss": 38.9688, "step": 66830 }, { "epoch": 0.13502102885862385, "grad_norm": 923.3115844726562, "learning_rate": 9.965485735565878e-06, "loss": 36.6468, "step": 66840 }, { "epoch": 0.13504122949130767, "grad_norm": 586.2872924804688, "learning_rate": 9.965444779876618e-06, "loss": 29.3916, "step": 66850 }, { "epoch": 0.1350614301239915, "grad_norm": 413.8444519042969, "learning_rate": 9.96540379998638e-06, "loss": 46.4519, "step": 66860 }, { "epoch": 0.13508163075667529, "grad_norm": 83.1314926147461, "learning_rate": 9.965362795895368e-06, "loss": 28.3494, "step": 66870 }, { "epoch": 0.1351018313893591, "grad_norm": 143.2391357421875, "learning_rate": 9.965321767603778e-06, "loss": 27.7056, "step": 66880 }, { "epoch": 0.13512203202204293, "grad_norm": 448.9478454589844, "learning_rate": 9.965280715111814e-06, "loss": 25.0388, "step": 66890 }, { "epoch": 0.13514223265472675, "grad_norm": 256.74700927734375, "learning_rate": 9.965239638419673e-06, "loss": 38.4857, "step": 66900 }, { "epoch": 0.13516243328741057, "grad_norm": 556.77734375, "learning_rate": 9.965198537527556e-06, "loss": 25.6874, "step": 66910 }, { "epoch": 0.13518263392009439, "grad_norm": 441.9389953613281, "learning_rate": 9.965157412435663e-06, "loss": 22.1375, "step": 66920 }, { "epoch": 0.1352028345527782, "grad_norm": 279.1792907714844, "learning_rate": 9.965116263144196e-06, "loss": 20.2665, "step": 66930 }, { "epoch": 0.135223035185462, "grad_norm": 808.2194213867188, "learning_rate": 9.965075089653354e-06, "loss": 37.2277, "step": 66940 }, { "epoch": 0.13524323581814582, "grad_norm": 298.69989013671875, "learning_rate": 9.965033891963338e-06, "loss": 19.6175, "step": 66950 }, { "epoch": 0.13526343645082964, "grad_norm": 63.26050567626953, "learning_rate": 9.96499267007435e-06, "loss": 18.5855, "step": 66960 }, { "epoch": 0.13528363708351346, "grad_norm": 81.70155334472656, "learning_rate": 9.964951423986588e-06, "loss": 10.9234, "step": 66970 }, { "epoch": 0.13530383771619728, "grad_norm": 64.6188735961914, "learning_rate": 9.964910153700258e-06, "loss": 34.5105, "step": 66980 }, { "epoch": 0.1353240383488811, "grad_norm": 177.4397430419922, "learning_rate": 9.964868859215555e-06, "loss": 14.4012, "step": 66990 }, { "epoch": 0.1353442389815649, "grad_norm": 203.4015655517578, "learning_rate": 9.964827540532685e-06, "loss": 22.3411, "step": 67000 }, { "epoch": 0.1353644396142487, "grad_norm": 323.9362487792969, "learning_rate": 9.964786197651848e-06, "loss": 42.7544, "step": 67010 }, { "epoch": 0.13538464024693253, "grad_norm": 183.25729370117188, "learning_rate": 9.964744830573245e-06, "loss": 30.4944, "step": 67020 }, { "epoch": 0.13540484087961635, "grad_norm": 190.28562927246094, "learning_rate": 9.964703439297076e-06, "loss": 41.7825, "step": 67030 }, { "epoch": 0.13542504151230017, "grad_norm": 249.52005004882812, "learning_rate": 9.964662023823548e-06, "loss": 35.3591, "step": 67040 }, { "epoch": 0.135445242144984, "grad_norm": 467.80084228515625, "learning_rate": 9.964620584152858e-06, "loss": 24.392, "step": 67050 }, { "epoch": 0.13546544277766778, "grad_norm": 342.14312744140625, "learning_rate": 9.964579120285208e-06, "loss": 37.1346, "step": 67060 }, { "epoch": 0.1354856434103516, "grad_norm": 267.0450439453125, "learning_rate": 9.964537632220801e-06, "loss": 19.6869, "step": 67070 }, { "epoch": 0.13550584404303542, "grad_norm": 272.692626953125, "learning_rate": 9.964496119959842e-06, "loss": 25.7792, "step": 67080 }, { "epoch": 0.13552604467571924, "grad_norm": 377.50384521484375, "learning_rate": 9.96445458350253e-06, "loss": 22.6151, "step": 67090 }, { "epoch": 0.13554624530840306, "grad_norm": 116.07626342773438, "learning_rate": 9.964413022849069e-06, "loss": 15.8783, "step": 67100 }, { "epoch": 0.13556644594108688, "grad_norm": 228.94955444335938, "learning_rate": 9.964371437999661e-06, "loss": 16.7996, "step": 67110 }, { "epoch": 0.1355866465737707, "grad_norm": 544.9328002929688, "learning_rate": 9.96432982895451e-06, "loss": 31.2262, "step": 67120 }, { "epoch": 0.1356068472064545, "grad_norm": 160.2474365234375, "learning_rate": 9.964288195713814e-06, "loss": 20.8693, "step": 67130 }, { "epoch": 0.13562704783913831, "grad_norm": 235.44512939453125, "learning_rate": 9.964246538277782e-06, "loss": 20.1367, "step": 67140 }, { "epoch": 0.13564724847182213, "grad_norm": 212.14146423339844, "learning_rate": 9.964204856646613e-06, "loss": 32.7112, "step": 67150 }, { "epoch": 0.13566744910450595, "grad_norm": 522.595947265625, "learning_rate": 9.964163150820512e-06, "loss": 27.4913, "step": 67160 }, { "epoch": 0.13568764973718977, "grad_norm": 551.277099609375, "learning_rate": 9.964121420799682e-06, "loss": 23.0918, "step": 67170 }, { "epoch": 0.1357078503698736, "grad_norm": 285.1145324707031, "learning_rate": 9.964079666584327e-06, "loss": 31.0092, "step": 67180 }, { "epoch": 0.1357280510025574, "grad_norm": 178.41708374023438, "learning_rate": 9.96403788817465e-06, "loss": 28.5289, "step": 67190 }, { "epoch": 0.1357482516352412, "grad_norm": 235.4615936279297, "learning_rate": 9.963996085570854e-06, "loss": 22.7197, "step": 67200 }, { "epoch": 0.13576845226792503, "grad_norm": 261.9375305175781, "learning_rate": 9.963954258773143e-06, "loss": 31.2134, "step": 67210 }, { "epoch": 0.13578865290060885, "grad_norm": 487.5806884765625, "learning_rate": 9.963912407781721e-06, "loss": 39.0106, "step": 67220 }, { "epoch": 0.13580885353329267, "grad_norm": 378.559814453125, "learning_rate": 9.963870532596791e-06, "loss": 23.705, "step": 67230 }, { "epoch": 0.1358290541659765, "grad_norm": 18.57795524597168, "learning_rate": 9.96382863321856e-06, "loss": 34.7125, "step": 67240 }, { "epoch": 0.1358492547986603, "grad_norm": 223.1298828125, "learning_rate": 9.963786709647228e-06, "loss": 22.0783, "step": 67250 }, { "epoch": 0.1358694554313441, "grad_norm": 100.81449127197266, "learning_rate": 9.963744761883003e-06, "loss": 26.4037, "step": 67260 }, { "epoch": 0.13588965606402792, "grad_norm": 276.6617736816406, "learning_rate": 9.963702789926089e-06, "loss": 25.6281, "step": 67270 }, { "epoch": 0.13590985669671174, "grad_norm": 698.0415649414062, "learning_rate": 9.963660793776689e-06, "loss": 30.9441, "step": 67280 }, { "epoch": 0.13593005732939556, "grad_norm": 386.02880859375, "learning_rate": 9.963618773435006e-06, "loss": 13.8671, "step": 67290 }, { "epoch": 0.13595025796207938, "grad_norm": 240.81494140625, "learning_rate": 9.96357672890125e-06, "loss": 50.0715, "step": 67300 }, { "epoch": 0.1359704585947632, "grad_norm": 231.6616973876953, "learning_rate": 9.963534660175622e-06, "loss": 20.517, "step": 67310 }, { "epoch": 0.135990659227447, "grad_norm": 169.0372772216797, "learning_rate": 9.963492567258327e-06, "loss": 29.797, "step": 67320 }, { "epoch": 0.1360108598601308, "grad_norm": 327.73590087890625, "learning_rate": 9.963450450149572e-06, "loss": 20.3036, "step": 67330 }, { "epoch": 0.13603106049281463, "grad_norm": 486.235107421875, "learning_rate": 9.963408308849563e-06, "loss": 25.2156, "step": 67340 }, { "epoch": 0.13605126112549845, "grad_norm": 205.426513671875, "learning_rate": 9.963366143358502e-06, "loss": 28.0845, "step": 67350 }, { "epoch": 0.13607146175818227, "grad_norm": 584.2162475585938, "learning_rate": 9.963323953676599e-06, "loss": 34.4619, "step": 67360 }, { "epoch": 0.1360916623908661, "grad_norm": 8.391529083251953, "learning_rate": 9.963281739804054e-06, "loss": 40.2791, "step": 67370 }, { "epoch": 0.13611186302354988, "grad_norm": 249.8508758544922, "learning_rate": 9.963239501741076e-06, "loss": 29.1216, "step": 67380 }, { "epoch": 0.1361320636562337, "grad_norm": 135.61180114746094, "learning_rate": 9.963197239487871e-06, "loss": 14.8607, "step": 67390 }, { "epoch": 0.13615226428891752, "grad_norm": 242.98388671875, "learning_rate": 9.963154953044646e-06, "loss": 24.7519, "step": 67400 }, { "epoch": 0.13617246492160134, "grad_norm": 339.5757141113281, "learning_rate": 9.963112642411606e-06, "loss": 18.511, "step": 67410 }, { "epoch": 0.13619266555428516, "grad_norm": 215.37144470214844, "learning_rate": 9.963070307588955e-06, "loss": 16.7106, "step": 67420 }, { "epoch": 0.13621286618696898, "grad_norm": 789.3258056640625, "learning_rate": 9.963027948576902e-06, "loss": 43.2909, "step": 67430 }, { "epoch": 0.1362330668196528, "grad_norm": 29.92152214050293, "learning_rate": 9.96298556537565e-06, "loss": 22.1061, "step": 67440 }, { "epoch": 0.1362532674523366, "grad_norm": 347.4695129394531, "learning_rate": 9.962943157985412e-06, "loss": 58.4131, "step": 67450 }, { "epoch": 0.13627346808502042, "grad_norm": 301.8613586425781, "learning_rate": 9.96290072640639e-06, "loss": 27.5039, "step": 67460 }, { "epoch": 0.13629366871770424, "grad_norm": 70.20340728759766, "learning_rate": 9.962858270638793e-06, "loss": 29.8203, "step": 67470 }, { "epoch": 0.13631386935038806, "grad_norm": 381.9365234375, "learning_rate": 9.962815790682825e-06, "loss": 20.189, "step": 67480 }, { "epoch": 0.13633406998307188, "grad_norm": 107.89916229248047, "learning_rate": 9.962773286538696e-06, "loss": 14.2388, "step": 67490 }, { "epoch": 0.1363542706157557, "grad_norm": 199.02011108398438, "learning_rate": 9.962730758206612e-06, "loss": 22.351, "step": 67500 }, { "epoch": 0.1363744712484395, "grad_norm": 252.8507080078125, "learning_rate": 9.962688205686778e-06, "loss": 24.9563, "step": 67510 }, { "epoch": 0.1363946718811233, "grad_norm": 82.89959716796875, "learning_rate": 9.962645628979406e-06, "loss": 28.4177, "step": 67520 }, { "epoch": 0.13641487251380713, "grad_norm": 251.78750610351562, "learning_rate": 9.962603028084699e-06, "loss": 17.1454, "step": 67530 }, { "epoch": 0.13643507314649095, "grad_norm": 409.2945251464844, "learning_rate": 9.962560403002868e-06, "loss": 34.0234, "step": 67540 }, { "epoch": 0.13645527377917477, "grad_norm": 206.96835327148438, "learning_rate": 9.96251775373412e-06, "loss": 20.7377, "step": 67550 }, { "epoch": 0.1364754744118586, "grad_norm": 234.7782440185547, "learning_rate": 9.962475080278662e-06, "loss": 26.476, "step": 67560 }, { "epoch": 0.1364956750445424, "grad_norm": 168.2493438720703, "learning_rate": 9.9624323826367e-06, "loss": 14.518, "step": 67570 }, { "epoch": 0.1365158756772262, "grad_norm": 350.9550476074219, "learning_rate": 9.962389660808447e-06, "loss": 25.0425, "step": 67580 }, { "epoch": 0.13653607630991002, "grad_norm": 330.09417724609375, "learning_rate": 9.96234691479411e-06, "loss": 41.2408, "step": 67590 }, { "epoch": 0.13655627694259384, "grad_norm": 289.4892578125, "learning_rate": 9.962304144593893e-06, "loss": 24.1793, "step": 67600 }, { "epoch": 0.13657647757527766, "grad_norm": 950.955810546875, "learning_rate": 9.962261350208008e-06, "loss": 29.5317, "step": 67610 }, { "epoch": 0.13659667820796148, "grad_norm": 311.17547607421875, "learning_rate": 9.962218531636664e-06, "loss": 49.982, "step": 67620 }, { "epoch": 0.1366168788406453, "grad_norm": 0.0, "learning_rate": 9.962175688880067e-06, "loss": 23.8503, "step": 67630 }, { "epoch": 0.1366370794733291, "grad_norm": 253.7162322998047, "learning_rate": 9.96213282193843e-06, "loss": 13.7119, "step": 67640 }, { "epoch": 0.1366572801060129, "grad_norm": 196.41546630859375, "learning_rate": 9.962089930811959e-06, "loss": 18.7869, "step": 67650 }, { "epoch": 0.13667748073869673, "grad_norm": 421.8287048339844, "learning_rate": 9.962047015500861e-06, "loss": 40.2657, "step": 67660 }, { "epoch": 0.13669768137138055, "grad_norm": 775.82373046875, "learning_rate": 9.96200407600535e-06, "loss": 31.8149, "step": 67670 }, { "epoch": 0.13671788200406437, "grad_norm": 286.5835266113281, "learning_rate": 9.961961112325633e-06, "loss": 23.9272, "step": 67680 }, { "epoch": 0.1367380826367482, "grad_norm": 141.4736328125, "learning_rate": 9.961918124461918e-06, "loss": 26.106, "step": 67690 }, { "epoch": 0.13675828326943198, "grad_norm": 232.66549682617188, "learning_rate": 9.961875112414417e-06, "loss": 25.6891, "step": 67700 }, { "epoch": 0.1367784839021158, "grad_norm": 109.69756317138672, "learning_rate": 9.961832076183337e-06, "loss": 27.3044, "step": 67710 }, { "epoch": 0.13679868453479962, "grad_norm": 315.5060729980469, "learning_rate": 9.96178901576889e-06, "loss": 17.0082, "step": 67720 }, { "epoch": 0.13681888516748344, "grad_norm": 326.779052734375, "learning_rate": 9.961745931171288e-06, "loss": 23.3058, "step": 67730 }, { "epoch": 0.13683908580016726, "grad_norm": 212.092529296875, "learning_rate": 9.961702822390735e-06, "loss": 13.0448, "step": 67740 }, { "epoch": 0.13685928643285108, "grad_norm": 118.62604522705078, "learning_rate": 9.961659689427444e-06, "loss": 39.4565, "step": 67750 }, { "epoch": 0.1368794870655349, "grad_norm": 235.76905822753906, "learning_rate": 9.961616532281626e-06, "loss": 34.9254, "step": 67760 }, { "epoch": 0.1368996876982187, "grad_norm": 83.66500091552734, "learning_rate": 9.961573350953491e-06, "loss": 21.2863, "step": 67770 }, { "epoch": 0.13691988833090252, "grad_norm": 175.03948974609375, "learning_rate": 9.96153014544325e-06, "loss": 37.4414, "step": 67780 }, { "epoch": 0.13694008896358634, "grad_norm": 376.23895263671875, "learning_rate": 9.961486915751114e-06, "loss": 24.6527, "step": 67790 }, { "epoch": 0.13696028959627016, "grad_norm": 359.23834228515625, "learning_rate": 9.96144366187729e-06, "loss": 23.528, "step": 67800 }, { "epoch": 0.13698049022895398, "grad_norm": 288.9600830078125, "learning_rate": 9.961400383821992e-06, "loss": 19.3785, "step": 67810 }, { "epoch": 0.1370006908616378, "grad_norm": 466.2603759765625, "learning_rate": 9.96135708158543e-06, "loss": 30.3506, "step": 67820 }, { "epoch": 0.1370208914943216, "grad_norm": 306.9299011230469, "learning_rate": 9.961313755167816e-06, "loss": 22.9855, "step": 67830 }, { "epoch": 0.1370410921270054, "grad_norm": 453.46270751953125, "learning_rate": 9.961270404569358e-06, "loss": 41.0135, "step": 67840 }, { "epoch": 0.13706129275968923, "grad_norm": 267.604736328125, "learning_rate": 9.961227029790272e-06, "loss": 18.5872, "step": 67850 }, { "epoch": 0.13708149339237305, "grad_norm": 507.6817626953125, "learning_rate": 9.961183630830768e-06, "loss": 22.7083, "step": 67860 }, { "epoch": 0.13710169402505687, "grad_norm": 260.7120361328125, "learning_rate": 9.961140207691055e-06, "loss": 34.8008, "step": 67870 }, { "epoch": 0.1371218946577407, "grad_norm": 263.5703125, "learning_rate": 9.961096760371349e-06, "loss": 30.9672, "step": 67880 }, { "epoch": 0.1371420952904245, "grad_norm": 783.326904296875, "learning_rate": 9.961053288871855e-06, "loss": 30.8685, "step": 67890 }, { "epoch": 0.1371622959231083, "grad_norm": 124.6601791381836, "learning_rate": 9.961009793192793e-06, "loss": 20.0264, "step": 67900 }, { "epoch": 0.13718249655579212, "grad_norm": 283.2763366699219, "learning_rate": 9.96096627333437e-06, "loss": 32.1768, "step": 67910 }, { "epoch": 0.13720269718847594, "grad_norm": 171.26925659179688, "learning_rate": 9.960922729296797e-06, "loss": 35.0729, "step": 67920 }, { "epoch": 0.13722289782115976, "grad_norm": 356.7204895019531, "learning_rate": 9.96087916108029e-06, "loss": 38.0921, "step": 67930 }, { "epoch": 0.13724309845384358, "grad_norm": 301.0563049316406, "learning_rate": 9.960835568685058e-06, "loss": 21.0514, "step": 67940 }, { "epoch": 0.1372632990865274, "grad_norm": 342.919189453125, "learning_rate": 9.960791952111318e-06, "loss": 28.223, "step": 67950 }, { "epoch": 0.1372834997192112, "grad_norm": 381.216796875, "learning_rate": 9.960748311359278e-06, "loss": 26.2811, "step": 67960 }, { "epoch": 0.137303700351895, "grad_norm": 157.9551239013672, "learning_rate": 9.96070464642915e-06, "loss": 12.4714, "step": 67970 }, { "epoch": 0.13732390098457883, "grad_norm": 376.7027893066406, "learning_rate": 9.960660957321153e-06, "loss": 46.9244, "step": 67980 }, { "epoch": 0.13734410161726265, "grad_norm": 281.2987365722656, "learning_rate": 9.960617244035495e-06, "loss": 23.0282, "step": 67990 }, { "epoch": 0.13736430224994647, "grad_norm": 522.558837890625, "learning_rate": 9.960573506572391e-06, "loss": 31.3913, "step": 68000 }, { "epoch": 0.1373845028826303, "grad_norm": 559.4723510742188, "learning_rate": 9.960529744932051e-06, "loss": 24.9448, "step": 68010 }, { "epoch": 0.13740470351531409, "grad_norm": 36.82872772216797, "learning_rate": 9.960485959114693e-06, "loss": 11.9957, "step": 68020 }, { "epoch": 0.1374249041479979, "grad_norm": 248.4458465576172, "learning_rate": 9.960442149120527e-06, "loss": 36.1138, "step": 68030 }, { "epoch": 0.13744510478068173, "grad_norm": 270.46575927734375, "learning_rate": 9.960398314949767e-06, "loss": 33.6767, "step": 68040 }, { "epoch": 0.13746530541336555, "grad_norm": 164.68907165527344, "learning_rate": 9.960354456602627e-06, "loss": 19.686, "step": 68050 }, { "epoch": 0.13748550604604937, "grad_norm": 283.68218994140625, "learning_rate": 9.960310574079324e-06, "loss": 24.8749, "step": 68060 }, { "epoch": 0.13750570667873319, "grad_norm": 205.39442443847656, "learning_rate": 9.960266667380065e-06, "loss": 18.4756, "step": 68070 }, { "epoch": 0.137525907311417, "grad_norm": 452.92547607421875, "learning_rate": 9.96022273650507e-06, "loss": 20.734, "step": 68080 }, { "epoch": 0.1375461079441008, "grad_norm": 204.71238708496094, "learning_rate": 9.96017878145455e-06, "loss": 40.597, "step": 68090 }, { "epoch": 0.13756630857678462, "grad_norm": 151.49459838867188, "learning_rate": 9.960134802228722e-06, "loss": 22.1096, "step": 68100 }, { "epoch": 0.13758650920946844, "grad_norm": 348.141845703125, "learning_rate": 9.960090798827798e-06, "loss": 19.0809, "step": 68110 }, { "epoch": 0.13760670984215226, "grad_norm": 105.11824798583984, "learning_rate": 9.960046771251991e-06, "loss": 27.6114, "step": 68120 }, { "epoch": 0.13762691047483608, "grad_norm": 0.0, "learning_rate": 9.96000271950152e-06, "loss": 21.1927, "step": 68130 }, { "epoch": 0.1376471111075199, "grad_norm": 394.193603515625, "learning_rate": 9.959958643576597e-06, "loss": 21.1724, "step": 68140 }, { "epoch": 0.1376673117402037, "grad_norm": 335.38751220703125, "learning_rate": 9.959914543477436e-06, "loss": 17.2723, "step": 68150 }, { "epoch": 0.1376875123728875, "grad_norm": 195.28472900390625, "learning_rate": 9.959870419204253e-06, "loss": 28.9812, "step": 68160 }, { "epoch": 0.13770771300557133, "grad_norm": 484.02642822265625, "learning_rate": 9.959826270757265e-06, "loss": 21.2144, "step": 68170 }, { "epoch": 0.13772791363825515, "grad_norm": 274.8529968261719, "learning_rate": 9.959782098136683e-06, "loss": 36.4288, "step": 68180 }, { "epoch": 0.13774811427093897, "grad_norm": 185.51658630371094, "learning_rate": 9.959737901342725e-06, "loss": 21.0721, "step": 68190 }, { "epoch": 0.1377683149036228, "grad_norm": 371.553955078125, "learning_rate": 9.959693680375608e-06, "loss": 18.1696, "step": 68200 }, { "epoch": 0.1377885155363066, "grad_norm": 119.95893859863281, "learning_rate": 9.959649435235543e-06, "loss": 27.1781, "step": 68210 }, { "epoch": 0.1378087161689904, "grad_norm": 450.22882080078125, "learning_rate": 9.95960516592275e-06, "loss": 33.1805, "step": 68220 }, { "epoch": 0.13782891680167422, "grad_norm": 124.0372085571289, "learning_rate": 9.959560872437443e-06, "loss": 17.368, "step": 68230 }, { "epoch": 0.13784911743435804, "grad_norm": 526.9246215820312, "learning_rate": 9.959516554779838e-06, "loss": 28.8138, "step": 68240 }, { "epoch": 0.13786931806704186, "grad_norm": 200.73521423339844, "learning_rate": 9.95947221295015e-06, "loss": 29.383, "step": 68250 }, { "epoch": 0.13788951869972568, "grad_norm": 318.7671203613281, "learning_rate": 9.959427846948595e-06, "loss": 16.232, "step": 68260 }, { "epoch": 0.1379097193324095, "grad_norm": 0.0, "learning_rate": 9.959383456775392e-06, "loss": 39.1111, "step": 68270 }, { "epoch": 0.1379299199650933, "grad_norm": 0.0, "learning_rate": 9.959339042430753e-06, "loss": 31.5578, "step": 68280 }, { "epoch": 0.13795012059777711, "grad_norm": 360.30126953125, "learning_rate": 9.9592946039149e-06, "loss": 25.2174, "step": 68290 }, { "epoch": 0.13797032123046093, "grad_norm": 327.7031555175781, "learning_rate": 9.959250141228046e-06, "loss": 13.5931, "step": 68300 }, { "epoch": 0.13799052186314475, "grad_norm": 424.2384948730469, "learning_rate": 9.959205654370406e-06, "loss": 45.6638, "step": 68310 }, { "epoch": 0.13801072249582857, "grad_norm": 469.1731872558594, "learning_rate": 9.959161143342201e-06, "loss": 31.371, "step": 68320 }, { "epoch": 0.1380309231285124, "grad_norm": 369.717529296875, "learning_rate": 9.959116608143647e-06, "loss": 32.8046, "step": 68330 }, { "epoch": 0.1380511237611962, "grad_norm": 184.7059783935547, "learning_rate": 9.959072048774958e-06, "loss": 25.3257, "step": 68340 }, { "epoch": 0.13807132439388, "grad_norm": 283.5360412597656, "learning_rate": 9.959027465236354e-06, "loss": 15.1024, "step": 68350 }, { "epoch": 0.13809152502656383, "grad_norm": 66.10023498535156, "learning_rate": 9.958982857528053e-06, "loss": 29.8591, "step": 68360 }, { "epoch": 0.13811172565924765, "grad_norm": 143.27935791015625, "learning_rate": 9.958938225650268e-06, "loss": 35.9243, "step": 68370 }, { "epoch": 0.13813192629193147, "grad_norm": 456.9259338378906, "learning_rate": 9.958893569603222e-06, "loss": 25.4754, "step": 68380 }, { "epoch": 0.1381521269246153, "grad_norm": 512.743408203125, "learning_rate": 9.958848889387129e-06, "loss": 21.9964, "step": 68390 }, { "epoch": 0.1381723275572991, "grad_norm": 493.124267578125, "learning_rate": 9.958804185002209e-06, "loss": 21.8836, "step": 68400 }, { "epoch": 0.1381925281899829, "grad_norm": 331.2524719238281, "learning_rate": 9.958759456448677e-06, "loss": 38.3586, "step": 68410 }, { "epoch": 0.13821272882266672, "grad_norm": 686.2259521484375, "learning_rate": 9.958714703726755e-06, "loss": 39.902, "step": 68420 }, { "epoch": 0.13823292945535054, "grad_norm": 165.9106903076172, "learning_rate": 9.958669926836658e-06, "loss": 17.1272, "step": 68430 }, { "epoch": 0.13825313008803436, "grad_norm": 109.25733947753906, "learning_rate": 9.958625125778606e-06, "loss": 25.4555, "step": 68440 }, { "epoch": 0.13827333072071818, "grad_norm": 88.76424407958984, "learning_rate": 9.958580300552816e-06, "loss": 25.0752, "step": 68450 }, { "epoch": 0.138293531353402, "grad_norm": 354.7678527832031, "learning_rate": 9.958535451159506e-06, "loss": 26.5965, "step": 68460 }, { "epoch": 0.1383137319860858, "grad_norm": 29.70665740966797, "learning_rate": 9.958490577598896e-06, "loss": 22.3942, "step": 68470 }, { "epoch": 0.1383339326187696, "grad_norm": 225.27627563476562, "learning_rate": 9.958445679871204e-06, "loss": 20.4128, "step": 68480 }, { "epoch": 0.13835413325145343, "grad_norm": 421.39459228515625, "learning_rate": 9.958400757976651e-06, "loss": 23.7933, "step": 68490 }, { "epoch": 0.13837433388413725, "grad_norm": 479.00628662109375, "learning_rate": 9.958355811915452e-06, "loss": 21.4033, "step": 68500 }, { "epoch": 0.13839453451682107, "grad_norm": 473.3777160644531, "learning_rate": 9.95831084168783e-06, "loss": 56.1437, "step": 68510 }, { "epoch": 0.1384147351495049, "grad_norm": 319.8925476074219, "learning_rate": 9.958265847294001e-06, "loss": 26.1188, "step": 68520 }, { "epoch": 0.1384349357821887, "grad_norm": 144.64608764648438, "learning_rate": 9.958220828734187e-06, "loss": 35.039, "step": 68530 }, { "epoch": 0.1384551364148725, "grad_norm": 199.89491271972656, "learning_rate": 9.958175786008605e-06, "loss": 11.5195, "step": 68540 }, { "epoch": 0.13847533704755632, "grad_norm": 345.8259582519531, "learning_rate": 9.958130719117476e-06, "loss": 27.1246, "step": 68550 }, { "epoch": 0.13849553768024014, "grad_norm": 108.21050262451172, "learning_rate": 9.958085628061018e-06, "loss": 21.6594, "step": 68560 }, { "epoch": 0.13851573831292396, "grad_norm": 136.68035888671875, "learning_rate": 9.958040512839453e-06, "loss": 22.5918, "step": 68570 }, { "epoch": 0.13853593894560778, "grad_norm": 320.9204406738281, "learning_rate": 9.957995373453e-06, "loss": 17.1508, "step": 68580 }, { "epoch": 0.1385561395782916, "grad_norm": 401.5325927734375, "learning_rate": 9.95795020990188e-06, "loss": 34.6839, "step": 68590 }, { "epoch": 0.1385763402109754, "grad_norm": 356.7091369628906, "learning_rate": 9.957905022186309e-06, "loss": 21.4468, "step": 68600 }, { "epoch": 0.13859654084365922, "grad_norm": 221.68397521972656, "learning_rate": 9.957859810306511e-06, "loss": 31.437, "step": 68610 }, { "epoch": 0.13861674147634304, "grad_norm": 588.0836791992188, "learning_rate": 9.957814574262707e-06, "loss": 22.1574, "step": 68620 }, { "epoch": 0.13863694210902686, "grad_norm": 538.992431640625, "learning_rate": 9.957769314055117e-06, "loss": 30.2449, "step": 68630 }, { "epoch": 0.13865714274171068, "grad_norm": 1206.0052490234375, "learning_rate": 9.957724029683958e-06, "loss": 14.9708, "step": 68640 }, { "epoch": 0.1386773433743945, "grad_norm": 743.46923828125, "learning_rate": 9.957678721149454e-06, "loss": 32.0315, "step": 68650 }, { "epoch": 0.1386975440070783, "grad_norm": 680.0125122070312, "learning_rate": 9.957633388451827e-06, "loss": 22.8376, "step": 68660 }, { "epoch": 0.1387177446397621, "grad_norm": 168.84088134765625, "learning_rate": 9.957588031591295e-06, "loss": 25.6897, "step": 68670 }, { "epoch": 0.13873794527244593, "grad_norm": 211.3870086669922, "learning_rate": 9.957542650568079e-06, "loss": 29.8711, "step": 68680 }, { "epoch": 0.13875814590512975, "grad_norm": 453.1075134277344, "learning_rate": 9.957497245382403e-06, "loss": 19.1895, "step": 68690 }, { "epoch": 0.13877834653781357, "grad_norm": 251.54505920410156, "learning_rate": 9.957451816034487e-06, "loss": 23.8377, "step": 68700 }, { "epoch": 0.1387985471704974, "grad_norm": 251.81326293945312, "learning_rate": 9.95740636252455e-06, "loss": 36.5005, "step": 68710 }, { "epoch": 0.1388187478031812, "grad_norm": 219.64144897460938, "learning_rate": 9.957360884852819e-06, "loss": 21.0923, "step": 68720 }, { "epoch": 0.138838948435865, "grad_norm": 494.73040771484375, "learning_rate": 9.95731538301951e-06, "loss": 35.5499, "step": 68730 }, { "epoch": 0.13885914906854882, "grad_norm": 357.3475646972656, "learning_rate": 9.957269857024847e-06, "loss": 25.4745, "step": 68740 }, { "epoch": 0.13887934970123264, "grad_norm": 111.81966400146484, "learning_rate": 9.957224306869053e-06, "loss": 23.9538, "step": 68750 }, { "epoch": 0.13889955033391646, "grad_norm": 313.6694641113281, "learning_rate": 9.957178732552348e-06, "loss": 21.5483, "step": 68760 }, { "epoch": 0.13891975096660028, "grad_norm": 210.10536193847656, "learning_rate": 9.957133134074955e-06, "loss": 15.3917, "step": 68770 }, { "epoch": 0.1389399515992841, "grad_norm": 234.70004272460938, "learning_rate": 9.957087511437099e-06, "loss": 46.4579, "step": 68780 }, { "epoch": 0.1389601522319679, "grad_norm": 92.30130004882812, "learning_rate": 9.957041864638997e-06, "loss": 15.0139, "step": 68790 }, { "epoch": 0.1389803528646517, "grad_norm": 234.37196350097656, "learning_rate": 9.956996193680874e-06, "loss": 26.7213, "step": 68800 }, { "epoch": 0.13900055349733553, "grad_norm": 587.3884887695312, "learning_rate": 9.956950498562954e-06, "loss": 25.3631, "step": 68810 }, { "epoch": 0.13902075413001935, "grad_norm": 227.07496643066406, "learning_rate": 9.956904779285457e-06, "loss": 19.7416, "step": 68820 }, { "epoch": 0.13904095476270317, "grad_norm": 912.9583740234375, "learning_rate": 9.956859035848608e-06, "loss": 35.1171, "step": 68830 }, { "epoch": 0.139061155395387, "grad_norm": 192.91769409179688, "learning_rate": 9.95681326825263e-06, "loss": 25.9823, "step": 68840 }, { "epoch": 0.1390813560280708, "grad_norm": 46.29120635986328, "learning_rate": 9.956767476497745e-06, "loss": 31.7498, "step": 68850 }, { "epoch": 0.1391015566607546, "grad_norm": 254.65963745117188, "learning_rate": 9.956721660584175e-06, "loss": 37.9559, "step": 68860 }, { "epoch": 0.13912175729343843, "grad_norm": 391.18975830078125, "learning_rate": 9.956675820512146e-06, "loss": 28.0916, "step": 68870 }, { "epoch": 0.13914195792612225, "grad_norm": 216.54083251953125, "learning_rate": 9.956629956281881e-06, "loss": 41.3316, "step": 68880 }, { "epoch": 0.13916215855880607, "grad_norm": 346.6646423339844, "learning_rate": 9.956584067893602e-06, "loss": 16.8361, "step": 68890 }, { "epoch": 0.13918235919148988, "grad_norm": 155.60604858398438, "learning_rate": 9.956538155347534e-06, "loss": 16.7181, "step": 68900 }, { "epoch": 0.1392025598241737, "grad_norm": 433.03240966796875, "learning_rate": 9.9564922186439e-06, "loss": 23.5861, "step": 68910 }, { "epoch": 0.1392227604568575, "grad_norm": 356.83111572265625, "learning_rate": 9.956446257782923e-06, "loss": 22.2751, "step": 68920 }, { "epoch": 0.13924296108954132, "grad_norm": 432.6631774902344, "learning_rate": 9.95640027276483e-06, "loss": 33.6632, "step": 68930 }, { "epoch": 0.13926316172222514, "grad_norm": 158.51043701171875, "learning_rate": 9.95635426358984e-06, "loss": 19.9657, "step": 68940 }, { "epoch": 0.13928336235490896, "grad_norm": 435.2838439941406, "learning_rate": 9.956308230258182e-06, "loss": 24.6148, "step": 68950 }, { "epoch": 0.13930356298759278, "grad_norm": 233.56649780273438, "learning_rate": 9.956262172770082e-06, "loss": 23.9593, "step": 68960 }, { "epoch": 0.1393237636202766, "grad_norm": 398.73590087890625, "learning_rate": 9.956216091125756e-06, "loss": 26.4968, "step": 68970 }, { "epoch": 0.1393439642529604, "grad_norm": 428.2207336425781, "learning_rate": 9.956169985325438e-06, "loss": 23.9693, "step": 68980 }, { "epoch": 0.1393641648856442, "grad_norm": 345.56536865234375, "learning_rate": 9.956123855369346e-06, "loss": 37.6976, "step": 68990 }, { "epoch": 0.13938436551832803, "grad_norm": 457.29168701171875, "learning_rate": 9.95607770125771e-06, "loss": 12.8326, "step": 69000 }, { "epoch": 0.13940456615101185, "grad_norm": 134.96719360351562, "learning_rate": 9.95603152299075e-06, "loss": 34.9476, "step": 69010 }, { "epoch": 0.13942476678369567, "grad_norm": 192.50804138183594, "learning_rate": 9.955985320568696e-06, "loss": 16.537, "step": 69020 }, { "epoch": 0.1394449674163795, "grad_norm": 254.4961395263672, "learning_rate": 9.955939093991767e-06, "loss": 22.2549, "step": 69030 }, { "epoch": 0.1394651680490633, "grad_norm": 149.44158935546875, "learning_rate": 9.955892843260195e-06, "loss": 28.4095, "step": 69040 }, { "epoch": 0.1394853686817471, "grad_norm": 267.1541442871094, "learning_rate": 9.955846568374201e-06, "loss": 24.1104, "step": 69050 }, { "epoch": 0.13950556931443092, "grad_norm": 552.9764404296875, "learning_rate": 9.955800269334013e-06, "loss": 31.6535, "step": 69060 }, { "epoch": 0.13952576994711474, "grad_norm": 165.5390625, "learning_rate": 9.955753946139855e-06, "loss": 30.4806, "step": 69070 }, { "epoch": 0.13954597057979856, "grad_norm": 193.80189514160156, "learning_rate": 9.955707598791952e-06, "loss": 16.8974, "step": 69080 }, { "epoch": 0.13956617121248238, "grad_norm": 191.84228515625, "learning_rate": 9.955661227290531e-06, "loss": 47.4134, "step": 69090 }, { "epoch": 0.1395863718451662, "grad_norm": 213.86045837402344, "learning_rate": 9.95561483163582e-06, "loss": 23.5645, "step": 69100 }, { "epoch": 0.13960657247785, "grad_norm": 552.95361328125, "learning_rate": 9.955568411828043e-06, "loss": 27.9251, "step": 69110 }, { "epoch": 0.13962677311053381, "grad_norm": 327.0218200683594, "learning_rate": 9.955521967867427e-06, "loss": 34.2735, "step": 69120 }, { "epoch": 0.13964697374321763, "grad_norm": 163.06100463867188, "learning_rate": 9.955475499754197e-06, "loss": 11.1267, "step": 69130 }, { "epoch": 0.13966717437590145, "grad_norm": 75.47665405273438, "learning_rate": 9.955429007488582e-06, "loss": 13.3311, "step": 69140 }, { "epoch": 0.13968737500858527, "grad_norm": 317.1898193359375, "learning_rate": 9.955382491070806e-06, "loss": 26.5921, "step": 69150 }, { "epoch": 0.1397075756412691, "grad_norm": 138.806640625, "learning_rate": 9.955335950501097e-06, "loss": 11.6847, "step": 69160 }, { "epoch": 0.13972777627395291, "grad_norm": 561.7401123046875, "learning_rate": 9.955289385779681e-06, "loss": 35.7363, "step": 69170 }, { "epoch": 0.1397479769066367, "grad_norm": 304.19293212890625, "learning_rate": 9.955242796906785e-06, "loss": 17.0322, "step": 69180 }, { "epoch": 0.13976817753932053, "grad_norm": 687.1299438476562, "learning_rate": 9.955196183882637e-06, "loss": 35.2047, "step": 69190 }, { "epoch": 0.13978837817200435, "grad_norm": 299.2028503417969, "learning_rate": 9.955149546707465e-06, "loss": 23.9034, "step": 69200 }, { "epoch": 0.13980857880468817, "grad_norm": 33.92219161987305, "learning_rate": 9.955102885381494e-06, "loss": 23.1121, "step": 69210 }, { "epoch": 0.13982877943737199, "grad_norm": 302.9860534667969, "learning_rate": 9.955056199904953e-06, "loss": 17.8332, "step": 69220 }, { "epoch": 0.1398489800700558, "grad_norm": 236.273681640625, "learning_rate": 9.955009490278069e-06, "loss": 24.0247, "step": 69230 }, { "epoch": 0.1398691807027396, "grad_norm": 177.77757263183594, "learning_rate": 9.95496275650107e-06, "loss": 34.4826, "step": 69240 }, { "epoch": 0.13988938133542342, "grad_norm": 264.8266906738281, "learning_rate": 9.954915998574182e-06, "loss": 8.6536, "step": 69250 }, { "epoch": 0.13990958196810724, "grad_norm": 83.99262237548828, "learning_rate": 9.954869216497636e-06, "loss": 19.3348, "step": 69260 }, { "epoch": 0.13992978260079106, "grad_norm": 4.3098344802856445, "learning_rate": 9.954822410271657e-06, "loss": 26.9079, "step": 69270 }, { "epoch": 0.13994998323347488, "grad_norm": 405.73638916015625, "learning_rate": 9.954775579896476e-06, "loss": 36.804, "step": 69280 }, { "epoch": 0.1399701838661587, "grad_norm": 177.8415069580078, "learning_rate": 9.954728725372319e-06, "loss": 16.6372, "step": 69290 }, { "epoch": 0.1399903844988425, "grad_norm": 344.195068359375, "learning_rate": 9.954681846699414e-06, "loss": 14.5981, "step": 69300 }, { "epoch": 0.1400105851315263, "grad_norm": 179.42372131347656, "learning_rate": 9.954634943877993e-06, "loss": 25.9458, "step": 69310 }, { "epoch": 0.14003078576421013, "grad_norm": 211.78524780273438, "learning_rate": 9.95458801690828e-06, "loss": 37.4605, "step": 69320 }, { "epoch": 0.14005098639689395, "grad_norm": 238.4788360595703, "learning_rate": 9.954541065790509e-06, "loss": 18.9446, "step": 69330 }, { "epoch": 0.14007118702957777, "grad_norm": 138.6262969970703, "learning_rate": 9.954494090524903e-06, "loss": 24.5888, "step": 69340 }, { "epoch": 0.1400913876622616, "grad_norm": 886.2630615234375, "learning_rate": 9.954447091111695e-06, "loss": 29.6083, "step": 69350 }, { "epoch": 0.1401115882949454, "grad_norm": 395.9403381347656, "learning_rate": 9.95440006755111e-06, "loss": 29.363, "step": 69360 }, { "epoch": 0.1401317889276292, "grad_norm": 128.65089416503906, "learning_rate": 9.954353019843384e-06, "loss": 32.1163, "step": 69370 }, { "epoch": 0.14015198956031302, "grad_norm": 860.646240234375, "learning_rate": 9.95430594798874e-06, "loss": 43.8206, "step": 69380 }, { "epoch": 0.14017219019299684, "grad_norm": 0.0, "learning_rate": 9.954258851987411e-06, "loss": 12.0305, "step": 69390 }, { "epoch": 0.14019239082568066, "grad_norm": 262.7568054199219, "learning_rate": 9.954211731839623e-06, "loss": 35.8685, "step": 69400 }, { "epoch": 0.14021259145836448, "grad_norm": 61.86322784423828, "learning_rate": 9.95416458754561e-06, "loss": 25.69, "step": 69410 }, { "epoch": 0.1402327920910483, "grad_norm": 0.0, "learning_rate": 9.954117419105599e-06, "loss": 20.799, "step": 69420 }, { "epoch": 0.1402529927237321, "grad_norm": 216.55343627929688, "learning_rate": 9.95407022651982e-06, "loss": 41.214, "step": 69430 }, { "epoch": 0.14027319335641592, "grad_norm": 131.9139862060547, "learning_rate": 9.954023009788505e-06, "loss": 17.082, "step": 69440 }, { "epoch": 0.14029339398909974, "grad_norm": 655.879638671875, "learning_rate": 9.953975768911881e-06, "loss": 34.4271, "step": 69450 }, { "epoch": 0.14031359462178356, "grad_norm": 890.6541748046875, "learning_rate": 9.953928503890181e-06, "loss": 23.1937, "step": 69460 }, { "epoch": 0.14033379525446738, "grad_norm": 55.621463775634766, "learning_rate": 9.953881214723636e-06, "loss": 28.678, "step": 69470 }, { "epoch": 0.1403539958871512, "grad_norm": 156.36248779296875, "learning_rate": 9.95383390141247e-06, "loss": 26.7247, "step": 69480 }, { "epoch": 0.14037419651983502, "grad_norm": 169.95513916015625, "learning_rate": 9.953786563956923e-06, "loss": 26.2841, "step": 69490 }, { "epoch": 0.1403943971525188, "grad_norm": 479.8801574707031, "learning_rate": 9.953739202357219e-06, "loss": 41.0713, "step": 69500 }, { "epoch": 0.14041459778520263, "grad_norm": 483.7135009765625, "learning_rate": 9.953691816613592e-06, "loss": 28.0718, "step": 69510 }, { "epoch": 0.14043479841788645, "grad_norm": 473.8107604980469, "learning_rate": 9.95364440672627e-06, "loss": 23.3146, "step": 69520 }, { "epoch": 0.14045499905057027, "grad_norm": 359.591796875, "learning_rate": 9.953596972695487e-06, "loss": 19.0019, "step": 69530 }, { "epoch": 0.1404751996832541, "grad_norm": 414.6611328125, "learning_rate": 9.953549514521474e-06, "loss": 29.429, "step": 69540 }, { "epoch": 0.1404954003159379, "grad_norm": 262.8575744628906, "learning_rate": 9.953502032204461e-06, "loss": 25.172, "step": 69550 }, { "epoch": 0.1405156009486217, "grad_norm": 0.0, "learning_rate": 9.95345452574468e-06, "loss": 32.0321, "step": 69560 }, { "epoch": 0.14053580158130552, "grad_norm": 155.2565155029297, "learning_rate": 9.95340699514236e-06, "loss": 27.8357, "step": 69570 }, { "epoch": 0.14055600221398934, "grad_norm": 428.5212097167969, "learning_rate": 9.953359440397738e-06, "loss": 27.3436, "step": 69580 }, { "epoch": 0.14057620284667316, "grad_norm": 819.8240966796875, "learning_rate": 9.953311861511043e-06, "loss": 20.5971, "step": 69590 }, { "epoch": 0.14059640347935698, "grad_norm": 221.31495666503906, "learning_rate": 9.953264258482505e-06, "loss": 21.8865, "step": 69600 }, { "epoch": 0.1406166041120408, "grad_norm": 181.98626708984375, "learning_rate": 9.953216631312358e-06, "loss": 19.1573, "step": 69610 }, { "epoch": 0.1406368047447246, "grad_norm": 136.6560516357422, "learning_rate": 9.953168980000836e-06, "loss": 26.8879, "step": 69620 }, { "epoch": 0.1406570053774084, "grad_norm": 141.8535614013672, "learning_rate": 9.953121304548167e-06, "loss": 19.9271, "step": 69630 }, { "epoch": 0.14067720601009223, "grad_norm": 242.22036743164062, "learning_rate": 9.953073604954586e-06, "loss": 41.7273, "step": 69640 }, { "epoch": 0.14069740664277605, "grad_norm": 163.50904846191406, "learning_rate": 9.953025881220325e-06, "loss": 24.1223, "step": 69650 }, { "epoch": 0.14071760727545987, "grad_norm": 346.74041748046875, "learning_rate": 9.952978133345616e-06, "loss": 28.5577, "step": 69660 }, { "epoch": 0.1407378079081437, "grad_norm": 151.51898193359375, "learning_rate": 9.952930361330694e-06, "loss": 22.0834, "step": 69670 }, { "epoch": 0.1407580085408275, "grad_norm": 262.0771789550781, "learning_rate": 9.952882565175788e-06, "loss": 38.5018, "step": 69680 }, { "epoch": 0.1407782091735113, "grad_norm": 611.2744140625, "learning_rate": 9.952834744881135e-06, "loss": 21.2858, "step": 69690 }, { "epoch": 0.14079840980619512, "grad_norm": 831.6581420898438, "learning_rate": 9.952786900446964e-06, "loss": 34.0255, "step": 69700 }, { "epoch": 0.14081861043887894, "grad_norm": 0.0, "learning_rate": 9.952739031873513e-06, "loss": 16.1337, "step": 69710 }, { "epoch": 0.14083881107156276, "grad_norm": 408.74676513671875, "learning_rate": 9.952691139161012e-06, "loss": 28.4654, "step": 69720 }, { "epoch": 0.14085901170424658, "grad_norm": 407.21820068359375, "learning_rate": 9.952643222309694e-06, "loss": 14.0785, "step": 69730 }, { "epoch": 0.1408792123369304, "grad_norm": 404.1986999511719, "learning_rate": 9.952595281319794e-06, "loss": 21.3055, "step": 69740 }, { "epoch": 0.1408994129696142, "grad_norm": 312.14544677734375, "learning_rate": 9.952547316191545e-06, "loss": 11.6818, "step": 69750 }, { "epoch": 0.14091961360229802, "grad_norm": 356.2087707519531, "learning_rate": 9.95249932692518e-06, "loss": 32.6752, "step": 69760 }, { "epoch": 0.14093981423498184, "grad_norm": 241.1984100341797, "learning_rate": 9.952451313520937e-06, "loss": 35.1928, "step": 69770 }, { "epoch": 0.14096001486766566, "grad_norm": 313.96307373046875, "learning_rate": 9.952403275979046e-06, "loss": 34.2784, "step": 69780 }, { "epoch": 0.14098021550034948, "grad_norm": 438.9270324707031, "learning_rate": 9.95235521429974e-06, "loss": 33.3401, "step": 69790 }, { "epoch": 0.1410004161330333, "grad_norm": 147.7724609375, "learning_rate": 9.952307128483257e-06, "loss": 12.4964, "step": 69800 }, { "epoch": 0.14102061676571712, "grad_norm": 307.1788635253906, "learning_rate": 9.952259018529829e-06, "loss": 35.5531, "step": 69810 }, { "epoch": 0.1410408173984009, "grad_norm": 353.0546569824219, "learning_rate": 9.952210884439693e-06, "loss": 19.0095, "step": 69820 }, { "epoch": 0.14106101803108473, "grad_norm": 397.1801452636719, "learning_rate": 9.95216272621308e-06, "loss": 16.7725, "step": 69830 }, { "epoch": 0.14108121866376855, "grad_norm": 251.0044403076172, "learning_rate": 9.952114543850227e-06, "loss": 25.4433, "step": 69840 }, { "epoch": 0.14110141929645237, "grad_norm": 238.82403564453125, "learning_rate": 9.952066337351367e-06, "loss": 13.6478, "step": 69850 }, { "epoch": 0.1411216199291362, "grad_norm": 161.5249481201172, "learning_rate": 9.952018106716737e-06, "loss": 23.2142, "step": 69860 }, { "epoch": 0.14114182056182, "grad_norm": 906.1398315429688, "learning_rate": 9.951969851946573e-06, "loss": 36.48, "step": 69870 }, { "epoch": 0.1411620211945038, "grad_norm": 65.00243377685547, "learning_rate": 9.951921573041107e-06, "loss": 38.1297, "step": 69880 }, { "epoch": 0.14118222182718762, "grad_norm": 289.15576171875, "learning_rate": 9.951873270000576e-06, "loss": 27.616, "step": 69890 }, { "epoch": 0.14120242245987144, "grad_norm": 255.79307556152344, "learning_rate": 9.951824942825215e-06, "loss": 39.4631, "step": 69900 }, { "epoch": 0.14122262309255526, "grad_norm": 298.6893310546875, "learning_rate": 9.951776591515262e-06, "loss": 31.8582, "step": 69910 }, { "epoch": 0.14124282372523908, "grad_norm": 0.0, "learning_rate": 9.951728216070949e-06, "loss": 12.699, "step": 69920 }, { "epoch": 0.1412630243579229, "grad_norm": 209.81834411621094, "learning_rate": 9.951679816492513e-06, "loss": 21.2883, "step": 69930 }, { "epoch": 0.1412832249906067, "grad_norm": 182.9413299560547, "learning_rate": 9.951631392780189e-06, "loss": 20.2108, "step": 69940 }, { "epoch": 0.1413034256232905, "grad_norm": 416.6510314941406, "learning_rate": 9.951582944934215e-06, "loss": 36.5348, "step": 69950 }, { "epoch": 0.14132362625597433, "grad_norm": 491.0602111816406, "learning_rate": 9.951534472954826e-06, "loss": 32.9515, "step": 69960 }, { "epoch": 0.14134382688865815, "grad_norm": 168.59072875976562, "learning_rate": 9.95148597684226e-06, "loss": 34.53, "step": 69970 }, { "epoch": 0.14136402752134197, "grad_norm": 156.453369140625, "learning_rate": 9.951437456596751e-06, "loss": 13.581, "step": 69980 }, { "epoch": 0.1413842281540258, "grad_norm": 709.4266357421875, "learning_rate": 9.951388912218536e-06, "loss": 37.719, "step": 69990 }, { "epoch": 0.1414044287867096, "grad_norm": 376.1303405761719, "learning_rate": 9.951340343707852e-06, "loss": 18.9507, "step": 70000 }, { "epoch": 0.1414246294193934, "grad_norm": 189.286865234375, "learning_rate": 9.951291751064937e-06, "loss": 23.8695, "step": 70010 }, { "epoch": 0.14144483005207723, "grad_norm": 146.30877685546875, "learning_rate": 9.951243134290025e-06, "loss": 10.1796, "step": 70020 }, { "epoch": 0.14146503068476105, "grad_norm": 488.5118408203125, "learning_rate": 9.951194493383355e-06, "loss": 31.4458, "step": 70030 }, { "epoch": 0.14148523131744487, "grad_norm": 358.5882568359375, "learning_rate": 9.951145828345163e-06, "loss": 26.5309, "step": 70040 }, { "epoch": 0.14150543195012869, "grad_norm": 435.0777587890625, "learning_rate": 9.951097139175688e-06, "loss": 31.5678, "step": 70050 }, { "epoch": 0.1415256325828125, "grad_norm": 435.764892578125, "learning_rate": 9.951048425875165e-06, "loss": 31.2178, "step": 70060 }, { "epoch": 0.1415458332154963, "grad_norm": 75.78207397460938, "learning_rate": 9.950999688443833e-06, "loss": 9.9373, "step": 70070 }, { "epoch": 0.14156603384818012, "grad_norm": 416.3119201660156, "learning_rate": 9.950950926881928e-06, "loss": 28.7786, "step": 70080 }, { "epoch": 0.14158623448086394, "grad_norm": 515.712158203125, "learning_rate": 9.950902141189691e-06, "loss": 29.6586, "step": 70090 }, { "epoch": 0.14160643511354776, "grad_norm": 924.7127075195312, "learning_rate": 9.950853331367356e-06, "loss": 29.7788, "step": 70100 }, { "epoch": 0.14162663574623158, "grad_norm": 128.7560577392578, "learning_rate": 9.95080449741516e-06, "loss": 27.1906, "step": 70110 }, { "epoch": 0.1416468363789154, "grad_norm": 437.0798034667969, "learning_rate": 9.950755639333347e-06, "loss": 29.8028, "step": 70120 }, { "epoch": 0.1416670370115992, "grad_norm": 356.2104797363281, "learning_rate": 9.95070675712215e-06, "loss": 24.5837, "step": 70130 }, { "epoch": 0.141687237644283, "grad_norm": 223.80706787109375, "learning_rate": 9.950657850781809e-06, "loss": 15.3837, "step": 70140 }, { "epoch": 0.14170743827696683, "grad_norm": 190.4036407470703, "learning_rate": 9.95060892031256e-06, "loss": 37.3934, "step": 70150 }, { "epoch": 0.14172763890965065, "grad_norm": 345.3880310058594, "learning_rate": 9.950559965714647e-06, "loss": 52.0188, "step": 70160 }, { "epoch": 0.14174783954233447, "grad_norm": 436.74578857421875, "learning_rate": 9.950510986988304e-06, "loss": 27.9825, "step": 70170 }, { "epoch": 0.1417680401750183, "grad_norm": 112.69490814208984, "learning_rate": 9.95046198413377e-06, "loss": 19.529, "step": 70180 }, { "epoch": 0.1417882408077021, "grad_norm": 311.27862548828125, "learning_rate": 9.950412957151286e-06, "loss": 30.6314, "step": 70190 }, { "epoch": 0.1418084414403859, "grad_norm": 0.0, "learning_rate": 9.950363906041089e-06, "loss": 23.8304, "step": 70200 }, { "epoch": 0.14182864207306972, "grad_norm": 306.214599609375, "learning_rate": 9.950314830803418e-06, "loss": 26.5467, "step": 70210 }, { "epoch": 0.14184884270575354, "grad_norm": 552.8003540039062, "learning_rate": 9.950265731438513e-06, "loss": 32.6233, "step": 70220 }, { "epoch": 0.14186904333843736, "grad_norm": 515.1917114257812, "learning_rate": 9.950216607946614e-06, "loss": 37.6608, "step": 70230 }, { "epoch": 0.14188924397112118, "grad_norm": 479.134765625, "learning_rate": 9.95016746032796e-06, "loss": 41.3976, "step": 70240 }, { "epoch": 0.141909444603805, "grad_norm": 39.89117431640625, "learning_rate": 9.95011828858279e-06, "loss": 30.3669, "step": 70250 }, { "epoch": 0.1419296452364888, "grad_norm": 96.72360229492188, "learning_rate": 9.950069092711342e-06, "loss": 19.0105, "step": 70260 }, { "epoch": 0.14194984586917261, "grad_norm": 13.864790916442871, "learning_rate": 9.950019872713858e-06, "loss": 21.8689, "step": 70270 }, { "epoch": 0.14197004650185643, "grad_norm": 124.76101684570312, "learning_rate": 9.94997062859058e-06, "loss": 14.5501, "step": 70280 }, { "epoch": 0.14199024713454025, "grad_norm": 447.6378173828125, "learning_rate": 9.949921360341743e-06, "loss": 23.9222, "step": 70290 }, { "epoch": 0.14201044776722407, "grad_norm": 7.357741832733154, "learning_rate": 9.94987206796759e-06, "loss": 24.3004, "step": 70300 }, { "epoch": 0.1420306483999079, "grad_norm": 149.4551239013672, "learning_rate": 9.949822751468364e-06, "loss": 19.9849, "step": 70310 }, { "epoch": 0.14205084903259171, "grad_norm": 60.981346130371094, "learning_rate": 9.949773410844299e-06, "loss": 32.2566, "step": 70320 }, { "epoch": 0.1420710496652755, "grad_norm": 262.94219970703125, "learning_rate": 9.94972404609564e-06, "loss": 28.9272, "step": 70330 }, { "epoch": 0.14209125029795933, "grad_norm": 296.5335388183594, "learning_rate": 9.949674657222624e-06, "loss": 22.7992, "step": 70340 }, { "epoch": 0.14211145093064315, "grad_norm": 1185.763427734375, "learning_rate": 9.949625244225496e-06, "loss": 23.4008, "step": 70350 }, { "epoch": 0.14213165156332697, "grad_norm": 389.3836364746094, "learning_rate": 9.949575807104494e-06, "loss": 21.1186, "step": 70360 }, { "epoch": 0.1421518521960108, "grad_norm": 935.9951171875, "learning_rate": 9.94952634585986e-06, "loss": 36.8096, "step": 70370 }, { "epoch": 0.1421720528286946, "grad_norm": 50.444828033447266, "learning_rate": 9.949476860491836e-06, "loss": 50.3079, "step": 70380 }, { "epoch": 0.1421922534613784, "grad_norm": 238.8971405029297, "learning_rate": 9.949427351000662e-06, "loss": 28.2718, "step": 70390 }, { "epoch": 0.14221245409406222, "grad_norm": 170.95730590820312, "learning_rate": 9.94937781738658e-06, "loss": 20.2931, "step": 70400 }, { "epoch": 0.14223265472674604, "grad_norm": 161.15362548828125, "learning_rate": 9.949328259649828e-06, "loss": 13.2333, "step": 70410 }, { "epoch": 0.14225285535942986, "grad_norm": 351.4736022949219, "learning_rate": 9.949278677790653e-06, "loss": 13.0054, "step": 70420 }, { "epoch": 0.14227305599211368, "grad_norm": 192.16064453125, "learning_rate": 9.949229071809294e-06, "loss": 19.426, "step": 70430 }, { "epoch": 0.1422932566247975, "grad_norm": 354.9193420410156, "learning_rate": 9.949179441705992e-06, "loss": 37.3764, "step": 70440 }, { "epoch": 0.1423134572574813, "grad_norm": 81.73265838623047, "learning_rate": 9.949129787480988e-06, "loss": 20.535, "step": 70450 }, { "epoch": 0.1423336578901651, "grad_norm": 317.515869140625, "learning_rate": 9.949080109134528e-06, "loss": 29.8027, "step": 70460 }, { "epoch": 0.14235385852284893, "grad_norm": 217.76658630371094, "learning_rate": 9.949030406666852e-06, "loss": 27.2614, "step": 70470 }, { "epoch": 0.14237405915553275, "grad_norm": 169.57925415039062, "learning_rate": 9.948980680078199e-06, "loss": 31.9568, "step": 70480 }, { "epoch": 0.14239425978821657, "grad_norm": 221.68858337402344, "learning_rate": 9.948930929368818e-06, "loss": 15.3627, "step": 70490 }, { "epoch": 0.1424144604209004, "grad_norm": 510.9658508300781, "learning_rate": 9.948881154538946e-06, "loss": 31.4757, "step": 70500 }, { "epoch": 0.1424346610535842, "grad_norm": 676.5675659179688, "learning_rate": 9.948831355588828e-06, "loss": 25.8641, "step": 70510 }, { "epoch": 0.142454861686268, "grad_norm": 347.7689208984375, "learning_rate": 9.948781532518706e-06, "loss": 25.3545, "step": 70520 }, { "epoch": 0.14247506231895182, "grad_norm": 176.4119110107422, "learning_rate": 9.948731685328823e-06, "loss": 22.6806, "step": 70530 }, { "epoch": 0.14249526295163564, "grad_norm": 125.42620849609375, "learning_rate": 9.948681814019421e-06, "loss": 19.0642, "step": 70540 }, { "epoch": 0.14251546358431946, "grad_norm": 231.6793212890625, "learning_rate": 9.948631918590746e-06, "loss": 28.3477, "step": 70550 }, { "epoch": 0.14253566421700328, "grad_norm": 603.5516357421875, "learning_rate": 9.948581999043038e-06, "loss": 36.6601, "step": 70560 }, { "epoch": 0.1425558648496871, "grad_norm": 829.0869140625, "learning_rate": 9.948532055376541e-06, "loss": 40.3321, "step": 70570 }, { "epoch": 0.1425760654823709, "grad_norm": 565.1549682617188, "learning_rate": 9.9484820875915e-06, "loss": 25.0955, "step": 70580 }, { "epoch": 0.14259626611505472, "grad_norm": 237.41543579101562, "learning_rate": 9.948432095688157e-06, "loss": 21.3993, "step": 70590 }, { "epoch": 0.14261646674773854, "grad_norm": 247.08192443847656, "learning_rate": 9.948382079666756e-06, "loss": 18.0309, "step": 70600 }, { "epoch": 0.14263666738042236, "grad_norm": 268.5989074707031, "learning_rate": 9.948332039527541e-06, "loss": 28.8816, "step": 70610 }, { "epoch": 0.14265686801310618, "grad_norm": 341.5416564941406, "learning_rate": 9.948281975270758e-06, "loss": 23.9713, "step": 70620 }, { "epoch": 0.14267706864579, "grad_norm": 224.958740234375, "learning_rate": 9.948231886896646e-06, "loss": 14.8055, "step": 70630 }, { "epoch": 0.14269726927847382, "grad_norm": 868.9297485351562, "learning_rate": 9.948181774405453e-06, "loss": 42.7075, "step": 70640 }, { "epoch": 0.1427174699111576, "grad_norm": 44.96022415161133, "learning_rate": 9.94813163779742e-06, "loss": 25.1508, "step": 70650 }, { "epoch": 0.14273767054384143, "grad_norm": 342.5815124511719, "learning_rate": 9.948081477072797e-06, "loss": 23.0206, "step": 70660 }, { "epoch": 0.14275787117652525, "grad_norm": 115.79615783691406, "learning_rate": 9.948031292231823e-06, "loss": 40.8064, "step": 70670 }, { "epoch": 0.14277807180920907, "grad_norm": 331.35760498046875, "learning_rate": 9.947981083274747e-06, "loss": 22.7461, "step": 70680 }, { "epoch": 0.1427982724418929, "grad_norm": 66.27848052978516, "learning_rate": 9.947930850201808e-06, "loss": 22.0032, "step": 70690 }, { "epoch": 0.1428184730745767, "grad_norm": 246.4425811767578, "learning_rate": 9.947880593013256e-06, "loss": 16.7426, "step": 70700 }, { "epoch": 0.1428386737072605, "grad_norm": 216.62318420410156, "learning_rate": 9.947830311709333e-06, "loss": 19.8728, "step": 70710 }, { "epoch": 0.14285887433994432, "grad_norm": 156.56211853027344, "learning_rate": 9.947780006290287e-06, "loss": 31.9913, "step": 70720 }, { "epoch": 0.14287907497262814, "grad_norm": 134.0349578857422, "learning_rate": 9.947729676756359e-06, "loss": 17.8019, "step": 70730 }, { "epoch": 0.14289927560531196, "grad_norm": 293.2822570800781, "learning_rate": 9.947679323107798e-06, "loss": 32.2131, "step": 70740 }, { "epoch": 0.14291947623799578, "grad_norm": 227.1851806640625, "learning_rate": 9.947628945344849e-06, "loss": 34.7947, "step": 70750 }, { "epoch": 0.1429396768706796, "grad_norm": 128.45733642578125, "learning_rate": 9.947578543467755e-06, "loss": 20.3649, "step": 70760 }, { "epoch": 0.1429598775033634, "grad_norm": 303.4639587402344, "learning_rate": 9.947528117476764e-06, "loss": 32.3845, "step": 70770 }, { "epoch": 0.1429800781360472, "grad_norm": 494.112548828125, "learning_rate": 9.94747766737212e-06, "loss": 35.6072, "step": 70780 }, { "epoch": 0.14300027876873103, "grad_norm": 0.0, "learning_rate": 9.94742719315407e-06, "loss": 30.3195, "step": 70790 }, { "epoch": 0.14302047940141485, "grad_norm": 225.24563598632812, "learning_rate": 9.947376694822861e-06, "loss": 22.8075, "step": 70800 }, { "epoch": 0.14304068003409867, "grad_norm": 277.6125793457031, "learning_rate": 9.947326172378736e-06, "loss": 25.4156, "step": 70810 }, { "epoch": 0.1430608806667825, "grad_norm": 523.2900390625, "learning_rate": 9.947275625821947e-06, "loss": 33.9644, "step": 70820 }, { "epoch": 0.1430810812994663, "grad_norm": 23.58151626586914, "learning_rate": 9.947225055152735e-06, "loss": 27.0659, "step": 70830 }, { "epoch": 0.1431012819321501, "grad_norm": 1.915338158607483, "learning_rate": 9.947174460371347e-06, "loss": 29.238, "step": 70840 }, { "epoch": 0.14312148256483392, "grad_norm": 172.40159606933594, "learning_rate": 9.947123841478032e-06, "loss": 34.8935, "step": 70850 }, { "epoch": 0.14314168319751774, "grad_norm": 318.7747497558594, "learning_rate": 9.947073198473034e-06, "loss": 30.5402, "step": 70860 }, { "epoch": 0.14316188383020156, "grad_norm": 335.532470703125, "learning_rate": 9.947022531356602e-06, "loss": 25.74, "step": 70870 }, { "epoch": 0.14318208446288538, "grad_norm": 161.0736846923828, "learning_rate": 9.946971840128982e-06, "loss": 23.0715, "step": 70880 }, { "epoch": 0.1432022850955692, "grad_norm": 1175.650634765625, "learning_rate": 9.94692112479042e-06, "loss": 36.9286, "step": 70890 }, { "epoch": 0.143222485728253, "grad_norm": 649.8682861328125, "learning_rate": 9.946870385341167e-06, "loss": 25.266, "step": 70900 }, { "epoch": 0.14324268636093682, "grad_norm": 396.2052307128906, "learning_rate": 9.946819621781467e-06, "loss": 25.746, "step": 70910 }, { "epoch": 0.14326288699362064, "grad_norm": 512.9754028320312, "learning_rate": 9.946768834111568e-06, "loss": 28.1627, "step": 70920 }, { "epoch": 0.14328308762630446, "grad_norm": 334.01397705078125, "learning_rate": 9.946718022331715e-06, "loss": 25.4227, "step": 70930 }, { "epoch": 0.14330328825898828, "grad_norm": 447.7845764160156, "learning_rate": 9.946667186442162e-06, "loss": 24.355, "step": 70940 }, { "epoch": 0.1433234888916721, "grad_norm": 660.2554931640625, "learning_rate": 9.946616326443153e-06, "loss": 29.95, "step": 70950 }, { "epoch": 0.14334368952435592, "grad_norm": 140.41163635253906, "learning_rate": 9.946565442334935e-06, "loss": 22.7366, "step": 70960 }, { "epoch": 0.1433638901570397, "grad_norm": 246.50221252441406, "learning_rate": 9.946514534117755e-06, "loss": 24.672, "step": 70970 }, { "epoch": 0.14338409078972353, "grad_norm": 446.34869384765625, "learning_rate": 9.946463601791865e-06, "loss": 36.3361, "step": 70980 }, { "epoch": 0.14340429142240735, "grad_norm": 250.34353637695312, "learning_rate": 9.94641264535751e-06, "loss": 26.7742, "step": 70990 }, { "epoch": 0.14342449205509117, "grad_norm": 182.8427276611328, "learning_rate": 9.946361664814942e-06, "loss": 23.2308, "step": 71000 }, { "epoch": 0.143444692687775, "grad_norm": 250.7086639404297, "learning_rate": 9.946310660164407e-06, "loss": 16.9722, "step": 71010 }, { "epoch": 0.1434648933204588, "grad_norm": 300.5544738769531, "learning_rate": 9.946259631406153e-06, "loss": 18.5594, "step": 71020 }, { "epoch": 0.1434850939531426, "grad_norm": 353.5708312988281, "learning_rate": 9.946208578540428e-06, "loss": 24.985, "step": 71030 }, { "epoch": 0.14350529458582642, "grad_norm": 365.8289489746094, "learning_rate": 9.946157501567484e-06, "loss": 16.3581, "step": 71040 }, { "epoch": 0.14352549521851024, "grad_norm": 242.18414306640625, "learning_rate": 9.946106400487568e-06, "loss": 18.1798, "step": 71050 }, { "epoch": 0.14354569585119406, "grad_norm": 237.22682189941406, "learning_rate": 9.946055275300929e-06, "loss": 20.1327, "step": 71060 }, { "epoch": 0.14356589648387788, "grad_norm": 281.62060546875, "learning_rate": 9.946004126007817e-06, "loss": 15.4899, "step": 71070 }, { "epoch": 0.1435860971165617, "grad_norm": 651.3358154296875, "learning_rate": 9.94595295260848e-06, "loss": 36.8129, "step": 71080 }, { "epoch": 0.1436062977492455, "grad_norm": 0.0, "learning_rate": 9.945901755103169e-06, "loss": 32.5301, "step": 71090 }, { "epoch": 0.1436264983819293, "grad_norm": 265.0592346191406, "learning_rate": 9.945850533492132e-06, "loss": 25.3436, "step": 71100 }, { "epoch": 0.14364669901461313, "grad_norm": 338.178466796875, "learning_rate": 9.94579928777562e-06, "loss": 58.1804, "step": 71110 }, { "epoch": 0.14366689964729695, "grad_norm": 228.4384307861328, "learning_rate": 9.94574801795388e-06, "loss": 22.9891, "step": 71120 }, { "epoch": 0.14368710027998077, "grad_norm": 250.4985809326172, "learning_rate": 9.945696724027166e-06, "loss": 24.3947, "step": 71130 }, { "epoch": 0.1437073009126646, "grad_norm": 284.0098876953125, "learning_rate": 9.945645405995726e-06, "loss": 16.7083, "step": 71140 }, { "epoch": 0.1437275015453484, "grad_norm": 344.5787353515625, "learning_rate": 9.94559406385981e-06, "loss": 19.2843, "step": 71150 }, { "epoch": 0.1437477021780322, "grad_norm": 143.81423950195312, "learning_rate": 9.945542697619667e-06, "loss": 24.2144, "step": 71160 }, { "epoch": 0.14376790281071603, "grad_norm": 663.373046875, "learning_rate": 9.94549130727555e-06, "loss": 29.9885, "step": 71170 }, { "epoch": 0.14378810344339985, "grad_norm": 124.40194702148438, "learning_rate": 9.945439892827709e-06, "loss": 18.191, "step": 71180 }, { "epoch": 0.14380830407608367, "grad_norm": 368.6988220214844, "learning_rate": 9.945388454276392e-06, "loss": 23.2124, "step": 71190 }, { "epoch": 0.14382850470876749, "grad_norm": 281.41033935546875, "learning_rate": 9.945336991621854e-06, "loss": 42.8151, "step": 71200 }, { "epoch": 0.1438487053414513, "grad_norm": 272.2857666015625, "learning_rate": 9.945285504864342e-06, "loss": 28.217, "step": 71210 }, { "epoch": 0.1438689059741351, "grad_norm": 114.200927734375, "learning_rate": 9.945233994004107e-06, "loss": 13.9199, "step": 71220 }, { "epoch": 0.14388910660681892, "grad_norm": 166.01791381835938, "learning_rate": 9.945182459041403e-06, "loss": 21.6865, "step": 71230 }, { "epoch": 0.14390930723950274, "grad_norm": 270.4473876953125, "learning_rate": 9.945130899976477e-06, "loss": 27.6191, "step": 71240 }, { "epoch": 0.14392950787218656, "grad_norm": 285.2046813964844, "learning_rate": 9.945079316809585e-06, "loss": 17.288, "step": 71250 }, { "epoch": 0.14394970850487038, "grad_norm": 177.52964782714844, "learning_rate": 9.945027709540975e-06, "loss": 19.1418, "step": 71260 }, { "epoch": 0.1439699091375542, "grad_norm": 142.7645263671875, "learning_rate": 9.9449760781709e-06, "loss": 28.0719, "step": 71270 }, { "epoch": 0.14399010977023802, "grad_norm": 122.1283950805664, "learning_rate": 9.944924422699613e-06, "loss": 21.6465, "step": 71280 }, { "epoch": 0.1440103104029218, "grad_norm": 64.9097900390625, "learning_rate": 9.944872743127363e-06, "loss": 20.2944, "step": 71290 }, { "epoch": 0.14403051103560563, "grad_norm": 72.4189224243164, "learning_rate": 9.944821039454403e-06, "loss": 26.1332, "step": 71300 }, { "epoch": 0.14405071166828945, "grad_norm": 153.71755981445312, "learning_rate": 9.944769311680984e-06, "loss": 24.6688, "step": 71310 }, { "epoch": 0.14407091230097327, "grad_norm": 153.58157348632812, "learning_rate": 9.94471755980736e-06, "loss": 26.1835, "step": 71320 }, { "epoch": 0.1440911129336571, "grad_norm": 326.7622985839844, "learning_rate": 9.944665783833782e-06, "loss": 37.7071, "step": 71330 }, { "epoch": 0.1441113135663409, "grad_norm": 319.137451171875, "learning_rate": 9.944613983760503e-06, "loss": 25.1492, "step": 71340 }, { "epoch": 0.1441315141990247, "grad_norm": 184.2548828125, "learning_rate": 9.944562159587774e-06, "loss": 21.9951, "step": 71350 }, { "epoch": 0.14415171483170852, "grad_norm": 111.20664978027344, "learning_rate": 9.94451031131585e-06, "loss": 21.898, "step": 71360 }, { "epoch": 0.14417191546439234, "grad_norm": 196.0028839111328, "learning_rate": 9.944458438944983e-06, "loss": 22.6091, "step": 71370 }, { "epoch": 0.14419211609707616, "grad_norm": 138.104248046875, "learning_rate": 9.944406542475425e-06, "loss": 23.53, "step": 71380 }, { "epoch": 0.14421231672975998, "grad_norm": 474.58306884765625, "learning_rate": 9.944354621907428e-06, "loss": 14.2392, "step": 71390 }, { "epoch": 0.1442325173624438, "grad_norm": 169.00267028808594, "learning_rate": 9.944302677241247e-06, "loss": 15.2429, "step": 71400 }, { "epoch": 0.1442527179951276, "grad_norm": 788.33447265625, "learning_rate": 9.944250708477135e-06, "loss": 36.5301, "step": 71410 }, { "epoch": 0.14427291862781141, "grad_norm": 650.7154541015625, "learning_rate": 9.944198715615343e-06, "loss": 39.93, "step": 71420 }, { "epoch": 0.14429311926049523, "grad_norm": 269.6849670410156, "learning_rate": 9.944146698656127e-06, "loss": 20.0065, "step": 71430 }, { "epoch": 0.14431331989317905, "grad_norm": 395.16961669921875, "learning_rate": 9.94409465759974e-06, "loss": 30.9902, "step": 71440 }, { "epoch": 0.14433352052586287, "grad_norm": 250.42271423339844, "learning_rate": 9.944042592446434e-06, "loss": 13.3679, "step": 71450 }, { "epoch": 0.1443537211585467, "grad_norm": 79.03618621826172, "learning_rate": 9.943990503196466e-06, "loss": 25.8598, "step": 71460 }, { "epoch": 0.14437392179123051, "grad_norm": 229.65892028808594, "learning_rate": 9.943938389850087e-06, "loss": 25.98, "step": 71470 }, { "epoch": 0.1443941224239143, "grad_norm": 211.52972412109375, "learning_rate": 9.943886252407551e-06, "loss": 32.7032, "step": 71480 }, { "epoch": 0.14441432305659813, "grad_norm": 169.14308166503906, "learning_rate": 9.943834090869116e-06, "loss": 29.0392, "step": 71490 }, { "epoch": 0.14443452368928195, "grad_norm": 95.42337036132812, "learning_rate": 9.94378190523503e-06, "loss": 33.2617, "step": 71500 }, { "epoch": 0.14445472432196577, "grad_norm": 194.3964080810547, "learning_rate": 9.943729695505552e-06, "loss": 21.6554, "step": 71510 }, { "epoch": 0.1444749249546496, "grad_norm": 350.4386291503906, "learning_rate": 9.943677461680935e-06, "loss": 49.0388, "step": 71520 }, { "epoch": 0.1444951255873334, "grad_norm": 231.56227111816406, "learning_rate": 9.943625203761434e-06, "loss": 13.4587, "step": 71530 }, { "epoch": 0.1445153262200172, "grad_norm": 124.10364532470703, "learning_rate": 9.943572921747302e-06, "loss": 33.0822, "step": 71540 }, { "epoch": 0.14453552685270102, "grad_norm": 238.82797241210938, "learning_rate": 9.943520615638796e-06, "loss": 41.915, "step": 71550 }, { "epoch": 0.14455572748538484, "grad_norm": 321.5826110839844, "learning_rate": 9.943468285436171e-06, "loss": 30.4145, "step": 71560 }, { "epoch": 0.14457592811806866, "grad_norm": 365.925537109375, "learning_rate": 9.94341593113968e-06, "loss": 35.1685, "step": 71570 }, { "epoch": 0.14459612875075248, "grad_norm": 164.7914581298828, "learning_rate": 9.943363552749579e-06, "loss": 14.0934, "step": 71580 }, { "epoch": 0.1446163293834363, "grad_norm": 457.9045104980469, "learning_rate": 9.943311150266124e-06, "loss": 25.2312, "step": 71590 }, { "epoch": 0.14463653001612012, "grad_norm": 300.1724853515625, "learning_rate": 9.94325872368957e-06, "loss": 33.9224, "step": 71600 }, { "epoch": 0.1446567306488039, "grad_norm": 370.9692687988281, "learning_rate": 9.943206273020174e-06, "loss": 33.2487, "step": 71610 }, { "epoch": 0.14467693128148773, "grad_norm": 211.3019561767578, "learning_rate": 9.943153798258188e-06, "loss": 24.9975, "step": 71620 }, { "epoch": 0.14469713191417155, "grad_norm": 422.944580078125, "learning_rate": 9.94310129940387e-06, "loss": 32.5475, "step": 71630 }, { "epoch": 0.14471733254685537, "grad_norm": 162.3535614013672, "learning_rate": 9.943048776457479e-06, "loss": 18.9351, "step": 71640 }, { "epoch": 0.1447375331795392, "grad_norm": 73.76896667480469, "learning_rate": 9.942996229419264e-06, "loss": 23.3387, "step": 71650 }, { "epoch": 0.144757733812223, "grad_norm": 116.13563537597656, "learning_rate": 9.942943658289487e-06, "loss": 16.6555, "step": 71660 }, { "epoch": 0.1447779344449068, "grad_norm": 336.9422302246094, "learning_rate": 9.942891063068401e-06, "loss": 22.118, "step": 71670 }, { "epoch": 0.14479813507759062, "grad_norm": 428.5412292480469, "learning_rate": 9.942838443756265e-06, "loss": 25.259, "step": 71680 }, { "epoch": 0.14481833571027444, "grad_norm": 340.823486328125, "learning_rate": 9.942785800353332e-06, "loss": 22.5589, "step": 71690 }, { "epoch": 0.14483853634295826, "grad_norm": 562.6217041015625, "learning_rate": 9.942733132859861e-06, "loss": 33.2696, "step": 71700 }, { "epoch": 0.14485873697564208, "grad_norm": 432.7618103027344, "learning_rate": 9.94268044127611e-06, "loss": 23.5866, "step": 71710 }, { "epoch": 0.1448789376083259, "grad_norm": 132.93515014648438, "learning_rate": 9.942627725602332e-06, "loss": 27.0982, "step": 71720 }, { "epoch": 0.1448991382410097, "grad_norm": 441.1026611328125, "learning_rate": 9.942574985838785e-06, "loss": 29.9799, "step": 71730 }, { "epoch": 0.14491933887369352, "grad_norm": 223.04612731933594, "learning_rate": 9.942522221985728e-06, "loss": 14.2169, "step": 71740 }, { "epoch": 0.14493953950637734, "grad_norm": 497.781005859375, "learning_rate": 9.942469434043418e-06, "loss": 54.172, "step": 71750 }, { "epoch": 0.14495974013906116, "grad_norm": 374.7501220703125, "learning_rate": 9.942416622012113e-06, "loss": 33.5401, "step": 71760 }, { "epoch": 0.14497994077174498, "grad_norm": 223.306396484375, "learning_rate": 9.942363785892065e-06, "loss": 27.3977, "step": 71770 }, { "epoch": 0.1450001414044288, "grad_norm": 533.19873046875, "learning_rate": 9.942310925683538e-06, "loss": 33.2733, "step": 71780 }, { "epoch": 0.14502034203711262, "grad_norm": 485.14739990234375, "learning_rate": 9.942258041386785e-06, "loss": 21.9078, "step": 71790 }, { "epoch": 0.1450405426697964, "grad_norm": 326.0171813964844, "learning_rate": 9.942205133002067e-06, "loss": 20.407, "step": 71800 }, { "epoch": 0.14506074330248023, "grad_norm": 863.1029663085938, "learning_rate": 9.94215220052964e-06, "loss": 23.7133, "step": 71810 }, { "epoch": 0.14508094393516405, "grad_norm": 197.1686248779297, "learning_rate": 9.942099243969765e-06, "loss": 19.3659, "step": 71820 }, { "epoch": 0.14510114456784787, "grad_norm": 224.6142578125, "learning_rate": 9.942046263322694e-06, "loss": 28.5236, "step": 71830 }, { "epoch": 0.1451213452005317, "grad_norm": 162.52182006835938, "learning_rate": 9.941993258588691e-06, "loss": 18.0524, "step": 71840 }, { "epoch": 0.1451415458332155, "grad_norm": 998.354248046875, "learning_rate": 9.941940229768012e-06, "loss": 27.1875, "step": 71850 }, { "epoch": 0.1451617464658993, "grad_norm": 758.5922241210938, "learning_rate": 9.941887176860916e-06, "loss": 45.0456, "step": 71860 }, { "epoch": 0.14518194709858312, "grad_norm": 225.48980712890625, "learning_rate": 9.94183409986766e-06, "loss": 26.2792, "step": 71870 }, { "epoch": 0.14520214773126694, "grad_norm": 231.745849609375, "learning_rate": 9.941780998788506e-06, "loss": 22.5967, "step": 71880 }, { "epoch": 0.14522234836395076, "grad_norm": 358.07293701171875, "learning_rate": 9.941727873623709e-06, "loss": 23.6945, "step": 71890 }, { "epoch": 0.14524254899663458, "grad_norm": 271.62139892578125, "learning_rate": 9.94167472437353e-06, "loss": 18.7619, "step": 71900 }, { "epoch": 0.1452627496293184, "grad_norm": 156.89666748046875, "learning_rate": 9.941621551038228e-06, "loss": 16.0641, "step": 71910 }, { "epoch": 0.14528295026200222, "grad_norm": 25.164127349853516, "learning_rate": 9.941568353618064e-06, "loss": 17.0257, "step": 71920 }, { "epoch": 0.145303150894686, "grad_norm": 306.4646301269531, "learning_rate": 9.941515132113291e-06, "loss": 20.338, "step": 71930 }, { "epoch": 0.14532335152736983, "grad_norm": 571.8506469726562, "learning_rate": 9.941461886524176e-06, "loss": 22.4007, "step": 71940 }, { "epoch": 0.14534355216005365, "grad_norm": 107.55082702636719, "learning_rate": 9.941408616850974e-06, "loss": 23.8259, "step": 71950 }, { "epoch": 0.14536375279273747, "grad_norm": 242.2540740966797, "learning_rate": 9.941355323093944e-06, "loss": 24.0538, "step": 71960 }, { "epoch": 0.1453839534254213, "grad_norm": 714.3572387695312, "learning_rate": 9.94130200525335e-06, "loss": 22.1577, "step": 71970 }, { "epoch": 0.1454041540581051, "grad_norm": 317.2169494628906, "learning_rate": 9.941248663329448e-06, "loss": 24.8305, "step": 71980 }, { "epoch": 0.1454243546907889, "grad_norm": 344.2856750488281, "learning_rate": 9.941195297322498e-06, "loss": 33.1286, "step": 71990 }, { "epoch": 0.14544455532347272, "grad_norm": 332.62530517578125, "learning_rate": 9.941141907232766e-06, "loss": 43.1203, "step": 72000 }, { "epoch": 0.14546475595615654, "grad_norm": 231.6821746826172, "learning_rate": 9.941088493060504e-06, "loss": 9.8312, "step": 72010 }, { "epoch": 0.14548495658884036, "grad_norm": 159.54827880859375, "learning_rate": 9.941035054805977e-06, "loss": 23.6257, "step": 72020 }, { "epoch": 0.14550515722152418, "grad_norm": 284.62689208984375, "learning_rate": 9.940981592469443e-06, "loss": 24.9786, "step": 72030 }, { "epoch": 0.145525357854208, "grad_norm": 181.58709716796875, "learning_rate": 9.940928106051166e-06, "loss": 22.738, "step": 72040 }, { "epoch": 0.1455455584868918, "grad_norm": 244.52005004882812, "learning_rate": 9.940874595551403e-06, "loss": 15.4713, "step": 72050 }, { "epoch": 0.14556575911957562, "grad_norm": 110.53299713134766, "learning_rate": 9.940821060970418e-06, "loss": 15.9104, "step": 72060 }, { "epoch": 0.14558595975225944, "grad_norm": 654.1826782226562, "learning_rate": 9.940767502308469e-06, "loss": 46.7578, "step": 72070 }, { "epoch": 0.14560616038494326, "grad_norm": 226.22268676757812, "learning_rate": 9.940713919565819e-06, "loss": 28.1405, "step": 72080 }, { "epoch": 0.14562636101762708, "grad_norm": 63.50346374511719, "learning_rate": 9.94066031274273e-06, "loss": 23.2808, "step": 72090 }, { "epoch": 0.1456465616503109, "grad_norm": 284.62615966796875, "learning_rate": 9.94060668183946e-06, "loss": 30.1919, "step": 72100 }, { "epoch": 0.14566676228299472, "grad_norm": 282.10906982421875, "learning_rate": 9.940553026856273e-06, "loss": 34.6519, "step": 72110 }, { "epoch": 0.1456869629156785, "grad_norm": 497.550537109375, "learning_rate": 9.940499347793429e-06, "loss": 34.9269, "step": 72120 }, { "epoch": 0.14570716354836233, "grad_norm": 1064.432373046875, "learning_rate": 9.940445644651191e-06, "loss": 34.5493, "step": 72130 }, { "epoch": 0.14572736418104615, "grad_norm": 122.48796844482422, "learning_rate": 9.94039191742982e-06, "loss": 20.1814, "step": 72140 }, { "epoch": 0.14574756481372997, "grad_norm": 178.9029998779297, "learning_rate": 9.940338166129578e-06, "loss": 22.8217, "step": 72150 }, { "epoch": 0.1457677654464138, "grad_norm": 90.35736083984375, "learning_rate": 9.940284390750727e-06, "loss": 14.4677, "step": 72160 }, { "epoch": 0.1457879660790976, "grad_norm": 47.400943756103516, "learning_rate": 9.94023059129353e-06, "loss": 32.1759, "step": 72170 }, { "epoch": 0.1458081667117814, "grad_norm": 312.0511779785156, "learning_rate": 9.940176767758247e-06, "loss": 24.7397, "step": 72180 }, { "epoch": 0.14582836734446522, "grad_norm": 317.3664855957031, "learning_rate": 9.940122920145142e-06, "loss": 18.8538, "step": 72190 }, { "epoch": 0.14584856797714904, "grad_norm": 229.8318634033203, "learning_rate": 9.940069048454478e-06, "loss": 29.9853, "step": 72200 }, { "epoch": 0.14586876860983286, "grad_norm": 327.14361572265625, "learning_rate": 9.940015152686514e-06, "loss": 17.8146, "step": 72210 }, { "epoch": 0.14588896924251668, "grad_norm": 443.3293762207031, "learning_rate": 9.939961232841517e-06, "loss": 23.4904, "step": 72220 }, { "epoch": 0.1459091698752005, "grad_norm": 390.5252380371094, "learning_rate": 9.939907288919749e-06, "loss": 51.5956, "step": 72230 }, { "epoch": 0.14592937050788432, "grad_norm": 203.84153747558594, "learning_rate": 9.93985332092147e-06, "loss": 40.7476, "step": 72240 }, { "epoch": 0.1459495711405681, "grad_norm": 178.8316192626953, "learning_rate": 9.939799328846947e-06, "loss": 37.3339, "step": 72250 }, { "epoch": 0.14596977177325193, "grad_norm": 1003.7894897460938, "learning_rate": 9.93974531269644e-06, "loss": 41.613, "step": 72260 }, { "epoch": 0.14598997240593575, "grad_norm": 293.8765869140625, "learning_rate": 9.939691272470214e-06, "loss": 35.4791, "step": 72270 }, { "epoch": 0.14601017303861957, "grad_norm": 105.84571838378906, "learning_rate": 9.939637208168532e-06, "loss": 20.4251, "step": 72280 }, { "epoch": 0.1460303736713034, "grad_norm": 291.296630859375, "learning_rate": 9.939583119791656e-06, "loss": 23.6704, "step": 72290 }, { "epoch": 0.1460505743039872, "grad_norm": 435.37054443359375, "learning_rate": 9.939529007339852e-06, "loss": 16.6602, "step": 72300 }, { "epoch": 0.146070774936671, "grad_norm": 239.93365478515625, "learning_rate": 9.939474870813383e-06, "loss": 18.4755, "step": 72310 }, { "epoch": 0.14609097556935483, "grad_norm": 188.5083770751953, "learning_rate": 9.939420710212511e-06, "loss": 25.0004, "step": 72320 }, { "epoch": 0.14611117620203865, "grad_norm": 96.27484130859375, "learning_rate": 9.939366525537503e-06, "loss": 22.0095, "step": 72330 }, { "epoch": 0.14613137683472247, "grad_norm": 304.7362976074219, "learning_rate": 9.939312316788622e-06, "loss": 38.6292, "step": 72340 }, { "epoch": 0.14615157746740629, "grad_norm": 300.9645690917969, "learning_rate": 9.93925808396613e-06, "loss": 24.8815, "step": 72350 }, { "epoch": 0.1461717781000901, "grad_norm": 421.4128112792969, "learning_rate": 9.939203827070296e-06, "loss": 26.0758, "step": 72360 }, { "epoch": 0.1461919787327739, "grad_norm": 388.7615661621094, "learning_rate": 9.939149546101379e-06, "loss": 30.0654, "step": 72370 }, { "epoch": 0.14621217936545772, "grad_norm": 130.48265075683594, "learning_rate": 9.939095241059648e-06, "loss": 27.5746, "step": 72380 }, { "epoch": 0.14623237999814154, "grad_norm": 339.44989013671875, "learning_rate": 9.939040911945365e-06, "loss": 20.017, "step": 72390 }, { "epoch": 0.14625258063082536, "grad_norm": 545.943359375, "learning_rate": 9.938986558758795e-06, "loss": 25.8609, "step": 72400 }, { "epoch": 0.14627278126350918, "grad_norm": 299.3280944824219, "learning_rate": 9.938932181500206e-06, "loss": 16.4031, "step": 72410 }, { "epoch": 0.146292981896193, "grad_norm": 211.26515197753906, "learning_rate": 9.938877780169858e-06, "loss": 24.9415, "step": 72420 }, { "epoch": 0.14631318252887682, "grad_norm": 191.85914611816406, "learning_rate": 9.938823354768019e-06, "loss": 29.8263, "step": 72430 }, { "epoch": 0.1463333831615606, "grad_norm": 0.0, "learning_rate": 9.938768905294954e-06, "loss": 15.9372, "step": 72440 }, { "epoch": 0.14635358379424443, "grad_norm": 303.7027282714844, "learning_rate": 9.938714431750928e-06, "loss": 11.7437, "step": 72450 }, { "epoch": 0.14637378442692825, "grad_norm": 60.131805419921875, "learning_rate": 9.938659934136208e-06, "loss": 38.9834, "step": 72460 }, { "epoch": 0.14639398505961207, "grad_norm": 106.24419403076172, "learning_rate": 9.93860541245106e-06, "loss": 23.2664, "step": 72470 }, { "epoch": 0.1464141856922959, "grad_norm": 729.7398681640625, "learning_rate": 9.938550866695745e-06, "loss": 31.6942, "step": 72480 }, { "epoch": 0.1464343863249797, "grad_norm": 166.63670349121094, "learning_rate": 9.938496296870532e-06, "loss": 11.511, "step": 72490 }, { "epoch": 0.1464545869576635, "grad_norm": 274.7510070800781, "learning_rate": 9.938441702975689e-06, "loss": 30.6187, "step": 72500 }, { "epoch": 0.14647478759034732, "grad_norm": 291.7764892578125, "learning_rate": 9.93838708501148e-06, "loss": 17.5404, "step": 72510 }, { "epoch": 0.14649498822303114, "grad_norm": 194.73660278320312, "learning_rate": 9.93833244297817e-06, "loss": 28.5725, "step": 72520 }, { "epoch": 0.14651518885571496, "grad_norm": 400.1331481933594, "learning_rate": 9.938277776876029e-06, "loss": 50.7125, "step": 72530 }, { "epoch": 0.14653538948839878, "grad_norm": 0.0, "learning_rate": 9.938223086705318e-06, "loss": 39.4398, "step": 72540 }, { "epoch": 0.1465555901210826, "grad_norm": 93.92041015625, "learning_rate": 9.938168372466308e-06, "loss": 24.704, "step": 72550 }, { "epoch": 0.14657579075376642, "grad_norm": 453.53118896484375, "learning_rate": 9.938113634159266e-06, "loss": 27.9512, "step": 72560 }, { "epoch": 0.14659599138645021, "grad_norm": 223.22021484375, "learning_rate": 9.938058871784453e-06, "loss": 19.1568, "step": 72570 }, { "epoch": 0.14661619201913403, "grad_norm": 383.9504699707031, "learning_rate": 9.938004085342144e-06, "loss": 32.6469, "step": 72580 }, { "epoch": 0.14663639265181785, "grad_norm": 404.2530212402344, "learning_rate": 9.9379492748326e-06, "loss": 19.1927, "step": 72590 }, { "epoch": 0.14665659328450167, "grad_norm": 312.5388488769531, "learning_rate": 9.937894440256091e-06, "loss": 22.7151, "step": 72600 }, { "epoch": 0.1466767939171855, "grad_norm": 175.69007873535156, "learning_rate": 9.937839581612883e-06, "loss": 21.0638, "step": 72610 }, { "epoch": 0.14669699454986931, "grad_norm": 443.80950927734375, "learning_rate": 9.937784698903244e-06, "loss": 39.3942, "step": 72620 }, { "epoch": 0.1467171951825531, "grad_norm": 214.1730499267578, "learning_rate": 9.937729792127439e-06, "loss": 17.9415, "step": 72630 }, { "epoch": 0.14673739581523693, "grad_norm": 107.6888427734375, "learning_rate": 9.93767486128574e-06, "loss": 24.4888, "step": 72640 }, { "epoch": 0.14675759644792075, "grad_norm": 139.70843505859375, "learning_rate": 9.937619906378413e-06, "loss": 22.1796, "step": 72650 }, { "epoch": 0.14677779708060457, "grad_norm": 377.9275207519531, "learning_rate": 9.937564927405724e-06, "loss": 50.8135, "step": 72660 }, { "epoch": 0.1467979977132884, "grad_norm": 509.1934814453125, "learning_rate": 9.937509924367944e-06, "loss": 25.7251, "step": 72670 }, { "epoch": 0.1468181983459722, "grad_norm": 384.5520324707031, "learning_rate": 9.937454897265338e-06, "loss": 22.491, "step": 72680 }, { "epoch": 0.146838398978656, "grad_norm": 344.75445556640625, "learning_rate": 9.937399846098177e-06, "loss": 25.3196, "step": 72690 }, { "epoch": 0.14685859961133982, "grad_norm": 232.0851593017578, "learning_rate": 9.937344770866727e-06, "loss": 25.6145, "step": 72700 }, { "epoch": 0.14687880024402364, "grad_norm": 0.0, "learning_rate": 9.937289671571257e-06, "loss": 15.2844, "step": 72710 }, { "epoch": 0.14689900087670746, "grad_norm": 454.17547607421875, "learning_rate": 9.937234548212038e-06, "loss": 38.1949, "step": 72720 }, { "epoch": 0.14691920150939128, "grad_norm": 203.44876098632812, "learning_rate": 9.937179400789336e-06, "loss": 23.6081, "step": 72730 }, { "epoch": 0.1469394021420751, "grad_norm": 521.8082885742188, "learning_rate": 9.937124229303419e-06, "loss": 36.6301, "step": 72740 }, { "epoch": 0.14695960277475892, "grad_norm": 64.08634185791016, "learning_rate": 9.937069033754558e-06, "loss": 18.344, "step": 72750 }, { "epoch": 0.1469798034074427, "grad_norm": 237.89395141601562, "learning_rate": 9.937013814143021e-06, "loss": 21.0849, "step": 72760 }, { "epoch": 0.14700000404012653, "grad_norm": 398.6676940917969, "learning_rate": 9.936958570469077e-06, "loss": 26.8529, "step": 72770 }, { "epoch": 0.14702020467281035, "grad_norm": 124.23784637451172, "learning_rate": 9.936903302732997e-06, "loss": 31.3257, "step": 72780 }, { "epoch": 0.14704040530549417, "grad_norm": 102.38275146484375, "learning_rate": 9.936848010935049e-06, "loss": 21.4798, "step": 72790 }, { "epoch": 0.147060605938178, "grad_norm": 242.70022583007812, "learning_rate": 9.936792695075502e-06, "loss": 18.9041, "step": 72800 }, { "epoch": 0.1470808065708618, "grad_norm": 375.91619873046875, "learning_rate": 9.936737355154627e-06, "loss": 20.7801, "step": 72810 }, { "epoch": 0.1471010072035456, "grad_norm": 20.822330474853516, "learning_rate": 9.936681991172692e-06, "loss": 27.5109, "step": 72820 }, { "epoch": 0.14712120783622942, "grad_norm": 116.5805435180664, "learning_rate": 9.936626603129968e-06, "loss": 22.8889, "step": 72830 }, { "epoch": 0.14714140846891324, "grad_norm": 59.22004699707031, "learning_rate": 9.936571191026726e-06, "loss": 12.7618, "step": 72840 }, { "epoch": 0.14716160910159706, "grad_norm": 296.4792785644531, "learning_rate": 9.936515754863231e-06, "loss": 19.3913, "step": 72850 }, { "epoch": 0.14718180973428088, "grad_norm": 212.79595947265625, "learning_rate": 9.93646029463976e-06, "loss": 43.1641, "step": 72860 }, { "epoch": 0.1472020103669647, "grad_norm": 371.1495056152344, "learning_rate": 9.93640481035658e-06, "loss": 21.7511, "step": 72870 }, { "epoch": 0.14722221099964852, "grad_norm": 424.4947814941406, "learning_rate": 9.936349302013962e-06, "loss": 31.64, "step": 72880 }, { "epoch": 0.14724241163233232, "grad_norm": 302.2626037597656, "learning_rate": 9.936293769612175e-06, "loss": 22.372, "step": 72890 }, { "epoch": 0.14726261226501614, "grad_norm": 330.1347961425781, "learning_rate": 9.936238213151491e-06, "loss": 28.6687, "step": 72900 }, { "epoch": 0.14728281289769996, "grad_norm": 473.2004089355469, "learning_rate": 9.93618263263218e-06, "loss": 25.7576, "step": 72910 }, { "epoch": 0.14730301353038378, "grad_norm": 364.60906982421875, "learning_rate": 9.936127028054516e-06, "loss": 33.9359, "step": 72920 }, { "epoch": 0.1473232141630676, "grad_norm": 180.75283813476562, "learning_rate": 9.936071399418764e-06, "loss": 26.7356, "step": 72930 }, { "epoch": 0.14734341479575142, "grad_norm": 259.2234191894531, "learning_rate": 9.936015746725202e-06, "loss": 17.1737, "step": 72940 }, { "epoch": 0.1473636154284352, "grad_norm": 257.33612060546875, "learning_rate": 9.935960069974096e-06, "loss": 12.2143, "step": 72950 }, { "epoch": 0.14738381606111903, "grad_norm": 262.45184326171875, "learning_rate": 9.93590436916572e-06, "loss": 19.2878, "step": 72960 }, { "epoch": 0.14740401669380285, "grad_norm": 143.31515502929688, "learning_rate": 9.935848644300345e-06, "loss": 19.7012, "step": 72970 }, { "epoch": 0.14742421732648667, "grad_norm": 0.0, "learning_rate": 9.935792895378243e-06, "loss": 23.4557, "step": 72980 }, { "epoch": 0.1474444179591705, "grad_norm": 135.18759155273438, "learning_rate": 9.935737122399683e-06, "loss": 18.6015, "step": 72990 }, { "epoch": 0.1474646185918543, "grad_norm": 146.4440155029297, "learning_rate": 9.93568132536494e-06, "loss": 16.3004, "step": 73000 }, { "epoch": 0.1474848192245381, "grad_norm": 328.7703857421875, "learning_rate": 9.935625504274284e-06, "loss": 23.7404, "step": 73010 }, { "epoch": 0.14750501985722192, "grad_norm": 151.2288818359375, "learning_rate": 9.93556965912799e-06, "loss": 28.6221, "step": 73020 }, { "epoch": 0.14752522048990574, "grad_norm": 160.69448852539062, "learning_rate": 9.935513789926327e-06, "loss": 31.8866, "step": 73030 }, { "epoch": 0.14754542112258956, "grad_norm": 369.7319030761719, "learning_rate": 9.935457896669568e-06, "loss": 26.7234, "step": 73040 }, { "epoch": 0.14756562175527338, "grad_norm": 344.50848388671875, "learning_rate": 9.935401979357985e-06, "loss": 14.9657, "step": 73050 }, { "epoch": 0.1475858223879572, "grad_norm": 360.8180847167969, "learning_rate": 9.935346037991854e-06, "loss": 27.4612, "step": 73060 }, { "epoch": 0.14760602302064102, "grad_norm": 455.5966796875, "learning_rate": 9.935290072571442e-06, "loss": 19.9029, "step": 73070 }, { "epoch": 0.1476262236533248, "grad_norm": 279.91998291015625, "learning_rate": 9.935234083097028e-06, "loss": 26.7266, "step": 73080 }, { "epoch": 0.14764642428600863, "grad_norm": 276.9877014160156, "learning_rate": 9.935178069568878e-06, "loss": 39.1465, "step": 73090 }, { "epoch": 0.14766662491869245, "grad_norm": 287.7261047363281, "learning_rate": 9.93512203198727e-06, "loss": 20.6985, "step": 73100 }, { "epoch": 0.14768682555137627, "grad_norm": 309.4169921875, "learning_rate": 9.935065970352477e-06, "loss": 14.4859, "step": 73110 }, { "epoch": 0.1477070261840601, "grad_norm": 389.2830505371094, "learning_rate": 9.93500988466477e-06, "loss": 27.9293, "step": 73120 }, { "epoch": 0.1477272268167439, "grad_norm": 276.6993408203125, "learning_rate": 9.934953774924425e-06, "loss": 37.9287, "step": 73130 }, { "epoch": 0.1477474274494277, "grad_norm": 91.15270233154297, "learning_rate": 9.934897641131712e-06, "loss": 18.1096, "step": 73140 }, { "epoch": 0.14776762808211152, "grad_norm": 300.0426330566406, "learning_rate": 9.934841483286907e-06, "loss": 34.75, "step": 73150 }, { "epoch": 0.14778782871479534, "grad_norm": 53.40587615966797, "learning_rate": 9.934785301390282e-06, "loss": 38.5322, "step": 73160 }, { "epoch": 0.14780802934747916, "grad_norm": 397.1943664550781, "learning_rate": 9.934729095442113e-06, "loss": 29.398, "step": 73170 }, { "epoch": 0.14782822998016298, "grad_norm": 162.71224975585938, "learning_rate": 9.934672865442673e-06, "loss": 12.9504, "step": 73180 }, { "epoch": 0.1478484306128468, "grad_norm": 263.53106689453125, "learning_rate": 9.934616611392235e-06, "loss": 23.3911, "step": 73190 }, { "epoch": 0.1478686312455306, "grad_norm": 573.6403198242188, "learning_rate": 9.934560333291077e-06, "loss": 35.6992, "step": 73200 }, { "epoch": 0.14788883187821442, "grad_norm": 376.0777282714844, "learning_rate": 9.934504031139468e-06, "loss": 38.2472, "step": 73210 }, { "epoch": 0.14790903251089824, "grad_norm": 358.96844482421875, "learning_rate": 9.934447704937684e-06, "loss": 36.2182, "step": 73220 }, { "epoch": 0.14792923314358206, "grad_norm": 0.0, "learning_rate": 9.934391354686002e-06, "loss": 29.5924, "step": 73230 }, { "epoch": 0.14794943377626588, "grad_norm": 201.43179321289062, "learning_rate": 9.934334980384694e-06, "loss": 19.3227, "step": 73240 }, { "epoch": 0.1479696344089497, "grad_norm": 284.2002258300781, "learning_rate": 9.934278582034037e-06, "loss": 31.9488, "step": 73250 }, { "epoch": 0.14798983504163352, "grad_norm": 776.5807495117188, "learning_rate": 9.934222159634303e-06, "loss": 28.9504, "step": 73260 }, { "epoch": 0.1480100356743173, "grad_norm": 163.8245391845703, "learning_rate": 9.93416571318577e-06, "loss": 20.2317, "step": 73270 }, { "epoch": 0.14803023630700113, "grad_norm": 421.5320739746094, "learning_rate": 9.934109242688712e-06, "loss": 28.5653, "step": 73280 }, { "epoch": 0.14805043693968495, "grad_norm": 282.01934814453125, "learning_rate": 9.934052748143403e-06, "loss": 49.2104, "step": 73290 }, { "epoch": 0.14807063757236877, "grad_norm": 286.2742614746094, "learning_rate": 9.93399622955012e-06, "loss": 16.345, "step": 73300 }, { "epoch": 0.1480908382050526, "grad_norm": 90.08783721923828, "learning_rate": 9.933939686909137e-06, "loss": 25.0712, "step": 73310 }, { "epoch": 0.1481110388377364, "grad_norm": 775.7799072265625, "learning_rate": 9.933883120220731e-06, "loss": 35.9436, "step": 73320 }, { "epoch": 0.1481312394704202, "grad_norm": 106.52813720703125, "learning_rate": 9.933826529485178e-06, "loss": 38.4554, "step": 73330 }, { "epoch": 0.14815144010310402, "grad_norm": 109.09796905517578, "learning_rate": 9.933769914702751e-06, "loss": 18.1236, "step": 73340 }, { "epoch": 0.14817164073578784, "grad_norm": 186.26087951660156, "learning_rate": 9.933713275873728e-06, "loss": 29.9297, "step": 73350 }, { "epoch": 0.14819184136847166, "grad_norm": 214.27685546875, "learning_rate": 9.933656612998387e-06, "loss": 21.4412, "step": 73360 }, { "epoch": 0.14821204200115548, "grad_norm": 258.84942626953125, "learning_rate": 9.933599926077e-06, "loss": 27.3872, "step": 73370 }, { "epoch": 0.1482322426338393, "grad_norm": 139.08279418945312, "learning_rate": 9.933543215109846e-06, "loss": 22.6585, "step": 73380 }, { "epoch": 0.14825244326652312, "grad_norm": 372.6390380859375, "learning_rate": 9.933486480097201e-06, "loss": 31.9554, "step": 73390 }, { "epoch": 0.1482726438992069, "grad_norm": 9.438552856445312, "learning_rate": 9.93342972103934e-06, "loss": 16.0987, "step": 73400 }, { "epoch": 0.14829284453189073, "grad_norm": 317.71710205078125, "learning_rate": 9.933372937936542e-06, "loss": 41.1456, "step": 73410 }, { "epoch": 0.14831304516457455, "grad_norm": 537.4074096679688, "learning_rate": 9.933316130789084e-06, "loss": 36.9997, "step": 73420 }, { "epoch": 0.14833324579725837, "grad_norm": 245.1003875732422, "learning_rate": 9.93325929959724e-06, "loss": 36.3274, "step": 73430 }, { "epoch": 0.1483534464299422, "grad_norm": 118.14433288574219, "learning_rate": 9.933202444361288e-06, "loss": 18.1481, "step": 73440 }, { "epoch": 0.148373647062626, "grad_norm": 412.84918212890625, "learning_rate": 9.933145565081506e-06, "loss": 24.8431, "step": 73450 }, { "epoch": 0.1483938476953098, "grad_norm": 529.8720703125, "learning_rate": 9.933088661758172e-06, "loss": 25.0147, "step": 73460 }, { "epoch": 0.14841404832799363, "grad_norm": 418.7952880859375, "learning_rate": 9.933031734391561e-06, "loss": 24.5994, "step": 73470 }, { "epoch": 0.14843424896067745, "grad_norm": 98.73407745361328, "learning_rate": 9.93297478298195e-06, "loss": 15.8692, "step": 73480 }, { "epoch": 0.14845444959336127, "grad_norm": 195.6909637451172, "learning_rate": 9.93291780752962e-06, "loss": 11.5668, "step": 73490 }, { "epoch": 0.14847465022604509, "grad_norm": 215.5811309814453, "learning_rate": 9.932860808034847e-06, "loss": 24.8293, "step": 73500 }, { "epoch": 0.1484948508587289, "grad_norm": 112.73097229003906, "learning_rate": 9.93280378449791e-06, "loss": 44.389, "step": 73510 }, { "epoch": 0.1485150514914127, "grad_norm": 197.42066955566406, "learning_rate": 9.932746736919084e-06, "loss": 36.3723, "step": 73520 }, { "epoch": 0.14853525212409652, "grad_norm": 121.78983306884766, "learning_rate": 9.93268966529865e-06, "loss": 26.3173, "step": 73530 }, { "epoch": 0.14855545275678034, "grad_norm": 127.56995391845703, "learning_rate": 9.932632569636882e-06, "loss": 18.3952, "step": 73540 }, { "epoch": 0.14857565338946416, "grad_norm": 734.1718139648438, "learning_rate": 9.932575449934063e-06, "loss": 18.5538, "step": 73550 }, { "epoch": 0.14859585402214798, "grad_norm": 0.0, "learning_rate": 9.93251830619047e-06, "loss": 24.492, "step": 73560 }, { "epoch": 0.1486160546548318, "grad_norm": 389.0741271972656, "learning_rate": 9.93246113840638e-06, "loss": 33.0825, "step": 73570 }, { "epoch": 0.14863625528751562, "grad_norm": 620.7560424804688, "learning_rate": 9.932403946582071e-06, "loss": 32.9965, "step": 73580 }, { "epoch": 0.1486564559201994, "grad_norm": 249.2867889404297, "learning_rate": 9.932346730717828e-06, "loss": 16.9857, "step": 73590 }, { "epoch": 0.14867665655288323, "grad_norm": 166.7878875732422, "learning_rate": 9.932289490813922e-06, "loss": 24.6991, "step": 73600 }, { "epoch": 0.14869685718556705, "grad_norm": 259.1558837890625, "learning_rate": 9.932232226870635e-06, "loss": 17.8316, "step": 73610 }, { "epoch": 0.14871705781825087, "grad_norm": 101.16908264160156, "learning_rate": 9.932174938888248e-06, "loss": 24.0263, "step": 73620 }, { "epoch": 0.1487372584509347, "grad_norm": 402.5858154296875, "learning_rate": 9.932117626867037e-06, "loss": 21.5286, "step": 73630 }, { "epoch": 0.1487574590836185, "grad_norm": 391.76837158203125, "learning_rate": 9.932060290807283e-06, "loss": 23.2796, "step": 73640 }, { "epoch": 0.1487776597163023, "grad_norm": 599.74462890625, "learning_rate": 9.932002930709268e-06, "loss": 24.3521, "step": 73650 }, { "epoch": 0.14879786034898612, "grad_norm": 1054.1151123046875, "learning_rate": 9.931945546573266e-06, "loss": 36.3407, "step": 73660 }, { "epoch": 0.14881806098166994, "grad_norm": 297.4301452636719, "learning_rate": 9.931888138399562e-06, "loss": 24.9232, "step": 73670 }, { "epoch": 0.14883826161435376, "grad_norm": 325.4300537109375, "learning_rate": 9.93183070618843e-06, "loss": 40.2573, "step": 73680 }, { "epoch": 0.14885846224703758, "grad_norm": 387.38873291015625, "learning_rate": 9.931773249940156e-06, "loss": 14.6954, "step": 73690 }, { "epoch": 0.1488786628797214, "grad_norm": 453.527099609375, "learning_rate": 9.931715769655017e-06, "loss": 24.9453, "step": 73700 }, { "epoch": 0.14889886351240522, "grad_norm": 650.870849609375, "learning_rate": 9.931658265333293e-06, "loss": 34.0704, "step": 73710 }, { "epoch": 0.14891906414508901, "grad_norm": 140.52073669433594, "learning_rate": 9.931600736975264e-06, "loss": 24.8845, "step": 73720 }, { "epoch": 0.14893926477777283, "grad_norm": 381.51531982421875, "learning_rate": 9.93154318458121e-06, "loss": 31.5659, "step": 73730 }, { "epoch": 0.14895946541045665, "grad_norm": 123.93963623046875, "learning_rate": 9.931485608151416e-06, "loss": 32.1984, "step": 73740 }, { "epoch": 0.14897966604314047, "grad_norm": 413.2242431640625, "learning_rate": 9.931428007686158e-06, "loss": 32.5908, "step": 73750 }, { "epoch": 0.1489998666758243, "grad_norm": 253.4676513671875, "learning_rate": 9.931370383185717e-06, "loss": 21.3014, "step": 73760 }, { "epoch": 0.14902006730850811, "grad_norm": 286.63873291015625, "learning_rate": 9.931312734650376e-06, "loss": 24.3093, "step": 73770 }, { "epoch": 0.1490402679411919, "grad_norm": 102.63798522949219, "learning_rate": 9.931255062080415e-06, "loss": 19.9405, "step": 73780 }, { "epoch": 0.14906046857387573, "grad_norm": 77.91295623779297, "learning_rate": 9.931197365476113e-06, "loss": 15.572, "step": 73790 }, { "epoch": 0.14908066920655955, "grad_norm": 229.90560913085938, "learning_rate": 9.931139644837755e-06, "loss": 15.3746, "step": 73800 }, { "epoch": 0.14910086983924337, "grad_norm": 416.01239013671875, "learning_rate": 9.93108190016562e-06, "loss": 26.4428, "step": 73810 }, { "epoch": 0.1491210704719272, "grad_norm": 208.6383514404297, "learning_rate": 9.93102413145999e-06, "loss": 20.8373, "step": 73820 }, { "epoch": 0.149141271104611, "grad_norm": 269.01910400390625, "learning_rate": 9.930966338721146e-06, "loss": 19.5364, "step": 73830 }, { "epoch": 0.1491614717372948, "grad_norm": 194.07818603515625, "learning_rate": 9.930908521949371e-06, "loss": 21.6251, "step": 73840 }, { "epoch": 0.14918167236997862, "grad_norm": 257.6159362792969, "learning_rate": 9.930850681144946e-06, "loss": 23.5923, "step": 73850 }, { "epoch": 0.14920187300266244, "grad_norm": 131.69590759277344, "learning_rate": 9.930792816308151e-06, "loss": 30.2225, "step": 73860 }, { "epoch": 0.14922207363534626, "grad_norm": 278.0802917480469, "learning_rate": 9.930734927439272e-06, "loss": 23.2799, "step": 73870 }, { "epoch": 0.14924227426803008, "grad_norm": 31.00725555419922, "learning_rate": 9.930677014538587e-06, "loss": 31.09, "step": 73880 }, { "epoch": 0.1492624749007139, "grad_norm": 395.4269104003906, "learning_rate": 9.93061907760638e-06, "loss": 25.5013, "step": 73890 }, { "epoch": 0.14928267553339772, "grad_norm": 315.1025085449219, "learning_rate": 9.930561116642936e-06, "loss": 31.0769, "step": 73900 }, { "epoch": 0.1493028761660815, "grad_norm": 707.716796875, "learning_rate": 9.930503131648535e-06, "loss": 53.125, "step": 73910 }, { "epoch": 0.14932307679876533, "grad_norm": 805.9435424804688, "learning_rate": 9.930445122623458e-06, "loss": 20.2176, "step": 73920 }, { "epoch": 0.14934327743144915, "grad_norm": 104.26602935791016, "learning_rate": 9.93038708956799e-06, "loss": 21.0549, "step": 73930 }, { "epoch": 0.14936347806413297, "grad_norm": 275.37884521484375, "learning_rate": 9.930329032482412e-06, "loss": 35.6653, "step": 73940 }, { "epoch": 0.1493836786968168, "grad_norm": 560.7946166992188, "learning_rate": 9.930270951367012e-06, "loss": 38.4, "step": 73950 }, { "epoch": 0.1494038793295006, "grad_norm": 143.7568817138672, "learning_rate": 9.930212846222065e-06, "loss": 20.7986, "step": 73960 }, { "epoch": 0.1494240799621844, "grad_norm": 620.0817260742188, "learning_rate": 9.930154717047862e-06, "loss": 22.5662, "step": 73970 }, { "epoch": 0.14944428059486822, "grad_norm": 442.9324035644531, "learning_rate": 9.930096563844682e-06, "loss": 49.678, "step": 73980 }, { "epoch": 0.14946448122755204, "grad_norm": 236.52011108398438, "learning_rate": 9.930038386612809e-06, "loss": 25.2331, "step": 73990 }, { "epoch": 0.14948468186023586, "grad_norm": 316.88616943359375, "learning_rate": 9.929980185352525e-06, "loss": 22.3037, "step": 74000 }, { "epoch": 0.14950488249291968, "grad_norm": 35.031089782714844, "learning_rate": 9.929921960064117e-06, "loss": 14.6388, "step": 74010 }, { "epoch": 0.1495250831256035, "grad_norm": 28.051513671875, "learning_rate": 9.929863710747869e-06, "loss": 35.1441, "step": 74020 }, { "epoch": 0.14954528375828732, "grad_norm": 388.2430114746094, "learning_rate": 9.929805437404061e-06, "loss": 23.471, "step": 74030 }, { "epoch": 0.14956548439097112, "grad_norm": 114.31440734863281, "learning_rate": 9.929747140032979e-06, "loss": 30.1783, "step": 74040 }, { "epoch": 0.14958568502365494, "grad_norm": 246.185546875, "learning_rate": 9.929688818634909e-06, "loss": 25.8489, "step": 74050 }, { "epoch": 0.14960588565633876, "grad_norm": 377.8116760253906, "learning_rate": 9.929630473210132e-06, "loss": 33.947, "step": 74060 }, { "epoch": 0.14962608628902258, "grad_norm": 156.3207550048828, "learning_rate": 9.929572103758935e-06, "loss": 16.3018, "step": 74070 }, { "epoch": 0.1496462869217064, "grad_norm": 370.22064208984375, "learning_rate": 9.929513710281602e-06, "loss": 25.3374, "step": 74080 }, { "epoch": 0.14966648755439022, "grad_norm": 312.04351806640625, "learning_rate": 9.929455292778416e-06, "loss": 15.8911, "step": 74090 }, { "epoch": 0.149686688187074, "grad_norm": 301.77056884765625, "learning_rate": 9.929396851249661e-06, "loss": 30.526, "step": 74100 }, { "epoch": 0.14970688881975783, "grad_norm": 407.1257019042969, "learning_rate": 9.929338385695626e-06, "loss": 39.1722, "step": 74110 }, { "epoch": 0.14972708945244165, "grad_norm": 336.2999267578125, "learning_rate": 9.929279896116595e-06, "loss": 23.1933, "step": 74120 }, { "epoch": 0.14974729008512547, "grad_norm": 402.5205993652344, "learning_rate": 9.92922138251285e-06, "loss": 28.0673, "step": 74130 }, { "epoch": 0.1497674907178093, "grad_norm": 126.55119323730469, "learning_rate": 9.929162844884676e-06, "loss": 22.7441, "step": 74140 }, { "epoch": 0.1497876913504931, "grad_norm": 186.93682861328125, "learning_rate": 9.929104283232363e-06, "loss": 20.9324, "step": 74150 }, { "epoch": 0.1498078919831769, "grad_norm": 163.87965393066406, "learning_rate": 9.929045697556192e-06, "loss": 18.6977, "step": 74160 }, { "epoch": 0.14982809261586072, "grad_norm": 179.60267639160156, "learning_rate": 9.92898708785645e-06, "loss": 18.1682, "step": 74170 }, { "epoch": 0.14984829324854454, "grad_norm": 167.43165588378906, "learning_rate": 9.928928454133424e-06, "loss": 12.088, "step": 74180 }, { "epoch": 0.14986849388122836, "grad_norm": 310.5000305175781, "learning_rate": 9.928869796387396e-06, "loss": 13.7691, "step": 74190 }, { "epoch": 0.14988869451391218, "grad_norm": 254.00616455078125, "learning_rate": 9.928811114618658e-06, "loss": 40.0576, "step": 74200 }, { "epoch": 0.149908895146596, "grad_norm": 549.0490112304688, "learning_rate": 9.92875240882749e-06, "loss": 17.2874, "step": 74210 }, { "epoch": 0.14992909577927982, "grad_norm": 361.3526306152344, "learning_rate": 9.92869367901418e-06, "loss": 28.8017, "step": 74220 }, { "epoch": 0.1499492964119636, "grad_norm": 94.11409759521484, "learning_rate": 9.928634925179018e-06, "loss": 26.365, "step": 74230 }, { "epoch": 0.14996949704464743, "grad_norm": 393.6409912109375, "learning_rate": 9.928576147322283e-06, "loss": 29.0807, "step": 74240 }, { "epoch": 0.14998969767733125, "grad_norm": 341.0413513183594, "learning_rate": 9.92851734544427e-06, "loss": 38.7935, "step": 74250 }, { "epoch": 0.15000989831001507, "grad_norm": 245.63893127441406, "learning_rate": 9.928458519545258e-06, "loss": 25.493, "step": 74260 }, { "epoch": 0.1500300989426989, "grad_norm": 377.0208435058594, "learning_rate": 9.928399669625537e-06, "loss": 20.9942, "step": 74270 }, { "epoch": 0.1500502995753827, "grad_norm": 460.97998046875, "learning_rate": 9.928340795685396e-06, "loss": 43.2093, "step": 74280 }, { "epoch": 0.1500705002080665, "grad_norm": 210.88978576660156, "learning_rate": 9.928281897725117e-06, "loss": 27.7297, "step": 74290 }, { "epoch": 0.15009070084075032, "grad_norm": 120.76721954345703, "learning_rate": 9.928222975744992e-06, "loss": 23.7316, "step": 74300 }, { "epoch": 0.15011090147343414, "grad_norm": 274.6885681152344, "learning_rate": 9.928164029745304e-06, "loss": 21.5781, "step": 74310 }, { "epoch": 0.15013110210611796, "grad_norm": 371.17608642578125, "learning_rate": 9.928105059726342e-06, "loss": 33.3037, "step": 74320 }, { "epoch": 0.15015130273880178, "grad_norm": 223.9054718017578, "learning_rate": 9.928046065688396e-06, "loss": 26.8376, "step": 74330 }, { "epoch": 0.1501715033714856, "grad_norm": 366.9564514160156, "learning_rate": 9.927987047631749e-06, "loss": 22.7162, "step": 74340 }, { "epoch": 0.15019170400416942, "grad_norm": 531.2222290039062, "learning_rate": 9.927928005556692e-06, "loss": 26.85, "step": 74350 }, { "epoch": 0.15021190463685322, "grad_norm": 267.49957275390625, "learning_rate": 9.927868939463511e-06, "loss": 15.3905, "step": 74360 }, { "epoch": 0.15023210526953704, "grad_norm": 235.9876708984375, "learning_rate": 9.927809849352496e-06, "loss": 28.0215, "step": 74370 }, { "epoch": 0.15025230590222086, "grad_norm": 311.71673583984375, "learning_rate": 9.927750735223932e-06, "loss": 28.6564, "step": 74380 }, { "epoch": 0.15027250653490468, "grad_norm": 311.93994140625, "learning_rate": 9.927691597078109e-06, "loss": 27.5603, "step": 74390 }, { "epoch": 0.1502927071675885, "grad_norm": 430.7981262207031, "learning_rate": 9.927632434915315e-06, "loss": 37.5322, "step": 74400 }, { "epoch": 0.15031290780027232, "grad_norm": 464.0959777832031, "learning_rate": 9.927573248735839e-06, "loss": 23.6514, "step": 74410 }, { "epoch": 0.1503331084329561, "grad_norm": 285.1459045410156, "learning_rate": 9.927514038539966e-06, "loss": 41.7334, "step": 74420 }, { "epoch": 0.15035330906563993, "grad_norm": 247.54722595214844, "learning_rate": 9.927454804327989e-06, "loss": 33.8532, "step": 74430 }, { "epoch": 0.15037350969832375, "grad_norm": 402.6376953125, "learning_rate": 9.927395546100195e-06, "loss": 23.4954, "step": 74440 }, { "epoch": 0.15039371033100757, "grad_norm": 221.2576141357422, "learning_rate": 9.927336263856873e-06, "loss": 18.6415, "step": 74450 }, { "epoch": 0.1504139109636914, "grad_norm": 308.38079833984375, "learning_rate": 9.92727695759831e-06, "loss": 20.7388, "step": 74460 }, { "epoch": 0.1504341115963752, "grad_norm": 98.65376281738281, "learning_rate": 9.927217627324798e-06, "loss": 21.7574, "step": 74470 }, { "epoch": 0.150454312229059, "grad_norm": 106.0455093383789, "learning_rate": 9.927158273036624e-06, "loss": 12.1189, "step": 74480 }, { "epoch": 0.15047451286174282, "grad_norm": 315.6065368652344, "learning_rate": 9.92709889473408e-06, "loss": 36.3618, "step": 74490 }, { "epoch": 0.15049471349442664, "grad_norm": 212.79962158203125, "learning_rate": 9.927039492417452e-06, "loss": 27.5968, "step": 74500 }, { "epoch": 0.15051491412711046, "grad_norm": 127.71649932861328, "learning_rate": 9.92698006608703e-06, "loss": 23.7261, "step": 74510 }, { "epoch": 0.15053511475979428, "grad_norm": 297.0060119628906, "learning_rate": 9.926920615743108e-06, "loss": 29.8254, "step": 74520 }, { "epoch": 0.1505553153924781, "grad_norm": 306.8353576660156, "learning_rate": 9.92686114138597e-06, "loss": 38.0227, "step": 74530 }, { "epoch": 0.15057551602516192, "grad_norm": 329.77325439453125, "learning_rate": 9.926801643015908e-06, "loss": 29.0942, "step": 74540 }, { "epoch": 0.1505957166578457, "grad_norm": 69.90805053710938, "learning_rate": 9.926742120633215e-06, "loss": 17.774, "step": 74550 }, { "epoch": 0.15061591729052953, "grad_norm": 273.5223388671875, "learning_rate": 9.926682574238175e-06, "loss": 29.7742, "step": 74560 }, { "epoch": 0.15063611792321335, "grad_norm": 163.78860473632812, "learning_rate": 9.926623003831085e-06, "loss": 8.4, "step": 74570 }, { "epoch": 0.15065631855589717, "grad_norm": 384.5724792480469, "learning_rate": 9.92656340941223e-06, "loss": 46.8532, "step": 74580 }, { "epoch": 0.150676519188581, "grad_norm": 150.9720001220703, "learning_rate": 9.926503790981903e-06, "loss": 17.5495, "step": 74590 }, { "epoch": 0.1506967198212648, "grad_norm": 545.6251220703125, "learning_rate": 9.926444148540394e-06, "loss": 19.148, "step": 74600 }, { "epoch": 0.1507169204539486, "grad_norm": 568.3290405273438, "learning_rate": 9.926384482087994e-06, "loss": 19.6122, "step": 74610 }, { "epoch": 0.15073712108663243, "grad_norm": 204.19741821289062, "learning_rate": 9.926324791624993e-06, "loss": 16.3809, "step": 74620 }, { "epoch": 0.15075732171931625, "grad_norm": 239.31793212890625, "learning_rate": 9.926265077151682e-06, "loss": 11.6309, "step": 74630 }, { "epoch": 0.15077752235200007, "grad_norm": 211.80018615722656, "learning_rate": 9.926205338668353e-06, "loss": 18.0973, "step": 74640 }, { "epoch": 0.15079772298468389, "grad_norm": 251.9052276611328, "learning_rate": 9.926145576175297e-06, "loss": 38.5932, "step": 74650 }, { "epoch": 0.1508179236173677, "grad_norm": 282.91668701171875, "learning_rate": 9.926085789672806e-06, "loss": 28.4947, "step": 74660 }, { "epoch": 0.15083812425005153, "grad_norm": 344.56005859375, "learning_rate": 9.926025979161169e-06, "loss": 16.2855, "step": 74670 }, { "epoch": 0.15085832488273532, "grad_norm": 246.29403686523438, "learning_rate": 9.925966144640677e-06, "loss": 17.0469, "step": 74680 }, { "epoch": 0.15087852551541914, "grad_norm": 146.0758514404297, "learning_rate": 9.925906286111627e-06, "loss": 12.3357, "step": 74690 }, { "epoch": 0.15089872614810296, "grad_norm": 266.17755126953125, "learning_rate": 9.925846403574306e-06, "loss": 29.2464, "step": 74700 }, { "epoch": 0.15091892678078678, "grad_norm": 225.78302001953125, "learning_rate": 9.925786497029007e-06, "loss": 14.1822, "step": 74710 }, { "epoch": 0.1509391274134706, "grad_norm": 225.07122802734375, "learning_rate": 9.925726566476021e-06, "loss": 27.4273, "step": 74720 }, { "epoch": 0.15095932804615442, "grad_norm": 235.81561279296875, "learning_rate": 9.925666611915642e-06, "loss": 38.433, "step": 74730 }, { "epoch": 0.1509795286788382, "grad_norm": 212.3058319091797, "learning_rate": 9.925606633348161e-06, "loss": 13.0633, "step": 74740 }, { "epoch": 0.15099972931152203, "grad_norm": 208.72142028808594, "learning_rate": 9.92554663077387e-06, "loss": 18.7167, "step": 74750 }, { "epoch": 0.15101992994420585, "grad_norm": 1078.463623046875, "learning_rate": 9.925486604193064e-06, "loss": 33.9012, "step": 74760 }, { "epoch": 0.15104013057688967, "grad_norm": 293.7212829589844, "learning_rate": 9.925426553606033e-06, "loss": 22.0213, "step": 74770 }, { "epoch": 0.1510603312095735, "grad_norm": 307.2977600097656, "learning_rate": 9.92536647901307e-06, "loss": 20.4276, "step": 74780 }, { "epoch": 0.1510805318422573, "grad_norm": 203.576171875, "learning_rate": 9.925306380414468e-06, "loss": 23.3435, "step": 74790 }, { "epoch": 0.1511007324749411, "grad_norm": 497.7390441894531, "learning_rate": 9.925246257810519e-06, "loss": 26.5068, "step": 74800 }, { "epoch": 0.15112093310762492, "grad_norm": 273.7267150878906, "learning_rate": 9.925186111201519e-06, "loss": 30.1445, "step": 74810 }, { "epoch": 0.15114113374030874, "grad_norm": 89.37116241455078, "learning_rate": 9.92512594058776e-06, "loss": 18.6597, "step": 74820 }, { "epoch": 0.15116133437299256, "grad_norm": 293.5412902832031, "learning_rate": 9.925065745969531e-06, "loss": 42.4702, "step": 74830 }, { "epoch": 0.15118153500567638, "grad_norm": 374.43939208984375, "learning_rate": 9.925005527347132e-06, "loss": 23.5106, "step": 74840 }, { "epoch": 0.1512017356383602, "grad_norm": 105.58113861083984, "learning_rate": 9.924945284720852e-06, "loss": 14.3909, "step": 74850 }, { "epoch": 0.15122193627104402, "grad_norm": 292.2124938964844, "learning_rate": 9.924885018090987e-06, "loss": 16.8359, "step": 74860 }, { "epoch": 0.15124213690372781, "grad_norm": 440.976806640625, "learning_rate": 9.924824727457829e-06, "loss": 15.9947, "step": 74870 }, { "epoch": 0.15126233753641163, "grad_norm": 315.2858581542969, "learning_rate": 9.924764412821673e-06, "loss": 27.3101, "step": 74880 }, { "epoch": 0.15128253816909545, "grad_norm": 334.018798828125, "learning_rate": 9.924704074182811e-06, "loss": 34.6423, "step": 74890 }, { "epoch": 0.15130273880177927, "grad_norm": 227.99505615234375, "learning_rate": 9.92464371154154e-06, "loss": 9.3259, "step": 74900 }, { "epoch": 0.1513229394344631, "grad_norm": 362.9433288574219, "learning_rate": 9.924583324898152e-06, "loss": 27.6404, "step": 74910 }, { "epoch": 0.15134314006714691, "grad_norm": 590.1314697265625, "learning_rate": 9.924522914252943e-06, "loss": 39.5389, "step": 74920 }, { "epoch": 0.1513633406998307, "grad_norm": 161.0101776123047, "learning_rate": 9.924462479606207e-06, "loss": 21.389, "step": 74930 }, { "epoch": 0.15138354133251453, "grad_norm": 238.23193359375, "learning_rate": 9.924402020958238e-06, "loss": 16.4229, "step": 74940 }, { "epoch": 0.15140374196519835, "grad_norm": 166.070068359375, "learning_rate": 9.92434153830933e-06, "loss": 12.0039, "step": 74950 }, { "epoch": 0.15142394259788217, "grad_norm": 208.85397338867188, "learning_rate": 9.92428103165978e-06, "loss": 23.5889, "step": 74960 }, { "epoch": 0.151444143230566, "grad_norm": 178.75547790527344, "learning_rate": 9.92422050100988e-06, "loss": 20.778, "step": 74970 }, { "epoch": 0.1514643438632498, "grad_norm": 104.81141662597656, "learning_rate": 9.924159946359927e-06, "loss": 25.2922, "step": 74980 }, { "epoch": 0.15148454449593363, "grad_norm": 230.0667724609375, "learning_rate": 9.924099367710215e-06, "loss": 27.1245, "step": 74990 }, { "epoch": 0.15150474512861742, "grad_norm": 193.9864501953125, "learning_rate": 9.924038765061042e-06, "loss": 38.5879, "step": 75000 }, { "epoch": 0.15152494576130124, "grad_norm": 234.8813934326172, "learning_rate": 9.923978138412698e-06, "loss": 17.9905, "step": 75010 }, { "epoch": 0.15154514639398506, "grad_norm": 334.52264404296875, "learning_rate": 9.923917487765484e-06, "loss": 25.079, "step": 75020 }, { "epoch": 0.15156534702666888, "grad_norm": 246.24508666992188, "learning_rate": 9.923856813119694e-06, "loss": 24.4485, "step": 75030 }, { "epoch": 0.1515855476593527, "grad_norm": 212.68238830566406, "learning_rate": 9.92379611447562e-06, "loss": 27.9421, "step": 75040 }, { "epoch": 0.15160574829203652, "grad_norm": 268.9492492675781, "learning_rate": 9.923735391833564e-06, "loss": 28.6172, "step": 75050 }, { "epoch": 0.1516259489247203, "grad_norm": 402.49176025390625, "learning_rate": 9.923674645193819e-06, "loss": 22.5723, "step": 75060 }, { "epoch": 0.15164614955740413, "grad_norm": 251.81884765625, "learning_rate": 9.92361387455668e-06, "loss": 29.6918, "step": 75070 }, { "epoch": 0.15166635019008795, "grad_norm": 203.9662628173828, "learning_rate": 9.923553079922443e-06, "loss": 16.3817, "step": 75080 }, { "epoch": 0.15168655082277177, "grad_norm": 235.32337951660156, "learning_rate": 9.923492261291406e-06, "loss": 16.204, "step": 75090 }, { "epoch": 0.1517067514554556, "grad_norm": 522.1925048828125, "learning_rate": 9.923431418663866e-06, "loss": 30.4098, "step": 75100 }, { "epoch": 0.1517269520881394, "grad_norm": 97.13179016113281, "learning_rate": 9.923370552040117e-06, "loss": 19.5536, "step": 75110 }, { "epoch": 0.1517471527208232, "grad_norm": 342.87384033203125, "learning_rate": 9.923309661420458e-06, "loss": 30.3985, "step": 75120 }, { "epoch": 0.15176735335350702, "grad_norm": 240.81214904785156, "learning_rate": 9.923248746805185e-06, "loss": 17.5904, "step": 75130 }, { "epoch": 0.15178755398619084, "grad_norm": 254.4801025390625, "learning_rate": 9.923187808194594e-06, "loss": 35.5812, "step": 75140 }, { "epoch": 0.15180775461887466, "grad_norm": 543.9935913085938, "learning_rate": 9.923126845588982e-06, "loss": 26.7793, "step": 75150 }, { "epoch": 0.15182795525155848, "grad_norm": 24.202945709228516, "learning_rate": 9.92306585898865e-06, "loss": 16.6117, "step": 75160 }, { "epoch": 0.1518481558842423, "grad_norm": 84.58307647705078, "learning_rate": 9.92300484839389e-06, "loss": 16.8216, "step": 75170 }, { "epoch": 0.15186835651692612, "grad_norm": 442.3559875488281, "learning_rate": 9.922943813805e-06, "loss": 16.7459, "step": 75180 }, { "epoch": 0.15188855714960992, "grad_norm": 343.6642150878906, "learning_rate": 9.92288275522228e-06, "loss": 22.2921, "step": 75190 }, { "epoch": 0.15190875778229374, "grad_norm": 0.0, "learning_rate": 9.922821672646028e-06, "loss": 19.539, "step": 75200 }, { "epoch": 0.15192895841497756, "grad_norm": 314.22100830078125, "learning_rate": 9.922760566076538e-06, "loss": 13.9953, "step": 75210 }, { "epoch": 0.15194915904766138, "grad_norm": 400.48968505859375, "learning_rate": 9.922699435514112e-06, "loss": 19.8686, "step": 75220 }, { "epoch": 0.1519693596803452, "grad_norm": 636.5394287109375, "learning_rate": 9.922638280959044e-06, "loss": 29.4964, "step": 75230 }, { "epoch": 0.15198956031302902, "grad_norm": 361.6471252441406, "learning_rate": 9.922577102411638e-06, "loss": 15.6522, "step": 75240 }, { "epoch": 0.1520097609457128, "grad_norm": 652.4127807617188, "learning_rate": 9.922515899872184e-06, "loss": 21.9327, "step": 75250 }, { "epoch": 0.15202996157839663, "grad_norm": 481.90777587890625, "learning_rate": 9.922454673340987e-06, "loss": 39.6445, "step": 75260 }, { "epoch": 0.15205016221108045, "grad_norm": 271.26922607421875, "learning_rate": 9.922393422818342e-06, "loss": 22.166, "step": 75270 }, { "epoch": 0.15207036284376427, "grad_norm": 285.2798156738281, "learning_rate": 9.922332148304548e-06, "loss": 35.8855, "step": 75280 }, { "epoch": 0.1520905634764481, "grad_norm": 333.0103454589844, "learning_rate": 9.922270849799903e-06, "loss": 28.3611, "step": 75290 }, { "epoch": 0.1521107641091319, "grad_norm": 383.8343811035156, "learning_rate": 9.922209527304709e-06, "loss": 19.3632, "step": 75300 }, { "epoch": 0.15213096474181573, "grad_norm": 263.3747863769531, "learning_rate": 9.922148180819261e-06, "loss": 12.8317, "step": 75310 }, { "epoch": 0.15215116537449952, "grad_norm": 398.2802429199219, "learning_rate": 9.922086810343862e-06, "loss": 31.1158, "step": 75320 }, { "epoch": 0.15217136600718334, "grad_norm": 752.3159790039062, "learning_rate": 9.922025415878809e-06, "loss": 19.2766, "step": 75330 }, { "epoch": 0.15219156663986716, "grad_norm": 166.7379913330078, "learning_rate": 9.9219639974244e-06, "loss": 24.6321, "step": 75340 }, { "epoch": 0.15221176727255098, "grad_norm": 542.4790649414062, "learning_rate": 9.921902554980935e-06, "loss": 30.3907, "step": 75350 }, { "epoch": 0.1522319679052348, "grad_norm": 178.31329345703125, "learning_rate": 9.921841088548713e-06, "loss": 27.5856, "step": 75360 }, { "epoch": 0.15225216853791862, "grad_norm": 428.7532653808594, "learning_rate": 9.921779598128036e-06, "loss": 15.8402, "step": 75370 }, { "epoch": 0.1522723691706024, "grad_norm": 341.6441650390625, "learning_rate": 9.921718083719203e-06, "loss": 16.9623, "step": 75380 }, { "epoch": 0.15229256980328623, "grad_norm": 594.728515625, "learning_rate": 9.921656545322512e-06, "loss": 29.7716, "step": 75390 }, { "epoch": 0.15231277043597005, "grad_norm": 241.04052734375, "learning_rate": 9.921594982938262e-06, "loss": 22.2931, "step": 75400 }, { "epoch": 0.15233297106865387, "grad_norm": 367.4365234375, "learning_rate": 9.921533396566758e-06, "loss": 22.3911, "step": 75410 }, { "epoch": 0.1523531717013377, "grad_norm": 137.57421875, "learning_rate": 9.921471786208296e-06, "loss": 21.1017, "step": 75420 }, { "epoch": 0.1523733723340215, "grad_norm": 360.8106689453125, "learning_rate": 9.921410151863177e-06, "loss": 27.1866, "step": 75430 }, { "epoch": 0.1523935729667053, "grad_norm": 262.8116455078125, "learning_rate": 9.921348493531701e-06, "loss": 23.1394, "step": 75440 }, { "epoch": 0.15241377359938912, "grad_norm": 177.4680633544922, "learning_rate": 9.921286811214173e-06, "loss": 11.5607, "step": 75450 }, { "epoch": 0.15243397423207294, "grad_norm": 271.998291015625, "learning_rate": 9.921225104910886e-06, "loss": 13.7471, "step": 75460 }, { "epoch": 0.15245417486475676, "grad_norm": 87.26866149902344, "learning_rate": 9.921163374622147e-06, "loss": 12.0394, "step": 75470 }, { "epoch": 0.15247437549744058, "grad_norm": 378.51947021484375, "learning_rate": 9.921101620348252e-06, "loss": 31.1476, "step": 75480 }, { "epoch": 0.1524945761301244, "grad_norm": 313.8820495605469, "learning_rate": 9.921039842089508e-06, "loss": 28.8252, "step": 75490 }, { "epoch": 0.15251477676280822, "grad_norm": 231.86331176757812, "learning_rate": 9.92097803984621e-06, "loss": 10.2794, "step": 75500 }, { "epoch": 0.15253497739549202, "grad_norm": 358.7131042480469, "learning_rate": 9.920916213618664e-06, "loss": 25.3487, "step": 75510 }, { "epoch": 0.15255517802817584, "grad_norm": 311.32659912109375, "learning_rate": 9.920854363407168e-06, "loss": 34.5399, "step": 75520 }, { "epoch": 0.15257537866085966, "grad_norm": 300.0164794921875, "learning_rate": 9.920792489212023e-06, "loss": 31.6782, "step": 75530 }, { "epoch": 0.15259557929354348, "grad_norm": 160.0378875732422, "learning_rate": 9.920730591033534e-06, "loss": 30.3758, "step": 75540 }, { "epoch": 0.1526157799262273, "grad_norm": 103.84625244140625, "learning_rate": 9.920668668872002e-06, "loss": 20.8754, "step": 75550 }, { "epoch": 0.15263598055891112, "grad_norm": 228.58480834960938, "learning_rate": 9.920606722727726e-06, "loss": 22.4296, "step": 75560 }, { "epoch": 0.1526561811915949, "grad_norm": 382.6224365234375, "learning_rate": 9.920544752601011e-06, "loss": 26.4538, "step": 75570 }, { "epoch": 0.15267638182427873, "grad_norm": 222.89279174804688, "learning_rate": 9.920482758492156e-06, "loss": 20.0068, "step": 75580 }, { "epoch": 0.15269658245696255, "grad_norm": 179.0259552001953, "learning_rate": 9.920420740401466e-06, "loss": 19.4062, "step": 75590 }, { "epoch": 0.15271678308964637, "grad_norm": 404.4815368652344, "learning_rate": 9.920358698329242e-06, "loss": 16.4118, "step": 75600 }, { "epoch": 0.1527369837223302, "grad_norm": 125.50210571289062, "learning_rate": 9.920296632275785e-06, "loss": 19.4025, "step": 75610 }, { "epoch": 0.152757184355014, "grad_norm": 262.7456970214844, "learning_rate": 9.9202345422414e-06, "loss": 31.4975, "step": 75620 }, { "epoch": 0.15277738498769783, "grad_norm": 38.31513977050781, "learning_rate": 9.92017242822639e-06, "loss": 14.5561, "step": 75630 }, { "epoch": 0.15279758562038162, "grad_norm": 460.3357849121094, "learning_rate": 9.920110290231056e-06, "loss": 43.6104, "step": 75640 }, { "epoch": 0.15281778625306544, "grad_norm": 250.47462463378906, "learning_rate": 9.920048128255699e-06, "loss": 28.487, "step": 75650 }, { "epoch": 0.15283798688574926, "grad_norm": 786.2476806640625, "learning_rate": 9.919985942300625e-06, "loss": 28.5765, "step": 75660 }, { "epoch": 0.15285818751843308, "grad_norm": 77.79499816894531, "learning_rate": 9.919923732366137e-06, "loss": 37.5105, "step": 75670 }, { "epoch": 0.1528783881511169, "grad_norm": 346.8711853027344, "learning_rate": 9.919861498452538e-06, "loss": 20.6156, "step": 75680 }, { "epoch": 0.15289858878380072, "grad_norm": 167.84336853027344, "learning_rate": 9.91979924056013e-06, "loss": 24.4978, "step": 75690 }, { "epoch": 0.1529187894164845, "grad_norm": 810.560546875, "learning_rate": 9.919736958689216e-06, "loss": 35.5779, "step": 75700 }, { "epoch": 0.15293899004916833, "grad_norm": 358.73712158203125, "learning_rate": 9.919674652840103e-06, "loss": 25.5562, "step": 75710 }, { "epoch": 0.15295919068185215, "grad_norm": 171.64158630371094, "learning_rate": 9.91961232301309e-06, "loss": 14.3432, "step": 75720 }, { "epoch": 0.15297939131453597, "grad_norm": 244.98538208007812, "learning_rate": 9.919549969208486e-06, "loss": 25.7399, "step": 75730 }, { "epoch": 0.1529995919472198, "grad_norm": 245.8119354248047, "learning_rate": 9.919487591426591e-06, "loss": 17.0358, "step": 75740 }, { "epoch": 0.1530197925799036, "grad_norm": 320.4654846191406, "learning_rate": 9.91942518966771e-06, "loss": 35.6943, "step": 75750 }, { "epoch": 0.1530399932125874, "grad_norm": 260.30511474609375, "learning_rate": 9.919362763932145e-06, "loss": 28.6758, "step": 75760 }, { "epoch": 0.15306019384527123, "grad_norm": 463.55792236328125, "learning_rate": 9.919300314220206e-06, "loss": 23.7819, "step": 75770 }, { "epoch": 0.15308039447795505, "grad_norm": 412.503173828125, "learning_rate": 9.919237840532192e-06, "loss": 19.5172, "step": 75780 }, { "epoch": 0.15310059511063887, "grad_norm": 280.8273620605469, "learning_rate": 9.91917534286841e-06, "loss": 14.6161, "step": 75790 }, { "epoch": 0.15312079574332269, "grad_norm": 411.6118469238281, "learning_rate": 9.919112821229165e-06, "loss": 16.5466, "step": 75800 }, { "epoch": 0.1531409963760065, "grad_norm": 129.98182678222656, "learning_rate": 9.91905027561476e-06, "loss": 32.632, "step": 75810 }, { "epoch": 0.15316119700869033, "grad_norm": 57.28913116455078, "learning_rate": 9.918987706025498e-06, "loss": 14.7234, "step": 75820 }, { "epoch": 0.15318139764137412, "grad_norm": 511.5194091796875, "learning_rate": 9.918925112461688e-06, "loss": 21.8461, "step": 75830 }, { "epoch": 0.15320159827405794, "grad_norm": 75.726318359375, "learning_rate": 9.918862494923635e-06, "loss": 18.2241, "step": 75840 }, { "epoch": 0.15322179890674176, "grad_norm": 370.091552734375, "learning_rate": 9.918799853411642e-06, "loss": 21.3902, "step": 75850 }, { "epoch": 0.15324199953942558, "grad_norm": 215.97994995117188, "learning_rate": 9.918737187926014e-06, "loss": 35.4219, "step": 75860 }, { "epoch": 0.1532622001721094, "grad_norm": 318.54827880859375, "learning_rate": 9.91867449846706e-06, "loss": 16.8096, "step": 75870 }, { "epoch": 0.15328240080479322, "grad_norm": 331.7506103515625, "learning_rate": 9.91861178503508e-06, "loss": 40.2114, "step": 75880 }, { "epoch": 0.153302601437477, "grad_norm": 573.4930419921875, "learning_rate": 9.918549047630386e-06, "loss": 24.252, "step": 75890 }, { "epoch": 0.15332280207016083, "grad_norm": 303.61065673828125, "learning_rate": 9.918486286253279e-06, "loss": 19.3571, "step": 75900 }, { "epoch": 0.15334300270284465, "grad_norm": 82.65367126464844, "learning_rate": 9.918423500904066e-06, "loss": 30.9111, "step": 75910 }, { "epoch": 0.15336320333552847, "grad_norm": 175.0387725830078, "learning_rate": 9.918360691583056e-06, "loss": 15.5602, "step": 75920 }, { "epoch": 0.1533834039682123, "grad_norm": 239.22735595703125, "learning_rate": 9.918297858290548e-06, "loss": 20.5608, "step": 75930 }, { "epoch": 0.1534036046008961, "grad_norm": 109.45496368408203, "learning_rate": 9.918235001026856e-06, "loss": 40.0477, "step": 75940 }, { "epoch": 0.1534238052335799, "grad_norm": 337.3406982421875, "learning_rate": 9.918172119792283e-06, "loss": 29.2329, "step": 75950 }, { "epoch": 0.15344400586626372, "grad_norm": 294.4963073730469, "learning_rate": 9.918109214587134e-06, "loss": 19.8144, "step": 75960 }, { "epoch": 0.15346420649894754, "grad_norm": 189.7427215576172, "learning_rate": 9.918046285411717e-06, "loss": 25.3137, "step": 75970 }, { "epoch": 0.15348440713163136, "grad_norm": 173.51768493652344, "learning_rate": 9.917983332266342e-06, "loss": 28.8269, "step": 75980 }, { "epoch": 0.15350460776431518, "grad_norm": 118.64507293701172, "learning_rate": 9.91792035515131e-06, "loss": 15.0698, "step": 75990 }, { "epoch": 0.153524808396999, "grad_norm": 0.0, "learning_rate": 9.91785735406693e-06, "loss": 7.9113, "step": 76000 }, { "epoch": 0.15354500902968282, "grad_norm": 128.10076904296875, "learning_rate": 9.917794329013511e-06, "loss": 24.2705, "step": 76010 }, { "epoch": 0.15356520966236661, "grad_norm": 296.3288879394531, "learning_rate": 9.917731279991358e-06, "loss": 25.7816, "step": 76020 }, { "epoch": 0.15358541029505043, "grad_norm": 85.29432678222656, "learning_rate": 9.91766820700078e-06, "loss": 31.5856, "step": 76030 }, { "epoch": 0.15360561092773425, "grad_norm": 112.39793395996094, "learning_rate": 9.917605110042084e-06, "loss": 18.9189, "step": 76040 }, { "epoch": 0.15362581156041807, "grad_norm": 130.1544952392578, "learning_rate": 9.917541989115579e-06, "loss": 25.6442, "step": 76050 }, { "epoch": 0.1536460121931019, "grad_norm": 315.6504211425781, "learning_rate": 9.917478844221566e-06, "loss": 17.0859, "step": 76060 }, { "epoch": 0.15366621282578571, "grad_norm": 300.9882507324219, "learning_rate": 9.91741567536036e-06, "loss": 10.3314, "step": 76070 }, { "epoch": 0.1536864134584695, "grad_norm": 380.73968505859375, "learning_rate": 9.917352482532267e-06, "loss": 31.1713, "step": 76080 }, { "epoch": 0.15370661409115333, "grad_norm": 103.53414154052734, "learning_rate": 9.917289265737594e-06, "loss": 27.6559, "step": 76090 }, { "epoch": 0.15372681472383715, "grad_norm": 87.87761688232422, "learning_rate": 9.91722602497665e-06, "loss": 14.5372, "step": 76100 }, { "epoch": 0.15374701535652097, "grad_norm": 211.5806121826172, "learning_rate": 9.917162760249741e-06, "loss": 16.9959, "step": 76110 }, { "epoch": 0.1537672159892048, "grad_norm": 390.87841796875, "learning_rate": 9.91709947155718e-06, "loss": 47.205, "step": 76120 }, { "epoch": 0.1537874166218886, "grad_norm": 264.104248046875, "learning_rate": 9.91703615889927e-06, "loss": 14.1096, "step": 76130 }, { "epoch": 0.15380761725457243, "grad_norm": 0.0, "learning_rate": 9.916972822276322e-06, "loss": 16.0041, "step": 76140 }, { "epoch": 0.15382781788725622, "grad_norm": 405.2744140625, "learning_rate": 9.916909461688646e-06, "loss": 30.3848, "step": 76150 }, { "epoch": 0.15384801851994004, "grad_norm": 240.55393981933594, "learning_rate": 9.916846077136548e-06, "loss": 24.3566, "step": 76160 }, { "epoch": 0.15386821915262386, "grad_norm": 545.998291015625, "learning_rate": 9.916782668620341e-06, "loss": 26.9343, "step": 76170 }, { "epoch": 0.15388841978530768, "grad_norm": 1.5668977499008179, "learning_rate": 9.91671923614033e-06, "loss": 21.3602, "step": 76180 }, { "epoch": 0.1539086204179915, "grad_norm": 603.1826782226562, "learning_rate": 9.916655779696826e-06, "loss": 32.0043, "step": 76190 }, { "epoch": 0.15392882105067532, "grad_norm": 307.4775695800781, "learning_rate": 9.91659229929014e-06, "loss": 27.6179, "step": 76200 }, { "epoch": 0.1539490216833591, "grad_norm": 358.2984924316406, "learning_rate": 9.916528794920577e-06, "loss": 21.5979, "step": 76210 }, { "epoch": 0.15396922231604293, "grad_norm": 348.7200622558594, "learning_rate": 9.916465266588448e-06, "loss": 25.3998, "step": 76220 }, { "epoch": 0.15398942294872675, "grad_norm": 363.2261657714844, "learning_rate": 9.916401714294067e-06, "loss": 34.6779, "step": 76230 }, { "epoch": 0.15400962358141057, "grad_norm": 306.2249755859375, "learning_rate": 9.916338138037738e-06, "loss": 28.5127, "step": 76240 }, { "epoch": 0.1540298242140944, "grad_norm": 145.91131591796875, "learning_rate": 9.916274537819774e-06, "loss": 25.1777, "step": 76250 }, { "epoch": 0.1540500248467782, "grad_norm": 506.87493896484375, "learning_rate": 9.916210913640483e-06, "loss": 21.9147, "step": 76260 }, { "epoch": 0.154070225479462, "grad_norm": 459.17181396484375, "learning_rate": 9.916147265500179e-06, "loss": 35.8954, "step": 76270 }, { "epoch": 0.15409042611214582, "grad_norm": 156.5463104248047, "learning_rate": 9.916083593399167e-06, "loss": 18.336, "step": 76280 }, { "epoch": 0.15411062674482964, "grad_norm": 266.3895568847656, "learning_rate": 9.916019897337761e-06, "loss": 35.1233, "step": 76290 }, { "epoch": 0.15413082737751346, "grad_norm": 141.76657104492188, "learning_rate": 9.915956177316269e-06, "loss": 10.5874, "step": 76300 }, { "epoch": 0.15415102801019728, "grad_norm": 79.8198471069336, "learning_rate": 9.915892433335004e-06, "loss": 20.1494, "step": 76310 }, { "epoch": 0.1541712286428811, "grad_norm": 147.40185546875, "learning_rate": 9.915828665394274e-06, "loss": 27.0375, "step": 76320 }, { "epoch": 0.15419142927556492, "grad_norm": 844.73876953125, "learning_rate": 9.915764873494393e-06, "loss": 34.3195, "step": 76330 }, { "epoch": 0.15421162990824872, "grad_norm": 197.09841918945312, "learning_rate": 9.915701057635669e-06, "loss": 18.7705, "step": 76340 }, { "epoch": 0.15423183054093254, "grad_norm": 489.9423522949219, "learning_rate": 9.915637217818415e-06, "loss": 14.7817, "step": 76350 }, { "epoch": 0.15425203117361636, "grad_norm": 247.25758361816406, "learning_rate": 9.915573354042943e-06, "loss": 25.1321, "step": 76360 }, { "epoch": 0.15427223180630018, "grad_norm": 361.0143127441406, "learning_rate": 9.91550946630956e-06, "loss": 19.1792, "step": 76370 }, { "epoch": 0.154292432438984, "grad_norm": 352.0470886230469, "learning_rate": 9.915445554618581e-06, "loss": 23.0438, "step": 76380 }, { "epoch": 0.15431263307166782, "grad_norm": 177.42503356933594, "learning_rate": 9.915381618970317e-06, "loss": 16.7805, "step": 76390 }, { "epoch": 0.1543328337043516, "grad_norm": 96.308349609375, "learning_rate": 9.915317659365078e-06, "loss": 16.6877, "step": 76400 }, { "epoch": 0.15435303433703543, "grad_norm": 991.4146728515625, "learning_rate": 9.915253675803178e-06, "loss": 27.3158, "step": 76410 }, { "epoch": 0.15437323496971925, "grad_norm": 115.77900695800781, "learning_rate": 9.915189668284927e-06, "loss": 20.8895, "step": 76420 }, { "epoch": 0.15439343560240307, "grad_norm": 118.19866943359375, "learning_rate": 9.915125636810638e-06, "loss": 28.0331, "step": 76430 }, { "epoch": 0.1544136362350869, "grad_norm": 228.486328125, "learning_rate": 9.915061581380622e-06, "loss": 24.0501, "step": 76440 }, { "epoch": 0.1544338368677707, "grad_norm": 335.5968933105469, "learning_rate": 9.914997501995193e-06, "loss": 27.8743, "step": 76450 }, { "epoch": 0.15445403750045453, "grad_norm": 258.3746032714844, "learning_rate": 9.914933398654663e-06, "loss": 30.5555, "step": 76460 }, { "epoch": 0.15447423813313832, "grad_norm": 1432.78515625, "learning_rate": 9.914869271359342e-06, "loss": 51.5473, "step": 76470 }, { "epoch": 0.15449443876582214, "grad_norm": 617.3510131835938, "learning_rate": 9.914805120109545e-06, "loss": 28.9139, "step": 76480 }, { "epoch": 0.15451463939850596, "grad_norm": 202.69711303710938, "learning_rate": 9.914740944905585e-06, "loss": 27.5848, "step": 76490 }, { "epoch": 0.15453484003118978, "grad_norm": 363.6729736328125, "learning_rate": 9.914676745747772e-06, "loss": 50.8086, "step": 76500 }, { "epoch": 0.1545550406638736, "grad_norm": 492.5381164550781, "learning_rate": 9.914612522636423e-06, "loss": 25.8733, "step": 76510 }, { "epoch": 0.15457524129655742, "grad_norm": 378.327880859375, "learning_rate": 9.914548275571845e-06, "loss": 30.5789, "step": 76520 }, { "epoch": 0.1545954419292412, "grad_norm": 483.16473388671875, "learning_rate": 9.914484004554356e-06, "loss": 31.8337, "step": 76530 }, { "epoch": 0.15461564256192503, "grad_norm": 306.7428283691406, "learning_rate": 9.91441970958427e-06, "loss": 31.724, "step": 76540 }, { "epoch": 0.15463584319460885, "grad_norm": 425.106201171875, "learning_rate": 9.914355390661897e-06, "loss": 23.3722, "step": 76550 }, { "epoch": 0.15465604382729267, "grad_norm": 145.69932556152344, "learning_rate": 9.914291047787552e-06, "loss": 32.2535, "step": 76560 }, { "epoch": 0.1546762444599765, "grad_norm": 165.27816772460938, "learning_rate": 9.914226680961549e-06, "loss": 32.6985, "step": 76570 }, { "epoch": 0.1546964450926603, "grad_norm": 404.2888488769531, "learning_rate": 9.9141622901842e-06, "loss": 21.5119, "step": 76580 }, { "epoch": 0.1547166457253441, "grad_norm": 307.5506896972656, "learning_rate": 9.914097875455821e-06, "loss": 29.8998, "step": 76590 }, { "epoch": 0.15473684635802792, "grad_norm": 327.2987365722656, "learning_rate": 9.914033436776724e-06, "loss": 19.3247, "step": 76600 }, { "epoch": 0.15475704699071174, "grad_norm": 392.3492126464844, "learning_rate": 9.913968974147225e-06, "loss": 13.9422, "step": 76610 }, { "epoch": 0.15477724762339556, "grad_norm": 788.29638671875, "learning_rate": 9.913904487567636e-06, "loss": 32.0831, "step": 76620 }, { "epoch": 0.15479744825607938, "grad_norm": 499.4055480957031, "learning_rate": 9.913839977038274e-06, "loss": 34.1278, "step": 76630 }, { "epoch": 0.1548176488887632, "grad_norm": 128.5001678466797, "learning_rate": 9.913775442559451e-06, "loss": 42.8461, "step": 76640 }, { "epoch": 0.15483784952144702, "grad_norm": 292.69769287109375, "learning_rate": 9.913710884131483e-06, "loss": 22.958, "step": 76650 }, { "epoch": 0.15485805015413082, "grad_norm": 674.0772705078125, "learning_rate": 9.913646301754685e-06, "loss": 21.6926, "step": 76660 }, { "epoch": 0.15487825078681464, "grad_norm": 20.876062393188477, "learning_rate": 9.913581695429368e-06, "loss": 25.4622, "step": 76670 }, { "epoch": 0.15489845141949846, "grad_norm": 204.44215393066406, "learning_rate": 9.913517065155852e-06, "loss": 20.0481, "step": 76680 }, { "epoch": 0.15491865205218228, "grad_norm": 220.14602661132812, "learning_rate": 9.91345241093445e-06, "loss": 17.2742, "step": 76690 }, { "epoch": 0.1549388526848661, "grad_norm": 677.0068969726562, "learning_rate": 9.913387732765475e-06, "loss": 19.4264, "step": 76700 }, { "epoch": 0.15495905331754992, "grad_norm": 247.87020874023438, "learning_rate": 9.913323030649247e-06, "loss": 24.6661, "step": 76710 }, { "epoch": 0.1549792539502337, "grad_norm": 120.063232421875, "learning_rate": 9.913258304586076e-06, "loss": 17.9531, "step": 76720 }, { "epoch": 0.15499945458291753, "grad_norm": 193.60769653320312, "learning_rate": 9.91319355457628e-06, "loss": 20.1889, "step": 76730 }, { "epoch": 0.15501965521560135, "grad_norm": 274.03924560546875, "learning_rate": 9.913128780620175e-06, "loss": 18.4391, "step": 76740 }, { "epoch": 0.15503985584828517, "grad_norm": 50.97987365722656, "learning_rate": 9.913063982718076e-06, "loss": 34.9578, "step": 76750 }, { "epoch": 0.155060056480969, "grad_norm": 363.1507263183594, "learning_rate": 9.9129991608703e-06, "loss": 23.1078, "step": 76760 }, { "epoch": 0.1550802571136528, "grad_norm": 67.11571502685547, "learning_rate": 9.912934315077162e-06, "loss": 23.9468, "step": 76770 }, { "epoch": 0.15510045774633663, "grad_norm": 446.4045104980469, "learning_rate": 9.912869445338978e-06, "loss": 25.5677, "step": 76780 }, { "epoch": 0.15512065837902042, "grad_norm": 287.01287841796875, "learning_rate": 9.912804551656064e-06, "loss": 25.1273, "step": 76790 }, { "epoch": 0.15514085901170424, "grad_norm": 170.6279754638672, "learning_rate": 9.912739634028734e-06, "loss": 30.2837, "step": 76800 }, { "epoch": 0.15516105964438806, "grad_norm": 613.9049072265625, "learning_rate": 9.91267469245731e-06, "loss": 40.3323, "step": 76810 }, { "epoch": 0.15518126027707188, "grad_norm": 242.52334594726562, "learning_rate": 9.912609726942104e-06, "loss": 75.5536, "step": 76820 }, { "epoch": 0.1552014609097557, "grad_norm": 351.3273010253906, "learning_rate": 9.912544737483434e-06, "loss": 16.2963, "step": 76830 }, { "epoch": 0.15522166154243952, "grad_norm": 398.487060546875, "learning_rate": 9.912479724081617e-06, "loss": 24.543, "step": 76840 }, { "epoch": 0.1552418621751233, "grad_norm": 245.7581787109375, "learning_rate": 9.912414686736971e-06, "loss": 23.1903, "step": 76850 }, { "epoch": 0.15526206280780713, "grad_norm": 775.232421875, "learning_rate": 9.912349625449808e-06, "loss": 31.1511, "step": 76860 }, { "epoch": 0.15528226344049095, "grad_norm": 580.8097534179688, "learning_rate": 9.912284540220452e-06, "loss": 43.4463, "step": 76870 }, { "epoch": 0.15530246407317477, "grad_norm": 261.2633361816406, "learning_rate": 9.912219431049217e-06, "loss": 19.2085, "step": 76880 }, { "epoch": 0.1553226647058586, "grad_norm": 166.5902099609375, "learning_rate": 9.912154297936418e-06, "loss": 18.65, "step": 76890 }, { "epoch": 0.1553428653385424, "grad_norm": 269.4258117675781, "learning_rate": 9.912089140882377e-06, "loss": 48.3616, "step": 76900 }, { "epoch": 0.1553630659712262, "grad_norm": 243.7389678955078, "learning_rate": 9.912023959887408e-06, "loss": 17.2493, "step": 76910 }, { "epoch": 0.15538326660391003, "grad_norm": 337.1795349121094, "learning_rate": 9.91195875495183e-06, "loss": 24.3858, "step": 76920 }, { "epoch": 0.15540346723659385, "grad_norm": 126.31841278076172, "learning_rate": 9.911893526075961e-06, "loss": 35.7699, "step": 76930 }, { "epoch": 0.15542366786927767, "grad_norm": 101.54562377929688, "learning_rate": 9.911828273260119e-06, "loss": 17.9187, "step": 76940 }, { "epoch": 0.15544386850196149, "grad_norm": 447.547119140625, "learning_rate": 9.911762996504621e-06, "loss": 35.5394, "step": 76950 }, { "epoch": 0.1554640691346453, "grad_norm": 311.34063720703125, "learning_rate": 9.911697695809787e-06, "loss": 28.5778, "step": 76960 }, { "epoch": 0.15548426976732913, "grad_norm": 494.7862854003906, "learning_rate": 9.911632371175934e-06, "loss": 36.2096, "step": 76970 }, { "epoch": 0.15550447040001292, "grad_norm": 411.1186218261719, "learning_rate": 9.911567022603379e-06, "loss": 17.9846, "step": 76980 }, { "epoch": 0.15552467103269674, "grad_norm": 38.03059768676758, "learning_rate": 9.911501650092443e-06, "loss": 33.2987, "step": 76990 }, { "epoch": 0.15554487166538056, "grad_norm": 428.56634521484375, "learning_rate": 9.911436253643445e-06, "loss": 39.4676, "step": 77000 }, { "epoch": 0.15556507229806438, "grad_norm": 128.7852783203125, "learning_rate": 9.911370833256701e-06, "loss": 29.7276, "step": 77010 }, { "epoch": 0.1555852729307482, "grad_norm": 854.0330200195312, "learning_rate": 9.91130538893253e-06, "loss": 33.404, "step": 77020 }, { "epoch": 0.15560547356343202, "grad_norm": 338.5502624511719, "learning_rate": 9.911239920671253e-06, "loss": 20.4805, "step": 77030 }, { "epoch": 0.1556256741961158, "grad_norm": 133.19232177734375, "learning_rate": 9.91117442847319e-06, "loss": 40.4524, "step": 77040 }, { "epoch": 0.15564587482879963, "grad_norm": 167.0153045654297, "learning_rate": 9.911108912338656e-06, "loss": 16.9395, "step": 77050 }, { "epoch": 0.15566607546148345, "grad_norm": 213.392333984375, "learning_rate": 9.911043372267975e-06, "loss": 25.4748, "step": 77060 }, { "epoch": 0.15568627609416727, "grad_norm": 606.1139526367188, "learning_rate": 9.910977808261463e-06, "loss": 32.1799, "step": 77070 }, { "epoch": 0.1557064767268511, "grad_norm": 261.03515625, "learning_rate": 9.910912220319443e-06, "loss": 31.2058, "step": 77080 }, { "epoch": 0.1557266773595349, "grad_norm": 155.10240173339844, "learning_rate": 9.910846608442229e-06, "loss": 17.291, "step": 77090 }, { "epoch": 0.15574687799221873, "grad_norm": 141.19027709960938, "learning_rate": 9.910780972630146e-06, "loss": 25.5223, "step": 77100 }, { "epoch": 0.15576707862490252, "grad_norm": 532.0252685546875, "learning_rate": 9.910715312883512e-06, "loss": 22.9292, "step": 77110 }, { "epoch": 0.15578727925758634, "grad_norm": 430.4310302734375, "learning_rate": 9.910649629202648e-06, "loss": 24.6226, "step": 77120 }, { "epoch": 0.15580747989027016, "grad_norm": 268.6076354980469, "learning_rate": 9.910583921587872e-06, "loss": 20.6363, "step": 77130 }, { "epoch": 0.15582768052295398, "grad_norm": 48.74985122680664, "learning_rate": 9.910518190039506e-06, "loss": 39.7291, "step": 77140 }, { "epoch": 0.1558478811556378, "grad_norm": 225.8473358154297, "learning_rate": 9.91045243455787e-06, "loss": 20.072, "step": 77150 }, { "epoch": 0.15586808178832162, "grad_norm": 206.98040771484375, "learning_rate": 9.910386655143285e-06, "loss": 23.4223, "step": 77160 }, { "epoch": 0.15588828242100541, "grad_norm": 20.053604125976562, "learning_rate": 9.91032085179607e-06, "loss": 21.0467, "step": 77170 }, { "epoch": 0.15590848305368923, "grad_norm": 496.5740966796875, "learning_rate": 9.910255024516546e-06, "loss": 29.8205, "step": 77180 }, { "epoch": 0.15592868368637305, "grad_norm": 574.7696533203125, "learning_rate": 9.910189173305035e-06, "loss": 26.5465, "step": 77190 }, { "epoch": 0.15594888431905687, "grad_norm": 361.0043029785156, "learning_rate": 9.91012329816186e-06, "loss": 33.0187, "step": 77200 }, { "epoch": 0.1559690849517407, "grad_norm": 1158.8739013671875, "learning_rate": 9.910057399087338e-06, "loss": 50.4988, "step": 77210 }, { "epoch": 0.15598928558442451, "grad_norm": 0.0, "learning_rate": 9.90999147608179e-06, "loss": 8.2458, "step": 77220 }, { "epoch": 0.1560094862171083, "grad_norm": 220.076171875, "learning_rate": 9.909925529145541e-06, "loss": 16.3636, "step": 77230 }, { "epoch": 0.15602968684979213, "grad_norm": 196.78509521484375, "learning_rate": 9.90985955827891e-06, "loss": 33.5524, "step": 77240 }, { "epoch": 0.15604988748247595, "grad_norm": 139.1849822998047, "learning_rate": 9.90979356348222e-06, "loss": 52.2446, "step": 77250 }, { "epoch": 0.15607008811515977, "grad_norm": 280.085205078125, "learning_rate": 9.909727544755789e-06, "loss": 19.1968, "step": 77260 }, { "epoch": 0.1560902887478436, "grad_norm": 338.23883056640625, "learning_rate": 9.909661502099943e-06, "loss": 17.976, "step": 77270 }, { "epoch": 0.1561104893805274, "grad_norm": 243.4253387451172, "learning_rate": 9.909595435515002e-06, "loss": 19.1929, "step": 77280 }, { "epoch": 0.15613069001321123, "grad_norm": 14.41247844696045, "learning_rate": 9.90952934500129e-06, "loss": 24.5665, "step": 77290 }, { "epoch": 0.15615089064589502, "grad_norm": 286.1709899902344, "learning_rate": 9.909463230559127e-06, "loss": 19.6789, "step": 77300 }, { "epoch": 0.15617109127857884, "grad_norm": 585.2762451171875, "learning_rate": 9.909397092188834e-06, "loss": 21.6996, "step": 77310 }, { "epoch": 0.15619129191126266, "grad_norm": 16.653564453125, "learning_rate": 9.909330929890734e-06, "loss": 12.9129, "step": 77320 }, { "epoch": 0.15621149254394648, "grad_norm": 554.5662231445312, "learning_rate": 9.909264743665153e-06, "loss": 35.5452, "step": 77330 }, { "epoch": 0.1562316931766303, "grad_norm": 0.0, "learning_rate": 9.90919853351241e-06, "loss": 34.6841, "step": 77340 }, { "epoch": 0.15625189380931412, "grad_norm": 1057.9351806640625, "learning_rate": 9.90913229943283e-06, "loss": 42.0297, "step": 77350 }, { "epoch": 0.1562720944419979, "grad_norm": 183.4393768310547, "learning_rate": 9.909066041426733e-06, "loss": 26.7767, "step": 77360 }, { "epoch": 0.15629229507468173, "grad_norm": 316.7025451660156, "learning_rate": 9.908999759494444e-06, "loss": 27.702, "step": 77370 }, { "epoch": 0.15631249570736555, "grad_norm": 387.78106689453125, "learning_rate": 9.908933453636287e-06, "loss": 25.5375, "step": 77380 }, { "epoch": 0.15633269634004937, "grad_norm": 136.0981903076172, "learning_rate": 9.90886712385258e-06, "loss": 18.6561, "step": 77390 }, { "epoch": 0.1563528969727332, "grad_norm": 296.9389953613281, "learning_rate": 9.908800770143654e-06, "loss": 35.1614, "step": 77400 }, { "epoch": 0.156373097605417, "grad_norm": 250.59197998046875, "learning_rate": 9.908734392509827e-06, "loss": 24.7248, "step": 77410 }, { "epoch": 0.15639329823810083, "grad_norm": 310.6629638671875, "learning_rate": 9.908667990951424e-06, "loss": 12.6592, "step": 77420 }, { "epoch": 0.15641349887078462, "grad_norm": 195.0734405517578, "learning_rate": 9.908601565468768e-06, "loss": 11.4622, "step": 77430 }, { "epoch": 0.15643369950346844, "grad_norm": 310.5344543457031, "learning_rate": 9.908535116062185e-06, "loss": 22.8923, "step": 77440 }, { "epoch": 0.15645390013615226, "grad_norm": 291.4944152832031, "learning_rate": 9.908468642731996e-06, "loss": 25.747, "step": 77450 }, { "epoch": 0.15647410076883608, "grad_norm": 622.6764526367188, "learning_rate": 9.908402145478526e-06, "loss": 27.8448, "step": 77460 }, { "epoch": 0.1564943014015199, "grad_norm": 84.3390121459961, "learning_rate": 9.908335624302099e-06, "loss": 16.1088, "step": 77470 }, { "epoch": 0.15651450203420372, "grad_norm": 450.2479553222656, "learning_rate": 9.908269079203039e-06, "loss": 31.7284, "step": 77480 }, { "epoch": 0.15653470266688752, "grad_norm": 894.72509765625, "learning_rate": 9.908202510181673e-06, "loss": 36.6593, "step": 77490 }, { "epoch": 0.15655490329957134, "grad_norm": 267.014892578125, "learning_rate": 9.908135917238321e-06, "loss": 22.2197, "step": 77500 }, { "epoch": 0.15657510393225516, "grad_norm": 163.3948516845703, "learning_rate": 9.90806930037331e-06, "loss": 20.6001, "step": 77510 }, { "epoch": 0.15659530456493898, "grad_norm": 1664.0909423828125, "learning_rate": 9.908002659586966e-06, "loss": 23.8483, "step": 77520 }, { "epoch": 0.1566155051976228, "grad_norm": 398.69464111328125, "learning_rate": 9.907935994879612e-06, "loss": 25.441, "step": 77530 }, { "epoch": 0.15663570583030662, "grad_norm": 135.42494201660156, "learning_rate": 9.907869306251571e-06, "loss": 30.2165, "step": 77540 }, { "epoch": 0.1566559064629904, "grad_norm": 305.9441833496094, "learning_rate": 9.907802593703173e-06, "loss": 15.5694, "step": 77550 }, { "epoch": 0.15667610709567423, "grad_norm": 874.8712768554688, "learning_rate": 9.90773585723474e-06, "loss": 25.9192, "step": 77560 }, { "epoch": 0.15669630772835805, "grad_norm": 305.59136962890625, "learning_rate": 9.907669096846596e-06, "loss": 25.8768, "step": 77570 }, { "epoch": 0.15671650836104187, "grad_norm": 362.0372314453125, "learning_rate": 9.90760231253907e-06, "loss": 23.3091, "step": 77580 }, { "epoch": 0.1567367089937257, "grad_norm": 150.18389892578125, "learning_rate": 9.907535504312484e-06, "loss": 11.6575, "step": 77590 }, { "epoch": 0.1567569096264095, "grad_norm": 104.10480499267578, "learning_rate": 9.907468672167165e-06, "loss": 27.7656, "step": 77600 }, { "epoch": 0.15677711025909333, "grad_norm": 430.8246154785156, "learning_rate": 9.90740181610344e-06, "loss": 26.649, "step": 77610 }, { "epoch": 0.15679731089177712, "grad_norm": 216.72146606445312, "learning_rate": 9.907334936121634e-06, "loss": 22.4477, "step": 77620 }, { "epoch": 0.15681751152446094, "grad_norm": 0.0, "learning_rate": 9.907268032222072e-06, "loss": 22.9646, "step": 77630 }, { "epoch": 0.15683771215714476, "grad_norm": 142.9793701171875, "learning_rate": 9.90720110440508e-06, "loss": 30.3685, "step": 77640 }, { "epoch": 0.15685791278982858, "grad_norm": 108.90454864501953, "learning_rate": 9.907134152670987e-06, "loss": 20.7053, "step": 77650 }, { "epoch": 0.1568781134225124, "grad_norm": 304.1000671386719, "learning_rate": 9.907067177020115e-06, "loss": 22.5135, "step": 77660 }, { "epoch": 0.15689831405519622, "grad_norm": 307.55450439453125, "learning_rate": 9.907000177452794e-06, "loss": 15.3403, "step": 77670 }, { "epoch": 0.15691851468788, "grad_norm": 204.1021270751953, "learning_rate": 9.90693315396935e-06, "loss": 17.6675, "step": 77680 }, { "epoch": 0.15693871532056383, "grad_norm": 524.37353515625, "learning_rate": 9.906866106570108e-06, "loss": 29.3534, "step": 77690 }, { "epoch": 0.15695891595324765, "grad_norm": 207.7183074951172, "learning_rate": 9.906799035255395e-06, "loss": 22.1489, "step": 77700 }, { "epoch": 0.15697911658593147, "grad_norm": 348.5693664550781, "learning_rate": 9.90673194002554e-06, "loss": 18.0789, "step": 77710 }, { "epoch": 0.1569993172186153, "grad_norm": 0.0, "learning_rate": 9.906664820880869e-06, "loss": 20.7953, "step": 77720 }, { "epoch": 0.1570195178512991, "grad_norm": 87.70877075195312, "learning_rate": 9.906597677821708e-06, "loss": 26.4692, "step": 77730 }, { "epoch": 0.15703971848398293, "grad_norm": 174.55470275878906, "learning_rate": 9.906530510848384e-06, "loss": 25.824, "step": 77740 }, { "epoch": 0.15705991911666672, "grad_norm": 319.6702880859375, "learning_rate": 9.906463319961225e-06, "loss": 37.2395, "step": 77750 }, { "epoch": 0.15708011974935054, "grad_norm": 121.48678588867188, "learning_rate": 9.906396105160561e-06, "loss": 18.412, "step": 77760 }, { "epoch": 0.15710032038203436, "grad_norm": 372.0241394042969, "learning_rate": 9.906328866446717e-06, "loss": 19.8534, "step": 77770 }, { "epoch": 0.15712052101471818, "grad_norm": 227.71495056152344, "learning_rate": 9.906261603820022e-06, "loss": 14.8766, "step": 77780 }, { "epoch": 0.157140721647402, "grad_norm": 44.970420837402344, "learning_rate": 9.906194317280802e-06, "loss": 27.1705, "step": 77790 }, { "epoch": 0.15716092228008582, "grad_norm": 92.33956909179688, "learning_rate": 9.906127006829385e-06, "loss": 11.7834, "step": 77800 }, { "epoch": 0.15718112291276962, "grad_norm": 553.8487548828125, "learning_rate": 9.9060596724661e-06, "loss": 20.5821, "step": 77810 }, { "epoch": 0.15720132354545344, "grad_norm": 163.576904296875, "learning_rate": 9.905992314191277e-06, "loss": 26.6047, "step": 77820 }, { "epoch": 0.15722152417813726, "grad_norm": 301.1156311035156, "learning_rate": 9.905924932005241e-06, "loss": 42.1435, "step": 77830 }, { "epoch": 0.15724172481082108, "grad_norm": 380.759033203125, "learning_rate": 9.905857525908322e-06, "loss": 17.2644, "step": 77840 }, { "epoch": 0.1572619254435049, "grad_norm": 250.01329040527344, "learning_rate": 9.905790095900849e-06, "loss": 23.6343, "step": 77850 }, { "epoch": 0.15728212607618872, "grad_norm": 542.8060913085938, "learning_rate": 9.905722641983151e-06, "loss": 20.6595, "step": 77860 }, { "epoch": 0.1573023267088725, "grad_norm": 366.47247314453125, "learning_rate": 9.905655164155554e-06, "loss": 32.7805, "step": 77870 }, { "epoch": 0.15732252734155633, "grad_norm": 728.276123046875, "learning_rate": 9.90558766241839e-06, "loss": 29.8618, "step": 77880 }, { "epoch": 0.15734272797424015, "grad_norm": 124.33809661865234, "learning_rate": 9.905520136771985e-06, "loss": 25.4725, "step": 77890 }, { "epoch": 0.15736292860692397, "grad_norm": 445.59503173828125, "learning_rate": 9.90545258721667e-06, "loss": 25.9569, "step": 77900 }, { "epoch": 0.1573831292396078, "grad_norm": 30.37824821472168, "learning_rate": 9.905385013752777e-06, "loss": 16.0552, "step": 77910 }, { "epoch": 0.1574033298722916, "grad_norm": 302.47076416015625, "learning_rate": 9.905317416380629e-06, "loss": 38.008, "step": 77920 }, { "epoch": 0.15742353050497543, "grad_norm": 872.95947265625, "learning_rate": 9.905249795100561e-06, "loss": 30.7689, "step": 77930 }, { "epoch": 0.15744373113765922, "grad_norm": 288.53094482421875, "learning_rate": 9.905182149912899e-06, "loss": 30.7893, "step": 77940 }, { "epoch": 0.15746393177034304, "grad_norm": 406.2919006347656, "learning_rate": 9.905114480817976e-06, "loss": 32.2709, "step": 77950 }, { "epoch": 0.15748413240302686, "grad_norm": 254.48304748535156, "learning_rate": 9.905046787816118e-06, "loss": 18.5877, "step": 77960 }, { "epoch": 0.15750433303571068, "grad_norm": 293.1307373046875, "learning_rate": 9.904979070907657e-06, "loss": 23.1883, "step": 77970 }, { "epoch": 0.1575245336683945, "grad_norm": 483.73492431640625, "learning_rate": 9.904911330092923e-06, "loss": 30.4014, "step": 77980 }, { "epoch": 0.15754473430107832, "grad_norm": 285.63458251953125, "learning_rate": 9.904843565372249e-06, "loss": 33.4594, "step": 77990 }, { "epoch": 0.1575649349337621, "grad_norm": 180.6461944580078, "learning_rate": 9.904775776745959e-06, "loss": 25.4667, "step": 78000 }, { "epoch": 0.15758513556644593, "grad_norm": 393.5180358886719, "learning_rate": 9.904707964214386e-06, "loss": 17.2262, "step": 78010 }, { "epoch": 0.15760533619912975, "grad_norm": 290.3833923339844, "learning_rate": 9.904640127777865e-06, "loss": 36.9872, "step": 78020 }, { "epoch": 0.15762553683181357, "grad_norm": 326.89349365234375, "learning_rate": 9.904572267436721e-06, "loss": 13.7387, "step": 78030 }, { "epoch": 0.1576457374644974, "grad_norm": 372.831787109375, "learning_rate": 9.904504383191286e-06, "loss": 12.063, "step": 78040 }, { "epoch": 0.1576659380971812, "grad_norm": 334.119140625, "learning_rate": 9.904436475041892e-06, "loss": 25.3495, "step": 78050 }, { "epoch": 0.15768613872986503, "grad_norm": 468.73004150390625, "learning_rate": 9.904368542988869e-06, "loss": 30.9935, "step": 78060 }, { "epoch": 0.15770633936254883, "grad_norm": 283.2841796875, "learning_rate": 9.90430058703255e-06, "loss": 26.2415, "step": 78070 }, { "epoch": 0.15772653999523265, "grad_norm": 229.72860717773438, "learning_rate": 9.904232607173262e-06, "loss": 23.8641, "step": 78080 }, { "epoch": 0.15774674062791647, "grad_norm": 13.974811553955078, "learning_rate": 9.90416460341134e-06, "loss": 22.6554, "step": 78090 }, { "epoch": 0.15776694126060029, "grad_norm": 562.4100341796875, "learning_rate": 9.904096575747117e-06, "loss": 33.4553, "step": 78100 }, { "epoch": 0.1577871418932841, "grad_norm": 446.45391845703125, "learning_rate": 9.90402852418092e-06, "loss": 29.6537, "step": 78110 }, { "epoch": 0.15780734252596793, "grad_norm": 187.55992126464844, "learning_rate": 9.903960448713084e-06, "loss": 20.1491, "step": 78120 }, { "epoch": 0.15782754315865172, "grad_norm": 68.31873321533203, "learning_rate": 9.903892349343938e-06, "loss": 45.8574, "step": 78130 }, { "epoch": 0.15784774379133554, "grad_norm": 239.15435791015625, "learning_rate": 9.903824226073816e-06, "loss": 26.5363, "step": 78140 }, { "epoch": 0.15786794442401936, "grad_norm": 287.8302307128906, "learning_rate": 9.90375607890305e-06, "loss": 16.545, "step": 78150 }, { "epoch": 0.15788814505670318, "grad_norm": 274.6324157714844, "learning_rate": 9.903687907831972e-06, "loss": 16.7302, "step": 78160 }, { "epoch": 0.157908345689387, "grad_norm": 500.4756774902344, "learning_rate": 9.903619712860912e-06, "loss": 36.5951, "step": 78170 }, { "epoch": 0.15792854632207082, "grad_norm": 648.75537109375, "learning_rate": 9.903551493990205e-06, "loss": 42.5038, "step": 78180 }, { "epoch": 0.1579487469547546, "grad_norm": 55.20969772338867, "learning_rate": 9.903483251220183e-06, "loss": 9.0323, "step": 78190 }, { "epoch": 0.15796894758743843, "grad_norm": 350.45098876953125, "learning_rate": 9.903414984551178e-06, "loss": 25.5319, "step": 78200 }, { "epoch": 0.15798914822012225, "grad_norm": 336.1378173828125, "learning_rate": 9.903346693983524e-06, "loss": 25.6374, "step": 78210 }, { "epoch": 0.15800934885280607, "grad_norm": 277.9574279785156, "learning_rate": 9.903278379517554e-06, "loss": 22.1648, "step": 78220 }, { "epoch": 0.1580295494854899, "grad_norm": 778.4917602539062, "learning_rate": 9.903210041153597e-06, "loss": 30.4893, "step": 78230 }, { "epoch": 0.1580497501181737, "grad_norm": 173.2406768798828, "learning_rate": 9.90314167889199e-06, "loss": 18.2213, "step": 78240 }, { "epoch": 0.15806995075085753, "grad_norm": 119.60963439941406, "learning_rate": 9.903073292733065e-06, "loss": 14.2673, "step": 78250 }, { "epoch": 0.15809015138354132, "grad_norm": 532.746826171875, "learning_rate": 9.903004882677157e-06, "loss": 26.8637, "step": 78260 }, { "epoch": 0.15811035201622514, "grad_norm": 325.25897216796875, "learning_rate": 9.902936448724596e-06, "loss": 34.0299, "step": 78270 }, { "epoch": 0.15813055264890896, "grad_norm": 143.3190460205078, "learning_rate": 9.90286799087572e-06, "loss": 14.9064, "step": 78280 }, { "epoch": 0.15815075328159278, "grad_norm": 302.5338134765625, "learning_rate": 9.902799509130857e-06, "loss": 19.4782, "step": 78290 }, { "epoch": 0.1581709539142766, "grad_norm": 244.98529052734375, "learning_rate": 9.902731003490344e-06, "loss": 19.9063, "step": 78300 }, { "epoch": 0.15819115454696042, "grad_norm": 257.07373046875, "learning_rate": 9.902662473954516e-06, "loss": 24.5142, "step": 78310 }, { "epoch": 0.15821135517964421, "grad_norm": 550.9244384765625, "learning_rate": 9.902593920523706e-06, "loss": 40.0246, "step": 78320 }, { "epoch": 0.15823155581232803, "grad_norm": 175.841796875, "learning_rate": 9.902525343198249e-06, "loss": 20.0277, "step": 78330 }, { "epoch": 0.15825175644501185, "grad_norm": 455.2513122558594, "learning_rate": 9.902456741978475e-06, "loss": 24.2129, "step": 78340 }, { "epoch": 0.15827195707769567, "grad_norm": 0.0, "learning_rate": 9.902388116864723e-06, "loss": 24.1444, "step": 78350 }, { "epoch": 0.1582921577103795, "grad_norm": 206.78802490234375, "learning_rate": 9.902319467857326e-06, "loss": 32.6032, "step": 78360 }, { "epoch": 0.15831235834306331, "grad_norm": 291.38677978515625, "learning_rate": 9.902250794956618e-06, "loss": 17.9107, "step": 78370 }, { "epoch": 0.15833255897574713, "grad_norm": 277.2306213378906, "learning_rate": 9.902182098162933e-06, "loss": 11.4953, "step": 78380 }, { "epoch": 0.15835275960843093, "grad_norm": 358.3746032714844, "learning_rate": 9.90211337747661e-06, "loss": 23.5522, "step": 78390 }, { "epoch": 0.15837296024111475, "grad_norm": 664.525390625, "learning_rate": 9.90204463289798e-06, "loss": 21.2484, "step": 78400 }, { "epoch": 0.15839316087379857, "grad_norm": 167.4664306640625, "learning_rate": 9.901975864427378e-06, "loss": 17.28, "step": 78410 }, { "epoch": 0.1584133615064824, "grad_norm": 251.64549255371094, "learning_rate": 9.90190707206514e-06, "loss": 30.9232, "step": 78420 }, { "epoch": 0.1584335621391662, "grad_norm": 364.5206604003906, "learning_rate": 9.901838255811602e-06, "loss": 22.1139, "step": 78430 }, { "epoch": 0.15845376277185003, "grad_norm": 457.7205810546875, "learning_rate": 9.9017694156671e-06, "loss": 27.0687, "step": 78440 }, { "epoch": 0.15847396340453382, "grad_norm": 390.2932434082031, "learning_rate": 9.901700551631966e-06, "loss": 23.843, "step": 78450 }, { "epoch": 0.15849416403721764, "grad_norm": 59.90983581542969, "learning_rate": 9.901631663706539e-06, "loss": 19.8026, "step": 78460 }, { "epoch": 0.15851436466990146, "grad_norm": 0.0, "learning_rate": 9.901562751891155e-06, "loss": 17.1488, "step": 78470 }, { "epoch": 0.15853456530258528, "grad_norm": 227.6846923828125, "learning_rate": 9.901493816186148e-06, "loss": 26.771, "step": 78480 }, { "epoch": 0.1585547659352691, "grad_norm": 193.66342163085938, "learning_rate": 9.901424856591855e-06, "loss": 11.5987, "step": 78490 }, { "epoch": 0.15857496656795292, "grad_norm": 82.53070068359375, "learning_rate": 9.901355873108611e-06, "loss": 16.1427, "step": 78500 }, { "epoch": 0.1585951672006367, "grad_norm": 307.8499450683594, "learning_rate": 9.901286865736752e-06, "loss": 25.9792, "step": 78510 }, { "epoch": 0.15861536783332053, "grad_norm": 141.9443817138672, "learning_rate": 9.901217834476616e-06, "loss": 23.1482, "step": 78520 }, { "epoch": 0.15863556846600435, "grad_norm": 193.13128662109375, "learning_rate": 9.90114877932854e-06, "loss": 24.1832, "step": 78530 }, { "epoch": 0.15865576909868817, "grad_norm": 961.600341796875, "learning_rate": 9.901079700292858e-06, "loss": 24.4098, "step": 78540 }, { "epoch": 0.158675969731372, "grad_norm": 810.603759765625, "learning_rate": 9.901010597369908e-06, "loss": 26.2459, "step": 78550 }, { "epoch": 0.1586961703640558, "grad_norm": 144.28564453125, "learning_rate": 9.900941470560025e-06, "loss": 20.496, "step": 78560 }, { "epoch": 0.15871637099673963, "grad_norm": 183.66050720214844, "learning_rate": 9.900872319863551e-06, "loss": 10.9152, "step": 78570 }, { "epoch": 0.15873657162942342, "grad_norm": 303.0276184082031, "learning_rate": 9.90080314528082e-06, "loss": 21.0455, "step": 78580 }, { "epoch": 0.15875677226210724, "grad_norm": 294.7305603027344, "learning_rate": 9.900733946812167e-06, "loss": 24.5111, "step": 78590 }, { "epoch": 0.15877697289479106, "grad_norm": 197.31326293945312, "learning_rate": 9.900664724457932e-06, "loss": 32.2693, "step": 78600 }, { "epoch": 0.15879717352747488, "grad_norm": 231.70770263671875, "learning_rate": 9.900595478218449e-06, "loss": 15.751, "step": 78610 }, { "epoch": 0.1588173741601587, "grad_norm": 276.1138610839844, "learning_rate": 9.900526208094061e-06, "loss": 19.6376, "step": 78620 }, { "epoch": 0.15883757479284252, "grad_norm": 534.7863159179688, "learning_rate": 9.900456914085101e-06, "loss": 26.7315, "step": 78630 }, { "epoch": 0.15885777542552632, "grad_norm": 421.7462158203125, "learning_rate": 9.90038759619191e-06, "loss": 16.7052, "step": 78640 }, { "epoch": 0.15887797605821014, "grad_norm": 91.12374877929688, "learning_rate": 9.900318254414823e-06, "loss": 14.1424, "step": 78650 }, { "epoch": 0.15889817669089396, "grad_norm": 87.20350646972656, "learning_rate": 9.900248888754179e-06, "loss": 35.8224, "step": 78660 }, { "epoch": 0.15891837732357778, "grad_norm": 228.17660522460938, "learning_rate": 9.900179499210316e-06, "loss": 24.2139, "step": 78670 }, { "epoch": 0.1589385779562616, "grad_norm": 655.0401000976562, "learning_rate": 9.900110085783573e-06, "loss": 19.3671, "step": 78680 }, { "epoch": 0.15895877858894542, "grad_norm": 255.26931762695312, "learning_rate": 9.900040648474287e-06, "loss": 36.3776, "step": 78690 }, { "epoch": 0.15897897922162924, "grad_norm": 626.984619140625, "learning_rate": 9.899971187282799e-06, "loss": 42.1527, "step": 78700 }, { "epoch": 0.15899917985431303, "grad_norm": 273.1549987792969, "learning_rate": 9.899901702209445e-06, "loss": 20.3207, "step": 78710 }, { "epoch": 0.15901938048699685, "grad_norm": 518.8218383789062, "learning_rate": 9.899832193254564e-06, "loss": 35.112, "step": 78720 }, { "epoch": 0.15903958111968067, "grad_norm": 524.879150390625, "learning_rate": 9.899762660418495e-06, "loss": 26.5123, "step": 78730 }, { "epoch": 0.1590597817523645, "grad_norm": 385.1907043457031, "learning_rate": 9.899693103701577e-06, "loss": 28.2033, "step": 78740 }, { "epoch": 0.1590799823850483, "grad_norm": 211.6658477783203, "learning_rate": 9.899623523104149e-06, "loss": 16.0751, "step": 78750 }, { "epoch": 0.15910018301773213, "grad_norm": 210.55941772460938, "learning_rate": 9.89955391862655e-06, "loss": 23.4252, "step": 78760 }, { "epoch": 0.15912038365041592, "grad_norm": 498.5218505859375, "learning_rate": 9.89948429026912e-06, "loss": 28.2865, "step": 78770 }, { "epoch": 0.15914058428309974, "grad_norm": 293.834716796875, "learning_rate": 9.8994146380322e-06, "loss": 24.0819, "step": 78780 }, { "epoch": 0.15916078491578356, "grad_norm": 126.66592407226562, "learning_rate": 9.899344961916123e-06, "loss": 24.8249, "step": 78790 }, { "epoch": 0.15918098554846738, "grad_norm": 248.39149475097656, "learning_rate": 9.899275261921236e-06, "loss": 15.5919, "step": 78800 }, { "epoch": 0.1592011861811512, "grad_norm": 122.49162292480469, "learning_rate": 9.899205538047873e-06, "loss": 20.9407, "step": 78810 }, { "epoch": 0.15922138681383502, "grad_norm": 115.37190246582031, "learning_rate": 9.899135790296379e-06, "loss": 27.0949, "step": 78820 }, { "epoch": 0.1592415874465188, "grad_norm": 323.1927795410156, "learning_rate": 9.89906601866709e-06, "loss": 19.5396, "step": 78830 }, { "epoch": 0.15926178807920263, "grad_norm": 386.13226318359375, "learning_rate": 9.898996223160348e-06, "loss": 27.6209, "step": 78840 }, { "epoch": 0.15928198871188645, "grad_norm": 409.11749267578125, "learning_rate": 9.898926403776492e-06, "loss": 37.1976, "step": 78850 }, { "epoch": 0.15930218934457027, "grad_norm": 353.06341552734375, "learning_rate": 9.898856560515864e-06, "loss": 40.5408, "step": 78860 }, { "epoch": 0.1593223899772541, "grad_norm": 228.47926330566406, "learning_rate": 9.898786693378801e-06, "loss": 19.0287, "step": 78870 }, { "epoch": 0.1593425906099379, "grad_norm": 151.72991943359375, "learning_rate": 9.898716802365648e-06, "loss": 28.3832, "step": 78880 }, { "epoch": 0.15936279124262173, "grad_norm": 686.2725830078125, "learning_rate": 9.898646887476742e-06, "loss": 34.1943, "step": 78890 }, { "epoch": 0.15938299187530552, "grad_norm": 278.0683898925781, "learning_rate": 9.898576948712427e-06, "loss": 15.0278, "step": 78900 }, { "epoch": 0.15940319250798934, "grad_norm": 980.3809814453125, "learning_rate": 9.89850698607304e-06, "loss": 63.3683, "step": 78910 }, { "epoch": 0.15942339314067316, "grad_norm": 268.85980224609375, "learning_rate": 9.898436999558924e-06, "loss": 30.0864, "step": 78920 }, { "epoch": 0.15944359377335698, "grad_norm": 235.75758361816406, "learning_rate": 9.898366989170423e-06, "loss": 27.5428, "step": 78930 }, { "epoch": 0.1594637944060408, "grad_norm": 549.0601196289062, "learning_rate": 9.898296954907874e-06, "loss": 32.0666, "step": 78940 }, { "epoch": 0.15948399503872462, "grad_norm": 468.0362548828125, "learning_rate": 9.898226896771619e-06, "loss": 32.5472, "step": 78950 }, { "epoch": 0.15950419567140842, "grad_norm": 1192.0595703125, "learning_rate": 9.898156814762e-06, "loss": 26.3997, "step": 78960 }, { "epoch": 0.15952439630409224, "grad_norm": 269.3596496582031, "learning_rate": 9.898086708879359e-06, "loss": 21.7946, "step": 78970 }, { "epoch": 0.15954459693677606, "grad_norm": 388.3513488769531, "learning_rate": 9.898016579124039e-06, "loss": 14.6609, "step": 78980 }, { "epoch": 0.15956479756945988, "grad_norm": 98.30065155029297, "learning_rate": 9.897946425496379e-06, "loss": 19.4645, "step": 78990 }, { "epoch": 0.1595849982021437, "grad_norm": 322.4309997558594, "learning_rate": 9.89787624799672e-06, "loss": 17.2494, "step": 79000 }, { "epoch": 0.15960519883482752, "grad_norm": 102.33526611328125, "learning_rate": 9.897806046625408e-06, "loss": 18.9696, "step": 79010 }, { "epoch": 0.1596253994675113, "grad_norm": 340.1557312011719, "learning_rate": 9.897735821382786e-06, "loss": 26.1355, "step": 79020 }, { "epoch": 0.15964560010019513, "grad_norm": 132.0850067138672, "learning_rate": 9.89766557226919e-06, "loss": 20.3174, "step": 79030 }, { "epoch": 0.15966580073287895, "grad_norm": 151.599609375, "learning_rate": 9.897595299284968e-06, "loss": 25.9709, "step": 79040 }, { "epoch": 0.15968600136556277, "grad_norm": 255.3791961669922, "learning_rate": 9.897525002430459e-06, "loss": 39.2787, "step": 79050 }, { "epoch": 0.1597062019982466, "grad_norm": 279.4506530761719, "learning_rate": 9.89745468170601e-06, "loss": 22.974, "step": 79060 }, { "epoch": 0.1597264026309304, "grad_norm": 437.998779296875, "learning_rate": 9.897384337111956e-06, "loss": 27.4569, "step": 79070 }, { "epoch": 0.15974660326361423, "grad_norm": 378.3663635253906, "learning_rate": 9.89731396864865e-06, "loss": 24.425, "step": 79080 }, { "epoch": 0.15976680389629802, "grad_norm": 324.250732421875, "learning_rate": 9.897243576316426e-06, "loss": 31.6584, "step": 79090 }, { "epoch": 0.15978700452898184, "grad_norm": 117.89221954345703, "learning_rate": 9.897173160115633e-06, "loss": 20.7133, "step": 79100 }, { "epoch": 0.15980720516166566, "grad_norm": 292.5522766113281, "learning_rate": 9.89710272004661e-06, "loss": 29.6966, "step": 79110 }, { "epoch": 0.15982740579434948, "grad_norm": 313.3870544433594, "learning_rate": 9.897032256109705e-06, "loss": 21.5904, "step": 79120 }, { "epoch": 0.1598476064270333, "grad_norm": 99.99504089355469, "learning_rate": 9.896961768305255e-06, "loss": 36.7773, "step": 79130 }, { "epoch": 0.15986780705971712, "grad_norm": 11.042187690734863, "learning_rate": 9.89689125663361e-06, "loss": 15.7367, "step": 79140 }, { "epoch": 0.1598880076924009, "grad_norm": 131.44537353515625, "learning_rate": 9.89682072109511e-06, "loss": 38.1357, "step": 79150 }, { "epoch": 0.15990820832508473, "grad_norm": 640.0494384765625, "learning_rate": 9.8967501616901e-06, "loss": 33.1062, "step": 79160 }, { "epoch": 0.15992840895776855, "grad_norm": 210.3061065673828, "learning_rate": 9.896679578418924e-06, "loss": 38.8052, "step": 79170 }, { "epoch": 0.15994860959045237, "grad_norm": 176.02792358398438, "learning_rate": 9.896608971281926e-06, "loss": 14.9175, "step": 79180 }, { "epoch": 0.1599688102231362, "grad_norm": 339.4121398925781, "learning_rate": 9.896538340279449e-06, "loss": 21.1519, "step": 79190 }, { "epoch": 0.15998901085582, "grad_norm": 123.92938995361328, "learning_rate": 9.896467685411838e-06, "loss": 16.2993, "step": 79200 }, { "epoch": 0.16000921148850383, "grad_norm": 164.72332763671875, "learning_rate": 9.896397006679437e-06, "loss": 25.2167, "step": 79210 }, { "epoch": 0.16002941212118763, "grad_norm": 288.5563049316406, "learning_rate": 9.89632630408259e-06, "loss": 24.5294, "step": 79220 }, { "epoch": 0.16004961275387145, "grad_norm": 362.49676513671875, "learning_rate": 9.896255577621646e-06, "loss": 39.2996, "step": 79230 }, { "epoch": 0.16006981338655527, "grad_norm": 138.41302490234375, "learning_rate": 9.896184827296942e-06, "loss": 21.0374, "step": 79240 }, { "epoch": 0.16009001401923909, "grad_norm": 642.0553588867188, "learning_rate": 9.89611405310883e-06, "loss": 38.9641, "step": 79250 }, { "epoch": 0.1601102146519229, "grad_norm": 232.47686767578125, "learning_rate": 9.89604325505765e-06, "loss": 23.2717, "step": 79260 }, { "epoch": 0.16013041528460673, "grad_norm": 201.1175079345703, "learning_rate": 9.89597243314375e-06, "loss": 22.3984, "step": 79270 }, { "epoch": 0.16015061591729052, "grad_norm": 772.5776977539062, "learning_rate": 9.895901587367473e-06, "loss": 27.7033, "step": 79280 }, { "epoch": 0.16017081654997434, "grad_norm": 193.57681274414062, "learning_rate": 9.895830717729166e-06, "loss": 25.7225, "step": 79290 }, { "epoch": 0.16019101718265816, "grad_norm": 327.1907958984375, "learning_rate": 9.895759824229176e-06, "loss": 23.3995, "step": 79300 }, { "epoch": 0.16021121781534198, "grad_norm": 218.80909729003906, "learning_rate": 9.895688906867844e-06, "loss": 21.6849, "step": 79310 }, { "epoch": 0.1602314184480258, "grad_norm": 526.1359252929688, "learning_rate": 9.89561796564552e-06, "loss": 34.8581, "step": 79320 }, { "epoch": 0.16025161908070962, "grad_norm": 196.8597412109375, "learning_rate": 9.895547000562546e-06, "loss": 21.5634, "step": 79330 }, { "epoch": 0.1602718197133934, "grad_norm": 239.510986328125, "learning_rate": 9.895476011619269e-06, "loss": 34.9804, "step": 79340 }, { "epoch": 0.16029202034607723, "grad_norm": 227.97206115722656, "learning_rate": 9.895404998816038e-06, "loss": 24.5847, "step": 79350 }, { "epoch": 0.16031222097876105, "grad_norm": 257.0469055175781, "learning_rate": 9.895333962153195e-06, "loss": 18.7783, "step": 79360 }, { "epoch": 0.16033242161144487, "grad_norm": 50.597557067871094, "learning_rate": 9.895262901631088e-06, "loss": 22.2979, "step": 79370 }, { "epoch": 0.1603526222441287, "grad_norm": 174.1249237060547, "learning_rate": 9.895191817250064e-06, "loss": 24.1611, "step": 79380 }, { "epoch": 0.1603728228768125, "grad_norm": 106.97676849365234, "learning_rate": 9.89512070901047e-06, "loss": 16.1988, "step": 79390 }, { "epoch": 0.16039302350949633, "grad_norm": 286.0371398925781, "learning_rate": 9.89504957691265e-06, "loss": 17.0589, "step": 79400 }, { "epoch": 0.16041322414218012, "grad_norm": 136.12533569335938, "learning_rate": 9.894978420956953e-06, "loss": 13.7769, "step": 79410 }, { "epoch": 0.16043342477486394, "grad_norm": 404.1493225097656, "learning_rate": 9.894907241143722e-06, "loss": 15.6453, "step": 79420 }, { "epoch": 0.16045362540754776, "grad_norm": 323.4573974609375, "learning_rate": 9.89483603747331e-06, "loss": 30.2034, "step": 79430 }, { "epoch": 0.16047382604023158, "grad_norm": 180.85203552246094, "learning_rate": 9.89476480994606e-06, "loss": 36.8373, "step": 79440 }, { "epoch": 0.1604940266729154, "grad_norm": 455.7139587402344, "learning_rate": 9.894693558562319e-06, "loss": 18.5815, "step": 79450 }, { "epoch": 0.16051422730559922, "grad_norm": 565.9526977539062, "learning_rate": 9.894622283322436e-06, "loss": 28.9462, "step": 79460 }, { "epoch": 0.16053442793828301, "grad_norm": 245.9419403076172, "learning_rate": 9.894550984226759e-06, "loss": 22.8166, "step": 79470 }, { "epoch": 0.16055462857096683, "grad_norm": 285.9190979003906, "learning_rate": 9.894479661275631e-06, "loss": 20.774, "step": 79480 }, { "epoch": 0.16057482920365065, "grad_norm": 296.0243225097656, "learning_rate": 9.894408314469404e-06, "loss": 27.4234, "step": 79490 }, { "epoch": 0.16059502983633447, "grad_norm": 408.56951904296875, "learning_rate": 9.894336943808426e-06, "loss": 30.9837, "step": 79500 }, { "epoch": 0.1606152304690183, "grad_norm": 597.5803833007812, "learning_rate": 9.894265549293043e-06, "loss": 20.8736, "step": 79510 }, { "epoch": 0.16063543110170211, "grad_norm": 153.14039611816406, "learning_rate": 9.894194130923602e-06, "loss": 22.1242, "step": 79520 }, { "epoch": 0.16065563173438593, "grad_norm": 687.3168334960938, "learning_rate": 9.894122688700452e-06, "loss": 50.4978, "step": 79530 }, { "epoch": 0.16067583236706973, "grad_norm": 255.63487243652344, "learning_rate": 9.894051222623943e-06, "loss": 14.704, "step": 79540 }, { "epoch": 0.16069603299975355, "grad_norm": 420.9255676269531, "learning_rate": 9.893979732694422e-06, "loss": 33.5102, "step": 79550 }, { "epoch": 0.16071623363243737, "grad_norm": 265.6815185546875, "learning_rate": 9.893908218912237e-06, "loss": 30.6763, "step": 79560 }, { "epoch": 0.1607364342651212, "grad_norm": 610.089111328125, "learning_rate": 9.893836681277736e-06, "loss": 16.063, "step": 79570 }, { "epoch": 0.160756634897805, "grad_norm": 499.9600830078125, "learning_rate": 9.89376511979127e-06, "loss": 16.8185, "step": 79580 }, { "epoch": 0.16077683553048883, "grad_norm": 153.86932373046875, "learning_rate": 9.893693534453186e-06, "loss": 23.5216, "step": 79590 }, { "epoch": 0.16079703616317262, "grad_norm": 340.69171142578125, "learning_rate": 9.893621925263832e-06, "loss": 20.7776, "step": 79600 }, { "epoch": 0.16081723679585644, "grad_norm": 218.74844360351562, "learning_rate": 9.89355029222356e-06, "loss": 18.6324, "step": 79610 }, { "epoch": 0.16083743742854026, "grad_norm": 181.83973693847656, "learning_rate": 9.893478635332716e-06, "loss": 21.6528, "step": 79620 }, { "epoch": 0.16085763806122408, "grad_norm": 466.6626281738281, "learning_rate": 9.893406954591651e-06, "loss": 29.3477, "step": 79630 }, { "epoch": 0.1608778386939079, "grad_norm": 242.06484985351562, "learning_rate": 9.893335250000715e-06, "loss": 13.9332, "step": 79640 }, { "epoch": 0.16089803932659172, "grad_norm": 171.4771728515625, "learning_rate": 9.893263521560255e-06, "loss": 33.8595, "step": 79650 }, { "epoch": 0.1609182399592755, "grad_norm": 272.8205261230469, "learning_rate": 9.893191769270624e-06, "loss": 18.6067, "step": 79660 }, { "epoch": 0.16093844059195933, "grad_norm": 186.5063018798828, "learning_rate": 9.893119993132167e-06, "loss": 34.3874, "step": 79670 }, { "epoch": 0.16095864122464315, "grad_norm": 158.88882446289062, "learning_rate": 9.89304819314524e-06, "loss": 30.803, "step": 79680 }, { "epoch": 0.16097884185732697, "grad_norm": 203.2928466796875, "learning_rate": 9.892976369310188e-06, "loss": 17.0838, "step": 79690 }, { "epoch": 0.1609990424900108, "grad_norm": 649.178955078125, "learning_rate": 9.89290452162736e-06, "loss": 18.4793, "step": 79700 }, { "epoch": 0.1610192431226946, "grad_norm": 360.7314147949219, "learning_rate": 9.892832650097113e-06, "loss": 13.0618, "step": 79710 }, { "epoch": 0.16103944375537843, "grad_norm": 214.683837890625, "learning_rate": 9.89276075471979e-06, "loss": 17.9819, "step": 79720 }, { "epoch": 0.16105964438806222, "grad_norm": 228.68344116210938, "learning_rate": 9.892688835495747e-06, "loss": 35.0442, "step": 79730 }, { "epoch": 0.16107984502074604, "grad_norm": 418.1957092285156, "learning_rate": 9.89261689242533e-06, "loss": 23.57, "step": 79740 }, { "epoch": 0.16110004565342986, "grad_norm": 304.65899658203125, "learning_rate": 9.892544925508894e-06, "loss": 16.1013, "step": 79750 }, { "epoch": 0.16112024628611368, "grad_norm": 208.30052185058594, "learning_rate": 9.892472934746784e-06, "loss": 27.2053, "step": 79760 }, { "epoch": 0.1611404469187975, "grad_norm": 53.07558059692383, "learning_rate": 9.892400920139357e-06, "loss": 31.0708, "step": 79770 }, { "epoch": 0.16116064755148132, "grad_norm": 75.84584045410156, "learning_rate": 9.892328881686961e-06, "loss": 17.7935, "step": 79780 }, { "epoch": 0.16118084818416512, "grad_norm": 397.0741271972656, "learning_rate": 9.892256819389947e-06, "loss": 30.3217, "step": 79790 }, { "epoch": 0.16120104881684894, "grad_norm": 173.03497314453125, "learning_rate": 9.892184733248666e-06, "loss": 20.5065, "step": 79800 }, { "epoch": 0.16122124944953276, "grad_norm": 461.6858215332031, "learning_rate": 9.89211262326347e-06, "loss": 29.9561, "step": 79810 }, { "epoch": 0.16124145008221658, "grad_norm": 295.36737060546875, "learning_rate": 9.892040489434711e-06, "loss": 32.8787, "step": 79820 }, { "epoch": 0.1612616507149004, "grad_norm": 443.5086975097656, "learning_rate": 9.89196833176274e-06, "loss": 33.0394, "step": 79830 }, { "epoch": 0.16128185134758422, "grad_norm": 143.6066131591797, "learning_rate": 9.891896150247909e-06, "loss": 12.4552, "step": 79840 }, { "epoch": 0.16130205198026804, "grad_norm": 395.365234375, "learning_rate": 9.891823944890569e-06, "loss": 19.5851, "step": 79850 }, { "epoch": 0.16132225261295183, "grad_norm": 0.0, "learning_rate": 9.891751715691071e-06, "loss": 34.2382, "step": 79860 }, { "epoch": 0.16134245324563565, "grad_norm": 45.12834167480469, "learning_rate": 9.89167946264977e-06, "loss": 15.5762, "step": 79870 }, { "epoch": 0.16136265387831947, "grad_norm": 256.7513732910156, "learning_rate": 9.891607185767018e-06, "loss": 28.3777, "step": 79880 }, { "epoch": 0.1613828545110033, "grad_norm": 793.5049438476562, "learning_rate": 9.891534885043164e-06, "loss": 42.0422, "step": 79890 }, { "epoch": 0.1614030551436871, "grad_norm": 418.63909912109375, "learning_rate": 9.891462560478562e-06, "loss": 16.5695, "step": 79900 }, { "epoch": 0.16142325577637093, "grad_norm": 243.2330780029297, "learning_rate": 9.891390212073566e-06, "loss": 18.4892, "step": 79910 }, { "epoch": 0.16144345640905472, "grad_norm": 357.662841796875, "learning_rate": 9.891317839828527e-06, "loss": 45.7515, "step": 79920 }, { "epoch": 0.16146365704173854, "grad_norm": 200.64755249023438, "learning_rate": 9.891245443743797e-06, "loss": 36.2153, "step": 79930 }, { "epoch": 0.16148385767442236, "grad_norm": 478.99285888671875, "learning_rate": 9.891173023819731e-06, "loss": 30.0824, "step": 79940 }, { "epoch": 0.16150405830710618, "grad_norm": 374.90386962890625, "learning_rate": 9.891100580056681e-06, "loss": 33.3652, "step": 79950 }, { "epoch": 0.16152425893979, "grad_norm": 163.4543914794922, "learning_rate": 9.891028112454998e-06, "loss": 29.5467, "step": 79960 }, { "epoch": 0.16154445957247382, "grad_norm": 103.79144287109375, "learning_rate": 9.890955621015039e-06, "loss": 37.0293, "step": 79970 }, { "epoch": 0.1615646602051576, "grad_norm": 131.85476684570312, "learning_rate": 9.890883105737156e-06, "loss": 31.054, "step": 79980 }, { "epoch": 0.16158486083784143, "grad_norm": 433.762939453125, "learning_rate": 9.890810566621702e-06, "loss": 31.5571, "step": 79990 }, { "epoch": 0.16160506147052525, "grad_norm": 146.28781127929688, "learning_rate": 9.890738003669029e-06, "loss": 13.6598, "step": 80000 }, { "epoch": 0.16162526210320907, "grad_norm": 405.04400634765625, "learning_rate": 9.890665416879492e-06, "loss": 30.8783, "step": 80010 }, { "epoch": 0.1616454627358929, "grad_norm": 146.69900512695312, "learning_rate": 9.890592806253447e-06, "loss": 10.3258, "step": 80020 }, { "epoch": 0.1616656633685767, "grad_norm": 296.2576904296875, "learning_rate": 9.890520171791244e-06, "loss": 29.4906, "step": 80030 }, { "epoch": 0.16168586400126053, "grad_norm": 226.18460083007812, "learning_rate": 9.89044751349324e-06, "loss": 19.2447, "step": 80040 }, { "epoch": 0.16170606463394432, "grad_norm": 428.83612060546875, "learning_rate": 9.890374831359787e-06, "loss": 21.8972, "step": 80050 }, { "epoch": 0.16172626526662814, "grad_norm": 46.6389045715332, "learning_rate": 9.89030212539124e-06, "loss": 19.4337, "step": 80060 }, { "epoch": 0.16174646589931196, "grad_norm": 105.94498443603516, "learning_rate": 9.890229395587954e-06, "loss": 22.3748, "step": 80070 }, { "epoch": 0.16176666653199578, "grad_norm": 370.9466857910156, "learning_rate": 9.890156641950284e-06, "loss": 18.8669, "step": 80080 }, { "epoch": 0.1617868671646796, "grad_norm": 119.1142578125, "learning_rate": 9.890083864478584e-06, "loss": 30.5122, "step": 80090 }, { "epoch": 0.16180706779736342, "grad_norm": 377.0303039550781, "learning_rate": 9.890011063173207e-06, "loss": 35.2885, "step": 80100 }, { "epoch": 0.16182726843004722, "grad_norm": 802.4269409179688, "learning_rate": 9.889938238034509e-06, "loss": 25.1109, "step": 80110 }, { "epoch": 0.16184746906273104, "grad_norm": 279.87042236328125, "learning_rate": 9.889865389062845e-06, "loss": 23.9394, "step": 80120 }, { "epoch": 0.16186766969541486, "grad_norm": 240.43170166015625, "learning_rate": 9.889792516258571e-06, "loss": 22.1724, "step": 80130 }, { "epoch": 0.16188787032809868, "grad_norm": 262.88372802734375, "learning_rate": 9.88971961962204e-06, "loss": 15.4759, "step": 80140 }, { "epoch": 0.1619080709607825, "grad_norm": 349.3766784667969, "learning_rate": 9.88964669915361e-06, "loss": 18.3533, "step": 80150 }, { "epoch": 0.16192827159346632, "grad_norm": 512.00146484375, "learning_rate": 9.889573754853633e-06, "loss": 28.0475, "step": 80160 }, { "epoch": 0.16194847222615014, "grad_norm": 180.49630737304688, "learning_rate": 9.889500786722471e-06, "loss": 16.3905, "step": 80170 }, { "epoch": 0.16196867285883393, "grad_norm": 331.3290710449219, "learning_rate": 9.889427794760472e-06, "loss": 17.3083, "step": 80180 }, { "epoch": 0.16198887349151775, "grad_norm": 160.41104125976562, "learning_rate": 9.889354778967995e-06, "loss": 18.3554, "step": 80190 }, { "epoch": 0.16200907412420157, "grad_norm": 218.92311096191406, "learning_rate": 9.889281739345395e-06, "loss": 16.3536, "step": 80200 }, { "epoch": 0.1620292747568854, "grad_norm": 198.17308044433594, "learning_rate": 9.88920867589303e-06, "loss": 11.1539, "step": 80210 }, { "epoch": 0.1620494753895692, "grad_norm": 228.3618621826172, "learning_rate": 9.889135588611254e-06, "loss": 29.536, "step": 80220 }, { "epoch": 0.16206967602225303, "grad_norm": 404.361083984375, "learning_rate": 9.889062477500425e-06, "loss": 34.7887, "step": 80230 }, { "epoch": 0.16208987665493682, "grad_norm": 147.97549438476562, "learning_rate": 9.8889893425609e-06, "loss": 19.7953, "step": 80240 }, { "epoch": 0.16211007728762064, "grad_norm": 396.1574401855469, "learning_rate": 9.88891618379303e-06, "loss": 35.8678, "step": 80250 }, { "epoch": 0.16213027792030446, "grad_norm": 505.1474304199219, "learning_rate": 9.88884300119718e-06, "loss": 29.7115, "step": 80260 }, { "epoch": 0.16215047855298828, "grad_norm": 216.63259887695312, "learning_rate": 9.888769794773699e-06, "loss": 44.9501, "step": 80270 }, { "epoch": 0.1621706791856721, "grad_norm": 185.27220153808594, "learning_rate": 9.888696564522948e-06, "loss": 30.4614, "step": 80280 }, { "epoch": 0.16219087981835592, "grad_norm": 697.4872436523438, "learning_rate": 9.888623310445282e-06, "loss": 44.8921, "step": 80290 }, { "epoch": 0.1622110804510397, "grad_norm": 310.7380065917969, "learning_rate": 9.88855003254106e-06, "loss": 14.2312, "step": 80300 }, { "epoch": 0.16223128108372353, "grad_norm": 522.4017944335938, "learning_rate": 9.88847673081064e-06, "loss": 29.1832, "step": 80310 }, { "epoch": 0.16225148171640735, "grad_norm": 287.394287109375, "learning_rate": 9.888403405254374e-06, "loss": 32.2139, "step": 80320 }, { "epoch": 0.16227168234909117, "grad_norm": 200.25411987304688, "learning_rate": 9.888330055872623e-06, "loss": 27.5293, "step": 80330 }, { "epoch": 0.162291882981775, "grad_norm": 317.7895812988281, "learning_rate": 9.888256682665744e-06, "loss": 36.9938, "step": 80340 }, { "epoch": 0.1623120836144588, "grad_norm": 520.1140747070312, "learning_rate": 9.888183285634097e-06, "loss": 24.2919, "step": 80350 }, { "epoch": 0.16233228424714263, "grad_norm": 239.23446655273438, "learning_rate": 9.888109864778036e-06, "loss": 17.6143, "step": 80360 }, { "epoch": 0.16235248487982643, "grad_norm": 401.1061706542969, "learning_rate": 9.88803642009792e-06, "loss": 18.9185, "step": 80370 }, { "epoch": 0.16237268551251025, "grad_norm": 390.7381286621094, "learning_rate": 9.887962951594108e-06, "loss": 23.9172, "step": 80380 }, { "epoch": 0.16239288614519407, "grad_norm": 245.8197021484375, "learning_rate": 9.887889459266957e-06, "loss": 21.8135, "step": 80390 }, { "epoch": 0.16241308677787789, "grad_norm": 338.093017578125, "learning_rate": 9.887815943116827e-06, "loss": 32.026, "step": 80400 }, { "epoch": 0.1624332874105617, "grad_norm": 329.7839660644531, "learning_rate": 9.887742403144074e-06, "loss": 21.0404, "step": 80410 }, { "epoch": 0.16245348804324553, "grad_norm": 613.2798461914062, "learning_rate": 9.887668839349057e-06, "loss": 26.6171, "step": 80420 }, { "epoch": 0.16247368867592932, "grad_norm": 568.7392578125, "learning_rate": 9.887595251732135e-06, "loss": 30.9465, "step": 80430 }, { "epoch": 0.16249388930861314, "grad_norm": 129.49044799804688, "learning_rate": 9.887521640293668e-06, "loss": 16.8359, "step": 80440 }, { "epoch": 0.16251408994129696, "grad_norm": 287.7742614746094, "learning_rate": 9.887448005034011e-06, "loss": 20.0484, "step": 80450 }, { "epoch": 0.16253429057398078, "grad_norm": 103.6380386352539, "learning_rate": 9.887374345953526e-06, "loss": 16.0436, "step": 80460 }, { "epoch": 0.1625544912066646, "grad_norm": 362.8030090332031, "learning_rate": 9.88730066305257e-06, "loss": 21.0072, "step": 80470 }, { "epoch": 0.16257469183934842, "grad_norm": 327.8041076660156, "learning_rate": 9.887226956331506e-06, "loss": 28.3863, "step": 80480 }, { "epoch": 0.16259489247203224, "grad_norm": 255.6417999267578, "learning_rate": 9.887153225790688e-06, "loss": 23.4629, "step": 80490 }, { "epoch": 0.16261509310471603, "grad_norm": 169.76089477539062, "learning_rate": 9.887079471430481e-06, "loss": 11.9341, "step": 80500 }, { "epoch": 0.16263529373739985, "grad_norm": 157.0330810546875, "learning_rate": 9.88700569325124e-06, "loss": 26.8687, "step": 80510 }, { "epoch": 0.16265549437008367, "grad_norm": 150.6828155517578, "learning_rate": 9.886931891253324e-06, "loss": 14.0354, "step": 80520 }, { "epoch": 0.1626756950027675, "grad_norm": 425.0356750488281, "learning_rate": 9.886858065437097e-06, "loss": 49.3559, "step": 80530 }, { "epoch": 0.1626958956354513, "grad_norm": 422.2124328613281, "learning_rate": 9.886784215802915e-06, "loss": 43.4344, "step": 80540 }, { "epoch": 0.16271609626813513, "grad_norm": 64.40585327148438, "learning_rate": 9.88671034235114e-06, "loss": 23.7212, "step": 80550 }, { "epoch": 0.16273629690081892, "grad_norm": 263.89166259765625, "learning_rate": 9.886636445082132e-06, "loss": 31.7894, "step": 80560 }, { "epoch": 0.16275649753350274, "grad_norm": 26.529434204101562, "learning_rate": 9.88656252399625e-06, "loss": 20.581, "step": 80570 }, { "epoch": 0.16277669816618656, "grad_norm": 75.9874267578125, "learning_rate": 9.886488579093856e-06, "loss": 18.7525, "step": 80580 }, { "epoch": 0.16279689879887038, "grad_norm": 132.23245239257812, "learning_rate": 9.886414610375309e-06, "loss": 22.447, "step": 80590 }, { "epoch": 0.1628170994315542, "grad_norm": 411.032470703125, "learning_rate": 9.886340617840968e-06, "loss": 21.0157, "step": 80600 }, { "epoch": 0.16283730006423802, "grad_norm": 214.38507080078125, "learning_rate": 9.886266601491197e-06, "loss": 32.4482, "step": 80610 }, { "epoch": 0.16285750069692181, "grad_norm": 0.0, "learning_rate": 9.886192561326356e-06, "loss": 28.4212, "step": 80620 }, { "epoch": 0.16287770132960563, "grad_norm": 67.20950317382812, "learning_rate": 9.886118497346804e-06, "loss": 15.7665, "step": 80630 }, { "epoch": 0.16289790196228945, "grad_norm": 456.99755859375, "learning_rate": 9.886044409552902e-06, "loss": 21.855, "step": 80640 }, { "epoch": 0.16291810259497327, "grad_norm": 0.0, "learning_rate": 9.885970297945013e-06, "loss": 27.9623, "step": 80650 }, { "epoch": 0.1629383032276571, "grad_norm": 293.7131652832031, "learning_rate": 9.885896162523498e-06, "loss": 30.4787, "step": 80660 }, { "epoch": 0.16295850386034091, "grad_norm": 89.81012725830078, "learning_rate": 9.885822003288717e-06, "loss": 30.5552, "step": 80670 }, { "epoch": 0.16297870449302473, "grad_norm": 456.4233703613281, "learning_rate": 9.885747820241032e-06, "loss": 28.0326, "step": 80680 }, { "epoch": 0.16299890512570853, "grad_norm": 258.32415771484375, "learning_rate": 9.885673613380806e-06, "loss": 15.9463, "step": 80690 }, { "epoch": 0.16301910575839235, "grad_norm": 230.64132690429688, "learning_rate": 9.8855993827084e-06, "loss": 16.9724, "step": 80700 }, { "epoch": 0.16303930639107617, "grad_norm": 177.12010192871094, "learning_rate": 9.885525128224173e-06, "loss": 22.8599, "step": 80710 }, { "epoch": 0.16305950702376, "grad_norm": 280.6875915527344, "learning_rate": 9.885450849928489e-06, "loss": 34.321, "step": 80720 }, { "epoch": 0.1630797076564438, "grad_norm": 242.12623596191406, "learning_rate": 9.885376547821711e-06, "loss": 22.7097, "step": 80730 }, { "epoch": 0.16309990828912763, "grad_norm": 292.6549987792969, "learning_rate": 9.885302221904201e-06, "loss": 16.0462, "step": 80740 }, { "epoch": 0.16312010892181142, "grad_norm": 240.89224243164062, "learning_rate": 9.88522787217632e-06, "loss": 33.4345, "step": 80750 }, { "epoch": 0.16314030955449524, "grad_norm": 102.455322265625, "learning_rate": 9.88515349863843e-06, "loss": 34.8494, "step": 80760 }, { "epoch": 0.16316051018717906, "grad_norm": 219.32252502441406, "learning_rate": 9.885079101290894e-06, "loss": 19.7898, "step": 80770 }, { "epoch": 0.16318071081986288, "grad_norm": 270.3295593261719, "learning_rate": 9.885004680134075e-06, "loss": 17.0847, "step": 80780 }, { "epoch": 0.1632009114525467, "grad_norm": 379.77142333984375, "learning_rate": 9.884930235168338e-06, "loss": 41.7392, "step": 80790 }, { "epoch": 0.16322111208523052, "grad_norm": 221.11737060546875, "learning_rate": 9.884855766394041e-06, "loss": 31.9398, "step": 80800 }, { "epoch": 0.16324131271791434, "grad_norm": 313.6752014160156, "learning_rate": 9.88478127381155e-06, "loss": 18.1931, "step": 80810 }, { "epoch": 0.16326151335059813, "grad_norm": 292.05999755859375, "learning_rate": 9.884706757421229e-06, "loss": 16.8612, "step": 80820 }, { "epoch": 0.16328171398328195, "grad_norm": 238.26834106445312, "learning_rate": 9.884632217223438e-06, "loss": 27.5101, "step": 80830 }, { "epoch": 0.16330191461596577, "grad_norm": 468.2528381347656, "learning_rate": 9.884557653218544e-06, "loss": 32.3533, "step": 80840 }, { "epoch": 0.1633221152486496, "grad_norm": 771.6322021484375, "learning_rate": 9.884483065406905e-06, "loss": 27.0523, "step": 80850 }, { "epoch": 0.1633423158813334, "grad_norm": 349.31060791015625, "learning_rate": 9.88440845378889e-06, "loss": 32.5365, "step": 80860 }, { "epoch": 0.16336251651401723, "grad_norm": 437.2756042480469, "learning_rate": 9.884333818364861e-06, "loss": 33.9966, "step": 80870 }, { "epoch": 0.16338271714670102, "grad_norm": 257.4693298339844, "learning_rate": 9.88425915913518e-06, "loss": 31.3829, "step": 80880 }, { "epoch": 0.16340291777938484, "grad_norm": 321.56280517578125, "learning_rate": 9.884184476100215e-06, "loss": 22.2147, "step": 80890 }, { "epoch": 0.16342311841206866, "grad_norm": 43.66645431518555, "learning_rate": 9.884109769260326e-06, "loss": 22.979, "step": 80900 }, { "epoch": 0.16344331904475248, "grad_norm": 64.46803283691406, "learning_rate": 9.884035038615876e-06, "loss": 14.9225, "step": 80910 }, { "epoch": 0.1634635196774363, "grad_norm": 278.61669921875, "learning_rate": 9.883960284167234e-06, "loss": 26.227, "step": 80920 }, { "epoch": 0.16348372031012012, "grad_norm": 462.6636962890625, "learning_rate": 9.88388550591476e-06, "loss": 21.0877, "step": 80930 }, { "epoch": 0.16350392094280392, "grad_norm": 402.46881103515625, "learning_rate": 9.883810703858823e-06, "loss": 23.0185, "step": 80940 }, { "epoch": 0.16352412157548774, "grad_norm": 0.0, "learning_rate": 9.883735877999785e-06, "loss": 27.3403, "step": 80950 }, { "epoch": 0.16354432220817156, "grad_norm": 127.80304718017578, "learning_rate": 9.883661028338009e-06, "loss": 21.8834, "step": 80960 }, { "epoch": 0.16356452284085538, "grad_norm": 232.68206787109375, "learning_rate": 9.88358615487386e-06, "loss": 23.2149, "step": 80970 }, { "epoch": 0.1635847234735392, "grad_norm": 244.39747619628906, "learning_rate": 9.883511257607708e-06, "loss": 21.3421, "step": 80980 }, { "epoch": 0.16360492410622302, "grad_norm": 0.0, "learning_rate": 9.883436336539913e-06, "loss": 29.5231, "step": 80990 }, { "epoch": 0.16362512473890684, "grad_norm": 782.1522216796875, "learning_rate": 9.883361391670841e-06, "loss": 44.5305, "step": 81000 }, { "epoch": 0.16364532537159063, "grad_norm": 68.39727783203125, "learning_rate": 9.883286423000857e-06, "loss": 9.536, "step": 81010 }, { "epoch": 0.16366552600427445, "grad_norm": 209.87847900390625, "learning_rate": 9.883211430530329e-06, "loss": 12.456, "step": 81020 }, { "epoch": 0.16368572663695827, "grad_norm": 197.98065185546875, "learning_rate": 9.88313641425962e-06, "loss": 32.2721, "step": 81030 }, { "epoch": 0.1637059272696421, "grad_norm": 43.96011734008789, "learning_rate": 9.883061374189095e-06, "loss": 31.3199, "step": 81040 }, { "epoch": 0.1637261279023259, "grad_norm": 0.0, "learning_rate": 9.882986310319124e-06, "loss": 33.0037, "step": 81050 }, { "epoch": 0.16374632853500973, "grad_norm": 156.55555725097656, "learning_rate": 9.882911222650069e-06, "loss": 10.6641, "step": 81060 }, { "epoch": 0.16376652916769352, "grad_norm": 171.52134704589844, "learning_rate": 9.882836111182295e-06, "loss": 25.338, "step": 81070 }, { "epoch": 0.16378672980037734, "grad_norm": 53.66230010986328, "learning_rate": 9.882760975916173e-06, "loss": 15.5371, "step": 81080 }, { "epoch": 0.16380693043306116, "grad_norm": 401.748046875, "learning_rate": 9.882685816852064e-06, "loss": 32.5024, "step": 81090 }, { "epoch": 0.16382713106574498, "grad_norm": 491.9634094238281, "learning_rate": 9.882610633990337e-06, "loss": 22.4836, "step": 81100 }, { "epoch": 0.1638473316984288, "grad_norm": 211.6920928955078, "learning_rate": 9.882535427331357e-06, "loss": 17.6775, "step": 81110 }, { "epoch": 0.16386753233111262, "grad_norm": 63.229881286621094, "learning_rate": 9.882460196875495e-06, "loss": 36.3382, "step": 81120 }, { "epoch": 0.16388773296379644, "grad_norm": 79.49101257324219, "learning_rate": 9.88238494262311e-06, "loss": 29.1099, "step": 81130 }, { "epoch": 0.16390793359648023, "grad_norm": 406.00335693359375, "learning_rate": 9.882309664574576e-06, "loss": 19.7655, "step": 81140 }, { "epoch": 0.16392813422916405, "grad_norm": 114.1545181274414, "learning_rate": 9.882234362730255e-06, "loss": 16.5532, "step": 81150 }, { "epoch": 0.16394833486184787, "grad_norm": 433.3111877441406, "learning_rate": 9.882159037090517e-06, "loss": 21.9381, "step": 81160 }, { "epoch": 0.1639685354945317, "grad_norm": 312.1286926269531, "learning_rate": 9.882083687655728e-06, "loss": 29.6213, "step": 81170 }, { "epoch": 0.1639887361272155, "grad_norm": 518.3797607421875, "learning_rate": 9.882008314426253e-06, "loss": 29.5096, "step": 81180 }, { "epoch": 0.16400893675989933, "grad_norm": 264.34808349609375, "learning_rate": 9.881932917402464e-06, "loss": 20.8684, "step": 81190 }, { "epoch": 0.16402913739258312, "grad_norm": 274.4454650878906, "learning_rate": 9.881857496584726e-06, "loss": 23.2552, "step": 81200 }, { "epoch": 0.16404933802526694, "grad_norm": 186.0841827392578, "learning_rate": 9.881782051973405e-06, "loss": 18.6341, "step": 81210 }, { "epoch": 0.16406953865795076, "grad_norm": 84.46864318847656, "learning_rate": 9.88170658356887e-06, "loss": 34.5852, "step": 81220 }, { "epoch": 0.16408973929063458, "grad_norm": 383.7880554199219, "learning_rate": 9.881631091371492e-06, "loss": 31.9449, "step": 81230 }, { "epoch": 0.1641099399233184, "grad_norm": 728.3754272460938, "learning_rate": 9.881555575381635e-06, "loss": 27.2611, "step": 81240 }, { "epoch": 0.16413014055600222, "grad_norm": 336.0076904296875, "learning_rate": 9.881480035599667e-06, "loss": 43.8432, "step": 81250 }, { "epoch": 0.16415034118868602, "grad_norm": 606.0540161132812, "learning_rate": 9.88140447202596e-06, "loss": 34.4571, "step": 81260 }, { "epoch": 0.16417054182136984, "grad_norm": 751.754638671875, "learning_rate": 9.881328884660876e-06, "loss": 24.2582, "step": 81270 }, { "epoch": 0.16419074245405366, "grad_norm": 130.7711181640625, "learning_rate": 9.88125327350479e-06, "loss": 25.4759, "step": 81280 }, { "epoch": 0.16421094308673748, "grad_norm": 441.4452209472656, "learning_rate": 9.881177638558066e-06, "loss": 28.2043, "step": 81290 }, { "epoch": 0.1642311437194213, "grad_norm": 143.61544799804688, "learning_rate": 9.881101979821075e-06, "loss": 36.7642, "step": 81300 }, { "epoch": 0.16425134435210512, "grad_norm": 197.10601806640625, "learning_rate": 9.881026297294185e-06, "loss": 36.3317, "step": 81310 }, { "epoch": 0.16427154498478894, "grad_norm": 234.24197387695312, "learning_rate": 9.880950590977764e-06, "loss": 19.4874, "step": 81320 }, { "epoch": 0.16429174561747273, "grad_norm": 95.19441223144531, "learning_rate": 9.880874860872183e-06, "loss": 8.1595, "step": 81330 }, { "epoch": 0.16431194625015655, "grad_norm": 337.255859375, "learning_rate": 9.88079910697781e-06, "loss": 20.7182, "step": 81340 }, { "epoch": 0.16433214688284037, "grad_norm": 419.42303466796875, "learning_rate": 9.880723329295012e-06, "loss": 19.9814, "step": 81350 }, { "epoch": 0.1643523475155242, "grad_norm": 363.29315185546875, "learning_rate": 9.880647527824161e-06, "loss": 21.3818, "step": 81360 }, { "epoch": 0.164372548148208, "grad_norm": 571.0088500976562, "learning_rate": 9.880571702565627e-06, "loss": 38.0265, "step": 81370 }, { "epoch": 0.16439274878089183, "grad_norm": 612.8377075195312, "learning_rate": 9.880495853519777e-06, "loss": 25.3299, "step": 81380 }, { "epoch": 0.16441294941357562, "grad_norm": 296.54150390625, "learning_rate": 9.880419980686986e-06, "loss": 10.7019, "step": 81390 }, { "epoch": 0.16443315004625944, "grad_norm": 218.109375, "learning_rate": 9.880344084067616e-06, "loss": 38.9949, "step": 81400 }, { "epoch": 0.16445335067894326, "grad_norm": 295.714111328125, "learning_rate": 9.880268163662043e-06, "loss": 23.5245, "step": 81410 }, { "epoch": 0.16447355131162708, "grad_norm": 647.34716796875, "learning_rate": 9.880192219470633e-06, "loss": 32.4657, "step": 81420 }, { "epoch": 0.1644937519443109, "grad_norm": 254.79225158691406, "learning_rate": 9.88011625149376e-06, "loss": 17.5209, "step": 81430 }, { "epoch": 0.16451395257699472, "grad_norm": 253.12171936035156, "learning_rate": 9.88004025973179e-06, "loss": 24.6352, "step": 81440 }, { "epoch": 0.16453415320967854, "grad_norm": 612.4387817382812, "learning_rate": 9.879964244185098e-06, "loss": 31.1745, "step": 81450 }, { "epoch": 0.16455435384236233, "grad_norm": 87.36180114746094, "learning_rate": 9.87988820485405e-06, "loss": 34.1086, "step": 81460 }, { "epoch": 0.16457455447504615, "grad_norm": 402.9397888183594, "learning_rate": 9.87981214173902e-06, "loss": 18.3938, "step": 81470 }, { "epoch": 0.16459475510772997, "grad_norm": 21.20860481262207, "learning_rate": 9.879736054840377e-06, "loss": 47.8813, "step": 81480 }, { "epoch": 0.1646149557404138, "grad_norm": 423.5443115234375, "learning_rate": 9.879659944158493e-06, "loss": 24.2684, "step": 81490 }, { "epoch": 0.1646351563730976, "grad_norm": 1167.2650146484375, "learning_rate": 9.879583809693737e-06, "loss": 34.1481, "step": 81500 }, { "epoch": 0.16465535700578143, "grad_norm": 294.60076904296875, "learning_rate": 9.879507651446482e-06, "loss": 25.3584, "step": 81510 }, { "epoch": 0.16467555763846523, "grad_norm": 151.131103515625, "learning_rate": 9.8794314694171e-06, "loss": 23.0911, "step": 81520 }, { "epoch": 0.16469575827114905, "grad_norm": 516.186767578125, "learning_rate": 9.879355263605958e-06, "loss": 19.2731, "step": 81530 }, { "epoch": 0.16471595890383287, "grad_norm": 421.6802673339844, "learning_rate": 9.879279034013434e-06, "loss": 20.5787, "step": 81540 }, { "epoch": 0.16473615953651669, "grad_norm": 0.0, "learning_rate": 9.879202780639893e-06, "loss": 15.4527, "step": 81550 }, { "epoch": 0.1647563601692005, "grad_norm": 52.423805236816406, "learning_rate": 9.879126503485709e-06, "loss": 29.4885, "step": 81560 }, { "epoch": 0.16477656080188433, "grad_norm": 249.44981384277344, "learning_rate": 9.879050202551256e-06, "loss": 30.8984, "step": 81570 }, { "epoch": 0.16479676143456812, "grad_norm": 95.38017272949219, "learning_rate": 9.878973877836902e-06, "loss": 28.0367, "step": 81580 }, { "epoch": 0.16481696206725194, "grad_norm": 260.55615234375, "learning_rate": 9.878897529343023e-06, "loss": 11.8228, "step": 81590 }, { "epoch": 0.16483716269993576, "grad_norm": 333.01690673828125, "learning_rate": 9.878821157069988e-06, "loss": 20.05, "step": 81600 }, { "epoch": 0.16485736333261958, "grad_norm": 147.23208618164062, "learning_rate": 9.87874476101817e-06, "loss": 34.3341, "step": 81610 }, { "epoch": 0.1648775639653034, "grad_norm": 248.02413940429688, "learning_rate": 9.878668341187944e-06, "loss": 33.1512, "step": 81620 }, { "epoch": 0.16489776459798722, "grad_norm": 456.67706298828125, "learning_rate": 9.878591897579678e-06, "loss": 28.5025, "step": 81630 }, { "epoch": 0.16491796523067104, "grad_norm": 361.0797424316406, "learning_rate": 9.87851543019375e-06, "loss": 21.1918, "step": 81640 }, { "epoch": 0.16493816586335483, "grad_norm": 294.69757080078125, "learning_rate": 9.878438939030526e-06, "loss": 17.416, "step": 81650 }, { "epoch": 0.16495836649603865, "grad_norm": 298.83514404296875, "learning_rate": 9.878362424090384e-06, "loss": 31.7704, "step": 81660 }, { "epoch": 0.16497856712872247, "grad_norm": 313.79986572265625, "learning_rate": 9.878285885373693e-06, "loss": 25.9559, "step": 81670 }, { "epoch": 0.1649987677614063, "grad_norm": 238.77378845214844, "learning_rate": 9.87820932288083e-06, "loss": 18.8528, "step": 81680 }, { "epoch": 0.1650189683940901, "grad_norm": 165.88497924804688, "learning_rate": 9.878132736612167e-06, "loss": 19.6697, "step": 81690 }, { "epoch": 0.16503916902677393, "grad_norm": 274.5264587402344, "learning_rate": 9.878056126568077e-06, "loss": 11.9117, "step": 81700 }, { "epoch": 0.16505936965945772, "grad_norm": 422.99365234375, "learning_rate": 9.87797949274893e-06, "loss": 28.8778, "step": 81710 }, { "epoch": 0.16507957029214154, "grad_norm": 282.18182373046875, "learning_rate": 9.877902835155105e-06, "loss": 22.7029, "step": 81720 }, { "epoch": 0.16509977092482536, "grad_norm": 230.08029174804688, "learning_rate": 9.877826153786973e-06, "loss": 19.8323, "step": 81730 }, { "epoch": 0.16511997155750918, "grad_norm": 544.3232421875, "learning_rate": 9.877749448644908e-06, "loss": 13.905, "step": 81740 }, { "epoch": 0.165140172190193, "grad_norm": 273.5807189941406, "learning_rate": 9.877672719729283e-06, "loss": 24.756, "step": 81750 }, { "epoch": 0.16516037282287682, "grad_norm": 292.6930847167969, "learning_rate": 9.877595967040475e-06, "loss": 25.1662, "step": 81760 }, { "epoch": 0.16518057345556064, "grad_norm": 290.103759765625, "learning_rate": 9.877519190578852e-06, "loss": 23.5173, "step": 81770 }, { "epoch": 0.16520077408824443, "grad_norm": 277.82855224609375, "learning_rate": 9.877442390344796e-06, "loss": 40.2694, "step": 81780 }, { "epoch": 0.16522097472092825, "grad_norm": 258.6546630859375, "learning_rate": 9.877365566338675e-06, "loss": 18.0024, "step": 81790 }, { "epoch": 0.16524117535361207, "grad_norm": 144.20506286621094, "learning_rate": 9.877288718560866e-06, "loss": 14.2021, "step": 81800 }, { "epoch": 0.1652613759862959, "grad_norm": 145.52598571777344, "learning_rate": 9.877211847011744e-06, "loss": 25.3402, "step": 81810 }, { "epoch": 0.16528157661897971, "grad_norm": 165.09524536132812, "learning_rate": 9.877134951691683e-06, "loss": 31.8929, "step": 81820 }, { "epoch": 0.16530177725166353, "grad_norm": 365.2620544433594, "learning_rate": 9.877058032601057e-06, "loss": 44.7228, "step": 81830 }, { "epoch": 0.16532197788434733, "grad_norm": 532.808349609375, "learning_rate": 9.876981089740242e-06, "loss": 29.6548, "step": 81840 }, { "epoch": 0.16534217851703115, "grad_norm": 99.7681655883789, "learning_rate": 9.876904123109613e-06, "loss": 39.753, "step": 81850 }, { "epoch": 0.16536237914971497, "grad_norm": 0.0, "learning_rate": 9.876827132709545e-06, "loss": 15.8142, "step": 81860 }, { "epoch": 0.1653825797823988, "grad_norm": 621.6065673828125, "learning_rate": 9.876750118540413e-06, "loss": 28.9388, "step": 81870 }, { "epoch": 0.1654027804150826, "grad_norm": 246.3247833251953, "learning_rate": 9.87667308060259e-06, "loss": 29.0766, "step": 81880 }, { "epoch": 0.16542298104776643, "grad_norm": 210.59066772460938, "learning_rate": 9.876596018896457e-06, "loss": 16.8943, "step": 81890 }, { "epoch": 0.16544318168045022, "grad_norm": 226.64437866210938, "learning_rate": 9.876518933422385e-06, "loss": 21.0003, "step": 81900 }, { "epoch": 0.16546338231313404, "grad_norm": 366.88043212890625, "learning_rate": 9.876441824180752e-06, "loss": 24.2955, "step": 81910 }, { "epoch": 0.16548358294581786, "grad_norm": 329.21673583984375, "learning_rate": 9.876364691171933e-06, "loss": 23.1887, "step": 81920 }, { "epoch": 0.16550378357850168, "grad_norm": 319.2961730957031, "learning_rate": 9.876287534396304e-06, "loss": 23.3995, "step": 81930 }, { "epoch": 0.1655239842111855, "grad_norm": 436.950927734375, "learning_rate": 9.876210353854239e-06, "loss": 18.4773, "step": 81940 }, { "epoch": 0.16554418484386932, "grad_norm": 118.1144790649414, "learning_rate": 9.876133149546117e-06, "loss": 10.7863, "step": 81950 }, { "epoch": 0.16556438547655314, "grad_norm": 505.93841552734375, "learning_rate": 9.876055921472316e-06, "loss": 32.0768, "step": 81960 }, { "epoch": 0.16558458610923693, "grad_norm": 172.62196350097656, "learning_rate": 9.875978669633206e-06, "loss": 12.7182, "step": 81970 }, { "epoch": 0.16560478674192075, "grad_norm": 480.1077575683594, "learning_rate": 9.87590139402917e-06, "loss": 16.9109, "step": 81980 }, { "epoch": 0.16562498737460457, "grad_norm": 192.9483642578125, "learning_rate": 9.87582409466058e-06, "loss": 18.1957, "step": 81990 }, { "epoch": 0.1656451880072884, "grad_norm": 203.35108947753906, "learning_rate": 9.875746771527817e-06, "loss": 25.5466, "step": 82000 }, { "epoch": 0.1656653886399722, "grad_norm": 302.7189025878906, "learning_rate": 9.875669424631255e-06, "loss": 18.9077, "step": 82010 }, { "epoch": 0.16568558927265603, "grad_norm": 255.28013610839844, "learning_rate": 9.87559205397127e-06, "loss": 22.976, "step": 82020 }, { "epoch": 0.16570578990533982, "grad_norm": 99.51516723632812, "learning_rate": 9.875514659548243e-06, "loss": 26.7295, "step": 82030 }, { "epoch": 0.16572599053802364, "grad_norm": 152.20811462402344, "learning_rate": 9.875437241362546e-06, "loss": 17.3818, "step": 82040 }, { "epoch": 0.16574619117070746, "grad_norm": 72.63243865966797, "learning_rate": 9.87535979941456e-06, "loss": 4.7484, "step": 82050 }, { "epoch": 0.16576639180339128, "grad_norm": 282.1438293457031, "learning_rate": 9.875282333704665e-06, "loss": 38.0493, "step": 82060 }, { "epoch": 0.1657865924360751, "grad_norm": 380.2642517089844, "learning_rate": 9.875204844233231e-06, "loss": 29.8368, "step": 82070 }, { "epoch": 0.16580679306875892, "grad_norm": 92.33009338378906, "learning_rate": 9.875127331000642e-06, "loss": 21.818, "step": 82080 }, { "epoch": 0.16582699370144272, "grad_norm": 501.4319763183594, "learning_rate": 9.875049794007274e-06, "loss": 50.9315, "step": 82090 }, { "epoch": 0.16584719433412654, "grad_norm": 277.1695556640625, "learning_rate": 9.874972233253503e-06, "loss": 27.0679, "step": 82100 }, { "epoch": 0.16586739496681036, "grad_norm": 236.0446014404297, "learning_rate": 9.87489464873971e-06, "loss": 7.1813, "step": 82110 }, { "epoch": 0.16588759559949418, "grad_norm": 250.10704040527344, "learning_rate": 9.874817040466271e-06, "loss": 32.659, "step": 82120 }, { "epoch": 0.165907796232178, "grad_norm": 514.8346557617188, "learning_rate": 9.874739408433565e-06, "loss": 23.1178, "step": 82130 }, { "epoch": 0.16592799686486182, "grad_norm": 148.30673217773438, "learning_rate": 9.87466175264197e-06, "loss": 31.5439, "step": 82140 }, { "epoch": 0.16594819749754564, "grad_norm": 391.8564453125, "learning_rate": 9.874584073091867e-06, "loss": 29.2721, "step": 82150 }, { "epoch": 0.16596839813022943, "grad_norm": 236.8070526123047, "learning_rate": 9.874506369783629e-06, "loss": 18.8714, "step": 82160 }, { "epoch": 0.16598859876291325, "grad_norm": 234.38430786132812, "learning_rate": 9.874428642717641e-06, "loss": 22.8539, "step": 82170 }, { "epoch": 0.16600879939559707, "grad_norm": 195.00747680664062, "learning_rate": 9.874350891894278e-06, "loss": 14.4165, "step": 82180 }, { "epoch": 0.1660290000282809, "grad_norm": 70.35597229003906, "learning_rate": 9.87427311731392e-06, "loss": 26.744, "step": 82190 }, { "epoch": 0.1660492006609647, "grad_norm": 259.35302734375, "learning_rate": 9.874195318976945e-06, "loss": 18.4305, "step": 82200 }, { "epoch": 0.16606940129364853, "grad_norm": 349.1132507324219, "learning_rate": 9.874117496883734e-06, "loss": 23.331, "step": 82210 }, { "epoch": 0.16608960192633232, "grad_norm": 315.6028747558594, "learning_rate": 9.874039651034665e-06, "loss": 8.7159, "step": 82220 }, { "epoch": 0.16610980255901614, "grad_norm": 635.9444580078125, "learning_rate": 9.873961781430119e-06, "loss": 41.295, "step": 82230 }, { "epoch": 0.16613000319169996, "grad_norm": 600.2528686523438, "learning_rate": 9.873883888070474e-06, "loss": 41.5236, "step": 82240 }, { "epoch": 0.16615020382438378, "grad_norm": 158.3350372314453, "learning_rate": 9.87380597095611e-06, "loss": 10.7288, "step": 82250 }, { "epoch": 0.1661704044570676, "grad_norm": 382.1212158203125, "learning_rate": 9.873728030087406e-06, "loss": 17.6228, "step": 82260 }, { "epoch": 0.16619060508975142, "grad_norm": 320.80877685546875, "learning_rate": 9.873650065464744e-06, "loss": 15.4946, "step": 82270 }, { "epoch": 0.16621080572243524, "grad_norm": 161.38720703125, "learning_rate": 9.873572077088502e-06, "loss": 21.5519, "step": 82280 }, { "epoch": 0.16623100635511903, "grad_norm": 226.51065063476562, "learning_rate": 9.87349406495906e-06, "loss": 28.5442, "step": 82290 }, { "epoch": 0.16625120698780285, "grad_norm": 8.74120044708252, "learning_rate": 9.873416029076801e-06, "loss": 28.8595, "step": 82300 }, { "epoch": 0.16627140762048667, "grad_norm": 40.29245376586914, "learning_rate": 9.873337969442102e-06, "loss": 28.9965, "step": 82310 }, { "epoch": 0.1662916082531705, "grad_norm": 247.56024169921875, "learning_rate": 9.873259886055344e-06, "loss": 28.835, "step": 82320 }, { "epoch": 0.1663118088858543, "grad_norm": 76.94730377197266, "learning_rate": 9.873181778916911e-06, "loss": 22.0698, "step": 82330 }, { "epoch": 0.16633200951853813, "grad_norm": 290.40130615234375, "learning_rate": 9.873103648027178e-06, "loss": 16.5292, "step": 82340 }, { "epoch": 0.16635221015122192, "grad_norm": 528.3880615234375, "learning_rate": 9.873025493386531e-06, "loss": 27.8214, "step": 82350 }, { "epoch": 0.16637241078390574, "grad_norm": 133.72166442871094, "learning_rate": 9.872947314995348e-06, "loss": 35.7723, "step": 82360 }, { "epoch": 0.16639261141658956, "grad_norm": 258.2024841308594, "learning_rate": 9.872869112854011e-06, "loss": 19.4769, "step": 82370 }, { "epoch": 0.16641281204927338, "grad_norm": 259.41796875, "learning_rate": 9.872790886962901e-06, "loss": 29.3896, "step": 82380 }, { "epoch": 0.1664330126819572, "grad_norm": 227.64395141601562, "learning_rate": 9.8727126373224e-06, "loss": 29.6025, "step": 82390 }, { "epoch": 0.16645321331464102, "grad_norm": 867.68017578125, "learning_rate": 9.872634363932887e-06, "loss": 36.1364, "step": 82400 }, { "epoch": 0.16647341394732482, "grad_norm": 397.1578369140625, "learning_rate": 9.872556066794745e-06, "loss": 36.3982, "step": 82410 }, { "epoch": 0.16649361458000864, "grad_norm": 280.6644287109375, "learning_rate": 9.872477745908356e-06, "loss": 25.6629, "step": 82420 }, { "epoch": 0.16651381521269246, "grad_norm": 332.4615173339844, "learning_rate": 9.872399401274103e-06, "loss": 21.2553, "step": 82430 }, { "epoch": 0.16653401584537628, "grad_norm": 198.8942108154297, "learning_rate": 9.872321032892364e-06, "loss": 27.4648, "step": 82440 }, { "epoch": 0.1665542164780601, "grad_norm": 190.68072509765625, "learning_rate": 9.872242640763525e-06, "loss": 19.7145, "step": 82450 }, { "epoch": 0.16657441711074392, "grad_norm": 198.4000701904297, "learning_rate": 9.872164224887966e-06, "loss": 21.6132, "step": 82460 }, { "epoch": 0.16659461774342774, "grad_norm": 325.7412414550781, "learning_rate": 9.872085785266069e-06, "loss": 29.4805, "step": 82470 }, { "epoch": 0.16661481837611153, "grad_norm": 243.84555053710938, "learning_rate": 9.872007321898218e-06, "loss": 19.7013, "step": 82480 }, { "epoch": 0.16663501900879535, "grad_norm": 123.94475555419922, "learning_rate": 9.871928834784793e-06, "loss": 25.2052, "step": 82490 }, { "epoch": 0.16665521964147917, "grad_norm": 397.7675476074219, "learning_rate": 9.871850323926178e-06, "loss": 18.5286, "step": 82500 }, { "epoch": 0.166675420274163, "grad_norm": 159.7510986328125, "learning_rate": 9.871771789322754e-06, "loss": 23.6241, "step": 82510 }, { "epoch": 0.1666956209068468, "grad_norm": 189.0202178955078, "learning_rate": 9.871693230974907e-06, "loss": 19.3545, "step": 82520 }, { "epoch": 0.16671582153953063, "grad_norm": 445.9609680175781, "learning_rate": 9.871614648883017e-06, "loss": 15.3945, "step": 82530 }, { "epoch": 0.16673602217221442, "grad_norm": 197.10597229003906, "learning_rate": 9.87153604304747e-06, "loss": 23.969, "step": 82540 }, { "epoch": 0.16675622280489824, "grad_norm": 536.9927978515625, "learning_rate": 9.871457413468645e-06, "loss": 79.3977, "step": 82550 }, { "epoch": 0.16677642343758206, "grad_norm": 187.7716522216797, "learning_rate": 9.871378760146928e-06, "loss": 21.8753, "step": 82560 }, { "epoch": 0.16679662407026588, "grad_norm": 456.45928955078125, "learning_rate": 9.871300083082702e-06, "loss": 28.9908, "step": 82570 }, { "epoch": 0.1668168247029497, "grad_norm": 284.4895935058594, "learning_rate": 9.87122138227635e-06, "loss": 24.3395, "step": 82580 }, { "epoch": 0.16683702533563352, "grad_norm": 200.79249572753906, "learning_rate": 9.871142657728257e-06, "loss": 36.2815, "step": 82590 }, { "epoch": 0.16685722596831734, "grad_norm": 390.7917785644531, "learning_rate": 9.871063909438803e-06, "loss": 32.4031, "step": 82600 }, { "epoch": 0.16687742660100113, "grad_norm": 260.52337646484375, "learning_rate": 9.870985137408375e-06, "loss": 21.4062, "step": 82610 }, { "epoch": 0.16689762723368495, "grad_norm": 389.3468322753906, "learning_rate": 9.870906341637358e-06, "loss": 24.6023, "step": 82620 }, { "epoch": 0.16691782786636877, "grad_norm": 323.9815673828125, "learning_rate": 9.870827522126134e-06, "loss": 28.7362, "step": 82630 }, { "epoch": 0.1669380284990526, "grad_norm": 460.2393493652344, "learning_rate": 9.870748678875086e-06, "loss": 18.5964, "step": 82640 }, { "epoch": 0.1669582291317364, "grad_norm": 375.8121032714844, "learning_rate": 9.8706698118846e-06, "loss": 29.1043, "step": 82650 }, { "epoch": 0.16697842976442023, "grad_norm": 624.316162109375, "learning_rate": 9.870590921155062e-06, "loss": 26.313, "step": 82660 }, { "epoch": 0.16699863039710403, "grad_norm": 612.198974609375, "learning_rate": 9.870512006686852e-06, "loss": 22.9305, "step": 82670 }, { "epoch": 0.16701883102978785, "grad_norm": 174.2124481201172, "learning_rate": 9.870433068480359e-06, "loss": 25.6592, "step": 82680 }, { "epoch": 0.16703903166247167, "grad_norm": 310.32025146484375, "learning_rate": 9.870354106535964e-06, "loss": 27.9929, "step": 82690 }, { "epoch": 0.16705923229515549, "grad_norm": 618.5444946289062, "learning_rate": 9.870275120854055e-06, "loss": 27.0415, "step": 82700 }, { "epoch": 0.1670794329278393, "grad_norm": 105.72997283935547, "learning_rate": 9.870196111435016e-06, "loss": 24.776, "step": 82710 }, { "epoch": 0.16709963356052313, "grad_norm": 290.8124084472656, "learning_rate": 9.870117078279231e-06, "loss": 46.4076, "step": 82720 }, { "epoch": 0.16711983419320692, "grad_norm": 374.3414611816406, "learning_rate": 9.870038021387087e-06, "loss": 26.227, "step": 82730 }, { "epoch": 0.16714003482589074, "grad_norm": 133.33152770996094, "learning_rate": 9.869958940758968e-06, "loss": 20.061, "step": 82740 }, { "epoch": 0.16716023545857456, "grad_norm": 251.47698974609375, "learning_rate": 9.86987983639526e-06, "loss": 25.2994, "step": 82750 }, { "epoch": 0.16718043609125838, "grad_norm": 484.54595947265625, "learning_rate": 9.869800708296347e-06, "loss": 23.6379, "step": 82760 }, { "epoch": 0.1672006367239422, "grad_norm": 106.24179077148438, "learning_rate": 9.869721556462617e-06, "loss": 36.8222, "step": 82770 }, { "epoch": 0.16722083735662602, "grad_norm": 112.37265014648438, "learning_rate": 9.869642380894454e-06, "loss": 28.6513, "step": 82780 }, { "epoch": 0.16724103798930984, "grad_norm": 589.19091796875, "learning_rate": 9.869563181592246e-06, "loss": 30.3592, "step": 82790 }, { "epoch": 0.16726123862199363, "grad_norm": 259.4186706542969, "learning_rate": 9.869483958556376e-06, "loss": 21.4465, "step": 82800 }, { "epoch": 0.16728143925467745, "grad_norm": 332.6955261230469, "learning_rate": 9.869404711787234e-06, "loss": 21.0657, "step": 82810 }, { "epoch": 0.16730163988736127, "grad_norm": 334.7618408203125, "learning_rate": 9.869325441285203e-06, "loss": 24.2098, "step": 82820 }, { "epoch": 0.1673218405200451, "grad_norm": 229.76461791992188, "learning_rate": 9.869246147050669e-06, "loss": 25.9278, "step": 82830 }, { "epoch": 0.1673420411527289, "grad_norm": 210.72390747070312, "learning_rate": 9.869166829084023e-06, "loss": 23.1679, "step": 82840 }, { "epoch": 0.16736224178541273, "grad_norm": 385.73834228515625, "learning_rate": 9.869087487385644e-06, "loss": 24.5877, "step": 82850 }, { "epoch": 0.16738244241809652, "grad_norm": 207.53558349609375, "learning_rate": 9.869008121955928e-06, "loss": 19.2107, "step": 82860 }, { "epoch": 0.16740264305078034, "grad_norm": 671.8244018554688, "learning_rate": 9.868928732795253e-06, "loss": 23.0094, "step": 82870 }, { "epoch": 0.16742284368346416, "grad_norm": 492.0208435058594, "learning_rate": 9.868849319904012e-06, "loss": 32.7543, "step": 82880 }, { "epoch": 0.16744304431614798, "grad_norm": 485.330810546875, "learning_rate": 9.86876988328259e-06, "loss": 17.6634, "step": 82890 }, { "epoch": 0.1674632449488318, "grad_norm": 336.3710632324219, "learning_rate": 9.868690422931372e-06, "loss": 39.828, "step": 82900 }, { "epoch": 0.16748344558151562, "grad_norm": 245.62684631347656, "learning_rate": 9.86861093885075e-06, "loss": 22.2631, "step": 82910 }, { "epoch": 0.16750364621419944, "grad_norm": 84.7574234008789, "learning_rate": 9.868531431041108e-06, "loss": 17.4598, "step": 82920 }, { "epoch": 0.16752384684688323, "grad_norm": 370.2298583984375, "learning_rate": 9.868451899502833e-06, "loss": 22.6991, "step": 82930 }, { "epoch": 0.16754404747956705, "grad_norm": 160.45919799804688, "learning_rate": 9.868372344236314e-06, "loss": 63.2138, "step": 82940 }, { "epoch": 0.16756424811225087, "grad_norm": 328.9117126464844, "learning_rate": 9.86829276524194e-06, "loss": 30.112, "step": 82950 }, { "epoch": 0.1675844487449347, "grad_norm": 0.0, "learning_rate": 9.868213162520097e-06, "loss": 22.7893, "step": 82960 }, { "epoch": 0.16760464937761851, "grad_norm": 91.4285659790039, "learning_rate": 9.868133536071174e-06, "loss": 17.0102, "step": 82970 }, { "epoch": 0.16762485001030233, "grad_norm": 293.1816711425781, "learning_rate": 9.868053885895559e-06, "loss": 23.7461, "step": 82980 }, { "epoch": 0.16764505064298613, "grad_norm": 118.60198211669922, "learning_rate": 9.867974211993639e-06, "loss": 25.0596, "step": 82990 }, { "epoch": 0.16766525127566995, "grad_norm": 580.802001953125, "learning_rate": 9.867894514365802e-06, "loss": 22.5236, "step": 83000 }, { "epoch": 0.16768545190835377, "grad_norm": 99.74955749511719, "learning_rate": 9.867814793012437e-06, "loss": 15.4682, "step": 83010 }, { "epoch": 0.1677056525410376, "grad_norm": 760.5239868164062, "learning_rate": 9.867735047933936e-06, "loss": 26.4277, "step": 83020 }, { "epoch": 0.1677258531737214, "grad_norm": 71.77997589111328, "learning_rate": 9.867655279130684e-06, "loss": 24.385, "step": 83030 }, { "epoch": 0.16774605380640523, "grad_norm": 521.3129272460938, "learning_rate": 9.86757548660307e-06, "loss": 21.1037, "step": 83040 }, { "epoch": 0.16776625443908902, "grad_norm": 371.6778564453125, "learning_rate": 9.867495670351483e-06, "loss": 33.7804, "step": 83050 }, { "epoch": 0.16778645507177284, "grad_norm": 236.27554321289062, "learning_rate": 9.867415830376313e-06, "loss": 21.726, "step": 83060 }, { "epoch": 0.16780665570445666, "grad_norm": 205.06906127929688, "learning_rate": 9.867335966677949e-06, "loss": 28.3996, "step": 83070 }, { "epoch": 0.16782685633714048, "grad_norm": 306.7882080078125, "learning_rate": 9.867256079256779e-06, "loss": 22.0159, "step": 83080 }, { "epoch": 0.1678470569698243, "grad_norm": 297.3826904296875, "learning_rate": 9.867176168113193e-06, "loss": 29.6875, "step": 83090 }, { "epoch": 0.16786725760250812, "grad_norm": 190.8712615966797, "learning_rate": 9.867096233247581e-06, "loss": 26.7083, "step": 83100 }, { "epoch": 0.16788745823519194, "grad_norm": 147.22586059570312, "learning_rate": 9.867016274660333e-06, "loss": 25.6067, "step": 83110 }, { "epoch": 0.16790765886787573, "grad_norm": 233.47174072265625, "learning_rate": 9.866936292351837e-06, "loss": 33.7561, "step": 83120 }, { "epoch": 0.16792785950055955, "grad_norm": 109.52207946777344, "learning_rate": 9.866856286322484e-06, "loss": 14.246, "step": 83130 }, { "epoch": 0.16794806013324337, "grad_norm": 548.0711059570312, "learning_rate": 9.866776256572662e-06, "loss": 46.6586, "step": 83140 }, { "epoch": 0.1679682607659272, "grad_norm": 397.80816650390625, "learning_rate": 9.866696203102765e-06, "loss": 31.4378, "step": 83150 }, { "epoch": 0.167988461398611, "grad_norm": 314.922119140625, "learning_rate": 9.866616125913182e-06, "loss": 25.0187, "step": 83160 }, { "epoch": 0.16800866203129483, "grad_norm": 238.7561492919922, "learning_rate": 9.8665360250043e-06, "loss": 19.7497, "step": 83170 }, { "epoch": 0.16802886266397862, "grad_norm": 487.22332763671875, "learning_rate": 9.866455900376514e-06, "loss": 30.385, "step": 83180 }, { "epoch": 0.16804906329666244, "grad_norm": 309.3050842285156, "learning_rate": 9.86637575203021e-06, "loss": 21.9702, "step": 83190 }, { "epoch": 0.16806926392934626, "grad_norm": 238.50714111328125, "learning_rate": 9.866295579965782e-06, "loss": 26.3885, "step": 83200 }, { "epoch": 0.16808946456203008, "grad_norm": 529.0665893554688, "learning_rate": 9.86621538418362e-06, "loss": 38.1726, "step": 83210 }, { "epoch": 0.1681096651947139, "grad_norm": 329.76605224609375, "learning_rate": 9.866135164684112e-06, "loss": 31.2291, "step": 83220 }, { "epoch": 0.16812986582739772, "grad_norm": 164.70004272460938, "learning_rate": 9.866054921467654e-06, "loss": 26.1923, "step": 83230 }, { "epoch": 0.16815006646008154, "grad_norm": 326.3816223144531, "learning_rate": 9.865974654534634e-06, "loss": 24.4277, "step": 83240 }, { "epoch": 0.16817026709276534, "grad_norm": 569.1002807617188, "learning_rate": 9.865894363885442e-06, "loss": 31.137, "step": 83250 }, { "epoch": 0.16819046772544916, "grad_norm": 128.40577697753906, "learning_rate": 9.865814049520473e-06, "loss": 15.6652, "step": 83260 }, { "epoch": 0.16821066835813298, "grad_norm": 152.5447540283203, "learning_rate": 9.865733711440116e-06, "loss": 19.684, "step": 83270 }, { "epoch": 0.1682308689908168, "grad_norm": 220.48594665527344, "learning_rate": 9.865653349644761e-06, "loss": 19.8219, "step": 83280 }, { "epoch": 0.16825106962350062, "grad_norm": 388.7926940917969, "learning_rate": 9.865572964134804e-06, "loss": 34.0414, "step": 83290 }, { "epoch": 0.16827127025618444, "grad_norm": 133.34329223632812, "learning_rate": 9.865492554910634e-06, "loss": 13.7757, "step": 83300 }, { "epoch": 0.16829147088886823, "grad_norm": 154.62208557128906, "learning_rate": 9.865412121972643e-06, "loss": 19.0008, "step": 83310 }, { "epoch": 0.16831167152155205, "grad_norm": 131.62197875976562, "learning_rate": 9.865331665321222e-06, "loss": 26.1626, "step": 83320 }, { "epoch": 0.16833187215423587, "grad_norm": 718.8201293945312, "learning_rate": 9.865251184956767e-06, "loss": 35.367, "step": 83330 }, { "epoch": 0.1683520727869197, "grad_norm": 234.20265197753906, "learning_rate": 9.865170680879667e-06, "loss": 21.1284, "step": 83340 }, { "epoch": 0.1683722734196035, "grad_norm": 26.155784606933594, "learning_rate": 9.865090153090315e-06, "loss": 29.3822, "step": 83350 }, { "epoch": 0.16839247405228733, "grad_norm": 230.96932983398438, "learning_rate": 9.865009601589105e-06, "loss": 29.0026, "step": 83360 }, { "epoch": 0.16841267468497112, "grad_norm": 200.5190887451172, "learning_rate": 9.864929026376427e-06, "loss": 23.3673, "step": 83370 }, { "epoch": 0.16843287531765494, "grad_norm": 430.5661926269531, "learning_rate": 9.864848427452675e-06, "loss": 29.4518, "step": 83380 }, { "epoch": 0.16845307595033876, "grad_norm": 274.7444152832031, "learning_rate": 9.864767804818242e-06, "loss": 17.6514, "step": 83390 }, { "epoch": 0.16847327658302258, "grad_norm": 368.7231750488281, "learning_rate": 9.86468715847352e-06, "loss": 23.9579, "step": 83400 }, { "epoch": 0.1684934772157064, "grad_norm": 285.7784729003906, "learning_rate": 9.864606488418905e-06, "loss": 34.2801, "step": 83410 }, { "epoch": 0.16851367784839022, "grad_norm": 227.82862854003906, "learning_rate": 9.864525794654786e-06, "loss": 19.1242, "step": 83420 }, { "epoch": 0.16853387848107404, "grad_norm": 513.9238891601562, "learning_rate": 9.864445077181559e-06, "loss": 26.8872, "step": 83430 }, { "epoch": 0.16855407911375783, "grad_norm": 165.78533935546875, "learning_rate": 9.864364335999615e-06, "loss": 26.223, "step": 83440 }, { "epoch": 0.16857427974644165, "grad_norm": 342.3609313964844, "learning_rate": 9.864283571109352e-06, "loss": 25.4453, "step": 83450 }, { "epoch": 0.16859448037912547, "grad_norm": 107.10284423828125, "learning_rate": 9.864202782511158e-06, "loss": 38.802, "step": 83460 }, { "epoch": 0.1686146810118093, "grad_norm": 75.13509368896484, "learning_rate": 9.864121970205431e-06, "loss": 29.527, "step": 83470 }, { "epoch": 0.1686348816444931, "grad_norm": 130.06968688964844, "learning_rate": 9.864041134192563e-06, "loss": 14.746, "step": 83480 }, { "epoch": 0.16865508227717693, "grad_norm": 327.178955078125, "learning_rate": 9.86396027447295e-06, "loss": 31.8861, "step": 83490 }, { "epoch": 0.16867528290986072, "grad_norm": 375.7424011230469, "learning_rate": 9.863879391046985e-06, "loss": 19.9673, "step": 83500 }, { "epoch": 0.16869548354254454, "grad_norm": 169.68846130371094, "learning_rate": 9.863798483915059e-06, "loss": 21.6416, "step": 83510 }, { "epoch": 0.16871568417522836, "grad_norm": 297.5664978027344, "learning_rate": 9.86371755307757e-06, "loss": 30.011, "step": 83520 }, { "epoch": 0.16873588480791218, "grad_norm": 303.6901550292969, "learning_rate": 9.863636598534912e-06, "loss": 22.5431, "step": 83530 }, { "epoch": 0.168756085440596, "grad_norm": 194.3824920654297, "learning_rate": 9.863555620287479e-06, "loss": 15.1987, "step": 83540 }, { "epoch": 0.16877628607327982, "grad_norm": 126.67166900634766, "learning_rate": 9.863474618335666e-06, "loss": 22.436, "step": 83550 }, { "epoch": 0.16879648670596364, "grad_norm": 204.26611328125, "learning_rate": 9.863393592679867e-06, "loss": 30.3146, "step": 83560 }, { "epoch": 0.16881668733864744, "grad_norm": 584.0396728515625, "learning_rate": 9.863312543320479e-06, "loss": 19.8445, "step": 83570 }, { "epoch": 0.16883688797133126, "grad_norm": 460.9364929199219, "learning_rate": 9.863231470257893e-06, "loss": 19.7953, "step": 83580 }, { "epoch": 0.16885708860401508, "grad_norm": 323.747802734375, "learning_rate": 9.863150373492509e-06, "loss": 17.9594, "step": 83590 }, { "epoch": 0.1688772892366989, "grad_norm": 104.8280029296875, "learning_rate": 9.863069253024719e-06, "loss": 25.4374, "step": 83600 }, { "epoch": 0.16889748986938272, "grad_norm": 292.62493896484375, "learning_rate": 9.862988108854919e-06, "loss": 11.6928, "step": 83610 }, { "epoch": 0.16891769050206654, "grad_norm": 295.65618896484375, "learning_rate": 9.862906940983505e-06, "loss": 29.2945, "step": 83620 }, { "epoch": 0.16893789113475033, "grad_norm": 239.69361877441406, "learning_rate": 9.862825749410872e-06, "loss": 22.6239, "step": 83630 }, { "epoch": 0.16895809176743415, "grad_norm": 277.5348205566406, "learning_rate": 9.862744534137416e-06, "loss": 14.1224, "step": 83640 }, { "epoch": 0.16897829240011797, "grad_norm": 206.1162567138672, "learning_rate": 9.862663295163533e-06, "loss": 35.1965, "step": 83650 }, { "epoch": 0.1689984930328018, "grad_norm": 163.30923461914062, "learning_rate": 9.862582032489621e-06, "loss": 16.7144, "step": 83660 }, { "epoch": 0.1690186936654856, "grad_norm": 292.2177734375, "learning_rate": 9.86250074611607e-06, "loss": 15.3692, "step": 83670 }, { "epoch": 0.16903889429816943, "grad_norm": 216.5235137939453, "learning_rate": 9.862419436043284e-06, "loss": 37.4011, "step": 83680 }, { "epoch": 0.16905909493085322, "grad_norm": 306.11883544921875, "learning_rate": 9.862338102271654e-06, "loss": 18.4068, "step": 83690 }, { "epoch": 0.16907929556353704, "grad_norm": 163.0714111328125, "learning_rate": 9.862256744801576e-06, "loss": 17.1539, "step": 83700 }, { "epoch": 0.16909949619622086, "grad_norm": 616.3630981445312, "learning_rate": 9.86217536363345e-06, "loss": 25.9609, "step": 83710 }, { "epoch": 0.16911969682890468, "grad_norm": 113.66357421875, "learning_rate": 9.862093958767671e-06, "loss": 14.9592, "step": 83720 }, { "epoch": 0.1691398974615885, "grad_norm": 433.3678283691406, "learning_rate": 9.862012530204636e-06, "loss": 26.9927, "step": 83730 }, { "epoch": 0.16916009809427232, "grad_norm": 455.7156066894531, "learning_rate": 9.86193107794474e-06, "loss": 18.731, "step": 83740 }, { "epoch": 0.16918029872695614, "grad_norm": 71.03558349609375, "learning_rate": 9.861849601988384e-06, "loss": 15.5733, "step": 83750 }, { "epoch": 0.16920049935963993, "grad_norm": 121.52517700195312, "learning_rate": 9.861768102335961e-06, "loss": 34.9433, "step": 83760 }, { "epoch": 0.16922069999232375, "grad_norm": 104.53846740722656, "learning_rate": 9.861686578987871e-06, "loss": 33.4901, "step": 83770 }, { "epoch": 0.16924090062500757, "grad_norm": 312.4114685058594, "learning_rate": 9.86160503194451e-06, "loss": 14.1773, "step": 83780 }, { "epoch": 0.1692611012576914, "grad_norm": 229.57968139648438, "learning_rate": 9.861523461206275e-06, "loss": 21.1125, "step": 83790 }, { "epoch": 0.1692813018903752, "grad_norm": 274.9559020996094, "learning_rate": 9.861441866773564e-06, "loss": 29.1323, "step": 83800 }, { "epoch": 0.16930150252305903, "grad_norm": 129.7029266357422, "learning_rate": 9.861360248646777e-06, "loss": 31.0498, "step": 83810 }, { "epoch": 0.16932170315574283, "grad_norm": 289.2229309082031, "learning_rate": 9.861278606826307e-06, "loss": 44.4128, "step": 83820 }, { "epoch": 0.16934190378842665, "grad_norm": 211.1077423095703, "learning_rate": 9.861196941312556e-06, "loss": 15.2835, "step": 83830 }, { "epoch": 0.16936210442111047, "grad_norm": 206.8957977294922, "learning_rate": 9.861115252105922e-06, "loss": 26.5058, "step": 83840 }, { "epoch": 0.16938230505379429, "grad_norm": 462.19012451171875, "learning_rate": 9.8610335392068e-06, "loss": 19.6288, "step": 83850 }, { "epoch": 0.1694025056864781, "grad_norm": 164.64259338378906, "learning_rate": 9.86095180261559e-06, "loss": 24.6654, "step": 83860 }, { "epoch": 0.16942270631916193, "grad_norm": 470.2506103515625, "learning_rate": 9.860870042332693e-06, "loss": 16.7121, "step": 83870 }, { "epoch": 0.16944290695184575, "grad_norm": 91.24612426757812, "learning_rate": 9.860788258358503e-06, "loss": 23.063, "step": 83880 }, { "epoch": 0.16946310758452954, "grad_norm": 36.26790237426758, "learning_rate": 9.86070645069342e-06, "loss": 15.5098, "step": 83890 }, { "epoch": 0.16948330821721336, "grad_norm": 218.2975311279297, "learning_rate": 9.860624619337844e-06, "loss": 22.3793, "step": 83900 }, { "epoch": 0.16950350884989718, "grad_norm": 384.70574951171875, "learning_rate": 9.860542764292173e-06, "loss": 22.575, "step": 83910 }, { "epoch": 0.169523709482581, "grad_norm": 205.90420532226562, "learning_rate": 9.860460885556806e-06, "loss": 24.2325, "step": 83920 }, { "epoch": 0.16954391011526482, "grad_norm": 408.55902099609375, "learning_rate": 9.860378983132144e-06, "loss": 19.7377, "step": 83930 }, { "epoch": 0.16956411074794864, "grad_norm": 0.0, "learning_rate": 9.860297057018581e-06, "loss": 25.7222, "step": 83940 }, { "epoch": 0.16958431138063243, "grad_norm": 190.34181213378906, "learning_rate": 9.860215107216523e-06, "loss": 22.0545, "step": 83950 }, { "epoch": 0.16960451201331625, "grad_norm": 384.6967468261719, "learning_rate": 9.860133133726364e-06, "loss": 18.3426, "step": 83960 }, { "epoch": 0.16962471264600007, "grad_norm": 325.1596984863281, "learning_rate": 9.860051136548506e-06, "loss": 27.4019, "step": 83970 }, { "epoch": 0.1696449132786839, "grad_norm": 361.1190185546875, "learning_rate": 9.859969115683348e-06, "loss": 49.643, "step": 83980 }, { "epoch": 0.1696651139113677, "grad_norm": 423.29620361328125, "learning_rate": 9.85988707113129e-06, "loss": 20.6459, "step": 83990 }, { "epoch": 0.16968531454405153, "grad_norm": 153.49134826660156, "learning_rate": 9.859805002892733e-06, "loss": 16.0364, "step": 84000 }, { "epoch": 0.16970551517673532, "grad_norm": 320.2734069824219, "learning_rate": 9.859722910968073e-06, "loss": 24.854, "step": 84010 }, { "epoch": 0.16972571580941914, "grad_norm": 289.88201904296875, "learning_rate": 9.859640795357716e-06, "loss": 16.8925, "step": 84020 }, { "epoch": 0.16974591644210296, "grad_norm": 459.7493591308594, "learning_rate": 9.859558656062057e-06, "loss": 19.565, "step": 84030 }, { "epoch": 0.16976611707478678, "grad_norm": 45.174808502197266, "learning_rate": 9.8594764930815e-06, "loss": 11.664, "step": 84040 }, { "epoch": 0.1697863177074706, "grad_norm": 504.0591735839844, "learning_rate": 9.859394306416443e-06, "loss": 17.244, "step": 84050 }, { "epoch": 0.16980651834015442, "grad_norm": 412.1477355957031, "learning_rate": 9.859312096067289e-06, "loss": 26.4491, "step": 84060 }, { "epoch": 0.16982671897283824, "grad_norm": 308.2583923339844, "learning_rate": 9.859229862034436e-06, "loss": 23.8198, "step": 84070 }, { "epoch": 0.16984691960552203, "grad_norm": 644.6337280273438, "learning_rate": 9.859147604318286e-06, "loss": 20.8304, "step": 84080 }, { "epoch": 0.16986712023820585, "grad_norm": 718.21875, "learning_rate": 9.859065322919239e-06, "loss": 21.2133, "step": 84090 }, { "epoch": 0.16988732087088967, "grad_norm": 180.73793029785156, "learning_rate": 9.8589830178377e-06, "loss": 24.3676, "step": 84100 }, { "epoch": 0.1699075215035735, "grad_norm": 216.9444580078125, "learning_rate": 9.858900689074065e-06, "loss": 15.8231, "step": 84110 }, { "epoch": 0.16992772213625731, "grad_norm": 99.7641372680664, "learning_rate": 9.858818336628737e-06, "loss": 14.7757, "step": 84120 }, { "epoch": 0.16994792276894113, "grad_norm": 378.4314270019531, "learning_rate": 9.858735960502118e-06, "loss": 30.8074, "step": 84130 }, { "epoch": 0.16996812340162493, "grad_norm": 433.3677978515625, "learning_rate": 9.858653560694609e-06, "loss": 18.0801, "step": 84140 }, { "epoch": 0.16998832403430875, "grad_norm": 233.5803985595703, "learning_rate": 9.858571137206611e-06, "loss": 17.627, "step": 84150 }, { "epoch": 0.17000852466699257, "grad_norm": 504.9804992675781, "learning_rate": 9.858488690038529e-06, "loss": 24.7923, "step": 84160 }, { "epoch": 0.1700287252996764, "grad_norm": 105.98303985595703, "learning_rate": 9.858406219190761e-06, "loss": 22.1878, "step": 84170 }, { "epoch": 0.1700489259323602, "grad_norm": 578.96875, "learning_rate": 9.858323724663712e-06, "loss": 35.5191, "step": 84180 }, { "epoch": 0.17006912656504403, "grad_norm": 323.6617736816406, "learning_rate": 9.85824120645778e-06, "loss": 20.3284, "step": 84190 }, { "epoch": 0.17008932719772785, "grad_norm": 102.07389068603516, "learning_rate": 9.85815866457337e-06, "loss": 18.9842, "step": 84200 }, { "epoch": 0.17010952783041164, "grad_norm": 139.9350128173828, "learning_rate": 9.858076099010885e-06, "loss": 16.8205, "step": 84210 }, { "epoch": 0.17012972846309546, "grad_norm": 244.8942413330078, "learning_rate": 9.857993509770725e-06, "loss": 12.5485, "step": 84220 }, { "epoch": 0.17014992909577928, "grad_norm": 201.49378967285156, "learning_rate": 9.857910896853296e-06, "loss": 25.128, "step": 84230 }, { "epoch": 0.1701701297284631, "grad_norm": 327.20013427734375, "learning_rate": 9.857828260258997e-06, "loss": 27.3789, "step": 84240 }, { "epoch": 0.17019033036114692, "grad_norm": 276.7530517578125, "learning_rate": 9.857745599988231e-06, "loss": 22.8194, "step": 84250 }, { "epoch": 0.17021053099383074, "grad_norm": 337.30078125, "learning_rate": 9.857662916041404e-06, "loss": 28.0651, "step": 84260 }, { "epoch": 0.17023073162651453, "grad_norm": 415.4382019042969, "learning_rate": 9.857580208418917e-06, "loss": 20.6391, "step": 84270 }, { "epoch": 0.17025093225919835, "grad_norm": 156.11378479003906, "learning_rate": 9.857497477121172e-06, "loss": 20.5975, "step": 84280 }, { "epoch": 0.17027113289188217, "grad_norm": 189.0169219970703, "learning_rate": 9.857414722148574e-06, "loss": 25.6833, "step": 84290 }, { "epoch": 0.170291333524566, "grad_norm": 171.80224609375, "learning_rate": 9.857331943501527e-06, "loss": 20.7575, "step": 84300 }, { "epoch": 0.1703115341572498, "grad_norm": 339.89874267578125, "learning_rate": 9.857249141180431e-06, "loss": 22.0804, "step": 84310 }, { "epoch": 0.17033173478993363, "grad_norm": 336.9088134765625, "learning_rate": 9.857166315185693e-06, "loss": 16.6827, "step": 84320 }, { "epoch": 0.17035193542261742, "grad_norm": 593.3232421875, "learning_rate": 9.857083465517716e-06, "loss": 31.5982, "step": 84330 }, { "epoch": 0.17037213605530124, "grad_norm": 484.4998474121094, "learning_rate": 9.857000592176902e-06, "loss": 19.53, "step": 84340 }, { "epoch": 0.17039233668798506, "grad_norm": 19.165119171142578, "learning_rate": 9.856917695163659e-06, "loss": 22.4891, "step": 84350 }, { "epoch": 0.17041253732066888, "grad_norm": 328.4586486816406, "learning_rate": 9.856834774478385e-06, "loss": 29.1836, "step": 84360 }, { "epoch": 0.1704327379533527, "grad_norm": 797.2910766601562, "learning_rate": 9.85675183012149e-06, "loss": 36.7739, "step": 84370 }, { "epoch": 0.17045293858603652, "grad_norm": 226.66549682617188, "learning_rate": 9.856668862093372e-06, "loss": 28.2813, "step": 84380 }, { "epoch": 0.17047313921872034, "grad_norm": 0.0, "learning_rate": 9.856585870394442e-06, "loss": 27.6452, "step": 84390 }, { "epoch": 0.17049333985140414, "grad_norm": 309.6792907714844, "learning_rate": 9.856502855025101e-06, "loss": 26.8594, "step": 84400 }, { "epoch": 0.17051354048408796, "grad_norm": 422.0649108886719, "learning_rate": 9.856419815985754e-06, "loss": 26.6869, "step": 84410 }, { "epoch": 0.17053374111677178, "grad_norm": 410.0615234375, "learning_rate": 9.856336753276804e-06, "loss": 30.8284, "step": 84420 }, { "epoch": 0.1705539417494556, "grad_norm": 266.186279296875, "learning_rate": 9.85625366689866e-06, "loss": 30.8975, "step": 84430 }, { "epoch": 0.17057414238213942, "grad_norm": 318.1581726074219, "learning_rate": 9.856170556851725e-06, "loss": 27.4438, "step": 84440 }, { "epoch": 0.17059434301482324, "grad_norm": 78.99361419677734, "learning_rate": 9.856087423136403e-06, "loss": 17.2723, "step": 84450 }, { "epoch": 0.17061454364750703, "grad_norm": 738.6920166015625, "learning_rate": 9.856004265753099e-06, "loss": 20.2413, "step": 84460 }, { "epoch": 0.17063474428019085, "grad_norm": 409.4278259277344, "learning_rate": 9.85592108470222e-06, "loss": 18.4217, "step": 84470 }, { "epoch": 0.17065494491287467, "grad_norm": 268.1202697753906, "learning_rate": 9.85583787998417e-06, "loss": 31.2547, "step": 84480 }, { "epoch": 0.1706751455455585, "grad_norm": 172.7768096923828, "learning_rate": 9.855754651599355e-06, "loss": 17.9168, "step": 84490 }, { "epoch": 0.1706953461782423, "grad_norm": 435.4432678222656, "learning_rate": 9.85567139954818e-06, "loss": 31.7412, "step": 84500 }, { "epoch": 0.17071554681092613, "grad_norm": 648.7313842773438, "learning_rate": 9.855588123831053e-06, "loss": 26.9404, "step": 84510 }, { "epoch": 0.17073574744360995, "grad_norm": 177.42039489746094, "learning_rate": 9.855504824448379e-06, "loss": 22.3172, "step": 84520 }, { "epoch": 0.17075594807629374, "grad_norm": 647.2815551757812, "learning_rate": 9.855421501400562e-06, "loss": 32.6165, "step": 84530 }, { "epoch": 0.17077614870897756, "grad_norm": 234.53184509277344, "learning_rate": 9.85533815468801e-06, "loss": 42.8664, "step": 84540 }, { "epoch": 0.17079634934166138, "grad_norm": 225.09999084472656, "learning_rate": 9.85525478431113e-06, "loss": 33.6597, "step": 84550 }, { "epoch": 0.1708165499743452, "grad_norm": 155.2668914794922, "learning_rate": 9.855171390270325e-06, "loss": 16.6565, "step": 84560 }, { "epoch": 0.17083675060702902, "grad_norm": 612.5817260742188, "learning_rate": 9.855087972566004e-06, "loss": 25.0548, "step": 84570 }, { "epoch": 0.17085695123971284, "grad_norm": 176.7689666748047, "learning_rate": 9.855004531198573e-06, "loss": 20.134, "step": 84580 }, { "epoch": 0.17087715187239663, "grad_norm": 274.6050109863281, "learning_rate": 9.854921066168439e-06, "loss": 19.9635, "step": 84590 }, { "epoch": 0.17089735250508045, "grad_norm": 779.0989990234375, "learning_rate": 9.854837577476008e-06, "loss": 45.2003, "step": 84600 }, { "epoch": 0.17091755313776427, "grad_norm": 800.0113525390625, "learning_rate": 9.854754065121689e-06, "loss": 23.4595, "step": 84610 }, { "epoch": 0.1709377537704481, "grad_norm": 85.84465789794922, "learning_rate": 9.854670529105887e-06, "loss": 25.0604, "step": 84620 }, { "epoch": 0.1709579544031319, "grad_norm": 386.937744140625, "learning_rate": 9.854586969429009e-06, "loss": 22.2587, "step": 84630 }, { "epoch": 0.17097815503581573, "grad_norm": 360.1268310546875, "learning_rate": 9.854503386091463e-06, "loss": 25.7714, "step": 84640 }, { "epoch": 0.17099835566849952, "grad_norm": 137.27012634277344, "learning_rate": 9.854419779093656e-06, "loss": 23.4878, "step": 84650 }, { "epoch": 0.17101855630118334, "grad_norm": 251.29054260253906, "learning_rate": 9.854336148435997e-06, "loss": 22.8396, "step": 84660 }, { "epoch": 0.17103875693386716, "grad_norm": 298.8148193359375, "learning_rate": 9.85425249411889e-06, "loss": 17.6215, "step": 84670 }, { "epoch": 0.17105895756655098, "grad_norm": 127.91056823730469, "learning_rate": 9.854168816142747e-06, "loss": 33.43, "step": 84680 }, { "epoch": 0.1710791581992348, "grad_norm": 519.0956420898438, "learning_rate": 9.854085114507974e-06, "loss": 20.1039, "step": 84690 }, { "epoch": 0.17109935883191862, "grad_norm": 892.5123901367188, "learning_rate": 9.854001389214979e-06, "loss": 35.9394, "step": 84700 }, { "epoch": 0.17111955946460244, "grad_norm": 629.5753173828125, "learning_rate": 9.853917640264169e-06, "loss": 36.9454, "step": 84710 }, { "epoch": 0.17113976009728624, "grad_norm": 341.8872985839844, "learning_rate": 9.853833867655954e-06, "loss": 24.1226, "step": 84720 }, { "epoch": 0.17115996072997006, "grad_norm": 202.8926239013672, "learning_rate": 9.853750071390739e-06, "loss": 30.6026, "step": 84730 }, { "epoch": 0.17118016136265388, "grad_norm": 715.450439453125, "learning_rate": 9.853666251468938e-06, "loss": 36.1103, "step": 84740 }, { "epoch": 0.1712003619953377, "grad_norm": 703.70556640625, "learning_rate": 9.853582407890954e-06, "loss": 40.4052, "step": 84750 }, { "epoch": 0.17122056262802152, "grad_norm": 98.35871887207031, "learning_rate": 9.853498540657201e-06, "loss": 27.517, "step": 84760 }, { "epoch": 0.17124076326070534, "grad_norm": 264.1483154296875, "learning_rate": 9.853414649768082e-06, "loss": 27.3736, "step": 84770 }, { "epoch": 0.17126096389338913, "grad_norm": 323.0660095214844, "learning_rate": 9.85333073522401e-06, "loss": 21.0344, "step": 84780 }, { "epoch": 0.17128116452607295, "grad_norm": 227.3601531982422, "learning_rate": 9.853246797025391e-06, "loss": 24.0355, "step": 84790 }, { "epoch": 0.17130136515875677, "grad_norm": 227.77989196777344, "learning_rate": 9.853162835172638e-06, "loss": 25.5491, "step": 84800 }, { "epoch": 0.1713215657914406, "grad_norm": 427.9307556152344, "learning_rate": 9.853078849666156e-06, "loss": 26.7696, "step": 84810 }, { "epoch": 0.1713417664241244, "grad_norm": 328.3766174316406, "learning_rate": 9.852994840506357e-06, "loss": 19.2563, "step": 84820 }, { "epoch": 0.17136196705680823, "grad_norm": 267.5766296386719, "learning_rate": 9.85291080769365e-06, "loss": 26.6297, "step": 84830 }, { "epoch": 0.17138216768949205, "grad_norm": 185.25315856933594, "learning_rate": 9.852826751228445e-06, "loss": 20.0181, "step": 84840 }, { "epoch": 0.17140236832217584, "grad_norm": 360.0921936035156, "learning_rate": 9.852742671111151e-06, "loss": 25.1588, "step": 84850 }, { "epoch": 0.17142256895485966, "grad_norm": 360.1064758300781, "learning_rate": 9.852658567342177e-06, "loss": 17.7994, "step": 84860 }, { "epoch": 0.17144276958754348, "grad_norm": 526.0274658203125, "learning_rate": 9.852574439921933e-06, "loss": 37.9525, "step": 84870 }, { "epoch": 0.1714629702202273, "grad_norm": 115.02400970458984, "learning_rate": 9.85249028885083e-06, "loss": 13.7458, "step": 84880 }, { "epoch": 0.17148317085291112, "grad_norm": 132.46200561523438, "learning_rate": 9.852406114129277e-06, "loss": 11.4753, "step": 84890 }, { "epoch": 0.17150337148559494, "grad_norm": 484.5084228515625, "learning_rate": 9.852321915757688e-06, "loss": 42.6002, "step": 84900 }, { "epoch": 0.17152357211827873, "grad_norm": 256.08489990234375, "learning_rate": 9.852237693736469e-06, "loss": 38.0972, "step": 84910 }, { "epoch": 0.17154377275096255, "grad_norm": 327.9403381347656, "learning_rate": 9.852153448066031e-06, "loss": 28.3486, "step": 84920 }, { "epoch": 0.17156397338364637, "grad_norm": 275.482177734375, "learning_rate": 9.852069178746786e-06, "loss": 27.9715, "step": 84930 }, { "epoch": 0.1715841740163302, "grad_norm": 606.2677001953125, "learning_rate": 9.851984885779147e-06, "loss": 25.1426, "step": 84940 }, { "epoch": 0.171604374649014, "grad_norm": 226.09786987304688, "learning_rate": 9.85190056916352e-06, "loss": 31.8892, "step": 84950 }, { "epoch": 0.17162457528169783, "grad_norm": 307.9385986328125, "learning_rate": 9.851816228900317e-06, "loss": 15.9128, "step": 84960 }, { "epoch": 0.17164477591438163, "grad_norm": 338.29150390625, "learning_rate": 9.85173186498995e-06, "loss": 8.4826, "step": 84970 }, { "epoch": 0.17166497654706545, "grad_norm": 68.31180572509766, "learning_rate": 9.851647477432834e-06, "loss": 15.3004, "step": 84980 }, { "epoch": 0.17168517717974927, "grad_norm": 240.7517852783203, "learning_rate": 9.851563066229373e-06, "loss": 21.7407, "step": 84990 }, { "epoch": 0.17170537781243309, "grad_norm": 311.25, "learning_rate": 9.851478631379982e-06, "loss": 29.1843, "step": 85000 }, { "epoch": 0.1717255784451169, "grad_norm": 309.25030517578125, "learning_rate": 9.851394172885075e-06, "loss": 31.1875, "step": 85010 }, { "epoch": 0.17174577907780073, "grad_norm": 79.93856811523438, "learning_rate": 9.85130969074506e-06, "loss": 23.3641, "step": 85020 }, { "epoch": 0.17176597971048455, "grad_norm": 287.9908752441406, "learning_rate": 9.851225184960349e-06, "loss": 34.053, "step": 85030 }, { "epoch": 0.17178618034316834, "grad_norm": 348.8714904785156, "learning_rate": 9.851140655531357e-06, "loss": 18.7023, "step": 85040 }, { "epoch": 0.17180638097585216, "grad_norm": 1116.8525390625, "learning_rate": 9.851056102458492e-06, "loss": 25.2101, "step": 85050 }, { "epoch": 0.17182658160853598, "grad_norm": 180.66981506347656, "learning_rate": 9.85097152574217e-06, "loss": 16.0873, "step": 85060 }, { "epoch": 0.1718467822412198, "grad_norm": 700.40478515625, "learning_rate": 9.8508869253828e-06, "loss": 32.6577, "step": 85070 }, { "epoch": 0.17186698287390362, "grad_norm": 347.9123840332031, "learning_rate": 9.850802301380793e-06, "loss": 18.1944, "step": 85080 }, { "epoch": 0.17188718350658744, "grad_norm": 50.497230529785156, "learning_rate": 9.850717653736566e-06, "loss": 21.6603, "step": 85090 }, { "epoch": 0.17190738413927123, "grad_norm": 222.06668090820312, "learning_rate": 9.85063298245053e-06, "loss": 30.4199, "step": 85100 }, { "epoch": 0.17192758477195505, "grad_norm": 149.8507843017578, "learning_rate": 9.850548287523096e-06, "loss": 22.3144, "step": 85110 }, { "epoch": 0.17194778540463887, "grad_norm": 397.1883850097656, "learning_rate": 9.850463568954679e-06, "loss": 11.4104, "step": 85120 }, { "epoch": 0.1719679860373227, "grad_norm": 391.9459533691406, "learning_rate": 9.85037882674569e-06, "loss": 20.0366, "step": 85130 }, { "epoch": 0.1719881866700065, "grad_norm": 362.1179504394531, "learning_rate": 9.850294060896544e-06, "loss": 28.2257, "step": 85140 }, { "epoch": 0.17200838730269033, "grad_norm": 200.34681701660156, "learning_rate": 9.850209271407653e-06, "loss": 18.8264, "step": 85150 }, { "epoch": 0.17202858793537412, "grad_norm": 0.0, "learning_rate": 9.850124458279429e-06, "loss": 15.5735, "step": 85160 }, { "epoch": 0.17204878856805794, "grad_norm": 447.478515625, "learning_rate": 9.850039621512287e-06, "loss": 24.8637, "step": 85170 }, { "epoch": 0.17206898920074176, "grad_norm": 226.17283630371094, "learning_rate": 9.849954761106642e-06, "loss": 18.0156, "step": 85180 }, { "epoch": 0.17208918983342558, "grad_norm": 315.91607666015625, "learning_rate": 9.849869877062903e-06, "loss": 26.8137, "step": 85190 }, { "epoch": 0.1721093904661094, "grad_norm": 427.2509765625, "learning_rate": 9.849784969381488e-06, "loss": 27.1346, "step": 85200 }, { "epoch": 0.17212959109879322, "grad_norm": 1112.1611328125, "learning_rate": 9.849700038062808e-06, "loss": 36.3495, "step": 85210 }, { "epoch": 0.17214979173147704, "grad_norm": 220.50448608398438, "learning_rate": 9.849615083107279e-06, "loss": 24.0419, "step": 85220 }, { "epoch": 0.17216999236416083, "grad_norm": 406.08526611328125, "learning_rate": 9.849530104515314e-06, "loss": 17.1039, "step": 85230 }, { "epoch": 0.17219019299684465, "grad_norm": 102.21381378173828, "learning_rate": 9.849445102287328e-06, "loss": 11.0827, "step": 85240 }, { "epoch": 0.17221039362952847, "grad_norm": 285.37158203125, "learning_rate": 9.849360076423736e-06, "loss": 30.0452, "step": 85250 }, { "epoch": 0.1722305942622123, "grad_norm": 559.7421875, "learning_rate": 9.849275026924949e-06, "loss": 33.9452, "step": 85260 }, { "epoch": 0.17225079489489611, "grad_norm": 811.9658813476562, "learning_rate": 9.849189953791385e-06, "loss": 34.1936, "step": 85270 }, { "epoch": 0.17227099552757993, "grad_norm": 176.1626434326172, "learning_rate": 9.849104857023455e-06, "loss": 17.397, "step": 85280 }, { "epoch": 0.17229119616026373, "grad_norm": 192.6520233154297, "learning_rate": 9.849019736621578e-06, "loss": 35.5135, "step": 85290 }, { "epoch": 0.17231139679294755, "grad_norm": 13.615872383117676, "learning_rate": 9.848934592586165e-06, "loss": 20.4972, "step": 85300 }, { "epoch": 0.17233159742563137, "grad_norm": 286.0615234375, "learning_rate": 9.848849424917636e-06, "loss": 26.3584, "step": 85310 }, { "epoch": 0.1723517980583152, "grad_norm": 120.81085968017578, "learning_rate": 9.848764233616401e-06, "loss": 36.59, "step": 85320 }, { "epoch": 0.172371998690999, "grad_norm": 0.0, "learning_rate": 9.848679018682879e-06, "loss": 19.986, "step": 85330 }, { "epoch": 0.17239219932368283, "grad_norm": 182.1063995361328, "learning_rate": 9.848593780117482e-06, "loss": 25.6217, "step": 85340 }, { "epoch": 0.17241239995636665, "grad_norm": 216.0826416015625, "learning_rate": 9.848508517920626e-06, "loss": 27.0021, "step": 85350 }, { "epoch": 0.17243260058905044, "grad_norm": 305.2259826660156, "learning_rate": 9.84842323209273e-06, "loss": 21.4958, "step": 85360 }, { "epoch": 0.17245280122173426, "grad_norm": 490.9894104003906, "learning_rate": 9.848337922634205e-06, "loss": 22.691, "step": 85370 }, { "epoch": 0.17247300185441808, "grad_norm": 475.3125, "learning_rate": 9.84825258954547e-06, "loss": 24.1004, "step": 85380 }, { "epoch": 0.1724932024871019, "grad_norm": 151.05519104003906, "learning_rate": 9.84816723282694e-06, "loss": 22.6916, "step": 85390 }, { "epoch": 0.17251340311978572, "grad_norm": 683.9437255859375, "learning_rate": 9.84808185247903e-06, "loss": 38.4804, "step": 85400 }, { "epoch": 0.17253360375246954, "grad_norm": 527.5541381835938, "learning_rate": 9.847996448502159e-06, "loss": 36.6851, "step": 85410 }, { "epoch": 0.17255380438515333, "grad_norm": 626.6885986328125, "learning_rate": 9.84791102089674e-06, "loss": 38.3677, "step": 85420 }, { "epoch": 0.17257400501783715, "grad_norm": 224.6432342529297, "learning_rate": 9.84782556966319e-06, "loss": 26.3055, "step": 85430 }, { "epoch": 0.17259420565052097, "grad_norm": 182.1968536376953, "learning_rate": 9.847740094801928e-06, "loss": 20.4819, "step": 85440 }, { "epoch": 0.1726144062832048, "grad_norm": 0.0, "learning_rate": 9.847654596313368e-06, "loss": 26.5135, "step": 85450 }, { "epoch": 0.1726346069158886, "grad_norm": 323.23876953125, "learning_rate": 9.847569074197927e-06, "loss": 15.7724, "step": 85460 }, { "epoch": 0.17265480754857243, "grad_norm": 281.2192077636719, "learning_rate": 9.847483528456021e-06, "loss": 25.9193, "step": 85470 }, { "epoch": 0.17267500818125622, "grad_norm": 963.7646484375, "learning_rate": 9.84739795908807e-06, "loss": 24.3037, "step": 85480 }, { "epoch": 0.17269520881394004, "grad_norm": 162.03683471679688, "learning_rate": 9.84731236609449e-06, "loss": 18.2723, "step": 85490 }, { "epoch": 0.17271540944662386, "grad_norm": 452.464111328125, "learning_rate": 9.847226749475696e-06, "loss": 16.9832, "step": 85500 }, { "epoch": 0.17273561007930768, "grad_norm": 12.439457893371582, "learning_rate": 9.847141109232105e-06, "loss": 39.1478, "step": 85510 }, { "epoch": 0.1727558107119915, "grad_norm": 224.26266479492188, "learning_rate": 9.84705544536414e-06, "loss": 21.4832, "step": 85520 }, { "epoch": 0.17277601134467532, "grad_norm": 279.1081237792969, "learning_rate": 9.846969757872212e-06, "loss": 31.6636, "step": 85530 }, { "epoch": 0.17279621197735914, "grad_norm": 127.87471771240234, "learning_rate": 9.846884046756742e-06, "loss": 29.0749, "step": 85540 }, { "epoch": 0.17281641261004294, "grad_norm": 164.3279266357422, "learning_rate": 9.846798312018147e-06, "loss": 21.6745, "step": 85550 }, { "epoch": 0.17283661324272676, "grad_norm": 301.88037109375, "learning_rate": 9.846712553656845e-06, "loss": 20.5714, "step": 85560 }, { "epoch": 0.17285681387541058, "grad_norm": 242.24362182617188, "learning_rate": 9.846626771673254e-06, "loss": 31.7325, "step": 85570 }, { "epoch": 0.1728770145080944, "grad_norm": 284.55609130859375, "learning_rate": 9.846540966067793e-06, "loss": 23.2168, "step": 85580 }, { "epoch": 0.17289721514077822, "grad_norm": 327.6295166015625, "learning_rate": 9.846455136840876e-06, "loss": 31.0933, "step": 85590 }, { "epoch": 0.17291741577346204, "grad_norm": 158.96127319335938, "learning_rate": 9.846369283992927e-06, "loss": 9.9503, "step": 85600 }, { "epoch": 0.17293761640614583, "grad_norm": 277.4278869628906, "learning_rate": 9.846283407524362e-06, "loss": 20.3902, "step": 85610 }, { "epoch": 0.17295781703882965, "grad_norm": 321.7147216796875, "learning_rate": 9.846197507435598e-06, "loss": 28.0512, "step": 85620 }, { "epoch": 0.17297801767151347, "grad_norm": 186.66732788085938, "learning_rate": 9.846111583727056e-06, "loss": 13.3802, "step": 85630 }, { "epoch": 0.1729982183041973, "grad_norm": 148.4031219482422, "learning_rate": 9.846025636399152e-06, "loss": 33.7018, "step": 85640 }, { "epoch": 0.1730184189368811, "grad_norm": 327.5085754394531, "learning_rate": 9.845939665452309e-06, "loss": 21.5132, "step": 85650 }, { "epoch": 0.17303861956956493, "grad_norm": 174.8408203125, "learning_rate": 9.845853670886945e-06, "loss": 31.9308, "step": 85660 }, { "epoch": 0.17305882020224875, "grad_norm": 709.9732666015625, "learning_rate": 9.845767652703475e-06, "loss": 43.9263, "step": 85670 }, { "epoch": 0.17307902083493254, "grad_norm": 247.24818420410156, "learning_rate": 9.845681610902323e-06, "loss": 39.5147, "step": 85680 }, { "epoch": 0.17309922146761636, "grad_norm": 229.6263427734375, "learning_rate": 9.845595545483906e-06, "loss": 13.3858, "step": 85690 }, { "epoch": 0.17311942210030018, "grad_norm": 192.13433837890625, "learning_rate": 9.845509456448642e-06, "loss": 16.7545, "step": 85700 }, { "epoch": 0.173139622732984, "grad_norm": 59.46381378173828, "learning_rate": 9.845423343796957e-06, "loss": 36.9117, "step": 85710 }, { "epoch": 0.17315982336566782, "grad_norm": 249.67477416992188, "learning_rate": 9.845337207529264e-06, "loss": 16.9929, "step": 85720 }, { "epoch": 0.17318002399835164, "grad_norm": 156.17892456054688, "learning_rate": 9.845251047645984e-06, "loss": 16.9912, "step": 85730 }, { "epoch": 0.17320022463103543, "grad_norm": 594.8213500976562, "learning_rate": 9.84516486414754e-06, "loss": 37.3285, "step": 85740 }, { "epoch": 0.17322042526371925, "grad_norm": 352.3725891113281, "learning_rate": 9.845078657034348e-06, "loss": 12.7506, "step": 85750 }, { "epoch": 0.17324062589640307, "grad_norm": 428.70953369140625, "learning_rate": 9.844992426306832e-06, "loss": 19.9736, "step": 85760 }, { "epoch": 0.1732608265290869, "grad_norm": 180.72329711914062, "learning_rate": 9.84490617196541e-06, "loss": 14.9355, "step": 85770 }, { "epoch": 0.1732810271617707, "grad_norm": 39.293922424316406, "learning_rate": 9.844819894010502e-06, "loss": 11.8103, "step": 85780 }, { "epoch": 0.17330122779445453, "grad_norm": 208.16317749023438, "learning_rate": 9.84473359244253e-06, "loss": 9.9368, "step": 85790 }, { "epoch": 0.17332142842713832, "grad_norm": 356.2953186035156, "learning_rate": 9.844647267261915e-06, "loss": 15.9601, "step": 85800 }, { "epoch": 0.17334162905982214, "grad_norm": 168.4425048828125, "learning_rate": 9.844560918469076e-06, "loss": 33.9255, "step": 85810 }, { "epoch": 0.17336182969250596, "grad_norm": 306.7591247558594, "learning_rate": 9.844474546064436e-06, "loss": 41.9797, "step": 85820 }, { "epoch": 0.17338203032518978, "grad_norm": 208.79046630859375, "learning_rate": 9.844388150048413e-06, "loss": 28.2466, "step": 85830 }, { "epoch": 0.1734022309578736, "grad_norm": 376.2872009277344, "learning_rate": 9.844301730421431e-06, "loss": 18.7639, "step": 85840 }, { "epoch": 0.17342243159055742, "grad_norm": 82.89310455322266, "learning_rate": 9.84421528718391e-06, "loss": 26.0091, "step": 85850 }, { "epoch": 0.17344263222324124, "grad_norm": 61.881839752197266, "learning_rate": 9.844128820336269e-06, "loss": 26.8232, "step": 85860 }, { "epoch": 0.17346283285592504, "grad_norm": 168.95465087890625, "learning_rate": 9.844042329878934e-06, "loss": 26.7037, "step": 85870 }, { "epoch": 0.17348303348860886, "grad_norm": 461.1837158203125, "learning_rate": 9.843955815812322e-06, "loss": 26.5489, "step": 85880 }, { "epoch": 0.17350323412129268, "grad_norm": 498.4544372558594, "learning_rate": 9.843869278136857e-06, "loss": 33.1602, "step": 85890 }, { "epoch": 0.1735234347539765, "grad_norm": 392.6561279296875, "learning_rate": 9.843782716852963e-06, "loss": 28.6238, "step": 85900 }, { "epoch": 0.17354363538666032, "grad_norm": 610.072998046875, "learning_rate": 9.843696131961058e-06, "loss": 31.7436, "step": 85910 }, { "epoch": 0.17356383601934414, "grad_norm": 213.9162139892578, "learning_rate": 9.843609523461565e-06, "loss": 20.588, "step": 85920 }, { "epoch": 0.17358403665202793, "grad_norm": 160.66455078125, "learning_rate": 9.843522891354908e-06, "loss": 14.0331, "step": 85930 }, { "epoch": 0.17360423728471175, "grad_norm": 3.9117703437805176, "learning_rate": 9.843436235641506e-06, "loss": 18.9164, "step": 85940 }, { "epoch": 0.17362443791739557, "grad_norm": 500.8890075683594, "learning_rate": 9.843349556321787e-06, "loss": 22.647, "step": 85950 }, { "epoch": 0.1736446385500794, "grad_norm": 115.68204498291016, "learning_rate": 9.843262853396164e-06, "loss": 24.7894, "step": 85960 }, { "epoch": 0.1736648391827632, "grad_norm": 220.97817993164062, "learning_rate": 9.84317612686507e-06, "loss": 22.5631, "step": 85970 }, { "epoch": 0.17368503981544703, "grad_norm": 368.697998046875, "learning_rate": 9.843089376728922e-06, "loss": 33.8314, "step": 85980 }, { "epoch": 0.17370524044813085, "grad_norm": 124.01139068603516, "learning_rate": 9.843002602988143e-06, "loss": 24.8427, "step": 85990 }, { "epoch": 0.17372544108081464, "grad_norm": 5.2383341789245605, "learning_rate": 9.842915805643156e-06, "loss": 21.6277, "step": 86000 }, { "epoch": 0.17374564171349846, "grad_norm": 178.3483428955078, "learning_rate": 9.842828984694385e-06, "loss": 15.1281, "step": 86010 }, { "epoch": 0.17376584234618228, "grad_norm": 448.1756896972656, "learning_rate": 9.842742140142255e-06, "loss": 31.3179, "step": 86020 }, { "epoch": 0.1737860429788661, "grad_norm": 287.5810852050781, "learning_rate": 9.842655271987185e-06, "loss": 21.2155, "step": 86030 }, { "epoch": 0.17380624361154992, "grad_norm": 366.2618713378906, "learning_rate": 9.8425683802296e-06, "loss": 33.0428, "step": 86040 }, { "epoch": 0.17382644424423374, "grad_norm": 161.5236358642578, "learning_rate": 9.842481464869926e-06, "loss": 32.701, "step": 86050 }, { "epoch": 0.17384664487691753, "grad_norm": 648.8118286132812, "learning_rate": 9.842394525908585e-06, "loss": 43.9964, "step": 86060 }, { "epoch": 0.17386684550960135, "grad_norm": 361.2855224609375, "learning_rate": 9.842307563345999e-06, "loss": 16.9019, "step": 86070 }, { "epoch": 0.17388704614228517, "grad_norm": 361.4525146484375, "learning_rate": 9.842220577182592e-06, "loss": 24.7891, "step": 86080 }, { "epoch": 0.173907246774969, "grad_norm": 136.2117462158203, "learning_rate": 9.842133567418793e-06, "loss": 28.6969, "step": 86090 }, { "epoch": 0.1739274474076528, "grad_norm": 46.72444152832031, "learning_rate": 9.84204653405502e-06, "loss": 40.0708, "step": 86100 }, { "epoch": 0.17394764804033663, "grad_norm": 313.0541687011719, "learning_rate": 9.841959477091698e-06, "loss": 11.4933, "step": 86110 }, { "epoch": 0.17396784867302043, "grad_norm": 213.4998016357422, "learning_rate": 9.841872396529255e-06, "loss": 42.5187, "step": 86120 }, { "epoch": 0.17398804930570425, "grad_norm": 327.9616394042969, "learning_rate": 9.841785292368113e-06, "loss": 31.9306, "step": 86130 }, { "epoch": 0.17400824993838807, "grad_norm": 0.0, "learning_rate": 9.841698164608696e-06, "loss": 20.154, "step": 86140 }, { "epoch": 0.17402845057107189, "grad_norm": 342.0955810546875, "learning_rate": 9.841611013251428e-06, "loss": 27.4456, "step": 86150 }, { "epoch": 0.1740486512037557, "grad_norm": 245.87460327148438, "learning_rate": 9.841523838296738e-06, "loss": 27.2515, "step": 86160 }, { "epoch": 0.17406885183643953, "grad_norm": 306.50714111328125, "learning_rate": 9.841436639745046e-06, "loss": 23.3546, "step": 86170 }, { "epoch": 0.17408905246912335, "grad_norm": 12.992705345153809, "learning_rate": 9.84134941759678e-06, "loss": 22.5053, "step": 86180 }, { "epoch": 0.17410925310180714, "grad_norm": 434.20733642578125, "learning_rate": 9.841262171852364e-06, "loss": 31.2974, "step": 86190 }, { "epoch": 0.17412945373449096, "grad_norm": 400.7278747558594, "learning_rate": 9.841174902512223e-06, "loss": 18.0311, "step": 86200 }, { "epoch": 0.17414965436717478, "grad_norm": 182.572021484375, "learning_rate": 9.841087609576782e-06, "loss": 22.4021, "step": 86210 }, { "epoch": 0.1741698549998586, "grad_norm": 434.7891845703125, "learning_rate": 9.841000293046469e-06, "loss": 15.3625, "step": 86220 }, { "epoch": 0.17419005563254242, "grad_norm": 190.50379943847656, "learning_rate": 9.840912952921707e-06, "loss": 26.352, "step": 86230 }, { "epoch": 0.17421025626522624, "grad_norm": 546.002197265625, "learning_rate": 9.840825589202922e-06, "loss": 33.616, "step": 86240 }, { "epoch": 0.17423045689791003, "grad_norm": 209.64810180664062, "learning_rate": 9.84073820189054e-06, "loss": 25.9001, "step": 86250 }, { "epoch": 0.17425065753059385, "grad_norm": 374.17620849609375, "learning_rate": 9.840650790984988e-06, "loss": 31.2641, "step": 86260 }, { "epoch": 0.17427085816327767, "grad_norm": 430.3647766113281, "learning_rate": 9.84056335648669e-06, "loss": 24.3916, "step": 86270 }, { "epoch": 0.1742910587959615, "grad_norm": 82.22520446777344, "learning_rate": 9.840475898396073e-06, "loss": 26.7707, "step": 86280 }, { "epoch": 0.1743112594286453, "grad_norm": 103.35070037841797, "learning_rate": 9.840388416713564e-06, "loss": 14.3607, "step": 86290 }, { "epoch": 0.17433146006132913, "grad_norm": 208.44935607910156, "learning_rate": 9.84030091143959e-06, "loss": 22.5298, "step": 86300 }, { "epoch": 0.17435166069401295, "grad_norm": 414.38311767578125, "learning_rate": 9.840213382574575e-06, "loss": 30.4002, "step": 86310 }, { "epoch": 0.17437186132669674, "grad_norm": 362.54290771484375, "learning_rate": 9.840125830118949e-06, "loss": 24.5089, "step": 86320 }, { "epoch": 0.17439206195938056, "grad_norm": 487.76312255859375, "learning_rate": 9.840038254073136e-06, "loss": 28.7847, "step": 86330 }, { "epoch": 0.17441226259206438, "grad_norm": 225.82655334472656, "learning_rate": 9.839950654437563e-06, "loss": 27.841, "step": 86340 }, { "epoch": 0.1744324632247482, "grad_norm": 216.51235961914062, "learning_rate": 9.839863031212657e-06, "loss": 23.0318, "step": 86350 }, { "epoch": 0.17445266385743202, "grad_norm": 326.4658508300781, "learning_rate": 9.839775384398846e-06, "loss": 19.3791, "step": 86360 }, { "epoch": 0.17447286449011584, "grad_norm": 304.0533142089844, "learning_rate": 9.839687713996558e-06, "loss": 17.6429, "step": 86370 }, { "epoch": 0.17449306512279963, "grad_norm": 251.84552001953125, "learning_rate": 9.839600020006217e-06, "loss": 35.9038, "step": 86380 }, { "epoch": 0.17451326575548345, "grad_norm": 730.36572265625, "learning_rate": 9.839512302428254e-06, "loss": 30.4181, "step": 86390 }, { "epoch": 0.17453346638816727, "grad_norm": 497.6684875488281, "learning_rate": 9.839424561263094e-06, "loss": 41.7013, "step": 86400 }, { "epoch": 0.1745536670208511, "grad_norm": 186.3009796142578, "learning_rate": 9.839336796511167e-06, "loss": 25.9393, "step": 86410 }, { "epoch": 0.17457386765353491, "grad_norm": 273.61004638671875, "learning_rate": 9.839249008172897e-06, "loss": 18.8838, "step": 86420 }, { "epoch": 0.17459406828621873, "grad_norm": 437.0420227050781, "learning_rate": 9.839161196248717e-06, "loss": 29.7356, "step": 86430 }, { "epoch": 0.17461426891890253, "grad_norm": 155.9214324951172, "learning_rate": 9.839073360739052e-06, "loss": 23.6401, "step": 86440 }, { "epoch": 0.17463446955158635, "grad_norm": 82.72378540039062, "learning_rate": 9.838985501644329e-06, "loss": 8.5619, "step": 86450 }, { "epoch": 0.17465467018427017, "grad_norm": 476.47650146484375, "learning_rate": 9.838897618964978e-06, "loss": 33.683, "step": 86460 }, { "epoch": 0.174674870816954, "grad_norm": 289.3583679199219, "learning_rate": 9.838809712701426e-06, "loss": 36.5238, "step": 86470 }, { "epoch": 0.1746950714496378, "grad_norm": 245.28480529785156, "learning_rate": 9.838721782854103e-06, "loss": 21.4639, "step": 86480 }, { "epoch": 0.17471527208232163, "grad_norm": 206.80816650390625, "learning_rate": 9.838633829423437e-06, "loss": 21.1138, "step": 86490 }, { "epoch": 0.17473547271500545, "grad_norm": 302.3206787109375, "learning_rate": 9.838545852409857e-06, "loss": 35.2079, "step": 86500 }, { "epoch": 0.17475567334768924, "grad_norm": 453.5033874511719, "learning_rate": 9.83845785181379e-06, "loss": 26.0613, "step": 86510 }, { "epoch": 0.17477587398037306, "grad_norm": 0.0, "learning_rate": 9.838369827635668e-06, "loss": 13.7745, "step": 86520 }, { "epoch": 0.17479607461305688, "grad_norm": 147.36013793945312, "learning_rate": 9.838281779875918e-06, "loss": 24.0167, "step": 86530 }, { "epoch": 0.1748162752457407, "grad_norm": 267.82464599609375, "learning_rate": 9.838193708534969e-06, "loss": 30.1507, "step": 86540 }, { "epoch": 0.17483647587842452, "grad_norm": 234.4063720703125, "learning_rate": 9.83810561361325e-06, "loss": 29.5222, "step": 86550 }, { "epoch": 0.17485667651110834, "grad_norm": 336.3824462890625, "learning_rate": 9.838017495111191e-06, "loss": 31.0369, "step": 86560 }, { "epoch": 0.17487687714379213, "grad_norm": 433.2615051269531, "learning_rate": 9.837929353029223e-06, "loss": 15.572, "step": 86570 }, { "epoch": 0.17489707777647595, "grad_norm": 524.951904296875, "learning_rate": 9.837841187367774e-06, "loss": 23.8866, "step": 86580 }, { "epoch": 0.17491727840915977, "grad_norm": 173.6634979248047, "learning_rate": 9.837752998127272e-06, "loss": 15.646, "step": 86590 }, { "epoch": 0.1749374790418436, "grad_norm": 240.8075408935547, "learning_rate": 9.83766478530815e-06, "loss": 22.6934, "step": 86600 }, { "epoch": 0.1749576796745274, "grad_norm": 495.738525390625, "learning_rate": 9.837576548910836e-06, "loss": 29.8548, "step": 86610 }, { "epoch": 0.17497788030721123, "grad_norm": 455.8369140625, "learning_rate": 9.837488288935761e-06, "loss": 24.0963, "step": 86620 }, { "epoch": 0.17499808093989505, "grad_norm": 1143.65185546875, "learning_rate": 9.837400005383355e-06, "loss": 25.775, "step": 86630 }, { "epoch": 0.17501828157257884, "grad_norm": 149.33277893066406, "learning_rate": 9.837311698254048e-06, "loss": 21.9569, "step": 86640 }, { "epoch": 0.17503848220526266, "grad_norm": 0.0, "learning_rate": 9.837223367548271e-06, "loss": 41.5356, "step": 86650 }, { "epoch": 0.17505868283794648, "grad_norm": 0.0, "learning_rate": 9.837135013266452e-06, "loss": 13.0002, "step": 86660 }, { "epoch": 0.1750788834706303, "grad_norm": 116.92649841308594, "learning_rate": 9.837046635409026e-06, "loss": 21.7658, "step": 86670 }, { "epoch": 0.17509908410331412, "grad_norm": 130.19313049316406, "learning_rate": 9.83695823397642e-06, "loss": 21.2905, "step": 86680 }, { "epoch": 0.17511928473599794, "grad_norm": 48.689640045166016, "learning_rate": 9.836869808969068e-06, "loss": 20.8135, "step": 86690 }, { "epoch": 0.17513948536868174, "grad_norm": 146.6575164794922, "learning_rate": 9.836781360387396e-06, "loss": 20.8527, "step": 86700 }, { "epoch": 0.17515968600136556, "grad_norm": 581.2515869140625, "learning_rate": 9.83669288823184e-06, "loss": 22.3005, "step": 86710 }, { "epoch": 0.17517988663404938, "grad_norm": 217.97181701660156, "learning_rate": 9.836604392502829e-06, "loss": 52.8381, "step": 86720 }, { "epoch": 0.1752000872667332, "grad_norm": 320.7890930175781, "learning_rate": 9.836515873200796e-06, "loss": 31.946, "step": 86730 }, { "epoch": 0.17522028789941702, "grad_norm": 0.0, "learning_rate": 9.83642733032617e-06, "loss": 17.8551, "step": 86740 }, { "epoch": 0.17524048853210084, "grad_norm": 135.2252197265625, "learning_rate": 9.836338763879386e-06, "loss": 16.9755, "step": 86750 }, { "epoch": 0.17526068916478463, "grad_norm": 264.128173828125, "learning_rate": 9.83625017386087e-06, "loss": 30.3462, "step": 86760 }, { "epoch": 0.17528088979746845, "grad_norm": 144.09060668945312, "learning_rate": 9.836161560271058e-06, "loss": 27.7033, "step": 86770 }, { "epoch": 0.17530109043015227, "grad_norm": 189.78616333007812, "learning_rate": 9.836072923110384e-06, "loss": 27.2165, "step": 86780 }, { "epoch": 0.1753212910628361, "grad_norm": 267.1493225097656, "learning_rate": 9.835984262379275e-06, "loss": 33.0677, "step": 86790 }, { "epoch": 0.1753414916955199, "grad_norm": 134.94338989257812, "learning_rate": 9.835895578078165e-06, "loss": 28.6202, "step": 86800 }, { "epoch": 0.17536169232820373, "grad_norm": 310.7304382324219, "learning_rate": 9.835806870207487e-06, "loss": 12.2578, "step": 86810 }, { "epoch": 0.17538189296088755, "grad_norm": 341.72418212890625, "learning_rate": 9.835718138767672e-06, "loss": 30.9801, "step": 86820 }, { "epoch": 0.17540209359357134, "grad_norm": 363.3751525878906, "learning_rate": 9.835629383759155e-06, "loss": 13.7851, "step": 86830 }, { "epoch": 0.17542229422625516, "grad_norm": 247.45960998535156, "learning_rate": 9.835540605182366e-06, "loss": 24.5224, "step": 86840 }, { "epoch": 0.17544249485893898, "grad_norm": 616.5775756835938, "learning_rate": 9.835451803037738e-06, "loss": 36.0374, "step": 86850 }, { "epoch": 0.1754626954916228, "grad_norm": 782.5467529296875, "learning_rate": 9.835362977325703e-06, "loss": 23.5117, "step": 86860 }, { "epoch": 0.17548289612430662, "grad_norm": 185.7198028564453, "learning_rate": 9.835274128046698e-06, "loss": 21.7507, "step": 86870 }, { "epoch": 0.17550309675699044, "grad_norm": 448.16583251953125, "learning_rate": 9.835185255201153e-06, "loss": 28.1759, "step": 86880 }, { "epoch": 0.17552329738967423, "grad_norm": 182.14102172851562, "learning_rate": 9.835096358789501e-06, "loss": 29.8701, "step": 86890 }, { "epoch": 0.17554349802235805, "grad_norm": 294.3314514160156, "learning_rate": 9.835007438812177e-06, "loss": 24.2942, "step": 86900 }, { "epoch": 0.17556369865504187, "grad_norm": 375.54937744140625, "learning_rate": 9.834918495269611e-06, "loss": 22.0337, "step": 86910 }, { "epoch": 0.1755838992877257, "grad_norm": 212.92361450195312, "learning_rate": 9.83482952816224e-06, "loss": 20.1294, "step": 86920 }, { "epoch": 0.1756040999204095, "grad_norm": 228.71690368652344, "learning_rate": 9.834740537490495e-06, "loss": 43.7038, "step": 86930 }, { "epoch": 0.17562430055309333, "grad_norm": 230.81985473632812, "learning_rate": 9.834651523254812e-06, "loss": 22.8129, "step": 86940 }, { "epoch": 0.17564450118577715, "grad_norm": 73.87609100341797, "learning_rate": 9.834562485455622e-06, "loss": 11.5567, "step": 86950 }, { "epoch": 0.17566470181846094, "grad_norm": 236.40960693359375, "learning_rate": 9.834473424093364e-06, "loss": 17.1244, "step": 86960 }, { "epoch": 0.17568490245114476, "grad_norm": 224.12725830078125, "learning_rate": 9.834384339168468e-06, "loss": 25.0718, "step": 86970 }, { "epoch": 0.17570510308382858, "grad_norm": 291.0908508300781, "learning_rate": 9.834295230681368e-06, "loss": 21.8121, "step": 86980 }, { "epoch": 0.1757253037165124, "grad_norm": 409.4087219238281, "learning_rate": 9.834206098632499e-06, "loss": 19.9877, "step": 86990 }, { "epoch": 0.17574550434919622, "grad_norm": 342.7087097167969, "learning_rate": 9.834116943022299e-06, "loss": 26.1323, "step": 87000 }, { "epoch": 0.17576570498188004, "grad_norm": 161.49472045898438, "learning_rate": 9.834027763851196e-06, "loss": 23.7984, "step": 87010 }, { "epoch": 0.17578590561456384, "grad_norm": 171.11636352539062, "learning_rate": 9.833938561119629e-06, "loss": 37.579, "step": 87020 }, { "epoch": 0.17580610624724766, "grad_norm": 229.868408203125, "learning_rate": 9.833849334828033e-06, "loss": 21.8186, "step": 87030 }, { "epoch": 0.17582630687993148, "grad_norm": 152.10107421875, "learning_rate": 9.833760084976838e-06, "loss": 11.729, "step": 87040 }, { "epoch": 0.1758465075126153, "grad_norm": 318.3954772949219, "learning_rate": 9.833670811566485e-06, "loss": 19.8234, "step": 87050 }, { "epoch": 0.17586670814529912, "grad_norm": 352.87811279296875, "learning_rate": 9.833581514597408e-06, "loss": 22.8857, "step": 87060 }, { "epoch": 0.17588690877798294, "grad_norm": 344.8995361328125, "learning_rate": 9.833492194070039e-06, "loss": 14.9546, "step": 87070 }, { "epoch": 0.17590710941066673, "grad_norm": 134.50465393066406, "learning_rate": 9.833402849984815e-06, "loss": 25.6468, "step": 87080 }, { "epoch": 0.17592731004335055, "grad_norm": 197.21588134765625, "learning_rate": 9.833313482342173e-06, "loss": 8.0059, "step": 87090 }, { "epoch": 0.17594751067603437, "grad_norm": 145.87562561035156, "learning_rate": 9.833224091142548e-06, "loss": 21.4444, "step": 87100 }, { "epoch": 0.1759677113087182, "grad_norm": 262.25494384765625, "learning_rate": 9.833134676386373e-06, "loss": 28.8698, "step": 87110 }, { "epoch": 0.175987911941402, "grad_norm": 198.03170776367188, "learning_rate": 9.833045238074085e-06, "loss": 36.288, "step": 87120 }, { "epoch": 0.17600811257408583, "grad_norm": 312.36383056640625, "learning_rate": 9.832955776206123e-06, "loss": 24.0453, "step": 87130 }, { "epoch": 0.17602831320676965, "grad_norm": 212.31517028808594, "learning_rate": 9.832866290782922e-06, "loss": 19.1372, "step": 87140 }, { "epoch": 0.17604851383945344, "grad_norm": 143.73716735839844, "learning_rate": 9.832776781804913e-06, "loss": 17.2943, "step": 87150 }, { "epoch": 0.17606871447213726, "grad_norm": 297.767822265625, "learning_rate": 9.83268724927254e-06, "loss": 52.2459, "step": 87160 }, { "epoch": 0.17608891510482108, "grad_norm": 323.555908203125, "learning_rate": 9.832597693186233e-06, "loss": 19.0136, "step": 87170 }, { "epoch": 0.1761091157375049, "grad_norm": 324.16455078125, "learning_rate": 9.83250811354643e-06, "loss": 18.9792, "step": 87180 }, { "epoch": 0.17612931637018872, "grad_norm": 423.2179260253906, "learning_rate": 9.832418510353572e-06, "loss": 27.9144, "step": 87190 }, { "epoch": 0.17614951700287254, "grad_norm": 163.1009979248047, "learning_rate": 9.832328883608088e-06, "loss": 23.4754, "step": 87200 }, { "epoch": 0.17616971763555633, "grad_norm": 285.90374755859375, "learning_rate": 9.832239233310421e-06, "loss": 24.1709, "step": 87210 }, { "epoch": 0.17618991826824015, "grad_norm": 267.52447509765625, "learning_rate": 9.832149559461009e-06, "loss": 23.4203, "step": 87220 }, { "epoch": 0.17621011890092397, "grad_norm": 92.87543487548828, "learning_rate": 9.832059862060282e-06, "loss": 27.2802, "step": 87230 }, { "epoch": 0.1762303195336078, "grad_norm": 290.3955383300781, "learning_rate": 9.831970141108684e-06, "loss": 34.4869, "step": 87240 }, { "epoch": 0.1762505201662916, "grad_norm": 205.77059936523438, "learning_rate": 9.831880396606649e-06, "loss": 14.9208, "step": 87250 }, { "epoch": 0.17627072079897543, "grad_norm": 398.066650390625, "learning_rate": 9.831790628554613e-06, "loss": 28.055, "step": 87260 }, { "epoch": 0.17629092143165925, "grad_norm": 325.053466796875, "learning_rate": 9.831700836953017e-06, "loss": 25.4613, "step": 87270 }, { "epoch": 0.17631112206434305, "grad_norm": 129.12359619140625, "learning_rate": 9.831611021802297e-06, "loss": 33.7937, "step": 87280 }, { "epoch": 0.17633132269702687, "grad_norm": 128.78982543945312, "learning_rate": 9.83152118310289e-06, "loss": 33.2044, "step": 87290 }, { "epoch": 0.17635152332971069, "grad_norm": 307.33984375, "learning_rate": 9.831431320855235e-06, "loss": 22.1982, "step": 87300 }, { "epoch": 0.1763717239623945, "grad_norm": 698.712890625, "learning_rate": 9.831341435059772e-06, "loss": 19.1124, "step": 87310 }, { "epoch": 0.17639192459507833, "grad_norm": 453.36285400390625, "learning_rate": 9.831251525716934e-06, "loss": 27.7465, "step": 87320 }, { "epoch": 0.17641212522776215, "grad_norm": 202.10128784179688, "learning_rate": 9.831161592827164e-06, "loss": 30.1077, "step": 87330 }, { "epoch": 0.17643232586044594, "grad_norm": 314.0151062011719, "learning_rate": 9.831071636390899e-06, "loss": 20.8082, "step": 87340 }, { "epoch": 0.17645252649312976, "grad_norm": 168.37147521972656, "learning_rate": 9.830981656408575e-06, "loss": 26.9779, "step": 87350 }, { "epoch": 0.17647272712581358, "grad_norm": 217.84182739257812, "learning_rate": 9.830891652880632e-06, "loss": 21.6826, "step": 87360 }, { "epoch": 0.1764929277584974, "grad_norm": 213.054443359375, "learning_rate": 9.83080162580751e-06, "loss": 21.2525, "step": 87370 }, { "epoch": 0.17651312839118122, "grad_norm": 289.6729431152344, "learning_rate": 9.830711575189646e-06, "loss": 22.0228, "step": 87380 }, { "epoch": 0.17653332902386504, "grad_norm": 32.493797302246094, "learning_rate": 9.83062150102748e-06, "loss": 12.8066, "step": 87390 }, { "epoch": 0.17655352965654883, "grad_norm": 615.0816650390625, "learning_rate": 9.830531403321451e-06, "loss": 23.2385, "step": 87400 }, { "epoch": 0.17657373028923265, "grad_norm": 219.1350555419922, "learning_rate": 9.830441282071999e-06, "loss": 33.3352, "step": 87410 }, { "epoch": 0.17659393092191647, "grad_norm": 264.3847351074219, "learning_rate": 9.830351137279559e-06, "loss": 19.3754, "step": 87420 }, { "epoch": 0.1766141315546003, "grad_norm": 1236.837890625, "learning_rate": 9.830260968944577e-06, "loss": 27.7418, "step": 87430 }, { "epoch": 0.1766343321872841, "grad_norm": 371.8904724121094, "learning_rate": 9.830170777067486e-06, "loss": 33.5782, "step": 87440 }, { "epoch": 0.17665453281996793, "grad_norm": 131.86477661132812, "learning_rate": 9.83008056164873e-06, "loss": 21.2082, "step": 87450 }, { "epoch": 0.17667473345265175, "grad_norm": 327.4102783203125, "learning_rate": 9.829990322688746e-06, "loss": 30.4934, "step": 87460 }, { "epoch": 0.17669493408533554, "grad_norm": 644.6732177734375, "learning_rate": 9.829900060187976e-06, "loss": 26.5776, "step": 87470 }, { "epoch": 0.17671513471801936, "grad_norm": 126.85614776611328, "learning_rate": 9.82980977414686e-06, "loss": 19.3674, "step": 87480 }, { "epoch": 0.17673533535070318, "grad_norm": 76.8537826538086, "learning_rate": 9.829719464565834e-06, "loss": 32.7964, "step": 87490 }, { "epoch": 0.176755535983387, "grad_norm": 73.37107849121094, "learning_rate": 9.829629131445342e-06, "loss": 19.5794, "step": 87500 }, { "epoch": 0.17677573661607082, "grad_norm": 190.6021728515625, "learning_rate": 9.829538774785825e-06, "loss": 33.3725, "step": 87510 }, { "epoch": 0.17679593724875464, "grad_norm": 375.3675842285156, "learning_rate": 9.82944839458772e-06, "loss": 22.6454, "step": 87520 }, { "epoch": 0.17681613788143843, "grad_norm": 250.5619354248047, "learning_rate": 9.82935799085147e-06, "loss": 20.1139, "step": 87530 }, { "epoch": 0.17683633851412225, "grad_norm": 229.90878295898438, "learning_rate": 9.829267563577514e-06, "loss": 23.8008, "step": 87540 }, { "epoch": 0.17685653914680607, "grad_norm": 279.0864562988281, "learning_rate": 9.829177112766295e-06, "loss": 18.1789, "step": 87550 }, { "epoch": 0.1768767397794899, "grad_norm": 395.2419738769531, "learning_rate": 9.829086638418252e-06, "loss": 18.116, "step": 87560 }, { "epoch": 0.17689694041217371, "grad_norm": 309.5771484375, "learning_rate": 9.828996140533826e-06, "loss": 30.0049, "step": 87570 }, { "epoch": 0.17691714104485753, "grad_norm": 0.0, "learning_rate": 9.82890561911346e-06, "loss": 18.8262, "step": 87580 }, { "epoch": 0.17693734167754135, "grad_norm": 278.1049499511719, "learning_rate": 9.828815074157591e-06, "loss": 16.8648, "step": 87590 }, { "epoch": 0.17695754231022515, "grad_norm": 140.77272033691406, "learning_rate": 9.828724505666664e-06, "loss": 28.5643, "step": 87600 }, { "epoch": 0.17697774294290897, "grad_norm": 295.2495422363281, "learning_rate": 9.82863391364112e-06, "loss": 36.9115, "step": 87610 }, { "epoch": 0.1769979435755928, "grad_norm": 141.46099853515625, "learning_rate": 9.828543298081401e-06, "loss": 19.3839, "step": 87620 }, { "epoch": 0.1770181442082766, "grad_norm": 493.9354248046875, "learning_rate": 9.828452658987946e-06, "loss": 32.2454, "step": 87630 }, { "epoch": 0.17703834484096043, "grad_norm": 519.1752319335938, "learning_rate": 9.828361996361199e-06, "loss": 16.147, "step": 87640 }, { "epoch": 0.17705854547364425, "grad_norm": 533.32177734375, "learning_rate": 9.828271310201601e-06, "loss": 23.8997, "step": 87650 }, { "epoch": 0.17707874610632804, "grad_norm": 308.5531311035156, "learning_rate": 9.828180600509595e-06, "loss": 15.8899, "step": 87660 }, { "epoch": 0.17709894673901186, "grad_norm": 134.51585388183594, "learning_rate": 9.828089867285622e-06, "loss": 12.7207, "step": 87670 }, { "epoch": 0.17711914737169568, "grad_norm": 454.7893371582031, "learning_rate": 9.827999110530124e-06, "loss": 35.0782, "step": 87680 }, { "epoch": 0.1771393480043795, "grad_norm": 320.4456481933594, "learning_rate": 9.827908330243545e-06, "loss": 32.181, "step": 87690 }, { "epoch": 0.17715954863706332, "grad_norm": 380.5893859863281, "learning_rate": 9.827817526426324e-06, "loss": 24.1342, "step": 87700 }, { "epoch": 0.17717974926974714, "grad_norm": 0.0, "learning_rate": 9.827726699078907e-06, "loss": 15.9616, "step": 87710 }, { "epoch": 0.17719994990243093, "grad_norm": 250.0610809326172, "learning_rate": 9.827635848201737e-06, "loss": 19.622, "step": 87720 }, { "epoch": 0.17722015053511475, "grad_norm": 706.1143798828125, "learning_rate": 9.827544973795254e-06, "loss": 20.9055, "step": 87730 }, { "epoch": 0.17724035116779857, "grad_norm": 440.0935363769531, "learning_rate": 9.827454075859904e-06, "loss": 41.7492, "step": 87740 }, { "epoch": 0.1772605518004824, "grad_norm": 454.4348449707031, "learning_rate": 9.827363154396126e-06, "loss": 35.7966, "step": 87750 }, { "epoch": 0.1772807524331662, "grad_norm": 151.57730102539062, "learning_rate": 9.827272209404366e-06, "loss": 26.9214, "step": 87760 }, { "epoch": 0.17730095306585003, "grad_norm": 211.90438842773438, "learning_rate": 9.827181240885068e-06, "loss": 16.6205, "step": 87770 }, { "epoch": 0.17732115369853385, "grad_norm": 381.280029296875, "learning_rate": 9.827090248838673e-06, "loss": 24.3637, "step": 87780 }, { "epoch": 0.17734135433121764, "grad_norm": 384.0274353027344, "learning_rate": 9.826999233265626e-06, "loss": 19.4312, "step": 87790 }, { "epoch": 0.17736155496390146, "grad_norm": 152.40518188476562, "learning_rate": 9.82690819416637e-06, "loss": 18.0365, "step": 87800 }, { "epoch": 0.17738175559658528, "grad_norm": 411.5858154296875, "learning_rate": 9.826817131541349e-06, "loss": 16.0279, "step": 87810 }, { "epoch": 0.1774019562292691, "grad_norm": 616.9690551757812, "learning_rate": 9.826726045391006e-06, "loss": 15.5234, "step": 87820 }, { "epoch": 0.17742215686195292, "grad_norm": 166.96945190429688, "learning_rate": 9.826634935715787e-06, "loss": 18.7473, "step": 87830 }, { "epoch": 0.17744235749463674, "grad_norm": 93.75395965576172, "learning_rate": 9.826543802516135e-06, "loss": 20.4554, "step": 87840 }, { "epoch": 0.17746255812732054, "grad_norm": 280.4478454589844, "learning_rate": 9.826452645792493e-06, "loss": 24.8111, "step": 87850 }, { "epoch": 0.17748275876000436, "grad_norm": 227.49392700195312, "learning_rate": 9.826361465545306e-06, "loss": 23.3413, "step": 87860 }, { "epoch": 0.17750295939268818, "grad_norm": 116.5406723022461, "learning_rate": 9.826270261775018e-06, "loss": 27.1088, "step": 87870 }, { "epoch": 0.177523160025372, "grad_norm": 650.1035766601562, "learning_rate": 9.826179034482074e-06, "loss": 28.3423, "step": 87880 }, { "epoch": 0.17754336065805582, "grad_norm": 658.8649291992188, "learning_rate": 9.82608778366692e-06, "loss": 16.7598, "step": 87890 }, { "epoch": 0.17756356129073964, "grad_norm": 243.3831329345703, "learning_rate": 9.825996509330001e-06, "loss": 21.1419, "step": 87900 }, { "epoch": 0.17758376192342346, "grad_norm": 399.00592041015625, "learning_rate": 9.825905211471757e-06, "loss": 37.4083, "step": 87910 }, { "epoch": 0.17760396255610725, "grad_norm": 40.176597595214844, "learning_rate": 9.825813890092639e-06, "loss": 20.0813, "step": 87920 }, { "epoch": 0.17762416318879107, "grad_norm": 317.7182312011719, "learning_rate": 9.825722545193087e-06, "loss": 18.5993, "step": 87930 }, { "epoch": 0.1776443638214749, "grad_norm": 643.2432250976562, "learning_rate": 9.82563117677355e-06, "loss": 25.1883, "step": 87940 }, { "epoch": 0.1776645644541587, "grad_norm": 396.70867919921875, "learning_rate": 9.825539784834472e-06, "loss": 15.7088, "step": 87950 }, { "epoch": 0.17768476508684253, "grad_norm": 280.4801330566406, "learning_rate": 9.825448369376298e-06, "loss": 26.4348, "step": 87960 }, { "epoch": 0.17770496571952635, "grad_norm": 387.7401123046875, "learning_rate": 9.825356930399474e-06, "loss": 16.1847, "step": 87970 }, { "epoch": 0.17772516635221014, "grad_norm": 434.7317199707031, "learning_rate": 9.825265467904446e-06, "loss": 21.0146, "step": 87980 }, { "epoch": 0.17774536698489396, "grad_norm": 382.1975402832031, "learning_rate": 9.825173981891658e-06, "loss": 25.8324, "step": 87990 }, { "epoch": 0.17776556761757778, "grad_norm": 235.26638793945312, "learning_rate": 9.825082472361558e-06, "loss": 32.5366, "step": 88000 }, { "epoch": 0.1777857682502616, "grad_norm": 342.127685546875, "learning_rate": 9.82499093931459e-06, "loss": 19.0823, "step": 88010 }, { "epoch": 0.17780596888294542, "grad_norm": 361.04473876953125, "learning_rate": 9.824899382751204e-06, "loss": 18.3347, "step": 88020 }, { "epoch": 0.17782616951562924, "grad_norm": 314.1026916503906, "learning_rate": 9.824807802671843e-06, "loss": 38.6059, "step": 88030 }, { "epoch": 0.17784637014831303, "grad_norm": 524.85107421875, "learning_rate": 9.824716199076952e-06, "loss": 29.9364, "step": 88040 }, { "epoch": 0.17786657078099685, "grad_norm": 393.987548828125, "learning_rate": 9.824624571966982e-06, "loss": 30.345, "step": 88050 }, { "epoch": 0.17788677141368067, "grad_norm": 33.73677444458008, "learning_rate": 9.824532921342375e-06, "loss": 15.8548, "step": 88060 }, { "epoch": 0.1779069720463645, "grad_norm": 794.4762573242188, "learning_rate": 9.82444124720358e-06, "loss": 44.8928, "step": 88070 }, { "epoch": 0.1779271726790483, "grad_norm": 101.84048461914062, "learning_rate": 9.824349549551045e-06, "loss": 26.8743, "step": 88080 }, { "epoch": 0.17794737331173213, "grad_norm": 187.67832946777344, "learning_rate": 9.824257828385213e-06, "loss": 7.9055, "step": 88090 }, { "epoch": 0.17796757394441595, "grad_norm": 96.26985168457031, "learning_rate": 9.824166083706534e-06, "loss": 11.7907, "step": 88100 }, { "epoch": 0.17798777457709974, "grad_norm": 154.53553771972656, "learning_rate": 9.824074315515457e-06, "loss": 23.6691, "step": 88110 }, { "epoch": 0.17800797520978356, "grad_norm": 284.51043701171875, "learning_rate": 9.823982523812424e-06, "loss": 34.7754, "step": 88120 }, { "epoch": 0.17802817584246738, "grad_norm": 226.8983154296875, "learning_rate": 9.823890708597887e-06, "loss": 19.5166, "step": 88130 }, { "epoch": 0.1780483764751512, "grad_norm": 452.9185791015625, "learning_rate": 9.823798869872291e-06, "loss": 21.277, "step": 88140 }, { "epoch": 0.17806857710783502, "grad_norm": 228.4601287841797, "learning_rate": 9.823707007636085e-06, "loss": 25.4295, "step": 88150 }, { "epoch": 0.17808877774051884, "grad_norm": 75.23994445800781, "learning_rate": 9.823615121889716e-06, "loss": 33.4419, "step": 88160 }, { "epoch": 0.17810897837320264, "grad_norm": 67.10015106201172, "learning_rate": 9.82352321263363e-06, "loss": 18.3832, "step": 88170 }, { "epoch": 0.17812917900588646, "grad_norm": 113.98564910888672, "learning_rate": 9.823431279868278e-06, "loss": 17.5204, "step": 88180 }, { "epoch": 0.17814937963857028, "grad_norm": 323.4461975097656, "learning_rate": 9.823339323594107e-06, "loss": 95.1474, "step": 88190 }, { "epoch": 0.1781695802712541, "grad_norm": 217.24609375, "learning_rate": 9.823247343811567e-06, "loss": 31.4243, "step": 88200 }, { "epoch": 0.17818978090393792, "grad_norm": 220.82000732421875, "learning_rate": 9.823155340521104e-06, "loss": 23.4389, "step": 88210 }, { "epoch": 0.17820998153662174, "grad_norm": 230.14634704589844, "learning_rate": 9.823063313723165e-06, "loss": 22.335, "step": 88220 }, { "epoch": 0.17823018216930553, "grad_norm": 386.20904541015625, "learning_rate": 9.822971263418202e-06, "loss": 20.8649, "step": 88230 }, { "epoch": 0.17825038280198935, "grad_norm": 290.13372802734375, "learning_rate": 9.82287918960666e-06, "loss": 21.0775, "step": 88240 }, { "epoch": 0.17827058343467317, "grad_norm": 224.82049560546875, "learning_rate": 9.822787092288991e-06, "loss": 35.149, "step": 88250 }, { "epoch": 0.178290784067357, "grad_norm": 305.95068359375, "learning_rate": 9.822694971465643e-06, "loss": 24.9108, "step": 88260 }, { "epoch": 0.1783109847000408, "grad_norm": 541.3460083007812, "learning_rate": 9.822602827137065e-06, "loss": 31.2173, "step": 88270 }, { "epoch": 0.17833118533272463, "grad_norm": 195.98936462402344, "learning_rate": 9.822510659303704e-06, "loss": 16.992, "step": 88280 }, { "epoch": 0.17835138596540845, "grad_norm": 392.2302551269531, "learning_rate": 9.822418467966013e-06, "loss": 17.4479, "step": 88290 }, { "epoch": 0.17837158659809224, "grad_norm": 811.0719604492188, "learning_rate": 9.822326253124436e-06, "loss": 38.1161, "step": 88300 }, { "epoch": 0.17839178723077606, "grad_norm": 29.430376052856445, "learning_rate": 9.82223401477943e-06, "loss": 20.5362, "step": 88310 }, { "epoch": 0.17841198786345988, "grad_norm": 132.15870666503906, "learning_rate": 9.822141752931438e-06, "loss": 23.0963, "step": 88320 }, { "epoch": 0.1784321884961437, "grad_norm": 281.7650146484375, "learning_rate": 9.822049467580912e-06, "loss": 18.371, "step": 88330 }, { "epoch": 0.17845238912882752, "grad_norm": 515.8067626953125, "learning_rate": 9.821957158728302e-06, "loss": 29.2727, "step": 88340 }, { "epoch": 0.17847258976151134, "grad_norm": 73.66029357910156, "learning_rate": 9.821864826374057e-06, "loss": 19.8096, "step": 88350 }, { "epoch": 0.17849279039419513, "grad_norm": 392.0162658691406, "learning_rate": 9.82177247051863e-06, "loss": 13.7095, "step": 88360 }, { "epoch": 0.17851299102687895, "grad_norm": 426.9225158691406, "learning_rate": 9.821680091162466e-06, "loss": 33.2019, "step": 88370 }, { "epoch": 0.17853319165956277, "grad_norm": 237.74159240722656, "learning_rate": 9.821587688306017e-06, "loss": 17.7858, "step": 88380 }, { "epoch": 0.1785533922922466, "grad_norm": 175.9855194091797, "learning_rate": 9.821495261949739e-06, "loss": 21.8052, "step": 88390 }, { "epoch": 0.1785735929249304, "grad_norm": 336.14727783203125, "learning_rate": 9.821402812094074e-06, "loss": 12.5534, "step": 88400 }, { "epoch": 0.17859379355761423, "grad_norm": 279.4413757324219, "learning_rate": 9.821310338739478e-06, "loss": 20.7332, "step": 88410 }, { "epoch": 0.17861399419029805, "grad_norm": 447.8175048828125, "learning_rate": 9.821217841886399e-06, "loss": 33.6549, "step": 88420 }, { "epoch": 0.17863419482298185, "grad_norm": 604.95947265625, "learning_rate": 9.82112532153529e-06, "loss": 51.1137, "step": 88430 }, { "epoch": 0.17865439545566567, "grad_norm": 280.2557067871094, "learning_rate": 9.821032777686601e-06, "loss": 27.0212, "step": 88440 }, { "epoch": 0.17867459608834949, "grad_norm": 81.83779907226562, "learning_rate": 9.820940210340784e-06, "loss": 21.6027, "step": 88450 }, { "epoch": 0.1786947967210333, "grad_norm": 137.42953491210938, "learning_rate": 9.820847619498288e-06, "loss": 16.0693, "step": 88460 }, { "epoch": 0.17871499735371713, "grad_norm": 295.6145324707031, "learning_rate": 9.820755005159565e-06, "loss": 24.0464, "step": 88470 }, { "epoch": 0.17873519798640095, "grad_norm": 552.690673828125, "learning_rate": 9.820662367325067e-06, "loss": 16.8595, "step": 88480 }, { "epoch": 0.17875539861908474, "grad_norm": 148.03260803222656, "learning_rate": 9.820569705995244e-06, "loss": 19.3173, "step": 88490 }, { "epoch": 0.17877559925176856, "grad_norm": 917.1246948242188, "learning_rate": 9.82047702117055e-06, "loss": 23.3397, "step": 88500 }, { "epoch": 0.17879579988445238, "grad_norm": 35.34273147583008, "learning_rate": 9.820384312851437e-06, "loss": 16.6297, "step": 88510 }, { "epoch": 0.1788160005171362, "grad_norm": 190.709228515625, "learning_rate": 9.820291581038354e-06, "loss": 35.9748, "step": 88520 }, { "epoch": 0.17883620114982002, "grad_norm": 269.6910705566406, "learning_rate": 9.820198825731757e-06, "loss": 38.2183, "step": 88530 }, { "epoch": 0.17885640178250384, "grad_norm": 505.0124206542969, "learning_rate": 9.820106046932092e-06, "loss": 52.1391, "step": 88540 }, { "epoch": 0.17887660241518763, "grad_norm": 487.30389404296875, "learning_rate": 9.820013244639817e-06, "loss": 24.1674, "step": 88550 }, { "epoch": 0.17889680304787145, "grad_norm": 395.43212890625, "learning_rate": 9.81992041885538e-06, "loss": 25.883, "step": 88560 }, { "epoch": 0.17891700368055527, "grad_norm": 157.04296875, "learning_rate": 9.819827569579237e-06, "loss": 22.6705, "step": 88570 }, { "epoch": 0.1789372043132391, "grad_norm": 175.14764404296875, "learning_rate": 9.819734696811839e-06, "loss": 14.3984, "step": 88580 }, { "epoch": 0.1789574049459229, "grad_norm": 494.252685546875, "learning_rate": 9.81964180055364e-06, "loss": 25.6814, "step": 88590 }, { "epoch": 0.17897760557860673, "grad_norm": 390.87335205078125, "learning_rate": 9.819548880805087e-06, "loss": 30.5818, "step": 88600 }, { "epoch": 0.17899780621129055, "grad_norm": 374.8076477050781, "learning_rate": 9.819455937566642e-06, "loss": 24.5028, "step": 88610 }, { "epoch": 0.17901800684397434, "grad_norm": 159.79075622558594, "learning_rate": 9.819362970838751e-06, "loss": 21.5444, "step": 88620 }, { "epoch": 0.17903820747665816, "grad_norm": 537.8025512695312, "learning_rate": 9.819269980621869e-06, "loss": 18.6094, "step": 88630 }, { "epoch": 0.17905840810934198, "grad_norm": 283.3327331542969, "learning_rate": 9.819176966916451e-06, "loss": 24.9194, "step": 88640 }, { "epoch": 0.1790786087420258, "grad_norm": 748.4666748046875, "learning_rate": 9.819083929722947e-06, "loss": 33.7638, "step": 88650 }, { "epoch": 0.17909880937470962, "grad_norm": 132.85516357421875, "learning_rate": 9.818990869041816e-06, "loss": 18.0207, "step": 88660 }, { "epoch": 0.17911901000739344, "grad_norm": 0.0, "learning_rate": 9.818897784873504e-06, "loss": 23.2612, "step": 88670 }, { "epoch": 0.17913921064007723, "grad_norm": 255.06044006347656, "learning_rate": 9.818804677218472e-06, "loss": 26.4935, "step": 88680 }, { "epoch": 0.17915941127276105, "grad_norm": 172.03802490234375, "learning_rate": 9.818711546077169e-06, "loss": 15.9128, "step": 88690 }, { "epoch": 0.17917961190544487, "grad_norm": 112.43183135986328, "learning_rate": 9.81861839145005e-06, "loss": 39.5096, "step": 88700 }, { "epoch": 0.1791998125381287, "grad_norm": 407.60894775390625, "learning_rate": 9.818525213337568e-06, "loss": 37.678, "step": 88710 }, { "epoch": 0.17922001317081251, "grad_norm": 40.86723327636719, "learning_rate": 9.818432011740181e-06, "loss": 10.7925, "step": 88720 }, { "epoch": 0.17924021380349633, "grad_norm": 232.61131286621094, "learning_rate": 9.81833878665834e-06, "loss": 17.489, "step": 88730 }, { "epoch": 0.17926041443618015, "grad_norm": 346.08209228515625, "learning_rate": 9.8182455380925e-06, "loss": 21.7934, "step": 88740 }, { "epoch": 0.17928061506886395, "grad_norm": 923.2282104492188, "learning_rate": 9.818152266043115e-06, "loss": 38.2958, "step": 88750 }, { "epoch": 0.17930081570154777, "grad_norm": 259.6517028808594, "learning_rate": 9.818058970510642e-06, "loss": 16.7239, "step": 88760 }, { "epoch": 0.1793210163342316, "grad_norm": 106.92383575439453, "learning_rate": 9.817965651495533e-06, "loss": 22.87, "step": 88770 }, { "epoch": 0.1793412169669154, "grad_norm": 333.0312805175781, "learning_rate": 9.817872308998242e-06, "loss": 15.6598, "step": 88780 }, { "epoch": 0.17936141759959923, "grad_norm": 497.65924072265625, "learning_rate": 9.817778943019228e-06, "loss": 13.9234, "step": 88790 }, { "epoch": 0.17938161823228305, "grad_norm": 521.1090087890625, "learning_rate": 9.817685553558945e-06, "loss": 26.3945, "step": 88800 }, { "epoch": 0.17940181886496684, "grad_norm": 93.6242446899414, "learning_rate": 9.817592140617844e-06, "loss": 34.3224, "step": 88810 }, { "epoch": 0.17942201949765066, "grad_norm": 260.9701232910156, "learning_rate": 9.817498704196384e-06, "loss": 25.167, "step": 88820 }, { "epoch": 0.17944222013033448, "grad_norm": 506.6679382324219, "learning_rate": 9.81740524429502e-06, "loss": 37.593, "step": 88830 }, { "epoch": 0.1794624207630183, "grad_norm": 541.3052978515625, "learning_rate": 9.817311760914206e-06, "loss": 29.344, "step": 88840 }, { "epoch": 0.17948262139570212, "grad_norm": 284.0600280761719, "learning_rate": 9.8172182540544e-06, "loss": 26.7408, "step": 88850 }, { "epoch": 0.17950282202838594, "grad_norm": 435.5898742675781, "learning_rate": 9.817124723716057e-06, "loss": 16.8702, "step": 88860 }, { "epoch": 0.17952302266106973, "grad_norm": 543.1546020507812, "learning_rate": 9.817031169899631e-06, "loss": 24.5535, "step": 88870 }, { "epoch": 0.17954322329375355, "grad_norm": 588.6611328125, "learning_rate": 9.81693759260558e-06, "loss": 32.2188, "step": 88880 }, { "epoch": 0.17956342392643737, "grad_norm": 0.0, "learning_rate": 9.81684399183436e-06, "loss": 19.8466, "step": 88890 }, { "epoch": 0.1795836245591212, "grad_norm": 194.99050903320312, "learning_rate": 9.816750367586424e-06, "loss": 25.7784, "step": 88900 }, { "epoch": 0.179603825191805, "grad_norm": 225.34027099609375, "learning_rate": 9.816656719862234e-06, "loss": 22.3907, "step": 88910 }, { "epoch": 0.17962402582448883, "grad_norm": 226.8335723876953, "learning_rate": 9.816563048662242e-06, "loss": 16.6792, "step": 88920 }, { "epoch": 0.17964422645717265, "grad_norm": 541.466552734375, "learning_rate": 9.816469353986905e-06, "loss": 21.5861, "step": 88930 }, { "epoch": 0.17966442708985644, "grad_norm": 196.82374572753906, "learning_rate": 9.816375635836683e-06, "loss": 25.1037, "step": 88940 }, { "epoch": 0.17968462772254026, "grad_norm": 224.67971801757812, "learning_rate": 9.816281894212028e-06, "loss": 23.1942, "step": 88950 }, { "epoch": 0.17970482835522408, "grad_norm": 408.5575866699219, "learning_rate": 9.8161881291134e-06, "loss": 18.2696, "step": 88960 }, { "epoch": 0.1797250289879079, "grad_norm": 304.554931640625, "learning_rate": 9.816094340541256e-06, "loss": 26.3656, "step": 88970 }, { "epoch": 0.17974522962059172, "grad_norm": 139.71128845214844, "learning_rate": 9.81600052849605e-06, "loss": 29.8753, "step": 88980 }, { "epoch": 0.17976543025327554, "grad_norm": 260.28497314453125, "learning_rate": 9.815906692978244e-06, "loss": 19.0647, "step": 88990 }, { "epoch": 0.17978563088595934, "grad_norm": 166.05047607421875, "learning_rate": 9.815812833988292e-06, "loss": 21.935, "step": 89000 }, { "epoch": 0.17980583151864316, "grad_norm": 221.55885314941406, "learning_rate": 9.815718951526651e-06, "loss": 16.3071, "step": 89010 }, { "epoch": 0.17982603215132698, "grad_norm": 368.6068420410156, "learning_rate": 9.815625045593783e-06, "loss": 35.3579, "step": 89020 }, { "epoch": 0.1798462327840108, "grad_norm": 164.25991821289062, "learning_rate": 9.81553111619014e-06, "loss": 37.572, "step": 89030 }, { "epoch": 0.17986643341669462, "grad_norm": 545.6224975585938, "learning_rate": 9.815437163316182e-06, "loss": 25.6647, "step": 89040 }, { "epoch": 0.17988663404937844, "grad_norm": 19.44648551940918, "learning_rate": 9.815343186972369e-06, "loss": 16.0453, "step": 89050 }, { "epoch": 0.17990683468206226, "grad_norm": 199.39236450195312, "learning_rate": 9.815249187159158e-06, "loss": 17.9481, "step": 89060 }, { "epoch": 0.17992703531474605, "grad_norm": 351.96795654296875, "learning_rate": 9.815155163877003e-06, "loss": 38.7047, "step": 89070 }, { "epoch": 0.17994723594742987, "grad_norm": 313.02191162109375, "learning_rate": 9.81506111712637e-06, "loss": 16.1076, "step": 89080 }, { "epoch": 0.1799674365801137, "grad_norm": 78.45370483398438, "learning_rate": 9.81496704690771e-06, "loss": 12.5636, "step": 89090 }, { "epoch": 0.1799876372127975, "grad_norm": 364.7314453125, "learning_rate": 9.814872953221487e-06, "loss": 14.563, "step": 89100 }, { "epoch": 0.18000783784548133, "grad_norm": 178.97434997558594, "learning_rate": 9.814778836068154e-06, "loss": 19.7114, "step": 89110 }, { "epoch": 0.18002803847816515, "grad_norm": 176.7615966796875, "learning_rate": 9.814684695448176e-06, "loss": 21.1311, "step": 89120 }, { "epoch": 0.18004823911084894, "grad_norm": 0.0, "learning_rate": 9.814590531362006e-06, "loss": 18.1872, "step": 89130 }, { "epoch": 0.18006843974353276, "grad_norm": 500.0653076171875, "learning_rate": 9.814496343810109e-06, "loss": 27.7393, "step": 89140 }, { "epoch": 0.18008864037621658, "grad_norm": 516.8353271484375, "learning_rate": 9.814402132792939e-06, "loss": 19.235, "step": 89150 }, { "epoch": 0.1801088410089004, "grad_norm": 298.10931396484375, "learning_rate": 9.814307898310957e-06, "loss": 46.7951, "step": 89160 }, { "epoch": 0.18012904164158422, "grad_norm": 83.99188232421875, "learning_rate": 9.814213640364623e-06, "loss": 24.2066, "step": 89170 }, { "epoch": 0.18014924227426804, "grad_norm": 180.19522094726562, "learning_rate": 9.814119358954394e-06, "loss": 19.9833, "step": 89180 }, { "epoch": 0.18016944290695183, "grad_norm": 151.4966278076172, "learning_rate": 9.81402505408073e-06, "loss": 16.6468, "step": 89190 }, { "epoch": 0.18018964353963565, "grad_norm": 156.45269775390625, "learning_rate": 9.813930725744095e-06, "loss": 16.1555, "step": 89200 }, { "epoch": 0.18020984417231947, "grad_norm": 238.8367462158203, "learning_rate": 9.813836373944945e-06, "loss": 25.4537, "step": 89210 }, { "epoch": 0.1802300448050033, "grad_norm": 247.60960388183594, "learning_rate": 9.813741998683738e-06, "loss": 28.6155, "step": 89220 }, { "epoch": 0.1802502454376871, "grad_norm": 37.78065872192383, "learning_rate": 9.813647599960938e-06, "loss": 41.7229, "step": 89230 }, { "epoch": 0.18027044607037093, "grad_norm": 723.7213134765625, "learning_rate": 9.813553177777005e-06, "loss": 24.869, "step": 89240 }, { "epoch": 0.18029064670305475, "grad_norm": 877.6924438476562, "learning_rate": 9.813458732132395e-06, "loss": 33.4757, "step": 89250 }, { "epoch": 0.18031084733573854, "grad_norm": 87.00105285644531, "learning_rate": 9.813364263027572e-06, "loss": 35.9405, "step": 89260 }, { "epoch": 0.18033104796842236, "grad_norm": 0.0, "learning_rate": 9.813269770462995e-06, "loss": 28.4922, "step": 89270 }, { "epoch": 0.18035124860110618, "grad_norm": 235.01284790039062, "learning_rate": 9.813175254439125e-06, "loss": 17.6382, "step": 89280 }, { "epoch": 0.18037144923379, "grad_norm": 299.8266296386719, "learning_rate": 9.813080714956422e-06, "loss": 14.1315, "step": 89290 }, { "epoch": 0.18039164986647382, "grad_norm": 414.4109802246094, "learning_rate": 9.812986152015349e-06, "loss": 21.2475, "step": 89300 }, { "epoch": 0.18041185049915764, "grad_norm": 152.4951629638672, "learning_rate": 9.812891565616363e-06, "loss": 33.1424, "step": 89310 }, { "epoch": 0.18043205113184144, "grad_norm": 244.38819885253906, "learning_rate": 9.812796955759929e-06, "loss": 32.5858, "step": 89320 }, { "epoch": 0.18045225176452526, "grad_norm": 297.23797607421875, "learning_rate": 9.812702322446506e-06, "loss": 18.2187, "step": 89330 }, { "epoch": 0.18047245239720908, "grad_norm": 331.11163330078125, "learning_rate": 9.812607665676555e-06, "loss": 10.4179, "step": 89340 }, { "epoch": 0.1804926530298929, "grad_norm": 155.2808380126953, "learning_rate": 9.812512985450539e-06, "loss": 19.2281, "step": 89350 }, { "epoch": 0.18051285366257672, "grad_norm": 83.92605590820312, "learning_rate": 9.812418281768919e-06, "loss": 27.2519, "step": 89360 }, { "epoch": 0.18053305429526054, "grad_norm": 290.2738342285156, "learning_rate": 9.812323554632153e-06, "loss": 21.5722, "step": 89370 }, { "epoch": 0.18055325492794436, "grad_norm": 664.5491943359375, "learning_rate": 9.812228804040708e-06, "loss": 31.7113, "step": 89380 }, { "epoch": 0.18057345556062815, "grad_norm": 271.4676818847656, "learning_rate": 9.812134029995043e-06, "loss": 19.7243, "step": 89390 }, { "epoch": 0.18059365619331197, "grad_norm": 253.90518188476562, "learning_rate": 9.81203923249562e-06, "loss": 30.6821, "step": 89400 }, { "epoch": 0.1806138568259958, "grad_norm": 411.1011962890625, "learning_rate": 9.811944411542903e-06, "loss": 24.9222, "step": 89410 }, { "epoch": 0.1806340574586796, "grad_norm": 313.4715270996094, "learning_rate": 9.811849567137351e-06, "loss": 24.4792, "step": 89420 }, { "epoch": 0.18065425809136343, "grad_norm": 172.39218139648438, "learning_rate": 9.811754699279428e-06, "loss": 36.4926, "step": 89430 }, { "epoch": 0.18067445872404725, "grad_norm": 206.0220489501953, "learning_rate": 9.811659807969596e-06, "loss": 27.978, "step": 89440 }, { "epoch": 0.18069465935673104, "grad_norm": 246.46450805664062, "learning_rate": 9.811564893208317e-06, "loss": 28.763, "step": 89450 }, { "epoch": 0.18071485998941486, "grad_norm": 196.3307647705078, "learning_rate": 9.811469954996056e-06, "loss": 14.6906, "step": 89460 }, { "epoch": 0.18073506062209868, "grad_norm": 33.52510070800781, "learning_rate": 9.811374993333274e-06, "loss": 21.4156, "step": 89470 }, { "epoch": 0.1807552612547825, "grad_norm": 205.0218505859375, "learning_rate": 9.811280008220432e-06, "loss": 33.0377, "step": 89480 }, { "epoch": 0.18077546188746632, "grad_norm": 305.7770690917969, "learning_rate": 9.811184999657996e-06, "loss": 16.314, "step": 89490 }, { "epoch": 0.18079566252015014, "grad_norm": 409.166259765625, "learning_rate": 9.811089967646427e-06, "loss": 21.063, "step": 89500 }, { "epoch": 0.18081586315283393, "grad_norm": 534.9361572265625, "learning_rate": 9.81099491218619e-06, "loss": 20.8204, "step": 89510 }, { "epoch": 0.18083606378551775, "grad_norm": 132.49903869628906, "learning_rate": 9.810899833277747e-06, "loss": 29.1078, "step": 89520 }, { "epoch": 0.18085626441820157, "grad_norm": 190.5088348388672, "learning_rate": 9.810804730921561e-06, "loss": 24.4246, "step": 89530 }, { "epoch": 0.1808764650508854, "grad_norm": 339.5248107910156, "learning_rate": 9.810709605118098e-06, "loss": 27.4584, "step": 89540 }, { "epoch": 0.1808966656835692, "grad_norm": 161.75758361816406, "learning_rate": 9.810614455867818e-06, "loss": 14.8899, "step": 89550 }, { "epoch": 0.18091686631625303, "grad_norm": 529.0466918945312, "learning_rate": 9.810519283171189e-06, "loss": 14.1074, "step": 89560 }, { "epoch": 0.18093706694893685, "grad_norm": 262.55224609375, "learning_rate": 9.810424087028669e-06, "loss": 20.3928, "step": 89570 }, { "epoch": 0.18095726758162065, "grad_norm": 364.3786315917969, "learning_rate": 9.810328867440729e-06, "loss": 30.1708, "step": 89580 }, { "epoch": 0.18097746821430447, "grad_norm": 167.35716247558594, "learning_rate": 9.810233624407827e-06, "loss": 34.9857, "step": 89590 }, { "epoch": 0.18099766884698829, "grad_norm": 368.57550048828125, "learning_rate": 9.81013835793043e-06, "loss": 23.159, "step": 89600 }, { "epoch": 0.1810178694796721, "grad_norm": 376.44647216796875, "learning_rate": 9.810043068009002e-06, "loss": 23.05, "step": 89610 }, { "epoch": 0.18103807011235593, "grad_norm": 268.0745849609375, "learning_rate": 9.809947754644009e-06, "loss": 18.5461, "step": 89620 }, { "epoch": 0.18105827074503975, "grad_norm": 277.0478210449219, "learning_rate": 9.809852417835913e-06, "loss": 18.6318, "step": 89630 }, { "epoch": 0.18107847137772354, "grad_norm": 265.89459228515625, "learning_rate": 9.80975705758518e-06, "loss": 13.8636, "step": 89640 }, { "epoch": 0.18109867201040736, "grad_norm": 185.8348846435547, "learning_rate": 9.809661673892274e-06, "loss": 35.6141, "step": 89650 }, { "epoch": 0.18111887264309118, "grad_norm": 94.65213012695312, "learning_rate": 9.80956626675766e-06, "loss": 14.138, "step": 89660 }, { "epoch": 0.181139073275775, "grad_norm": 318.062255859375, "learning_rate": 9.809470836181804e-06, "loss": 17.4031, "step": 89670 }, { "epoch": 0.18115927390845882, "grad_norm": 306.4349060058594, "learning_rate": 9.80937538216517e-06, "loss": 20.0934, "step": 89680 }, { "epoch": 0.18117947454114264, "grad_norm": 295.681640625, "learning_rate": 9.809279904708224e-06, "loss": 28.9379, "step": 89690 }, { "epoch": 0.18119967517382646, "grad_norm": 736.1948852539062, "learning_rate": 9.809184403811432e-06, "loss": 20.3253, "step": 89700 }, { "epoch": 0.18121987580651025, "grad_norm": 333.8109130859375, "learning_rate": 9.809088879475257e-06, "loss": 27.889, "step": 89710 }, { "epoch": 0.18124007643919407, "grad_norm": 328.3408203125, "learning_rate": 9.808993331700167e-06, "loss": 20.7144, "step": 89720 }, { "epoch": 0.1812602770718779, "grad_norm": 205.58743286132812, "learning_rate": 9.808897760486626e-06, "loss": 25.5582, "step": 89730 }, { "epoch": 0.1812804777045617, "grad_norm": 144.90841674804688, "learning_rate": 9.808802165835101e-06, "loss": 21.2842, "step": 89740 }, { "epoch": 0.18130067833724553, "grad_norm": 322.42254638671875, "learning_rate": 9.808706547746057e-06, "loss": 19.0585, "step": 89750 }, { "epoch": 0.18132087896992935, "grad_norm": 368.81854248046875, "learning_rate": 9.808610906219963e-06, "loss": 20.119, "step": 89760 }, { "epoch": 0.18134107960261314, "grad_norm": 390.12750244140625, "learning_rate": 9.80851524125728e-06, "loss": 30.5359, "step": 89770 }, { "epoch": 0.18136128023529696, "grad_norm": 348.4322509765625, "learning_rate": 9.808419552858477e-06, "loss": 20.3969, "step": 89780 }, { "epoch": 0.18138148086798078, "grad_norm": 240.08444213867188, "learning_rate": 9.808323841024021e-06, "loss": 35.2249, "step": 89790 }, { "epoch": 0.1814016815006646, "grad_norm": 422.1143798828125, "learning_rate": 9.808228105754378e-06, "loss": 18.2356, "step": 89800 }, { "epoch": 0.18142188213334842, "grad_norm": 117.8640365600586, "learning_rate": 9.808132347050013e-06, "loss": 10.2526, "step": 89810 }, { "epoch": 0.18144208276603224, "grad_norm": 476.56640625, "learning_rate": 9.808036564911396e-06, "loss": 18.1843, "step": 89820 }, { "epoch": 0.18146228339871603, "grad_norm": 121.27227020263672, "learning_rate": 9.80794075933899e-06, "loss": 18.8648, "step": 89830 }, { "epoch": 0.18148248403139985, "grad_norm": 644.5889282226562, "learning_rate": 9.807844930333266e-06, "loss": 19.8763, "step": 89840 }, { "epoch": 0.18150268466408367, "grad_norm": 285.93524169921875, "learning_rate": 9.807749077894686e-06, "loss": 20.2361, "step": 89850 }, { "epoch": 0.1815228852967675, "grad_norm": 237.0696563720703, "learning_rate": 9.807653202023723e-06, "loss": 21.1433, "step": 89860 }, { "epoch": 0.18154308592945131, "grad_norm": 274.2760314941406, "learning_rate": 9.80755730272084e-06, "loss": 30.8541, "step": 89870 }, { "epoch": 0.18156328656213513, "grad_norm": 199.8031463623047, "learning_rate": 9.807461379986506e-06, "loss": 13.7245, "step": 89880 }, { "epoch": 0.18158348719481895, "grad_norm": 190.23023986816406, "learning_rate": 9.807365433821188e-06, "loss": 10.8357, "step": 89890 }, { "epoch": 0.18160368782750275, "grad_norm": 286.6399230957031, "learning_rate": 9.807269464225355e-06, "loss": 13.278, "step": 89900 }, { "epoch": 0.18162388846018657, "grad_norm": 544.5938110351562, "learning_rate": 9.807173471199474e-06, "loss": 28.3184, "step": 89910 }, { "epoch": 0.1816440890928704, "grad_norm": 328.9489440917969, "learning_rate": 9.80707745474401e-06, "loss": 17.3515, "step": 89920 }, { "epoch": 0.1816642897255542, "grad_norm": 319.2054443359375, "learning_rate": 9.806981414859435e-06, "loss": 17.7308, "step": 89930 }, { "epoch": 0.18168449035823803, "grad_norm": 465.5575256347656, "learning_rate": 9.806885351546215e-06, "loss": 24.0338, "step": 89940 }, { "epoch": 0.18170469099092185, "grad_norm": 67.2480697631836, "learning_rate": 9.806789264804821e-06, "loss": 17.7284, "step": 89950 }, { "epoch": 0.18172489162360564, "grad_norm": 371.47216796875, "learning_rate": 9.806693154635719e-06, "loss": 33.2382, "step": 89960 }, { "epoch": 0.18174509225628946, "grad_norm": 229.34434509277344, "learning_rate": 9.806597021039374e-06, "loss": 17.3654, "step": 89970 }, { "epoch": 0.18176529288897328, "grad_norm": 136.34521484375, "learning_rate": 9.806500864016261e-06, "loss": 19.6998, "step": 89980 }, { "epoch": 0.1817854935216571, "grad_norm": 425.6612243652344, "learning_rate": 9.806404683566845e-06, "loss": 21.3713, "step": 89990 }, { "epoch": 0.18180569415434092, "grad_norm": 368.97113037109375, "learning_rate": 9.806308479691595e-06, "loss": 28.6899, "step": 90000 }, { "epoch": 0.18182589478702474, "grad_norm": 107.99005889892578, "learning_rate": 9.80621225239098e-06, "loss": 22.4536, "step": 90010 }, { "epoch": 0.18184609541970856, "grad_norm": 432.3868103027344, "learning_rate": 9.806116001665471e-06, "loss": 19.8999, "step": 90020 }, { "epoch": 0.18186629605239235, "grad_norm": 247.11830139160156, "learning_rate": 9.806019727515534e-06, "loss": 29.867, "step": 90030 }, { "epoch": 0.18188649668507617, "grad_norm": 818.8187255859375, "learning_rate": 9.805923429941642e-06, "loss": 30.0503, "step": 90040 }, { "epoch": 0.18190669731776, "grad_norm": 336.6963806152344, "learning_rate": 9.80582710894426e-06, "loss": 34.7439, "step": 90050 }, { "epoch": 0.1819268979504438, "grad_norm": 149.7480926513672, "learning_rate": 9.805730764523861e-06, "loss": 20.2646, "step": 90060 }, { "epoch": 0.18194709858312763, "grad_norm": 312.17694091796875, "learning_rate": 9.805634396680912e-06, "loss": 16.8644, "step": 90070 }, { "epoch": 0.18196729921581145, "grad_norm": 328.3172302246094, "learning_rate": 9.805538005415885e-06, "loss": 20.2186, "step": 90080 }, { "epoch": 0.18198749984849524, "grad_norm": 420.58184814453125, "learning_rate": 9.805441590729246e-06, "loss": 28.8704, "step": 90090 }, { "epoch": 0.18200770048117906, "grad_norm": 523.5330810546875, "learning_rate": 9.80534515262147e-06, "loss": 26.8603, "step": 90100 }, { "epoch": 0.18202790111386288, "grad_norm": 58.76906204223633, "learning_rate": 9.805248691093023e-06, "loss": 11.2044, "step": 90110 }, { "epoch": 0.1820481017465467, "grad_norm": 431.5325012207031, "learning_rate": 9.805152206144378e-06, "loss": 38.4186, "step": 90120 }, { "epoch": 0.18206830237923052, "grad_norm": 307.7421569824219, "learning_rate": 9.805055697776003e-06, "loss": 35.2943, "step": 90130 }, { "epoch": 0.18208850301191434, "grad_norm": 220.8982391357422, "learning_rate": 9.80495916598837e-06, "loss": 30.7191, "step": 90140 }, { "epoch": 0.18210870364459814, "grad_norm": 403.3353271484375, "learning_rate": 9.804862610781949e-06, "loss": 19.8359, "step": 90150 }, { "epoch": 0.18212890427728196, "grad_norm": 206.422119140625, "learning_rate": 9.80476603215721e-06, "loss": 24.1402, "step": 90160 }, { "epoch": 0.18214910490996578, "grad_norm": 438.1856384277344, "learning_rate": 9.804669430114625e-06, "loss": 29.6056, "step": 90170 }, { "epoch": 0.1821693055426496, "grad_norm": 264.57232666015625, "learning_rate": 9.804572804654662e-06, "loss": 16.6938, "step": 90180 }, { "epoch": 0.18218950617533342, "grad_norm": 535.250732421875, "learning_rate": 9.804476155777796e-06, "loss": 34.2558, "step": 90190 }, { "epoch": 0.18220970680801724, "grad_norm": 207.51319885253906, "learning_rate": 9.804379483484493e-06, "loss": 17.0246, "step": 90200 }, { "epoch": 0.18222990744070106, "grad_norm": 440.0328369140625, "learning_rate": 9.80428278777523e-06, "loss": 25.4496, "step": 90210 }, { "epoch": 0.18225010807338485, "grad_norm": 495.13568115234375, "learning_rate": 9.804186068650474e-06, "loss": 22.1629, "step": 90220 }, { "epoch": 0.18227030870606867, "grad_norm": 541.8395385742188, "learning_rate": 9.804089326110697e-06, "loss": 41.2208, "step": 90230 }, { "epoch": 0.1822905093387525, "grad_norm": 321.6528625488281, "learning_rate": 9.803992560156372e-06, "loss": 20.3038, "step": 90240 }, { "epoch": 0.1823107099714363, "grad_norm": 489.01446533203125, "learning_rate": 9.803895770787972e-06, "loss": 22.5001, "step": 90250 }, { "epoch": 0.18233091060412013, "grad_norm": 214.21728515625, "learning_rate": 9.803798958005965e-06, "loss": 14.9408, "step": 90260 }, { "epoch": 0.18235111123680395, "grad_norm": 271.10479736328125, "learning_rate": 9.803702121810823e-06, "loss": 25.8385, "step": 90270 }, { "epoch": 0.18237131186948774, "grad_norm": 79.72005462646484, "learning_rate": 9.803605262203022e-06, "loss": 12.2337, "step": 90280 }, { "epoch": 0.18239151250217156, "grad_norm": 222.9102325439453, "learning_rate": 9.80350837918303e-06, "loss": 11.8653, "step": 90290 }, { "epoch": 0.18241171313485538, "grad_norm": 103.3753890991211, "learning_rate": 9.803411472751321e-06, "loss": 23.6717, "step": 90300 }, { "epoch": 0.1824319137675392, "grad_norm": 530.2542114257812, "learning_rate": 9.803314542908368e-06, "loss": 19.7868, "step": 90310 }, { "epoch": 0.18245211440022302, "grad_norm": 303.6331481933594, "learning_rate": 9.803217589654642e-06, "loss": 37.0409, "step": 90320 }, { "epoch": 0.18247231503290684, "grad_norm": 221.1060028076172, "learning_rate": 9.803120612990616e-06, "loss": 18.2828, "step": 90330 }, { "epoch": 0.18249251566559066, "grad_norm": 188.35067749023438, "learning_rate": 9.803023612916763e-06, "loss": 15.6838, "step": 90340 }, { "epoch": 0.18251271629827445, "grad_norm": 350.8931884765625, "learning_rate": 9.802926589433553e-06, "loss": 21.3968, "step": 90350 }, { "epoch": 0.18253291693095827, "grad_norm": 364.2025146484375, "learning_rate": 9.802829542541463e-06, "loss": 21.0922, "step": 90360 }, { "epoch": 0.1825531175636421, "grad_norm": 191.167724609375, "learning_rate": 9.802732472240966e-06, "loss": 25.2859, "step": 90370 }, { "epoch": 0.1825733181963259, "grad_norm": 213.8785858154297, "learning_rate": 9.802635378532531e-06, "loss": 21.837, "step": 90380 }, { "epoch": 0.18259351882900973, "grad_norm": 206.26742553710938, "learning_rate": 9.802538261416635e-06, "loss": 53.389, "step": 90390 }, { "epoch": 0.18261371946169355, "grad_norm": 268.4217529296875, "learning_rate": 9.80244112089375e-06, "loss": 21.6778, "step": 90400 }, { "epoch": 0.18263392009437734, "grad_norm": 455.7522277832031, "learning_rate": 9.802343956964348e-06, "loss": 23.158, "step": 90410 }, { "epoch": 0.18265412072706116, "grad_norm": 430.4161376953125, "learning_rate": 9.802246769628906e-06, "loss": 23.107, "step": 90420 }, { "epoch": 0.18267432135974498, "grad_norm": 283.0844421386719, "learning_rate": 9.802149558887895e-06, "loss": 11.7509, "step": 90430 }, { "epoch": 0.1826945219924288, "grad_norm": 183.84896850585938, "learning_rate": 9.802052324741789e-06, "loss": 16.0784, "step": 90440 }, { "epoch": 0.18271472262511262, "grad_norm": 493.0817565917969, "learning_rate": 9.801955067191062e-06, "loss": 26.0139, "step": 90450 }, { "epoch": 0.18273492325779644, "grad_norm": 185.8978729248047, "learning_rate": 9.80185778623619e-06, "loss": 19.4684, "step": 90460 }, { "epoch": 0.18275512389048024, "grad_norm": 267.78900146484375, "learning_rate": 9.801760481877644e-06, "loss": 21.525, "step": 90470 }, { "epoch": 0.18277532452316406, "grad_norm": 523.8119506835938, "learning_rate": 9.8016631541159e-06, "loss": 18.989, "step": 90480 }, { "epoch": 0.18279552515584788, "grad_norm": 225.72286987304688, "learning_rate": 9.801565802951432e-06, "loss": 15.1261, "step": 90490 }, { "epoch": 0.1828157257885317, "grad_norm": 151.74826049804688, "learning_rate": 9.801468428384716e-06, "loss": 17.7139, "step": 90500 }, { "epoch": 0.18283592642121552, "grad_norm": 34.376441955566406, "learning_rate": 9.801371030416224e-06, "loss": 15.8384, "step": 90510 }, { "epoch": 0.18285612705389934, "grad_norm": 494.1421813964844, "learning_rate": 9.801273609046433e-06, "loss": 25.6854, "step": 90520 }, { "epoch": 0.18287632768658316, "grad_norm": 204.3738555908203, "learning_rate": 9.801176164275816e-06, "loss": 19.1849, "step": 90530 }, { "epoch": 0.18289652831926695, "grad_norm": 793.3532104492188, "learning_rate": 9.801078696104849e-06, "loss": 44.7251, "step": 90540 }, { "epoch": 0.18291672895195077, "grad_norm": 255.67288208007812, "learning_rate": 9.800981204534006e-06, "loss": 34.3957, "step": 90550 }, { "epoch": 0.1829369295846346, "grad_norm": 694.2034301757812, "learning_rate": 9.800883689563764e-06, "loss": 21.1567, "step": 90560 }, { "epoch": 0.1829571302173184, "grad_norm": 256.64776611328125, "learning_rate": 9.800786151194596e-06, "loss": 20.5658, "step": 90570 }, { "epoch": 0.18297733085000223, "grad_norm": 259.2400817871094, "learning_rate": 9.800688589426978e-06, "loss": 20.0763, "step": 90580 }, { "epoch": 0.18299753148268605, "grad_norm": 221.7714080810547, "learning_rate": 9.800591004261388e-06, "loss": 14.6221, "step": 90590 }, { "epoch": 0.18301773211536984, "grad_norm": 268.4925537109375, "learning_rate": 9.8004933956983e-06, "loss": 31.4021, "step": 90600 }, { "epoch": 0.18303793274805366, "grad_norm": 227.88052368164062, "learning_rate": 9.800395763738189e-06, "loss": 11.1439, "step": 90610 }, { "epoch": 0.18305813338073748, "grad_norm": 193.87892150878906, "learning_rate": 9.80029810838153e-06, "loss": 21.434, "step": 90620 }, { "epoch": 0.1830783340134213, "grad_norm": 328.88946533203125, "learning_rate": 9.8002004296288e-06, "loss": 15.3099, "step": 90630 }, { "epoch": 0.18309853464610512, "grad_norm": 235.24884033203125, "learning_rate": 9.800102727480476e-06, "loss": 17.6108, "step": 90640 }, { "epoch": 0.18311873527878894, "grad_norm": 220.6587371826172, "learning_rate": 9.800005001937034e-06, "loss": 16.967, "step": 90650 }, { "epoch": 0.18313893591147276, "grad_norm": 881.6375122070312, "learning_rate": 9.79990725299895e-06, "loss": 30.7735, "step": 90660 }, { "epoch": 0.18315913654415655, "grad_norm": 64.15321350097656, "learning_rate": 9.7998094806667e-06, "loss": 22.5834, "step": 90670 }, { "epoch": 0.18317933717684037, "grad_norm": 212.12612915039062, "learning_rate": 9.79971168494076e-06, "loss": 25.1256, "step": 90680 }, { "epoch": 0.1831995378095242, "grad_norm": 416.0219421386719, "learning_rate": 9.799613865821608e-06, "loss": 30.2963, "step": 90690 }, { "epoch": 0.183219738442208, "grad_norm": 194.556640625, "learning_rate": 9.799516023309719e-06, "loss": 26.4247, "step": 90700 }, { "epoch": 0.18323993907489183, "grad_norm": 204.8665771484375, "learning_rate": 9.799418157405571e-06, "loss": 32.3678, "step": 90710 }, { "epoch": 0.18326013970757565, "grad_norm": 1059.2159423828125, "learning_rate": 9.799320268109644e-06, "loss": 29.3281, "step": 90720 }, { "epoch": 0.18328034034025945, "grad_norm": 78.5074691772461, "learning_rate": 9.799222355422409e-06, "loss": 16.1517, "step": 90730 }, { "epoch": 0.18330054097294327, "grad_norm": 97.50385284423828, "learning_rate": 9.799124419344348e-06, "loss": 21.5929, "step": 90740 }, { "epoch": 0.18332074160562709, "grad_norm": 134.6094970703125, "learning_rate": 9.799026459875935e-06, "loss": 18.09, "step": 90750 }, { "epoch": 0.1833409422383109, "grad_norm": 237.9413299560547, "learning_rate": 9.798928477017651e-06, "loss": 23.9544, "step": 90760 }, { "epoch": 0.18336114287099473, "grad_norm": 87.28053283691406, "learning_rate": 9.79883047076997e-06, "loss": 24.1455, "step": 90770 }, { "epoch": 0.18338134350367855, "grad_norm": 422.739013671875, "learning_rate": 9.798732441133372e-06, "loss": 30.2733, "step": 90780 }, { "epoch": 0.18340154413636234, "grad_norm": 431.4217834472656, "learning_rate": 9.798634388108334e-06, "loss": 31.8996, "step": 90790 }, { "epoch": 0.18342174476904616, "grad_norm": 660.9007568359375, "learning_rate": 9.798536311695334e-06, "loss": 32.9462, "step": 90800 }, { "epoch": 0.18344194540172998, "grad_norm": 172.05870056152344, "learning_rate": 9.79843821189485e-06, "loss": 28.2498, "step": 90810 }, { "epoch": 0.1834621460344138, "grad_norm": 334.6192932128906, "learning_rate": 9.79834008870736e-06, "loss": 20.9908, "step": 90820 }, { "epoch": 0.18348234666709762, "grad_norm": 203.6584930419922, "learning_rate": 9.798241942133344e-06, "loss": 28.3082, "step": 90830 }, { "epoch": 0.18350254729978144, "grad_norm": 202.3249969482422, "learning_rate": 9.798143772173276e-06, "loss": 25.2366, "step": 90840 }, { "epoch": 0.18352274793246526, "grad_norm": 199.6689910888672, "learning_rate": 9.79804557882764e-06, "loss": 18.9866, "step": 90850 }, { "epoch": 0.18354294856514905, "grad_norm": 252.27464294433594, "learning_rate": 9.797947362096909e-06, "loss": 23.6128, "step": 90860 }, { "epoch": 0.18356314919783287, "grad_norm": 250.51498413085938, "learning_rate": 9.797849121981566e-06, "loss": 16.6518, "step": 90870 }, { "epoch": 0.1835833498305167, "grad_norm": 104.45295715332031, "learning_rate": 9.797750858482088e-06, "loss": 17.6892, "step": 90880 }, { "epoch": 0.1836035504632005, "grad_norm": 416.5125732421875, "learning_rate": 9.797652571598954e-06, "loss": 25.8792, "step": 90890 }, { "epoch": 0.18362375109588433, "grad_norm": 73.38689422607422, "learning_rate": 9.797554261332644e-06, "loss": 21.6498, "step": 90900 }, { "epoch": 0.18364395172856815, "grad_norm": 258.2563781738281, "learning_rate": 9.797455927683637e-06, "loss": 27.3358, "step": 90910 }, { "epoch": 0.18366415236125194, "grad_norm": 246.04855346679688, "learning_rate": 9.79735757065241e-06, "loss": 33.6978, "step": 90920 }, { "epoch": 0.18368435299393576, "grad_norm": 384.07635498046875, "learning_rate": 9.797259190239444e-06, "loss": 27.6343, "step": 90930 }, { "epoch": 0.18370455362661958, "grad_norm": 398.51708984375, "learning_rate": 9.797160786445218e-06, "loss": 21.04, "step": 90940 }, { "epoch": 0.1837247542593034, "grad_norm": 483.1817321777344, "learning_rate": 9.797062359270215e-06, "loss": 31.7678, "step": 90950 }, { "epoch": 0.18374495489198722, "grad_norm": 802.4188842773438, "learning_rate": 9.79696390871491e-06, "loss": 33.1617, "step": 90960 }, { "epoch": 0.18376515552467104, "grad_norm": 56.54692077636719, "learning_rate": 9.796865434779786e-06, "loss": 27.8253, "step": 90970 }, { "epoch": 0.18378535615735486, "grad_norm": 383.1007385253906, "learning_rate": 9.79676693746532e-06, "loss": 19.2899, "step": 90980 }, { "epoch": 0.18380555679003865, "grad_norm": 392.0801086425781, "learning_rate": 9.796668416771996e-06, "loss": 47.6075, "step": 90990 }, { "epoch": 0.18382575742272247, "grad_norm": 505.0868835449219, "learning_rate": 9.796569872700287e-06, "loss": 20.0266, "step": 91000 }, { "epoch": 0.1838459580554063, "grad_norm": 691.2631225585938, "learning_rate": 9.796471305250683e-06, "loss": 27.0098, "step": 91010 }, { "epoch": 0.18386615868809011, "grad_norm": 359.839111328125, "learning_rate": 9.79637271442366e-06, "loss": 19.8132, "step": 91020 }, { "epoch": 0.18388635932077393, "grad_norm": 139.86512756347656, "learning_rate": 9.796274100219693e-06, "loss": 18.8508, "step": 91030 }, { "epoch": 0.18390655995345775, "grad_norm": 104.32476043701172, "learning_rate": 9.796175462639273e-06, "loss": 20.1327, "step": 91040 }, { "epoch": 0.18392676058614155, "grad_norm": 413.5173645019531, "learning_rate": 9.796076801682873e-06, "loss": 19.0459, "step": 91050 }, { "epoch": 0.18394696121882537, "grad_norm": 582.4884643554688, "learning_rate": 9.795978117350976e-06, "loss": 28.6586, "step": 91060 }, { "epoch": 0.1839671618515092, "grad_norm": 297.2491455078125, "learning_rate": 9.795879409644064e-06, "loss": 26.5994, "step": 91070 }, { "epoch": 0.183987362484193, "grad_norm": 339.6156921386719, "learning_rate": 9.795780678562618e-06, "loss": 22.7162, "step": 91080 }, { "epoch": 0.18400756311687683, "grad_norm": 139.22975158691406, "learning_rate": 9.79568192410712e-06, "loss": 10.7981, "step": 91090 }, { "epoch": 0.18402776374956065, "grad_norm": 357.7681884765625, "learning_rate": 9.795583146278047e-06, "loss": 35.7915, "step": 91100 }, { "epoch": 0.18404796438224444, "grad_norm": 203.07501220703125, "learning_rate": 9.795484345075882e-06, "loss": 30.6753, "step": 91110 }, { "epoch": 0.18406816501492826, "grad_norm": 352.80810546875, "learning_rate": 9.795385520501113e-06, "loss": 31.4639, "step": 91120 }, { "epoch": 0.18408836564761208, "grad_norm": 297.5950622558594, "learning_rate": 9.795286672554214e-06, "loss": 18.9528, "step": 91130 }, { "epoch": 0.1841085662802959, "grad_norm": 214.2798309326172, "learning_rate": 9.795187801235668e-06, "loss": 29.4953, "step": 91140 }, { "epoch": 0.18412876691297972, "grad_norm": 417.59375, "learning_rate": 9.795088906545959e-06, "loss": 37.9874, "step": 91150 }, { "epoch": 0.18414896754566354, "grad_norm": 153.76675415039062, "learning_rate": 9.794989988485571e-06, "loss": 25.0945, "step": 91160 }, { "epoch": 0.18416916817834736, "grad_norm": 208.49266052246094, "learning_rate": 9.79489104705498e-06, "loss": 23.6561, "step": 91170 }, { "epoch": 0.18418936881103115, "grad_norm": 784.4653930664062, "learning_rate": 9.794792082254673e-06, "loss": 23.6042, "step": 91180 }, { "epoch": 0.18420956944371497, "grad_norm": 167.4105224609375, "learning_rate": 9.79469309408513e-06, "loss": 26.9369, "step": 91190 }, { "epoch": 0.1842297700763988, "grad_norm": 377.4635925292969, "learning_rate": 9.794594082546835e-06, "loss": 19.2767, "step": 91200 }, { "epoch": 0.1842499707090826, "grad_norm": 75.48277282714844, "learning_rate": 9.794495047640271e-06, "loss": 19.9254, "step": 91210 }, { "epoch": 0.18427017134176643, "grad_norm": 308.2733154296875, "learning_rate": 9.79439598936592e-06, "loss": 21.3518, "step": 91220 }, { "epoch": 0.18429037197445025, "grad_norm": 98.36454772949219, "learning_rate": 9.794296907724262e-06, "loss": 20.1614, "step": 91230 }, { "epoch": 0.18431057260713404, "grad_norm": 203.58319091796875, "learning_rate": 9.794197802715784e-06, "loss": 17.0827, "step": 91240 }, { "epoch": 0.18433077323981786, "grad_norm": 454.954833984375, "learning_rate": 9.794098674340966e-06, "loss": 31.5911, "step": 91250 }, { "epoch": 0.18435097387250168, "grad_norm": 329.8617858886719, "learning_rate": 9.793999522600293e-06, "loss": 41.7694, "step": 91260 }, { "epoch": 0.1843711745051855, "grad_norm": 243.45652770996094, "learning_rate": 9.793900347494248e-06, "loss": 27.0239, "step": 91270 }, { "epoch": 0.18439137513786932, "grad_norm": 358.07928466796875, "learning_rate": 9.793801149023315e-06, "loss": 29.0192, "step": 91280 }, { "epoch": 0.18441157577055314, "grad_norm": 394.2966003417969, "learning_rate": 9.793701927187975e-06, "loss": 23.9562, "step": 91290 }, { "epoch": 0.18443177640323694, "grad_norm": 111.50665283203125, "learning_rate": 9.793602681988714e-06, "loss": 22.6037, "step": 91300 }, { "epoch": 0.18445197703592076, "grad_norm": 10.694595336914062, "learning_rate": 9.793503413426016e-06, "loss": 26.552, "step": 91310 }, { "epoch": 0.18447217766860458, "grad_norm": 291.20263671875, "learning_rate": 9.793404121500362e-06, "loss": 27.9445, "step": 91320 }, { "epoch": 0.1844923783012884, "grad_norm": 292.54962158203125, "learning_rate": 9.79330480621224e-06, "loss": 24.1209, "step": 91330 }, { "epoch": 0.18451257893397222, "grad_norm": 182.7834014892578, "learning_rate": 9.793205467562131e-06, "loss": 26.8689, "step": 91340 }, { "epoch": 0.18453277956665604, "grad_norm": 207.84133911132812, "learning_rate": 9.793106105550518e-06, "loss": 12.1805, "step": 91350 }, { "epoch": 0.18455298019933986, "grad_norm": 741.1467895507812, "learning_rate": 9.793006720177887e-06, "loss": 24.6049, "step": 91360 }, { "epoch": 0.18457318083202365, "grad_norm": 200.22657775878906, "learning_rate": 9.792907311444724e-06, "loss": 24.2828, "step": 91370 }, { "epoch": 0.18459338146470747, "grad_norm": 318.26361083984375, "learning_rate": 9.792807879351513e-06, "loss": 35.9923, "step": 91380 }, { "epoch": 0.1846135820973913, "grad_norm": 150.55072021484375, "learning_rate": 9.792708423898735e-06, "loss": 13.4644, "step": 91390 }, { "epoch": 0.1846337827300751, "grad_norm": 364.8077087402344, "learning_rate": 9.79260894508688e-06, "loss": 21.4716, "step": 91400 }, { "epoch": 0.18465398336275893, "grad_norm": 591.1673583984375, "learning_rate": 9.79250944291643e-06, "loss": 23.0874, "step": 91410 }, { "epoch": 0.18467418399544275, "grad_norm": 416.2530822753906, "learning_rate": 9.792409917387869e-06, "loss": 17.7856, "step": 91420 }, { "epoch": 0.18469438462812654, "grad_norm": 253.9188690185547, "learning_rate": 9.792310368501684e-06, "loss": 12.5121, "step": 91430 }, { "epoch": 0.18471458526081036, "grad_norm": 514.592529296875, "learning_rate": 9.792210796258358e-06, "loss": 22.4407, "step": 91440 }, { "epoch": 0.18473478589349418, "grad_norm": 257.2930908203125, "learning_rate": 9.79211120065838e-06, "loss": 14.1409, "step": 91450 }, { "epoch": 0.184754986526178, "grad_norm": 302.0247802734375, "learning_rate": 9.792011581702234e-06, "loss": 24.009, "step": 91460 }, { "epoch": 0.18477518715886182, "grad_norm": 158.4606475830078, "learning_rate": 9.791911939390401e-06, "loss": 11.5797, "step": 91470 }, { "epoch": 0.18479538779154564, "grad_norm": 196.00820922851562, "learning_rate": 9.791812273723374e-06, "loss": 23.4315, "step": 91480 }, { "epoch": 0.18481558842422946, "grad_norm": 112.18689727783203, "learning_rate": 9.791712584701634e-06, "loss": 23.7941, "step": 91490 }, { "epoch": 0.18483578905691325, "grad_norm": 33.265220642089844, "learning_rate": 9.791612872325667e-06, "loss": 30.5944, "step": 91500 }, { "epoch": 0.18485598968959707, "grad_norm": 300.4789733886719, "learning_rate": 9.79151313659596e-06, "loss": 24.0576, "step": 91510 }, { "epoch": 0.1848761903222809, "grad_norm": 132.36769104003906, "learning_rate": 9.791413377513001e-06, "loss": 29.6082, "step": 91520 }, { "epoch": 0.1848963909549647, "grad_norm": 261.1658020019531, "learning_rate": 9.791313595077272e-06, "loss": 19.2469, "step": 91530 }, { "epoch": 0.18491659158764853, "grad_norm": 239.2753143310547, "learning_rate": 9.791213789289264e-06, "loss": 16.4799, "step": 91540 }, { "epoch": 0.18493679222033235, "grad_norm": 322.2201843261719, "learning_rate": 9.791113960149458e-06, "loss": 16.0324, "step": 91550 }, { "epoch": 0.18495699285301614, "grad_norm": 813.63134765625, "learning_rate": 9.791014107658348e-06, "loss": 35.9425, "step": 91560 }, { "epoch": 0.18497719348569996, "grad_norm": 333.999755859375, "learning_rate": 9.790914231816414e-06, "loss": 14.5045, "step": 91570 }, { "epoch": 0.18499739411838378, "grad_norm": 146.5482177734375, "learning_rate": 9.790814332624144e-06, "loss": 19.8326, "step": 91580 }, { "epoch": 0.1850175947510676, "grad_norm": 201.1363067626953, "learning_rate": 9.790714410082027e-06, "loss": 34.4231, "step": 91590 }, { "epoch": 0.18503779538375142, "grad_norm": 119.81622314453125, "learning_rate": 9.79061446419055e-06, "loss": 11.6963, "step": 91600 }, { "epoch": 0.18505799601643524, "grad_norm": 0.0, "learning_rate": 9.790514494950196e-06, "loss": 37.7837, "step": 91610 }, { "epoch": 0.18507819664911904, "grad_norm": 160.59439086914062, "learning_rate": 9.790414502361458e-06, "loss": 31.7303, "step": 91620 }, { "epoch": 0.18509839728180286, "grad_norm": 556.55859375, "learning_rate": 9.790314486424821e-06, "loss": 25.4992, "step": 91630 }, { "epoch": 0.18511859791448668, "grad_norm": 393.51043701171875, "learning_rate": 9.790214447140771e-06, "loss": 37.1771, "step": 91640 }, { "epoch": 0.1851387985471705, "grad_norm": 235.26254272460938, "learning_rate": 9.790114384509796e-06, "loss": 23.0472, "step": 91650 }, { "epoch": 0.18515899917985432, "grad_norm": 290.5931701660156, "learning_rate": 9.790014298532386e-06, "loss": 11.0497, "step": 91660 }, { "epoch": 0.18517919981253814, "grad_norm": 303.9353332519531, "learning_rate": 9.789914189209028e-06, "loss": 16.9299, "step": 91670 }, { "epoch": 0.18519940044522196, "grad_norm": 83.9176254272461, "learning_rate": 9.789814056540207e-06, "loss": 32.9422, "step": 91680 }, { "epoch": 0.18521960107790575, "grad_norm": 0.0, "learning_rate": 9.789713900526415e-06, "loss": 18.4059, "step": 91690 }, { "epoch": 0.18523980171058957, "grad_norm": 217.85464477539062, "learning_rate": 9.789613721168138e-06, "loss": 22.3585, "step": 91700 }, { "epoch": 0.1852600023432734, "grad_norm": 527.1065063476562, "learning_rate": 9.789513518465866e-06, "loss": 23.5811, "step": 91710 }, { "epoch": 0.1852802029759572, "grad_norm": 427.72125244140625, "learning_rate": 9.789413292420082e-06, "loss": 21.7291, "step": 91720 }, { "epoch": 0.18530040360864103, "grad_norm": 361.2764892578125, "learning_rate": 9.789313043031281e-06, "loss": 20.1113, "step": 91730 }, { "epoch": 0.18532060424132485, "grad_norm": 142.89356994628906, "learning_rate": 9.78921277029995e-06, "loss": 14.2255, "step": 91740 }, { "epoch": 0.18534080487400864, "grad_norm": 136.4586944580078, "learning_rate": 9.789112474226575e-06, "loss": 16.5208, "step": 91750 }, { "epoch": 0.18536100550669246, "grad_norm": 311.0579833984375, "learning_rate": 9.789012154811648e-06, "loss": 18.4777, "step": 91760 }, { "epoch": 0.18538120613937628, "grad_norm": 235.29742431640625, "learning_rate": 9.788911812055656e-06, "loss": 15.6108, "step": 91770 }, { "epoch": 0.1854014067720601, "grad_norm": 44.77545928955078, "learning_rate": 9.788811445959088e-06, "loss": 20.0434, "step": 91780 }, { "epoch": 0.18542160740474392, "grad_norm": 365.3935546875, "learning_rate": 9.788711056522436e-06, "loss": 26.7643, "step": 91790 }, { "epoch": 0.18544180803742774, "grad_norm": 284.0947265625, "learning_rate": 9.788610643746184e-06, "loss": 16.9639, "step": 91800 }, { "epoch": 0.18546200867011156, "grad_norm": 264.39312744140625, "learning_rate": 9.788510207630825e-06, "loss": 21.1654, "step": 91810 }, { "epoch": 0.18548220930279535, "grad_norm": 219.45204162597656, "learning_rate": 9.78840974817685e-06, "loss": 26.7185, "step": 91820 }, { "epoch": 0.18550240993547917, "grad_norm": 297.1233215332031, "learning_rate": 9.788309265384745e-06, "loss": 23.0185, "step": 91830 }, { "epoch": 0.185522610568163, "grad_norm": 445.6880187988281, "learning_rate": 9.788208759255003e-06, "loss": 24.4438, "step": 91840 }, { "epoch": 0.1855428112008468, "grad_norm": 227.6763153076172, "learning_rate": 9.788108229788111e-06, "loss": 22.9985, "step": 91850 }, { "epoch": 0.18556301183353063, "grad_norm": 542.4153442382812, "learning_rate": 9.788007676984562e-06, "loss": 30.7971, "step": 91860 }, { "epoch": 0.18558321246621445, "grad_norm": 504.9271545410156, "learning_rate": 9.787907100844842e-06, "loss": 37.8358, "step": 91870 }, { "epoch": 0.18560341309889825, "grad_norm": 421.9410095214844, "learning_rate": 9.787806501369446e-06, "loss": 23.6416, "step": 91880 }, { "epoch": 0.18562361373158207, "grad_norm": 306.1210632324219, "learning_rate": 9.78770587855886e-06, "loss": 25.7165, "step": 91890 }, { "epoch": 0.18564381436426589, "grad_norm": 368.301025390625, "learning_rate": 9.787605232413575e-06, "loss": 32.6715, "step": 91900 }, { "epoch": 0.1856640149969497, "grad_norm": 673.1619262695312, "learning_rate": 9.787504562934085e-06, "loss": 32.4196, "step": 91910 }, { "epoch": 0.18568421562963353, "grad_norm": 233.02880859375, "learning_rate": 9.787403870120877e-06, "loss": 17.1022, "step": 91920 }, { "epoch": 0.18570441626231735, "grad_norm": 192.5258026123047, "learning_rate": 9.787303153974444e-06, "loss": 25.3073, "step": 91930 }, { "epoch": 0.18572461689500114, "grad_norm": 388.9276428222656, "learning_rate": 9.787202414495275e-06, "loss": 20.593, "step": 91940 }, { "epoch": 0.18574481752768496, "grad_norm": 222.32278442382812, "learning_rate": 9.787101651683864e-06, "loss": 14.7478, "step": 91950 }, { "epoch": 0.18576501816036878, "grad_norm": 482.6448669433594, "learning_rate": 9.787000865540698e-06, "loss": 22.3802, "step": 91960 }, { "epoch": 0.1857852187930526, "grad_norm": 293.3865966796875, "learning_rate": 9.786900056066272e-06, "loss": 14.3311, "step": 91970 }, { "epoch": 0.18580541942573642, "grad_norm": 95.84113311767578, "learning_rate": 9.786799223261076e-06, "loss": 27.883, "step": 91980 }, { "epoch": 0.18582562005842024, "grad_norm": 129.98643493652344, "learning_rate": 9.7866983671256e-06, "loss": 22.878, "step": 91990 }, { "epoch": 0.18584582069110406, "grad_norm": 646.4723510742188, "learning_rate": 9.786597487660336e-06, "loss": 20.3505, "step": 92000 }, { "epoch": 0.18586602132378785, "grad_norm": 339.1436767578125, "learning_rate": 9.786496584865778e-06, "loss": 14.2769, "step": 92010 }, { "epoch": 0.18588622195647167, "grad_norm": 272.45489501953125, "learning_rate": 9.786395658742415e-06, "loss": 19.8973, "step": 92020 }, { "epoch": 0.1859064225891555, "grad_norm": 63.38334274291992, "learning_rate": 9.786294709290741e-06, "loss": 10.2713, "step": 92030 }, { "epoch": 0.1859266232218393, "grad_norm": 196.5893096923828, "learning_rate": 9.786193736511247e-06, "loss": 18.1057, "step": 92040 }, { "epoch": 0.18594682385452313, "grad_norm": 262.9736633300781, "learning_rate": 9.786092740404424e-06, "loss": 19.949, "step": 92050 }, { "epoch": 0.18596702448720695, "grad_norm": 105.48812866210938, "learning_rate": 9.78599172097077e-06, "loss": 9.7883, "step": 92060 }, { "epoch": 0.18598722511989074, "grad_norm": 358.4338073730469, "learning_rate": 9.785890678210768e-06, "loss": 25.9648, "step": 92070 }, { "epoch": 0.18600742575257456, "grad_norm": 184.5376434326172, "learning_rate": 9.785789612124916e-06, "loss": 27.4411, "step": 92080 }, { "epoch": 0.18602762638525838, "grad_norm": 425.42291259765625, "learning_rate": 9.785688522713707e-06, "loss": 20.1251, "step": 92090 }, { "epoch": 0.1860478270179422, "grad_norm": 501.2784729003906, "learning_rate": 9.785587409977632e-06, "loss": 45.2366, "step": 92100 }, { "epoch": 0.18606802765062602, "grad_norm": 270.1343078613281, "learning_rate": 9.785486273917184e-06, "loss": 23.0714, "step": 92110 }, { "epoch": 0.18608822828330984, "grad_norm": 517.252685546875, "learning_rate": 9.785385114532858e-06, "loss": 24.4452, "step": 92120 }, { "epoch": 0.18610842891599366, "grad_norm": 359.87835693359375, "learning_rate": 9.785283931825143e-06, "loss": 20.6682, "step": 92130 }, { "epoch": 0.18612862954867745, "grad_norm": 42.77140808105469, "learning_rate": 9.785182725794535e-06, "loss": 49.4772, "step": 92140 }, { "epoch": 0.18614883018136127, "grad_norm": 144.9082489013672, "learning_rate": 9.785081496441528e-06, "loss": 30.7034, "step": 92150 }, { "epoch": 0.1861690308140451, "grad_norm": 140.69186401367188, "learning_rate": 9.784980243766613e-06, "loss": 14.8735, "step": 92160 }, { "epoch": 0.18618923144672891, "grad_norm": 273.7860107421875, "learning_rate": 9.784878967770286e-06, "loss": 26.4269, "step": 92170 }, { "epoch": 0.18620943207941273, "grad_norm": 965.4298095703125, "learning_rate": 9.784777668453039e-06, "loss": 34.0015, "step": 92180 }, { "epoch": 0.18622963271209655, "grad_norm": 311.4125671386719, "learning_rate": 9.784676345815364e-06, "loss": 27.4822, "step": 92190 }, { "epoch": 0.18624983334478035, "grad_norm": 479.19256591796875, "learning_rate": 9.784574999857757e-06, "loss": 13.2305, "step": 92200 }, { "epoch": 0.18627003397746417, "grad_norm": 419.57513427734375, "learning_rate": 9.784473630580713e-06, "loss": 37.7935, "step": 92210 }, { "epoch": 0.186290234610148, "grad_norm": 318.4390869140625, "learning_rate": 9.784372237984726e-06, "loss": 30.3359, "step": 92220 }, { "epoch": 0.1863104352428318, "grad_norm": 527.5970458984375, "learning_rate": 9.784270822070288e-06, "loss": 26.1903, "step": 92230 }, { "epoch": 0.18633063587551563, "grad_norm": 223.4822998046875, "learning_rate": 9.784169382837893e-06, "loss": 38.1354, "step": 92240 }, { "epoch": 0.18635083650819945, "grad_norm": 369.0224304199219, "learning_rate": 9.78406792028804e-06, "loss": 15.2236, "step": 92250 }, { "epoch": 0.18637103714088324, "grad_norm": 334.6349792480469, "learning_rate": 9.783966434421215e-06, "loss": 21.04, "step": 92260 }, { "epoch": 0.18639123777356706, "grad_norm": 319.9170227050781, "learning_rate": 9.783864925237922e-06, "loss": 22.3442, "step": 92270 }, { "epoch": 0.18641143840625088, "grad_norm": 209.58740234375, "learning_rate": 9.78376339273865e-06, "loss": 24.6905, "step": 92280 }, { "epoch": 0.1864316390389347, "grad_norm": 437.90899658203125, "learning_rate": 9.783661836923894e-06, "loss": 26.8407, "step": 92290 }, { "epoch": 0.18645183967161852, "grad_norm": 267.0625305175781, "learning_rate": 9.783560257794153e-06, "loss": 15.9292, "step": 92300 }, { "epoch": 0.18647204030430234, "grad_norm": 547.52685546875, "learning_rate": 9.783458655349919e-06, "loss": 17.2839, "step": 92310 }, { "epoch": 0.18649224093698616, "grad_norm": 596.3805541992188, "learning_rate": 9.783357029591686e-06, "loss": 41.1593, "step": 92320 }, { "epoch": 0.18651244156966995, "grad_norm": 366.0256042480469, "learning_rate": 9.783255380519953e-06, "loss": 40.038, "step": 92330 }, { "epoch": 0.18653264220235377, "grad_norm": 464.4693603515625, "learning_rate": 9.783153708135214e-06, "loss": 34.8394, "step": 92340 }, { "epoch": 0.1865528428350376, "grad_norm": 276.56170654296875, "learning_rate": 9.783052012437962e-06, "loss": 23.2176, "step": 92350 }, { "epoch": 0.1865730434677214, "grad_norm": 404.85186767578125, "learning_rate": 9.782950293428695e-06, "loss": 17.9402, "step": 92360 }, { "epoch": 0.18659324410040523, "grad_norm": 165.71824645996094, "learning_rate": 9.782848551107908e-06, "loss": 16.8055, "step": 92370 }, { "epoch": 0.18661344473308905, "grad_norm": 1401.714111328125, "learning_rate": 9.782746785476098e-06, "loss": 21.3945, "step": 92380 }, { "epoch": 0.18663364536577284, "grad_norm": 465.0721130371094, "learning_rate": 9.78264499653376e-06, "loss": 39.5277, "step": 92390 }, { "epoch": 0.18665384599845666, "grad_norm": 492.55767822265625, "learning_rate": 9.78254318428139e-06, "loss": 23.5998, "step": 92400 }, { "epoch": 0.18667404663114048, "grad_norm": 1109.703125, "learning_rate": 9.782441348719485e-06, "loss": 52.6514, "step": 92410 }, { "epoch": 0.1866942472638243, "grad_norm": 319.2273254394531, "learning_rate": 9.782339489848541e-06, "loss": 38.2507, "step": 92420 }, { "epoch": 0.18671444789650812, "grad_norm": 273.02703857421875, "learning_rate": 9.782237607669053e-06, "loss": 23.155, "step": 92430 }, { "epoch": 0.18673464852919194, "grad_norm": 336.2960205078125, "learning_rate": 9.782135702181521e-06, "loss": 14.6035, "step": 92440 }, { "epoch": 0.18675484916187576, "grad_norm": 308.3573303222656, "learning_rate": 9.782033773386439e-06, "loss": 23.2049, "step": 92450 }, { "epoch": 0.18677504979455956, "grad_norm": 477.781982421875, "learning_rate": 9.781931821284305e-06, "loss": 25.2623, "step": 92460 }, { "epoch": 0.18679525042724338, "grad_norm": 421.1153259277344, "learning_rate": 9.781829845875613e-06, "loss": 57.3622, "step": 92470 }, { "epoch": 0.1868154510599272, "grad_norm": 184.33380126953125, "learning_rate": 9.781727847160865e-06, "loss": 11.5084, "step": 92480 }, { "epoch": 0.18683565169261102, "grad_norm": 614.2846069335938, "learning_rate": 9.781625825140552e-06, "loss": 32.8354, "step": 92490 }, { "epoch": 0.18685585232529484, "grad_norm": 429.2632141113281, "learning_rate": 9.781523779815178e-06, "loss": 18.7977, "step": 92500 }, { "epoch": 0.18687605295797866, "grad_norm": 348.3353576660156, "learning_rate": 9.781421711185236e-06, "loss": 18.3037, "step": 92510 }, { "epoch": 0.18689625359066245, "grad_norm": 434.90045166015625, "learning_rate": 9.781319619251223e-06, "loss": 28.5953, "step": 92520 }, { "epoch": 0.18691645422334627, "grad_norm": 233.76258850097656, "learning_rate": 9.78121750401364e-06, "loss": 21.635, "step": 92530 }, { "epoch": 0.1869366548560301, "grad_norm": 256.8558654785156, "learning_rate": 9.781115365472983e-06, "loss": 28.7361, "step": 92540 }, { "epoch": 0.1869568554887139, "grad_norm": 949.9705810546875, "learning_rate": 9.781013203629748e-06, "loss": 19.4207, "step": 92550 }, { "epoch": 0.18697705612139773, "grad_norm": 403.1462097167969, "learning_rate": 9.780911018484436e-06, "loss": 24.1767, "step": 92560 }, { "epoch": 0.18699725675408155, "grad_norm": 147.24513244628906, "learning_rate": 9.780808810037543e-06, "loss": 22.6574, "step": 92570 }, { "epoch": 0.18701745738676534, "grad_norm": 317.802734375, "learning_rate": 9.780706578289567e-06, "loss": 24.9987, "step": 92580 }, { "epoch": 0.18703765801944916, "grad_norm": 255.35914611816406, "learning_rate": 9.780604323241007e-06, "loss": 27.3496, "step": 92590 }, { "epoch": 0.18705785865213298, "grad_norm": 78.62438201904297, "learning_rate": 9.780502044892363e-06, "loss": 12.7722, "step": 92600 }, { "epoch": 0.1870780592848168, "grad_norm": 636.4462890625, "learning_rate": 9.78039974324413e-06, "loss": 26.6544, "step": 92610 }, { "epoch": 0.18709825991750062, "grad_norm": 227.80630493164062, "learning_rate": 9.78029741829681e-06, "loss": 24.5362, "step": 92620 }, { "epoch": 0.18711846055018444, "grad_norm": 163.21299743652344, "learning_rate": 9.780195070050898e-06, "loss": 15.0206, "step": 92630 }, { "epoch": 0.18713866118286826, "grad_norm": 348.76824951171875, "learning_rate": 9.780092698506897e-06, "loss": 23.8625, "step": 92640 }, { "epoch": 0.18715886181555205, "grad_norm": 184.10012817382812, "learning_rate": 9.779990303665303e-06, "loss": 13.7342, "step": 92650 }, { "epoch": 0.18717906244823587, "grad_norm": 274.6661071777344, "learning_rate": 9.779887885526616e-06, "loss": 15.5961, "step": 92660 }, { "epoch": 0.1871992630809197, "grad_norm": 181.11634826660156, "learning_rate": 9.779785444091336e-06, "loss": 12.5081, "step": 92670 }, { "epoch": 0.1872194637136035, "grad_norm": 41.04671859741211, "learning_rate": 9.779682979359961e-06, "loss": 21.379, "step": 92680 }, { "epoch": 0.18723966434628733, "grad_norm": 475.67352294921875, "learning_rate": 9.77958049133299e-06, "loss": 24.3355, "step": 92690 }, { "epoch": 0.18725986497897115, "grad_norm": 220.15103149414062, "learning_rate": 9.779477980010924e-06, "loss": 28.9252, "step": 92700 }, { "epoch": 0.18728006561165494, "grad_norm": 279.75555419921875, "learning_rate": 9.779375445394262e-06, "loss": 20.8538, "step": 92710 }, { "epoch": 0.18730026624433876, "grad_norm": 154.594482421875, "learning_rate": 9.779272887483503e-06, "loss": 24.3734, "step": 92720 }, { "epoch": 0.18732046687702258, "grad_norm": 257.41424560546875, "learning_rate": 9.77917030627915e-06, "loss": 29.6833, "step": 92730 }, { "epoch": 0.1873406675097064, "grad_norm": 171.96484375, "learning_rate": 9.779067701781698e-06, "loss": 27.1791, "step": 92740 }, { "epoch": 0.18736086814239022, "grad_norm": 599.0753173828125, "learning_rate": 9.778965073991652e-06, "loss": 36.2864, "step": 92750 }, { "epoch": 0.18738106877507404, "grad_norm": 239.88487243652344, "learning_rate": 9.778862422909507e-06, "loss": 22.5362, "step": 92760 }, { "epoch": 0.18740126940775786, "grad_norm": 374.866943359375, "learning_rate": 9.778759748535768e-06, "loss": 26.1549, "step": 92770 }, { "epoch": 0.18742147004044166, "grad_norm": 92.84783935546875, "learning_rate": 9.778657050870934e-06, "loss": 21.3917, "step": 92780 }, { "epoch": 0.18744167067312548, "grad_norm": 332.0933837890625, "learning_rate": 9.778554329915503e-06, "loss": 29.8005, "step": 92790 }, { "epoch": 0.1874618713058093, "grad_norm": 216.3912811279297, "learning_rate": 9.778451585669982e-06, "loss": 12.2478, "step": 92800 }, { "epoch": 0.18748207193849312, "grad_norm": 175.2101593017578, "learning_rate": 9.778348818134864e-06, "loss": 40.4927, "step": 92810 }, { "epoch": 0.18750227257117694, "grad_norm": 725.046142578125, "learning_rate": 9.778246027310654e-06, "loss": 45.7492, "step": 92820 }, { "epoch": 0.18752247320386076, "grad_norm": 11.89516830444336, "learning_rate": 9.778143213197852e-06, "loss": 24.3861, "step": 92830 }, { "epoch": 0.18754267383654455, "grad_norm": 184.01686096191406, "learning_rate": 9.77804037579696e-06, "loss": 20.5928, "step": 92840 }, { "epoch": 0.18756287446922837, "grad_norm": 423.7904052734375, "learning_rate": 9.777937515108478e-06, "loss": 17.577, "step": 92850 }, { "epoch": 0.1875830751019122, "grad_norm": 89.69942474365234, "learning_rate": 9.77783463113291e-06, "loss": 25.6279, "step": 92860 }, { "epoch": 0.187603275734596, "grad_norm": 153.4837188720703, "learning_rate": 9.777731723870753e-06, "loss": 16.7496, "step": 92870 }, { "epoch": 0.18762347636727983, "grad_norm": 145.99000549316406, "learning_rate": 9.777628793322513e-06, "loss": 22.7945, "step": 92880 }, { "epoch": 0.18764367699996365, "grad_norm": 333.08892822265625, "learning_rate": 9.777525839488688e-06, "loss": 23.2938, "step": 92890 }, { "epoch": 0.18766387763264744, "grad_norm": 344.0342102050781, "learning_rate": 9.777422862369782e-06, "loss": 26.7189, "step": 92900 }, { "epoch": 0.18768407826533126, "grad_norm": 563.7380981445312, "learning_rate": 9.777319861966298e-06, "loss": 22.5155, "step": 92910 }, { "epoch": 0.18770427889801508, "grad_norm": 147.90626525878906, "learning_rate": 9.777216838278735e-06, "loss": 12.5397, "step": 92920 }, { "epoch": 0.1877244795306989, "grad_norm": 307.164794921875, "learning_rate": 9.777113791307597e-06, "loss": 33.2205, "step": 92930 }, { "epoch": 0.18774468016338272, "grad_norm": 100.38538360595703, "learning_rate": 9.777010721053387e-06, "loss": 27.459, "step": 92940 }, { "epoch": 0.18776488079606654, "grad_norm": 315.0317077636719, "learning_rate": 9.776907627516604e-06, "loss": 35.9583, "step": 92950 }, { "epoch": 0.18778508142875036, "grad_norm": 228.95358276367188, "learning_rate": 9.776804510697753e-06, "loss": 24.183, "step": 92960 }, { "epoch": 0.18780528206143415, "grad_norm": 38.490966796875, "learning_rate": 9.776701370597337e-06, "loss": 18.3173, "step": 92970 }, { "epoch": 0.18782548269411797, "grad_norm": 31.06801986694336, "learning_rate": 9.776598207215857e-06, "loss": 31.653, "step": 92980 }, { "epoch": 0.1878456833268018, "grad_norm": 287.96893310546875, "learning_rate": 9.776495020553817e-06, "loss": 17.1507, "step": 92990 }, { "epoch": 0.1878658839594856, "grad_norm": 368.7970275878906, "learning_rate": 9.776391810611719e-06, "loss": 13.2291, "step": 93000 }, { "epoch": 0.18788608459216943, "grad_norm": 288.640869140625, "learning_rate": 9.776288577390067e-06, "loss": 12.3, "step": 93010 }, { "epoch": 0.18790628522485325, "grad_norm": 90.69969940185547, "learning_rate": 9.776185320889364e-06, "loss": 16.856, "step": 93020 }, { "epoch": 0.18792648585753705, "grad_norm": 370.4809875488281, "learning_rate": 9.776082041110112e-06, "loss": 8.0837, "step": 93030 }, { "epoch": 0.18794668649022087, "grad_norm": 249.63800048828125, "learning_rate": 9.775978738052818e-06, "loss": 25.7264, "step": 93040 }, { "epoch": 0.18796688712290469, "grad_norm": 154.9631805419922, "learning_rate": 9.775875411717981e-06, "loss": 18.825, "step": 93050 }, { "epoch": 0.1879870877555885, "grad_norm": 199.0854034423828, "learning_rate": 9.775772062106106e-06, "loss": 28.3037, "step": 93060 }, { "epoch": 0.18800728838827233, "grad_norm": 249.96978759765625, "learning_rate": 9.775668689217698e-06, "loss": 15.0171, "step": 93070 }, { "epoch": 0.18802748902095615, "grad_norm": 184.31199645996094, "learning_rate": 9.775565293053262e-06, "loss": 22.9865, "step": 93080 }, { "epoch": 0.18804768965363997, "grad_norm": 190.9927215576172, "learning_rate": 9.775461873613297e-06, "loss": 20.681, "step": 93090 }, { "epoch": 0.18806789028632376, "grad_norm": 227.64498901367188, "learning_rate": 9.775358430898311e-06, "loss": 19.7993, "step": 93100 }, { "epoch": 0.18808809091900758, "grad_norm": 47.130821228027344, "learning_rate": 9.775254964908807e-06, "loss": 18.8707, "step": 93110 }, { "epoch": 0.1881082915516914, "grad_norm": 124.72563171386719, "learning_rate": 9.77515147564529e-06, "loss": 16.2885, "step": 93120 }, { "epoch": 0.18812849218437522, "grad_norm": 243.3814697265625, "learning_rate": 9.775047963108264e-06, "loss": 14.4609, "step": 93130 }, { "epoch": 0.18814869281705904, "grad_norm": 324.383056640625, "learning_rate": 9.774944427298232e-06, "loss": 22.2099, "step": 93140 }, { "epoch": 0.18816889344974286, "grad_norm": 324.10308837890625, "learning_rate": 9.7748408682157e-06, "loss": 41.552, "step": 93150 }, { "epoch": 0.18818909408242665, "grad_norm": 264.2583923339844, "learning_rate": 9.774737285861176e-06, "loss": 32.8211, "step": 93160 }, { "epoch": 0.18820929471511047, "grad_norm": 281.4826965332031, "learning_rate": 9.774633680235158e-06, "loss": 21.6038, "step": 93170 }, { "epoch": 0.1882294953477943, "grad_norm": 227.94175720214844, "learning_rate": 9.774530051338155e-06, "loss": 15.1333, "step": 93180 }, { "epoch": 0.1882496959804781, "grad_norm": 159.8232421875, "learning_rate": 9.774426399170673e-06, "loss": 21.6667, "step": 93190 }, { "epoch": 0.18826989661316193, "grad_norm": 170.5233917236328, "learning_rate": 9.774322723733216e-06, "loss": 19.5051, "step": 93200 }, { "epoch": 0.18829009724584575, "grad_norm": 346.1835632324219, "learning_rate": 9.774219025026289e-06, "loss": 22.4781, "step": 93210 }, { "epoch": 0.18831029787852954, "grad_norm": 158.45950317382812, "learning_rate": 9.774115303050395e-06, "loss": 23.2249, "step": 93220 }, { "epoch": 0.18833049851121336, "grad_norm": 323.4689025878906, "learning_rate": 9.774011557806044e-06, "loss": 14.3453, "step": 93230 }, { "epoch": 0.18835069914389718, "grad_norm": 76.60681915283203, "learning_rate": 9.773907789293739e-06, "loss": 15.824, "step": 93240 }, { "epoch": 0.188370899776581, "grad_norm": 109.42433166503906, "learning_rate": 9.77380399751399e-06, "loss": 10.6543, "step": 93250 }, { "epoch": 0.18839110040926482, "grad_norm": 459.72149658203125, "learning_rate": 9.773700182467295e-06, "loss": 29.1354, "step": 93260 }, { "epoch": 0.18841130104194864, "grad_norm": 388.5712890625, "learning_rate": 9.773596344154165e-06, "loss": 14.1405, "step": 93270 }, { "epoch": 0.18843150167463246, "grad_norm": 161.18971252441406, "learning_rate": 9.773492482575106e-06, "loss": 37.9628, "step": 93280 }, { "epoch": 0.18845170230731625, "grad_norm": 146.5068817138672, "learning_rate": 9.773388597730623e-06, "loss": 20.1156, "step": 93290 }, { "epoch": 0.18847190294000007, "grad_norm": 375.2857360839844, "learning_rate": 9.773284689621223e-06, "loss": 22.5066, "step": 93300 }, { "epoch": 0.1884921035726839, "grad_norm": 196.74069213867188, "learning_rate": 9.773180758247413e-06, "loss": 34.3991, "step": 93310 }, { "epoch": 0.18851230420536771, "grad_norm": 60.855690002441406, "learning_rate": 9.773076803609699e-06, "loss": 10.1751, "step": 93320 }, { "epoch": 0.18853250483805153, "grad_norm": 80.93605041503906, "learning_rate": 9.772972825708587e-06, "loss": 10.9719, "step": 93330 }, { "epoch": 0.18855270547073535, "grad_norm": 107.1663589477539, "learning_rate": 9.772868824544585e-06, "loss": 24.8381, "step": 93340 }, { "epoch": 0.18857290610341915, "grad_norm": 259.0866394042969, "learning_rate": 9.7727648001182e-06, "loss": 33.0172, "step": 93350 }, { "epoch": 0.18859310673610297, "grad_norm": 268.72882080078125, "learning_rate": 9.772660752429937e-06, "loss": 21.2042, "step": 93360 }, { "epoch": 0.1886133073687868, "grad_norm": 110.12059783935547, "learning_rate": 9.772556681480303e-06, "loss": 11.787, "step": 93370 }, { "epoch": 0.1886335080014706, "grad_norm": 140.58726501464844, "learning_rate": 9.772452587269808e-06, "loss": 19.989, "step": 93380 }, { "epoch": 0.18865370863415443, "grad_norm": 206.03546142578125, "learning_rate": 9.772348469798958e-06, "loss": 25.8638, "step": 93390 }, { "epoch": 0.18867390926683825, "grad_norm": 436.09320068359375, "learning_rate": 9.772244329068261e-06, "loss": 24.034, "step": 93400 }, { "epoch": 0.18869410989952207, "grad_norm": 292.06451416015625, "learning_rate": 9.772140165078223e-06, "loss": 28.7028, "step": 93410 }, { "epoch": 0.18871431053220586, "grad_norm": 165.27633666992188, "learning_rate": 9.772035977829352e-06, "loss": 68.6614, "step": 93420 }, { "epoch": 0.18873451116488968, "grad_norm": 272.1954040527344, "learning_rate": 9.771931767322158e-06, "loss": 20.7266, "step": 93430 }, { "epoch": 0.1887547117975735, "grad_norm": 255.28759765625, "learning_rate": 9.771827533557147e-06, "loss": 19.449, "step": 93440 }, { "epoch": 0.18877491243025732, "grad_norm": 209.20069885253906, "learning_rate": 9.771723276534825e-06, "loss": 16.8313, "step": 93450 }, { "epoch": 0.18879511306294114, "grad_norm": 175.1893768310547, "learning_rate": 9.771618996255704e-06, "loss": 20.5461, "step": 93460 }, { "epoch": 0.18881531369562496, "grad_norm": 304.0472412109375, "learning_rate": 9.771514692720293e-06, "loss": 14.6777, "step": 93470 }, { "epoch": 0.18883551432830875, "grad_norm": 303.6784362792969, "learning_rate": 9.771410365929097e-06, "loss": 21.1494, "step": 93480 }, { "epoch": 0.18885571496099257, "grad_norm": 53.35122299194336, "learning_rate": 9.771306015882624e-06, "loss": 17.4921, "step": 93490 }, { "epoch": 0.1888759155936764, "grad_norm": 393.27252197265625, "learning_rate": 9.771201642581384e-06, "loss": 28.4851, "step": 93500 }, { "epoch": 0.1888961162263602, "grad_norm": 299.2636413574219, "learning_rate": 9.771097246025889e-06, "loss": 21.3983, "step": 93510 }, { "epoch": 0.18891631685904403, "grad_norm": 455.76611328125, "learning_rate": 9.770992826216642e-06, "loss": 17.8919, "step": 93520 }, { "epoch": 0.18893651749172785, "grad_norm": 379.54132080078125, "learning_rate": 9.770888383154156e-06, "loss": 25.3868, "step": 93530 }, { "epoch": 0.18895671812441164, "grad_norm": 407.341064453125, "learning_rate": 9.770783916838938e-06, "loss": 33.4259, "step": 93540 }, { "epoch": 0.18897691875709546, "grad_norm": 283.1967468261719, "learning_rate": 9.770679427271496e-06, "loss": 27.0361, "step": 93550 }, { "epoch": 0.18899711938977928, "grad_norm": 1946.7451171875, "learning_rate": 9.770574914452343e-06, "loss": 39.6896, "step": 93560 }, { "epoch": 0.1890173200224631, "grad_norm": 524.5565795898438, "learning_rate": 9.770470378381986e-06, "loss": 23.1471, "step": 93570 }, { "epoch": 0.18903752065514692, "grad_norm": 371.9656677246094, "learning_rate": 9.770365819060936e-06, "loss": 40.9287, "step": 93580 }, { "epoch": 0.18905772128783074, "grad_norm": 525.6839599609375, "learning_rate": 9.7702612364897e-06, "loss": 19.3103, "step": 93590 }, { "epoch": 0.18907792192051456, "grad_norm": 8.457381248474121, "learning_rate": 9.77015663066879e-06, "loss": 18.2702, "step": 93600 }, { "epoch": 0.18909812255319836, "grad_norm": 23.82843589782715, "learning_rate": 9.770052001598716e-06, "loss": 16.9854, "step": 93610 }, { "epoch": 0.18911832318588218, "grad_norm": 70.30158996582031, "learning_rate": 9.769947349279987e-06, "loss": 21.6895, "step": 93620 }, { "epoch": 0.189138523818566, "grad_norm": 401.6694030761719, "learning_rate": 9.769842673713112e-06, "loss": 29.5224, "step": 93630 }, { "epoch": 0.18915872445124982, "grad_norm": 548.55419921875, "learning_rate": 9.769737974898602e-06, "loss": 22.6383, "step": 93640 }, { "epoch": 0.18917892508393364, "grad_norm": 174.5022430419922, "learning_rate": 9.769633252836969e-06, "loss": 31.2135, "step": 93650 }, { "epoch": 0.18919912571661746, "grad_norm": 87.2772445678711, "learning_rate": 9.76952850752872e-06, "loss": 10.153, "step": 93660 }, { "epoch": 0.18921932634930125, "grad_norm": 304.46466064453125, "learning_rate": 9.76942373897437e-06, "loss": 23.2588, "step": 93670 }, { "epoch": 0.18923952698198507, "grad_norm": 253.52284240722656, "learning_rate": 9.769318947174426e-06, "loss": 29.9644, "step": 93680 }, { "epoch": 0.1892597276146689, "grad_norm": 48.23614501953125, "learning_rate": 9.769214132129399e-06, "loss": 32.1818, "step": 93690 }, { "epoch": 0.1892799282473527, "grad_norm": 187.70848083496094, "learning_rate": 9.769109293839803e-06, "loss": 22.2778, "step": 93700 }, { "epoch": 0.18930012888003653, "grad_norm": 178.39630126953125, "learning_rate": 9.769004432306145e-06, "loss": 17.2074, "step": 93710 }, { "epoch": 0.18932032951272035, "grad_norm": 265.5152282714844, "learning_rate": 9.768899547528939e-06, "loss": 22.8454, "step": 93720 }, { "epoch": 0.18934053014540417, "grad_norm": 362.0118408203125, "learning_rate": 9.768794639508693e-06, "loss": 29.6607, "step": 93730 }, { "epoch": 0.18936073077808796, "grad_norm": 200.31251525878906, "learning_rate": 9.768689708245921e-06, "loss": 17.5838, "step": 93740 }, { "epoch": 0.18938093141077178, "grad_norm": 339.9333801269531, "learning_rate": 9.768584753741134e-06, "loss": 21.0676, "step": 93750 }, { "epoch": 0.1894011320434556, "grad_norm": 838.4348754882812, "learning_rate": 9.768479775994846e-06, "loss": 25.1376, "step": 93760 }, { "epoch": 0.18942133267613942, "grad_norm": 115.03831481933594, "learning_rate": 9.768374775007562e-06, "loss": 18.573, "step": 93770 }, { "epoch": 0.18944153330882324, "grad_norm": 191.3826446533203, "learning_rate": 9.7682697507798e-06, "loss": 26.249, "step": 93780 }, { "epoch": 0.18946173394150706, "grad_norm": 206.6949920654297, "learning_rate": 9.768164703312068e-06, "loss": 33.9653, "step": 93790 }, { "epoch": 0.18948193457419085, "grad_norm": 129.89993286132812, "learning_rate": 9.768059632604881e-06, "loss": 22.9958, "step": 93800 }, { "epoch": 0.18950213520687467, "grad_norm": 194.9502410888672, "learning_rate": 9.767954538658749e-06, "loss": 30.9239, "step": 93810 }, { "epoch": 0.1895223358395585, "grad_norm": 130.5808563232422, "learning_rate": 9.767849421474185e-06, "loss": 16.9261, "step": 93820 }, { "epoch": 0.1895425364722423, "grad_norm": 0.0, "learning_rate": 9.767744281051702e-06, "loss": 9.0598, "step": 93830 }, { "epoch": 0.18956273710492613, "grad_norm": 348.2535400390625, "learning_rate": 9.76763911739181e-06, "loss": 30.5812, "step": 93840 }, { "epoch": 0.18958293773760995, "grad_norm": 366.2294006347656, "learning_rate": 9.767533930495023e-06, "loss": 15.8062, "step": 93850 }, { "epoch": 0.18960313837029374, "grad_norm": 318.7607421875, "learning_rate": 9.767428720361854e-06, "loss": 21.7142, "step": 93860 }, { "epoch": 0.18962333900297756, "grad_norm": 607.3001708984375, "learning_rate": 9.767323486992816e-06, "loss": 35.8998, "step": 93870 }, { "epoch": 0.18964353963566138, "grad_norm": 191.8736114501953, "learning_rate": 9.767218230388423e-06, "loss": 15.5391, "step": 93880 }, { "epoch": 0.1896637402683452, "grad_norm": 200.32989501953125, "learning_rate": 9.767112950549184e-06, "loss": 20.4374, "step": 93890 }, { "epoch": 0.18968394090102902, "grad_norm": 376.5995178222656, "learning_rate": 9.767007647475618e-06, "loss": 30.1357, "step": 93900 }, { "epoch": 0.18970414153371284, "grad_norm": 308.5623779296875, "learning_rate": 9.766902321168232e-06, "loss": 21.8727, "step": 93910 }, { "epoch": 0.18972434216639666, "grad_norm": 328.0945129394531, "learning_rate": 9.766796971627543e-06, "loss": 21.2163, "step": 93920 }, { "epoch": 0.18974454279908046, "grad_norm": 201.4481201171875, "learning_rate": 9.766691598854064e-06, "loss": 16.0522, "step": 93930 }, { "epoch": 0.18976474343176428, "grad_norm": 207.6687774658203, "learning_rate": 9.766586202848306e-06, "loss": 19.5377, "step": 93940 }, { "epoch": 0.1897849440644481, "grad_norm": 1019.5209350585938, "learning_rate": 9.766480783610789e-06, "loss": 18.6292, "step": 93950 }, { "epoch": 0.18980514469713192, "grad_norm": 403.9526672363281, "learning_rate": 9.76637534114202e-06, "loss": 22.3055, "step": 93960 }, { "epoch": 0.18982534532981574, "grad_norm": 308.828857421875, "learning_rate": 9.766269875442517e-06, "loss": 26.1687, "step": 93970 }, { "epoch": 0.18984554596249956, "grad_norm": 405.8609313964844, "learning_rate": 9.766164386512794e-06, "loss": 15.2053, "step": 93980 }, { "epoch": 0.18986574659518335, "grad_norm": 401.2463684082031, "learning_rate": 9.766058874353361e-06, "loss": 16.7258, "step": 93990 }, { "epoch": 0.18988594722786717, "grad_norm": 562.200439453125, "learning_rate": 9.765953338964736e-06, "loss": 36.9043, "step": 94000 }, { "epoch": 0.189906147860551, "grad_norm": 300.2681884765625, "learning_rate": 9.765847780347433e-06, "loss": 25.1656, "step": 94010 }, { "epoch": 0.1899263484932348, "grad_norm": 52.099822998046875, "learning_rate": 9.765742198501965e-06, "loss": 33.614, "step": 94020 }, { "epoch": 0.18994654912591863, "grad_norm": 453.072998046875, "learning_rate": 9.765636593428849e-06, "loss": 19.1368, "step": 94030 }, { "epoch": 0.18996674975860245, "grad_norm": 661.9375, "learning_rate": 9.765530965128597e-06, "loss": 30.2394, "step": 94040 }, { "epoch": 0.18998695039128627, "grad_norm": 323.73626708984375, "learning_rate": 9.765425313601726e-06, "loss": 21.7898, "step": 94050 }, { "epoch": 0.19000715102397006, "grad_norm": 123.1952133178711, "learning_rate": 9.765319638848749e-06, "loss": 12.7572, "step": 94060 }, { "epoch": 0.19002735165665388, "grad_norm": 376.8539733886719, "learning_rate": 9.765213940870183e-06, "loss": 30.478, "step": 94070 }, { "epoch": 0.1900475522893377, "grad_norm": 111.04681396484375, "learning_rate": 9.765108219666542e-06, "loss": 21.6014, "step": 94080 }, { "epoch": 0.19006775292202152, "grad_norm": 85.04240417480469, "learning_rate": 9.76500247523834e-06, "loss": 17.435, "step": 94090 }, { "epoch": 0.19008795355470534, "grad_norm": 216.96707153320312, "learning_rate": 9.764896707586095e-06, "loss": 20.1689, "step": 94100 }, { "epoch": 0.19010815418738916, "grad_norm": 499.3671569824219, "learning_rate": 9.76479091671032e-06, "loss": 16.2538, "step": 94110 }, { "epoch": 0.19012835482007295, "grad_norm": 181.33810424804688, "learning_rate": 9.764685102611535e-06, "loss": 28.2095, "step": 94120 }, { "epoch": 0.19014855545275677, "grad_norm": 300.9886474609375, "learning_rate": 9.76457926529025e-06, "loss": 9.7337, "step": 94130 }, { "epoch": 0.1901687560854406, "grad_norm": 588.2806396484375, "learning_rate": 9.764473404746986e-06, "loss": 29.8183, "step": 94140 }, { "epoch": 0.1901889567181244, "grad_norm": 731.3653564453125, "learning_rate": 9.764367520982255e-06, "loss": 25.9644, "step": 94150 }, { "epoch": 0.19020915735080823, "grad_norm": 143.86343383789062, "learning_rate": 9.764261613996574e-06, "loss": 16.8982, "step": 94160 }, { "epoch": 0.19022935798349205, "grad_norm": 325.8880920410156, "learning_rate": 9.764155683790461e-06, "loss": 11.9652, "step": 94170 }, { "epoch": 0.19024955861617585, "grad_norm": 158.3896026611328, "learning_rate": 9.76404973036443e-06, "loss": 28.6336, "step": 94180 }, { "epoch": 0.19026975924885967, "grad_norm": 267.0805969238281, "learning_rate": 9.763943753719e-06, "loss": 38.0129, "step": 94190 }, { "epoch": 0.19028995988154349, "grad_norm": 94.22724151611328, "learning_rate": 9.763837753854684e-06, "loss": 18.5603, "step": 94200 }, { "epoch": 0.1903101605142273, "grad_norm": 1245.8756103515625, "learning_rate": 9.763731730772001e-06, "loss": 21.5466, "step": 94210 }, { "epoch": 0.19033036114691113, "grad_norm": 721.010498046875, "learning_rate": 9.763625684471467e-06, "loss": 12.6052, "step": 94220 }, { "epoch": 0.19035056177959495, "grad_norm": 230.8107452392578, "learning_rate": 9.7635196149536e-06, "loss": 30.8716, "step": 94230 }, { "epoch": 0.19037076241227877, "grad_norm": 422.5337219238281, "learning_rate": 9.763413522218917e-06, "loss": 18.39, "step": 94240 }, { "epoch": 0.19039096304496256, "grad_norm": 329.68505859375, "learning_rate": 9.763307406267933e-06, "loss": 44.0506, "step": 94250 }, { "epoch": 0.19041116367764638, "grad_norm": 312.242431640625, "learning_rate": 9.763201267101165e-06, "loss": 13.9257, "step": 94260 }, { "epoch": 0.1904313643103302, "grad_norm": 319.14361572265625, "learning_rate": 9.763095104719133e-06, "loss": 19.7361, "step": 94270 }, { "epoch": 0.19045156494301402, "grad_norm": 230.5769500732422, "learning_rate": 9.762988919122354e-06, "loss": 26.351, "step": 94280 }, { "epoch": 0.19047176557569784, "grad_norm": 296.7395324707031, "learning_rate": 9.762882710311345e-06, "loss": 22.8948, "step": 94290 }, { "epoch": 0.19049196620838166, "grad_norm": 230.9600067138672, "learning_rate": 9.762776478286622e-06, "loss": 33.2527, "step": 94300 }, { "epoch": 0.19051216684106545, "grad_norm": 0.0, "learning_rate": 9.762670223048705e-06, "loss": 28.6461, "step": 94310 }, { "epoch": 0.19053236747374927, "grad_norm": 155.49974060058594, "learning_rate": 9.76256394459811e-06, "loss": 15.9737, "step": 94320 }, { "epoch": 0.1905525681064331, "grad_norm": 186.82366943359375, "learning_rate": 9.762457642935357e-06, "loss": 11.3011, "step": 94330 }, { "epoch": 0.1905727687391169, "grad_norm": 139.59580993652344, "learning_rate": 9.762351318060962e-06, "loss": 21.1467, "step": 94340 }, { "epoch": 0.19059296937180073, "grad_norm": 267.1019287109375, "learning_rate": 9.762244969975446e-06, "loss": 18.8771, "step": 94350 }, { "epoch": 0.19061317000448455, "grad_norm": 301.9827575683594, "learning_rate": 9.762138598679324e-06, "loss": 30.5744, "step": 94360 }, { "epoch": 0.19063337063716834, "grad_norm": 279.70867919921875, "learning_rate": 9.762032204173116e-06, "loss": 24.4182, "step": 94370 }, { "epoch": 0.19065357126985216, "grad_norm": 358.9949645996094, "learning_rate": 9.761925786457343e-06, "loss": 19.5592, "step": 94380 }, { "epoch": 0.19067377190253598, "grad_norm": 13.200350761413574, "learning_rate": 9.761819345532519e-06, "loss": 14.4839, "step": 94390 }, { "epoch": 0.1906939725352198, "grad_norm": 184.69224548339844, "learning_rate": 9.761712881399164e-06, "loss": 18.3217, "step": 94400 }, { "epoch": 0.19071417316790362, "grad_norm": 561.8948364257812, "learning_rate": 9.7616063940578e-06, "loss": 50.3835, "step": 94410 }, { "epoch": 0.19073437380058744, "grad_norm": 277.8091735839844, "learning_rate": 9.761499883508942e-06, "loss": 23.2119, "step": 94420 }, { "epoch": 0.19075457443327126, "grad_norm": 247.43226623535156, "learning_rate": 9.761393349753115e-06, "loss": 25.75, "step": 94430 }, { "epoch": 0.19077477506595505, "grad_norm": 144.9276885986328, "learning_rate": 9.76128679279083e-06, "loss": 28.4236, "step": 94440 }, { "epoch": 0.19079497569863887, "grad_norm": 511.2065124511719, "learning_rate": 9.761180212622613e-06, "loss": 28.8809, "step": 94450 }, { "epoch": 0.1908151763313227, "grad_norm": 411.8133239746094, "learning_rate": 9.761073609248981e-06, "loss": 25.5485, "step": 94460 }, { "epoch": 0.19083537696400651, "grad_norm": 229.08224487304688, "learning_rate": 9.760966982670453e-06, "loss": 36.879, "step": 94470 }, { "epoch": 0.19085557759669033, "grad_norm": 176.70693969726562, "learning_rate": 9.760860332887549e-06, "loss": 26.8028, "step": 94480 }, { "epoch": 0.19087577822937415, "grad_norm": 11.710991859436035, "learning_rate": 9.76075365990079e-06, "loss": 19.1592, "step": 94490 }, { "epoch": 0.19089597886205795, "grad_norm": 545.06103515625, "learning_rate": 9.760646963710694e-06, "loss": 19.8018, "step": 94500 }, { "epoch": 0.19091617949474177, "grad_norm": 200.0942840576172, "learning_rate": 9.760540244317784e-06, "loss": 20.5061, "step": 94510 }, { "epoch": 0.1909363801274256, "grad_norm": 294.2701110839844, "learning_rate": 9.760433501722576e-06, "loss": 11.7314, "step": 94520 }, { "epoch": 0.1909565807601094, "grad_norm": 109.44598388671875, "learning_rate": 9.760326735925594e-06, "loss": 22.3162, "step": 94530 }, { "epoch": 0.19097678139279323, "grad_norm": 377.84002685546875, "learning_rate": 9.760219946927357e-06, "loss": 30.1068, "step": 94540 }, { "epoch": 0.19099698202547705, "grad_norm": 155.59359741210938, "learning_rate": 9.760113134728383e-06, "loss": 25.9628, "step": 94550 }, { "epoch": 0.19101718265816087, "grad_norm": 141.0349884033203, "learning_rate": 9.760006299329198e-06, "loss": 18.8411, "step": 94560 }, { "epoch": 0.19103738329084466, "grad_norm": 484.6749572753906, "learning_rate": 9.759899440730318e-06, "loss": 37.8275, "step": 94570 }, { "epoch": 0.19105758392352848, "grad_norm": 164.3726043701172, "learning_rate": 9.759792558932267e-06, "loss": 21.4583, "step": 94580 }, { "epoch": 0.1910777845562123, "grad_norm": 148.2220001220703, "learning_rate": 9.759685653935563e-06, "loss": 20.4273, "step": 94590 }, { "epoch": 0.19109798518889612, "grad_norm": 297.6227111816406, "learning_rate": 9.759578725740726e-06, "loss": 22.7885, "step": 94600 }, { "epoch": 0.19111818582157994, "grad_norm": 160.47183227539062, "learning_rate": 9.759471774348284e-06, "loss": 30.8847, "step": 94610 }, { "epoch": 0.19113838645426376, "grad_norm": 136.57672119140625, "learning_rate": 9.759364799758751e-06, "loss": 15.5163, "step": 94620 }, { "epoch": 0.19115858708694755, "grad_norm": 161.4788818359375, "learning_rate": 9.759257801972652e-06, "loss": 20.1493, "step": 94630 }, { "epoch": 0.19117878771963137, "grad_norm": 230.7214813232422, "learning_rate": 9.759150780990508e-06, "loss": 21.3414, "step": 94640 }, { "epoch": 0.1911989883523152, "grad_norm": 175.5135498046875, "learning_rate": 9.75904373681284e-06, "loss": 15.3607, "step": 94650 }, { "epoch": 0.191219188984999, "grad_norm": 274.60491943359375, "learning_rate": 9.75893666944017e-06, "loss": 24.7289, "step": 94660 }, { "epoch": 0.19123938961768283, "grad_norm": 374.5411682128906, "learning_rate": 9.758829578873019e-06, "loss": 20.2489, "step": 94670 }, { "epoch": 0.19125959025036665, "grad_norm": 417.80078125, "learning_rate": 9.758722465111912e-06, "loss": 36.4468, "step": 94680 }, { "epoch": 0.19127979088305044, "grad_norm": 131.9131317138672, "learning_rate": 9.758615328157367e-06, "loss": 29.5282, "step": 94690 }, { "epoch": 0.19129999151573426, "grad_norm": 143.06561279296875, "learning_rate": 9.758508168009908e-06, "loss": 25.6554, "step": 94700 }, { "epoch": 0.19132019214841808, "grad_norm": 196.27479553222656, "learning_rate": 9.75840098467006e-06, "loss": 24.9366, "step": 94710 }, { "epoch": 0.1913403927811019, "grad_norm": 284.20318603515625, "learning_rate": 9.758293778138339e-06, "loss": 32.4492, "step": 94720 }, { "epoch": 0.19136059341378572, "grad_norm": 361.5111389160156, "learning_rate": 9.758186548415274e-06, "loss": 22.5207, "step": 94730 }, { "epoch": 0.19138079404646954, "grad_norm": 61.05268859863281, "learning_rate": 9.758079295501384e-06, "loss": 24.47, "step": 94740 }, { "epoch": 0.19140099467915336, "grad_norm": 260.6299133300781, "learning_rate": 9.757972019397192e-06, "loss": 27.2134, "step": 94750 }, { "epoch": 0.19142119531183716, "grad_norm": 285.7840881347656, "learning_rate": 9.757864720103222e-06, "loss": 15.2161, "step": 94760 }, { "epoch": 0.19144139594452098, "grad_norm": 414.1669616699219, "learning_rate": 9.757757397619995e-06, "loss": 16.5277, "step": 94770 }, { "epoch": 0.1914615965772048, "grad_norm": 227.98500061035156, "learning_rate": 9.757650051948037e-06, "loss": 21.3607, "step": 94780 }, { "epoch": 0.19148179720988862, "grad_norm": 170.49644470214844, "learning_rate": 9.757542683087871e-06, "loss": 26.3786, "step": 94790 }, { "epoch": 0.19150199784257244, "grad_norm": 252.81288146972656, "learning_rate": 9.757435291040016e-06, "loss": 18.7577, "step": 94800 }, { "epoch": 0.19152219847525626, "grad_norm": 319.0530090332031, "learning_rate": 9.757327875805e-06, "loss": 29.0448, "step": 94810 }, { "epoch": 0.19154239910794005, "grad_norm": 350.5924987792969, "learning_rate": 9.757220437383345e-06, "loss": 14.4896, "step": 94820 }, { "epoch": 0.19156259974062387, "grad_norm": 238.575927734375, "learning_rate": 9.757112975775575e-06, "loss": 15.704, "step": 94830 }, { "epoch": 0.1915828003733077, "grad_norm": 192.0004425048828, "learning_rate": 9.757005490982213e-06, "loss": 13.9367, "step": 94840 }, { "epoch": 0.1916030010059915, "grad_norm": 54.848663330078125, "learning_rate": 9.756897983003782e-06, "loss": 19.6473, "step": 94850 }, { "epoch": 0.19162320163867533, "grad_norm": 879.12646484375, "learning_rate": 9.756790451840807e-06, "loss": 22.8635, "step": 94860 }, { "epoch": 0.19164340227135915, "grad_norm": 272.5241394042969, "learning_rate": 9.756682897493814e-06, "loss": 23.3922, "step": 94870 }, { "epoch": 0.19166360290404297, "grad_norm": 181.5653533935547, "learning_rate": 9.756575319963325e-06, "loss": 25.6455, "step": 94880 }, { "epoch": 0.19168380353672676, "grad_norm": 241.05845642089844, "learning_rate": 9.756467719249865e-06, "loss": 40.4587, "step": 94890 }, { "epoch": 0.19170400416941058, "grad_norm": 509.5583801269531, "learning_rate": 9.756360095353957e-06, "loss": 29.4029, "step": 94900 }, { "epoch": 0.1917242048020944, "grad_norm": 197.718017578125, "learning_rate": 9.756252448276128e-06, "loss": 16.8377, "step": 94910 }, { "epoch": 0.19174440543477822, "grad_norm": 204.44497680664062, "learning_rate": 9.756144778016901e-06, "loss": 14.1396, "step": 94920 }, { "epoch": 0.19176460606746204, "grad_norm": 554.4592895507812, "learning_rate": 9.756037084576801e-06, "loss": 41.0981, "step": 94930 }, { "epoch": 0.19178480670014586, "grad_norm": 113.94884490966797, "learning_rate": 9.755929367956354e-06, "loss": 20.1497, "step": 94940 }, { "epoch": 0.19180500733282965, "grad_norm": 506.2403869628906, "learning_rate": 9.755821628156083e-06, "loss": 31.7573, "step": 94950 }, { "epoch": 0.19182520796551347, "grad_norm": 263.2411193847656, "learning_rate": 9.755713865176514e-06, "loss": 29.4999, "step": 94960 }, { "epoch": 0.1918454085981973, "grad_norm": 318.5038757324219, "learning_rate": 9.755606079018174e-06, "loss": 13.7535, "step": 94970 }, { "epoch": 0.1918656092308811, "grad_norm": 776.6326293945312, "learning_rate": 9.755498269681585e-06, "loss": 32.3711, "step": 94980 }, { "epoch": 0.19188580986356493, "grad_norm": 286.9014587402344, "learning_rate": 9.755390437167274e-06, "loss": 21.2811, "step": 94990 }, { "epoch": 0.19190601049624875, "grad_norm": 266.9434509277344, "learning_rate": 9.755282581475769e-06, "loss": 14.0959, "step": 95000 }, { "epoch": 0.19192621112893254, "grad_norm": 247.54443359375, "learning_rate": 9.755174702607592e-06, "loss": 37.5691, "step": 95010 }, { "epoch": 0.19194641176161636, "grad_norm": 435.61798095703125, "learning_rate": 9.75506680056327e-06, "loss": 29.0224, "step": 95020 }, { "epoch": 0.19196661239430018, "grad_norm": 338.204345703125, "learning_rate": 9.75495887534333e-06, "loss": 24.5888, "step": 95030 }, { "epoch": 0.191986813026984, "grad_norm": 307.31964111328125, "learning_rate": 9.754850926948295e-06, "loss": 20.6208, "step": 95040 }, { "epoch": 0.19200701365966782, "grad_norm": 419.23101806640625, "learning_rate": 9.754742955378697e-06, "loss": 36.9696, "step": 95050 }, { "epoch": 0.19202721429235164, "grad_norm": 169.93528747558594, "learning_rate": 9.754634960635057e-06, "loss": 20.3551, "step": 95060 }, { "epoch": 0.19204741492503546, "grad_norm": 372.470458984375, "learning_rate": 9.754526942717901e-06, "loss": 23.3006, "step": 95070 }, { "epoch": 0.19206761555771926, "grad_norm": 358.7287292480469, "learning_rate": 9.75441890162776e-06, "loss": 29.907, "step": 95080 }, { "epoch": 0.19208781619040308, "grad_norm": 160.1354217529297, "learning_rate": 9.754310837365155e-06, "loss": 19.8063, "step": 95090 }, { "epoch": 0.1921080168230869, "grad_norm": 241.99264526367188, "learning_rate": 9.754202749930618e-06, "loss": 19.9173, "step": 95100 }, { "epoch": 0.19212821745577072, "grad_norm": 359.1595153808594, "learning_rate": 9.754094639324672e-06, "loss": 25.3583, "step": 95110 }, { "epoch": 0.19214841808845454, "grad_norm": 198.9616241455078, "learning_rate": 9.753986505547845e-06, "loss": 26.2163, "step": 95120 }, { "epoch": 0.19216861872113836, "grad_norm": 104.04499816894531, "learning_rate": 9.753878348600666e-06, "loss": 21.2637, "step": 95130 }, { "epoch": 0.19218881935382215, "grad_norm": 525.4815063476562, "learning_rate": 9.75377016848366e-06, "loss": 25.2792, "step": 95140 }, { "epoch": 0.19220901998650597, "grad_norm": 96.44377136230469, "learning_rate": 9.753661965197355e-06, "loss": 13.069, "step": 95150 }, { "epoch": 0.1922292206191898, "grad_norm": 343.96258544921875, "learning_rate": 9.753553738742278e-06, "loss": 28.2801, "step": 95160 }, { "epoch": 0.1922494212518736, "grad_norm": 107.3106460571289, "learning_rate": 9.753445489118955e-06, "loss": 19.462, "step": 95170 }, { "epoch": 0.19226962188455743, "grad_norm": 481.8016662597656, "learning_rate": 9.753337216327917e-06, "loss": 39.2131, "step": 95180 }, { "epoch": 0.19228982251724125, "grad_norm": 2518.981689453125, "learning_rate": 9.75322892036969e-06, "loss": 36.9015, "step": 95190 }, { "epoch": 0.19231002314992507, "grad_norm": 215.01841735839844, "learning_rate": 9.7531206012448e-06, "loss": 18.9696, "step": 95200 }, { "epoch": 0.19233022378260886, "grad_norm": 393.1829528808594, "learning_rate": 9.753012258953778e-06, "loss": 39.3242, "step": 95210 }, { "epoch": 0.19235042441529268, "grad_norm": 252.64239501953125, "learning_rate": 9.752903893497152e-06, "loss": 22.1273, "step": 95220 }, { "epoch": 0.1923706250479765, "grad_norm": 219.7644500732422, "learning_rate": 9.752795504875447e-06, "loss": 23.7214, "step": 95230 }, { "epoch": 0.19239082568066032, "grad_norm": 242.53004455566406, "learning_rate": 9.752687093089192e-06, "loss": 32.8952, "step": 95240 }, { "epoch": 0.19241102631334414, "grad_norm": 395.8253173828125, "learning_rate": 9.75257865813892e-06, "loss": 33.9298, "step": 95250 }, { "epoch": 0.19243122694602796, "grad_norm": 227.04054260253906, "learning_rate": 9.752470200025153e-06, "loss": 21.3083, "step": 95260 }, { "epoch": 0.19245142757871175, "grad_norm": 377.0764465332031, "learning_rate": 9.752361718748425e-06, "loss": 12.2949, "step": 95270 }, { "epoch": 0.19247162821139557, "grad_norm": 205.21334838867188, "learning_rate": 9.75225321430926e-06, "loss": 13.4782, "step": 95280 }, { "epoch": 0.1924918288440794, "grad_norm": 349.34429931640625, "learning_rate": 9.752144686708192e-06, "loss": 34.3252, "step": 95290 }, { "epoch": 0.1925120294767632, "grad_norm": 195.7550048828125, "learning_rate": 9.752036135945743e-06, "loss": 27.1141, "step": 95300 }, { "epoch": 0.19253223010944703, "grad_norm": 431.22381591796875, "learning_rate": 9.75192756202245e-06, "loss": 26.2202, "step": 95310 }, { "epoch": 0.19255243074213085, "grad_norm": 679.7407836914062, "learning_rate": 9.751818964938837e-06, "loss": 20.9817, "step": 95320 }, { "epoch": 0.19257263137481465, "grad_norm": 25.23150634765625, "learning_rate": 9.751710344695436e-06, "loss": 16.6901, "step": 95330 }, { "epoch": 0.19259283200749847, "grad_norm": 248.8192596435547, "learning_rate": 9.751601701292773e-06, "loss": 22.5811, "step": 95340 }, { "epoch": 0.19261303264018229, "grad_norm": 168.5537567138672, "learning_rate": 9.75149303473138e-06, "loss": 33.0731, "step": 95350 }, { "epoch": 0.1926332332728661, "grad_norm": 30.652673721313477, "learning_rate": 9.751384345011787e-06, "loss": 15.46, "step": 95360 }, { "epoch": 0.19265343390554993, "grad_norm": 79.17930603027344, "learning_rate": 9.751275632134523e-06, "loss": 17.265, "step": 95370 }, { "epoch": 0.19267363453823375, "grad_norm": 18.52083969116211, "learning_rate": 9.751166896100119e-06, "loss": 18.0805, "step": 95380 }, { "epoch": 0.19269383517091757, "grad_norm": 0.0, "learning_rate": 9.751058136909102e-06, "loss": 34.931, "step": 95390 }, { "epoch": 0.19271403580360136, "grad_norm": 696.1566162109375, "learning_rate": 9.750949354562006e-06, "loss": 25.6002, "step": 95400 }, { "epoch": 0.19273423643628518, "grad_norm": 322.2468566894531, "learning_rate": 9.750840549059354e-06, "loss": 15.1772, "step": 95410 }, { "epoch": 0.192754437068969, "grad_norm": 90.83790588378906, "learning_rate": 9.750731720401685e-06, "loss": 21.752, "step": 95420 }, { "epoch": 0.19277463770165282, "grad_norm": 553.0520629882812, "learning_rate": 9.750622868589527e-06, "loss": 12.8505, "step": 95430 }, { "epoch": 0.19279483833433664, "grad_norm": 347.9486083984375, "learning_rate": 9.750513993623406e-06, "loss": 25.3648, "step": 95440 }, { "epoch": 0.19281503896702046, "grad_norm": 573.2507934570312, "learning_rate": 9.750405095503859e-06, "loss": 45.4055, "step": 95450 }, { "epoch": 0.19283523959970425, "grad_norm": 262.56768798828125, "learning_rate": 9.750296174231412e-06, "loss": 15.7627, "step": 95460 }, { "epoch": 0.19285544023238807, "grad_norm": 393.2696533203125, "learning_rate": 9.7501872298066e-06, "loss": 20.8417, "step": 95470 }, { "epoch": 0.1928756408650719, "grad_norm": 261.73406982421875, "learning_rate": 9.75007826222995e-06, "loss": 17.0172, "step": 95480 }, { "epoch": 0.1928958414977557, "grad_norm": 277.8756408691406, "learning_rate": 9.749969271501993e-06, "loss": 16.9049, "step": 95490 }, { "epoch": 0.19291604213043953, "grad_norm": 80.61328125, "learning_rate": 9.749860257623262e-06, "loss": 32.6337, "step": 95500 }, { "epoch": 0.19293624276312335, "grad_norm": 86.0202865600586, "learning_rate": 9.74975122059429e-06, "loss": 22.4509, "step": 95510 }, { "epoch": 0.19295644339580717, "grad_norm": 365.150146484375, "learning_rate": 9.749642160415606e-06, "loss": 17.8832, "step": 95520 }, { "epoch": 0.19297664402849096, "grad_norm": 370.7186584472656, "learning_rate": 9.749533077087742e-06, "loss": 21.3197, "step": 95530 }, { "epoch": 0.19299684466117478, "grad_norm": 90.87162780761719, "learning_rate": 9.749423970611232e-06, "loss": 27.6465, "step": 95540 }, { "epoch": 0.1930170452938586, "grad_norm": 276.3020935058594, "learning_rate": 9.749314840986604e-06, "loss": 16.7394, "step": 95550 }, { "epoch": 0.19303724592654242, "grad_norm": 353.3509826660156, "learning_rate": 9.74920568821439e-06, "loss": 20.6332, "step": 95560 }, { "epoch": 0.19305744655922624, "grad_norm": 175.0183868408203, "learning_rate": 9.749096512295124e-06, "loss": 18.862, "step": 95570 }, { "epoch": 0.19307764719191006, "grad_norm": 340.98291015625, "learning_rate": 9.748987313229339e-06, "loss": 23.1005, "step": 95580 }, { "epoch": 0.19309784782459385, "grad_norm": 29.349199295043945, "learning_rate": 9.748878091017565e-06, "loss": 15.6519, "step": 95590 }, { "epoch": 0.19311804845727767, "grad_norm": 218.99395751953125, "learning_rate": 9.748768845660335e-06, "loss": 13.3862, "step": 95600 }, { "epoch": 0.1931382490899615, "grad_norm": 266.6187438964844, "learning_rate": 9.748659577158182e-06, "loss": 30.7888, "step": 95610 }, { "epoch": 0.19315844972264531, "grad_norm": 314.4416198730469, "learning_rate": 9.748550285511637e-06, "loss": 23.9491, "step": 95620 }, { "epoch": 0.19317865035532913, "grad_norm": 228.47279357910156, "learning_rate": 9.748440970721236e-06, "loss": 9.8307, "step": 95630 }, { "epoch": 0.19319885098801295, "grad_norm": 427.0989990234375, "learning_rate": 9.74833163278751e-06, "loss": 15.0773, "step": 95640 }, { "epoch": 0.19321905162069675, "grad_norm": 383.8903503417969, "learning_rate": 9.748222271710988e-06, "loss": 23.3138, "step": 95650 }, { "epoch": 0.19323925225338057, "grad_norm": 124.07269287109375, "learning_rate": 9.74811288749221e-06, "loss": 14.0068, "step": 95660 }, { "epoch": 0.1932594528860644, "grad_norm": 38.94042205810547, "learning_rate": 9.748003480131702e-06, "loss": 33.4643, "step": 95670 }, { "epoch": 0.1932796535187482, "grad_norm": 143.6829376220703, "learning_rate": 9.747894049630004e-06, "loss": 44.7015, "step": 95680 }, { "epoch": 0.19329985415143203, "grad_norm": 65.9195785522461, "learning_rate": 9.747784595987645e-06, "loss": 41.7456, "step": 95690 }, { "epoch": 0.19332005478411585, "grad_norm": 290.4505615234375, "learning_rate": 9.74767511920516e-06, "loss": 24.0831, "step": 95700 }, { "epoch": 0.19334025541679967, "grad_norm": 132.1361083984375, "learning_rate": 9.747565619283083e-06, "loss": 15.5977, "step": 95710 }, { "epoch": 0.19336045604948346, "grad_norm": 262.4844665527344, "learning_rate": 9.747456096221946e-06, "loss": 44.0224, "step": 95720 }, { "epoch": 0.19338065668216728, "grad_norm": 84.67986297607422, "learning_rate": 9.747346550022282e-06, "loss": 18.8693, "step": 95730 }, { "epoch": 0.1934008573148511, "grad_norm": 71.9924545288086, "learning_rate": 9.747236980684632e-06, "loss": 27.4934, "step": 95740 }, { "epoch": 0.19342105794753492, "grad_norm": 405.0351257324219, "learning_rate": 9.74712738820952e-06, "loss": 21.1638, "step": 95750 }, { "epoch": 0.19344125858021874, "grad_norm": 421.6388854980469, "learning_rate": 9.747017772597487e-06, "loss": 20.3181, "step": 95760 }, { "epoch": 0.19346145921290256, "grad_norm": 244.19358825683594, "learning_rate": 9.746908133849065e-06, "loss": 24.4423, "step": 95770 }, { "epoch": 0.19348165984558635, "grad_norm": 305.1314392089844, "learning_rate": 9.746798471964787e-06, "loss": 22.5049, "step": 95780 }, { "epoch": 0.19350186047827017, "grad_norm": 75.7957992553711, "learning_rate": 9.74668878694519e-06, "loss": 20.3523, "step": 95790 }, { "epoch": 0.193522061110954, "grad_norm": 739.3309326171875, "learning_rate": 9.746579078790808e-06, "loss": 33.3719, "step": 95800 }, { "epoch": 0.1935422617436378, "grad_norm": 413.32086181640625, "learning_rate": 9.746469347502174e-06, "loss": 37.5853, "step": 95810 }, { "epoch": 0.19356246237632163, "grad_norm": 238.80490112304688, "learning_rate": 9.746359593079825e-06, "loss": 24.8697, "step": 95820 }, { "epoch": 0.19358266300900545, "grad_norm": 142.33499145507812, "learning_rate": 9.746249815524295e-06, "loss": 20.0239, "step": 95830 }, { "epoch": 0.19360286364168927, "grad_norm": 79.61769104003906, "learning_rate": 9.746140014836118e-06, "loss": 29.2592, "step": 95840 }, { "epoch": 0.19362306427437306, "grad_norm": 1728.9298095703125, "learning_rate": 9.746030191015831e-06, "loss": 50.5635, "step": 95850 }, { "epoch": 0.19364326490705688, "grad_norm": 259.8597412109375, "learning_rate": 9.745920344063969e-06, "loss": 18.6628, "step": 95860 }, { "epoch": 0.1936634655397407, "grad_norm": 280.1614074707031, "learning_rate": 9.745810473981067e-06, "loss": 20.735, "step": 95870 }, { "epoch": 0.19368366617242452, "grad_norm": 76.76167297363281, "learning_rate": 9.74570058076766e-06, "loss": 18.0585, "step": 95880 }, { "epoch": 0.19370386680510834, "grad_norm": 200.63792419433594, "learning_rate": 9.745590664424283e-06, "loss": 18.7056, "step": 95890 }, { "epoch": 0.19372406743779216, "grad_norm": 756.7163696289062, "learning_rate": 9.745480724951473e-06, "loss": 43.6336, "step": 95900 }, { "epoch": 0.19374426807047596, "grad_norm": 294.02825927734375, "learning_rate": 9.745370762349766e-06, "loss": 30.0023, "step": 95910 }, { "epoch": 0.19376446870315978, "grad_norm": 285.096923828125, "learning_rate": 9.745260776619698e-06, "loss": 22.5513, "step": 95920 }, { "epoch": 0.1937846693358436, "grad_norm": 240.5572509765625, "learning_rate": 9.745150767761805e-06, "loss": 18.9648, "step": 95930 }, { "epoch": 0.19380486996852742, "grad_norm": 72.63945007324219, "learning_rate": 9.745040735776622e-06, "loss": 17.33, "step": 95940 }, { "epoch": 0.19382507060121124, "grad_norm": 225.8397216796875, "learning_rate": 9.744930680664685e-06, "loss": 28.9746, "step": 95950 }, { "epoch": 0.19384527123389506, "grad_norm": 441.1708679199219, "learning_rate": 9.74482060242653e-06, "loss": 20.5151, "step": 95960 }, { "epoch": 0.19386547186657885, "grad_norm": 247.70404052734375, "learning_rate": 9.744710501062698e-06, "loss": 12.2384, "step": 95970 }, { "epoch": 0.19388567249926267, "grad_norm": 384.8417663574219, "learning_rate": 9.744600376573721e-06, "loss": 27.5712, "step": 95980 }, { "epoch": 0.1939058731319465, "grad_norm": 353.2216491699219, "learning_rate": 9.744490228960137e-06, "loss": 26.1591, "step": 95990 }, { "epoch": 0.1939260737646303, "grad_norm": 167.35850524902344, "learning_rate": 9.744380058222483e-06, "loss": 14.242, "step": 96000 }, { "epoch": 0.19394627439731413, "grad_norm": 323.9517822265625, "learning_rate": 9.744269864361298e-06, "loss": 16.1544, "step": 96010 }, { "epoch": 0.19396647502999795, "grad_norm": 220.4581756591797, "learning_rate": 9.744159647377114e-06, "loss": 17.6776, "step": 96020 }, { "epoch": 0.19398667566268177, "grad_norm": 219.27639770507812, "learning_rate": 9.744049407270472e-06, "loss": 18.4711, "step": 96030 }, { "epoch": 0.19400687629536556, "grad_norm": 352.016357421875, "learning_rate": 9.74393914404191e-06, "loss": 24.6081, "step": 96040 }, { "epoch": 0.19402707692804938, "grad_norm": 31.736488342285156, "learning_rate": 9.743828857691964e-06, "loss": 30.9457, "step": 96050 }, { "epoch": 0.1940472775607332, "grad_norm": 328.4178466796875, "learning_rate": 9.74371854822117e-06, "loss": 43.8515, "step": 96060 }, { "epoch": 0.19406747819341702, "grad_norm": 91.76180267333984, "learning_rate": 9.74360821563007e-06, "loss": 19.7191, "step": 96070 }, { "epoch": 0.19408767882610084, "grad_norm": 274.35321044921875, "learning_rate": 9.743497859919196e-06, "loss": 27.5315, "step": 96080 }, { "epoch": 0.19410787945878466, "grad_norm": 372.7965087890625, "learning_rate": 9.743387481089091e-06, "loss": 24.65, "step": 96090 }, { "epoch": 0.19412808009146845, "grad_norm": 201.8687744140625, "learning_rate": 9.743277079140288e-06, "loss": 11.5645, "step": 96100 }, { "epoch": 0.19414828072415227, "grad_norm": 577.77734375, "learning_rate": 9.74316665407333e-06, "loss": 32.1118, "step": 96110 }, { "epoch": 0.1941684813568361, "grad_norm": 191.4524383544922, "learning_rate": 9.743056205888752e-06, "loss": 24.6408, "step": 96120 }, { "epoch": 0.1941886819895199, "grad_norm": 244.0301055908203, "learning_rate": 9.742945734587093e-06, "loss": 26.1209, "step": 96130 }, { "epoch": 0.19420888262220373, "grad_norm": 227.45179748535156, "learning_rate": 9.742835240168893e-06, "loss": 23.7475, "step": 96140 }, { "epoch": 0.19422908325488755, "grad_norm": 214.3621063232422, "learning_rate": 9.742724722634688e-06, "loss": 38.4291, "step": 96150 }, { "epoch": 0.19424928388757137, "grad_norm": 285.793701171875, "learning_rate": 9.742614181985019e-06, "loss": 22.6651, "step": 96160 }, { "epoch": 0.19426948452025516, "grad_norm": 257.5290222167969, "learning_rate": 9.742503618220422e-06, "loss": 24.9823, "step": 96170 }, { "epoch": 0.19428968515293898, "grad_norm": 271.3304748535156, "learning_rate": 9.74239303134144e-06, "loss": 18.7487, "step": 96180 }, { "epoch": 0.1943098857856228, "grad_norm": 359.68438720703125, "learning_rate": 9.742282421348607e-06, "loss": 17.2555, "step": 96190 }, { "epoch": 0.19433008641830662, "grad_norm": 178.17193603515625, "learning_rate": 9.742171788242468e-06, "loss": 38.88, "step": 96200 }, { "epoch": 0.19435028705099044, "grad_norm": 597.6038818359375, "learning_rate": 9.742061132023555e-06, "loss": 17.4659, "step": 96210 }, { "epoch": 0.19437048768367426, "grad_norm": 567.9053955078125, "learning_rate": 9.741950452692414e-06, "loss": 19.8858, "step": 96220 }, { "epoch": 0.19439068831635806, "grad_norm": 288.12969970703125, "learning_rate": 9.741839750249579e-06, "loss": 16.9945, "step": 96230 }, { "epoch": 0.19441088894904188, "grad_norm": 992.8629150390625, "learning_rate": 9.741729024695594e-06, "loss": 29.0722, "step": 96240 }, { "epoch": 0.1944310895817257, "grad_norm": 267.5902404785156, "learning_rate": 9.741618276030998e-06, "loss": 22.5625, "step": 96250 }, { "epoch": 0.19445129021440952, "grad_norm": 222.5215301513672, "learning_rate": 9.741507504256327e-06, "loss": 11.3755, "step": 96260 }, { "epoch": 0.19447149084709334, "grad_norm": 203.91122436523438, "learning_rate": 9.741396709372126e-06, "loss": 20.6363, "step": 96270 }, { "epoch": 0.19449169147977716, "grad_norm": 323.2353820800781, "learning_rate": 9.74128589137893e-06, "loss": 25.2465, "step": 96280 }, { "epoch": 0.19451189211246095, "grad_norm": 130.77078247070312, "learning_rate": 9.741175050277283e-06, "loss": 14.6527, "step": 96290 }, { "epoch": 0.19453209274514477, "grad_norm": 314.6050720214844, "learning_rate": 9.741064186067723e-06, "loss": 18.1552, "step": 96300 }, { "epoch": 0.1945522933778286, "grad_norm": 723.0255126953125, "learning_rate": 9.740953298750792e-06, "loss": 40.6123, "step": 96310 }, { "epoch": 0.1945724940105124, "grad_norm": 253.1793670654297, "learning_rate": 9.74084238832703e-06, "loss": 33.3602, "step": 96320 }, { "epoch": 0.19459269464319623, "grad_norm": 461.7002258300781, "learning_rate": 9.740731454796976e-06, "loss": 36.5425, "step": 96330 }, { "epoch": 0.19461289527588005, "grad_norm": 260.8619384765625, "learning_rate": 9.740620498161173e-06, "loss": 15.3413, "step": 96340 }, { "epoch": 0.19463309590856387, "grad_norm": 285.22735595703125, "learning_rate": 9.74050951842016e-06, "loss": 17.8447, "step": 96350 }, { "epoch": 0.19465329654124766, "grad_norm": 269.1016540527344, "learning_rate": 9.74039851557448e-06, "loss": 19.2363, "step": 96360 }, { "epoch": 0.19467349717393148, "grad_norm": 395.37945556640625, "learning_rate": 9.740287489624671e-06, "loss": 28.8128, "step": 96370 }, { "epoch": 0.1946936978066153, "grad_norm": 37.152591705322266, "learning_rate": 9.740176440571277e-06, "loss": 19.2669, "step": 96380 }, { "epoch": 0.19471389843929912, "grad_norm": 17.456867218017578, "learning_rate": 9.740065368414837e-06, "loss": 14.4412, "step": 96390 }, { "epoch": 0.19473409907198294, "grad_norm": 334.1023864746094, "learning_rate": 9.739954273155892e-06, "loss": 24.0759, "step": 96400 }, { "epoch": 0.19475429970466676, "grad_norm": 120.37042236328125, "learning_rate": 9.739843154794985e-06, "loss": 17.9768, "step": 96410 }, { "epoch": 0.19477450033735055, "grad_norm": 553.760009765625, "learning_rate": 9.73973201333266e-06, "loss": 28.7827, "step": 96420 }, { "epoch": 0.19479470097003437, "grad_norm": 151.39419555664062, "learning_rate": 9.739620848769455e-06, "loss": 23.7254, "step": 96430 }, { "epoch": 0.1948149016027182, "grad_norm": 0.0, "learning_rate": 9.739509661105912e-06, "loss": 24.8632, "step": 96440 }, { "epoch": 0.194835102235402, "grad_norm": 214.82327270507812, "learning_rate": 9.739398450342573e-06, "loss": 44.8893, "step": 96450 }, { "epoch": 0.19485530286808583, "grad_norm": 84.92851257324219, "learning_rate": 9.739287216479983e-06, "loss": 14.9344, "step": 96460 }, { "epoch": 0.19487550350076965, "grad_norm": 354.8134765625, "learning_rate": 9.73917595951868e-06, "loss": 31.1577, "step": 96470 }, { "epoch": 0.19489570413345347, "grad_norm": 179.74053955078125, "learning_rate": 9.73906467945921e-06, "loss": 12.0049, "step": 96480 }, { "epoch": 0.19491590476613727, "grad_norm": 159.14772033691406, "learning_rate": 9.738953376302111e-06, "loss": 26.9698, "step": 96490 }, { "epoch": 0.19493610539882109, "grad_norm": 224.09188842773438, "learning_rate": 9.73884205004793e-06, "loss": 19.1605, "step": 96500 }, { "epoch": 0.1949563060315049, "grad_norm": 135.1748809814453, "learning_rate": 9.738730700697207e-06, "loss": 8.5663, "step": 96510 }, { "epoch": 0.19497650666418873, "grad_norm": 559.7260131835938, "learning_rate": 9.738619328250485e-06, "loss": 25.9012, "step": 96520 }, { "epoch": 0.19499670729687255, "grad_norm": 271.65869140625, "learning_rate": 9.738507932708308e-06, "loss": 12.4679, "step": 96530 }, { "epoch": 0.19501690792955637, "grad_norm": 135.4747772216797, "learning_rate": 9.738396514071216e-06, "loss": 12.7304, "step": 96540 }, { "epoch": 0.19503710856224016, "grad_norm": 251.24769592285156, "learning_rate": 9.738285072339756e-06, "loss": 15.5672, "step": 96550 }, { "epoch": 0.19505730919492398, "grad_norm": 450.76068115234375, "learning_rate": 9.73817360751447e-06, "loss": 25.9529, "step": 96560 }, { "epoch": 0.1950775098276078, "grad_norm": 433.70367431640625, "learning_rate": 9.738062119595897e-06, "loss": 28.7535, "step": 96570 }, { "epoch": 0.19509771046029162, "grad_norm": 116.71699523925781, "learning_rate": 9.737950608584588e-06, "loss": 17.8756, "step": 96580 }, { "epoch": 0.19511791109297544, "grad_norm": 242.9219970703125, "learning_rate": 9.73783907448108e-06, "loss": 18.926, "step": 96590 }, { "epoch": 0.19513811172565926, "grad_norm": 279.7112121582031, "learning_rate": 9.73772751728592e-06, "loss": 24.5375, "step": 96600 }, { "epoch": 0.19515831235834305, "grad_norm": 186.05599975585938, "learning_rate": 9.73761593699965e-06, "loss": 19.4824, "step": 96610 }, { "epoch": 0.19517851299102687, "grad_norm": 346.80926513671875, "learning_rate": 9.737504333622814e-06, "loss": 37.1142, "step": 96620 }, { "epoch": 0.1951987136237107, "grad_norm": 24.45218849182129, "learning_rate": 9.737392707155957e-06, "loss": 31.3391, "step": 96630 }, { "epoch": 0.1952189142563945, "grad_norm": 485.159912109375, "learning_rate": 9.737281057599623e-06, "loss": 34.6418, "step": 96640 }, { "epoch": 0.19523911488907833, "grad_norm": 282.0603942871094, "learning_rate": 9.737169384954356e-06, "loss": 21.6527, "step": 96650 }, { "epoch": 0.19525931552176215, "grad_norm": 274.04254150390625, "learning_rate": 9.7370576892207e-06, "loss": 17.9513, "step": 96660 }, { "epoch": 0.19527951615444597, "grad_norm": 394.24615478515625, "learning_rate": 9.736945970399198e-06, "loss": 17.0055, "step": 96670 }, { "epoch": 0.19529971678712976, "grad_norm": 353.31475830078125, "learning_rate": 9.736834228490398e-06, "loss": 22.0683, "step": 96680 }, { "epoch": 0.19531991741981358, "grad_norm": 238.63516235351562, "learning_rate": 9.73672246349484e-06, "loss": 29.0128, "step": 96690 }, { "epoch": 0.1953401180524974, "grad_norm": 402.3677062988281, "learning_rate": 9.736610675413073e-06, "loss": 44.0142, "step": 96700 }, { "epoch": 0.19536031868518122, "grad_norm": 246.15524291992188, "learning_rate": 9.736498864245638e-06, "loss": 28.133, "step": 96710 }, { "epoch": 0.19538051931786504, "grad_norm": 285.1415710449219, "learning_rate": 9.736387029993084e-06, "loss": 11.6178, "step": 96720 }, { "epoch": 0.19540071995054886, "grad_norm": 376.5553283691406, "learning_rate": 9.736275172655954e-06, "loss": 28.5167, "step": 96730 }, { "epoch": 0.19542092058323265, "grad_norm": 464.0888671875, "learning_rate": 9.736163292234792e-06, "loss": 18.7389, "step": 96740 }, { "epoch": 0.19544112121591647, "grad_norm": 0.0, "learning_rate": 9.736051388730146e-06, "loss": 25.0037, "step": 96750 }, { "epoch": 0.1954613218486003, "grad_norm": 165.46104431152344, "learning_rate": 9.735939462142558e-06, "loss": 22.5168, "step": 96760 }, { "epoch": 0.19548152248128411, "grad_norm": 315.6774597167969, "learning_rate": 9.735827512472576e-06, "loss": 34.8082, "step": 96770 }, { "epoch": 0.19550172311396793, "grad_norm": 404.5007019042969, "learning_rate": 9.735715539720747e-06, "loss": 22.6193, "step": 96780 }, { "epoch": 0.19552192374665175, "grad_norm": 44.535030364990234, "learning_rate": 9.735603543887613e-06, "loss": 18.0135, "step": 96790 }, { "epoch": 0.19554212437933557, "grad_norm": 205.98338317871094, "learning_rate": 9.735491524973723e-06, "loss": 15.866, "step": 96800 }, { "epoch": 0.19556232501201937, "grad_norm": 232.63088989257812, "learning_rate": 9.73537948297962e-06, "loss": 19.5045, "step": 96810 }, { "epoch": 0.1955825256447032, "grad_norm": 276.49127197265625, "learning_rate": 9.735267417905852e-06, "loss": 28.1941, "step": 96820 }, { "epoch": 0.195602726277387, "grad_norm": 73.9383316040039, "learning_rate": 9.735155329752965e-06, "loss": 23.5444, "step": 96830 }, { "epoch": 0.19562292691007083, "grad_norm": 208.9409942626953, "learning_rate": 9.735043218521507e-06, "loss": 21.3211, "step": 96840 }, { "epoch": 0.19564312754275465, "grad_norm": 258.2750549316406, "learning_rate": 9.734931084212021e-06, "loss": 16.1605, "step": 96850 }, { "epoch": 0.19566332817543847, "grad_norm": 278.203369140625, "learning_rate": 9.734818926825056e-06, "loss": 14.7016, "step": 96860 }, { "epoch": 0.19568352880812226, "grad_norm": 58.29170608520508, "learning_rate": 9.734706746361157e-06, "loss": 24.0589, "step": 96870 }, { "epoch": 0.19570372944080608, "grad_norm": 275.7850341796875, "learning_rate": 9.734594542820871e-06, "loss": 18.3391, "step": 96880 }, { "epoch": 0.1957239300734899, "grad_norm": 248.12994384765625, "learning_rate": 9.734482316204747e-06, "loss": 18.6827, "step": 96890 }, { "epoch": 0.19574413070617372, "grad_norm": 146.92031860351562, "learning_rate": 9.73437006651333e-06, "loss": 17.2605, "step": 96900 }, { "epoch": 0.19576433133885754, "grad_norm": 40.12916564941406, "learning_rate": 9.734257793747168e-06, "loss": 27.3161, "step": 96910 }, { "epoch": 0.19578453197154136, "grad_norm": 1153.8665771484375, "learning_rate": 9.734145497906807e-06, "loss": 36.8018, "step": 96920 }, { "epoch": 0.19580473260422515, "grad_norm": 99.87059783935547, "learning_rate": 9.734033178992794e-06, "loss": 36.013, "step": 96930 }, { "epoch": 0.19582493323690897, "grad_norm": 810.9580688476562, "learning_rate": 9.73392083700568e-06, "loss": 28.4979, "step": 96940 }, { "epoch": 0.1958451338695928, "grad_norm": 127.88633728027344, "learning_rate": 9.73380847194601e-06, "loss": 34.1958, "step": 96950 }, { "epoch": 0.1958653345022766, "grad_norm": 306.71905517578125, "learning_rate": 9.733696083814327e-06, "loss": 21.3418, "step": 96960 }, { "epoch": 0.19588553513496043, "grad_norm": 372.10955810546875, "learning_rate": 9.733583672611189e-06, "loss": 14.934, "step": 96970 }, { "epoch": 0.19590573576764425, "grad_norm": 663.7210693359375, "learning_rate": 9.733471238337136e-06, "loss": 29.18, "step": 96980 }, { "epoch": 0.19592593640032807, "grad_norm": 264.6695556640625, "learning_rate": 9.733358780992717e-06, "loss": 31.6866, "step": 96990 }, { "epoch": 0.19594613703301186, "grad_norm": 175.69618225097656, "learning_rate": 9.733246300578482e-06, "loss": 25.1688, "step": 97000 }, { "epoch": 0.19596633766569568, "grad_norm": 263.0272521972656, "learning_rate": 9.73313379709498e-06, "loss": 32.0319, "step": 97010 }, { "epoch": 0.1959865382983795, "grad_norm": 796.26513671875, "learning_rate": 9.733021270542758e-06, "loss": 28.0547, "step": 97020 }, { "epoch": 0.19600673893106332, "grad_norm": 361.3654479980469, "learning_rate": 9.732908720922362e-06, "loss": 29.4879, "step": 97030 }, { "epoch": 0.19602693956374714, "grad_norm": 83.1963119506836, "learning_rate": 9.732796148234345e-06, "loss": 13.3773, "step": 97040 }, { "epoch": 0.19604714019643096, "grad_norm": 176.7535400390625, "learning_rate": 9.732683552479253e-06, "loss": 15.2503, "step": 97050 }, { "epoch": 0.19606734082911476, "grad_norm": 414.7829284667969, "learning_rate": 9.732570933657635e-06, "loss": 18.5738, "step": 97060 }, { "epoch": 0.19608754146179858, "grad_norm": 27.866424560546875, "learning_rate": 9.73245829177004e-06, "loss": 15.36, "step": 97070 }, { "epoch": 0.1961077420944824, "grad_norm": 290.2229309082031, "learning_rate": 9.732345626817018e-06, "loss": 30.9024, "step": 97080 }, { "epoch": 0.19612794272716622, "grad_norm": 193.75547790527344, "learning_rate": 9.732232938799118e-06, "loss": 30.941, "step": 97090 }, { "epoch": 0.19614814335985004, "grad_norm": 221.0733642578125, "learning_rate": 9.732120227716887e-06, "loss": 29.5317, "step": 97100 }, { "epoch": 0.19616834399253386, "grad_norm": 262.6974182128906, "learning_rate": 9.732007493570877e-06, "loss": 28.5785, "step": 97110 }, { "epoch": 0.19618854462521768, "grad_norm": 334.64459228515625, "learning_rate": 9.731894736361636e-06, "loss": 20.8498, "step": 97120 }, { "epoch": 0.19620874525790147, "grad_norm": 168.98025512695312, "learning_rate": 9.731781956089713e-06, "loss": 18.6019, "step": 97130 }, { "epoch": 0.1962289458905853, "grad_norm": 157.30670166015625, "learning_rate": 9.731669152755662e-06, "loss": 9.2831, "step": 97140 }, { "epoch": 0.1962491465232691, "grad_norm": 360.90399169921875, "learning_rate": 9.731556326360027e-06, "loss": 33.1598, "step": 97150 }, { "epoch": 0.19626934715595293, "grad_norm": 194.34375, "learning_rate": 9.73144347690336e-06, "loss": 30.6536, "step": 97160 }, { "epoch": 0.19628954778863675, "grad_norm": 645.79345703125, "learning_rate": 9.731330604386214e-06, "loss": 24.0124, "step": 97170 }, { "epoch": 0.19630974842132057, "grad_norm": 595.2700805664062, "learning_rate": 9.731217708809133e-06, "loss": 44.2931, "step": 97180 }, { "epoch": 0.19632994905400436, "grad_norm": 196.8307342529297, "learning_rate": 9.731104790172672e-06, "loss": 23.8934, "step": 97190 }, { "epoch": 0.19635014968668818, "grad_norm": 155.22686767578125, "learning_rate": 9.73099184847738e-06, "loss": 20.1686, "step": 97200 }, { "epoch": 0.196370350319372, "grad_norm": 324.48004150390625, "learning_rate": 9.73087888372381e-06, "loss": 35.764, "step": 97210 }, { "epoch": 0.19639055095205582, "grad_norm": 543.6165161132812, "learning_rate": 9.730765895912506e-06, "loss": 24.9832, "step": 97220 }, { "epoch": 0.19641075158473964, "grad_norm": 146.55519104003906, "learning_rate": 9.730652885044025e-06, "loss": 10.8252, "step": 97230 }, { "epoch": 0.19643095221742346, "grad_norm": 287.410400390625, "learning_rate": 9.730539851118913e-06, "loss": 34.2418, "step": 97240 }, { "epoch": 0.19645115285010725, "grad_norm": 143.31375122070312, "learning_rate": 9.730426794137727e-06, "loss": 36.2543, "step": 97250 }, { "epoch": 0.19647135348279107, "grad_norm": 90.16635131835938, "learning_rate": 9.730313714101014e-06, "loss": 22.1729, "step": 97260 }, { "epoch": 0.1964915541154749, "grad_norm": 374.04132080078125, "learning_rate": 9.730200611009324e-06, "loss": 24.6492, "step": 97270 }, { "epoch": 0.1965117547481587, "grad_norm": 226.41285705566406, "learning_rate": 9.73008748486321e-06, "loss": 20.8283, "step": 97280 }, { "epoch": 0.19653195538084253, "grad_norm": 257.3214111328125, "learning_rate": 9.729974335663227e-06, "loss": 16.0735, "step": 97290 }, { "epoch": 0.19655215601352635, "grad_norm": 197.03897094726562, "learning_rate": 9.72986116340992e-06, "loss": 26.1317, "step": 97300 }, { "epoch": 0.19657235664621017, "grad_norm": 346.800048828125, "learning_rate": 9.729747968103842e-06, "loss": 38.4951, "step": 97310 }, { "epoch": 0.19659255727889396, "grad_norm": 511.3269348144531, "learning_rate": 9.729634749745547e-06, "loss": 24.6841, "step": 97320 }, { "epoch": 0.19661275791157778, "grad_norm": 202.23475646972656, "learning_rate": 9.729521508335586e-06, "loss": 11.6219, "step": 97330 }, { "epoch": 0.1966329585442616, "grad_norm": 130.47726440429688, "learning_rate": 9.729408243874511e-06, "loss": 20.0369, "step": 97340 }, { "epoch": 0.19665315917694542, "grad_norm": 153.2672119140625, "learning_rate": 9.729294956362873e-06, "loss": 13.7015, "step": 97350 }, { "epoch": 0.19667335980962924, "grad_norm": 90.11873626708984, "learning_rate": 9.729181645801227e-06, "loss": 13.7299, "step": 97360 }, { "epoch": 0.19669356044231306, "grad_norm": 234.05706787109375, "learning_rate": 9.729068312190122e-06, "loss": 10.7021, "step": 97370 }, { "epoch": 0.19671376107499686, "grad_norm": 287.4801940917969, "learning_rate": 9.728954955530113e-06, "loss": 17.0463, "step": 97380 }, { "epoch": 0.19673396170768068, "grad_norm": 337.21307373046875, "learning_rate": 9.728841575821749e-06, "loss": 20.7346, "step": 97390 }, { "epoch": 0.1967541623403645, "grad_norm": 55.69681930541992, "learning_rate": 9.728728173065584e-06, "loss": 17.5838, "step": 97400 }, { "epoch": 0.19677436297304832, "grad_norm": 721.6976928710938, "learning_rate": 9.728614747262173e-06, "loss": 38.8009, "step": 97410 }, { "epoch": 0.19679456360573214, "grad_norm": 435.33465576171875, "learning_rate": 9.728501298412067e-06, "loss": 25.3421, "step": 97420 }, { "epoch": 0.19681476423841596, "grad_norm": 334.5414733886719, "learning_rate": 9.72838782651582e-06, "loss": 24.3405, "step": 97430 }, { "epoch": 0.19683496487109975, "grad_norm": 531.4854736328125, "learning_rate": 9.728274331573983e-06, "loss": 23.8407, "step": 97440 }, { "epoch": 0.19685516550378357, "grad_norm": 673.7432250976562, "learning_rate": 9.728160813587111e-06, "loss": 22.707, "step": 97450 }, { "epoch": 0.1968753661364674, "grad_norm": 244.62831115722656, "learning_rate": 9.728047272555756e-06, "loss": 20.2877, "step": 97460 }, { "epoch": 0.1968955667691512, "grad_norm": 869.8458251953125, "learning_rate": 9.727933708480474e-06, "loss": 32.254, "step": 97470 }, { "epoch": 0.19691576740183503, "grad_norm": 202.1588134765625, "learning_rate": 9.727820121361815e-06, "loss": 21.9149, "step": 97480 }, { "epoch": 0.19693596803451885, "grad_norm": 107.15644073486328, "learning_rate": 9.727706511200335e-06, "loss": 17.4516, "step": 97490 }, { "epoch": 0.19695616866720267, "grad_norm": 553.6289672851562, "learning_rate": 9.727592877996585e-06, "loss": 38.9305, "step": 97500 }, { "epoch": 0.19697636929988646, "grad_norm": 92.82726287841797, "learning_rate": 9.727479221751122e-06, "loss": 17.9001, "step": 97510 }, { "epoch": 0.19699656993257028, "grad_norm": 272.8840026855469, "learning_rate": 9.727365542464498e-06, "loss": 52.1301, "step": 97520 }, { "epoch": 0.1970167705652541, "grad_norm": 371.44952392578125, "learning_rate": 9.727251840137269e-06, "loss": 31.3395, "step": 97530 }, { "epoch": 0.19703697119793792, "grad_norm": 74.12994384765625, "learning_rate": 9.727138114769986e-06, "loss": 17.171, "step": 97540 }, { "epoch": 0.19705717183062174, "grad_norm": 356.3588562011719, "learning_rate": 9.727024366363208e-06, "loss": 24.0116, "step": 97550 }, { "epoch": 0.19707737246330556, "grad_norm": 347.5828552246094, "learning_rate": 9.726910594917482e-06, "loss": 17.8262, "step": 97560 }, { "epoch": 0.19709757309598935, "grad_norm": 413.9042663574219, "learning_rate": 9.726796800433371e-06, "loss": 24.5151, "step": 97570 }, { "epoch": 0.19711777372867317, "grad_norm": 250.22528076171875, "learning_rate": 9.726682982911423e-06, "loss": 43.6541, "step": 97580 }, { "epoch": 0.197137974361357, "grad_norm": 425.1312561035156, "learning_rate": 9.726569142352197e-06, "loss": 22.2498, "step": 97590 }, { "epoch": 0.1971581749940408, "grad_norm": 361.1580810546875, "learning_rate": 9.726455278756249e-06, "loss": 38.933, "step": 97600 }, { "epoch": 0.19717837562672463, "grad_norm": 290.5803527832031, "learning_rate": 9.726341392124127e-06, "loss": 14.7052, "step": 97610 }, { "epoch": 0.19719857625940845, "grad_norm": 260.94561767578125, "learning_rate": 9.726227482456391e-06, "loss": 17.2065, "step": 97620 }, { "epoch": 0.19721877689209227, "grad_norm": 611.4354858398438, "learning_rate": 9.726113549753597e-06, "loss": 31.4418, "step": 97630 }, { "epoch": 0.19723897752477607, "grad_norm": 138.56640625, "learning_rate": 9.725999594016298e-06, "loss": 21.758, "step": 97640 }, { "epoch": 0.19725917815745989, "grad_norm": 1559.7120361328125, "learning_rate": 9.72588561524505e-06, "loss": 43.7294, "step": 97650 }, { "epoch": 0.1972793787901437, "grad_norm": 171.50360107421875, "learning_rate": 9.725771613440408e-06, "loss": 29.2016, "step": 97660 }, { "epoch": 0.19729957942282753, "grad_norm": 425.60235595703125, "learning_rate": 9.72565758860293e-06, "loss": 17.8847, "step": 97670 }, { "epoch": 0.19731978005551135, "grad_norm": 903.0280151367188, "learning_rate": 9.725543540733168e-06, "loss": 30.4499, "step": 97680 }, { "epoch": 0.19733998068819517, "grad_norm": 136.61473083496094, "learning_rate": 9.725429469831682e-06, "loss": 29.8291, "step": 97690 }, { "epoch": 0.19736018132087896, "grad_norm": 278.9027404785156, "learning_rate": 9.725315375899025e-06, "loss": 15.9614, "step": 97700 }, { "epoch": 0.19738038195356278, "grad_norm": 378.0606689453125, "learning_rate": 9.725201258935755e-06, "loss": 14.2585, "step": 97710 }, { "epoch": 0.1974005825862466, "grad_norm": 340.623046875, "learning_rate": 9.725087118942425e-06, "loss": 16.6017, "step": 97720 }, { "epoch": 0.19742078321893042, "grad_norm": 5.402549743652344, "learning_rate": 9.724972955919594e-06, "loss": 17.2133, "step": 97730 }, { "epoch": 0.19744098385161424, "grad_norm": 191.53790283203125, "learning_rate": 9.72485876986782e-06, "loss": 17.5356, "step": 97740 }, { "epoch": 0.19746118448429806, "grad_norm": 0.0, "learning_rate": 9.724744560787655e-06, "loss": 21.2057, "step": 97750 }, { "epoch": 0.19748138511698185, "grad_norm": 262.20660400390625, "learning_rate": 9.724630328679658e-06, "loss": 21.7272, "step": 97760 }, { "epoch": 0.19750158574966567, "grad_norm": 343.0996398925781, "learning_rate": 9.724516073544388e-06, "loss": 24.6976, "step": 97770 }, { "epoch": 0.1975217863823495, "grad_norm": 243.66822814941406, "learning_rate": 9.724401795382398e-06, "loss": 13.0549, "step": 97780 }, { "epoch": 0.1975419870150333, "grad_norm": 95.13641357421875, "learning_rate": 9.724287494194247e-06, "loss": 18.2958, "step": 97790 }, { "epoch": 0.19756218764771713, "grad_norm": 88.34515380859375, "learning_rate": 9.724173169980492e-06, "loss": 28.6988, "step": 97800 }, { "epoch": 0.19758238828040095, "grad_norm": 142.56207275390625, "learning_rate": 9.72405882274169e-06, "loss": 15.4086, "step": 97810 }, { "epoch": 0.19760258891308477, "grad_norm": 65.24618530273438, "learning_rate": 9.723944452478398e-06, "loss": 20.7329, "step": 97820 }, { "epoch": 0.19762278954576856, "grad_norm": 438.9810791015625, "learning_rate": 9.723830059191173e-06, "loss": 21.2783, "step": 97830 }, { "epoch": 0.19764299017845238, "grad_norm": 284.0565490722656, "learning_rate": 9.723715642880574e-06, "loss": 21.2488, "step": 97840 }, { "epoch": 0.1976631908111362, "grad_norm": 70.16313171386719, "learning_rate": 9.723601203547158e-06, "loss": 16.3742, "step": 97850 }, { "epoch": 0.19768339144382002, "grad_norm": 172.20765686035156, "learning_rate": 9.723486741191482e-06, "loss": 24.458, "step": 97860 }, { "epoch": 0.19770359207650384, "grad_norm": 662.7827758789062, "learning_rate": 9.723372255814105e-06, "loss": 33.3773, "step": 97870 }, { "epoch": 0.19772379270918766, "grad_norm": 378.53338623046875, "learning_rate": 9.723257747415584e-06, "loss": 24.7305, "step": 97880 }, { "epoch": 0.19774399334187145, "grad_norm": 291.2622375488281, "learning_rate": 9.723143215996479e-06, "loss": 34.4995, "step": 97890 }, { "epoch": 0.19776419397455527, "grad_norm": 152.14317321777344, "learning_rate": 9.723028661557345e-06, "loss": 16.3774, "step": 97900 }, { "epoch": 0.1977843946072391, "grad_norm": 318.4854736328125, "learning_rate": 9.722914084098745e-06, "loss": 38.9058, "step": 97910 }, { "epoch": 0.19780459523992291, "grad_norm": 78.7882308959961, "learning_rate": 9.722799483621232e-06, "loss": 16.474, "step": 97920 }, { "epoch": 0.19782479587260673, "grad_norm": 304.7064514160156, "learning_rate": 9.722684860125367e-06, "loss": 14.3633, "step": 97930 }, { "epoch": 0.19784499650529055, "grad_norm": 202.84268188476562, "learning_rate": 9.72257021361171e-06, "loss": 17.2395, "step": 97940 }, { "epoch": 0.19786519713797437, "grad_norm": 40.743778228759766, "learning_rate": 9.722455544080818e-06, "loss": 20.9886, "step": 97950 }, { "epoch": 0.19788539777065817, "grad_norm": 36.47918701171875, "learning_rate": 9.72234085153325e-06, "loss": 24.6554, "step": 97960 }, { "epoch": 0.197905598403342, "grad_norm": 181.87567138671875, "learning_rate": 9.722226135969565e-06, "loss": 21.0774, "step": 97970 }, { "epoch": 0.1979257990360258, "grad_norm": 532.9088134765625, "learning_rate": 9.722111397390325e-06, "loss": 22.104, "step": 97980 }, { "epoch": 0.19794599966870963, "grad_norm": 474.55511474609375, "learning_rate": 9.721996635796085e-06, "loss": 25.0127, "step": 97990 }, { "epoch": 0.19796620030139345, "grad_norm": 324.6297912597656, "learning_rate": 9.721881851187406e-06, "loss": 37.688, "step": 98000 }, { "epoch": 0.19798640093407727, "grad_norm": 250.3404541015625, "learning_rate": 9.721767043564848e-06, "loss": 29.4403, "step": 98010 }, { "epoch": 0.19800660156676106, "grad_norm": 393.2076721191406, "learning_rate": 9.72165221292897e-06, "loss": 20.1927, "step": 98020 }, { "epoch": 0.19802680219944488, "grad_norm": 450.0534362792969, "learning_rate": 9.721537359280332e-06, "loss": 22.8202, "step": 98030 }, { "epoch": 0.1980470028321287, "grad_norm": 415.8062438964844, "learning_rate": 9.721422482619493e-06, "loss": 37.255, "step": 98040 }, { "epoch": 0.19806720346481252, "grad_norm": 506.333251953125, "learning_rate": 9.721307582947014e-06, "loss": 24.6032, "step": 98050 }, { "epoch": 0.19808740409749634, "grad_norm": 183.85205078125, "learning_rate": 9.721192660263454e-06, "loss": 21.5878, "step": 98060 }, { "epoch": 0.19810760473018016, "grad_norm": 253.01136779785156, "learning_rate": 9.721077714569374e-06, "loss": 16.4773, "step": 98070 }, { "epoch": 0.19812780536286395, "grad_norm": 424.3438415527344, "learning_rate": 9.720962745865334e-06, "loss": 26.9662, "step": 98080 }, { "epoch": 0.19814800599554777, "grad_norm": 500.70458984375, "learning_rate": 9.720847754151894e-06, "loss": 18.2547, "step": 98090 }, { "epoch": 0.1981682066282316, "grad_norm": 171.4901580810547, "learning_rate": 9.720732739429614e-06, "loss": 40.6336, "step": 98100 }, { "epoch": 0.1981884072609154, "grad_norm": 454.698486328125, "learning_rate": 9.720617701699056e-06, "loss": 30.5316, "step": 98110 }, { "epoch": 0.19820860789359923, "grad_norm": 318.5708923339844, "learning_rate": 9.72050264096078e-06, "loss": 30.5645, "step": 98120 }, { "epoch": 0.19822880852628305, "grad_norm": 139.35525512695312, "learning_rate": 9.720387557215344e-06, "loss": 13.1828, "step": 98130 }, { "epoch": 0.19824900915896687, "grad_norm": 33.87257766723633, "learning_rate": 9.720272450463315e-06, "loss": 17.3278, "step": 98140 }, { "epoch": 0.19826920979165066, "grad_norm": 70.30767822265625, "learning_rate": 9.72015732070525e-06, "loss": 12.8659, "step": 98150 }, { "epoch": 0.19828941042433448, "grad_norm": 252.4209442138672, "learning_rate": 9.72004216794171e-06, "loss": 38.9927, "step": 98160 }, { "epoch": 0.1983096110570183, "grad_norm": 288.0030212402344, "learning_rate": 9.719926992173257e-06, "loss": 24.6436, "step": 98170 }, { "epoch": 0.19832981168970212, "grad_norm": 20.311687469482422, "learning_rate": 9.71981179340045e-06, "loss": 26.3239, "step": 98180 }, { "epoch": 0.19835001232238594, "grad_norm": 749.3348388671875, "learning_rate": 9.719696571623857e-06, "loss": 29.5786, "step": 98190 }, { "epoch": 0.19837021295506976, "grad_norm": 319.80419921875, "learning_rate": 9.719581326844033e-06, "loss": 11.6144, "step": 98200 }, { "epoch": 0.19839041358775356, "grad_norm": 158.99436950683594, "learning_rate": 9.719466059061542e-06, "loss": 14.1043, "step": 98210 }, { "epoch": 0.19841061422043738, "grad_norm": 7.4780731201171875, "learning_rate": 9.719350768276947e-06, "loss": 16.6632, "step": 98220 }, { "epoch": 0.1984308148531212, "grad_norm": 301.0854187011719, "learning_rate": 9.719235454490807e-06, "loss": 26.7407, "step": 98230 }, { "epoch": 0.19845101548580502, "grad_norm": 395.88421630859375, "learning_rate": 9.719120117703688e-06, "loss": 12.5527, "step": 98240 }, { "epoch": 0.19847121611848884, "grad_norm": 423.3048095703125, "learning_rate": 9.719004757916149e-06, "loss": 18.9005, "step": 98250 }, { "epoch": 0.19849141675117266, "grad_norm": 354.8130187988281, "learning_rate": 9.718889375128752e-06, "loss": 18.7664, "step": 98260 }, { "epoch": 0.19851161738385648, "grad_norm": 139.29637145996094, "learning_rate": 9.71877396934206e-06, "loss": 11.9694, "step": 98270 }, { "epoch": 0.19853181801654027, "grad_norm": 598.0411376953125, "learning_rate": 9.718658540556638e-06, "loss": 22.0937, "step": 98280 }, { "epoch": 0.1985520186492241, "grad_norm": 185.76893615722656, "learning_rate": 9.718543088773047e-06, "loss": 22.0584, "step": 98290 }, { "epoch": 0.1985722192819079, "grad_norm": 377.5035400390625, "learning_rate": 9.718427613991848e-06, "loss": 38.5457, "step": 98300 }, { "epoch": 0.19859241991459173, "grad_norm": 459.6527404785156, "learning_rate": 9.718312116213604e-06, "loss": 26.7324, "step": 98310 }, { "epoch": 0.19861262054727555, "grad_norm": 205.0708465576172, "learning_rate": 9.71819659543888e-06, "loss": 26.1898, "step": 98320 }, { "epoch": 0.19863282117995937, "grad_norm": 301.8660583496094, "learning_rate": 9.71808105166824e-06, "loss": 30.3056, "step": 98330 }, { "epoch": 0.19865302181264316, "grad_norm": 137.33424377441406, "learning_rate": 9.717965484902244e-06, "loss": 25.9636, "step": 98340 }, { "epoch": 0.19867322244532698, "grad_norm": 367.8601989746094, "learning_rate": 9.717849895141455e-06, "loss": 24.0928, "step": 98350 }, { "epoch": 0.1986934230780108, "grad_norm": 0.0, "learning_rate": 9.717734282386439e-06, "loss": 22.1945, "step": 98360 }, { "epoch": 0.19871362371069462, "grad_norm": 265.67181396484375, "learning_rate": 9.717618646637758e-06, "loss": 8.3731, "step": 98370 }, { "epoch": 0.19873382434337844, "grad_norm": 268.3922424316406, "learning_rate": 9.717502987895975e-06, "loss": 19.6438, "step": 98380 }, { "epoch": 0.19875402497606226, "grad_norm": 747.2258911132812, "learning_rate": 9.717387306161657e-06, "loss": 31.5013, "step": 98390 }, { "epoch": 0.19877422560874605, "grad_norm": 472.0653076171875, "learning_rate": 9.717271601435363e-06, "loss": 26.4325, "step": 98400 }, { "epoch": 0.19879442624142987, "grad_norm": 322.92938232421875, "learning_rate": 9.71715587371766e-06, "loss": 20.7588, "step": 98410 }, { "epoch": 0.1988146268741137, "grad_norm": 218.5413818359375, "learning_rate": 9.717040123009111e-06, "loss": 17.9819, "step": 98420 }, { "epoch": 0.1988348275067975, "grad_norm": 419.1317443847656, "learning_rate": 9.716924349310281e-06, "loss": 29.5121, "step": 98430 }, { "epoch": 0.19885502813948133, "grad_norm": 142.14505004882812, "learning_rate": 9.716808552621735e-06, "loss": 10.6915, "step": 98440 }, { "epoch": 0.19887522877216515, "grad_norm": 624.1358642578125, "learning_rate": 9.716692732944036e-06, "loss": 35.2713, "step": 98450 }, { "epoch": 0.19889542940484897, "grad_norm": 231.04356384277344, "learning_rate": 9.716576890277747e-06, "loss": 35.0937, "step": 98460 }, { "epoch": 0.19891563003753276, "grad_norm": 815.4649047851562, "learning_rate": 9.716461024623437e-06, "loss": 40.0061, "step": 98470 }, { "epoch": 0.19893583067021658, "grad_norm": 533.4913940429688, "learning_rate": 9.716345135981663e-06, "loss": 43.5685, "step": 98480 }, { "epoch": 0.1989560313029004, "grad_norm": 461.6580505371094, "learning_rate": 9.716229224353e-06, "loss": 19.9491, "step": 98490 }, { "epoch": 0.19897623193558422, "grad_norm": 209.63107299804688, "learning_rate": 9.716113289738005e-06, "loss": 25.9622, "step": 98500 }, { "epoch": 0.19899643256826804, "grad_norm": 311.4048156738281, "learning_rate": 9.715997332137248e-06, "loss": 22.4662, "step": 98510 }, { "epoch": 0.19901663320095186, "grad_norm": 190.1439208984375, "learning_rate": 9.71588135155129e-06, "loss": 19.4421, "step": 98520 }, { "epoch": 0.19903683383363566, "grad_norm": 349.7489318847656, "learning_rate": 9.7157653479807e-06, "loss": 14.1321, "step": 98530 }, { "epoch": 0.19905703446631948, "grad_norm": 226.54476928710938, "learning_rate": 9.71564932142604e-06, "loss": 18.1201, "step": 98540 }, { "epoch": 0.1990772350990033, "grad_norm": 186.29421997070312, "learning_rate": 9.715533271887877e-06, "loss": 24.1411, "step": 98550 }, { "epoch": 0.19909743573168712, "grad_norm": 441.80401611328125, "learning_rate": 9.715417199366778e-06, "loss": 22.3155, "step": 98560 }, { "epoch": 0.19911763636437094, "grad_norm": 66.22430419921875, "learning_rate": 9.715301103863306e-06, "loss": 16.0716, "step": 98570 }, { "epoch": 0.19913783699705476, "grad_norm": 103.42139434814453, "learning_rate": 9.71518498537803e-06, "loss": 21.8337, "step": 98580 }, { "epoch": 0.19915803762973858, "grad_norm": 325.03289794921875, "learning_rate": 9.715068843911513e-06, "loss": 19.0939, "step": 98590 }, { "epoch": 0.19917823826242237, "grad_norm": 133.36276245117188, "learning_rate": 9.714952679464324e-06, "loss": 13.8354, "step": 98600 }, { "epoch": 0.1991984388951062, "grad_norm": 1028.3541259765625, "learning_rate": 9.714836492037025e-06, "loss": 27.1384, "step": 98610 }, { "epoch": 0.19921863952779, "grad_norm": 294.4929504394531, "learning_rate": 9.714720281630186e-06, "loss": 17.0999, "step": 98620 }, { "epoch": 0.19923884016047383, "grad_norm": 439.0268859863281, "learning_rate": 9.714604048244372e-06, "loss": 19.3045, "step": 98630 }, { "epoch": 0.19925904079315765, "grad_norm": 39.686763763427734, "learning_rate": 9.714487791880151e-06, "loss": 14.3688, "step": 98640 }, { "epoch": 0.19927924142584147, "grad_norm": 405.557373046875, "learning_rate": 9.714371512538088e-06, "loss": 29.7627, "step": 98650 }, { "epoch": 0.19929944205852526, "grad_norm": 479.8412780761719, "learning_rate": 9.714255210218747e-06, "loss": 16.1344, "step": 98660 }, { "epoch": 0.19931964269120908, "grad_norm": 294.318603515625, "learning_rate": 9.7141388849227e-06, "loss": 29.7241, "step": 98670 }, { "epoch": 0.1993398433238929, "grad_norm": 240.9055633544922, "learning_rate": 9.714022536650513e-06, "loss": 25.0006, "step": 98680 }, { "epoch": 0.19936004395657672, "grad_norm": 121.38719940185547, "learning_rate": 9.713906165402751e-06, "loss": 35.7723, "step": 98690 }, { "epoch": 0.19938024458926054, "grad_norm": 206.0741729736328, "learning_rate": 9.713789771179983e-06, "loss": 22.2205, "step": 98700 }, { "epoch": 0.19940044522194436, "grad_norm": 460.3736572265625, "learning_rate": 9.713673353982773e-06, "loss": 22.2774, "step": 98710 }, { "epoch": 0.19942064585462815, "grad_norm": 308.9148254394531, "learning_rate": 9.713556913811693e-06, "loss": 18.269, "step": 98720 }, { "epoch": 0.19944084648731197, "grad_norm": 88.11115264892578, "learning_rate": 9.713440450667307e-06, "loss": 14.1615, "step": 98730 }, { "epoch": 0.1994610471199958, "grad_norm": 86.2733383178711, "learning_rate": 9.713323964550185e-06, "loss": 16.2301, "step": 98740 }, { "epoch": 0.1994812477526796, "grad_norm": 333.8813781738281, "learning_rate": 9.713207455460893e-06, "loss": 22.5134, "step": 98750 }, { "epoch": 0.19950144838536343, "grad_norm": 324.0524597167969, "learning_rate": 9.713090923399999e-06, "loss": 16.3721, "step": 98760 }, { "epoch": 0.19952164901804725, "grad_norm": 976.88427734375, "learning_rate": 9.712974368368072e-06, "loss": 34.2582, "step": 98770 }, { "epoch": 0.19954184965073107, "grad_norm": 660.9552001953125, "learning_rate": 9.71285779036568e-06, "loss": 44.675, "step": 98780 }, { "epoch": 0.19956205028341487, "grad_norm": 493.498291015625, "learning_rate": 9.71274118939339e-06, "loss": 17.2781, "step": 98790 }, { "epoch": 0.19958225091609869, "grad_norm": 277.3286437988281, "learning_rate": 9.712624565451772e-06, "loss": 29.4594, "step": 98800 }, { "epoch": 0.1996024515487825, "grad_norm": 365.8147277832031, "learning_rate": 9.712507918541391e-06, "loss": 20.8043, "step": 98810 }, { "epoch": 0.19962265218146633, "grad_norm": 280.39227294921875, "learning_rate": 9.712391248662821e-06, "loss": 16.1489, "step": 98820 }, { "epoch": 0.19964285281415015, "grad_norm": 232.89385986328125, "learning_rate": 9.712274555816626e-06, "loss": 22.03, "step": 98830 }, { "epoch": 0.19966305344683397, "grad_norm": 183.40164184570312, "learning_rate": 9.712157840003377e-06, "loss": 15.6782, "step": 98840 }, { "epoch": 0.19968325407951776, "grad_norm": 436.7102355957031, "learning_rate": 9.71204110122364e-06, "loss": 57.8466, "step": 98850 }, { "epoch": 0.19970345471220158, "grad_norm": 169.69039916992188, "learning_rate": 9.71192433947799e-06, "loss": 21.5869, "step": 98860 }, { "epoch": 0.1997236553448854, "grad_norm": 1089.2491455078125, "learning_rate": 9.71180755476699e-06, "loss": 31.2453, "step": 98870 }, { "epoch": 0.19974385597756922, "grad_norm": 144.77748107910156, "learning_rate": 9.711690747091211e-06, "loss": 16.7495, "step": 98880 }, { "epoch": 0.19976405661025304, "grad_norm": 162.23171997070312, "learning_rate": 9.711573916451224e-06, "loss": 12.5862, "step": 98890 }, { "epoch": 0.19978425724293686, "grad_norm": 113.84131622314453, "learning_rate": 9.711457062847596e-06, "loss": 24.189, "step": 98900 }, { "epoch": 0.19980445787562068, "grad_norm": 101.76972961425781, "learning_rate": 9.7113401862809e-06, "loss": 17.4471, "step": 98910 }, { "epoch": 0.19982465850830447, "grad_norm": 113.3838119506836, "learning_rate": 9.7112232867517e-06, "loss": 25.4606, "step": 98920 }, { "epoch": 0.1998448591409883, "grad_norm": 270.42425537109375, "learning_rate": 9.711106364260572e-06, "loss": 15.6557, "step": 98930 }, { "epoch": 0.1998650597736721, "grad_norm": 438.3560791015625, "learning_rate": 9.71098941880808e-06, "loss": 12.8353, "step": 98940 }, { "epoch": 0.19988526040635593, "grad_norm": 269.7732238769531, "learning_rate": 9.7108724503948e-06, "loss": 17.0326, "step": 98950 }, { "epoch": 0.19990546103903975, "grad_norm": 329.3045654296875, "learning_rate": 9.710755459021297e-06, "loss": 21.4894, "step": 98960 }, { "epoch": 0.19992566167172357, "grad_norm": 626.2535400390625, "learning_rate": 9.710638444688146e-06, "loss": 19.8331, "step": 98970 }, { "epoch": 0.19994586230440736, "grad_norm": 705.7029418945312, "learning_rate": 9.71052140739591e-06, "loss": 10.5097, "step": 98980 }, { "epoch": 0.19996606293709118, "grad_norm": 291.2868957519531, "learning_rate": 9.710404347145168e-06, "loss": 17.7347, "step": 98990 }, { "epoch": 0.199986263569775, "grad_norm": 269.8981018066406, "learning_rate": 9.710287263936485e-06, "loss": 17.2337, "step": 99000 }, { "epoch": 0.20000646420245882, "grad_norm": 115.70751190185547, "learning_rate": 9.710170157770434e-06, "loss": 27.0905, "step": 99010 }, { "epoch": 0.20002666483514264, "grad_norm": 549.883544921875, "learning_rate": 9.710053028647583e-06, "loss": 44.8836, "step": 99020 }, { "epoch": 0.20004686546782646, "grad_norm": 402.4552307128906, "learning_rate": 9.709935876568506e-06, "loss": 16.6749, "step": 99030 }, { "epoch": 0.20006706610051025, "grad_norm": 294.47607421875, "learning_rate": 9.709818701533774e-06, "loss": 21.639, "step": 99040 }, { "epoch": 0.20008726673319407, "grad_norm": 355.9969787597656, "learning_rate": 9.709701503543954e-06, "loss": 12.0298, "step": 99050 }, { "epoch": 0.2001074673658779, "grad_norm": 520.0482177734375, "learning_rate": 9.709584282599623e-06, "loss": 18.5309, "step": 99060 }, { "epoch": 0.20012766799856171, "grad_norm": 529.7421875, "learning_rate": 9.709467038701348e-06, "loss": 26.9491, "step": 99070 }, { "epoch": 0.20014786863124553, "grad_norm": 312.5910339355469, "learning_rate": 9.709349771849701e-06, "loss": 14.5761, "step": 99080 }, { "epoch": 0.20016806926392935, "grad_norm": 213.33078002929688, "learning_rate": 9.709232482045254e-06, "loss": 22.411, "step": 99090 }, { "epoch": 0.20018826989661317, "grad_norm": 157.93893432617188, "learning_rate": 9.709115169288582e-06, "loss": 28.6149, "step": 99100 }, { "epoch": 0.20020847052929697, "grad_norm": 916.6813354492188, "learning_rate": 9.708997833580251e-06, "loss": 42.1374, "step": 99110 }, { "epoch": 0.2002286711619808, "grad_norm": 193.66603088378906, "learning_rate": 9.708880474920836e-06, "loss": 25.3623, "step": 99120 }, { "epoch": 0.2002488717946646, "grad_norm": 555.9515991210938, "learning_rate": 9.708763093310911e-06, "loss": 22.9251, "step": 99130 }, { "epoch": 0.20026907242734843, "grad_norm": 527.6494140625, "learning_rate": 9.708645688751043e-06, "loss": 33.6146, "step": 99140 }, { "epoch": 0.20028927306003225, "grad_norm": 449.22235107421875, "learning_rate": 9.70852826124181e-06, "loss": 27.3952, "step": 99150 }, { "epoch": 0.20030947369271607, "grad_norm": 373.18133544921875, "learning_rate": 9.70841081078378e-06, "loss": 21.8619, "step": 99160 }, { "epoch": 0.20032967432539986, "grad_norm": 154.5146484375, "learning_rate": 9.708293337377525e-06, "loss": 18.326, "step": 99170 }, { "epoch": 0.20034987495808368, "grad_norm": 413.056396484375, "learning_rate": 9.70817584102362e-06, "loss": 19.0279, "step": 99180 }, { "epoch": 0.2003700755907675, "grad_norm": 274.89739990234375, "learning_rate": 9.70805832172264e-06, "loss": 27.4357, "step": 99190 }, { "epoch": 0.20039027622345132, "grad_norm": 471.5213317871094, "learning_rate": 9.707940779475151e-06, "loss": 28.8472, "step": 99200 }, { "epoch": 0.20041047685613514, "grad_norm": 434.703125, "learning_rate": 9.707823214281733e-06, "loss": 31.5052, "step": 99210 }, { "epoch": 0.20043067748881896, "grad_norm": 228.05068969726562, "learning_rate": 9.707705626142952e-06, "loss": 16.7138, "step": 99220 }, { "epoch": 0.20045087812150278, "grad_norm": 536.454833984375, "learning_rate": 9.707588015059387e-06, "loss": 19.5309, "step": 99230 }, { "epoch": 0.20047107875418657, "grad_norm": 446.0665588378906, "learning_rate": 9.707470381031608e-06, "loss": 34.802, "step": 99240 }, { "epoch": 0.2004912793868704, "grad_norm": 312.688232421875, "learning_rate": 9.70735272406019e-06, "loss": 30.9383, "step": 99250 }, { "epoch": 0.2005114800195542, "grad_norm": 358.7753601074219, "learning_rate": 9.707235044145707e-06, "loss": 13.7764, "step": 99260 }, { "epoch": 0.20053168065223803, "grad_norm": 195.270263671875, "learning_rate": 9.707117341288728e-06, "loss": 18.0138, "step": 99270 }, { "epoch": 0.20055188128492185, "grad_norm": 348.90325927734375, "learning_rate": 9.706999615489833e-06, "loss": 22.9395, "step": 99280 }, { "epoch": 0.20057208191760567, "grad_norm": 209.95248413085938, "learning_rate": 9.70688186674959e-06, "loss": 23.2172, "step": 99290 }, { "epoch": 0.20059228255028946, "grad_norm": 405.9944152832031, "learning_rate": 9.706764095068579e-06, "loss": 22.3916, "step": 99300 }, { "epoch": 0.20061248318297328, "grad_norm": 191.47349548339844, "learning_rate": 9.706646300447369e-06, "loss": 21.2667, "step": 99310 }, { "epoch": 0.2006326838156571, "grad_norm": 3292.76416015625, "learning_rate": 9.706528482886535e-06, "loss": 37.0352, "step": 99320 }, { "epoch": 0.20065288444834092, "grad_norm": 374.0223388671875, "learning_rate": 9.706410642386653e-06, "loss": 22.9794, "step": 99330 }, { "epoch": 0.20067308508102474, "grad_norm": 315.8014831542969, "learning_rate": 9.706292778948297e-06, "loss": 26.9426, "step": 99340 }, { "epoch": 0.20069328571370856, "grad_norm": 144.34873962402344, "learning_rate": 9.706174892572038e-06, "loss": 19.3427, "step": 99350 }, { "epoch": 0.20071348634639236, "grad_norm": 510.6126403808594, "learning_rate": 9.706056983258456e-06, "loss": 21.2325, "step": 99360 }, { "epoch": 0.20073368697907618, "grad_norm": 279.7928771972656, "learning_rate": 9.705939051008124e-06, "loss": 8.1298, "step": 99370 }, { "epoch": 0.20075388761176, "grad_norm": 387.5137634277344, "learning_rate": 9.705821095821612e-06, "loss": 15.2213, "step": 99380 }, { "epoch": 0.20077408824444382, "grad_norm": 292.31317138671875, "learning_rate": 9.705703117699501e-06, "loss": 27.0379, "step": 99390 }, { "epoch": 0.20079428887712764, "grad_norm": 353.5614929199219, "learning_rate": 9.705585116642364e-06, "loss": 24.0259, "step": 99400 }, { "epoch": 0.20081448950981146, "grad_norm": 206.29518127441406, "learning_rate": 9.705467092650775e-06, "loss": 28.2081, "step": 99410 }, { "epoch": 0.20083469014249528, "grad_norm": 217.4748077392578, "learning_rate": 9.705349045725313e-06, "loss": 20.6052, "step": 99420 }, { "epoch": 0.20085489077517907, "grad_norm": 113.75446319580078, "learning_rate": 9.705230975866547e-06, "loss": 23.9714, "step": 99430 }, { "epoch": 0.2008750914078629, "grad_norm": 257.2599792480469, "learning_rate": 9.705112883075055e-06, "loss": 11.9611, "step": 99440 }, { "epoch": 0.2008952920405467, "grad_norm": 164.62890625, "learning_rate": 9.704994767351417e-06, "loss": 16.2368, "step": 99450 }, { "epoch": 0.20091549267323053, "grad_norm": 646.8043212890625, "learning_rate": 9.704876628696202e-06, "loss": 43.7545, "step": 99460 }, { "epoch": 0.20093569330591435, "grad_norm": 81.92411804199219, "learning_rate": 9.70475846710999e-06, "loss": 32.4631, "step": 99470 }, { "epoch": 0.20095589393859817, "grad_norm": 94.27066040039062, "learning_rate": 9.704640282593359e-06, "loss": 19.8909, "step": 99480 }, { "epoch": 0.20097609457128196, "grad_norm": 374.1415710449219, "learning_rate": 9.704522075146878e-06, "loss": 30.131, "step": 99490 }, { "epoch": 0.20099629520396578, "grad_norm": 179.7720184326172, "learning_rate": 9.704403844771128e-06, "loss": 24.925, "step": 99500 }, { "epoch": 0.2010164958366496, "grad_norm": 143.40892028808594, "learning_rate": 9.704285591466685e-06, "loss": 19.6643, "step": 99510 }, { "epoch": 0.20103669646933342, "grad_norm": 213.973388671875, "learning_rate": 9.704167315234124e-06, "loss": 55.64, "step": 99520 }, { "epoch": 0.20105689710201724, "grad_norm": 76.79525756835938, "learning_rate": 9.704049016074022e-06, "loss": 39.9364, "step": 99530 }, { "epoch": 0.20107709773470106, "grad_norm": 285.5968322753906, "learning_rate": 9.703930693986956e-06, "loss": 14.2452, "step": 99540 }, { "epoch": 0.20109729836738488, "grad_norm": 104.43171691894531, "learning_rate": 9.703812348973501e-06, "loss": 33.9176, "step": 99550 }, { "epoch": 0.20111749900006867, "grad_norm": 393.65924072265625, "learning_rate": 9.703693981034236e-06, "loss": 19.5273, "step": 99560 }, { "epoch": 0.2011376996327525, "grad_norm": 223.10870361328125, "learning_rate": 9.703575590169738e-06, "loss": 12.441, "step": 99570 }, { "epoch": 0.2011579002654363, "grad_norm": 66.7582778930664, "learning_rate": 9.703457176380581e-06, "loss": 33.0507, "step": 99580 }, { "epoch": 0.20117810089812013, "grad_norm": 139.29322814941406, "learning_rate": 9.703338739667347e-06, "loss": 24.3839, "step": 99590 }, { "epoch": 0.20119830153080395, "grad_norm": 247.2187957763672, "learning_rate": 9.703220280030607e-06, "loss": 30.1304, "step": 99600 }, { "epoch": 0.20121850216348777, "grad_norm": 0.0, "learning_rate": 9.703101797470944e-06, "loss": 15.4728, "step": 99610 }, { "epoch": 0.20123870279617156, "grad_norm": 105.39654541015625, "learning_rate": 9.702983291988934e-06, "loss": 45.9757, "step": 99620 }, { "epoch": 0.20125890342885538, "grad_norm": 77.55934143066406, "learning_rate": 9.702864763585152e-06, "loss": 14.9886, "step": 99630 }, { "epoch": 0.2012791040615392, "grad_norm": 386.4718933105469, "learning_rate": 9.702746212260179e-06, "loss": 28.3126, "step": 99640 }, { "epoch": 0.20129930469422302, "grad_norm": 405.66888427734375, "learning_rate": 9.70262763801459e-06, "loss": 17.9073, "step": 99650 }, { "epoch": 0.20131950532690684, "grad_norm": 203.88990783691406, "learning_rate": 9.702509040848964e-06, "loss": 14.26, "step": 99660 }, { "epoch": 0.20133970595959066, "grad_norm": 238.68861389160156, "learning_rate": 9.70239042076388e-06, "loss": 17.7461, "step": 99670 }, { "epoch": 0.20135990659227446, "grad_norm": 285.1687316894531, "learning_rate": 9.702271777759915e-06, "loss": 14.8119, "step": 99680 }, { "epoch": 0.20138010722495828, "grad_norm": 394.012451171875, "learning_rate": 9.70215311183765e-06, "loss": 21.7798, "step": 99690 }, { "epoch": 0.2014003078576421, "grad_norm": 27.218387603759766, "learning_rate": 9.702034422997658e-06, "loss": 32.2686, "step": 99700 }, { "epoch": 0.20142050849032592, "grad_norm": 189.9946746826172, "learning_rate": 9.701915711240522e-06, "loss": 25.7137, "step": 99710 }, { "epoch": 0.20144070912300974, "grad_norm": 194.99853515625, "learning_rate": 9.70179697656682e-06, "loss": 22.1752, "step": 99720 }, { "epoch": 0.20146090975569356, "grad_norm": 294.8200988769531, "learning_rate": 9.701678218977128e-06, "loss": 16.4786, "step": 99730 }, { "epoch": 0.20148111038837738, "grad_norm": 148.3772735595703, "learning_rate": 9.701559438472026e-06, "loss": 9.0358, "step": 99740 }, { "epoch": 0.20150131102106117, "grad_norm": 171.16970825195312, "learning_rate": 9.701440635052094e-06, "loss": 16.7796, "step": 99750 }, { "epoch": 0.201521511653745, "grad_norm": 381.7772521972656, "learning_rate": 9.701321808717912e-06, "loss": 16.5348, "step": 99760 }, { "epoch": 0.2015417122864288, "grad_norm": 132.0386962890625, "learning_rate": 9.701202959470057e-06, "loss": 23.1716, "step": 99770 }, { "epoch": 0.20156191291911263, "grad_norm": 214.1879425048828, "learning_rate": 9.70108408730911e-06, "loss": 20.5303, "step": 99780 }, { "epoch": 0.20158211355179645, "grad_norm": 367.2514343261719, "learning_rate": 9.700965192235647e-06, "loss": 19.535, "step": 99790 }, { "epoch": 0.20160231418448027, "grad_norm": 67.16075897216797, "learning_rate": 9.700846274250252e-06, "loss": 9.9516, "step": 99800 }, { "epoch": 0.20162251481716406, "grad_norm": 419.40521240234375, "learning_rate": 9.700727333353502e-06, "loss": 14.5154, "step": 99810 }, { "epoch": 0.20164271544984788, "grad_norm": 208.8455047607422, "learning_rate": 9.700608369545976e-06, "loss": 22.0586, "step": 99820 }, { "epoch": 0.2016629160825317, "grad_norm": 3.9294593334198, "learning_rate": 9.700489382828255e-06, "loss": 23.3985, "step": 99830 }, { "epoch": 0.20168311671521552, "grad_norm": 128.4691925048828, "learning_rate": 9.70037037320092e-06, "loss": 22.9115, "step": 99840 }, { "epoch": 0.20170331734789934, "grad_norm": 413.1893005371094, "learning_rate": 9.70025134066455e-06, "loss": 21.6746, "step": 99850 }, { "epoch": 0.20172351798058316, "grad_norm": 27.66179847717285, "learning_rate": 9.700132285219724e-06, "loss": 19.9071, "step": 99860 }, { "epoch": 0.20174371861326698, "grad_norm": 25.85624122619629, "learning_rate": 9.700013206867022e-06, "loss": 25.3441, "step": 99870 }, { "epoch": 0.20176391924595077, "grad_norm": 224.59249877929688, "learning_rate": 9.699894105607028e-06, "loss": 24.5861, "step": 99880 }, { "epoch": 0.2017841198786346, "grad_norm": 31.978776931762695, "learning_rate": 9.69977498144032e-06, "loss": 43.2479, "step": 99890 }, { "epoch": 0.2018043205113184, "grad_norm": 12.322134017944336, "learning_rate": 9.699655834367479e-06, "loss": 20.1651, "step": 99900 }, { "epoch": 0.20182452114400223, "grad_norm": 207.30181884765625, "learning_rate": 9.699536664389084e-06, "loss": 17.7694, "step": 99910 }, { "epoch": 0.20184472177668605, "grad_norm": 460.2311096191406, "learning_rate": 9.699417471505717e-06, "loss": 38.1702, "step": 99920 }, { "epoch": 0.20186492240936987, "grad_norm": 71.38224029541016, "learning_rate": 9.699298255717961e-06, "loss": 19.548, "step": 99930 }, { "epoch": 0.20188512304205367, "grad_norm": 265.2184753417969, "learning_rate": 9.699179017026395e-06, "loss": 22.3463, "step": 99940 }, { "epoch": 0.20190532367473749, "grad_norm": 423.18817138671875, "learning_rate": 9.699059755431599e-06, "loss": 26.3937, "step": 99950 }, { "epoch": 0.2019255243074213, "grad_norm": 241.22601318359375, "learning_rate": 9.698940470934158e-06, "loss": 26.7638, "step": 99960 }, { "epoch": 0.20194572494010513, "grad_norm": 386.8973388671875, "learning_rate": 9.698821163534649e-06, "loss": 27.9189, "step": 99970 }, { "epoch": 0.20196592557278895, "grad_norm": 713.7116088867188, "learning_rate": 9.698701833233654e-06, "loss": 20.9334, "step": 99980 }, { "epoch": 0.20198612620547277, "grad_norm": 130.43789672851562, "learning_rate": 9.69858248003176e-06, "loss": 19.0897, "step": 99990 }, { "epoch": 0.20200632683815656, "grad_norm": 245.17103576660156, "learning_rate": 9.698463103929542e-06, "loss": 21.2309, "step": 100000 }, { "epoch": 0.20202652747084038, "grad_norm": 314.5679931640625, "learning_rate": 9.698343704927586e-06, "loss": 36.1042, "step": 100010 }, { "epoch": 0.2020467281035242, "grad_norm": 577.81201171875, "learning_rate": 9.698224283026473e-06, "loss": 25.5394, "step": 100020 }, { "epoch": 0.20206692873620802, "grad_norm": 0.0, "learning_rate": 9.698104838226783e-06, "loss": 29.0769, "step": 100030 }, { "epoch": 0.20208712936889184, "grad_norm": 221.71168518066406, "learning_rate": 9.697985370529101e-06, "loss": 8.4257, "step": 100040 }, { "epoch": 0.20210733000157566, "grad_norm": 300.45306396484375, "learning_rate": 9.69786587993401e-06, "loss": 14.6654, "step": 100050 }, { "epoch": 0.20212753063425948, "grad_norm": 323.5521240234375, "learning_rate": 9.697746366442087e-06, "loss": 34.0911, "step": 100060 }, { "epoch": 0.20214773126694327, "grad_norm": 536.4998168945312, "learning_rate": 9.69762683005392e-06, "loss": 41.3249, "step": 100070 }, { "epoch": 0.2021679318996271, "grad_norm": 245.37864685058594, "learning_rate": 9.69750727077009e-06, "loss": 26.6861, "step": 100080 }, { "epoch": 0.2021881325323109, "grad_norm": 23.844661712646484, "learning_rate": 9.697387688591178e-06, "loss": 21.8645, "step": 100090 }, { "epoch": 0.20220833316499473, "grad_norm": 220.0467987060547, "learning_rate": 9.697268083517767e-06, "loss": 31.5455, "step": 100100 }, { "epoch": 0.20222853379767855, "grad_norm": 471.7340087890625, "learning_rate": 9.697148455550444e-06, "loss": 18.1641, "step": 100110 }, { "epoch": 0.20224873443036237, "grad_norm": 221.2740478515625, "learning_rate": 9.697028804689788e-06, "loss": 11.9623, "step": 100120 }, { "epoch": 0.20226893506304616, "grad_norm": 512.1837158203125, "learning_rate": 9.696909130936382e-06, "loss": 14.4331, "step": 100130 }, { "epoch": 0.20228913569572998, "grad_norm": 120.90528106689453, "learning_rate": 9.696789434290812e-06, "loss": 13.3736, "step": 100140 }, { "epoch": 0.2023093363284138, "grad_norm": 231.32386779785156, "learning_rate": 9.696669714753658e-06, "loss": 23.3514, "step": 100150 }, { "epoch": 0.20232953696109762, "grad_norm": 603.4599609375, "learning_rate": 9.696549972325509e-06, "loss": 19.9133, "step": 100160 }, { "epoch": 0.20234973759378144, "grad_norm": 198.6626739501953, "learning_rate": 9.696430207006942e-06, "loss": 21.4118, "step": 100170 }, { "epoch": 0.20236993822646526, "grad_norm": 279.544189453125, "learning_rate": 9.696310418798544e-06, "loss": 24.4203, "step": 100180 }, { "epoch": 0.20239013885914908, "grad_norm": 126.50409698486328, "learning_rate": 9.696190607700901e-06, "loss": 16.0397, "step": 100190 }, { "epoch": 0.20241033949183287, "grad_norm": 262.1365966796875, "learning_rate": 9.696070773714592e-06, "loss": 30.8267, "step": 100200 }, { "epoch": 0.2024305401245167, "grad_norm": 196.5624542236328, "learning_rate": 9.695950916840204e-06, "loss": 20.507, "step": 100210 }, { "epoch": 0.20245074075720051, "grad_norm": 444.5268249511719, "learning_rate": 9.695831037078323e-06, "loss": 27.3487, "step": 100220 }, { "epoch": 0.20247094138988433, "grad_norm": 209.4663543701172, "learning_rate": 9.695711134429529e-06, "loss": 42.3472, "step": 100230 }, { "epoch": 0.20249114202256815, "grad_norm": 550.9859619140625, "learning_rate": 9.695591208894408e-06, "loss": 22.1687, "step": 100240 }, { "epoch": 0.20251134265525197, "grad_norm": 227.11026000976562, "learning_rate": 9.695471260473546e-06, "loss": 51.4432, "step": 100250 }, { "epoch": 0.20253154328793577, "grad_norm": 384.5789794921875, "learning_rate": 9.695351289167527e-06, "loss": 23.332, "step": 100260 }, { "epoch": 0.2025517439206196, "grad_norm": 560.181640625, "learning_rate": 9.695231294976935e-06, "loss": 22.877, "step": 100270 }, { "epoch": 0.2025719445533034, "grad_norm": 330.4541320800781, "learning_rate": 9.695111277902353e-06, "loss": 26.3467, "step": 100280 }, { "epoch": 0.20259214518598723, "grad_norm": 270.53961181640625, "learning_rate": 9.69499123794437e-06, "loss": 37.2936, "step": 100290 }, { "epoch": 0.20261234581867105, "grad_norm": 194.23202514648438, "learning_rate": 9.69487117510357e-06, "loss": 22.7556, "step": 100300 }, { "epoch": 0.20263254645135487, "grad_norm": 322.431640625, "learning_rate": 9.694751089380536e-06, "loss": 29.1479, "step": 100310 }, { "epoch": 0.20265274708403866, "grad_norm": 0.0, "learning_rate": 9.694630980775856e-06, "loss": 20.9701, "step": 100320 }, { "epoch": 0.20267294771672248, "grad_norm": 127.05522918701172, "learning_rate": 9.694510849290113e-06, "loss": 29.5523, "step": 100330 }, { "epoch": 0.2026931483494063, "grad_norm": 44.90702819824219, "learning_rate": 9.694390694923893e-06, "loss": 16.2128, "step": 100340 }, { "epoch": 0.20271334898209012, "grad_norm": 297.4532165527344, "learning_rate": 9.694270517677782e-06, "loss": 19.48, "step": 100350 }, { "epoch": 0.20273354961477394, "grad_norm": 375.3468322753906, "learning_rate": 9.694150317552367e-06, "loss": 19.0759, "step": 100360 }, { "epoch": 0.20275375024745776, "grad_norm": 241.880859375, "learning_rate": 9.694030094548233e-06, "loss": 29.7394, "step": 100370 }, { "epoch": 0.20277395088014158, "grad_norm": 150.0443572998047, "learning_rate": 9.693909848665962e-06, "loss": 13.697, "step": 100380 }, { "epoch": 0.20279415151282537, "grad_norm": 199.79251098632812, "learning_rate": 9.693789579906147e-06, "loss": 24.1209, "step": 100390 }, { "epoch": 0.2028143521455092, "grad_norm": 124.97339630126953, "learning_rate": 9.693669288269371e-06, "loss": 13.9262, "step": 100400 }, { "epoch": 0.202834552778193, "grad_norm": 417.5665283203125, "learning_rate": 9.69354897375622e-06, "loss": 26.3442, "step": 100410 }, { "epoch": 0.20285475341087683, "grad_norm": 214.94752502441406, "learning_rate": 9.693428636367279e-06, "loss": 23.3356, "step": 100420 }, { "epoch": 0.20287495404356065, "grad_norm": 235.81387329101562, "learning_rate": 9.693308276103136e-06, "loss": 16.8861, "step": 100430 }, { "epoch": 0.20289515467624447, "grad_norm": 455.8398132324219, "learning_rate": 9.693187892964381e-06, "loss": 35.5196, "step": 100440 }, { "epoch": 0.20291535530892826, "grad_norm": 288.5793151855469, "learning_rate": 9.693067486951595e-06, "loss": 40.5406, "step": 100450 }, { "epoch": 0.20293555594161208, "grad_norm": 105.90095520019531, "learning_rate": 9.692947058065367e-06, "loss": 28.9018, "step": 100460 }, { "epoch": 0.2029557565742959, "grad_norm": 362.96771240234375, "learning_rate": 9.692826606306284e-06, "loss": 34.8058, "step": 100470 }, { "epoch": 0.20297595720697972, "grad_norm": 175.43389892578125, "learning_rate": 9.692706131674935e-06, "loss": 26.5804, "step": 100480 }, { "epoch": 0.20299615783966354, "grad_norm": 365.2595520019531, "learning_rate": 9.692585634171906e-06, "loss": 19.1601, "step": 100490 }, { "epoch": 0.20301635847234736, "grad_norm": 201.9995574951172, "learning_rate": 9.69246511379778e-06, "loss": 18.6526, "step": 100500 }, { "epoch": 0.20303655910503116, "grad_norm": 81.64897918701172, "learning_rate": 9.692344570553152e-06, "loss": 20.8771, "step": 100510 }, { "epoch": 0.20305675973771498, "grad_norm": 274.175537109375, "learning_rate": 9.692224004438603e-06, "loss": 15.7539, "step": 100520 }, { "epoch": 0.2030769603703988, "grad_norm": 171.67437744140625, "learning_rate": 9.692103415454724e-06, "loss": 26.9594, "step": 100530 }, { "epoch": 0.20309716100308262, "grad_norm": 339.04473876953125, "learning_rate": 9.691982803602102e-06, "loss": 23.6957, "step": 100540 }, { "epoch": 0.20311736163576644, "grad_norm": 385.194091796875, "learning_rate": 9.691862168881325e-06, "loss": 24.8272, "step": 100550 }, { "epoch": 0.20313756226845026, "grad_norm": 269.65301513671875, "learning_rate": 9.691741511292983e-06, "loss": 18.3892, "step": 100560 }, { "epoch": 0.20315776290113408, "grad_norm": 189.0652313232422, "learning_rate": 9.691620830837659e-06, "loss": 22.2232, "step": 100570 }, { "epoch": 0.20317796353381787, "grad_norm": 222.02357482910156, "learning_rate": 9.691500127515945e-06, "loss": 12.258, "step": 100580 }, { "epoch": 0.2031981641665017, "grad_norm": 279.23236083984375, "learning_rate": 9.69137940132843e-06, "loss": 16.3792, "step": 100590 }, { "epoch": 0.2032183647991855, "grad_norm": 244.89645385742188, "learning_rate": 9.691258652275698e-06, "loss": 16.032, "step": 100600 }, { "epoch": 0.20323856543186933, "grad_norm": 320.59259033203125, "learning_rate": 9.691137880358341e-06, "loss": 20.0968, "step": 100610 }, { "epoch": 0.20325876606455315, "grad_norm": 567.2601928710938, "learning_rate": 9.691017085576947e-06, "loss": 26.9291, "step": 100620 }, { "epoch": 0.20327896669723697, "grad_norm": 207.60702514648438, "learning_rate": 9.690896267932106e-06, "loss": 15.7136, "step": 100630 }, { "epoch": 0.20329916732992076, "grad_norm": 251.47674560546875, "learning_rate": 9.690775427424406e-06, "loss": 35.7013, "step": 100640 }, { "epoch": 0.20331936796260458, "grad_norm": 709.9905395507812, "learning_rate": 9.690654564054433e-06, "loss": 30.6934, "step": 100650 }, { "epoch": 0.2033395685952884, "grad_norm": 453.2863464355469, "learning_rate": 9.69053367782278e-06, "loss": 20.6635, "step": 100660 }, { "epoch": 0.20335976922797222, "grad_norm": 180.77523803710938, "learning_rate": 9.690412768730036e-06, "loss": 12.9956, "step": 100670 }, { "epoch": 0.20337996986065604, "grad_norm": 178.20401000976562, "learning_rate": 9.690291836776786e-06, "loss": 13.7819, "step": 100680 }, { "epoch": 0.20340017049333986, "grad_norm": 331.78826904296875, "learning_rate": 9.690170881963624e-06, "loss": 17.3382, "step": 100690 }, { "epoch": 0.20342037112602368, "grad_norm": 3.337109088897705, "learning_rate": 9.690049904291139e-06, "loss": 20.2997, "step": 100700 }, { "epoch": 0.20344057175870747, "grad_norm": 173.89857482910156, "learning_rate": 9.689928903759918e-06, "loss": 12.5831, "step": 100710 }, { "epoch": 0.2034607723913913, "grad_norm": 103.36121368408203, "learning_rate": 9.689807880370554e-06, "loss": 12.6478, "step": 100720 }, { "epoch": 0.2034809730240751, "grad_norm": 179.9310760498047, "learning_rate": 9.689686834123633e-06, "loss": 19.103, "step": 100730 }, { "epoch": 0.20350117365675893, "grad_norm": 155.61456298828125, "learning_rate": 9.689565765019748e-06, "loss": 19.2574, "step": 100740 }, { "epoch": 0.20352137428944275, "grad_norm": 173.2294464111328, "learning_rate": 9.68944467305949e-06, "loss": 9.4848, "step": 100750 }, { "epoch": 0.20354157492212657, "grad_norm": 408.27398681640625, "learning_rate": 9.689323558243446e-06, "loss": 24.7973, "step": 100760 }, { "epoch": 0.20356177555481036, "grad_norm": 180.55873107910156, "learning_rate": 9.689202420572207e-06, "loss": 20.6911, "step": 100770 }, { "epoch": 0.20358197618749418, "grad_norm": 186.79248046875, "learning_rate": 9.689081260046365e-06, "loss": 15.396, "step": 100780 }, { "epoch": 0.203602176820178, "grad_norm": 128.36752319335938, "learning_rate": 9.68896007666651e-06, "loss": 8.1728, "step": 100790 }, { "epoch": 0.20362237745286182, "grad_norm": 263.9230041503906, "learning_rate": 9.68883887043323e-06, "loss": 33.5694, "step": 100800 }, { "epoch": 0.20364257808554564, "grad_norm": 265.1966857910156, "learning_rate": 9.688717641347121e-06, "loss": 31.6376, "step": 100810 }, { "epoch": 0.20366277871822946, "grad_norm": 420.6526794433594, "learning_rate": 9.688596389408769e-06, "loss": 24.3424, "step": 100820 }, { "epoch": 0.20368297935091326, "grad_norm": 348.3215637207031, "learning_rate": 9.688475114618768e-06, "loss": 10.1822, "step": 100830 }, { "epoch": 0.20370317998359708, "grad_norm": 215.42433166503906, "learning_rate": 9.688353816977708e-06, "loss": 23.9912, "step": 100840 }, { "epoch": 0.2037233806162809, "grad_norm": 212.43057250976562, "learning_rate": 9.688232496486179e-06, "loss": 11.4526, "step": 100850 }, { "epoch": 0.20374358124896472, "grad_norm": 349.4958801269531, "learning_rate": 9.688111153144775e-06, "loss": 17.9661, "step": 100860 }, { "epoch": 0.20376378188164854, "grad_norm": 245.00538635253906, "learning_rate": 9.687989786954084e-06, "loss": 24.3112, "step": 100870 }, { "epoch": 0.20378398251433236, "grad_norm": 167.31199645996094, "learning_rate": 9.687868397914701e-06, "loss": 27.8401, "step": 100880 }, { "epoch": 0.20380418314701618, "grad_norm": 62.575767517089844, "learning_rate": 9.687746986027215e-06, "loss": 15.0136, "step": 100890 }, { "epoch": 0.20382438377969997, "grad_norm": 531.1142578125, "learning_rate": 9.687625551292219e-06, "loss": 26.1669, "step": 100900 }, { "epoch": 0.2038445844123838, "grad_norm": 378.8331604003906, "learning_rate": 9.687504093710304e-06, "loss": 25.2076, "step": 100910 }, { "epoch": 0.2038647850450676, "grad_norm": 226.28968811035156, "learning_rate": 9.687382613282063e-06, "loss": 19.1094, "step": 100920 }, { "epoch": 0.20388498567775143, "grad_norm": 177.3240509033203, "learning_rate": 9.687261110008088e-06, "loss": 12.8736, "step": 100930 }, { "epoch": 0.20390518631043525, "grad_norm": 206.7989501953125, "learning_rate": 9.687139583888971e-06, "loss": 22.3381, "step": 100940 }, { "epoch": 0.20392538694311907, "grad_norm": 283.3512878417969, "learning_rate": 9.687018034925304e-06, "loss": 34.5664, "step": 100950 }, { "epoch": 0.20394558757580286, "grad_norm": 582.2257690429688, "learning_rate": 9.686896463117679e-06, "loss": 24.7272, "step": 100960 }, { "epoch": 0.20396578820848668, "grad_norm": 465.8105773925781, "learning_rate": 9.68677486846669e-06, "loss": 34.7137, "step": 100970 }, { "epoch": 0.2039859888411705, "grad_norm": 101.00984954833984, "learning_rate": 9.686653250972928e-06, "loss": 14.6118, "step": 100980 }, { "epoch": 0.20400618947385432, "grad_norm": 233.64952087402344, "learning_rate": 9.686531610636986e-06, "loss": 12.9257, "step": 100990 }, { "epoch": 0.20402639010653814, "grad_norm": 324.09844970703125, "learning_rate": 9.68640994745946e-06, "loss": 23.3647, "step": 101000 }, { "epoch": 0.20404659073922196, "grad_norm": 198.11285400390625, "learning_rate": 9.686288261440937e-06, "loss": 22.1919, "step": 101010 }, { "epoch": 0.20406679137190578, "grad_norm": 196.38626098632812, "learning_rate": 9.686166552582015e-06, "loss": 19.6241, "step": 101020 }, { "epoch": 0.20408699200458957, "grad_norm": 431.7923278808594, "learning_rate": 9.686044820883284e-06, "loss": 21.4521, "step": 101030 }, { "epoch": 0.2041071926372734, "grad_norm": 163.1473388671875, "learning_rate": 9.68592306634534e-06, "loss": 11.8944, "step": 101040 }, { "epoch": 0.2041273932699572, "grad_norm": 387.5158386230469, "learning_rate": 9.685801288968777e-06, "loss": 30.5978, "step": 101050 }, { "epoch": 0.20414759390264103, "grad_norm": 255.41900634765625, "learning_rate": 9.685679488754184e-06, "loss": 17.0589, "step": 101060 }, { "epoch": 0.20416779453532485, "grad_norm": 464.91583251953125, "learning_rate": 9.685557665702158e-06, "loss": 25.8247, "step": 101070 }, { "epoch": 0.20418799516800867, "grad_norm": 296.2279968261719, "learning_rate": 9.685435819813294e-06, "loss": 24.4835, "step": 101080 }, { "epoch": 0.20420819580069247, "grad_norm": 147.9890899658203, "learning_rate": 9.685313951088184e-06, "loss": 13.8668, "step": 101090 }, { "epoch": 0.20422839643337629, "grad_norm": 221.9048614501953, "learning_rate": 9.68519205952742e-06, "loss": 37.8182, "step": 101100 }, { "epoch": 0.2042485970660601, "grad_norm": 158.10537719726562, "learning_rate": 9.6850701451316e-06, "loss": 26.7528, "step": 101110 }, { "epoch": 0.20426879769874393, "grad_norm": 241.25279235839844, "learning_rate": 9.684948207901315e-06, "loss": 18.7917, "step": 101120 }, { "epoch": 0.20428899833142775, "grad_norm": 53.3740234375, "learning_rate": 9.684826247837162e-06, "loss": 15.8906, "step": 101130 }, { "epoch": 0.20430919896411157, "grad_norm": 0.0, "learning_rate": 9.684704264939734e-06, "loss": 20.5161, "step": 101140 }, { "epoch": 0.20432939959679536, "grad_norm": 522.28173828125, "learning_rate": 9.684582259209625e-06, "loss": 14.4733, "step": 101150 }, { "epoch": 0.20434960022947918, "grad_norm": 194.93247985839844, "learning_rate": 9.68446023064743e-06, "loss": 16.5803, "step": 101160 }, { "epoch": 0.204369800862163, "grad_norm": 522.0534057617188, "learning_rate": 9.684338179253744e-06, "loss": 16.0077, "step": 101170 }, { "epoch": 0.20439000149484682, "grad_norm": 90.7969741821289, "learning_rate": 9.684216105029163e-06, "loss": 23.3637, "step": 101180 }, { "epoch": 0.20441020212753064, "grad_norm": 316.24688720703125, "learning_rate": 9.684094007974278e-06, "loss": 17.5849, "step": 101190 }, { "epoch": 0.20443040276021446, "grad_norm": 293.0252685546875, "learning_rate": 9.68397188808969e-06, "loss": 25.1432, "step": 101200 }, { "epoch": 0.20445060339289828, "grad_norm": 133.29168701171875, "learning_rate": 9.683849745375991e-06, "loss": 29.9024, "step": 101210 }, { "epoch": 0.20447080402558207, "grad_norm": 244.81536865234375, "learning_rate": 9.683727579833776e-06, "loss": 25.947, "step": 101220 }, { "epoch": 0.2044910046582659, "grad_norm": 207.00535583496094, "learning_rate": 9.68360539146364e-06, "loss": 22.5179, "step": 101230 }, { "epoch": 0.2045112052909497, "grad_norm": 330.05523681640625, "learning_rate": 9.683483180266179e-06, "loss": 28.8703, "step": 101240 }, { "epoch": 0.20453140592363353, "grad_norm": 553.4660034179688, "learning_rate": 9.683360946241988e-06, "loss": 30.1329, "step": 101250 }, { "epoch": 0.20455160655631735, "grad_norm": 152.03053283691406, "learning_rate": 9.683238689391667e-06, "loss": 15.844, "step": 101260 }, { "epoch": 0.20457180718900117, "grad_norm": 137.86778259277344, "learning_rate": 9.683116409715807e-06, "loss": 25.6283, "step": 101270 }, { "epoch": 0.20459200782168496, "grad_norm": 285.3121032714844, "learning_rate": 9.682994107215005e-06, "loss": 20.0973, "step": 101280 }, { "epoch": 0.20461220845436878, "grad_norm": 236.88717651367188, "learning_rate": 9.682871781889858e-06, "loss": 19.6079, "step": 101290 }, { "epoch": 0.2046324090870526, "grad_norm": 152.05108642578125, "learning_rate": 9.682749433740963e-06, "loss": 35.8053, "step": 101300 }, { "epoch": 0.20465260971973642, "grad_norm": 529.3506469726562, "learning_rate": 9.682627062768914e-06, "loss": 17.5771, "step": 101310 }, { "epoch": 0.20467281035242024, "grad_norm": 231.28106689453125, "learning_rate": 9.682504668974308e-06, "loss": 17.8264, "step": 101320 }, { "epoch": 0.20469301098510406, "grad_norm": 280.0007629394531, "learning_rate": 9.682382252357745e-06, "loss": 11.4027, "step": 101330 }, { "epoch": 0.20471321161778788, "grad_norm": 216.47772216796875, "learning_rate": 9.682259812919817e-06, "loss": 25.0653, "step": 101340 }, { "epoch": 0.20473341225047167, "grad_norm": 232.0452117919922, "learning_rate": 9.682137350661123e-06, "loss": 16.3263, "step": 101350 }, { "epoch": 0.2047536128831555, "grad_norm": 273.3803405761719, "learning_rate": 9.682014865582259e-06, "loss": 15.3185, "step": 101360 }, { "epoch": 0.20477381351583931, "grad_norm": 283.5557556152344, "learning_rate": 9.681892357683822e-06, "loss": 34.6981, "step": 101370 }, { "epoch": 0.20479401414852313, "grad_norm": 513.2012939453125, "learning_rate": 9.68176982696641e-06, "loss": 29.9087, "step": 101380 }, { "epoch": 0.20481421478120695, "grad_norm": 485.1903381347656, "learning_rate": 9.681647273430618e-06, "loss": 37.4014, "step": 101390 }, { "epoch": 0.20483441541389077, "grad_norm": 319.6163635253906, "learning_rate": 9.681524697077047e-06, "loss": 18.6017, "step": 101400 }, { "epoch": 0.20485461604657457, "grad_norm": 414.2503662109375, "learning_rate": 9.681402097906293e-06, "loss": 18.731, "step": 101410 }, { "epoch": 0.2048748166792584, "grad_norm": 181.6617889404297, "learning_rate": 9.681279475918952e-06, "loss": 20.5834, "step": 101420 }, { "epoch": 0.2048950173119422, "grad_norm": 30.911605834960938, "learning_rate": 9.681156831115622e-06, "loss": 31.1317, "step": 101430 }, { "epoch": 0.20491521794462603, "grad_norm": 165.45419311523438, "learning_rate": 9.681034163496902e-06, "loss": 28.7302, "step": 101440 }, { "epoch": 0.20493541857730985, "grad_norm": 192.30438232421875, "learning_rate": 9.68091147306339e-06, "loss": 30.2266, "step": 101450 }, { "epoch": 0.20495561920999367, "grad_norm": 82.55146789550781, "learning_rate": 9.680788759815682e-06, "loss": 11.2901, "step": 101460 }, { "epoch": 0.20497581984267746, "grad_norm": 373.444091796875, "learning_rate": 9.680666023754377e-06, "loss": 23.5239, "step": 101470 }, { "epoch": 0.20499602047536128, "grad_norm": 363.4557800292969, "learning_rate": 9.680543264880075e-06, "loss": 13.8462, "step": 101480 }, { "epoch": 0.2050162211080451, "grad_norm": 439.7153015136719, "learning_rate": 9.680420483193371e-06, "loss": 24.3751, "step": 101490 }, { "epoch": 0.20503642174072892, "grad_norm": 142.22608947753906, "learning_rate": 9.680297678694867e-06, "loss": 15.8608, "step": 101500 }, { "epoch": 0.20505662237341274, "grad_norm": 123.50161743164062, "learning_rate": 9.680174851385158e-06, "loss": 15.9612, "step": 101510 }, { "epoch": 0.20507682300609656, "grad_norm": 188.69131469726562, "learning_rate": 9.680052001264847e-06, "loss": 21.6949, "step": 101520 }, { "epoch": 0.20509702363878038, "grad_norm": 148.80606079101562, "learning_rate": 9.679929128334529e-06, "loss": 38.8519, "step": 101530 }, { "epoch": 0.20511722427146417, "grad_norm": 226.045654296875, "learning_rate": 9.679806232594803e-06, "loss": 10.462, "step": 101540 }, { "epoch": 0.205137424904148, "grad_norm": 804.4356079101562, "learning_rate": 9.67968331404627e-06, "loss": 17.534, "step": 101550 }, { "epoch": 0.2051576255368318, "grad_norm": 421.71392822265625, "learning_rate": 9.679560372689527e-06, "loss": 42.2448, "step": 101560 }, { "epoch": 0.20517782616951563, "grad_norm": 426.9685974121094, "learning_rate": 9.679437408525175e-06, "loss": 19.0105, "step": 101570 }, { "epoch": 0.20519802680219945, "grad_norm": 314.91339111328125, "learning_rate": 9.679314421553814e-06, "loss": 24.2534, "step": 101580 }, { "epoch": 0.20521822743488327, "grad_norm": 186.50088500976562, "learning_rate": 9.67919141177604e-06, "loss": 23.435, "step": 101590 }, { "epoch": 0.20523842806756706, "grad_norm": 124.22186279296875, "learning_rate": 9.679068379192455e-06, "loss": 18.1522, "step": 101600 }, { "epoch": 0.20525862870025088, "grad_norm": 190.21499633789062, "learning_rate": 9.67894532380366e-06, "loss": 33.0944, "step": 101610 }, { "epoch": 0.2052788293329347, "grad_norm": 190.8085479736328, "learning_rate": 9.67882224561025e-06, "loss": 19.0575, "step": 101620 }, { "epoch": 0.20529902996561852, "grad_norm": 184.5612335205078, "learning_rate": 9.678699144612829e-06, "loss": 18.3627, "step": 101630 }, { "epoch": 0.20531923059830234, "grad_norm": 208.79469299316406, "learning_rate": 9.678576020811996e-06, "loss": 13.3477, "step": 101640 }, { "epoch": 0.20533943123098616, "grad_norm": 266.97796630859375, "learning_rate": 9.678452874208352e-06, "loss": 17.8014, "step": 101650 }, { "epoch": 0.20535963186366998, "grad_norm": 427.71832275390625, "learning_rate": 9.678329704802495e-06, "loss": 17.9769, "step": 101660 }, { "epoch": 0.20537983249635378, "grad_norm": 294.06573486328125, "learning_rate": 9.678206512595027e-06, "loss": 20.3378, "step": 101670 }, { "epoch": 0.2054000331290376, "grad_norm": 419.09326171875, "learning_rate": 9.678083297586547e-06, "loss": 29.2313, "step": 101680 }, { "epoch": 0.20542023376172142, "grad_norm": 163.67494201660156, "learning_rate": 9.677960059777656e-06, "loss": 24.9221, "step": 101690 }, { "epoch": 0.20544043439440524, "grad_norm": 293.5347595214844, "learning_rate": 9.677836799168958e-06, "loss": 34.6048, "step": 101700 }, { "epoch": 0.20546063502708906, "grad_norm": 457.09808349609375, "learning_rate": 9.677713515761046e-06, "loss": 34.9383, "step": 101710 }, { "epoch": 0.20548083565977288, "grad_norm": 562.3930053710938, "learning_rate": 9.677590209554531e-06, "loss": 28.4236, "step": 101720 }, { "epoch": 0.20550103629245667, "grad_norm": 713.4270629882812, "learning_rate": 9.677466880550004e-06, "loss": 30.2746, "step": 101730 }, { "epoch": 0.2055212369251405, "grad_norm": 145.2191162109375, "learning_rate": 9.677343528748073e-06, "loss": 23.949, "step": 101740 }, { "epoch": 0.2055414375578243, "grad_norm": 299.7864074707031, "learning_rate": 9.677220154149338e-06, "loss": 32.7873, "step": 101750 }, { "epoch": 0.20556163819050813, "grad_norm": 962.3814086914062, "learning_rate": 9.677096756754397e-06, "loss": 25.6118, "step": 101760 }, { "epoch": 0.20558183882319195, "grad_norm": 0.0, "learning_rate": 9.676973336563856e-06, "loss": 20.8869, "step": 101770 }, { "epoch": 0.20560203945587577, "grad_norm": 461.8876647949219, "learning_rate": 9.676849893578312e-06, "loss": 23.715, "step": 101780 }, { "epoch": 0.20562224008855956, "grad_norm": 301.40191650390625, "learning_rate": 9.67672642779837e-06, "loss": 16.9546, "step": 101790 }, { "epoch": 0.20564244072124338, "grad_norm": 204.1714324951172, "learning_rate": 9.67660293922463e-06, "loss": 17.3807, "step": 101800 }, { "epoch": 0.2056626413539272, "grad_norm": 273.2787170410156, "learning_rate": 9.676479427857694e-06, "loss": 32.7158, "step": 101810 }, { "epoch": 0.20568284198661102, "grad_norm": 357.1844177246094, "learning_rate": 9.676355893698165e-06, "loss": 28.1791, "step": 101820 }, { "epoch": 0.20570304261929484, "grad_norm": 298.16827392578125, "learning_rate": 9.676232336746645e-06, "loss": 23.5094, "step": 101830 }, { "epoch": 0.20572324325197866, "grad_norm": 54.44477462768555, "learning_rate": 9.676108757003735e-06, "loss": 18.8246, "step": 101840 }, { "epoch": 0.20574344388466248, "grad_norm": 908.67333984375, "learning_rate": 9.67598515447004e-06, "loss": 46.381, "step": 101850 }, { "epoch": 0.20576364451734627, "grad_norm": 325.52313232421875, "learning_rate": 9.67586152914616e-06, "loss": 30.6589, "step": 101860 }, { "epoch": 0.2057838451500301, "grad_norm": 71.09828186035156, "learning_rate": 9.675737881032696e-06, "loss": 15.167, "step": 101870 }, { "epoch": 0.2058040457827139, "grad_norm": 232.748291015625, "learning_rate": 9.675614210130252e-06, "loss": 44.8976, "step": 101880 }, { "epoch": 0.20582424641539773, "grad_norm": 553.4481811523438, "learning_rate": 9.675490516439434e-06, "loss": 31.7713, "step": 101890 }, { "epoch": 0.20584444704808155, "grad_norm": 200.46878051757812, "learning_rate": 9.675366799960842e-06, "loss": 23.7576, "step": 101900 }, { "epoch": 0.20586464768076537, "grad_norm": 146.7136688232422, "learning_rate": 9.675243060695079e-06, "loss": 22.6254, "step": 101910 }, { "epoch": 0.20588484831344916, "grad_norm": 221.5194091796875, "learning_rate": 9.675119298642748e-06, "loss": 37.0698, "step": 101920 }, { "epoch": 0.20590504894613298, "grad_norm": 169.0518341064453, "learning_rate": 9.674995513804452e-06, "loss": 19.5829, "step": 101930 }, { "epoch": 0.2059252495788168, "grad_norm": 431.8672180175781, "learning_rate": 9.674871706180796e-06, "loss": 21.7051, "step": 101940 }, { "epoch": 0.20594545021150062, "grad_norm": 1407.1002197265625, "learning_rate": 9.674747875772381e-06, "loss": 19.8814, "step": 101950 }, { "epoch": 0.20596565084418444, "grad_norm": 741.7932739257812, "learning_rate": 9.674624022579814e-06, "loss": 25.9222, "step": 101960 }, { "epoch": 0.20598585147686826, "grad_norm": 335.3535461425781, "learning_rate": 9.674500146603695e-06, "loss": 17.1545, "step": 101970 }, { "epoch": 0.20600605210955208, "grad_norm": 132.72311401367188, "learning_rate": 9.674376247844628e-06, "loss": 18.7138, "step": 101980 }, { "epoch": 0.20602625274223588, "grad_norm": 425.0877990722656, "learning_rate": 9.67425232630322e-06, "loss": 26.037, "step": 101990 }, { "epoch": 0.2060464533749197, "grad_norm": 204.48147583007812, "learning_rate": 9.674128381980073e-06, "loss": 27.55, "step": 102000 }, { "epoch": 0.20606665400760352, "grad_norm": 303.63409423828125, "learning_rate": 9.67400441487579e-06, "loss": 16.9221, "step": 102010 }, { "epoch": 0.20608685464028734, "grad_norm": 185.57276916503906, "learning_rate": 9.673880424990978e-06, "loss": 32.9878, "step": 102020 }, { "epoch": 0.20610705527297116, "grad_norm": 308.2160949707031, "learning_rate": 9.673756412326238e-06, "loss": 17.6505, "step": 102030 }, { "epoch": 0.20612725590565498, "grad_norm": 75.07550811767578, "learning_rate": 9.673632376882178e-06, "loss": 22.7074, "step": 102040 }, { "epoch": 0.20614745653833877, "grad_norm": 153.60760498046875, "learning_rate": 9.673508318659399e-06, "loss": 16.2566, "step": 102050 }, { "epoch": 0.2061676571710226, "grad_norm": 418.9142761230469, "learning_rate": 9.673384237658508e-06, "loss": 27.5682, "step": 102060 }, { "epoch": 0.2061878578037064, "grad_norm": 312.63079833984375, "learning_rate": 9.67326013388011e-06, "loss": 13.5159, "step": 102070 }, { "epoch": 0.20620805843639023, "grad_norm": 301.280029296875, "learning_rate": 9.673136007324806e-06, "loss": 23.4476, "step": 102080 }, { "epoch": 0.20622825906907405, "grad_norm": 526.4796142578125, "learning_rate": 9.673011857993207e-06, "loss": 24.2594, "step": 102090 }, { "epoch": 0.20624845970175787, "grad_norm": 346.55914306640625, "learning_rate": 9.672887685885913e-06, "loss": 26.2171, "step": 102100 }, { "epoch": 0.20626866033444166, "grad_norm": 0.0, "learning_rate": 9.672763491003531e-06, "loss": 26.3869, "step": 102110 }, { "epoch": 0.20628886096712548, "grad_norm": 159.93003845214844, "learning_rate": 9.672639273346668e-06, "loss": 20.6293, "step": 102120 }, { "epoch": 0.2063090615998093, "grad_norm": 349.87353515625, "learning_rate": 9.672515032915926e-06, "loss": 28.9994, "step": 102130 }, { "epoch": 0.20632926223249312, "grad_norm": 111.333251953125, "learning_rate": 9.672390769711914e-06, "loss": 15.0239, "step": 102140 }, { "epoch": 0.20634946286517694, "grad_norm": 242.29188537597656, "learning_rate": 9.672266483735235e-06, "loss": 15.6374, "step": 102150 }, { "epoch": 0.20636966349786076, "grad_norm": 381.5447998046875, "learning_rate": 9.672142174986497e-06, "loss": 30.8005, "step": 102160 }, { "epoch": 0.20638986413054458, "grad_norm": 261.2716369628906, "learning_rate": 9.672017843466305e-06, "loss": 43.295, "step": 102170 }, { "epoch": 0.20641006476322837, "grad_norm": 412.9109802246094, "learning_rate": 9.671893489175263e-06, "loss": 36.7552, "step": 102180 }, { "epoch": 0.2064302653959122, "grad_norm": 157.13751220703125, "learning_rate": 9.67176911211398e-06, "loss": 45.0401, "step": 102190 }, { "epoch": 0.206450466028596, "grad_norm": 577.581787109375, "learning_rate": 9.671644712283061e-06, "loss": 27.7555, "step": 102200 }, { "epoch": 0.20647066666127983, "grad_norm": 472.9656982421875, "learning_rate": 9.671520289683112e-06, "loss": 13.3014, "step": 102210 }, { "epoch": 0.20649086729396365, "grad_norm": 55.94514083862305, "learning_rate": 9.671395844314739e-06, "loss": 20.2171, "step": 102220 }, { "epoch": 0.20651106792664747, "grad_norm": 321.1738586425781, "learning_rate": 9.67127137617855e-06, "loss": 26.8423, "step": 102230 }, { "epoch": 0.20653126855933127, "grad_norm": 232.24632263183594, "learning_rate": 9.67114688527515e-06, "loss": 17.7059, "step": 102240 }, { "epoch": 0.20655146919201509, "grad_norm": 59.65571975708008, "learning_rate": 9.671022371605148e-06, "loss": 21.8404, "step": 102250 }, { "epoch": 0.2065716698246989, "grad_norm": 499.35040283203125, "learning_rate": 9.670897835169149e-06, "loss": 26.2061, "step": 102260 }, { "epoch": 0.20659187045738273, "grad_norm": 278.8811340332031, "learning_rate": 9.67077327596776e-06, "loss": 20.3674, "step": 102270 }, { "epoch": 0.20661207109006655, "grad_norm": 112.78215789794922, "learning_rate": 9.67064869400159e-06, "loss": 29.8452, "step": 102280 }, { "epoch": 0.20663227172275037, "grad_norm": 0.0, "learning_rate": 9.670524089271242e-06, "loss": 27.9536, "step": 102290 }, { "epoch": 0.20665247235543419, "grad_norm": 348.2746276855469, "learning_rate": 9.670399461777328e-06, "loss": 17.836, "step": 102300 }, { "epoch": 0.20667267298811798, "grad_norm": 519.5477294921875, "learning_rate": 9.670274811520454e-06, "loss": 18.3711, "step": 102310 }, { "epoch": 0.2066928736208018, "grad_norm": 191.66493225097656, "learning_rate": 9.670150138501226e-06, "loss": 21.3575, "step": 102320 }, { "epoch": 0.20671307425348562, "grad_norm": 368.1506042480469, "learning_rate": 9.670025442720253e-06, "loss": 23.9723, "step": 102330 }, { "epoch": 0.20673327488616944, "grad_norm": 117.31512451171875, "learning_rate": 9.669900724178142e-06, "loss": 16.0795, "step": 102340 }, { "epoch": 0.20675347551885326, "grad_norm": 291.7911071777344, "learning_rate": 9.6697759828755e-06, "loss": 36.4837, "step": 102350 }, { "epoch": 0.20677367615153708, "grad_norm": 396.9490966796875, "learning_rate": 9.669651218812938e-06, "loss": 27.9856, "step": 102360 }, { "epoch": 0.20679387678422087, "grad_norm": 187.86724853515625, "learning_rate": 9.669526431991063e-06, "loss": 19.2581, "step": 102370 }, { "epoch": 0.2068140774169047, "grad_norm": 257.0814514160156, "learning_rate": 9.669401622410481e-06, "loss": 22.0447, "step": 102380 }, { "epoch": 0.2068342780495885, "grad_norm": 529.5189819335938, "learning_rate": 9.669276790071803e-06, "loss": 36.7995, "step": 102390 }, { "epoch": 0.20685447868227233, "grad_norm": 362.74688720703125, "learning_rate": 9.669151934975635e-06, "loss": 21.2922, "step": 102400 }, { "epoch": 0.20687467931495615, "grad_norm": 591.704345703125, "learning_rate": 9.669027057122586e-06, "loss": 29.7817, "step": 102410 }, { "epoch": 0.20689487994763997, "grad_norm": 25.636741638183594, "learning_rate": 9.668902156513268e-06, "loss": 11.6815, "step": 102420 }, { "epoch": 0.20691508058032376, "grad_norm": 355.00103759765625, "learning_rate": 9.668777233148285e-06, "loss": 17.6625, "step": 102430 }, { "epoch": 0.20693528121300758, "grad_norm": 148.0870361328125, "learning_rate": 9.668652287028249e-06, "loss": 18.6561, "step": 102440 }, { "epoch": 0.2069554818456914, "grad_norm": 649.0115966796875, "learning_rate": 9.668527318153769e-06, "loss": 28.3282, "step": 102450 }, { "epoch": 0.20697568247837522, "grad_norm": 237.8546142578125, "learning_rate": 9.66840232652545e-06, "loss": 36.2316, "step": 102460 }, { "epoch": 0.20699588311105904, "grad_norm": 285.3208312988281, "learning_rate": 9.668277312143908e-06, "loss": 18.9138, "step": 102470 }, { "epoch": 0.20701608374374286, "grad_norm": 277.7880554199219, "learning_rate": 9.668152275009747e-06, "loss": 26.0631, "step": 102480 }, { "epoch": 0.20703628437642668, "grad_norm": 292.02093505859375, "learning_rate": 9.668027215123577e-06, "loss": 27.2076, "step": 102490 }, { "epoch": 0.20705648500911047, "grad_norm": 323.2879943847656, "learning_rate": 9.667902132486009e-06, "loss": 26.2994, "step": 102500 }, { "epoch": 0.2070766856417943, "grad_norm": 315.9519958496094, "learning_rate": 9.667777027097654e-06, "loss": 20.0582, "step": 102510 }, { "epoch": 0.20709688627447811, "grad_norm": 303.208740234375, "learning_rate": 9.667651898959118e-06, "loss": 20.0916, "step": 102520 }, { "epoch": 0.20711708690716193, "grad_norm": 154.44418334960938, "learning_rate": 9.667526748071013e-06, "loss": 18.2196, "step": 102530 }, { "epoch": 0.20713728753984575, "grad_norm": 316.3031005859375, "learning_rate": 9.667401574433948e-06, "loss": 23.968, "step": 102540 }, { "epoch": 0.20715748817252957, "grad_norm": 235.7454833984375, "learning_rate": 9.667276378048535e-06, "loss": 17.8411, "step": 102550 }, { "epoch": 0.20717768880521337, "grad_norm": 450.3836975097656, "learning_rate": 9.667151158915382e-06, "loss": 23.7501, "step": 102560 }, { "epoch": 0.2071978894378972, "grad_norm": 254.8665008544922, "learning_rate": 9.667025917035102e-06, "loss": 16.5429, "step": 102570 }, { "epoch": 0.207218090070581, "grad_norm": 324.4065856933594, "learning_rate": 9.666900652408302e-06, "loss": 18.0588, "step": 102580 }, { "epoch": 0.20723829070326483, "grad_norm": 202.435546875, "learning_rate": 9.666775365035596e-06, "loss": 24.0999, "step": 102590 }, { "epoch": 0.20725849133594865, "grad_norm": 251.97874450683594, "learning_rate": 9.666650054917591e-06, "loss": 15.4843, "step": 102600 }, { "epoch": 0.20727869196863247, "grad_norm": 512.2381591796875, "learning_rate": 9.666524722054902e-06, "loss": 18.0385, "step": 102610 }, { "epoch": 0.2072988926013163, "grad_norm": 116.76329040527344, "learning_rate": 9.666399366448135e-06, "loss": 12.987, "step": 102620 }, { "epoch": 0.20731909323400008, "grad_norm": 511.04852294921875, "learning_rate": 9.666273988097904e-06, "loss": 31.21, "step": 102630 }, { "epoch": 0.2073392938666839, "grad_norm": 392.749755859375, "learning_rate": 9.66614858700482e-06, "loss": 22.0985, "step": 102640 }, { "epoch": 0.20735949449936772, "grad_norm": 386.3563537597656, "learning_rate": 9.666023163169493e-06, "loss": 27.4457, "step": 102650 }, { "epoch": 0.20737969513205154, "grad_norm": 23.971805572509766, "learning_rate": 9.665897716592537e-06, "loss": 13.5012, "step": 102660 }, { "epoch": 0.20739989576473536, "grad_norm": 158.0446319580078, "learning_rate": 9.66577224727456e-06, "loss": 13.3726, "step": 102670 }, { "epoch": 0.20742009639741918, "grad_norm": 418.9306945800781, "learning_rate": 9.665646755216175e-06, "loss": 21.2681, "step": 102680 }, { "epoch": 0.20744029703010297, "grad_norm": 293.07769775390625, "learning_rate": 9.665521240417993e-06, "loss": 22.7681, "step": 102690 }, { "epoch": 0.2074604976627868, "grad_norm": 371.9073486328125, "learning_rate": 9.665395702880627e-06, "loss": 13.724, "step": 102700 }, { "epoch": 0.2074806982954706, "grad_norm": 436.6129150390625, "learning_rate": 9.665270142604688e-06, "loss": 14.4142, "step": 102710 }, { "epoch": 0.20750089892815443, "grad_norm": 382.1452331542969, "learning_rate": 9.665144559590789e-06, "loss": 19.2626, "step": 102720 }, { "epoch": 0.20752109956083825, "grad_norm": 268.47113037109375, "learning_rate": 9.66501895383954e-06, "loss": 19.752, "step": 102730 }, { "epoch": 0.20754130019352207, "grad_norm": 501.5208740234375, "learning_rate": 9.664893325351556e-06, "loss": 29.8046, "step": 102740 }, { "epoch": 0.20756150082620586, "grad_norm": 228.3681182861328, "learning_rate": 9.664767674127447e-06, "loss": 28.3846, "step": 102750 }, { "epoch": 0.20758170145888968, "grad_norm": 181.8822784423828, "learning_rate": 9.664642000167825e-06, "loss": 17.537, "step": 102760 }, { "epoch": 0.2076019020915735, "grad_norm": 540.6127319335938, "learning_rate": 9.664516303473305e-06, "loss": 26.6785, "step": 102770 }, { "epoch": 0.20762210272425732, "grad_norm": 284.40789794921875, "learning_rate": 9.664390584044497e-06, "loss": 32.3802, "step": 102780 }, { "epoch": 0.20764230335694114, "grad_norm": 466.1345520019531, "learning_rate": 9.664264841882016e-06, "loss": 20.6958, "step": 102790 }, { "epoch": 0.20766250398962496, "grad_norm": 146.665283203125, "learning_rate": 9.664139076986473e-06, "loss": 16.0455, "step": 102800 }, { "epoch": 0.20768270462230878, "grad_norm": 547.5885620117188, "learning_rate": 9.664013289358483e-06, "loss": 29.0434, "step": 102810 }, { "epoch": 0.20770290525499258, "grad_norm": 296.29119873046875, "learning_rate": 9.663887478998657e-06, "loss": 22.1883, "step": 102820 }, { "epoch": 0.2077231058876764, "grad_norm": 346.2167663574219, "learning_rate": 9.66376164590761e-06, "loss": 23.4366, "step": 102830 }, { "epoch": 0.20774330652036022, "grad_norm": 465.2508850097656, "learning_rate": 9.663635790085954e-06, "loss": 39.0275, "step": 102840 }, { "epoch": 0.20776350715304404, "grad_norm": 18.415067672729492, "learning_rate": 9.663509911534302e-06, "loss": 22.2039, "step": 102850 }, { "epoch": 0.20778370778572786, "grad_norm": 304.82012939453125, "learning_rate": 9.663384010253269e-06, "loss": 23.9466, "step": 102860 }, { "epoch": 0.20780390841841168, "grad_norm": 163.3314208984375, "learning_rate": 9.663258086243468e-06, "loss": 14.6294, "step": 102870 }, { "epoch": 0.20782410905109547, "grad_norm": 399.7298278808594, "learning_rate": 9.663132139505513e-06, "loss": 14.6858, "step": 102880 }, { "epoch": 0.2078443096837793, "grad_norm": 352.4617614746094, "learning_rate": 9.663006170040016e-06, "loss": 17.5318, "step": 102890 }, { "epoch": 0.2078645103164631, "grad_norm": 530.8956298828125, "learning_rate": 9.662880177847595e-06, "loss": 13.0121, "step": 102900 }, { "epoch": 0.20788471094914693, "grad_norm": 391.5675354003906, "learning_rate": 9.66275416292886e-06, "loss": 15.0694, "step": 102910 }, { "epoch": 0.20790491158183075, "grad_norm": 293.18292236328125, "learning_rate": 9.662628125284426e-06, "loss": 28.9092, "step": 102920 }, { "epoch": 0.20792511221451457, "grad_norm": 17.718252182006836, "learning_rate": 9.66250206491491e-06, "loss": 32.1881, "step": 102930 }, { "epoch": 0.2079453128471984, "grad_norm": 718.5989990234375, "learning_rate": 9.662375981820921e-06, "loss": 43.8416, "step": 102940 }, { "epoch": 0.20796551347988218, "grad_norm": 414.27960205078125, "learning_rate": 9.66224987600308e-06, "loss": 13.5731, "step": 102950 }, { "epoch": 0.207985714112566, "grad_norm": 376.6858825683594, "learning_rate": 9.662123747461998e-06, "loss": 24.3768, "step": 102960 }, { "epoch": 0.20800591474524982, "grad_norm": 247.4778289794922, "learning_rate": 9.66199759619829e-06, "loss": 35.0505, "step": 102970 }, { "epoch": 0.20802611537793364, "grad_norm": 125.6076431274414, "learning_rate": 9.661871422212572e-06, "loss": 11.4807, "step": 102980 }, { "epoch": 0.20804631601061746, "grad_norm": 242.71240234375, "learning_rate": 9.661745225505457e-06, "loss": 21.7035, "step": 102990 }, { "epoch": 0.20806651664330128, "grad_norm": 30.93901252746582, "learning_rate": 9.661619006077562e-06, "loss": 13.5016, "step": 103000 }, { "epoch": 0.20808671727598507, "grad_norm": 252.66506958007812, "learning_rate": 9.661492763929502e-06, "loss": 28.4262, "step": 103010 }, { "epoch": 0.2081069179086689, "grad_norm": 362.56353759765625, "learning_rate": 9.66136649906189e-06, "loss": 15.8979, "step": 103020 }, { "epoch": 0.2081271185413527, "grad_norm": 207.79449462890625, "learning_rate": 9.661240211475342e-06, "loss": 20.0809, "step": 103030 }, { "epoch": 0.20814731917403653, "grad_norm": 211.7888946533203, "learning_rate": 9.661113901170477e-06, "loss": 20.09, "step": 103040 }, { "epoch": 0.20816751980672035, "grad_norm": 638.0621337890625, "learning_rate": 9.660987568147907e-06, "loss": 20.4668, "step": 103050 }, { "epoch": 0.20818772043940417, "grad_norm": 113.02466583251953, "learning_rate": 9.66086121240825e-06, "loss": 36.5033, "step": 103060 }, { "epoch": 0.20820792107208796, "grad_norm": 117.58332061767578, "learning_rate": 9.66073483395212e-06, "loss": 16.5878, "step": 103070 }, { "epoch": 0.20822812170477178, "grad_norm": 274.260498046875, "learning_rate": 9.660608432780133e-06, "loss": 10.5389, "step": 103080 }, { "epoch": 0.2082483223374556, "grad_norm": 229.77178955078125, "learning_rate": 9.660482008892907e-06, "loss": 14.782, "step": 103090 }, { "epoch": 0.20826852297013942, "grad_norm": 496.8359375, "learning_rate": 9.660355562291055e-06, "loss": 33.856, "step": 103100 }, { "epoch": 0.20828872360282324, "grad_norm": 288.0745544433594, "learning_rate": 9.660229092975197e-06, "loss": 14.7125, "step": 103110 }, { "epoch": 0.20830892423550706, "grad_norm": 346.4180908203125, "learning_rate": 9.660102600945947e-06, "loss": 16.8883, "step": 103120 }, { "epoch": 0.20832912486819088, "grad_norm": 286.86895751953125, "learning_rate": 9.659976086203922e-06, "loss": 14.8164, "step": 103130 }, { "epoch": 0.20834932550087468, "grad_norm": 208.9429473876953, "learning_rate": 9.65984954874974e-06, "loss": 26.5682, "step": 103140 }, { "epoch": 0.2083695261335585, "grad_norm": 131.174560546875, "learning_rate": 9.659722988584015e-06, "loss": 32.1609, "step": 103150 }, { "epoch": 0.20838972676624232, "grad_norm": 460.0570373535156, "learning_rate": 9.659596405707366e-06, "loss": 31.5538, "step": 103160 }, { "epoch": 0.20840992739892614, "grad_norm": 317.60687255859375, "learning_rate": 9.659469800120408e-06, "loss": 23.7823, "step": 103170 }, { "epoch": 0.20843012803160996, "grad_norm": 86.06813049316406, "learning_rate": 9.65934317182376e-06, "loss": 13.583, "step": 103180 }, { "epoch": 0.20845032866429378, "grad_norm": 110.59232330322266, "learning_rate": 9.65921652081804e-06, "loss": 20.0235, "step": 103190 }, { "epoch": 0.20847052929697757, "grad_norm": 183.11984252929688, "learning_rate": 9.659089847103863e-06, "loss": 19.0219, "step": 103200 }, { "epoch": 0.2084907299296614, "grad_norm": 507.72210693359375, "learning_rate": 9.658963150681848e-06, "loss": 35.1277, "step": 103210 }, { "epoch": 0.2085109305623452, "grad_norm": 248.78558349609375, "learning_rate": 9.658836431552609e-06, "loss": 26.616, "step": 103220 }, { "epoch": 0.20853113119502903, "grad_norm": 508.5252990722656, "learning_rate": 9.658709689716768e-06, "loss": 18.8354, "step": 103230 }, { "epoch": 0.20855133182771285, "grad_norm": 378.1915283203125, "learning_rate": 9.65858292517494e-06, "loss": 16.2519, "step": 103240 }, { "epoch": 0.20857153246039667, "grad_norm": 263.7326965332031, "learning_rate": 9.658456137927745e-06, "loss": 28.4527, "step": 103250 }, { "epoch": 0.20859173309308046, "grad_norm": 179.5367431640625, "learning_rate": 9.6583293279758e-06, "loss": 17.5166, "step": 103260 }, { "epoch": 0.20861193372576428, "grad_norm": 300.1049499511719, "learning_rate": 9.658202495319721e-06, "loss": 14.7651, "step": 103270 }, { "epoch": 0.2086321343584481, "grad_norm": 410.1790771484375, "learning_rate": 9.65807563996013e-06, "loss": 14.7067, "step": 103280 }, { "epoch": 0.20865233499113192, "grad_norm": 200.55282592773438, "learning_rate": 9.657948761897643e-06, "loss": 20.7336, "step": 103290 }, { "epoch": 0.20867253562381574, "grad_norm": 49.709964752197266, "learning_rate": 9.65782186113288e-06, "loss": 16.4297, "step": 103300 }, { "epoch": 0.20869273625649956, "grad_norm": 0.0, "learning_rate": 9.657694937666454e-06, "loss": 14.8246, "step": 103310 }, { "epoch": 0.20871293688918338, "grad_norm": 194.11756896972656, "learning_rate": 9.65756799149899e-06, "loss": 25.8598, "step": 103320 }, { "epoch": 0.20873313752186717, "grad_norm": 171.61805725097656, "learning_rate": 9.657441022631105e-06, "loss": 33.056, "step": 103330 }, { "epoch": 0.208753338154551, "grad_norm": 324.0179443359375, "learning_rate": 9.657314031063419e-06, "loss": 15.3228, "step": 103340 }, { "epoch": 0.2087735387872348, "grad_norm": 249.92178344726562, "learning_rate": 9.657187016796546e-06, "loss": 14.0316, "step": 103350 }, { "epoch": 0.20879373941991863, "grad_norm": 108.13058471679688, "learning_rate": 9.657059979831109e-06, "loss": 16.4686, "step": 103360 }, { "epoch": 0.20881394005260245, "grad_norm": 0.0, "learning_rate": 9.656932920167727e-06, "loss": 27.668, "step": 103370 }, { "epoch": 0.20883414068528627, "grad_norm": 425.6054382324219, "learning_rate": 9.65680583780702e-06, "loss": 20.3278, "step": 103380 }, { "epoch": 0.20885434131797007, "grad_norm": 61.50248336791992, "learning_rate": 9.656678732749605e-06, "loss": 12.1155, "step": 103390 }, { "epoch": 0.20887454195065389, "grad_norm": 142.77931213378906, "learning_rate": 9.656551604996102e-06, "loss": 29.2958, "step": 103400 }, { "epoch": 0.2088947425833377, "grad_norm": 254.4815673828125, "learning_rate": 9.656424454547131e-06, "loss": 21.3157, "step": 103410 }, { "epoch": 0.20891494321602153, "grad_norm": 378.2716369628906, "learning_rate": 9.656297281403315e-06, "loss": 16.6601, "step": 103420 }, { "epoch": 0.20893514384870535, "grad_norm": 498.78179931640625, "learning_rate": 9.656170085565268e-06, "loss": 23.2032, "step": 103430 }, { "epoch": 0.20895534448138917, "grad_norm": 76.46498107910156, "learning_rate": 9.656042867033613e-06, "loss": 16.7704, "step": 103440 }, { "epoch": 0.20897554511407299, "grad_norm": 346.49713134765625, "learning_rate": 9.655915625808971e-06, "loss": 18.2792, "step": 103450 }, { "epoch": 0.20899574574675678, "grad_norm": 209.88772583007812, "learning_rate": 9.65578836189196e-06, "loss": 26.1949, "step": 103460 }, { "epoch": 0.2090159463794406, "grad_norm": 560.983154296875, "learning_rate": 9.6556610752832e-06, "loss": 26.251, "step": 103470 }, { "epoch": 0.20903614701212442, "grad_norm": 329.51025390625, "learning_rate": 9.655533765983315e-06, "loss": 18.6906, "step": 103480 }, { "epoch": 0.20905634764480824, "grad_norm": 260.24334716796875, "learning_rate": 9.655406433992922e-06, "loss": 38.4353, "step": 103490 }, { "epoch": 0.20907654827749206, "grad_norm": 381.6573181152344, "learning_rate": 9.655279079312643e-06, "loss": 27.1913, "step": 103500 }, { "epoch": 0.20909674891017588, "grad_norm": 178.34376525878906, "learning_rate": 9.655151701943098e-06, "loss": 29.7122, "step": 103510 }, { "epoch": 0.20911694954285967, "grad_norm": 152.13702392578125, "learning_rate": 9.655024301884908e-06, "loss": 20.308, "step": 103520 }, { "epoch": 0.2091371501755435, "grad_norm": 225.90870666503906, "learning_rate": 9.654896879138693e-06, "loss": 11.6898, "step": 103530 }, { "epoch": 0.2091573508082273, "grad_norm": 132.42198181152344, "learning_rate": 9.654769433705079e-06, "loss": 17.4469, "step": 103540 }, { "epoch": 0.20917755144091113, "grad_norm": 120.67960357666016, "learning_rate": 9.65464196558468e-06, "loss": 69.0334, "step": 103550 }, { "epoch": 0.20919775207359495, "grad_norm": 284.81524658203125, "learning_rate": 9.65451447477812e-06, "loss": 15.848, "step": 103560 }, { "epoch": 0.20921795270627877, "grad_norm": 235.26646423339844, "learning_rate": 9.654386961286023e-06, "loss": 15.7236, "step": 103570 }, { "epoch": 0.20923815333896256, "grad_norm": 224.3607177734375, "learning_rate": 9.654259425109009e-06, "loss": 18.5969, "step": 103580 }, { "epoch": 0.20925835397164638, "grad_norm": 241.5373077392578, "learning_rate": 9.654131866247698e-06, "loss": 23.2734, "step": 103590 }, { "epoch": 0.2092785546043302, "grad_norm": 229.8438262939453, "learning_rate": 9.654004284702712e-06, "loss": 11.5853, "step": 103600 }, { "epoch": 0.20929875523701402, "grad_norm": 331.02032470703125, "learning_rate": 9.653876680474674e-06, "loss": 27.2241, "step": 103610 }, { "epoch": 0.20931895586969784, "grad_norm": 256.1419372558594, "learning_rate": 9.653749053564206e-06, "loss": 25.809, "step": 103620 }, { "epoch": 0.20933915650238166, "grad_norm": 376.040771484375, "learning_rate": 9.65362140397193e-06, "loss": 16.9358, "step": 103630 }, { "epoch": 0.20935935713506548, "grad_norm": 249.8609161376953, "learning_rate": 9.653493731698467e-06, "loss": 20.8871, "step": 103640 }, { "epoch": 0.20937955776774927, "grad_norm": 237.93675231933594, "learning_rate": 9.65336603674444e-06, "loss": 21.7635, "step": 103650 }, { "epoch": 0.2093997584004331, "grad_norm": 625.8997802734375, "learning_rate": 9.653238319110473e-06, "loss": 18.7193, "step": 103660 }, { "epoch": 0.20941995903311691, "grad_norm": 135.2100372314453, "learning_rate": 9.653110578797183e-06, "loss": 32.2682, "step": 103670 }, { "epoch": 0.20944015966580073, "grad_norm": 301.8247985839844, "learning_rate": 9.652982815805199e-06, "loss": 19.4579, "step": 103680 }, { "epoch": 0.20946036029848455, "grad_norm": 432.82275390625, "learning_rate": 9.652855030135139e-06, "loss": 22.084, "step": 103690 }, { "epoch": 0.20948056093116837, "grad_norm": 330.8104553222656, "learning_rate": 9.65272722178763e-06, "loss": 33.8258, "step": 103700 }, { "epoch": 0.20950076156385217, "grad_norm": 41.922027587890625, "learning_rate": 9.652599390763294e-06, "loss": 35.665, "step": 103710 }, { "epoch": 0.209520962196536, "grad_norm": 144.7200164794922, "learning_rate": 9.652471537062751e-06, "loss": 12.1727, "step": 103720 }, { "epoch": 0.2095411628292198, "grad_norm": 143.56564331054688, "learning_rate": 9.652343660686626e-06, "loss": 16.0691, "step": 103730 }, { "epoch": 0.20956136346190363, "grad_norm": 315.73785400390625, "learning_rate": 9.652215761635541e-06, "loss": 22.49, "step": 103740 }, { "epoch": 0.20958156409458745, "grad_norm": 189.2168731689453, "learning_rate": 9.652087839910123e-06, "loss": 31.174, "step": 103750 }, { "epoch": 0.20960176472727127, "grad_norm": 85.02334594726562, "learning_rate": 9.651959895510992e-06, "loss": 28.5205, "step": 103760 }, { "epoch": 0.2096219653599551, "grad_norm": 158.50022888183594, "learning_rate": 9.651831928438773e-06, "loss": 24.1713, "step": 103770 }, { "epoch": 0.20964216599263888, "grad_norm": 227.3443603515625, "learning_rate": 9.65170393869409e-06, "loss": 13.4693, "step": 103780 }, { "epoch": 0.2096623666253227, "grad_norm": 208.55650329589844, "learning_rate": 9.651575926277566e-06, "loss": 14.5945, "step": 103790 }, { "epoch": 0.20968256725800652, "grad_norm": 349.9908752441406, "learning_rate": 9.651447891189824e-06, "loss": 19.4639, "step": 103800 }, { "epoch": 0.20970276789069034, "grad_norm": 27.307661056518555, "learning_rate": 9.65131983343149e-06, "loss": 31.7465, "step": 103810 }, { "epoch": 0.20972296852337416, "grad_norm": 446.0776062011719, "learning_rate": 9.651191753003187e-06, "loss": 24.9699, "step": 103820 }, { "epoch": 0.20974316915605798, "grad_norm": 473.46527099609375, "learning_rate": 9.65106364990554e-06, "loss": 36.0089, "step": 103830 }, { "epoch": 0.20976336978874177, "grad_norm": 353.6355895996094, "learning_rate": 9.650935524139172e-06, "loss": 22.1413, "step": 103840 }, { "epoch": 0.2097835704214256, "grad_norm": 321.7108459472656, "learning_rate": 9.650807375704708e-06, "loss": 26.7669, "step": 103850 }, { "epoch": 0.2098037710541094, "grad_norm": 177.98133850097656, "learning_rate": 9.650679204602773e-06, "loss": 17.699, "step": 103860 }, { "epoch": 0.20982397168679323, "grad_norm": 124.41465759277344, "learning_rate": 9.650551010833993e-06, "loss": 15.7084, "step": 103870 }, { "epoch": 0.20984417231947705, "grad_norm": 289.26739501953125, "learning_rate": 9.650422794398991e-06, "loss": 26.655, "step": 103880 }, { "epoch": 0.20986437295216087, "grad_norm": 682.9315795898438, "learning_rate": 9.650294555298392e-06, "loss": 33.765, "step": 103890 }, { "epoch": 0.20988457358484466, "grad_norm": 456.018798828125, "learning_rate": 9.650166293532822e-06, "loss": 19.7399, "step": 103900 }, { "epoch": 0.20990477421752848, "grad_norm": 407.6540832519531, "learning_rate": 9.650038009102905e-06, "loss": 15.1263, "step": 103910 }, { "epoch": 0.2099249748502123, "grad_norm": 414.1145935058594, "learning_rate": 9.649909702009265e-06, "loss": 20.1588, "step": 103920 }, { "epoch": 0.20994517548289612, "grad_norm": 379.83331298828125, "learning_rate": 9.649781372252532e-06, "loss": 13.4499, "step": 103930 }, { "epoch": 0.20996537611557994, "grad_norm": 223.89675903320312, "learning_rate": 9.649653019833327e-06, "loss": 22.7131, "step": 103940 }, { "epoch": 0.20998557674826376, "grad_norm": 84.50098419189453, "learning_rate": 9.649524644752278e-06, "loss": 22.1393, "step": 103950 }, { "epoch": 0.21000577738094758, "grad_norm": 331.37652587890625, "learning_rate": 9.649396247010008e-06, "loss": 19.6865, "step": 103960 }, { "epoch": 0.21002597801363138, "grad_norm": 342.3661804199219, "learning_rate": 9.649267826607145e-06, "loss": 24.6141, "step": 103970 }, { "epoch": 0.2100461786463152, "grad_norm": 551.010986328125, "learning_rate": 9.649139383544315e-06, "loss": 23.7316, "step": 103980 }, { "epoch": 0.21006637927899902, "grad_norm": 64.73431396484375, "learning_rate": 9.649010917822145e-06, "loss": 18.5022, "step": 103990 }, { "epoch": 0.21008657991168284, "grad_norm": 274.8889465332031, "learning_rate": 9.648882429441258e-06, "loss": 31.935, "step": 104000 }, { "epoch": 0.21010678054436666, "grad_norm": 303.0697937011719, "learning_rate": 9.648753918402283e-06, "loss": 17.8945, "step": 104010 }, { "epoch": 0.21012698117705048, "grad_norm": 559.698974609375, "learning_rate": 9.648625384705844e-06, "loss": 25.65, "step": 104020 }, { "epoch": 0.21014718180973427, "grad_norm": 303.9476013183594, "learning_rate": 9.648496828352569e-06, "loss": 20.6367, "step": 104030 }, { "epoch": 0.2101673824424181, "grad_norm": 368.31103515625, "learning_rate": 9.648368249343084e-06, "loss": 24.1459, "step": 104040 }, { "epoch": 0.2101875830751019, "grad_norm": 61.97323226928711, "learning_rate": 9.648239647678017e-06, "loss": 12.5804, "step": 104050 }, { "epoch": 0.21020778370778573, "grad_norm": 106.02571105957031, "learning_rate": 9.648111023357994e-06, "loss": 26.3438, "step": 104060 }, { "epoch": 0.21022798434046955, "grad_norm": 377.2306213378906, "learning_rate": 9.64798237638364e-06, "loss": 17.2546, "step": 104070 }, { "epoch": 0.21024818497315337, "grad_norm": 359.0749816894531, "learning_rate": 9.647853706755583e-06, "loss": 33.4835, "step": 104080 }, { "epoch": 0.2102683856058372, "grad_norm": 325.0760192871094, "learning_rate": 9.647725014474452e-06, "loss": 20.7829, "step": 104090 }, { "epoch": 0.21028858623852098, "grad_norm": 170.282470703125, "learning_rate": 9.647596299540874e-06, "loss": 15.6414, "step": 104100 }, { "epoch": 0.2103087868712048, "grad_norm": 231.60687255859375, "learning_rate": 9.647467561955474e-06, "loss": 17.2345, "step": 104110 }, { "epoch": 0.21032898750388862, "grad_norm": 489.0950622558594, "learning_rate": 9.647338801718882e-06, "loss": 30.0807, "step": 104120 }, { "epoch": 0.21034918813657244, "grad_norm": 448.69207763671875, "learning_rate": 9.647210018831723e-06, "loss": 24.4047, "step": 104130 }, { "epoch": 0.21036938876925626, "grad_norm": 175.00436401367188, "learning_rate": 9.647081213294627e-06, "loss": 20.2769, "step": 104140 }, { "epoch": 0.21038958940194008, "grad_norm": 21.99513816833496, "learning_rate": 9.64695238510822e-06, "loss": 18.1207, "step": 104150 }, { "epoch": 0.21040979003462387, "grad_norm": 262.26751708984375, "learning_rate": 9.646823534273131e-06, "loss": 23.9191, "step": 104160 }, { "epoch": 0.2104299906673077, "grad_norm": 82.71932983398438, "learning_rate": 9.646694660789986e-06, "loss": 21.4089, "step": 104170 }, { "epoch": 0.2104501912999915, "grad_norm": 422.9774169921875, "learning_rate": 9.646565764659418e-06, "loss": 29.9018, "step": 104180 }, { "epoch": 0.21047039193267533, "grad_norm": 220.1215057373047, "learning_rate": 9.64643684588205e-06, "loss": 24.9346, "step": 104190 }, { "epoch": 0.21049059256535915, "grad_norm": 597.5738525390625, "learning_rate": 9.646307904458513e-06, "loss": 31.3659, "step": 104200 }, { "epoch": 0.21051079319804297, "grad_norm": 151.74749755859375, "learning_rate": 9.646178940389435e-06, "loss": 11.8469, "step": 104210 }, { "epoch": 0.21053099383072676, "grad_norm": 293.6542663574219, "learning_rate": 9.646049953675443e-06, "loss": 22.1822, "step": 104220 }, { "epoch": 0.21055119446341058, "grad_norm": 165.4585418701172, "learning_rate": 9.645920944317168e-06, "loss": 17.8779, "step": 104230 }, { "epoch": 0.2105713950960944, "grad_norm": 269.0075988769531, "learning_rate": 9.645791912315239e-06, "loss": 30.2071, "step": 104240 }, { "epoch": 0.21059159572877822, "grad_norm": 664.5529174804688, "learning_rate": 9.645662857670281e-06, "loss": 33.1124, "step": 104250 }, { "epoch": 0.21061179636146204, "grad_norm": 284.2259826660156, "learning_rate": 9.645533780382928e-06, "loss": 44.9402, "step": 104260 }, { "epoch": 0.21063199699414586, "grad_norm": 158.02810668945312, "learning_rate": 9.645404680453805e-06, "loss": 18.042, "step": 104270 }, { "epoch": 0.21065219762682968, "grad_norm": 323.9316711425781, "learning_rate": 9.645275557883545e-06, "loss": 9.9265, "step": 104280 }, { "epoch": 0.21067239825951348, "grad_norm": 284.59295654296875, "learning_rate": 9.645146412672774e-06, "loss": 17.8738, "step": 104290 }, { "epoch": 0.2106925988921973, "grad_norm": 43.930110931396484, "learning_rate": 9.645017244822124e-06, "loss": 35.4989, "step": 104300 }, { "epoch": 0.21071279952488112, "grad_norm": 436.6199035644531, "learning_rate": 9.644888054332222e-06, "loss": 25.0478, "step": 104310 }, { "epoch": 0.21073300015756494, "grad_norm": 384.9183044433594, "learning_rate": 9.644758841203698e-06, "loss": 30.4263, "step": 104320 }, { "epoch": 0.21075320079024876, "grad_norm": 874.2238159179688, "learning_rate": 9.644629605437186e-06, "loss": 29.8694, "step": 104330 }, { "epoch": 0.21077340142293258, "grad_norm": 372.44146728515625, "learning_rate": 9.64450034703331e-06, "loss": 27.9564, "step": 104340 }, { "epoch": 0.21079360205561637, "grad_norm": 361.510986328125, "learning_rate": 9.644371065992704e-06, "loss": 24.6135, "step": 104350 }, { "epoch": 0.2108138026883002, "grad_norm": 4508.28955078125, "learning_rate": 9.644241762315995e-06, "loss": 34.6603, "step": 104360 }, { "epoch": 0.210834003320984, "grad_norm": 364.2524719238281, "learning_rate": 9.644112436003817e-06, "loss": 17.0285, "step": 104370 }, { "epoch": 0.21085420395366783, "grad_norm": 245.035400390625, "learning_rate": 9.643983087056796e-06, "loss": 47.9645, "step": 104380 }, { "epoch": 0.21087440458635165, "grad_norm": 277.9680480957031, "learning_rate": 9.643853715475567e-06, "loss": 9.2819, "step": 104390 }, { "epoch": 0.21089460521903547, "grad_norm": 510.8111267089844, "learning_rate": 9.643724321260757e-06, "loss": 23.0039, "step": 104400 }, { "epoch": 0.2109148058517193, "grad_norm": 225.08944702148438, "learning_rate": 9.643594904412998e-06, "loss": 26.7927, "step": 104410 }, { "epoch": 0.21093500648440308, "grad_norm": 341.3807678222656, "learning_rate": 9.64346546493292e-06, "loss": 28.8842, "step": 104420 }, { "epoch": 0.2109552071170869, "grad_norm": 297.9506530761719, "learning_rate": 9.643336002821155e-06, "loss": 22.7881, "step": 104430 }, { "epoch": 0.21097540774977072, "grad_norm": 105.12297821044922, "learning_rate": 9.643206518078335e-06, "loss": 20.2999, "step": 104440 }, { "epoch": 0.21099560838245454, "grad_norm": 51.366390228271484, "learning_rate": 9.643077010705088e-06, "loss": 16.2354, "step": 104450 }, { "epoch": 0.21101580901513836, "grad_norm": 56.0854606628418, "learning_rate": 9.642947480702047e-06, "loss": 13.5975, "step": 104460 }, { "epoch": 0.21103600964782218, "grad_norm": 476.8445739746094, "learning_rate": 9.642817928069843e-06, "loss": 27.2962, "step": 104470 }, { "epoch": 0.21105621028050597, "grad_norm": 256.180419921875, "learning_rate": 9.642688352809108e-06, "loss": 13.5201, "step": 104480 }, { "epoch": 0.2110764109131898, "grad_norm": 690.0086669921875, "learning_rate": 9.642558754920472e-06, "loss": 26.808, "step": 104490 }, { "epoch": 0.2110966115458736, "grad_norm": 173.50205993652344, "learning_rate": 9.642429134404568e-06, "loss": 17.2649, "step": 104500 }, { "epoch": 0.21111681217855743, "grad_norm": 57.49715042114258, "learning_rate": 9.642299491262028e-06, "loss": 64.9536, "step": 104510 }, { "epoch": 0.21113701281124125, "grad_norm": 171.11810302734375, "learning_rate": 9.642169825493483e-06, "loss": 17.1769, "step": 104520 }, { "epoch": 0.21115721344392507, "grad_norm": 487.3818054199219, "learning_rate": 9.642040137099567e-06, "loss": 25.4235, "step": 104530 }, { "epoch": 0.21117741407660887, "grad_norm": 109.66612243652344, "learning_rate": 9.641910426080909e-06, "loss": 15.034, "step": 104540 }, { "epoch": 0.21119761470929269, "grad_norm": 156.33468627929688, "learning_rate": 9.641780692438142e-06, "loss": 18.9927, "step": 104550 }, { "epoch": 0.2112178153419765, "grad_norm": 136.85943603515625, "learning_rate": 9.641650936171899e-06, "loss": 21.3994, "step": 104560 }, { "epoch": 0.21123801597466033, "grad_norm": 60.97770309448242, "learning_rate": 9.641521157282812e-06, "loss": 40.4596, "step": 104570 }, { "epoch": 0.21125821660734415, "grad_norm": 150.77688598632812, "learning_rate": 9.641391355771515e-06, "loss": 42.5986, "step": 104580 }, { "epoch": 0.21127841724002797, "grad_norm": 327.51324462890625, "learning_rate": 9.641261531638639e-06, "loss": 18.184, "step": 104590 }, { "epoch": 0.21129861787271179, "grad_norm": 303.8610534667969, "learning_rate": 9.641131684884817e-06, "loss": 23.5176, "step": 104600 }, { "epoch": 0.21131881850539558, "grad_norm": 351.04229736328125, "learning_rate": 9.641001815510683e-06, "loss": 21.3066, "step": 104610 }, { "epoch": 0.2113390191380794, "grad_norm": 339.583740234375, "learning_rate": 9.640871923516868e-06, "loss": 14.1561, "step": 104620 }, { "epoch": 0.21135921977076322, "grad_norm": 226.4427947998047, "learning_rate": 9.640742008904006e-06, "loss": 18.3923, "step": 104630 }, { "epoch": 0.21137942040344704, "grad_norm": 198.38636779785156, "learning_rate": 9.64061207167273e-06, "loss": 32.9941, "step": 104640 }, { "epoch": 0.21139962103613086, "grad_norm": 288.5070495605469, "learning_rate": 9.640482111823675e-06, "loss": 14.6537, "step": 104650 }, { "epoch": 0.21141982166881468, "grad_norm": 160.81338500976562, "learning_rate": 9.640352129357473e-06, "loss": 15.6248, "step": 104660 }, { "epoch": 0.21144002230149847, "grad_norm": 269.2532043457031, "learning_rate": 9.640222124274756e-06, "loss": 24.979, "step": 104670 }, { "epoch": 0.2114602229341823, "grad_norm": 299.9215087890625, "learning_rate": 9.64009209657616e-06, "loss": 25.109, "step": 104680 }, { "epoch": 0.2114804235668661, "grad_norm": 296.0850524902344, "learning_rate": 9.639962046262319e-06, "loss": 20.3916, "step": 104690 }, { "epoch": 0.21150062419954993, "grad_norm": 204.85963439941406, "learning_rate": 9.639831973333864e-06, "loss": 19.3891, "step": 104700 }, { "epoch": 0.21152082483223375, "grad_norm": 649.3578491210938, "learning_rate": 9.63970187779143e-06, "loss": 31.2013, "step": 104710 }, { "epoch": 0.21154102546491757, "grad_norm": 316.62310791015625, "learning_rate": 9.639571759635655e-06, "loss": 29.5007, "step": 104720 }, { "epoch": 0.2115612260976014, "grad_norm": 220.18722534179688, "learning_rate": 9.639441618867167e-06, "loss": 32.95, "step": 104730 }, { "epoch": 0.21158142673028518, "grad_norm": 761.5950927734375, "learning_rate": 9.639311455486603e-06, "loss": 21.1751, "step": 104740 }, { "epoch": 0.211601627362969, "grad_norm": 318.3962097167969, "learning_rate": 9.6391812694946e-06, "loss": 68.8238, "step": 104750 }, { "epoch": 0.21162182799565282, "grad_norm": 316.50250244140625, "learning_rate": 9.639051060891789e-06, "loss": 28.4423, "step": 104760 }, { "epoch": 0.21164202862833664, "grad_norm": 343.4327392578125, "learning_rate": 9.638920829678806e-06, "loss": 18.8878, "step": 104770 }, { "epoch": 0.21166222926102046, "grad_norm": 235.34609985351562, "learning_rate": 9.638790575856285e-06, "loss": 24.0749, "step": 104780 }, { "epoch": 0.21168242989370428, "grad_norm": 180.32077026367188, "learning_rate": 9.638660299424863e-06, "loss": 12.5654, "step": 104790 }, { "epoch": 0.21170263052638807, "grad_norm": 321.5289306640625, "learning_rate": 9.638530000385171e-06, "loss": 23.5807, "step": 104800 }, { "epoch": 0.2117228311590719, "grad_norm": 1004.6520385742188, "learning_rate": 9.63839967873785e-06, "loss": 22.6372, "step": 104810 }, { "epoch": 0.21174303179175571, "grad_norm": 105.34928894042969, "learning_rate": 9.638269334483528e-06, "loss": 22.633, "step": 104820 }, { "epoch": 0.21176323242443953, "grad_norm": 320.77606201171875, "learning_rate": 9.638138967622845e-06, "loss": 14.8516, "step": 104830 }, { "epoch": 0.21178343305712335, "grad_norm": 319.5592346191406, "learning_rate": 9.638008578156435e-06, "loss": 30.7289, "step": 104840 }, { "epoch": 0.21180363368980717, "grad_norm": 187.1366424560547, "learning_rate": 9.637878166084932e-06, "loss": 23.1764, "step": 104850 }, { "epoch": 0.21182383432249097, "grad_norm": 288.5953369140625, "learning_rate": 9.637747731408975e-06, "loss": 39.4081, "step": 104860 }, { "epoch": 0.2118440349551748, "grad_norm": 1297.93994140625, "learning_rate": 9.637617274129198e-06, "loss": 39.6277, "step": 104870 }, { "epoch": 0.2118642355878586, "grad_norm": 508.4700927734375, "learning_rate": 9.637486794246237e-06, "loss": 21.0616, "step": 104880 }, { "epoch": 0.21188443622054243, "grad_norm": 418.4725341796875, "learning_rate": 9.637356291760729e-06, "loss": 10.7736, "step": 104890 }, { "epoch": 0.21190463685322625, "grad_norm": 576.0739135742188, "learning_rate": 9.637225766673309e-06, "loss": 16.9417, "step": 104900 }, { "epoch": 0.21192483748591007, "grad_norm": 298.3703308105469, "learning_rate": 9.63709521898461e-06, "loss": 19.9643, "step": 104910 }, { "epoch": 0.2119450381185939, "grad_norm": 279.878662109375, "learning_rate": 9.636964648695272e-06, "loss": 29.0694, "step": 104920 }, { "epoch": 0.21196523875127768, "grad_norm": 451.3514709472656, "learning_rate": 9.636834055805933e-06, "loss": 18.0491, "step": 104930 }, { "epoch": 0.2119854393839615, "grad_norm": 324.270751953125, "learning_rate": 9.636703440317225e-06, "loss": 16.7996, "step": 104940 }, { "epoch": 0.21200564001664532, "grad_norm": 262.1705322265625, "learning_rate": 9.636572802229789e-06, "loss": 23.7403, "step": 104950 }, { "epoch": 0.21202584064932914, "grad_norm": 171.71412658691406, "learning_rate": 9.636442141544259e-06, "loss": 20.0182, "step": 104960 }, { "epoch": 0.21204604128201296, "grad_norm": 271.63800048828125, "learning_rate": 9.63631145826127e-06, "loss": 17.9347, "step": 104970 }, { "epoch": 0.21206624191469678, "grad_norm": 195.18312072753906, "learning_rate": 9.636180752381464e-06, "loss": 21.4692, "step": 104980 }, { "epoch": 0.21208644254738057, "grad_norm": 409.5891418457031, "learning_rate": 9.636050023905473e-06, "loss": 21.1449, "step": 104990 }, { "epoch": 0.2121066431800644, "grad_norm": 340.1304931640625, "learning_rate": 9.635919272833938e-06, "loss": 15.003, "step": 105000 }, { "epoch": 0.2121268438127482, "grad_norm": 518.0468139648438, "learning_rate": 9.635788499167494e-06, "loss": 24.4424, "step": 105010 }, { "epoch": 0.21214704444543203, "grad_norm": 263.8879089355469, "learning_rate": 9.63565770290678e-06, "loss": 25.2839, "step": 105020 }, { "epoch": 0.21216724507811585, "grad_norm": 38.22035598754883, "learning_rate": 9.635526884052431e-06, "loss": 28.5216, "step": 105030 }, { "epoch": 0.21218744571079967, "grad_norm": 86.98069763183594, "learning_rate": 9.635396042605088e-06, "loss": 21.1652, "step": 105040 }, { "epoch": 0.2122076463434835, "grad_norm": 227.91021728515625, "learning_rate": 9.635265178565386e-06, "loss": 12.0167, "step": 105050 }, { "epoch": 0.21222784697616728, "grad_norm": 277.041015625, "learning_rate": 9.635134291933964e-06, "loss": 14.2661, "step": 105060 }, { "epoch": 0.2122480476088511, "grad_norm": 302.7667541503906, "learning_rate": 9.63500338271146e-06, "loss": 16.8204, "step": 105070 }, { "epoch": 0.21226824824153492, "grad_norm": 238.7325897216797, "learning_rate": 9.634872450898511e-06, "loss": 18.988, "step": 105080 }, { "epoch": 0.21228844887421874, "grad_norm": 314.8899841308594, "learning_rate": 9.634741496495755e-06, "loss": 17.4912, "step": 105090 }, { "epoch": 0.21230864950690256, "grad_norm": 360.9333190917969, "learning_rate": 9.634610519503833e-06, "loss": 17.358, "step": 105100 }, { "epoch": 0.21232885013958638, "grad_norm": 185.99758911132812, "learning_rate": 9.63447951992338e-06, "loss": 16.6416, "step": 105110 }, { "epoch": 0.21234905077227018, "grad_norm": 343.02032470703125, "learning_rate": 9.634348497755035e-06, "loss": 32.9777, "step": 105120 }, { "epoch": 0.212369251404954, "grad_norm": 139.03012084960938, "learning_rate": 9.63421745299944e-06, "loss": 24.6247, "step": 105130 }, { "epoch": 0.21238945203763782, "grad_norm": 431.29638671875, "learning_rate": 9.634086385657231e-06, "loss": 18.5554, "step": 105140 }, { "epoch": 0.21240965267032164, "grad_norm": 947.23388671875, "learning_rate": 9.633955295729044e-06, "loss": 28.1401, "step": 105150 }, { "epoch": 0.21242985330300546, "grad_norm": 393.277587890625, "learning_rate": 9.633824183215525e-06, "loss": 33.8416, "step": 105160 }, { "epoch": 0.21245005393568928, "grad_norm": 288.1396484375, "learning_rate": 9.633693048117306e-06, "loss": 14.9546, "step": 105170 }, { "epoch": 0.21247025456837307, "grad_norm": 166.85462951660156, "learning_rate": 9.633561890435031e-06, "loss": 36.9617, "step": 105180 }, { "epoch": 0.2124904552010569, "grad_norm": 22.609355926513672, "learning_rate": 9.633430710169335e-06, "loss": 28.9448, "step": 105190 }, { "epoch": 0.2125106558337407, "grad_norm": 77.50933074951172, "learning_rate": 9.633299507320862e-06, "loss": 24.8744, "step": 105200 }, { "epoch": 0.21253085646642453, "grad_norm": 464.5925598144531, "learning_rate": 9.633168281890248e-06, "loss": 25.1761, "step": 105210 }, { "epoch": 0.21255105709910835, "grad_norm": 440.99298095703125, "learning_rate": 9.633037033878135e-06, "loss": 17.654, "step": 105220 }, { "epoch": 0.21257125773179217, "grad_norm": 216.55613708496094, "learning_rate": 9.632905763285159e-06, "loss": 16.055, "step": 105230 }, { "epoch": 0.212591458364476, "grad_norm": 384.1953430175781, "learning_rate": 9.632774470111964e-06, "loss": 18.3084, "step": 105240 }, { "epoch": 0.21261165899715978, "grad_norm": 878.2191772460938, "learning_rate": 9.632643154359187e-06, "loss": 26.2253, "step": 105250 }, { "epoch": 0.2126318596298436, "grad_norm": 772.4561767578125, "learning_rate": 9.63251181602747e-06, "loss": 37.6871, "step": 105260 }, { "epoch": 0.21265206026252742, "grad_norm": 234.08395385742188, "learning_rate": 9.632380455117452e-06, "loss": 33.5746, "step": 105270 }, { "epoch": 0.21267226089521124, "grad_norm": 627.2276000976562, "learning_rate": 9.632249071629773e-06, "loss": 26.3537, "step": 105280 }, { "epoch": 0.21269246152789506, "grad_norm": 158.49603271484375, "learning_rate": 9.632117665565075e-06, "loss": 18.1509, "step": 105290 }, { "epoch": 0.21271266216057888, "grad_norm": 2.398019790649414, "learning_rate": 9.631986236923998e-06, "loss": 30.485, "step": 105300 }, { "epoch": 0.21273286279326267, "grad_norm": 292.9380798339844, "learning_rate": 9.63185478570718e-06, "loss": 30.7661, "step": 105310 }, { "epoch": 0.2127530634259465, "grad_norm": 379.8131103515625, "learning_rate": 9.631723311915264e-06, "loss": 19.1902, "step": 105320 }, { "epoch": 0.2127732640586303, "grad_norm": 374.7728271484375, "learning_rate": 9.63159181554889e-06, "loss": 47.9937, "step": 105330 }, { "epoch": 0.21279346469131413, "grad_norm": 253.642822265625, "learning_rate": 9.6314602966087e-06, "loss": 17.219, "step": 105340 }, { "epoch": 0.21281366532399795, "grad_norm": 513.5677490234375, "learning_rate": 9.631328755095334e-06, "loss": 43.2263, "step": 105350 }, { "epoch": 0.21283386595668177, "grad_norm": 365.41046142578125, "learning_rate": 9.631197191009433e-06, "loss": 27.858, "step": 105360 }, { "epoch": 0.2128540665893656, "grad_norm": 226.05201721191406, "learning_rate": 9.631065604351639e-06, "loss": 16.3724, "step": 105370 }, { "epoch": 0.21287426722204938, "grad_norm": 189.9601287841797, "learning_rate": 9.630933995122594e-06, "loss": 14.4952, "step": 105380 }, { "epoch": 0.2128944678547332, "grad_norm": 0.0, "learning_rate": 9.630802363322936e-06, "loss": 15.7821, "step": 105390 }, { "epoch": 0.21291466848741702, "grad_norm": 338.6743469238281, "learning_rate": 9.630670708953311e-06, "loss": 23.6862, "step": 105400 }, { "epoch": 0.21293486912010084, "grad_norm": 370.22412109375, "learning_rate": 9.630539032014358e-06, "loss": 22.5308, "step": 105410 }, { "epoch": 0.21295506975278466, "grad_norm": 78.70195770263672, "learning_rate": 9.630407332506718e-06, "loss": 13.7915, "step": 105420 }, { "epoch": 0.21297527038546848, "grad_norm": 349.9084167480469, "learning_rate": 9.630275610431036e-06, "loss": 16.0523, "step": 105430 }, { "epoch": 0.21299547101815228, "grad_norm": 281.9499206542969, "learning_rate": 9.630143865787951e-06, "loss": 16.5955, "step": 105440 }, { "epoch": 0.2130156716508361, "grad_norm": 116.06376647949219, "learning_rate": 9.630012098578108e-06, "loss": 20.6115, "step": 105450 }, { "epoch": 0.21303587228351992, "grad_norm": 74.47708129882812, "learning_rate": 9.629880308802147e-06, "loss": 37.6371, "step": 105460 }, { "epoch": 0.21305607291620374, "grad_norm": 357.697265625, "learning_rate": 9.62974849646071e-06, "loss": 24.6789, "step": 105470 }, { "epoch": 0.21307627354888756, "grad_norm": 1725.8330078125, "learning_rate": 9.62961666155444e-06, "loss": 20.0851, "step": 105480 }, { "epoch": 0.21309647418157138, "grad_norm": 235.48776245117188, "learning_rate": 9.629484804083982e-06, "loss": 22.9216, "step": 105490 }, { "epoch": 0.21311667481425517, "grad_norm": 265.4479064941406, "learning_rate": 9.629352924049975e-06, "loss": 30.0702, "step": 105500 }, { "epoch": 0.213136875446939, "grad_norm": 238.599365234375, "learning_rate": 9.629221021453063e-06, "loss": 17.0826, "step": 105510 }, { "epoch": 0.2131570760796228, "grad_norm": 476.8735046386719, "learning_rate": 9.62908909629389e-06, "loss": 20.3765, "step": 105520 }, { "epoch": 0.21317727671230663, "grad_norm": 71.6496810913086, "learning_rate": 9.628957148573099e-06, "loss": 24.0874, "step": 105530 }, { "epoch": 0.21319747734499045, "grad_norm": 177.56884765625, "learning_rate": 9.62882517829133e-06, "loss": 28.0321, "step": 105540 }, { "epoch": 0.21321767797767427, "grad_norm": 7.011865615844727, "learning_rate": 9.62869318544923e-06, "loss": 24.1994, "step": 105550 }, { "epoch": 0.2132378786103581, "grad_norm": 300.8640441894531, "learning_rate": 9.62856117004744e-06, "loss": 26.4784, "step": 105560 }, { "epoch": 0.21325807924304188, "grad_norm": 721.7553100585938, "learning_rate": 9.628429132086606e-06, "loss": 23.2202, "step": 105570 }, { "epoch": 0.2132782798757257, "grad_norm": 180.47679138183594, "learning_rate": 9.628297071567368e-06, "loss": 45.8591, "step": 105580 }, { "epoch": 0.21329848050840952, "grad_norm": 479.1435852050781, "learning_rate": 9.628164988490372e-06, "loss": 20.7352, "step": 105590 }, { "epoch": 0.21331868114109334, "grad_norm": 193.14048767089844, "learning_rate": 9.628032882856262e-06, "loss": 16.019, "step": 105600 }, { "epoch": 0.21333888177377716, "grad_norm": 221.986083984375, "learning_rate": 9.62790075466568e-06, "loss": 19.0894, "step": 105610 }, { "epoch": 0.21335908240646098, "grad_norm": 188.98023986816406, "learning_rate": 9.62776860391927e-06, "loss": 12.9585, "step": 105620 }, { "epoch": 0.21337928303914477, "grad_norm": 273.6766357421875, "learning_rate": 9.62763643061768e-06, "loss": 20.0069, "step": 105630 }, { "epoch": 0.2133994836718286, "grad_norm": 382.5378112792969, "learning_rate": 9.62750423476155e-06, "loss": 26.3941, "step": 105640 }, { "epoch": 0.2134196843045124, "grad_norm": 295.88555908203125, "learning_rate": 9.627372016351524e-06, "loss": 31.1788, "step": 105650 }, { "epoch": 0.21343988493719623, "grad_norm": 607.588134765625, "learning_rate": 9.62723977538825e-06, "loss": 23.7711, "step": 105660 }, { "epoch": 0.21346008556988005, "grad_norm": 331.9056091308594, "learning_rate": 9.62710751187237e-06, "loss": 20.5323, "step": 105670 }, { "epoch": 0.21348028620256387, "grad_norm": 329.4512023925781, "learning_rate": 9.62697522580453e-06, "loss": 26.2296, "step": 105680 }, { "epoch": 0.2135004868352477, "grad_norm": 226.72816467285156, "learning_rate": 9.62684291718537e-06, "loss": 21.6093, "step": 105690 }, { "epoch": 0.21352068746793149, "grad_norm": 208.45993041992188, "learning_rate": 9.626710586015543e-06, "loss": 19.1318, "step": 105700 }, { "epoch": 0.2135408881006153, "grad_norm": 477.7166748046875, "learning_rate": 9.626578232295689e-06, "loss": 15.984, "step": 105710 }, { "epoch": 0.21356108873329913, "grad_norm": 295.1589660644531, "learning_rate": 9.626445856026453e-06, "loss": 11.6897, "step": 105720 }, { "epoch": 0.21358128936598295, "grad_norm": 154.92701721191406, "learning_rate": 9.626313457208482e-06, "loss": 11.1461, "step": 105730 }, { "epoch": 0.21360148999866677, "grad_norm": 252.69992065429688, "learning_rate": 9.626181035842418e-06, "loss": 18.9806, "step": 105740 }, { "epoch": 0.21362169063135059, "grad_norm": 158.94003295898438, "learning_rate": 9.626048591928911e-06, "loss": 23.2484, "step": 105750 }, { "epoch": 0.21364189126403438, "grad_norm": 373.8208923339844, "learning_rate": 9.625916125468604e-06, "loss": 31.1856, "step": 105760 }, { "epoch": 0.2136620918967182, "grad_norm": 211.7491912841797, "learning_rate": 9.625783636462143e-06, "loss": 16.5013, "step": 105770 }, { "epoch": 0.21368229252940202, "grad_norm": 483.8285217285156, "learning_rate": 9.625651124910173e-06, "loss": 34.4307, "step": 105780 }, { "epoch": 0.21370249316208584, "grad_norm": 404.6306457519531, "learning_rate": 9.625518590813342e-06, "loss": 24.3287, "step": 105790 }, { "epoch": 0.21372269379476966, "grad_norm": 95.21009063720703, "learning_rate": 9.62538603417229e-06, "loss": 29.0808, "step": 105800 }, { "epoch": 0.21374289442745348, "grad_norm": 437.2822570800781, "learning_rate": 9.625253454987672e-06, "loss": 12.2931, "step": 105810 }, { "epoch": 0.21376309506013727, "grad_norm": 295.445068359375, "learning_rate": 9.625120853260129e-06, "loss": 26.5589, "step": 105820 }, { "epoch": 0.2137832956928211, "grad_norm": 94.1972885131836, "learning_rate": 9.624988228990305e-06, "loss": 38.8505, "step": 105830 }, { "epoch": 0.2138034963255049, "grad_norm": 332.0558166503906, "learning_rate": 9.624855582178852e-06, "loss": 30.0248, "step": 105840 }, { "epoch": 0.21382369695818873, "grad_norm": 159.7136993408203, "learning_rate": 9.624722912826412e-06, "loss": 10.313, "step": 105850 }, { "epoch": 0.21384389759087255, "grad_norm": 195.3963165283203, "learning_rate": 9.624590220933635e-06, "loss": 20.2715, "step": 105860 }, { "epoch": 0.21386409822355637, "grad_norm": 761.79443359375, "learning_rate": 9.624457506501165e-06, "loss": 31.3446, "step": 105870 }, { "epoch": 0.2138842988562402, "grad_norm": 92.88241577148438, "learning_rate": 9.624324769529652e-06, "loss": 17.9932, "step": 105880 }, { "epoch": 0.21390449948892398, "grad_norm": 43.32359313964844, "learning_rate": 9.624192010019739e-06, "loss": 18.8395, "step": 105890 }, { "epoch": 0.2139247001216078, "grad_norm": 453.7969665527344, "learning_rate": 9.624059227972077e-06, "loss": 18.1595, "step": 105900 }, { "epoch": 0.21394490075429162, "grad_norm": 218.4197540283203, "learning_rate": 9.623926423387308e-06, "loss": 18.0742, "step": 105910 }, { "epoch": 0.21396510138697544, "grad_norm": 576.3978881835938, "learning_rate": 9.623793596266084e-06, "loss": 28.2719, "step": 105920 }, { "epoch": 0.21398530201965926, "grad_norm": 73.23743438720703, "learning_rate": 9.623660746609051e-06, "loss": 28.922, "step": 105930 }, { "epoch": 0.21400550265234308, "grad_norm": 3.7779650688171387, "learning_rate": 9.623527874416857e-06, "loss": 24.5243, "step": 105940 }, { "epoch": 0.21402570328502687, "grad_norm": 264.5213623046875, "learning_rate": 9.623394979690149e-06, "loss": 18.6367, "step": 105950 }, { "epoch": 0.2140459039177107, "grad_norm": 292.0513610839844, "learning_rate": 9.623262062429573e-06, "loss": 22.1004, "step": 105960 }, { "epoch": 0.21406610455039451, "grad_norm": 488.6725769042969, "learning_rate": 9.623129122635778e-06, "loss": 45.1755, "step": 105970 }, { "epoch": 0.21408630518307833, "grad_norm": 170.14381408691406, "learning_rate": 9.622996160309415e-06, "loss": 17.6074, "step": 105980 }, { "epoch": 0.21410650581576215, "grad_norm": 288.9234313964844, "learning_rate": 9.622863175451128e-06, "loss": 12.7857, "step": 105990 }, { "epoch": 0.21412670644844597, "grad_norm": 119.61241149902344, "learning_rate": 9.622730168061568e-06, "loss": 24.0052, "step": 106000 }, { "epoch": 0.2141469070811298, "grad_norm": 613.4786987304688, "learning_rate": 9.622597138141379e-06, "loss": 23.0194, "step": 106010 }, { "epoch": 0.2141671077138136, "grad_norm": 184.5247039794922, "learning_rate": 9.622464085691214e-06, "loss": 33.9082, "step": 106020 }, { "epoch": 0.2141873083464974, "grad_norm": 519.2050170898438, "learning_rate": 9.622331010711718e-06, "loss": 14.1886, "step": 106030 }, { "epoch": 0.21420750897918123, "grad_norm": 149.73831176757812, "learning_rate": 9.622197913203543e-06, "loss": 18.3357, "step": 106040 }, { "epoch": 0.21422770961186505, "grad_norm": 495.5517578125, "learning_rate": 9.622064793167336e-06, "loss": 29.9822, "step": 106050 }, { "epoch": 0.21424791024454887, "grad_norm": 201.2396697998047, "learning_rate": 9.621931650603747e-06, "loss": 22.7199, "step": 106060 }, { "epoch": 0.2142681108772327, "grad_norm": 0.0, "learning_rate": 9.62179848551342e-06, "loss": 17.4113, "step": 106070 }, { "epoch": 0.21428831150991648, "grad_norm": 291.56414794921875, "learning_rate": 9.62166529789701e-06, "loss": 20.356, "step": 106080 }, { "epoch": 0.2143085121426003, "grad_norm": 303.57208251953125, "learning_rate": 9.621532087755163e-06, "loss": 17.2772, "step": 106090 }, { "epoch": 0.21432871277528412, "grad_norm": 408.2618103027344, "learning_rate": 9.62139885508853e-06, "loss": 28.912, "step": 106100 }, { "epoch": 0.21434891340796794, "grad_norm": 104.41629028320312, "learning_rate": 9.621265599897759e-06, "loss": 22.3483, "step": 106110 }, { "epoch": 0.21436911404065176, "grad_norm": 255.6110076904297, "learning_rate": 9.621132322183502e-06, "loss": 31.7342, "step": 106120 }, { "epoch": 0.21438931467333558, "grad_norm": 133.45814514160156, "learning_rate": 9.620999021946404e-06, "loss": 19.5567, "step": 106130 }, { "epoch": 0.21440951530601937, "grad_norm": 500.24884033203125, "learning_rate": 9.620865699187118e-06, "loss": 36.9846, "step": 106140 }, { "epoch": 0.2144297159387032, "grad_norm": 159.55352783203125, "learning_rate": 9.620732353906293e-06, "loss": 17.1326, "step": 106150 }, { "epoch": 0.214449916571387, "grad_norm": 161.65435791015625, "learning_rate": 9.620598986104578e-06, "loss": 29.1916, "step": 106160 }, { "epoch": 0.21447011720407083, "grad_norm": 265.5152282714844, "learning_rate": 9.620465595782626e-06, "loss": 18.9994, "step": 106170 }, { "epoch": 0.21449031783675465, "grad_norm": 336.00970458984375, "learning_rate": 9.620332182941084e-06, "loss": 21.4921, "step": 106180 }, { "epoch": 0.21451051846943847, "grad_norm": 351.4840087890625, "learning_rate": 9.620198747580604e-06, "loss": 14.4498, "step": 106190 }, { "epoch": 0.2145307191021223, "grad_norm": 204.02935791015625, "learning_rate": 9.620065289701835e-06, "loss": 14.3646, "step": 106200 }, { "epoch": 0.21455091973480608, "grad_norm": 224.67112731933594, "learning_rate": 9.619931809305428e-06, "loss": 23.9177, "step": 106210 }, { "epoch": 0.2145711203674899, "grad_norm": 346.0800476074219, "learning_rate": 9.619798306392034e-06, "loss": 50.74, "step": 106220 }, { "epoch": 0.21459132100017372, "grad_norm": 447.60443115234375, "learning_rate": 9.619664780962304e-06, "loss": 18.7376, "step": 106230 }, { "epoch": 0.21461152163285754, "grad_norm": 262.0855407714844, "learning_rate": 9.619531233016885e-06, "loss": 18.7321, "step": 106240 }, { "epoch": 0.21463172226554136, "grad_norm": 200.1279296875, "learning_rate": 9.619397662556434e-06, "loss": 23.0538, "step": 106250 }, { "epoch": 0.21465192289822518, "grad_norm": 440.759033203125, "learning_rate": 9.6192640695816e-06, "loss": 20.2649, "step": 106260 }, { "epoch": 0.21467212353090898, "grad_norm": 242.90237426757812, "learning_rate": 9.619130454093031e-06, "loss": 17.5758, "step": 106270 }, { "epoch": 0.2146923241635928, "grad_norm": 330.26971435546875, "learning_rate": 9.618996816091382e-06, "loss": 21.6324, "step": 106280 }, { "epoch": 0.21471252479627662, "grad_norm": 543.4573364257812, "learning_rate": 9.6188631555773e-06, "loss": 37.7204, "step": 106290 }, { "epoch": 0.21473272542896044, "grad_norm": 498.699462890625, "learning_rate": 9.61872947255144e-06, "loss": 19.968, "step": 106300 }, { "epoch": 0.21475292606164426, "grad_norm": 370.1468811035156, "learning_rate": 9.618595767014456e-06, "loss": 28.2623, "step": 106310 }, { "epoch": 0.21477312669432808, "grad_norm": 260.3758239746094, "learning_rate": 9.618462038966994e-06, "loss": 19.6196, "step": 106320 }, { "epoch": 0.21479332732701187, "grad_norm": 337.6847839355469, "learning_rate": 9.618328288409708e-06, "loss": 26.7189, "step": 106330 }, { "epoch": 0.2148135279596957, "grad_norm": 201.85775756835938, "learning_rate": 9.61819451534325e-06, "loss": 17.1842, "step": 106340 }, { "epoch": 0.2148337285923795, "grad_norm": 156.12777709960938, "learning_rate": 9.618060719768272e-06, "loss": 17.81, "step": 106350 }, { "epoch": 0.21485392922506333, "grad_norm": 42.30607604980469, "learning_rate": 9.617926901685427e-06, "loss": 18.0648, "step": 106360 }, { "epoch": 0.21487412985774715, "grad_norm": 27.655582427978516, "learning_rate": 9.617793061095366e-06, "loss": 24.5426, "step": 106370 }, { "epoch": 0.21489433049043097, "grad_norm": 221.77169799804688, "learning_rate": 9.617659197998741e-06, "loss": 14.1819, "step": 106380 }, { "epoch": 0.2149145311231148, "grad_norm": 93.31748962402344, "learning_rate": 9.617525312396206e-06, "loss": 24.0785, "step": 106390 }, { "epoch": 0.21493473175579858, "grad_norm": 275.8955993652344, "learning_rate": 9.617391404288412e-06, "loss": 39.4091, "step": 106400 }, { "epoch": 0.2149549323884824, "grad_norm": 207.01129150390625, "learning_rate": 9.617257473676014e-06, "loss": 13.7423, "step": 106410 }, { "epoch": 0.21497513302116622, "grad_norm": 199.89573669433594, "learning_rate": 9.617123520559662e-06, "loss": 26.6089, "step": 106420 }, { "epoch": 0.21499533365385004, "grad_norm": 147.3986358642578, "learning_rate": 9.616989544940008e-06, "loss": 27.5068, "step": 106430 }, { "epoch": 0.21501553428653386, "grad_norm": 246.6013946533203, "learning_rate": 9.61685554681771e-06, "loss": 20.1848, "step": 106440 }, { "epoch": 0.21503573491921768, "grad_norm": 255.0700225830078, "learning_rate": 9.616721526193416e-06, "loss": 24.2637, "step": 106450 }, { "epoch": 0.21505593555190147, "grad_norm": 366.08502197265625, "learning_rate": 9.616587483067782e-06, "loss": 32.2432, "step": 106460 }, { "epoch": 0.2150761361845853, "grad_norm": 267.01690673828125, "learning_rate": 9.61645341744146e-06, "loss": 17.1347, "step": 106470 }, { "epoch": 0.2150963368172691, "grad_norm": 188.93504333496094, "learning_rate": 9.616319329315105e-06, "loss": 17.776, "step": 106480 }, { "epoch": 0.21511653744995293, "grad_norm": 400.7081604003906, "learning_rate": 9.616185218689369e-06, "loss": 19.1246, "step": 106490 }, { "epoch": 0.21513673808263675, "grad_norm": 73.69828796386719, "learning_rate": 9.616051085564905e-06, "loss": 23.3073, "step": 106500 }, { "epoch": 0.21515693871532057, "grad_norm": 276.0678405761719, "learning_rate": 9.61591692994237e-06, "loss": 19.2271, "step": 106510 }, { "epoch": 0.2151771393480044, "grad_norm": 0.0, "learning_rate": 9.615782751822413e-06, "loss": 17.6632, "step": 106520 }, { "epoch": 0.21519733998068818, "grad_norm": 143.78457641601562, "learning_rate": 9.615648551205692e-06, "loss": 12.8896, "step": 106530 }, { "epoch": 0.215217540613372, "grad_norm": 335.8498840332031, "learning_rate": 9.61551432809286e-06, "loss": 18.4195, "step": 106540 }, { "epoch": 0.21523774124605582, "grad_norm": 105.0717544555664, "learning_rate": 9.615380082484573e-06, "loss": 18.9704, "step": 106550 }, { "epoch": 0.21525794187873964, "grad_norm": 299.888427734375, "learning_rate": 9.61524581438148e-06, "loss": 13.4649, "step": 106560 }, { "epoch": 0.21527814251142346, "grad_norm": 449.4877624511719, "learning_rate": 9.61511152378424e-06, "loss": 22.7937, "step": 106570 }, { "epoch": 0.21529834314410728, "grad_norm": 248.3882598876953, "learning_rate": 9.614977210693505e-06, "loss": 29.0592, "step": 106580 }, { "epoch": 0.21531854377679108, "grad_norm": 186.11126708984375, "learning_rate": 9.614842875109933e-06, "loss": 28.02, "step": 106590 }, { "epoch": 0.2153387444094749, "grad_norm": 412.6356201171875, "learning_rate": 9.614708517034176e-06, "loss": 16.7692, "step": 106600 }, { "epoch": 0.21535894504215872, "grad_norm": 205.19479370117188, "learning_rate": 9.61457413646689e-06, "loss": 33.2389, "step": 106610 }, { "epoch": 0.21537914567484254, "grad_norm": 125.66544342041016, "learning_rate": 9.614439733408726e-06, "loss": 66.0418, "step": 106620 }, { "epoch": 0.21539934630752636, "grad_norm": 120.4734115600586, "learning_rate": 9.614305307860346e-06, "loss": 20.7608, "step": 106630 }, { "epoch": 0.21541954694021018, "grad_norm": 23.36737060546875, "learning_rate": 9.6141708598224e-06, "loss": 22.4962, "step": 106640 }, { "epoch": 0.21543974757289397, "grad_norm": 248.92030334472656, "learning_rate": 9.614036389295545e-06, "loss": 31.842, "step": 106650 }, { "epoch": 0.2154599482055778, "grad_norm": 286.855712890625, "learning_rate": 9.613901896280436e-06, "loss": 29.6725, "step": 106660 }, { "epoch": 0.2154801488382616, "grad_norm": 96.57420349121094, "learning_rate": 9.61376738077773e-06, "loss": 21.1612, "step": 106670 }, { "epoch": 0.21550034947094543, "grad_norm": 85.65487670898438, "learning_rate": 9.613632842788079e-06, "loss": 19.5072, "step": 106680 }, { "epoch": 0.21552055010362925, "grad_norm": 435.2016906738281, "learning_rate": 9.613498282312143e-06, "loss": 37.9844, "step": 106690 }, { "epoch": 0.21554075073631307, "grad_norm": 561.2977905273438, "learning_rate": 9.613363699350575e-06, "loss": 24.9823, "step": 106700 }, { "epoch": 0.2155609513689969, "grad_norm": 359.30987548828125, "learning_rate": 9.613229093904033e-06, "loss": 19.4469, "step": 106710 }, { "epoch": 0.21558115200168068, "grad_norm": 238.3653564453125, "learning_rate": 9.613094465973171e-06, "loss": 20.3945, "step": 106720 }, { "epoch": 0.2156013526343645, "grad_norm": 253.5447540283203, "learning_rate": 9.612959815558645e-06, "loss": 19.8929, "step": 106730 }, { "epoch": 0.21562155326704832, "grad_norm": 202.6088104248047, "learning_rate": 9.612825142661115e-06, "loss": 20.7044, "step": 106740 }, { "epoch": 0.21564175389973214, "grad_norm": 136.239990234375, "learning_rate": 9.612690447281233e-06, "loss": 18.1369, "step": 106750 }, { "epoch": 0.21566195453241596, "grad_norm": 284.8649597167969, "learning_rate": 9.612555729419656e-06, "loss": 33.3957, "step": 106760 }, { "epoch": 0.21568215516509978, "grad_norm": 315.91180419921875, "learning_rate": 9.612420989077045e-06, "loss": 15.3427, "step": 106770 }, { "epoch": 0.21570235579778357, "grad_norm": 330.9490051269531, "learning_rate": 9.61228622625405e-06, "loss": 37.0962, "step": 106780 }, { "epoch": 0.2157225564304674, "grad_norm": 263.6752624511719, "learning_rate": 9.612151440951334e-06, "loss": 28.4726, "step": 106790 }, { "epoch": 0.2157427570631512, "grad_norm": 214.970458984375, "learning_rate": 9.612016633169552e-06, "loss": 20.3807, "step": 106800 }, { "epoch": 0.21576295769583503, "grad_norm": 95.76106262207031, "learning_rate": 9.611881802909358e-06, "loss": 43.9942, "step": 106810 }, { "epoch": 0.21578315832851885, "grad_norm": 450.0078430175781, "learning_rate": 9.611746950171412e-06, "loss": 31.3555, "step": 106820 }, { "epoch": 0.21580335896120267, "grad_norm": 376.7423095703125, "learning_rate": 9.61161207495637e-06, "loss": 22.6472, "step": 106830 }, { "epoch": 0.2158235595938865, "grad_norm": 159.64370727539062, "learning_rate": 9.611477177264892e-06, "loss": 30.3398, "step": 106840 }, { "epoch": 0.21584376022657029, "grad_norm": 324.8980712890625, "learning_rate": 9.611342257097632e-06, "loss": 36.6795, "step": 106850 }, { "epoch": 0.2158639608592541, "grad_norm": 488.6799011230469, "learning_rate": 9.61120731445525e-06, "loss": 15.9887, "step": 106860 }, { "epoch": 0.21588416149193793, "grad_norm": 6.957035541534424, "learning_rate": 9.611072349338402e-06, "loss": 22.8913, "step": 106870 }, { "epoch": 0.21590436212462175, "grad_norm": 236.8123321533203, "learning_rate": 9.610937361747747e-06, "loss": 13.8541, "step": 106880 }, { "epoch": 0.21592456275730557, "grad_norm": 666.7322998046875, "learning_rate": 9.610802351683943e-06, "loss": 32.0046, "step": 106890 }, { "epoch": 0.21594476338998939, "grad_norm": 107.41796112060547, "learning_rate": 9.610667319147648e-06, "loss": 15.9569, "step": 106900 }, { "epoch": 0.21596496402267318, "grad_norm": 271.19805908203125, "learning_rate": 9.610532264139518e-06, "loss": 19.3172, "step": 106910 }, { "epoch": 0.215985164655357, "grad_norm": 196.64076232910156, "learning_rate": 9.610397186660214e-06, "loss": 14.4721, "step": 106920 }, { "epoch": 0.21600536528804082, "grad_norm": 237.2180633544922, "learning_rate": 9.610262086710393e-06, "loss": 16.3908, "step": 106930 }, { "epoch": 0.21602556592072464, "grad_norm": 276.1612243652344, "learning_rate": 9.610126964290714e-06, "loss": 17.4921, "step": 106940 }, { "epoch": 0.21604576655340846, "grad_norm": 281.9231872558594, "learning_rate": 9.609991819401836e-06, "loss": 31.4999, "step": 106950 }, { "epoch": 0.21606596718609228, "grad_norm": 161.05043029785156, "learning_rate": 9.609856652044417e-06, "loss": 18.6208, "step": 106960 }, { "epoch": 0.21608616781877607, "grad_norm": 435.0048828125, "learning_rate": 9.609721462219115e-06, "loss": 27.5914, "step": 106970 }, { "epoch": 0.2161063684514599, "grad_norm": 303.5618591308594, "learning_rate": 9.609586249926589e-06, "loss": 21.9395, "step": 106980 }, { "epoch": 0.2161265690841437, "grad_norm": 467.8877868652344, "learning_rate": 9.609451015167498e-06, "loss": 14.5573, "step": 106990 }, { "epoch": 0.21614676971682753, "grad_norm": 260.1849670410156, "learning_rate": 9.609315757942504e-06, "loss": 27.5315, "step": 107000 }, { "epoch": 0.21616697034951135, "grad_norm": 17.326231002807617, "learning_rate": 9.609180478252262e-06, "loss": 24.9081, "step": 107010 }, { "epoch": 0.21618717098219517, "grad_norm": 148.7440185546875, "learning_rate": 9.609045176097435e-06, "loss": 16.3596, "step": 107020 }, { "epoch": 0.216207371614879, "grad_norm": 514.1832885742188, "learning_rate": 9.608909851478681e-06, "loss": 16.5759, "step": 107030 }, { "epoch": 0.21622757224756278, "grad_norm": 241.41944885253906, "learning_rate": 9.608774504396659e-06, "loss": 17.2498, "step": 107040 }, { "epoch": 0.2162477728802466, "grad_norm": 404.53680419921875, "learning_rate": 9.608639134852028e-06, "loss": 32.4074, "step": 107050 }, { "epoch": 0.21626797351293042, "grad_norm": 177.2047576904297, "learning_rate": 9.60850374284545e-06, "loss": 20.5349, "step": 107060 }, { "epoch": 0.21628817414561424, "grad_norm": 59.91279602050781, "learning_rate": 9.608368328377582e-06, "loss": 24.816, "step": 107070 }, { "epoch": 0.21630837477829806, "grad_norm": 250.4579315185547, "learning_rate": 9.608232891449088e-06, "loss": 22.2701, "step": 107080 }, { "epoch": 0.21632857541098188, "grad_norm": 337.57965087890625, "learning_rate": 9.608097432060626e-06, "loss": 28.41, "step": 107090 }, { "epoch": 0.21634877604366567, "grad_norm": 153.24810791015625, "learning_rate": 9.607961950212855e-06, "loss": 30.3755, "step": 107100 }, { "epoch": 0.2163689766763495, "grad_norm": 146.79263305664062, "learning_rate": 9.607826445906434e-06, "loss": 26.5177, "step": 107110 }, { "epoch": 0.21638917730903331, "grad_norm": 80.44586181640625, "learning_rate": 9.607690919142028e-06, "loss": 25.1438, "step": 107120 }, { "epoch": 0.21640937794171713, "grad_norm": 594.1906127929688, "learning_rate": 9.607555369920296e-06, "loss": 17.2628, "step": 107130 }, { "epoch": 0.21642957857440095, "grad_norm": 260.6003723144531, "learning_rate": 9.607419798241897e-06, "loss": 23.0884, "step": 107140 }, { "epoch": 0.21644977920708477, "grad_norm": 335.043701171875, "learning_rate": 9.607284204107493e-06, "loss": 19.2464, "step": 107150 }, { "epoch": 0.2164699798397686, "grad_norm": 233.84124755859375, "learning_rate": 9.607148587517746e-06, "loss": 19.0385, "step": 107160 }, { "epoch": 0.2164901804724524, "grad_norm": 265.35498046875, "learning_rate": 9.607012948473314e-06, "loss": 26.1909, "step": 107170 }, { "epoch": 0.2165103811051362, "grad_norm": 258.1189880371094, "learning_rate": 9.606877286974859e-06, "loss": 27.4926, "step": 107180 }, { "epoch": 0.21653058173782003, "grad_norm": 325.67572021484375, "learning_rate": 9.606741603023043e-06, "loss": 26.2767, "step": 107190 }, { "epoch": 0.21655078237050385, "grad_norm": 275.08880615234375, "learning_rate": 9.606605896618528e-06, "loss": 20.985, "step": 107200 }, { "epoch": 0.21657098300318767, "grad_norm": 269.8556213378906, "learning_rate": 9.606470167761975e-06, "loss": 28.4515, "step": 107210 }, { "epoch": 0.2165911836358715, "grad_norm": 393.21337890625, "learning_rate": 9.606334416454045e-06, "loss": 20.8325, "step": 107220 }, { "epoch": 0.21661138426855528, "grad_norm": 120.07611083984375, "learning_rate": 9.606198642695398e-06, "loss": 15.0357, "step": 107230 }, { "epoch": 0.2166315849012391, "grad_norm": 227.2926483154297, "learning_rate": 9.606062846486698e-06, "loss": 26.071, "step": 107240 }, { "epoch": 0.21665178553392292, "grad_norm": 275.21160888671875, "learning_rate": 9.605927027828608e-06, "loss": 28.3203, "step": 107250 }, { "epoch": 0.21667198616660674, "grad_norm": 448.6011657714844, "learning_rate": 9.605791186721787e-06, "loss": 53.7212, "step": 107260 }, { "epoch": 0.21669218679929056, "grad_norm": 321.1164855957031, "learning_rate": 9.605655323166899e-06, "loss": 20.4426, "step": 107270 }, { "epoch": 0.21671238743197438, "grad_norm": 306.23114013671875, "learning_rate": 9.605519437164604e-06, "loss": 16.2751, "step": 107280 }, { "epoch": 0.21673258806465817, "grad_norm": 210.52279663085938, "learning_rate": 9.605383528715567e-06, "loss": 23.5066, "step": 107290 }, { "epoch": 0.216752788697342, "grad_norm": 158.95877075195312, "learning_rate": 9.605247597820448e-06, "loss": 17.2413, "step": 107300 }, { "epoch": 0.2167729893300258, "grad_norm": 524.9472045898438, "learning_rate": 9.605111644479913e-06, "loss": 22.2996, "step": 107310 }, { "epoch": 0.21679318996270963, "grad_norm": 607.3709716796875, "learning_rate": 9.604975668694621e-06, "loss": 21.8113, "step": 107320 }, { "epoch": 0.21681339059539345, "grad_norm": 379.6607666015625, "learning_rate": 9.604839670465236e-06, "loss": 25.891, "step": 107330 }, { "epoch": 0.21683359122807727, "grad_norm": 278.7035217285156, "learning_rate": 9.604703649792421e-06, "loss": 24.2937, "step": 107340 }, { "epoch": 0.2168537918607611, "grad_norm": 456.75299072265625, "learning_rate": 9.60456760667684e-06, "loss": 17.7398, "step": 107350 }, { "epoch": 0.21687399249344488, "grad_norm": 518.2752685546875, "learning_rate": 9.604431541119155e-06, "loss": 25.3883, "step": 107360 }, { "epoch": 0.2168941931261287, "grad_norm": 188.93148803710938, "learning_rate": 9.604295453120028e-06, "loss": 19.0449, "step": 107370 }, { "epoch": 0.21691439375881252, "grad_norm": 457.2570495605469, "learning_rate": 9.604159342680125e-06, "loss": 18.2155, "step": 107380 }, { "epoch": 0.21693459439149634, "grad_norm": 824.507080078125, "learning_rate": 9.604023209800106e-06, "loss": 36.0788, "step": 107390 }, { "epoch": 0.21695479502418016, "grad_norm": 303.3965148925781, "learning_rate": 9.603887054480636e-06, "loss": 13.5811, "step": 107400 }, { "epoch": 0.21697499565686398, "grad_norm": 235.50184631347656, "learning_rate": 9.60375087672238e-06, "loss": 32.5375, "step": 107410 }, { "epoch": 0.21699519628954778, "grad_norm": 454.1246643066406, "learning_rate": 9.603614676526e-06, "loss": 19.2736, "step": 107420 }, { "epoch": 0.2170153969222316, "grad_norm": 456.62677001953125, "learning_rate": 9.603478453892161e-06, "loss": 58.7979, "step": 107430 }, { "epoch": 0.21703559755491542, "grad_norm": 232.4381103515625, "learning_rate": 9.603342208821526e-06, "loss": 21.1144, "step": 107440 }, { "epoch": 0.21705579818759924, "grad_norm": 339.83349609375, "learning_rate": 9.60320594131476e-06, "loss": 30.519, "step": 107450 }, { "epoch": 0.21707599882028306, "grad_norm": 270.2113952636719, "learning_rate": 9.603069651372526e-06, "loss": 27.1976, "step": 107460 }, { "epoch": 0.21709619945296688, "grad_norm": 307.9425354003906, "learning_rate": 9.602933338995489e-06, "loss": 29.3763, "step": 107470 }, { "epoch": 0.2171164000856507, "grad_norm": 217.4210662841797, "learning_rate": 9.602797004184311e-06, "loss": 21.8612, "step": 107480 }, { "epoch": 0.2171366007183345, "grad_norm": 133.30247497558594, "learning_rate": 9.602660646939663e-06, "loss": 21.6754, "step": 107490 }, { "epoch": 0.2171568013510183, "grad_norm": 225.86122131347656, "learning_rate": 9.602524267262202e-06, "loss": 16.3392, "step": 107500 }, { "epoch": 0.21717700198370213, "grad_norm": 234.44146728515625, "learning_rate": 9.602387865152597e-06, "loss": 16.9369, "step": 107510 }, { "epoch": 0.21719720261638595, "grad_norm": 139.84844970703125, "learning_rate": 9.602251440611512e-06, "loss": 14.481, "step": 107520 }, { "epoch": 0.21721740324906977, "grad_norm": 182.9287872314453, "learning_rate": 9.602114993639611e-06, "loss": 28.6464, "step": 107530 }, { "epoch": 0.2172376038817536, "grad_norm": 654.9391479492188, "learning_rate": 9.60197852423756e-06, "loss": 22.9633, "step": 107540 }, { "epoch": 0.21725780451443738, "grad_norm": 181.9678955078125, "learning_rate": 9.601842032406024e-06, "loss": 15.2439, "step": 107550 }, { "epoch": 0.2172780051471212, "grad_norm": 130.39013671875, "learning_rate": 9.601705518145668e-06, "loss": 17.68, "step": 107560 }, { "epoch": 0.21729820577980502, "grad_norm": 389.73785400390625, "learning_rate": 9.601568981457156e-06, "loss": 19.4148, "step": 107570 }, { "epoch": 0.21731840641248884, "grad_norm": 425.60223388671875, "learning_rate": 9.601432422341156e-06, "loss": 35.5176, "step": 107580 }, { "epoch": 0.21733860704517266, "grad_norm": 196.87591552734375, "learning_rate": 9.601295840798333e-06, "loss": 15.9003, "step": 107590 }, { "epoch": 0.21735880767785648, "grad_norm": 607.0701904296875, "learning_rate": 9.601159236829353e-06, "loss": 34.1867, "step": 107600 }, { "epoch": 0.21737900831054027, "grad_norm": 513.3591918945312, "learning_rate": 9.60102261043488e-06, "loss": 29.3436, "step": 107610 }, { "epoch": 0.2173992089432241, "grad_norm": 136.21954345703125, "learning_rate": 9.60088596161558e-06, "loss": 30.0418, "step": 107620 }, { "epoch": 0.2174194095759079, "grad_norm": 308.0111999511719, "learning_rate": 9.60074929037212e-06, "loss": 10.6795, "step": 107630 }, { "epoch": 0.21743961020859173, "grad_norm": 350.7782897949219, "learning_rate": 9.600612596705167e-06, "loss": 22.8826, "step": 107640 }, { "epoch": 0.21745981084127555, "grad_norm": 499.0043640136719, "learning_rate": 9.600475880615385e-06, "loss": 36.2341, "step": 107650 }, { "epoch": 0.21748001147395937, "grad_norm": 240.3244171142578, "learning_rate": 9.600339142103441e-06, "loss": 23.0441, "step": 107660 }, { "epoch": 0.2175002121066432, "grad_norm": 442.1656188964844, "learning_rate": 9.600202381170004e-06, "loss": 14.5552, "step": 107670 }, { "epoch": 0.21752041273932698, "grad_norm": 556.6654052734375, "learning_rate": 9.600065597815738e-06, "loss": 49.5072, "step": 107680 }, { "epoch": 0.2175406133720108, "grad_norm": 150.6599884033203, "learning_rate": 9.59992879204131e-06, "loss": 32.5429, "step": 107690 }, { "epoch": 0.21756081400469462, "grad_norm": 288.51495361328125, "learning_rate": 9.599791963847388e-06, "loss": 26.6803, "step": 107700 }, { "epoch": 0.21758101463737844, "grad_norm": 158.26039123535156, "learning_rate": 9.599655113234635e-06, "loss": 13.2999, "step": 107710 }, { "epoch": 0.21760121527006226, "grad_norm": 269.87530517578125, "learning_rate": 9.599518240203722e-06, "loss": 16.497, "step": 107720 }, { "epoch": 0.21762141590274608, "grad_norm": 289.9524230957031, "learning_rate": 9.599381344755315e-06, "loss": 13.8225, "step": 107730 }, { "epoch": 0.21764161653542988, "grad_norm": 154.6095428466797, "learning_rate": 9.599244426890082e-06, "loss": 10.4594, "step": 107740 }, { "epoch": 0.2176618171681137, "grad_norm": 388.4635925292969, "learning_rate": 9.59910748660869e-06, "loss": 17.6812, "step": 107750 }, { "epoch": 0.21768201780079752, "grad_norm": 365.8880920410156, "learning_rate": 9.598970523911803e-06, "loss": 41.5921, "step": 107760 }, { "epoch": 0.21770221843348134, "grad_norm": 294.0865478515625, "learning_rate": 9.598833538800094e-06, "loss": 27.987, "step": 107770 }, { "epoch": 0.21772241906616516, "grad_norm": 73.1674575805664, "learning_rate": 9.598696531274227e-06, "loss": 22.5753, "step": 107780 }, { "epoch": 0.21774261969884898, "grad_norm": 363.1200256347656, "learning_rate": 9.598559501334872e-06, "loss": 24.2533, "step": 107790 }, { "epoch": 0.2177628203315328, "grad_norm": 329.3001708984375, "learning_rate": 9.598422448982697e-06, "loss": 24.5151, "step": 107800 }, { "epoch": 0.2177830209642166, "grad_norm": 452.3674621582031, "learning_rate": 9.598285374218367e-06, "loss": 18.9495, "step": 107810 }, { "epoch": 0.2178032215969004, "grad_norm": 335.8105773925781, "learning_rate": 9.59814827704255e-06, "loss": 14.7686, "step": 107820 }, { "epoch": 0.21782342222958423, "grad_norm": 160.59051513671875, "learning_rate": 9.598011157455917e-06, "loss": 13.9756, "step": 107830 }, { "epoch": 0.21784362286226805, "grad_norm": 232.37924194335938, "learning_rate": 9.597874015459136e-06, "loss": 17.7612, "step": 107840 }, { "epoch": 0.21786382349495187, "grad_norm": 177.9893341064453, "learning_rate": 9.597736851052874e-06, "loss": 24.2646, "step": 107850 }, { "epoch": 0.2178840241276357, "grad_norm": 530.2075805664062, "learning_rate": 9.597599664237801e-06, "loss": 14.6917, "step": 107860 }, { "epoch": 0.21790422476031948, "grad_norm": 73.30565643310547, "learning_rate": 9.597462455014585e-06, "loss": 36.4938, "step": 107870 }, { "epoch": 0.2179244253930033, "grad_norm": 0.0, "learning_rate": 9.597325223383895e-06, "loss": 12.1612, "step": 107880 }, { "epoch": 0.21794462602568712, "grad_norm": 135.74151611328125, "learning_rate": 9.597187969346398e-06, "loss": 12.1315, "step": 107890 }, { "epoch": 0.21796482665837094, "grad_norm": 161.26498413085938, "learning_rate": 9.597050692902765e-06, "loss": 21.0963, "step": 107900 }, { "epoch": 0.21798502729105476, "grad_norm": 252.5303497314453, "learning_rate": 9.596913394053664e-06, "loss": 13.6714, "step": 107910 }, { "epoch": 0.21800522792373858, "grad_norm": 291.6688537597656, "learning_rate": 9.596776072799767e-06, "loss": 15.8053, "step": 107920 }, { "epoch": 0.21802542855642237, "grad_norm": 129.48028564453125, "learning_rate": 9.596638729141739e-06, "loss": 10.4088, "step": 107930 }, { "epoch": 0.2180456291891062, "grad_norm": 225.05010986328125, "learning_rate": 9.59650136308025e-06, "loss": 16.8224, "step": 107940 }, { "epoch": 0.21806582982179, "grad_norm": 426.91485595703125, "learning_rate": 9.596363974615973e-06, "loss": 22.2329, "step": 107950 }, { "epoch": 0.21808603045447383, "grad_norm": 195.3730010986328, "learning_rate": 9.596226563749575e-06, "loss": 26.4733, "step": 107960 }, { "epoch": 0.21810623108715765, "grad_norm": 63.74600601196289, "learning_rate": 9.596089130481727e-06, "loss": 13.7169, "step": 107970 }, { "epoch": 0.21812643171984147, "grad_norm": 388.7047119140625, "learning_rate": 9.595951674813096e-06, "loss": 27.0351, "step": 107980 }, { "epoch": 0.2181466323525253, "grad_norm": 98.61304473876953, "learning_rate": 9.595814196744355e-06, "loss": 13.4275, "step": 107990 }, { "epoch": 0.21816683298520909, "grad_norm": 259.72894287109375, "learning_rate": 9.595676696276173e-06, "loss": 21.3734, "step": 108000 }, { "epoch": 0.2181870336178929, "grad_norm": 575.6867065429688, "learning_rate": 9.595539173409222e-06, "loss": 26.2364, "step": 108010 }, { "epoch": 0.21820723425057673, "grad_norm": 245.23495483398438, "learning_rate": 9.595401628144166e-06, "loss": 23.4144, "step": 108020 }, { "epoch": 0.21822743488326055, "grad_norm": 0.0, "learning_rate": 9.595264060481684e-06, "loss": 22.3271, "step": 108030 }, { "epoch": 0.21824763551594437, "grad_norm": 207.33966064453125, "learning_rate": 9.59512647042244e-06, "loss": 18.741, "step": 108040 }, { "epoch": 0.21826783614862819, "grad_norm": 86.89115142822266, "learning_rate": 9.594988857967107e-06, "loss": 24.3179, "step": 108050 }, { "epoch": 0.21828803678131198, "grad_norm": 187.32989501953125, "learning_rate": 9.594851223116357e-06, "loss": 14.3451, "step": 108060 }, { "epoch": 0.2183082374139958, "grad_norm": 258.950439453125, "learning_rate": 9.594713565870859e-06, "loss": 15.6137, "step": 108070 }, { "epoch": 0.21832843804667962, "grad_norm": 233.75839233398438, "learning_rate": 9.594575886231284e-06, "loss": 17.3568, "step": 108080 }, { "epoch": 0.21834863867936344, "grad_norm": 171.03057861328125, "learning_rate": 9.594438184198302e-06, "loss": 49.0775, "step": 108090 }, { "epoch": 0.21836883931204726, "grad_norm": 364.1061706542969, "learning_rate": 9.594300459772588e-06, "loss": 17.9668, "step": 108100 }, { "epoch": 0.21838903994473108, "grad_norm": 514.6207885742188, "learning_rate": 9.59416271295481e-06, "loss": 21.1087, "step": 108110 }, { "epoch": 0.2184092405774149, "grad_norm": 297.905517578125, "learning_rate": 9.594024943745638e-06, "loss": 27.0666, "step": 108120 }, { "epoch": 0.2184294412100987, "grad_norm": 266.54425048828125, "learning_rate": 9.59388715214575e-06, "loss": 34.426, "step": 108130 }, { "epoch": 0.2184496418427825, "grad_norm": 12.773734092712402, "learning_rate": 9.593749338155809e-06, "loss": 10.9998, "step": 108140 }, { "epoch": 0.21846984247546633, "grad_norm": 155.4762725830078, "learning_rate": 9.593611501776493e-06, "loss": 23.7766, "step": 108150 }, { "epoch": 0.21849004310815015, "grad_norm": 396.9547119140625, "learning_rate": 9.59347364300847e-06, "loss": 21.1208, "step": 108160 }, { "epoch": 0.21851024374083397, "grad_norm": 626.3073120117188, "learning_rate": 9.593335761852416e-06, "loss": 27.2051, "step": 108170 }, { "epoch": 0.2185304443735178, "grad_norm": 296.9278869628906, "learning_rate": 9.593197858309e-06, "loss": 29.1539, "step": 108180 }, { "epoch": 0.21855064500620158, "grad_norm": 323.0968933105469, "learning_rate": 9.593059932378894e-06, "loss": 25.9907, "step": 108190 }, { "epoch": 0.2185708456388854, "grad_norm": 127.27818298339844, "learning_rate": 9.592921984062771e-06, "loss": 22.8253, "step": 108200 }, { "epoch": 0.21859104627156922, "grad_norm": 173.2344512939453, "learning_rate": 9.592784013361303e-06, "loss": 31.8089, "step": 108210 }, { "epoch": 0.21861124690425304, "grad_norm": 231.95574951171875, "learning_rate": 9.592646020275165e-06, "loss": 28.6918, "step": 108220 }, { "epoch": 0.21863144753693686, "grad_norm": 386.8862609863281, "learning_rate": 9.592508004805023e-06, "loss": 27.1284, "step": 108230 }, { "epoch": 0.21865164816962068, "grad_norm": 102.5257339477539, "learning_rate": 9.592369966951557e-06, "loss": 58.8672, "step": 108240 }, { "epoch": 0.21867184880230447, "grad_norm": 901.2593383789062, "learning_rate": 9.592231906715437e-06, "loss": 31.4512, "step": 108250 }, { "epoch": 0.2186920494349883, "grad_norm": 739.1260375976562, "learning_rate": 9.592093824097335e-06, "loss": 32.4041, "step": 108260 }, { "epoch": 0.21871225006767211, "grad_norm": 176.38848876953125, "learning_rate": 9.591955719097924e-06, "loss": 28.1958, "step": 108270 }, { "epoch": 0.21873245070035593, "grad_norm": 253.26763916015625, "learning_rate": 9.591817591717878e-06, "loss": 19.1675, "step": 108280 }, { "epoch": 0.21875265133303975, "grad_norm": 391.62359619140625, "learning_rate": 9.591679441957871e-06, "loss": 24.1376, "step": 108290 }, { "epoch": 0.21877285196572357, "grad_norm": 371.51220703125, "learning_rate": 9.591541269818574e-06, "loss": 22.0051, "step": 108300 }, { "epoch": 0.2187930525984074, "grad_norm": 155.7129669189453, "learning_rate": 9.591403075300662e-06, "loss": 20.6682, "step": 108310 }, { "epoch": 0.2188132532310912, "grad_norm": 407.6725769042969, "learning_rate": 9.591264858404809e-06, "loss": 24.0403, "step": 108320 }, { "epoch": 0.218833453863775, "grad_norm": 317.60467529296875, "learning_rate": 9.591126619131687e-06, "loss": 23.5042, "step": 108330 }, { "epoch": 0.21885365449645883, "grad_norm": 124.71794891357422, "learning_rate": 9.590988357481972e-06, "loss": 9.3276, "step": 108340 }, { "epoch": 0.21887385512914265, "grad_norm": 157.57655334472656, "learning_rate": 9.590850073456335e-06, "loss": 27.8766, "step": 108350 }, { "epoch": 0.21889405576182647, "grad_norm": 239.76011657714844, "learning_rate": 9.590711767055454e-06, "loss": 16.3247, "step": 108360 }, { "epoch": 0.2189142563945103, "grad_norm": 254.53814697265625, "learning_rate": 9.590573438279997e-06, "loss": 17.6227, "step": 108370 }, { "epoch": 0.21893445702719408, "grad_norm": 572.0093994140625, "learning_rate": 9.590435087130645e-06, "loss": 35.2497, "step": 108380 }, { "epoch": 0.2189546576598779, "grad_norm": 140.5100555419922, "learning_rate": 9.590296713608068e-06, "loss": 14.5744, "step": 108390 }, { "epoch": 0.21897485829256172, "grad_norm": 380.4225769042969, "learning_rate": 9.590158317712941e-06, "loss": 21.6416, "step": 108400 }, { "epoch": 0.21899505892524554, "grad_norm": 169.7812042236328, "learning_rate": 9.59001989944594e-06, "loss": 23.7964, "step": 108410 }, { "epoch": 0.21901525955792936, "grad_norm": 316.658203125, "learning_rate": 9.589881458807738e-06, "loss": 25.3774, "step": 108420 }, { "epoch": 0.21903546019061318, "grad_norm": 354.240966796875, "learning_rate": 9.589742995799011e-06, "loss": 24.7112, "step": 108430 }, { "epoch": 0.219055660823297, "grad_norm": 637.869873046875, "learning_rate": 9.589604510420434e-06, "loss": 22.6492, "step": 108440 }, { "epoch": 0.2190758614559808, "grad_norm": 254.7999725341797, "learning_rate": 9.58946600267268e-06, "loss": 16.4531, "step": 108450 }, { "epoch": 0.2190960620886646, "grad_norm": 1081.049560546875, "learning_rate": 9.589327472556424e-06, "loss": 47.2779, "step": 108460 }, { "epoch": 0.21911626272134843, "grad_norm": 185.78102111816406, "learning_rate": 9.589188920072346e-06, "loss": 18.6179, "step": 108470 }, { "epoch": 0.21913646335403225, "grad_norm": 206.04837036132812, "learning_rate": 9.589050345221116e-06, "loss": 17.233, "step": 108480 }, { "epoch": 0.21915666398671607, "grad_norm": 225.95614624023438, "learning_rate": 9.58891174800341e-06, "loss": 21.9581, "step": 108490 }, { "epoch": 0.2191768646193999, "grad_norm": 632.118408203125, "learning_rate": 9.588773128419907e-06, "loss": 28.8825, "step": 108500 }, { "epoch": 0.21919706525208368, "grad_norm": 277.2879943847656, "learning_rate": 9.588634486471279e-06, "loss": 18.2186, "step": 108510 }, { "epoch": 0.2192172658847675, "grad_norm": 903.176513671875, "learning_rate": 9.588495822158203e-06, "loss": 29.135, "step": 108520 }, { "epoch": 0.21923746651745132, "grad_norm": 299.3230895996094, "learning_rate": 9.588357135481355e-06, "loss": 26.2037, "step": 108530 }, { "epoch": 0.21925766715013514, "grad_norm": 290.62646484375, "learning_rate": 9.58821842644141e-06, "loss": 21.9512, "step": 108540 }, { "epoch": 0.21927786778281896, "grad_norm": 360.2542419433594, "learning_rate": 9.588079695039048e-06, "loss": 30.69, "step": 108550 }, { "epoch": 0.21929806841550278, "grad_norm": 371.7599792480469, "learning_rate": 9.587940941274939e-06, "loss": 26.0734, "step": 108560 }, { "epoch": 0.21931826904818658, "grad_norm": 121.51842498779297, "learning_rate": 9.587802165149763e-06, "loss": 23.3653, "step": 108570 }, { "epoch": 0.2193384696808704, "grad_norm": 231.95567321777344, "learning_rate": 9.587663366664197e-06, "loss": 22.2887, "step": 108580 }, { "epoch": 0.21935867031355422, "grad_norm": 124.8155288696289, "learning_rate": 9.587524545818914e-06, "loss": 20.4211, "step": 108590 }, { "epoch": 0.21937887094623804, "grad_norm": 495.8392333984375, "learning_rate": 9.587385702614593e-06, "loss": 18.0654, "step": 108600 }, { "epoch": 0.21939907157892186, "grad_norm": 486.1311950683594, "learning_rate": 9.587246837051912e-06, "loss": 34.7977, "step": 108610 }, { "epoch": 0.21941927221160568, "grad_norm": 19.4840030670166, "learning_rate": 9.587107949131545e-06, "loss": 10.922, "step": 108620 }, { "epoch": 0.2194394728442895, "grad_norm": 271.9948425292969, "learning_rate": 9.586969038854172e-06, "loss": 23.9728, "step": 108630 }, { "epoch": 0.2194596734769733, "grad_norm": 340.553955078125, "learning_rate": 9.586830106220466e-06, "loss": 8.9331, "step": 108640 }, { "epoch": 0.2194798741096571, "grad_norm": 241.0862274169922, "learning_rate": 9.586691151231109e-06, "loss": 13.8368, "step": 108650 }, { "epoch": 0.21950007474234093, "grad_norm": 267.4566955566406, "learning_rate": 9.586552173886773e-06, "loss": 25.1959, "step": 108660 }, { "epoch": 0.21952027537502475, "grad_norm": 303.6412658691406, "learning_rate": 9.586413174188139e-06, "loss": 18.0029, "step": 108670 }, { "epoch": 0.21954047600770857, "grad_norm": 388.5754699707031, "learning_rate": 9.586274152135883e-06, "loss": 23.9984, "step": 108680 }, { "epoch": 0.2195606766403924, "grad_norm": 49.13725662231445, "learning_rate": 9.586135107730684e-06, "loss": 28.5929, "step": 108690 }, { "epoch": 0.21958087727307618, "grad_norm": 310.35247802734375, "learning_rate": 9.585996040973218e-06, "loss": 11.7547, "step": 108700 }, { "epoch": 0.21960107790576, "grad_norm": 179.86180114746094, "learning_rate": 9.585856951864163e-06, "loss": 27.5362, "step": 108710 }, { "epoch": 0.21962127853844382, "grad_norm": 322.9899597167969, "learning_rate": 9.585717840404198e-06, "loss": 14.3267, "step": 108720 }, { "epoch": 0.21964147917112764, "grad_norm": 534.9094848632812, "learning_rate": 9.585578706594e-06, "loss": 19.7108, "step": 108730 }, { "epoch": 0.21966167980381146, "grad_norm": 344.44940185546875, "learning_rate": 9.585439550434249e-06, "loss": 32.2886, "step": 108740 }, { "epoch": 0.21968188043649528, "grad_norm": 383.5118103027344, "learning_rate": 9.58530037192562e-06, "loss": 35.2527, "step": 108750 }, { "epoch": 0.2197020810691791, "grad_norm": 292.1354064941406, "learning_rate": 9.585161171068796e-06, "loss": 22.5884, "step": 108760 }, { "epoch": 0.2197222817018629, "grad_norm": 182.46669006347656, "learning_rate": 9.58502194786445e-06, "loss": 13.0608, "step": 108770 }, { "epoch": 0.2197424823345467, "grad_norm": 256.6542663574219, "learning_rate": 9.584882702313263e-06, "loss": 13.3005, "step": 108780 }, { "epoch": 0.21976268296723053, "grad_norm": 245.9798126220703, "learning_rate": 9.584743434415915e-06, "loss": 19.9255, "step": 108790 }, { "epoch": 0.21978288359991435, "grad_norm": 136.26907348632812, "learning_rate": 9.584604144173084e-06, "loss": 18.2431, "step": 108800 }, { "epoch": 0.21980308423259817, "grad_norm": 106.89862823486328, "learning_rate": 9.584464831585446e-06, "loss": 20.2378, "step": 108810 }, { "epoch": 0.219823284865282, "grad_norm": 431.8809509277344, "learning_rate": 9.584325496653683e-06, "loss": 45.3507, "step": 108820 }, { "epoch": 0.21984348549796578, "grad_norm": 191.08580017089844, "learning_rate": 9.584186139378476e-06, "loss": 17.7379, "step": 108830 }, { "epoch": 0.2198636861306496, "grad_norm": 359.6819763183594, "learning_rate": 9.5840467597605e-06, "loss": 35.5953, "step": 108840 }, { "epoch": 0.21988388676333342, "grad_norm": 43.08987808227539, "learning_rate": 9.583907357800436e-06, "loss": 23.5871, "step": 108850 }, { "epoch": 0.21990408739601724, "grad_norm": 227.52537536621094, "learning_rate": 9.583767933498964e-06, "loss": 33.6513, "step": 108860 }, { "epoch": 0.21992428802870106, "grad_norm": 357.66046142578125, "learning_rate": 9.583628486856761e-06, "loss": 26.0342, "step": 108870 }, { "epoch": 0.21994448866138488, "grad_norm": 152.5916290283203, "learning_rate": 9.583489017874512e-06, "loss": 21.7273, "step": 108880 }, { "epoch": 0.21996468929406868, "grad_norm": 183.95913696289062, "learning_rate": 9.583349526552893e-06, "loss": 18.2408, "step": 108890 }, { "epoch": 0.2199848899267525, "grad_norm": 248.4196319580078, "learning_rate": 9.583210012892582e-06, "loss": 17.4888, "step": 108900 }, { "epoch": 0.22000509055943632, "grad_norm": 380.6676330566406, "learning_rate": 9.583070476894263e-06, "loss": 24.02, "step": 108910 }, { "epoch": 0.22002529119212014, "grad_norm": 461.1121826171875, "learning_rate": 9.582930918558613e-06, "loss": 22.6228, "step": 108920 }, { "epoch": 0.22004549182480396, "grad_norm": 197.8666229248047, "learning_rate": 9.582791337886314e-06, "loss": 22.8376, "step": 108930 }, { "epoch": 0.22006569245748778, "grad_norm": 184.83290100097656, "learning_rate": 9.582651734878048e-06, "loss": 11.217, "step": 108940 }, { "epoch": 0.2200858930901716, "grad_norm": 0.0, "learning_rate": 9.58251210953449e-06, "loss": 23.9865, "step": 108950 }, { "epoch": 0.2201060937228554, "grad_norm": 288.3175354003906, "learning_rate": 9.582372461856326e-06, "loss": 24.568, "step": 108960 }, { "epoch": 0.2201262943555392, "grad_norm": 396.1977844238281, "learning_rate": 9.582232791844233e-06, "loss": 26.6908, "step": 108970 }, { "epoch": 0.22014649498822303, "grad_norm": 220.32412719726562, "learning_rate": 9.582093099498895e-06, "loss": 41.471, "step": 108980 }, { "epoch": 0.22016669562090685, "grad_norm": 256.9543762207031, "learning_rate": 9.581953384820989e-06, "loss": 22.0214, "step": 108990 }, { "epoch": 0.22018689625359067, "grad_norm": 86.44841003417969, "learning_rate": 9.581813647811199e-06, "loss": 14.774, "step": 109000 }, { "epoch": 0.2202070968862745, "grad_norm": 380.6009521484375, "learning_rate": 9.581673888470203e-06, "loss": 9.4313, "step": 109010 }, { "epoch": 0.22022729751895828, "grad_norm": 167.26853942871094, "learning_rate": 9.581534106798687e-06, "loss": 12.5383, "step": 109020 }, { "epoch": 0.2202474981516421, "grad_norm": 645.7265625, "learning_rate": 9.58139430279733e-06, "loss": 27.0389, "step": 109030 }, { "epoch": 0.22026769878432592, "grad_norm": 254.64410400390625, "learning_rate": 9.58125447646681e-06, "loss": 13.0877, "step": 109040 }, { "epoch": 0.22028789941700974, "grad_norm": 187.8528594970703, "learning_rate": 9.581114627807812e-06, "loss": 14.387, "step": 109050 }, { "epoch": 0.22030810004969356, "grad_norm": 485.74896240234375, "learning_rate": 9.580974756821017e-06, "loss": 21.5271, "step": 109060 }, { "epoch": 0.22032830068237738, "grad_norm": 256.3517761230469, "learning_rate": 9.580834863507107e-06, "loss": 16.2901, "step": 109070 }, { "epoch": 0.2203485013150612, "grad_norm": 109.03662109375, "learning_rate": 9.580694947866765e-06, "loss": 15.6666, "step": 109080 }, { "epoch": 0.220368701947745, "grad_norm": 269.5740966796875, "learning_rate": 9.58055500990067e-06, "loss": 32.3944, "step": 109090 }, { "epoch": 0.2203889025804288, "grad_norm": 465.8354797363281, "learning_rate": 9.580415049609503e-06, "loss": 22.2541, "step": 109100 }, { "epoch": 0.22040910321311263, "grad_norm": 342.383544921875, "learning_rate": 9.580275066993952e-06, "loss": 20.3796, "step": 109110 }, { "epoch": 0.22042930384579645, "grad_norm": 492.9195556640625, "learning_rate": 9.580135062054693e-06, "loss": 35.4921, "step": 109120 }, { "epoch": 0.22044950447848027, "grad_norm": 311.0647277832031, "learning_rate": 9.579995034792415e-06, "loss": 16.6343, "step": 109130 }, { "epoch": 0.2204697051111641, "grad_norm": 301.5606689453125, "learning_rate": 9.579854985207794e-06, "loss": 21.8599, "step": 109140 }, { "epoch": 0.22048990574384789, "grad_norm": 141.0708465576172, "learning_rate": 9.579714913301516e-06, "loss": 16.1184, "step": 109150 }, { "epoch": 0.2205101063765317, "grad_norm": 186.341552734375, "learning_rate": 9.579574819074263e-06, "loss": 20.8767, "step": 109160 }, { "epoch": 0.22053030700921553, "grad_norm": 346.0263366699219, "learning_rate": 9.579434702526717e-06, "loss": 37.2339, "step": 109170 }, { "epoch": 0.22055050764189935, "grad_norm": 264.92535400390625, "learning_rate": 9.579294563659562e-06, "loss": 27.8171, "step": 109180 }, { "epoch": 0.22057070827458317, "grad_norm": 133.20028686523438, "learning_rate": 9.579154402473482e-06, "loss": 41.5218, "step": 109190 }, { "epoch": 0.22059090890726699, "grad_norm": 9.992910385131836, "learning_rate": 9.579014218969158e-06, "loss": 26.5247, "step": 109200 }, { "epoch": 0.22061110953995078, "grad_norm": 521.8563232421875, "learning_rate": 9.578874013147274e-06, "loss": 17.451, "step": 109210 }, { "epoch": 0.2206313101726346, "grad_norm": 282.55377197265625, "learning_rate": 9.578733785008513e-06, "loss": 16.5939, "step": 109220 }, { "epoch": 0.22065151080531842, "grad_norm": 108.06549835205078, "learning_rate": 9.578593534553558e-06, "loss": 15.9802, "step": 109230 }, { "epoch": 0.22067171143800224, "grad_norm": 240.70773315429688, "learning_rate": 9.578453261783096e-06, "loss": 30.5075, "step": 109240 }, { "epoch": 0.22069191207068606, "grad_norm": 152.92335510253906, "learning_rate": 9.578312966697807e-06, "loss": 15.8215, "step": 109250 }, { "epoch": 0.22071211270336988, "grad_norm": 506.390380859375, "learning_rate": 9.578172649298374e-06, "loss": 33.3052, "step": 109260 }, { "epoch": 0.2207323133360537, "grad_norm": 298.7432556152344, "learning_rate": 9.578032309585485e-06, "loss": 20.7337, "step": 109270 }, { "epoch": 0.2207525139687375, "grad_norm": 503.2953796386719, "learning_rate": 9.577891947559821e-06, "loss": 34.1363, "step": 109280 }, { "epoch": 0.2207727146014213, "grad_norm": 139.85357666015625, "learning_rate": 9.577751563222067e-06, "loss": 28.0128, "step": 109290 }, { "epoch": 0.22079291523410513, "grad_norm": 149.286865234375, "learning_rate": 9.577611156572908e-06, "loss": 20.6176, "step": 109300 }, { "epoch": 0.22081311586678895, "grad_norm": 109.60212707519531, "learning_rate": 9.577470727613025e-06, "loss": 18.1801, "step": 109310 }, { "epoch": 0.22083331649947277, "grad_norm": 293.6245422363281, "learning_rate": 9.577330276343106e-06, "loss": 21.7977, "step": 109320 }, { "epoch": 0.2208535171321566, "grad_norm": 104.18318176269531, "learning_rate": 9.577189802763836e-06, "loss": 21.6488, "step": 109330 }, { "epoch": 0.22087371776484038, "grad_norm": 44.328025817871094, "learning_rate": 9.577049306875895e-06, "loss": 49.7055, "step": 109340 }, { "epoch": 0.2208939183975242, "grad_norm": 175.99925231933594, "learning_rate": 9.576908788679975e-06, "loss": 21.9065, "step": 109350 }, { "epoch": 0.22091411903020802, "grad_norm": 369.8132629394531, "learning_rate": 9.576768248176752e-06, "loss": 20.8455, "step": 109360 }, { "epoch": 0.22093431966289184, "grad_norm": 496.7899169921875, "learning_rate": 9.576627685366919e-06, "loss": 17.5363, "step": 109370 }, { "epoch": 0.22095452029557566, "grad_norm": 183.15475463867188, "learning_rate": 9.576487100251157e-06, "loss": 21.0793, "step": 109380 }, { "epoch": 0.22097472092825948, "grad_norm": 281.3775329589844, "learning_rate": 9.576346492830151e-06, "loss": 19.8756, "step": 109390 }, { "epoch": 0.22099492156094327, "grad_norm": 490.3565673828125, "learning_rate": 9.576205863104588e-06, "loss": 20.3739, "step": 109400 }, { "epoch": 0.2210151221936271, "grad_norm": 479.0928649902344, "learning_rate": 9.576065211075153e-06, "loss": 29.0558, "step": 109410 }, { "epoch": 0.22103532282631091, "grad_norm": 607.8324584960938, "learning_rate": 9.575924536742532e-06, "loss": 23.1803, "step": 109420 }, { "epoch": 0.22105552345899473, "grad_norm": 543.3140258789062, "learning_rate": 9.575783840107407e-06, "loss": 31.014, "step": 109430 }, { "epoch": 0.22107572409167855, "grad_norm": 224.99176025390625, "learning_rate": 9.575643121170468e-06, "loss": 16.1485, "step": 109440 }, { "epoch": 0.22109592472436237, "grad_norm": 355.9615478515625, "learning_rate": 9.5755023799324e-06, "loss": 31.9128, "step": 109450 }, { "epoch": 0.2211161253570462, "grad_norm": 6939.14501953125, "learning_rate": 9.575361616393888e-06, "loss": 29.7712, "step": 109460 }, { "epoch": 0.22113632598973, "grad_norm": 81.89857482910156, "learning_rate": 9.575220830555618e-06, "loss": 21.5051, "step": 109470 }, { "epoch": 0.2211565266224138, "grad_norm": 407.1587219238281, "learning_rate": 9.575080022418276e-06, "loss": 16.1761, "step": 109480 }, { "epoch": 0.22117672725509763, "grad_norm": 341.4566955566406, "learning_rate": 9.574939191982549e-06, "loss": 18.1402, "step": 109490 }, { "epoch": 0.22119692788778145, "grad_norm": 273.4284362792969, "learning_rate": 9.574798339249124e-06, "loss": 22.7358, "step": 109500 }, { "epoch": 0.22121712852046527, "grad_norm": 199.22698974609375, "learning_rate": 9.574657464218688e-06, "loss": 41.5092, "step": 109510 }, { "epoch": 0.2212373291531491, "grad_norm": 478.27496337890625, "learning_rate": 9.574516566891925e-06, "loss": 27.7296, "step": 109520 }, { "epoch": 0.22125752978583288, "grad_norm": 127.85787200927734, "learning_rate": 9.574375647269523e-06, "loss": 36.9516, "step": 109530 }, { "epoch": 0.2212777304185167, "grad_norm": 386.7724914550781, "learning_rate": 9.574234705352167e-06, "loss": 15.1221, "step": 109540 }, { "epoch": 0.22129793105120052, "grad_norm": 195.94418334960938, "learning_rate": 9.574093741140549e-06, "loss": 19.0289, "step": 109550 }, { "epoch": 0.22131813168388434, "grad_norm": 118.6589126586914, "learning_rate": 9.573952754635351e-06, "loss": 14.7089, "step": 109560 }, { "epoch": 0.22133833231656816, "grad_norm": 525.5623779296875, "learning_rate": 9.573811745837262e-06, "loss": 32.1876, "step": 109570 }, { "epoch": 0.22135853294925198, "grad_norm": 187.52427673339844, "learning_rate": 9.573670714746973e-06, "loss": 16.6708, "step": 109580 }, { "epoch": 0.2213787335819358, "grad_norm": 338.6473693847656, "learning_rate": 9.573529661365163e-06, "loss": 26.0287, "step": 109590 }, { "epoch": 0.2213989342146196, "grad_norm": 263.54986572265625, "learning_rate": 9.573388585692525e-06, "loss": 17.1169, "step": 109600 }, { "epoch": 0.2214191348473034, "grad_norm": 365.46826171875, "learning_rate": 9.573247487729747e-06, "loss": 25.0077, "step": 109610 }, { "epoch": 0.22143933547998723, "grad_norm": 285.3547668457031, "learning_rate": 9.573106367477515e-06, "loss": 21.5376, "step": 109620 }, { "epoch": 0.22145953611267105, "grad_norm": 38.95759963989258, "learning_rate": 9.572965224936517e-06, "loss": 15.4801, "step": 109630 }, { "epoch": 0.22147973674535487, "grad_norm": 366.3638610839844, "learning_rate": 9.572824060107442e-06, "loss": 15.9213, "step": 109640 }, { "epoch": 0.2214999373780387, "grad_norm": 453.1857604980469, "learning_rate": 9.572682872990975e-06, "loss": 26.2439, "step": 109650 }, { "epoch": 0.22152013801072248, "grad_norm": 404.8188781738281, "learning_rate": 9.572541663587809e-06, "loss": 13.8608, "step": 109660 }, { "epoch": 0.2215403386434063, "grad_norm": 85.65292358398438, "learning_rate": 9.572400431898627e-06, "loss": 27.3549, "step": 109670 }, { "epoch": 0.22156053927609012, "grad_norm": 213.8715057373047, "learning_rate": 9.572259177924122e-06, "loss": 25.3196, "step": 109680 }, { "epoch": 0.22158073990877394, "grad_norm": 129.02317810058594, "learning_rate": 9.572117901664979e-06, "loss": 23.3134, "step": 109690 }, { "epoch": 0.22160094054145776, "grad_norm": 244.6214141845703, "learning_rate": 9.571976603121889e-06, "loss": 40.8936, "step": 109700 }, { "epoch": 0.22162114117414158, "grad_norm": 288.1636047363281, "learning_rate": 9.571835282295539e-06, "loss": 24.8268, "step": 109710 }, { "epoch": 0.22164134180682538, "grad_norm": 153.31565856933594, "learning_rate": 9.571693939186618e-06, "loss": 13.5628, "step": 109720 }, { "epoch": 0.2216615424395092, "grad_norm": 450.1059265136719, "learning_rate": 9.571552573795814e-06, "loss": 19.0032, "step": 109730 }, { "epoch": 0.22168174307219302, "grad_norm": 696.9970092773438, "learning_rate": 9.571411186123818e-06, "loss": 27.2788, "step": 109740 }, { "epoch": 0.22170194370487684, "grad_norm": 325.3358459472656, "learning_rate": 9.571269776171319e-06, "loss": 30.878, "step": 109750 }, { "epoch": 0.22172214433756066, "grad_norm": 243.46710205078125, "learning_rate": 9.571128343939006e-06, "loss": 11.7761, "step": 109760 }, { "epoch": 0.22174234497024448, "grad_norm": 311.0843505859375, "learning_rate": 9.570986889427567e-06, "loss": 24.008, "step": 109770 }, { "epoch": 0.2217625456029283, "grad_norm": 175.88487243652344, "learning_rate": 9.570845412637692e-06, "loss": 34.7465, "step": 109780 }, { "epoch": 0.2217827462356121, "grad_norm": 263.451904296875, "learning_rate": 9.570703913570072e-06, "loss": 23.5451, "step": 109790 }, { "epoch": 0.2218029468682959, "grad_norm": 232.73577880859375, "learning_rate": 9.570562392225395e-06, "loss": 7.0586, "step": 109800 }, { "epoch": 0.22182314750097973, "grad_norm": 438.08477783203125, "learning_rate": 9.570420848604351e-06, "loss": 13.1313, "step": 109810 }, { "epoch": 0.22184334813366355, "grad_norm": 311.274658203125, "learning_rate": 9.57027928270763e-06, "loss": 22.5568, "step": 109820 }, { "epoch": 0.22186354876634737, "grad_norm": 155.60787963867188, "learning_rate": 9.570137694535922e-06, "loss": 27.3418, "step": 109830 }, { "epoch": 0.2218837493990312, "grad_norm": 228.4898223876953, "learning_rate": 9.569996084089918e-06, "loss": 16.1038, "step": 109840 }, { "epoch": 0.22190395003171498, "grad_norm": 591.5414428710938, "learning_rate": 9.569854451370306e-06, "loss": 38.803, "step": 109850 }, { "epoch": 0.2219241506643988, "grad_norm": 527.4155883789062, "learning_rate": 9.569712796377779e-06, "loss": 34.0214, "step": 109860 }, { "epoch": 0.22194435129708262, "grad_norm": 184.05740356445312, "learning_rate": 9.569571119113025e-06, "loss": 25.3467, "step": 109870 }, { "epoch": 0.22196455192976644, "grad_norm": 275.1253967285156, "learning_rate": 9.569429419576737e-06, "loss": 34.2447, "step": 109880 }, { "epoch": 0.22198475256245026, "grad_norm": 92.5997543334961, "learning_rate": 9.569287697769602e-06, "loss": 13.3419, "step": 109890 }, { "epoch": 0.22200495319513408, "grad_norm": 256.9894104003906, "learning_rate": 9.569145953692316e-06, "loss": 20.5945, "step": 109900 }, { "epoch": 0.2220251538278179, "grad_norm": 122.87030029296875, "learning_rate": 9.569004187345565e-06, "loss": 26.5605, "step": 109910 }, { "epoch": 0.2220453544605017, "grad_norm": 17.069637298583984, "learning_rate": 9.568862398730042e-06, "loss": 23.4545, "step": 109920 }, { "epoch": 0.2220655550931855, "grad_norm": 250.59152221679688, "learning_rate": 9.568720587846437e-06, "loss": 16.7365, "step": 109930 }, { "epoch": 0.22208575572586933, "grad_norm": 376.0030212402344, "learning_rate": 9.568578754695443e-06, "loss": 33.5143, "step": 109940 }, { "epoch": 0.22210595635855315, "grad_norm": 325.27960205078125, "learning_rate": 9.56843689927775e-06, "loss": 23.8858, "step": 109950 }, { "epoch": 0.22212615699123697, "grad_norm": 264.2638244628906, "learning_rate": 9.568295021594049e-06, "loss": 30.5285, "step": 109960 }, { "epoch": 0.2221463576239208, "grad_norm": 210.65663146972656, "learning_rate": 9.568153121645033e-06, "loss": 13.0775, "step": 109970 }, { "epoch": 0.22216655825660458, "grad_norm": 432.63165283203125, "learning_rate": 9.568011199431392e-06, "loss": 13.9511, "step": 109980 }, { "epoch": 0.2221867588892884, "grad_norm": 295.8706970214844, "learning_rate": 9.56786925495382e-06, "loss": 46.2132, "step": 109990 }, { "epoch": 0.22220695952197222, "grad_norm": 922.2898559570312, "learning_rate": 9.567727288213005e-06, "loss": 37.3061, "step": 110000 }, { "epoch": 0.22222716015465604, "grad_norm": 62.03990936279297, "learning_rate": 9.567585299209642e-06, "loss": 15.3629, "step": 110010 }, { "epoch": 0.22224736078733986, "grad_norm": 322.0854187011719, "learning_rate": 9.567443287944422e-06, "loss": 22.156, "step": 110020 }, { "epoch": 0.22226756142002368, "grad_norm": 420.1650085449219, "learning_rate": 9.567301254418038e-06, "loss": 24.48, "step": 110030 }, { "epoch": 0.22228776205270748, "grad_norm": 458.9040832519531, "learning_rate": 9.567159198631181e-06, "loss": 26.5465, "step": 110040 }, { "epoch": 0.2223079626853913, "grad_norm": 366.5994567871094, "learning_rate": 9.567017120584545e-06, "loss": 31.9622, "step": 110050 }, { "epoch": 0.22232816331807512, "grad_norm": 119.48983764648438, "learning_rate": 9.566875020278822e-06, "loss": 33.9006, "step": 110060 }, { "epoch": 0.22234836395075894, "grad_norm": 145.07269287109375, "learning_rate": 9.566732897714703e-06, "loss": 26.4339, "step": 110070 }, { "epoch": 0.22236856458344276, "grad_norm": 120.31144714355469, "learning_rate": 9.56659075289288e-06, "loss": 19.3976, "step": 110080 }, { "epoch": 0.22238876521612658, "grad_norm": 257.7059020996094, "learning_rate": 9.566448585814051e-06, "loss": 22.1622, "step": 110090 }, { "epoch": 0.2224089658488104, "grad_norm": 26.78160285949707, "learning_rate": 9.566306396478904e-06, "loss": 19.9579, "step": 110100 }, { "epoch": 0.2224291664814942, "grad_norm": 206.0853729248047, "learning_rate": 9.566164184888134e-06, "loss": 20.1954, "step": 110110 }, { "epoch": 0.222449367114178, "grad_norm": 284.22076416015625, "learning_rate": 9.566021951042432e-06, "loss": 15.5114, "step": 110120 }, { "epoch": 0.22246956774686183, "grad_norm": 207.64578247070312, "learning_rate": 9.565879694942493e-06, "loss": 18.3823, "step": 110130 }, { "epoch": 0.22248976837954565, "grad_norm": 160.10696411132812, "learning_rate": 9.565737416589011e-06, "loss": 10.6541, "step": 110140 }, { "epoch": 0.22250996901222947, "grad_norm": 271.3604431152344, "learning_rate": 9.565595115982678e-06, "loss": 16.2753, "step": 110150 }, { "epoch": 0.2225301696449133, "grad_norm": 373.8836364746094, "learning_rate": 9.56545279312419e-06, "loss": 20.3337, "step": 110160 }, { "epoch": 0.22255037027759708, "grad_norm": 391.1517028808594, "learning_rate": 9.565310448014236e-06, "loss": 21.7344, "step": 110170 }, { "epoch": 0.2225705709102809, "grad_norm": 195.36814880371094, "learning_rate": 9.565168080653514e-06, "loss": 23.4376, "step": 110180 }, { "epoch": 0.22259077154296472, "grad_norm": 270.07147216796875, "learning_rate": 9.565025691042717e-06, "loss": 43.0316, "step": 110190 }, { "epoch": 0.22261097217564854, "grad_norm": 229.08473205566406, "learning_rate": 9.564883279182538e-06, "loss": 13.3064, "step": 110200 }, { "epoch": 0.22263117280833236, "grad_norm": 396.48858642578125, "learning_rate": 9.56474084507367e-06, "loss": 17.1356, "step": 110210 }, { "epoch": 0.22265137344101618, "grad_norm": 226.9693603515625, "learning_rate": 9.56459838871681e-06, "loss": 29.6693, "step": 110220 }, { "epoch": 0.2226715740737, "grad_norm": 159.76206970214844, "learning_rate": 9.564455910112651e-06, "loss": 12.995, "step": 110230 }, { "epoch": 0.2226917747063838, "grad_norm": 268.24200439453125, "learning_rate": 9.564313409261888e-06, "loss": 37.0859, "step": 110240 }, { "epoch": 0.2227119753390676, "grad_norm": 253.6333770751953, "learning_rate": 9.564170886165214e-06, "loss": 23.8516, "step": 110250 }, { "epoch": 0.22273217597175143, "grad_norm": 80.44719696044922, "learning_rate": 9.564028340823325e-06, "loss": 19.3532, "step": 110260 }, { "epoch": 0.22275237660443525, "grad_norm": 325.6434020996094, "learning_rate": 9.563885773236917e-06, "loss": 18.1428, "step": 110270 }, { "epoch": 0.22277257723711907, "grad_norm": 420.10430908203125, "learning_rate": 9.563743183406683e-06, "loss": 21.0136, "step": 110280 }, { "epoch": 0.2227927778698029, "grad_norm": 689.209716796875, "learning_rate": 9.563600571333316e-06, "loss": 24.9799, "step": 110290 }, { "epoch": 0.22281297850248669, "grad_norm": 19.64137077331543, "learning_rate": 9.563457937017514e-06, "loss": 28.9119, "step": 110300 }, { "epoch": 0.2228331791351705, "grad_norm": 193.09048461914062, "learning_rate": 9.563315280459973e-06, "loss": 36.4431, "step": 110310 }, { "epoch": 0.22285337976785433, "grad_norm": 432.9487609863281, "learning_rate": 9.563172601661386e-06, "loss": 43.3742, "step": 110320 }, { "epoch": 0.22287358040053815, "grad_norm": 229.73614501953125, "learning_rate": 9.56302990062245e-06, "loss": 18.5886, "step": 110330 }, { "epoch": 0.22289378103322197, "grad_norm": 93.28858184814453, "learning_rate": 9.562887177343858e-06, "loss": 9.417, "step": 110340 }, { "epoch": 0.22291398166590579, "grad_norm": 640.6190185546875, "learning_rate": 9.562744431826307e-06, "loss": 28.5879, "step": 110350 }, { "epoch": 0.22293418229858958, "grad_norm": 170.08168029785156, "learning_rate": 9.562601664070495e-06, "loss": 13.6694, "step": 110360 }, { "epoch": 0.2229543829312734, "grad_norm": 868.7437133789062, "learning_rate": 9.562458874077114e-06, "loss": 30.0569, "step": 110370 }, { "epoch": 0.22297458356395722, "grad_norm": 641.279296875, "learning_rate": 9.562316061846863e-06, "loss": 21.7944, "step": 110380 }, { "epoch": 0.22299478419664104, "grad_norm": 306.93719482421875, "learning_rate": 9.562173227380437e-06, "loss": 17.9975, "step": 110390 }, { "epoch": 0.22301498482932486, "grad_norm": 279.1541442871094, "learning_rate": 9.562030370678533e-06, "loss": 15.9147, "step": 110400 }, { "epoch": 0.22303518546200868, "grad_norm": 92.586181640625, "learning_rate": 9.561887491741844e-06, "loss": 22.8295, "step": 110410 }, { "epoch": 0.2230553860946925, "grad_norm": 12.00794506072998, "learning_rate": 9.56174459057107e-06, "loss": 13.3735, "step": 110420 }, { "epoch": 0.2230755867273763, "grad_norm": 658.4755249023438, "learning_rate": 9.561601667166905e-06, "loss": 22.1554, "step": 110430 }, { "epoch": 0.2230957873600601, "grad_norm": 146.3546600341797, "learning_rate": 9.561458721530047e-06, "loss": 20.7923, "step": 110440 }, { "epoch": 0.22311598799274393, "grad_norm": 145.18399047851562, "learning_rate": 9.561315753661194e-06, "loss": 19.8183, "step": 110450 }, { "epoch": 0.22313618862542775, "grad_norm": 403.1817321777344, "learning_rate": 9.56117276356104e-06, "loss": 16.83, "step": 110460 }, { "epoch": 0.22315638925811157, "grad_norm": 382.11468505859375, "learning_rate": 9.561029751230282e-06, "loss": 26.9456, "step": 110470 }, { "epoch": 0.2231765898907954, "grad_norm": 141.77001953125, "learning_rate": 9.56088671666962e-06, "loss": 12.3762, "step": 110480 }, { "epoch": 0.22319679052347918, "grad_norm": 358.4727478027344, "learning_rate": 9.56074365987975e-06, "loss": 16.8705, "step": 110490 }, { "epoch": 0.223216991156163, "grad_norm": 600.1101684570312, "learning_rate": 9.560600580861366e-06, "loss": 32.9541, "step": 110500 }, { "epoch": 0.22323719178884682, "grad_norm": 405.09149169921875, "learning_rate": 9.560457479615168e-06, "loss": 20.4584, "step": 110510 }, { "epoch": 0.22325739242153064, "grad_norm": 305.84527587890625, "learning_rate": 9.560314356141856e-06, "loss": 20.6952, "step": 110520 }, { "epoch": 0.22327759305421446, "grad_norm": 354.3216857910156, "learning_rate": 9.560171210442123e-06, "loss": 24.3883, "step": 110530 }, { "epoch": 0.22329779368689828, "grad_norm": 174.32066345214844, "learning_rate": 9.56002804251667e-06, "loss": 15.0791, "step": 110540 }, { "epoch": 0.2233179943195821, "grad_norm": 214.17581176757812, "learning_rate": 9.559884852366191e-06, "loss": 25.2138, "step": 110550 }, { "epoch": 0.2233381949522659, "grad_norm": 127.36180114746094, "learning_rate": 9.55974163999139e-06, "loss": 33.2041, "step": 110560 }, { "epoch": 0.22335839558494971, "grad_norm": 543.6390991210938, "learning_rate": 9.559598405392959e-06, "loss": 16.8199, "step": 110570 }, { "epoch": 0.22337859621763353, "grad_norm": 33.91348648071289, "learning_rate": 9.559455148571597e-06, "loss": 27.0009, "step": 110580 }, { "epoch": 0.22339879685031735, "grad_norm": 239.38070678710938, "learning_rate": 9.559311869528005e-06, "loss": 29.9802, "step": 110590 }, { "epoch": 0.22341899748300117, "grad_norm": 224.66683959960938, "learning_rate": 9.55916856826288e-06, "loss": 25.8993, "step": 110600 }, { "epoch": 0.223439198115685, "grad_norm": 256.8668212890625, "learning_rate": 9.55902524477692e-06, "loss": 10.9828, "step": 110610 }, { "epoch": 0.2234593987483688, "grad_norm": 338.1199951171875, "learning_rate": 9.558881899070824e-06, "loss": 32.8665, "step": 110620 }, { "epoch": 0.2234795993810526, "grad_norm": 95.58212280273438, "learning_rate": 9.55873853114529e-06, "loss": 26.4117, "step": 110630 }, { "epoch": 0.22349980001373643, "grad_norm": 0.0, "learning_rate": 9.55859514100102e-06, "loss": 17.9668, "step": 110640 }, { "epoch": 0.22352000064642025, "grad_norm": 374.7452392578125, "learning_rate": 9.558451728638706e-06, "loss": 15.4454, "step": 110650 }, { "epoch": 0.22354020127910407, "grad_norm": 193.03927612304688, "learning_rate": 9.558308294059055e-06, "loss": 8.2423, "step": 110660 }, { "epoch": 0.2235604019117879, "grad_norm": 207.62689208984375, "learning_rate": 9.55816483726276e-06, "loss": 27.8443, "step": 110670 }, { "epoch": 0.22358060254447168, "grad_norm": 155.8469696044922, "learning_rate": 9.558021358250523e-06, "loss": 31.2844, "step": 110680 }, { "epoch": 0.2236008031771555, "grad_norm": 680.109130859375, "learning_rate": 9.557877857023043e-06, "loss": 22.0174, "step": 110690 }, { "epoch": 0.22362100380983932, "grad_norm": 1149.5538330078125, "learning_rate": 9.557734333581019e-06, "loss": 49.6211, "step": 110700 }, { "epoch": 0.22364120444252314, "grad_norm": 151.69715881347656, "learning_rate": 9.557590787925151e-06, "loss": 41.828, "step": 110710 }, { "epoch": 0.22366140507520696, "grad_norm": 276.3315734863281, "learning_rate": 9.557447220056137e-06, "loss": 24.0015, "step": 110720 }, { "epoch": 0.22368160570789078, "grad_norm": 165.53768920898438, "learning_rate": 9.557303629974679e-06, "loss": 22.7491, "step": 110730 }, { "epoch": 0.2237018063405746, "grad_norm": 227.6033477783203, "learning_rate": 9.557160017681475e-06, "loss": 16.3804, "step": 110740 }, { "epoch": 0.2237220069732584, "grad_norm": 226.91281127929688, "learning_rate": 9.557016383177226e-06, "loss": 34.8747, "step": 110750 }, { "epoch": 0.2237422076059422, "grad_norm": 579.3111572265625, "learning_rate": 9.556872726462634e-06, "loss": 17.5548, "step": 110760 }, { "epoch": 0.22376240823862603, "grad_norm": 461.57305908203125, "learning_rate": 9.556729047538395e-06, "loss": 22.2927, "step": 110770 }, { "epoch": 0.22378260887130985, "grad_norm": 359.79510498046875, "learning_rate": 9.556585346405211e-06, "loss": 21.6176, "step": 110780 }, { "epoch": 0.22380280950399367, "grad_norm": 383.796875, "learning_rate": 9.556441623063783e-06, "loss": 26.1557, "step": 110790 }, { "epoch": 0.2238230101366775, "grad_norm": 243.87445068359375, "learning_rate": 9.556297877514812e-06, "loss": 17.1647, "step": 110800 }, { "epoch": 0.22384321076936128, "grad_norm": 270.326171875, "learning_rate": 9.556154109758998e-06, "loss": 23.9335, "step": 110810 }, { "epoch": 0.2238634114020451, "grad_norm": 77.00601959228516, "learning_rate": 9.55601031979704e-06, "loss": 15.2184, "step": 110820 }, { "epoch": 0.22388361203472892, "grad_norm": 773.5745849609375, "learning_rate": 9.55586650762964e-06, "loss": 47.3383, "step": 110830 }, { "epoch": 0.22390381266741274, "grad_norm": 224.04649353027344, "learning_rate": 9.555722673257502e-06, "loss": 20.3658, "step": 110840 }, { "epoch": 0.22392401330009656, "grad_norm": 70.09964752197266, "learning_rate": 9.555578816681321e-06, "loss": 21.4103, "step": 110850 }, { "epoch": 0.22394421393278038, "grad_norm": 159.92333984375, "learning_rate": 9.555434937901801e-06, "loss": 28.2302, "step": 110860 }, { "epoch": 0.2239644145654642, "grad_norm": 576.155517578125, "learning_rate": 9.555291036919647e-06, "loss": 19.3274, "step": 110870 }, { "epoch": 0.223984615198148, "grad_norm": 292.8363037109375, "learning_rate": 9.555147113735555e-06, "loss": 17.7205, "step": 110880 }, { "epoch": 0.22400481583083182, "grad_norm": 306.83905029296875, "learning_rate": 9.55500316835023e-06, "loss": 20.457, "step": 110890 }, { "epoch": 0.22402501646351564, "grad_norm": 494.1678466796875, "learning_rate": 9.554859200764371e-06, "loss": 14.2849, "step": 110900 }, { "epoch": 0.22404521709619946, "grad_norm": 138.06004333496094, "learning_rate": 9.55471521097868e-06, "loss": 12.0977, "step": 110910 }, { "epoch": 0.22406541772888328, "grad_norm": 8.808663368225098, "learning_rate": 9.554571198993858e-06, "loss": 14.2956, "step": 110920 }, { "epoch": 0.2240856183615671, "grad_norm": 0.0, "learning_rate": 9.55442716481061e-06, "loss": 37.3573, "step": 110930 }, { "epoch": 0.2241058189942509, "grad_norm": 241.3163299560547, "learning_rate": 9.554283108429639e-06, "loss": 12.0447, "step": 110940 }, { "epoch": 0.2241260196269347, "grad_norm": 340.2216796875, "learning_rate": 9.55413902985164e-06, "loss": 21.5378, "step": 110950 }, { "epoch": 0.22414622025961853, "grad_norm": 401.5752258300781, "learning_rate": 9.553994929077323e-06, "loss": 22.1208, "step": 110960 }, { "epoch": 0.22416642089230235, "grad_norm": 116.00533294677734, "learning_rate": 9.553850806107387e-06, "loss": 16.8488, "step": 110970 }, { "epoch": 0.22418662152498617, "grad_norm": 125.61019897460938, "learning_rate": 9.553706660942532e-06, "loss": 24.6811, "step": 110980 }, { "epoch": 0.22420682215767, "grad_norm": 1310.6671142578125, "learning_rate": 9.553562493583466e-06, "loss": 25.4104, "step": 110990 }, { "epoch": 0.22422702279035378, "grad_norm": 520.9737548828125, "learning_rate": 9.553418304030886e-06, "loss": 22.5397, "step": 111000 }, { "epoch": 0.2242472234230376, "grad_norm": 531.1607666015625, "learning_rate": 9.553274092285498e-06, "loss": 19.8249, "step": 111010 }, { "epoch": 0.22426742405572142, "grad_norm": 138.3127899169922, "learning_rate": 9.553129858348006e-06, "loss": 26.3741, "step": 111020 }, { "epoch": 0.22428762468840524, "grad_norm": 364.5376892089844, "learning_rate": 9.55298560221911e-06, "loss": 30.8241, "step": 111030 }, { "epoch": 0.22430782532108906, "grad_norm": 169.05796813964844, "learning_rate": 9.552841323899515e-06, "loss": 21.3775, "step": 111040 }, { "epoch": 0.22432802595377288, "grad_norm": 164.5149383544922, "learning_rate": 9.552697023389923e-06, "loss": 19.6569, "step": 111050 }, { "epoch": 0.2243482265864567, "grad_norm": 176.39230346679688, "learning_rate": 9.552552700691037e-06, "loss": 23.7723, "step": 111060 }, { "epoch": 0.2243684272191405, "grad_norm": 671.264404296875, "learning_rate": 9.552408355803564e-06, "loss": 30.7752, "step": 111070 }, { "epoch": 0.2243886278518243, "grad_norm": 153.21730041503906, "learning_rate": 9.552263988728204e-06, "loss": 21.6372, "step": 111080 }, { "epoch": 0.22440882848450813, "grad_norm": 325.82635498046875, "learning_rate": 9.552119599465659e-06, "loss": 33.9417, "step": 111090 }, { "epoch": 0.22442902911719195, "grad_norm": 444.9881286621094, "learning_rate": 9.551975188016638e-06, "loss": 27.9078, "step": 111100 }, { "epoch": 0.22444922974987577, "grad_norm": 101.31181335449219, "learning_rate": 9.55183075438184e-06, "loss": 11.423, "step": 111110 }, { "epoch": 0.2244694303825596, "grad_norm": 563.1441040039062, "learning_rate": 9.551686298561972e-06, "loss": 24.1211, "step": 111120 }, { "epoch": 0.22448963101524338, "grad_norm": 372.5039367675781, "learning_rate": 9.551541820557737e-06, "loss": 25.3072, "step": 111130 }, { "epoch": 0.2245098316479272, "grad_norm": 245.90341186523438, "learning_rate": 9.551397320369838e-06, "loss": 20.3606, "step": 111140 }, { "epoch": 0.22453003228061102, "grad_norm": 88.66056060791016, "learning_rate": 9.551252797998982e-06, "loss": 11.6917, "step": 111150 }, { "epoch": 0.22455023291329484, "grad_norm": 134.4423370361328, "learning_rate": 9.55110825344587e-06, "loss": 13.7895, "step": 111160 }, { "epoch": 0.22457043354597866, "grad_norm": 184.56472778320312, "learning_rate": 9.550963686711212e-06, "loss": 28.1436, "step": 111170 }, { "epoch": 0.22459063417866248, "grad_norm": 302.3545227050781, "learning_rate": 9.550819097795706e-06, "loss": 26.7283, "step": 111180 }, { "epoch": 0.2246108348113463, "grad_norm": 264.8648681640625, "learning_rate": 9.55067448670006e-06, "loss": 7.1499, "step": 111190 }, { "epoch": 0.2246310354440301, "grad_norm": 555.6761474609375, "learning_rate": 9.550529853424979e-06, "loss": 21.4105, "step": 111200 }, { "epoch": 0.22465123607671392, "grad_norm": 491.2004699707031, "learning_rate": 9.550385197971168e-06, "loss": 20.0296, "step": 111210 }, { "epoch": 0.22467143670939774, "grad_norm": 404.06719970703125, "learning_rate": 9.55024052033933e-06, "loss": 27.9131, "step": 111220 }, { "epoch": 0.22469163734208156, "grad_norm": 287.04010009765625, "learning_rate": 9.550095820530172e-06, "loss": 16.0414, "step": 111230 }, { "epoch": 0.22471183797476538, "grad_norm": 148.7548065185547, "learning_rate": 9.549951098544399e-06, "loss": 13.5729, "step": 111240 }, { "epoch": 0.2247320386074492, "grad_norm": 221.74923706054688, "learning_rate": 9.549806354382716e-06, "loss": 32.9145, "step": 111250 }, { "epoch": 0.224752239240133, "grad_norm": 240.642578125, "learning_rate": 9.54966158804583e-06, "loss": 27.9709, "step": 111260 }, { "epoch": 0.2247724398728168, "grad_norm": 615.2760620117188, "learning_rate": 9.549516799534444e-06, "loss": 21.4, "step": 111270 }, { "epoch": 0.22479264050550063, "grad_norm": 199.6186065673828, "learning_rate": 9.549371988849266e-06, "loss": 28.8442, "step": 111280 }, { "epoch": 0.22481284113818445, "grad_norm": 211.4435577392578, "learning_rate": 9.549227155991e-06, "loss": 27.7854, "step": 111290 }, { "epoch": 0.22483304177086827, "grad_norm": 296.22930908203125, "learning_rate": 9.549082300960351e-06, "loss": 16.6462, "step": 111300 }, { "epoch": 0.2248532424035521, "grad_norm": 237.1007843017578, "learning_rate": 9.54893742375803e-06, "loss": 14.019, "step": 111310 }, { "epoch": 0.22487344303623588, "grad_norm": 490.4355163574219, "learning_rate": 9.548792524384735e-06, "loss": 35.2833, "step": 111320 }, { "epoch": 0.2248936436689197, "grad_norm": 307.8839111328125, "learning_rate": 9.54864760284118e-06, "loss": 19.1933, "step": 111330 }, { "epoch": 0.22491384430160352, "grad_norm": 414.67303466796875, "learning_rate": 9.548502659128069e-06, "loss": 24.4811, "step": 111340 }, { "epoch": 0.22493404493428734, "grad_norm": 295.5519104003906, "learning_rate": 9.548357693246107e-06, "loss": 30.9987, "step": 111350 }, { "epoch": 0.22495424556697116, "grad_norm": 229.44546508789062, "learning_rate": 9.548212705196e-06, "loss": 17.1033, "step": 111360 }, { "epoch": 0.22497444619965498, "grad_norm": 571.6144409179688, "learning_rate": 9.548067694978457e-06, "loss": 46.2738, "step": 111370 }, { "epoch": 0.2249946468323388, "grad_norm": 48.2669563293457, "learning_rate": 9.547922662594183e-06, "loss": 25.2015, "step": 111380 }, { "epoch": 0.2250148474650226, "grad_norm": 410.2971496582031, "learning_rate": 9.547777608043886e-06, "loss": 25.7602, "step": 111390 }, { "epoch": 0.2250350480977064, "grad_norm": 310.7509765625, "learning_rate": 9.547632531328273e-06, "loss": 29.0417, "step": 111400 }, { "epoch": 0.22505524873039023, "grad_norm": 381.1390380859375, "learning_rate": 9.54748743244805e-06, "loss": 32.5122, "step": 111410 }, { "epoch": 0.22507544936307405, "grad_norm": 401.6119384765625, "learning_rate": 9.547342311403924e-06, "loss": 23.0681, "step": 111420 }, { "epoch": 0.22509564999575787, "grad_norm": 490.1851806640625, "learning_rate": 9.547197168196605e-06, "loss": 24.3746, "step": 111430 }, { "epoch": 0.2251158506284417, "grad_norm": 509.3726806640625, "learning_rate": 9.547052002826797e-06, "loss": 18.277, "step": 111440 }, { "epoch": 0.22513605126112549, "grad_norm": 198.0814208984375, "learning_rate": 9.546906815295209e-06, "loss": 15.922, "step": 111450 }, { "epoch": 0.2251562518938093, "grad_norm": 255.11842346191406, "learning_rate": 9.54676160560255e-06, "loss": 32.0514, "step": 111460 }, { "epoch": 0.22517645252649313, "grad_norm": 493.787353515625, "learning_rate": 9.546616373749525e-06, "loss": 22.4951, "step": 111470 }, { "epoch": 0.22519665315917695, "grad_norm": 281.7157897949219, "learning_rate": 9.546471119736845e-06, "loss": 25.4333, "step": 111480 }, { "epoch": 0.22521685379186077, "grad_norm": 217.32823181152344, "learning_rate": 9.546325843565213e-06, "loss": 23.0987, "step": 111490 }, { "epoch": 0.22523705442454459, "grad_norm": 390.8224182128906, "learning_rate": 9.546180545235344e-06, "loss": 33.5996, "step": 111500 }, { "epoch": 0.2252572550572284, "grad_norm": 397.07232666015625, "learning_rate": 9.54603522474794e-06, "loss": 34.1884, "step": 111510 }, { "epoch": 0.2252774556899122, "grad_norm": 279.00152587890625, "learning_rate": 9.545889882103712e-06, "loss": 21.5092, "step": 111520 }, { "epoch": 0.22529765632259602, "grad_norm": 190.16567993164062, "learning_rate": 9.545744517303368e-06, "loss": 20.9833, "step": 111530 }, { "epoch": 0.22531785695527984, "grad_norm": 97.52873992919922, "learning_rate": 9.545599130347618e-06, "loss": 18.0239, "step": 111540 }, { "epoch": 0.22533805758796366, "grad_norm": 221.30447387695312, "learning_rate": 9.545453721237167e-06, "loss": 40.0455, "step": 111550 }, { "epoch": 0.22535825822064748, "grad_norm": 168.78677368164062, "learning_rate": 9.545308289972727e-06, "loss": 23.4019, "step": 111560 }, { "epoch": 0.2253784588533313, "grad_norm": 386.3153991699219, "learning_rate": 9.545162836555006e-06, "loss": 32.4465, "step": 111570 }, { "epoch": 0.2253986594860151, "grad_norm": 163.6924285888672, "learning_rate": 9.545017360984713e-06, "loss": 10.4205, "step": 111580 }, { "epoch": 0.2254188601186989, "grad_norm": 333.16943359375, "learning_rate": 9.544871863262556e-06, "loss": 15.5458, "step": 111590 }, { "epoch": 0.22543906075138273, "grad_norm": 203.3138427734375, "learning_rate": 9.544726343389245e-06, "loss": 16.6073, "step": 111600 }, { "epoch": 0.22545926138406655, "grad_norm": 181.36683654785156, "learning_rate": 9.544580801365488e-06, "loss": 19.9915, "step": 111610 }, { "epoch": 0.22547946201675037, "grad_norm": 25.083070755004883, "learning_rate": 9.544435237191996e-06, "loss": 13.0138, "step": 111620 }, { "epoch": 0.2254996626494342, "grad_norm": 541.534912109375, "learning_rate": 9.544289650869477e-06, "loss": 34.6503, "step": 111630 }, { "epoch": 0.22551986328211798, "grad_norm": 206.7118682861328, "learning_rate": 9.544144042398643e-06, "loss": 18.7425, "step": 111640 }, { "epoch": 0.2255400639148018, "grad_norm": 650.0897216796875, "learning_rate": 9.543998411780202e-06, "loss": 20.84, "step": 111650 }, { "epoch": 0.22556026454748562, "grad_norm": 188.9458770751953, "learning_rate": 9.543852759014863e-06, "loss": 25.6135, "step": 111660 }, { "epoch": 0.22558046518016944, "grad_norm": 225.12799072265625, "learning_rate": 9.543707084103337e-06, "loss": 18.5994, "step": 111670 }, { "epoch": 0.22560066581285326, "grad_norm": 274.1219787597656, "learning_rate": 9.543561387046333e-06, "loss": 15.8969, "step": 111680 }, { "epoch": 0.22562086644553708, "grad_norm": 367.08087158203125, "learning_rate": 9.543415667844562e-06, "loss": 13.8718, "step": 111690 }, { "epoch": 0.2256410670782209, "grad_norm": 475.44427490234375, "learning_rate": 9.543269926498735e-06, "loss": 23.5043, "step": 111700 }, { "epoch": 0.2256612677109047, "grad_norm": 541.9603271484375, "learning_rate": 9.54312416300956e-06, "loss": 32.4593, "step": 111710 }, { "epoch": 0.22568146834358851, "grad_norm": 583.3786010742188, "learning_rate": 9.542978377377752e-06, "loss": 16.3796, "step": 111720 }, { "epoch": 0.22570166897627233, "grad_norm": 82.69410705566406, "learning_rate": 9.542832569604014e-06, "loss": 20.2078, "step": 111730 }, { "epoch": 0.22572186960895615, "grad_norm": 330.8370666503906, "learning_rate": 9.542686739689064e-06, "loss": 30.1284, "step": 111740 }, { "epoch": 0.22574207024163997, "grad_norm": 671.6046752929688, "learning_rate": 9.54254088763361e-06, "loss": 36.5993, "step": 111750 }, { "epoch": 0.2257622708743238, "grad_norm": 162.51528930664062, "learning_rate": 9.54239501343836e-06, "loss": 11.4647, "step": 111760 }, { "epoch": 0.2257824715070076, "grad_norm": 505.20843505859375, "learning_rate": 9.542249117104032e-06, "loss": 25.6839, "step": 111770 }, { "epoch": 0.2258026721396914, "grad_norm": 643.5164794921875, "learning_rate": 9.54210319863133e-06, "loss": 27.2235, "step": 111780 }, { "epoch": 0.22582287277237523, "grad_norm": 177.39443969726562, "learning_rate": 9.541957258020967e-06, "loss": 19.5204, "step": 111790 }, { "epoch": 0.22584307340505905, "grad_norm": 333.2828063964844, "learning_rate": 9.541811295273657e-06, "loss": 18.8141, "step": 111800 }, { "epoch": 0.22586327403774287, "grad_norm": 143.3480682373047, "learning_rate": 9.541665310390109e-06, "loss": 24.3916, "step": 111810 }, { "epoch": 0.2258834746704267, "grad_norm": 218.4166259765625, "learning_rate": 9.541519303371034e-06, "loss": 15.0641, "step": 111820 }, { "epoch": 0.2259036753031105, "grad_norm": 99.32813262939453, "learning_rate": 9.541373274217145e-06, "loss": 18.7956, "step": 111830 }, { "epoch": 0.2259238759357943, "grad_norm": 179.49815368652344, "learning_rate": 9.541227222929155e-06, "loss": 16.2047, "step": 111840 }, { "epoch": 0.22594407656847812, "grad_norm": 118.6130599975586, "learning_rate": 9.541081149507774e-06, "loss": 24.7374, "step": 111850 }, { "epoch": 0.22596427720116194, "grad_norm": 129.2187957763672, "learning_rate": 9.540935053953713e-06, "loss": 29.4245, "step": 111860 }, { "epoch": 0.22598447783384576, "grad_norm": 12.94835090637207, "learning_rate": 9.540788936267686e-06, "loss": 29.3608, "step": 111870 }, { "epoch": 0.22600467846652958, "grad_norm": 26.18402862548828, "learning_rate": 9.540642796450403e-06, "loss": 19.0267, "step": 111880 }, { "epoch": 0.2260248790992134, "grad_norm": 429.79986572265625, "learning_rate": 9.540496634502581e-06, "loss": 20.1726, "step": 111890 }, { "epoch": 0.2260450797318972, "grad_norm": 350.2685546875, "learning_rate": 9.540350450424927e-06, "loss": 17.6844, "step": 111900 }, { "epoch": 0.226065280364581, "grad_norm": 322.7061462402344, "learning_rate": 9.540204244218155e-06, "loss": 24.1571, "step": 111910 }, { "epoch": 0.22608548099726483, "grad_norm": 78.60765075683594, "learning_rate": 9.54005801588298e-06, "loss": 20.8224, "step": 111920 }, { "epoch": 0.22610568162994865, "grad_norm": 224.6051025390625, "learning_rate": 9.539911765420112e-06, "loss": 19.4606, "step": 111930 }, { "epoch": 0.22612588226263247, "grad_norm": 160.77513122558594, "learning_rate": 9.539765492830265e-06, "loss": 17.9713, "step": 111940 }, { "epoch": 0.2261460828953163, "grad_norm": 327.71551513671875, "learning_rate": 9.53961919811415e-06, "loss": 15.9525, "step": 111950 }, { "epoch": 0.22616628352800008, "grad_norm": 386.08416748046875, "learning_rate": 9.539472881272483e-06, "loss": 22.4013, "step": 111960 }, { "epoch": 0.2261864841606839, "grad_norm": 228.72006225585938, "learning_rate": 9.539326542305975e-06, "loss": 15.4197, "step": 111970 }, { "epoch": 0.22620668479336772, "grad_norm": 191.68983459472656, "learning_rate": 9.53918018121534e-06, "loss": 16.597, "step": 111980 }, { "epoch": 0.22622688542605154, "grad_norm": 537.5358276367188, "learning_rate": 9.539033798001293e-06, "loss": 22.5538, "step": 111990 }, { "epoch": 0.22624708605873536, "grad_norm": 14.784648895263672, "learning_rate": 9.538887392664544e-06, "loss": 16.6156, "step": 112000 }, { "epoch": 0.22626728669141918, "grad_norm": 222.35081481933594, "learning_rate": 9.538740965205809e-06, "loss": 16.6909, "step": 112010 }, { "epoch": 0.226287487324103, "grad_norm": 177.054443359375, "learning_rate": 9.538594515625802e-06, "loss": 20.4373, "step": 112020 }, { "epoch": 0.2263076879567868, "grad_norm": 631.4635009765625, "learning_rate": 9.538448043925234e-06, "loss": 25.8838, "step": 112030 }, { "epoch": 0.22632788858947062, "grad_norm": 341.78863525390625, "learning_rate": 9.538301550104822e-06, "loss": 14.9466, "step": 112040 }, { "epoch": 0.22634808922215444, "grad_norm": 124.77214813232422, "learning_rate": 9.538155034165277e-06, "loss": 16.1986, "step": 112050 }, { "epoch": 0.22636828985483826, "grad_norm": 597.2428588867188, "learning_rate": 9.538008496107317e-06, "loss": 21.6635, "step": 112060 }, { "epoch": 0.22638849048752208, "grad_norm": 82.80120086669922, "learning_rate": 9.537861935931651e-06, "loss": 17.6344, "step": 112070 }, { "epoch": 0.2264086911202059, "grad_norm": 441.96807861328125, "learning_rate": 9.537715353639e-06, "loss": 21.7987, "step": 112080 }, { "epoch": 0.2264288917528897, "grad_norm": 453.10125732421875, "learning_rate": 9.537568749230074e-06, "loss": 20.0909, "step": 112090 }, { "epoch": 0.2264490923855735, "grad_norm": 88.02518463134766, "learning_rate": 9.537422122705585e-06, "loss": 30.0022, "step": 112100 }, { "epoch": 0.22646929301825733, "grad_norm": 900.0540771484375, "learning_rate": 9.537275474066254e-06, "loss": 33.1661, "step": 112110 }, { "epoch": 0.22648949365094115, "grad_norm": 517.3802490234375, "learning_rate": 9.537128803312792e-06, "loss": 27.6802, "step": 112120 }, { "epoch": 0.22650969428362497, "grad_norm": 148.8919219970703, "learning_rate": 9.536982110445913e-06, "loss": 30.2707, "step": 112130 }, { "epoch": 0.2265298949163088, "grad_norm": 135.48175048828125, "learning_rate": 9.536835395466334e-06, "loss": 12.0782, "step": 112140 }, { "epoch": 0.2265500955489926, "grad_norm": 537.9564819335938, "learning_rate": 9.536688658374771e-06, "loss": 33.243, "step": 112150 }, { "epoch": 0.2265702961816764, "grad_norm": 237.89910888671875, "learning_rate": 9.536541899171936e-06, "loss": 24.3281, "step": 112160 }, { "epoch": 0.22659049681436022, "grad_norm": 446.0911865234375, "learning_rate": 9.536395117858548e-06, "loss": 19.643, "step": 112170 }, { "epoch": 0.22661069744704404, "grad_norm": 268.34075927734375, "learning_rate": 9.53624831443532e-06, "loss": 29.6149, "step": 112180 }, { "epoch": 0.22663089807972786, "grad_norm": 241.6955108642578, "learning_rate": 9.536101488902966e-06, "loss": 20.7392, "step": 112190 }, { "epoch": 0.22665109871241168, "grad_norm": 220.80264282226562, "learning_rate": 9.535954641262206e-06, "loss": 22.963, "step": 112200 }, { "epoch": 0.2266712993450955, "grad_norm": 174.48768615722656, "learning_rate": 9.535807771513751e-06, "loss": 14.8331, "step": 112210 }, { "epoch": 0.2266914999777793, "grad_norm": 368.04510498046875, "learning_rate": 9.53566087965832e-06, "loss": 16.6621, "step": 112220 }, { "epoch": 0.2267117006104631, "grad_norm": 96.47853088378906, "learning_rate": 9.535513965696628e-06, "loss": 14.5611, "step": 112230 }, { "epoch": 0.22673190124314693, "grad_norm": 53.43557357788086, "learning_rate": 9.535367029629392e-06, "loss": 20.1495, "step": 112240 }, { "epoch": 0.22675210187583075, "grad_norm": 183.43394470214844, "learning_rate": 9.535220071457325e-06, "loss": 13.4537, "step": 112250 }, { "epoch": 0.22677230250851457, "grad_norm": 264.83465576171875, "learning_rate": 9.535073091181148e-06, "loss": 30.3332, "step": 112260 }, { "epoch": 0.2267925031411984, "grad_norm": 342.2509460449219, "learning_rate": 9.534926088801572e-06, "loss": 16.5997, "step": 112270 }, { "epoch": 0.22681270377388218, "grad_norm": 406.4085998535156, "learning_rate": 9.534779064319318e-06, "loss": 21.0168, "step": 112280 }, { "epoch": 0.226832904406566, "grad_norm": 167.63113403320312, "learning_rate": 9.5346320177351e-06, "loss": 18.396, "step": 112290 }, { "epoch": 0.22685310503924982, "grad_norm": 256.9101867675781, "learning_rate": 9.534484949049636e-06, "loss": 13.9384, "step": 112300 }, { "epoch": 0.22687330567193364, "grad_norm": 236.91322326660156, "learning_rate": 9.534337858263643e-06, "loss": 43.3676, "step": 112310 }, { "epoch": 0.22689350630461746, "grad_norm": 186.569580078125, "learning_rate": 9.534190745377837e-06, "loss": 13.8521, "step": 112320 }, { "epoch": 0.22691370693730128, "grad_norm": 263.2310485839844, "learning_rate": 9.534043610392934e-06, "loss": 37.4611, "step": 112330 }, { "epoch": 0.2269339075699851, "grad_norm": 323.05389404296875, "learning_rate": 9.533896453309654e-06, "loss": 21.0291, "step": 112340 }, { "epoch": 0.2269541082026689, "grad_norm": 118.12061309814453, "learning_rate": 9.533749274128712e-06, "loss": 15.5998, "step": 112350 }, { "epoch": 0.22697430883535272, "grad_norm": 194.26510620117188, "learning_rate": 9.533602072850826e-06, "loss": 21.6035, "step": 112360 }, { "epoch": 0.22699450946803654, "grad_norm": 235.61212158203125, "learning_rate": 9.533454849476712e-06, "loss": 27.7623, "step": 112370 }, { "epoch": 0.22701471010072036, "grad_norm": 258.5925598144531, "learning_rate": 9.533307604007089e-06, "loss": 10.5474, "step": 112380 }, { "epoch": 0.22703491073340418, "grad_norm": 69.9465560913086, "learning_rate": 9.533160336442677e-06, "loss": 29.0241, "step": 112390 }, { "epoch": 0.227055111366088, "grad_norm": 555.9659423828125, "learning_rate": 9.53301304678419e-06, "loss": 27.3766, "step": 112400 }, { "epoch": 0.2270753119987718, "grad_norm": 72.3573226928711, "learning_rate": 9.532865735032346e-06, "loss": 11.8379, "step": 112410 }, { "epoch": 0.2270955126314556, "grad_norm": 215.6322479248047, "learning_rate": 9.532718401187866e-06, "loss": 17.5852, "step": 112420 }, { "epoch": 0.22711571326413943, "grad_norm": 450.5770263671875, "learning_rate": 9.532571045251465e-06, "loss": 21.2904, "step": 112430 }, { "epoch": 0.22713591389682325, "grad_norm": 452.8306884765625, "learning_rate": 9.532423667223863e-06, "loss": 29.0047, "step": 112440 }, { "epoch": 0.22715611452950707, "grad_norm": 482.26495361328125, "learning_rate": 9.532276267105778e-06, "loss": 17.3377, "step": 112450 }, { "epoch": 0.2271763151621909, "grad_norm": 519.3146362304688, "learning_rate": 9.532128844897928e-06, "loss": 26.1101, "step": 112460 }, { "epoch": 0.22719651579487468, "grad_norm": 109.70201110839844, "learning_rate": 9.531981400601032e-06, "loss": 16.4237, "step": 112470 }, { "epoch": 0.2272167164275585, "grad_norm": 487.9176940917969, "learning_rate": 9.531833934215807e-06, "loss": 30.0001, "step": 112480 }, { "epoch": 0.22723691706024232, "grad_norm": 164.81201171875, "learning_rate": 9.531686445742973e-06, "loss": 14.6916, "step": 112490 }, { "epoch": 0.22725711769292614, "grad_norm": 230.17823791503906, "learning_rate": 9.531538935183252e-06, "loss": 26.4062, "step": 112500 }, { "epoch": 0.22727731832560996, "grad_norm": 221.95223999023438, "learning_rate": 9.531391402537355e-06, "loss": 18.2298, "step": 112510 }, { "epoch": 0.22729751895829378, "grad_norm": 182.32850646972656, "learning_rate": 9.53124384780601e-06, "loss": 28.1996, "step": 112520 }, { "epoch": 0.2273177195909776, "grad_norm": 308.8759765625, "learning_rate": 9.53109627098993e-06, "loss": 20.6992, "step": 112530 }, { "epoch": 0.2273379202236614, "grad_norm": 377.97552490234375, "learning_rate": 9.530948672089837e-06, "loss": 14.8791, "step": 112540 }, { "epoch": 0.2273581208563452, "grad_norm": 194.27133178710938, "learning_rate": 9.530801051106449e-06, "loss": 15.003, "step": 112550 }, { "epoch": 0.22737832148902903, "grad_norm": 306.134033203125, "learning_rate": 9.530653408040487e-06, "loss": 33.5115, "step": 112560 }, { "epoch": 0.22739852212171285, "grad_norm": 297.3280334472656, "learning_rate": 9.53050574289267e-06, "loss": 10.526, "step": 112570 }, { "epoch": 0.22741872275439667, "grad_norm": 173.81304931640625, "learning_rate": 9.530358055663718e-06, "loss": 41.6411, "step": 112580 }, { "epoch": 0.2274389233870805, "grad_norm": 345.82025146484375, "learning_rate": 9.53021034635435e-06, "loss": 17.1832, "step": 112590 }, { "epoch": 0.22745912401976429, "grad_norm": 333.15576171875, "learning_rate": 9.530062614965286e-06, "loss": 17.8934, "step": 112600 }, { "epoch": 0.2274793246524481, "grad_norm": 122.50267028808594, "learning_rate": 9.529914861497244e-06, "loss": 20.7555, "step": 112610 }, { "epoch": 0.22749952528513193, "grad_norm": 328.8847351074219, "learning_rate": 9.52976708595095e-06, "loss": 33.0993, "step": 112620 }, { "epoch": 0.22751972591781575, "grad_norm": 307.8403625488281, "learning_rate": 9.529619288327118e-06, "loss": 11.1892, "step": 112630 }, { "epoch": 0.22753992655049957, "grad_norm": 469.670654296875, "learning_rate": 9.529471468626472e-06, "loss": 15.5333, "step": 112640 }, { "epoch": 0.22756012718318339, "grad_norm": 67.62223815917969, "learning_rate": 9.529323626849731e-06, "loss": 22.9688, "step": 112650 }, { "epoch": 0.2275803278158672, "grad_norm": 422.0882873535156, "learning_rate": 9.529175762997616e-06, "loss": 29.4144, "step": 112660 }, { "epoch": 0.227600528448551, "grad_norm": 55.547786712646484, "learning_rate": 9.529027877070848e-06, "loss": 14.8384, "step": 112670 }, { "epoch": 0.22762072908123482, "grad_norm": 367.7314453125, "learning_rate": 9.528879969070148e-06, "loss": 14.9889, "step": 112680 }, { "epoch": 0.22764092971391864, "grad_norm": 117.3646240234375, "learning_rate": 9.528732038996236e-06, "loss": 17.0586, "step": 112690 }, { "epoch": 0.22766113034660246, "grad_norm": 136.54458618164062, "learning_rate": 9.528584086849832e-06, "loss": 14.9386, "step": 112700 }, { "epoch": 0.22768133097928628, "grad_norm": 404.7271728515625, "learning_rate": 9.528436112631659e-06, "loss": 20.5147, "step": 112710 }, { "epoch": 0.2277015316119701, "grad_norm": 365.51177978515625, "learning_rate": 9.528288116342439e-06, "loss": 21.3694, "step": 112720 }, { "epoch": 0.2277217322446539, "grad_norm": 420.58416748046875, "learning_rate": 9.52814009798289e-06, "loss": 31.4154, "step": 112730 }, { "epoch": 0.2277419328773377, "grad_norm": 246.86138916015625, "learning_rate": 9.527992057553736e-06, "loss": 22.861, "step": 112740 }, { "epoch": 0.22776213351002153, "grad_norm": 630.15625, "learning_rate": 9.527843995055698e-06, "loss": 10.6887, "step": 112750 }, { "epoch": 0.22778233414270535, "grad_norm": 168.36195373535156, "learning_rate": 9.527695910489498e-06, "loss": 18.8022, "step": 112760 }, { "epoch": 0.22780253477538917, "grad_norm": 211.66310119628906, "learning_rate": 9.527547803855858e-06, "loss": 20.6571, "step": 112770 }, { "epoch": 0.227822735408073, "grad_norm": 309.4031677246094, "learning_rate": 9.527399675155497e-06, "loss": 20.4308, "step": 112780 }, { "epoch": 0.22784293604075678, "grad_norm": 167.15574645996094, "learning_rate": 9.52725152438914e-06, "loss": 57.0778, "step": 112790 }, { "epoch": 0.2278631366734406, "grad_norm": 698.3370971679688, "learning_rate": 9.52710335155751e-06, "loss": 19.6343, "step": 112800 }, { "epoch": 0.22788333730612442, "grad_norm": 204.9141082763672, "learning_rate": 9.526955156661324e-06, "loss": 13.4991, "step": 112810 }, { "epoch": 0.22790353793880824, "grad_norm": 389.8515625, "learning_rate": 9.52680693970131e-06, "loss": 29.4096, "step": 112820 }, { "epoch": 0.22792373857149206, "grad_norm": 341.4795227050781, "learning_rate": 9.526658700678188e-06, "loss": 32.6992, "step": 112830 }, { "epoch": 0.22794393920417588, "grad_norm": 463.5843200683594, "learning_rate": 9.52651043959268e-06, "loss": 17.0772, "step": 112840 }, { "epoch": 0.2279641398368597, "grad_norm": 133.0093536376953, "learning_rate": 9.526362156445508e-06, "loss": 18.639, "step": 112850 }, { "epoch": 0.2279843404695435, "grad_norm": 523.920654296875, "learning_rate": 9.526213851237396e-06, "loss": 23.5746, "step": 112860 }, { "epoch": 0.22800454110222731, "grad_norm": 114.31873321533203, "learning_rate": 9.526065523969068e-06, "loss": 22.8792, "step": 112870 }, { "epoch": 0.22802474173491113, "grad_norm": 335.0586853027344, "learning_rate": 9.525917174641246e-06, "loss": 34.0691, "step": 112880 }, { "epoch": 0.22804494236759495, "grad_norm": 249.7847442626953, "learning_rate": 9.525768803254651e-06, "loss": 22.8768, "step": 112890 }, { "epoch": 0.22806514300027877, "grad_norm": 383.92205810546875, "learning_rate": 9.525620409810009e-06, "loss": 17.7124, "step": 112900 }, { "epoch": 0.2280853436329626, "grad_norm": 830.947998046875, "learning_rate": 9.52547199430804e-06, "loss": 22.5441, "step": 112910 }, { "epoch": 0.2281055442656464, "grad_norm": 150.75888061523438, "learning_rate": 9.525323556749472e-06, "loss": 21.5858, "step": 112920 }, { "epoch": 0.2281257448983302, "grad_norm": 1595.56298828125, "learning_rate": 9.525175097135024e-06, "loss": 23.8102, "step": 112930 }, { "epoch": 0.22814594553101403, "grad_norm": 219.15087890625, "learning_rate": 9.525026615465423e-06, "loss": 24.9359, "step": 112940 }, { "epoch": 0.22816614616369785, "grad_norm": 248.51905822753906, "learning_rate": 9.524878111741388e-06, "loss": 10.6474, "step": 112950 }, { "epoch": 0.22818634679638167, "grad_norm": 225.31533813476562, "learning_rate": 9.52472958596365e-06, "loss": 36.2922, "step": 112960 }, { "epoch": 0.2282065474290655, "grad_norm": 748.4426879882812, "learning_rate": 9.524581038132927e-06, "loss": 53.232, "step": 112970 }, { "epoch": 0.2282267480617493, "grad_norm": 92.30492401123047, "learning_rate": 9.524432468249944e-06, "loss": 30.2049, "step": 112980 }, { "epoch": 0.2282469486944331, "grad_norm": 156.64344787597656, "learning_rate": 9.524283876315427e-06, "loss": 51.6649, "step": 112990 }, { "epoch": 0.22826714932711692, "grad_norm": 331.4731750488281, "learning_rate": 9.524135262330098e-06, "loss": 29.7415, "step": 113000 }, { "epoch": 0.22828734995980074, "grad_norm": 45.06083679199219, "learning_rate": 9.523986626294683e-06, "loss": 16.8343, "step": 113010 }, { "epoch": 0.22830755059248456, "grad_norm": 236.55995178222656, "learning_rate": 9.523837968209906e-06, "loss": 21.7943, "step": 113020 }, { "epoch": 0.22832775122516838, "grad_norm": 43.76736068725586, "learning_rate": 9.523689288076491e-06, "loss": 19.2343, "step": 113030 }, { "epoch": 0.2283479518578522, "grad_norm": 266.6197814941406, "learning_rate": 9.523540585895164e-06, "loss": 30.162, "step": 113040 }, { "epoch": 0.228368152490536, "grad_norm": 121.21473693847656, "learning_rate": 9.523391861666649e-06, "loss": 14.4985, "step": 113050 }, { "epoch": 0.2283883531232198, "grad_norm": 519.8643798828125, "learning_rate": 9.52324311539167e-06, "loss": 31.9868, "step": 113060 }, { "epoch": 0.22840855375590363, "grad_norm": 299.750244140625, "learning_rate": 9.523094347070951e-06, "loss": 28.0309, "step": 113070 }, { "epoch": 0.22842875438858745, "grad_norm": 154.18032836914062, "learning_rate": 9.522945556705221e-06, "loss": 26.7213, "step": 113080 }, { "epoch": 0.22844895502127127, "grad_norm": 138.68246459960938, "learning_rate": 9.522796744295202e-06, "loss": 14.362, "step": 113090 }, { "epoch": 0.2284691556539551, "grad_norm": 231.65834045410156, "learning_rate": 9.52264790984162e-06, "loss": 20.931, "step": 113100 }, { "epoch": 0.22848935628663888, "grad_norm": 294.6171569824219, "learning_rate": 9.522499053345203e-06, "loss": 23.0037, "step": 113110 }, { "epoch": 0.2285095569193227, "grad_norm": 455.7163391113281, "learning_rate": 9.522350174806672e-06, "loss": 26.8028, "step": 113120 }, { "epoch": 0.22852975755200652, "grad_norm": 178.55416870117188, "learning_rate": 9.522201274226755e-06, "loss": 18.1708, "step": 113130 }, { "epoch": 0.22854995818469034, "grad_norm": 190.93338012695312, "learning_rate": 9.522052351606177e-06, "loss": 18.7666, "step": 113140 }, { "epoch": 0.22857015881737416, "grad_norm": 133.92486572265625, "learning_rate": 9.521903406945666e-06, "loss": 26.9253, "step": 113150 }, { "epoch": 0.22859035945005798, "grad_norm": 406.804443359375, "learning_rate": 9.521754440245944e-06, "loss": 15.7564, "step": 113160 }, { "epoch": 0.2286105600827418, "grad_norm": 365.006103515625, "learning_rate": 9.52160545150774e-06, "loss": 34.8258, "step": 113170 }, { "epoch": 0.2286307607154256, "grad_norm": 281.6316833496094, "learning_rate": 9.52145644073178e-06, "loss": 18.8012, "step": 113180 }, { "epoch": 0.22865096134810942, "grad_norm": 576.7046508789062, "learning_rate": 9.52130740791879e-06, "loss": 22.7102, "step": 113190 }, { "epoch": 0.22867116198079324, "grad_norm": 245.35906982421875, "learning_rate": 9.521158353069494e-06, "loss": 20.6009, "step": 113200 }, { "epoch": 0.22869136261347706, "grad_norm": 158.6051025390625, "learning_rate": 9.521009276184624e-06, "loss": 12.1152, "step": 113210 }, { "epoch": 0.22871156324616088, "grad_norm": 522.2172241210938, "learning_rate": 9.520860177264898e-06, "loss": 12.6111, "step": 113220 }, { "epoch": 0.2287317638788447, "grad_norm": 715.5775146484375, "learning_rate": 9.520711056311052e-06, "loss": 34.6106, "step": 113230 }, { "epoch": 0.2287519645115285, "grad_norm": 0.0, "learning_rate": 9.520561913323807e-06, "loss": 14.284, "step": 113240 }, { "epoch": 0.2287721651442123, "grad_norm": 528.0171508789062, "learning_rate": 9.520412748303894e-06, "loss": 34.0772, "step": 113250 }, { "epoch": 0.22879236577689613, "grad_norm": 849.075439453125, "learning_rate": 9.520263561252035e-06, "loss": 31.3894, "step": 113260 }, { "epoch": 0.22881256640957995, "grad_norm": 193.38079833984375, "learning_rate": 9.52011435216896e-06, "loss": 26.7543, "step": 113270 }, { "epoch": 0.22883276704226377, "grad_norm": 183.6104736328125, "learning_rate": 9.519965121055395e-06, "loss": 18.4991, "step": 113280 }, { "epoch": 0.2288529676749476, "grad_norm": 560.3463745117188, "learning_rate": 9.51981586791207e-06, "loss": 29.9851, "step": 113290 }, { "epoch": 0.2288731683076314, "grad_norm": 30.70647621154785, "learning_rate": 9.51966659273971e-06, "loss": 26.0317, "step": 113300 }, { "epoch": 0.2288933689403152, "grad_norm": 190.85581970214844, "learning_rate": 9.519517295539042e-06, "loss": 33.7654, "step": 113310 }, { "epoch": 0.22891356957299902, "grad_norm": 278.62890625, "learning_rate": 9.519367976310796e-06, "loss": 17.8456, "step": 113320 }, { "epoch": 0.22893377020568284, "grad_norm": 416.687255859375, "learning_rate": 9.5192186350557e-06, "loss": 19.4678, "step": 113330 }, { "epoch": 0.22895397083836666, "grad_norm": 114.79515838623047, "learning_rate": 9.519069271774479e-06, "loss": 9.1036, "step": 113340 }, { "epoch": 0.22897417147105048, "grad_norm": 276.6917419433594, "learning_rate": 9.51891988646786e-06, "loss": 29.1252, "step": 113350 }, { "epoch": 0.2289943721037343, "grad_norm": 382.2373962402344, "learning_rate": 9.51877047913658e-06, "loss": 24.8351, "step": 113360 }, { "epoch": 0.2290145727364181, "grad_norm": 433.1361389160156, "learning_rate": 9.518621049781356e-06, "loss": 22.5087, "step": 113370 }, { "epoch": 0.2290347733691019, "grad_norm": 184.1038360595703, "learning_rate": 9.51847159840292e-06, "loss": 20.1139, "step": 113380 }, { "epoch": 0.22905497400178573, "grad_norm": 299.3089294433594, "learning_rate": 9.518322125002004e-06, "loss": 15.2201, "step": 113390 }, { "epoch": 0.22907517463446955, "grad_norm": 279.9103088378906, "learning_rate": 9.518172629579334e-06, "loss": 12.347, "step": 113400 }, { "epoch": 0.22909537526715337, "grad_norm": 406.2547302246094, "learning_rate": 9.518023112135636e-06, "loss": 20.5069, "step": 113410 }, { "epoch": 0.2291155758998372, "grad_norm": 165.06637573242188, "learning_rate": 9.517873572671646e-06, "loss": 24.1852, "step": 113420 }, { "epoch": 0.22913577653252098, "grad_norm": 617.1575317382812, "learning_rate": 9.517724011188083e-06, "loss": 28.5046, "step": 113430 }, { "epoch": 0.2291559771652048, "grad_norm": 509.9048156738281, "learning_rate": 9.517574427685686e-06, "loss": 31.1524, "step": 113440 }, { "epoch": 0.22917617779788862, "grad_norm": 218.7311553955078, "learning_rate": 9.517424822165175e-06, "loss": 27.8674, "step": 113450 }, { "epoch": 0.22919637843057244, "grad_norm": 415.2308044433594, "learning_rate": 9.517275194627285e-06, "loss": 22.7286, "step": 113460 }, { "epoch": 0.22921657906325626, "grad_norm": 476.2141418457031, "learning_rate": 9.517125545072743e-06, "loss": 17.5455, "step": 113470 }, { "epoch": 0.22923677969594008, "grad_norm": 154.2548370361328, "learning_rate": 9.51697587350228e-06, "loss": 20.9967, "step": 113480 }, { "epoch": 0.2292569803286239, "grad_norm": 371.7786560058594, "learning_rate": 9.516826179916625e-06, "loss": 26.6846, "step": 113490 }, { "epoch": 0.2292771809613077, "grad_norm": 234.97409057617188, "learning_rate": 9.516676464316505e-06, "loss": 26.6518, "step": 113500 }, { "epoch": 0.22929738159399152, "grad_norm": 194.86431884765625, "learning_rate": 9.516526726702653e-06, "loss": 17.1482, "step": 113510 }, { "epoch": 0.22931758222667534, "grad_norm": 474.3934326171875, "learning_rate": 9.516376967075797e-06, "loss": 48.6975, "step": 113520 }, { "epoch": 0.22933778285935916, "grad_norm": 322.1300048828125, "learning_rate": 9.516227185436667e-06, "loss": 26.6292, "step": 113530 }, { "epoch": 0.22935798349204298, "grad_norm": 225.9384307861328, "learning_rate": 9.516077381785995e-06, "loss": 12.3186, "step": 113540 }, { "epoch": 0.2293781841247268, "grad_norm": 327.8215026855469, "learning_rate": 9.515927556124508e-06, "loss": 21.0015, "step": 113550 }, { "epoch": 0.2293983847574106, "grad_norm": 49.5264778137207, "learning_rate": 9.515777708452938e-06, "loss": 34.9526, "step": 113560 }, { "epoch": 0.2294185853900944, "grad_norm": 240.5894317626953, "learning_rate": 9.515627838772016e-06, "loss": 10.9957, "step": 113570 }, { "epoch": 0.22943878602277823, "grad_norm": 467.2842712402344, "learning_rate": 9.515477947082473e-06, "loss": 19.5038, "step": 113580 }, { "epoch": 0.22945898665546205, "grad_norm": 304.7475280761719, "learning_rate": 9.515328033385035e-06, "loss": 18.4164, "step": 113590 }, { "epoch": 0.22947918728814587, "grad_norm": 174.3208770751953, "learning_rate": 9.515178097680437e-06, "loss": 18.2285, "step": 113600 }, { "epoch": 0.2294993879208297, "grad_norm": 357.3243103027344, "learning_rate": 9.515028139969409e-06, "loss": 28.8297, "step": 113610 }, { "epoch": 0.2295195885535135, "grad_norm": 913.253173828125, "learning_rate": 9.514878160252681e-06, "loss": 30.5256, "step": 113620 }, { "epoch": 0.2295397891861973, "grad_norm": 389.654296875, "learning_rate": 9.514728158530983e-06, "loss": 18.6807, "step": 113630 }, { "epoch": 0.22955998981888112, "grad_norm": 738.0441284179688, "learning_rate": 9.51457813480505e-06, "loss": 22.4011, "step": 113640 }, { "epoch": 0.22958019045156494, "grad_norm": 232.118408203125, "learning_rate": 9.514428089075611e-06, "loss": 32.433, "step": 113650 }, { "epoch": 0.22960039108424876, "grad_norm": 375.2558898925781, "learning_rate": 9.514278021343395e-06, "loss": 11.8731, "step": 113660 }, { "epoch": 0.22962059171693258, "grad_norm": 611.1576538085938, "learning_rate": 9.514127931609136e-06, "loss": 28.5512, "step": 113670 }, { "epoch": 0.2296407923496164, "grad_norm": 96.59660339355469, "learning_rate": 9.513977819873565e-06, "loss": 21.7414, "step": 113680 }, { "epoch": 0.2296609929823002, "grad_norm": 71.1219711303711, "learning_rate": 9.513827686137415e-06, "loss": 43.2778, "step": 113690 }, { "epoch": 0.229681193614984, "grad_norm": 430.4513244628906, "learning_rate": 9.513677530401415e-06, "loss": 25.6094, "step": 113700 }, { "epoch": 0.22970139424766783, "grad_norm": 208.1913604736328, "learning_rate": 9.513527352666298e-06, "loss": 25.9188, "step": 113710 }, { "epoch": 0.22972159488035165, "grad_norm": 261.4417724609375, "learning_rate": 9.513377152932796e-06, "loss": 16.2174, "step": 113720 }, { "epoch": 0.22974179551303547, "grad_norm": 51.24182891845703, "learning_rate": 9.513226931201642e-06, "loss": 28.5293, "step": 113730 }, { "epoch": 0.2297619961457193, "grad_norm": 445.5868225097656, "learning_rate": 9.513076687473568e-06, "loss": 33.1071, "step": 113740 }, { "epoch": 0.22978219677840309, "grad_norm": 423.3970947265625, "learning_rate": 9.512926421749305e-06, "loss": 29.95, "step": 113750 }, { "epoch": 0.2298023974110869, "grad_norm": 392.1083984375, "learning_rate": 9.512776134029585e-06, "loss": 24.5808, "step": 113760 }, { "epoch": 0.22982259804377073, "grad_norm": 450.7821960449219, "learning_rate": 9.512625824315142e-06, "loss": 24.2156, "step": 113770 }, { "epoch": 0.22984279867645455, "grad_norm": 479.0993957519531, "learning_rate": 9.512475492606707e-06, "loss": 28.2415, "step": 113780 }, { "epoch": 0.22986299930913837, "grad_norm": 512.4356689453125, "learning_rate": 9.512325138905015e-06, "loss": 19.6172, "step": 113790 }, { "epoch": 0.22988319994182219, "grad_norm": 321.239501953125, "learning_rate": 9.512174763210798e-06, "loss": 18.2375, "step": 113800 }, { "epoch": 0.229903400574506, "grad_norm": 144.9734649658203, "learning_rate": 9.512024365524788e-06, "loss": 27.0912, "step": 113810 }, { "epoch": 0.2299236012071898, "grad_norm": 177.15646362304688, "learning_rate": 9.511873945847718e-06, "loss": 19.9602, "step": 113820 }, { "epoch": 0.22994380183987362, "grad_norm": 256.6202392578125, "learning_rate": 9.511723504180321e-06, "loss": 10.8808, "step": 113830 }, { "epoch": 0.22996400247255744, "grad_norm": 25.940935134887695, "learning_rate": 9.511573040523332e-06, "loss": 27.7697, "step": 113840 }, { "epoch": 0.22998420310524126, "grad_norm": 258.6063232421875, "learning_rate": 9.511422554877482e-06, "loss": 53.9014, "step": 113850 }, { "epoch": 0.23000440373792508, "grad_norm": 121.90843200683594, "learning_rate": 9.511272047243507e-06, "loss": 20.7172, "step": 113860 }, { "epoch": 0.2300246043706089, "grad_norm": 66.36710357666016, "learning_rate": 9.51112151762214e-06, "loss": 22.6572, "step": 113870 }, { "epoch": 0.2300448050032927, "grad_norm": 471.5945129394531, "learning_rate": 9.510970966014112e-06, "loss": 16.7224, "step": 113880 }, { "epoch": 0.2300650056359765, "grad_norm": 398.6106262207031, "learning_rate": 9.51082039242016e-06, "loss": 15.9681, "step": 113890 }, { "epoch": 0.23008520626866033, "grad_norm": 395.5736999511719, "learning_rate": 9.510669796841014e-06, "loss": 38.1401, "step": 113900 }, { "epoch": 0.23010540690134415, "grad_norm": 2585.0234375, "learning_rate": 9.510519179277414e-06, "loss": 39.6098, "step": 113910 }, { "epoch": 0.23012560753402797, "grad_norm": 443.54632568359375, "learning_rate": 9.510368539730089e-06, "loss": 23.6138, "step": 113920 }, { "epoch": 0.2301458081667118, "grad_norm": 77.28975677490234, "learning_rate": 9.510217878199773e-06, "loss": 26.3488, "step": 113930 }, { "epoch": 0.2301660087993956, "grad_norm": 388.1447448730469, "learning_rate": 9.510067194687205e-06, "loss": 23.6337, "step": 113940 }, { "epoch": 0.2301862094320794, "grad_norm": 152.34646606445312, "learning_rate": 9.509916489193114e-06, "loss": 27.5738, "step": 113950 }, { "epoch": 0.23020641006476322, "grad_norm": 457.2374572753906, "learning_rate": 9.50976576171824e-06, "loss": 20.3041, "step": 113960 }, { "epoch": 0.23022661069744704, "grad_norm": 458.5242919921875, "learning_rate": 9.509615012263311e-06, "loss": 21.2088, "step": 113970 }, { "epoch": 0.23024681133013086, "grad_norm": 168.6975555419922, "learning_rate": 9.509464240829067e-06, "loss": 22.458, "step": 113980 }, { "epoch": 0.23026701196281468, "grad_norm": 652.1724853515625, "learning_rate": 9.509313447416241e-06, "loss": 23.0017, "step": 113990 }, { "epoch": 0.2302872125954985, "grad_norm": 221.48512268066406, "learning_rate": 9.50916263202557e-06, "loss": 15.0697, "step": 114000 }, { "epoch": 0.2303074132281823, "grad_norm": 154.8582305908203, "learning_rate": 9.509011794657785e-06, "loss": 19.2463, "step": 114010 }, { "epoch": 0.23032761386086611, "grad_norm": 272.1348876953125, "learning_rate": 9.508860935313623e-06, "loss": 18.1986, "step": 114020 }, { "epoch": 0.23034781449354993, "grad_norm": 491.47967529296875, "learning_rate": 9.508710053993822e-06, "loss": 26.2896, "step": 114030 }, { "epoch": 0.23036801512623375, "grad_norm": 154.8836212158203, "learning_rate": 9.508559150699115e-06, "loss": 13.5475, "step": 114040 }, { "epoch": 0.23038821575891757, "grad_norm": 69.89427185058594, "learning_rate": 9.508408225430237e-06, "loss": 27.1364, "step": 114050 }, { "epoch": 0.2304084163916014, "grad_norm": 284.37078857421875, "learning_rate": 9.508257278187923e-06, "loss": 22.8416, "step": 114060 }, { "epoch": 0.2304286170242852, "grad_norm": 257.5578918457031, "learning_rate": 9.50810630897291e-06, "loss": 14.3847, "step": 114070 }, { "epoch": 0.230448817656969, "grad_norm": 253.07391357421875, "learning_rate": 9.507955317785935e-06, "loss": 22.8352, "step": 114080 }, { "epoch": 0.23046901828965283, "grad_norm": 156.57289123535156, "learning_rate": 9.50780430462773e-06, "loss": 25.3198, "step": 114090 }, { "epoch": 0.23048921892233665, "grad_norm": 324.0006408691406, "learning_rate": 9.507653269499035e-06, "loss": 27.2447, "step": 114100 }, { "epoch": 0.23050941955502047, "grad_norm": 315.4477844238281, "learning_rate": 9.507502212400585e-06, "loss": 25.8123, "step": 114110 }, { "epoch": 0.2305296201877043, "grad_norm": 102.92755126953125, "learning_rate": 9.507351133333116e-06, "loss": 23.1058, "step": 114120 }, { "epoch": 0.2305498208203881, "grad_norm": 10.381075859069824, "learning_rate": 9.507200032297364e-06, "loss": 41.9177, "step": 114130 }, { "epoch": 0.2305700214530719, "grad_norm": 142.60189819335938, "learning_rate": 9.507048909294065e-06, "loss": 29.112, "step": 114140 }, { "epoch": 0.23059022208575572, "grad_norm": 123.96956634521484, "learning_rate": 9.506897764323957e-06, "loss": 28.7307, "step": 114150 }, { "epoch": 0.23061042271843954, "grad_norm": 283.449951171875, "learning_rate": 9.506746597387776e-06, "loss": 23.1124, "step": 114160 }, { "epoch": 0.23063062335112336, "grad_norm": 314.1993408203125, "learning_rate": 9.50659540848626e-06, "loss": 12.5608, "step": 114170 }, { "epoch": 0.23065082398380718, "grad_norm": 172.37960815429688, "learning_rate": 9.506444197620142e-06, "loss": 13.7691, "step": 114180 }, { "epoch": 0.230671024616491, "grad_norm": 123.49884796142578, "learning_rate": 9.506292964790162e-06, "loss": 19.669, "step": 114190 }, { "epoch": 0.2306912252491748, "grad_norm": 231.80490112304688, "learning_rate": 9.506141709997058e-06, "loss": 19.8659, "step": 114200 }, { "epoch": 0.2307114258818586, "grad_norm": 481.56884765625, "learning_rate": 9.505990433241565e-06, "loss": 16.9548, "step": 114210 }, { "epoch": 0.23073162651454243, "grad_norm": 345.1126708984375, "learning_rate": 9.50583913452442e-06, "loss": 32.036, "step": 114220 }, { "epoch": 0.23075182714722625, "grad_norm": 251.91358947753906, "learning_rate": 9.505687813846363e-06, "loss": 8.1122, "step": 114230 }, { "epoch": 0.23077202777991007, "grad_norm": 1302.815673828125, "learning_rate": 9.50553647120813e-06, "loss": 36.2205, "step": 114240 }, { "epoch": 0.2307922284125939, "grad_norm": 370.979736328125, "learning_rate": 9.505385106610458e-06, "loss": 22.7, "step": 114250 }, { "epoch": 0.2308124290452777, "grad_norm": 253.0781707763672, "learning_rate": 9.505233720054086e-06, "loss": 27.355, "step": 114260 }, { "epoch": 0.2308326296779615, "grad_norm": 326.8742980957031, "learning_rate": 9.505082311539752e-06, "loss": 28.0495, "step": 114270 }, { "epoch": 0.23085283031064532, "grad_norm": 268.0210266113281, "learning_rate": 9.504930881068193e-06, "loss": 21.9086, "step": 114280 }, { "epoch": 0.23087303094332914, "grad_norm": 328.5484619140625, "learning_rate": 9.504779428640146e-06, "loss": 13.9366, "step": 114290 }, { "epoch": 0.23089323157601296, "grad_norm": 269.9913330078125, "learning_rate": 9.504627954256352e-06, "loss": 14.3812, "step": 114300 }, { "epoch": 0.23091343220869678, "grad_norm": 466.7132873535156, "learning_rate": 9.504476457917546e-06, "loss": 20.1958, "step": 114310 }, { "epoch": 0.2309336328413806, "grad_norm": 458.2937316894531, "learning_rate": 9.504324939624469e-06, "loss": 26.1697, "step": 114320 }, { "epoch": 0.2309538334740644, "grad_norm": 514.73583984375, "learning_rate": 9.504173399377858e-06, "loss": 14.7337, "step": 114330 }, { "epoch": 0.23097403410674822, "grad_norm": 438.46331787109375, "learning_rate": 9.504021837178452e-06, "loss": 18.689, "step": 114340 }, { "epoch": 0.23099423473943204, "grad_norm": 629.4722900390625, "learning_rate": 9.503870253026992e-06, "loss": 34.6146, "step": 114350 }, { "epoch": 0.23101443537211586, "grad_norm": 364.96942138671875, "learning_rate": 9.503718646924211e-06, "loss": 25.5941, "step": 114360 }, { "epoch": 0.23103463600479968, "grad_norm": 277.3636169433594, "learning_rate": 9.503567018870855e-06, "loss": 24.3521, "step": 114370 }, { "epoch": 0.2310548366374835, "grad_norm": 200.32037353515625, "learning_rate": 9.503415368867658e-06, "loss": 14.9406, "step": 114380 }, { "epoch": 0.2310750372701673, "grad_norm": 205.8577880859375, "learning_rate": 9.503263696915361e-06, "loss": 18.8345, "step": 114390 }, { "epoch": 0.2310952379028511, "grad_norm": 576.2305908203125, "learning_rate": 9.503112003014702e-06, "loss": 33.4671, "step": 114400 }, { "epoch": 0.23111543853553493, "grad_norm": 69.50376892089844, "learning_rate": 9.502960287166423e-06, "loss": 15.7711, "step": 114410 }, { "epoch": 0.23113563916821875, "grad_norm": 47.69266891479492, "learning_rate": 9.50280854937126e-06, "loss": 22.6323, "step": 114420 }, { "epoch": 0.23115583980090257, "grad_norm": 132.7666015625, "learning_rate": 9.502656789629956e-06, "loss": 19.3547, "step": 114430 }, { "epoch": 0.2311760404335864, "grad_norm": 1027.045166015625, "learning_rate": 9.502505007943248e-06, "loss": 27.0426, "step": 114440 }, { "epoch": 0.2311962410662702, "grad_norm": 185.64422607421875, "learning_rate": 9.502353204311876e-06, "loss": 24.2755, "step": 114450 }, { "epoch": 0.231216441698954, "grad_norm": 417.8638610839844, "learning_rate": 9.50220137873658e-06, "loss": 26.9362, "step": 114460 }, { "epoch": 0.23123664233163782, "grad_norm": 184.1611328125, "learning_rate": 9.502049531218103e-06, "loss": 27.1389, "step": 114470 }, { "epoch": 0.23125684296432164, "grad_norm": 226.7974090576172, "learning_rate": 9.501897661757182e-06, "loss": 18.628, "step": 114480 }, { "epoch": 0.23127704359700546, "grad_norm": 156.2099151611328, "learning_rate": 9.501745770354555e-06, "loss": 14.7228, "step": 114490 }, { "epoch": 0.23129724422968928, "grad_norm": 128.4508056640625, "learning_rate": 9.501593857010968e-06, "loss": 21.8937, "step": 114500 }, { "epoch": 0.2313174448623731, "grad_norm": 0.0, "learning_rate": 9.501441921727158e-06, "loss": 22.348, "step": 114510 }, { "epoch": 0.2313376454950569, "grad_norm": 258.7344970703125, "learning_rate": 9.501289964503866e-06, "loss": 12.7737, "step": 114520 }, { "epoch": 0.2313578461277407, "grad_norm": 200.10719299316406, "learning_rate": 9.501137985341832e-06, "loss": 21.955, "step": 114530 }, { "epoch": 0.23137804676042453, "grad_norm": 176.73500061035156, "learning_rate": 9.500985984241797e-06, "loss": 25.1645, "step": 114540 }, { "epoch": 0.23139824739310835, "grad_norm": 632.90185546875, "learning_rate": 9.500833961204504e-06, "loss": 25.4594, "step": 114550 }, { "epoch": 0.23141844802579217, "grad_norm": 491.0087890625, "learning_rate": 9.500681916230692e-06, "loss": 14.0645, "step": 114560 }, { "epoch": 0.231438648658476, "grad_norm": 304.6210632324219, "learning_rate": 9.500529849321101e-06, "loss": 30.8771, "step": 114570 }, { "epoch": 0.2314588492911598, "grad_norm": 414.1082458496094, "learning_rate": 9.500377760476473e-06, "loss": 30.3141, "step": 114580 }, { "epoch": 0.2314790499238436, "grad_norm": 120.51696014404297, "learning_rate": 9.500225649697551e-06, "loss": 16.6046, "step": 114590 }, { "epoch": 0.23149925055652743, "grad_norm": 519.7973022460938, "learning_rate": 9.500073516985074e-06, "loss": 33.7718, "step": 114600 }, { "epoch": 0.23151945118921125, "grad_norm": 290.2353515625, "learning_rate": 9.499921362339786e-06, "loss": 21.9736, "step": 114610 }, { "epoch": 0.23153965182189506, "grad_norm": 93.01692199707031, "learning_rate": 9.499769185762425e-06, "loss": 30.8202, "step": 114620 }, { "epoch": 0.23155985245457888, "grad_norm": 179.5181121826172, "learning_rate": 9.499616987253734e-06, "loss": 38.2711, "step": 114630 }, { "epoch": 0.2315800530872627, "grad_norm": 825.6603393554688, "learning_rate": 9.49946476681446e-06, "loss": 32.8871, "step": 114640 }, { "epoch": 0.2316002537199465, "grad_norm": 367.6302795410156, "learning_rate": 9.499312524445337e-06, "loss": 10.1616, "step": 114650 }, { "epoch": 0.23162045435263032, "grad_norm": 0.0, "learning_rate": 9.499160260147111e-06, "loss": 26.7396, "step": 114660 }, { "epoch": 0.23164065498531414, "grad_norm": 373.1811218261719, "learning_rate": 9.499007973920523e-06, "loss": 23.469, "step": 114670 }, { "epoch": 0.23166085561799796, "grad_norm": 58.23490524291992, "learning_rate": 9.498855665766316e-06, "loss": 14.6932, "step": 114680 }, { "epoch": 0.23168105625068178, "grad_norm": 383.4538269042969, "learning_rate": 9.498703335685233e-06, "loss": 26.9858, "step": 114690 }, { "epoch": 0.2317012568833656, "grad_norm": 397.5417785644531, "learning_rate": 9.498550983678016e-06, "loss": 26.0168, "step": 114700 }, { "epoch": 0.2317214575160494, "grad_norm": 270.2843322753906, "learning_rate": 9.498398609745405e-06, "loss": 27.1876, "step": 114710 }, { "epoch": 0.2317416581487332, "grad_norm": 350.39111328125, "learning_rate": 9.498246213888148e-06, "loss": 18.5244, "step": 114720 }, { "epoch": 0.23176185878141703, "grad_norm": 263.347412109375, "learning_rate": 9.498093796106982e-06, "loss": 22.4962, "step": 114730 }, { "epoch": 0.23178205941410085, "grad_norm": 364.1878662109375, "learning_rate": 9.497941356402653e-06, "loss": 17.6536, "step": 114740 }, { "epoch": 0.23180226004678467, "grad_norm": 232.7297821044922, "learning_rate": 9.497788894775903e-06, "loss": 18.156, "step": 114750 }, { "epoch": 0.2318224606794685, "grad_norm": 72.26591491699219, "learning_rate": 9.497636411227476e-06, "loss": 22.2819, "step": 114760 }, { "epoch": 0.2318426613121523, "grad_norm": 376.6095275878906, "learning_rate": 9.497483905758114e-06, "loss": 17.708, "step": 114770 }, { "epoch": 0.2318628619448361, "grad_norm": 404.580810546875, "learning_rate": 9.49733137836856e-06, "loss": 23.5071, "step": 114780 }, { "epoch": 0.23188306257751992, "grad_norm": 227.01113891601562, "learning_rate": 9.497178829059561e-06, "loss": 23.3519, "step": 114790 }, { "epoch": 0.23190326321020374, "grad_norm": 167.198974609375, "learning_rate": 9.497026257831856e-06, "loss": 24.7744, "step": 114800 }, { "epoch": 0.23192346384288756, "grad_norm": 230.96066284179688, "learning_rate": 9.49687366468619e-06, "loss": 38.9848, "step": 114810 }, { "epoch": 0.23194366447557138, "grad_norm": 342.1895446777344, "learning_rate": 9.49672104962331e-06, "loss": 25.9467, "step": 114820 }, { "epoch": 0.2319638651082552, "grad_norm": 744.8779907226562, "learning_rate": 9.496568412643954e-06, "loss": 46.9535, "step": 114830 }, { "epoch": 0.231984065740939, "grad_norm": 209.69110107421875, "learning_rate": 9.49641575374887e-06, "loss": 17.6925, "step": 114840 }, { "epoch": 0.23200426637362281, "grad_norm": 239.82237243652344, "learning_rate": 9.496263072938801e-06, "loss": 15.8941, "step": 114850 }, { "epoch": 0.23202446700630663, "grad_norm": 156.1233367919922, "learning_rate": 9.49611037021449e-06, "loss": 19.5506, "step": 114860 }, { "epoch": 0.23204466763899045, "grad_norm": 397.75762939453125, "learning_rate": 9.495957645576685e-06, "loss": 31.5992, "step": 114870 }, { "epoch": 0.23206486827167427, "grad_norm": 107.61036682128906, "learning_rate": 9.495804899026126e-06, "loss": 20.3872, "step": 114880 }, { "epoch": 0.2320850689043581, "grad_norm": 53.30876922607422, "learning_rate": 9.49565213056356e-06, "loss": 24.5599, "step": 114890 }, { "epoch": 0.23210526953704191, "grad_norm": 240.40597534179688, "learning_rate": 9.495499340189729e-06, "loss": 36.9456, "step": 114900 }, { "epoch": 0.2321254701697257, "grad_norm": 424.49298095703125, "learning_rate": 9.495346527905381e-06, "loss": 30.3928, "step": 114910 }, { "epoch": 0.23214567080240953, "grad_norm": 529.7340698242188, "learning_rate": 9.495193693711259e-06, "loss": 20.0728, "step": 114920 }, { "epoch": 0.23216587143509335, "grad_norm": 514.596435546875, "learning_rate": 9.495040837608107e-06, "loss": 36.239, "step": 114930 }, { "epoch": 0.23218607206777717, "grad_norm": 249.42138671875, "learning_rate": 9.494887959596672e-06, "loss": 22.9949, "step": 114940 }, { "epoch": 0.23220627270046099, "grad_norm": 451.52288818359375, "learning_rate": 9.4947350596777e-06, "loss": 24.1953, "step": 114950 }, { "epoch": 0.2322264733331448, "grad_norm": 266.20111083984375, "learning_rate": 9.494582137851932e-06, "loss": 22.9859, "step": 114960 }, { "epoch": 0.2322466739658286, "grad_norm": 29.28197479248047, "learning_rate": 9.494429194120117e-06, "loss": 26.2508, "step": 114970 }, { "epoch": 0.23226687459851242, "grad_norm": 209.37469482421875, "learning_rate": 9.494276228482998e-06, "loss": 19.6544, "step": 114980 }, { "epoch": 0.23228707523119624, "grad_norm": 370.8959045410156, "learning_rate": 9.494123240941321e-06, "loss": 26.3807, "step": 114990 }, { "epoch": 0.23230727586388006, "grad_norm": 441.63861083984375, "learning_rate": 9.493970231495836e-06, "loss": 25.2299, "step": 115000 }, { "epoch": 0.23232747649656388, "grad_norm": 328.81890869140625, "learning_rate": 9.493817200147282e-06, "loss": 34.6184, "step": 115010 }, { "epoch": 0.2323476771292477, "grad_norm": 304.5109558105469, "learning_rate": 9.493664146896411e-06, "loss": 23.2631, "step": 115020 }, { "epoch": 0.2323678777619315, "grad_norm": 193.64573669433594, "learning_rate": 9.493511071743963e-06, "loss": 12.8254, "step": 115030 }, { "epoch": 0.2323880783946153, "grad_norm": 400.81829833984375, "learning_rate": 9.493357974690689e-06, "loss": 26.9655, "step": 115040 }, { "epoch": 0.23240827902729913, "grad_norm": 225.1509552001953, "learning_rate": 9.493204855737332e-06, "loss": 26.7258, "step": 115050 }, { "epoch": 0.23242847965998295, "grad_norm": 112.218994140625, "learning_rate": 9.49305171488464e-06, "loss": 13.236, "step": 115060 }, { "epoch": 0.23244868029266677, "grad_norm": 0.0, "learning_rate": 9.492898552133358e-06, "loss": 14.0513, "step": 115070 }, { "epoch": 0.2324688809253506, "grad_norm": 258.75933837890625, "learning_rate": 9.492745367484234e-06, "loss": 10.9198, "step": 115080 }, { "epoch": 0.2324890815580344, "grad_norm": 299.4322814941406, "learning_rate": 9.492592160938014e-06, "loss": 19.2684, "step": 115090 }, { "epoch": 0.2325092821907182, "grad_norm": 274.05670166015625, "learning_rate": 9.492438932495444e-06, "loss": 13.9111, "step": 115100 }, { "epoch": 0.23252948282340202, "grad_norm": 539.0249633789062, "learning_rate": 9.492285682157274e-06, "loss": 23.2064, "step": 115110 }, { "epoch": 0.23254968345608584, "grad_norm": 334.9559326171875, "learning_rate": 9.492132409924247e-06, "loss": 31.7509, "step": 115120 }, { "epoch": 0.23256988408876966, "grad_norm": 448.2852478027344, "learning_rate": 9.49197911579711e-06, "loss": 22.7987, "step": 115130 }, { "epoch": 0.23259008472145348, "grad_norm": 65.74850463867188, "learning_rate": 9.491825799776613e-06, "loss": 31.0649, "step": 115140 }, { "epoch": 0.2326102853541373, "grad_norm": 199.54507446289062, "learning_rate": 9.491672461863501e-06, "loss": 31.4747, "step": 115150 }, { "epoch": 0.2326304859868211, "grad_norm": 3.6141393184661865, "learning_rate": 9.491519102058523e-06, "loss": 31.1211, "step": 115160 }, { "epoch": 0.23265068661950492, "grad_norm": 189.00340270996094, "learning_rate": 9.491365720362427e-06, "loss": 16.9994, "step": 115170 }, { "epoch": 0.23267088725218874, "grad_norm": 150.65887451171875, "learning_rate": 9.491212316775956e-06, "loss": 22.8305, "step": 115180 }, { "epoch": 0.23269108788487256, "grad_norm": 307.58612060546875, "learning_rate": 9.491058891299863e-06, "loss": 27.3476, "step": 115190 }, { "epoch": 0.23271128851755638, "grad_norm": 42.90616226196289, "learning_rate": 9.490905443934892e-06, "loss": 27.5094, "step": 115200 }, { "epoch": 0.2327314891502402, "grad_norm": 361.72845458984375, "learning_rate": 9.490751974681795e-06, "loss": 14.7811, "step": 115210 }, { "epoch": 0.23275168978292402, "grad_norm": 237.04403686523438, "learning_rate": 9.490598483541316e-06, "loss": 26.1357, "step": 115220 }, { "epoch": 0.2327718904156078, "grad_norm": 357.5845947265625, "learning_rate": 9.490444970514205e-06, "loss": 20.2135, "step": 115230 }, { "epoch": 0.23279209104829163, "grad_norm": 442.728271484375, "learning_rate": 9.49029143560121e-06, "loss": 31.4604, "step": 115240 }, { "epoch": 0.23281229168097545, "grad_norm": 47.02629852294922, "learning_rate": 9.490137878803078e-06, "loss": 20.1302, "step": 115250 }, { "epoch": 0.23283249231365927, "grad_norm": 385.4635314941406, "learning_rate": 9.48998430012056e-06, "loss": 15.4748, "step": 115260 }, { "epoch": 0.2328526929463431, "grad_norm": 629.806640625, "learning_rate": 9.489830699554403e-06, "loss": 24.8844, "step": 115270 }, { "epoch": 0.2328728935790269, "grad_norm": 117.03358459472656, "learning_rate": 9.489677077105355e-06, "loss": 19.8503, "step": 115280 }, { "epoch": 0.2328930942117107, "grad_norm": 287.9032897949219, "learning_rate": 9.489523432774165e-06, "loss": 28.559, "step": 115290 }, { "epoch": 0.23291329484439452, "grad_norm": 92.2516098022461, "learning_rate": 9.489369766561584e-06, "loss": 21.6764, "step": 115300 }, { "epoch": 0.23293349547707834, "grad_norm": 420.4307861328125, "learning_rate": 9.489216078468359e-06, "loss": 14.3042, "step": 115310 }, { "epoch": 0.23295369610976216, "grad_norm": 167.9078369140625, "learning_rate": 9.48906236849524e-06, "loss": 39.1072, "step": 115320 }, { "epoch": 0.23297389674244598, "grad_norm": 152.89942932128906, "learning_rate": 9.488908636642972e-06, "loss": 16.3339, "step": 115330 }, { "epoch": 0.2329940973751298, "grad_norm": 319.19256591796875, "learning_rate": 9.48875488291231e-06, "loss": 28.0587, "step": 115340 }, { "epoch": 0.2330142980078136, "grad_norm": 236.2490234375, "learning_rate": 9.488601107304001e-06, "loss": 24.3486, "step": 115350 }, { "epoch": 0.2330344986404974, "grad_norm": 306.9745788574219, "learning_rate": 9.488447309818795e-06, "loss": 28.3157, "step": 115360 }, { "epoch": 0.23305469927318123, "grad_norm": 65.7059097290039, "learning_rate": 9.488293490457441e-06, "loss": 16.7435, "step": 115370 }, { "epoch": 0.23307489990586505, "grad_norm": 524.2155151367188, "learning_rate": 9.48813964922069e-06, "loss": 33.0214, "step": 115380 }, { "epoch": 0.23309510053854887, "grad_norm": 423.25030517578125, "learning_rate": 9.487985786109288e-06, "loss": 16.939, "step": 115390 }, { "epoch": 0.2331153011712327, "grad_norm": 435.7621765136719, "learning_rate": 9.487831901123989e-06, "loss": 22.0929, "step": 115400 }, { "epoch": 0.2331355018039165, "grad_norm": 41.3888053894043, "learning_rate": 9.48767799426554e-06, "loss": 12.1645, "step": 115410 }, { "epoch": 0.2331557024366003, "grad_norm": 293.6173095703125, "learning_rate": 9.487524065534696e-06, "loss": 19.0997, "step": 115420 }, { "epoch": 0.23317590306928412, "grad_norm": 458.6754455566406, "learning_rate": 9.487370114932201e-06, "loss": 21.6715, "step": 115430 }, { "epoch": 0.23319610370196794, "grad_norm": 246.3583221435547, "learning_rate": 9.48721614245881e-06, "loss": 34.3372, "step": 115440 }, { "epoch": 0.23321630433465176, "grad_norm": 158.93910217285156, "learning_rate": 9.487062148115272e-06, "loss": 27.3645, "step": 115450 }, { "epoch": 0.23323650496733558, "grad_norm": 292.28607177734375, "learning_rate": 9.486908131902336e-06, "loss": 25.1873, "step": 115460 }, { "epoch": 0.2332567056000194, "grad_norm": 225.8481903076172, "learning_rate": 9.486754093820755e-06, "loss": 19.9089, "step": 115470 }, { "epoch": 0.2332769062327032, "grad_norm": 339.4962463378906, "learning_rate": 9.486600033871279e-06, "loss": 43.7078, "step": 115480 }, { "epoch": 0.23329710686538702, "grad_norm": 165.0774383544922, "learning_rate": 9.486445952054658e-06, "loss": 17.6215, "step": 115490 }, { "epoch": 0.23331730749807084, "grad_norm": 435.8450622558594, "learning_rate": 9.486291848371642e-06, "loss": 29.6204, "step": 115500 }, { "epoch": 0.23333750813075466, "grad_norm": 223.5486602783203, "learning_rate": 9.486137722822986e-06, "loss": 21.8454, "step": 115510 }, { "epoch": 0.23335770876343848, "grad_norm": 131.8701629638672, "learning_rate": 9.48598357540944e-06, "loss": 12.4179, "step": 115520 }, { "epoch": 0.2333779093961223, "grad_norm": 289.216552734375, "learning_rate": 9.48582940613175e-06, "loss": 27.9102, "step": 115530 }, { "epoch": 0.2333981100288061, "grad_norm": 110.80671691894531, "learning_rate": 9.485675214990673e-06, "loss": 20.0876, "step": 115540 }, { "epoch": 0.2334183106614899, "grad_norm": 383.3585510253906, "learning_rate": 9.485521001986961e-06, "loss": 22.5682, "step": 115550 }, { "epoch": 0.23343851129417373, "grad_norm": 885.8735961914062, "learning_rate": 9.485366767121363e-06, "loss": 51.4464, "step": 115560 }, { "epoch": 0.23345871192685755, "grad_norm": 118.47296905517578, "learning_rate": 9.48521251039463e-06, "loss": 27.0323, "step": 115570 }, { "epoch": 0.23347891255954137, "grad_norm": 172.71435546875, "learning_rate": 9.485058231807515e-06, "loss": 20.2332, "step": 115580 }, { "epoch": 0.2334991131922252, "grad_norm": 509.81591796875, "learning_rate": 9.484903931360772e-06, "loss": 30.6632, "step": 115590 }, { "epoch": 0.233519313824909, "grad_norm": 225.19039916992188, "learning_rate": 9.484749609055151e-06, "loss": 22.7842, "step": 115600 }, { "epoch": 0.2335395144575928, "grad_norm": 175.8430938720703, "learning_rate": 9.484595264891403e-06, "loss": 24.859, "step": 115610 }, { "epoch": 0.23355971509027662, "grad_norm": 0.0, "learning_rate": 9.484440898870282e-06, "loss": 20.3621, "step": 115620 }, { "epoch": 0.23357991572296044, "grad_norm": 62.73441696166992, "learning_rate": 9.484286510992541e-06, "loss": 18.718, "step": 115630 }, { "epoch": 0.23360011635564426, "grad_norm": 351.0538330078125, "learning_rate": 9.48413210125893e-06, "loss": 23.5974, "step": 115640 }, { "epoch": 0.23362031698832808, "grad_norm": 150.0969696044922, "learning_rate": 9.483977669670204e-06, "loss": 17.5927, "step": 115650 }, { "epoch": 0.2336405176210119, "grad_norm": 224.1090545654297, "learning_rate": 9.483823216227115e-06, "loss": 29.4322, "step": 115660 }, { "epoch": 0.2336607182536957, "grad_norm": 487.00048828125, "learning_rate": 9.483668740930413e-06, "loss": 24.1888, "step": 115670 }, { "epoch": 0.2336809188863795, "grad_norm": 81.06041717529297, "learning_rate": 9.483514243780856e-06, "loss": 16.0977, "step": 115680 }, { "epoch": 0.23370111951906333, "grad_norm": 165.6612091064453, "learning_rate": 9.483359724779194e-06, "loss": 35.3067, "step": 115690 }, { "epoch": 0.23372132015174715, "grad_norm": 256.6815185546875, "learning_rate": 9.48320518392618e-06, "loss": 29.1159, "step": 115700 }, { "epoch": 0.23374152078443097, "grad_norm": 595.8396606445312, "learning_rate": 9.483050621222567e-06, "loss": 20.4748, "step": 115710 }, { "epoch": 0.2337617214171148, "grad_norm": 431.47296142578125, "learning_rate": 9.482896036669111e-06, "loss": 49.6124, "step": 115720 }, { "epoch": 0.2337819220497986, "grad_norm": 68.43452453613281, "learning_rate": 9.482741430266563e-06, "loss": 23.3192, "step": 115730 }, { "epoch": 0.2338021226824824, "grad_norm": 363.4878845214844, "learning_rate": 9.482586802015673e-06, "loss": 18.7817, "step": 115740 }, { "epoch": 0.23382232331516623, "grad_norm": 498.78912353515625, "learning_rate": 9.482432151917204e-06, "loss": 26.1025, "step": 115750 }, { "epoch": 0.23384252394785005, "grad_norm": 328.8289794921875, "learning_rate": 9.482277479971902e-06, "loss": 18.5876, "step": 115760 }, { "epoch": 0.23386272458053387, "grad_norm": 1277.3660888671875, "learning_rate": 9.482122786180524e-06, "loss": 41.8383, "step": 115770 }, { "epoch": 0.23388292521321769, "grad_norm": 495.3123474121094, "learning_rate": 9.481968070543822e-06, "loss": 23.7981, "step": 115780 }, { "epoch": 0.2339031258459015, "grad_norm": 152.3654327392578, "learning_rate": 9.48181333306255e-06, "loss": 21.6763, "step": 115790 }, { "epoch": 0.2339233264785853, "grad_norm": 507.03741455078125, "learning_rate": 9.481658573737465e-06, "loss": 20.4101, "step": 115800 }, { "epoch": 0.23394352711126912, "grad_norm": 67.91798400878906, "learning_rate": 9.48150379256932e-06, "loss": 24.7752, "step": 115810 }, { "epoch": 0.23396372774395294, "grad_norm": 557.4493408203125, "learning_rate": 9.48134898955887e-06, "loss": 20.2928, "step": 115820 }, { "epoch": 0.23398392837663676, "grad_norm": 153.82925415039062, "learning_rate": 9.481194164706866e-06, "loss": 29.2407, "step": 115830 }, { "epoch": 0.23400412900932058, "grad_norm": 101.29877471923828, "learning_rate": 9.481039318014068e-06, "loss": 21.0803, "step": 115840 }, { "epoch": 0.2340243296420044, "grad_norm": 318.5856628417969, "learning_rate": 9.480884449481224e-06, "loss": 28.2023, "step": 115850 }, { "epoch": 0.2340445302746882, "grad_norm": 648.8766479492188, "learning_rate": 9.480729559109096e-06, "loss": 45.022, "step": 115860 }, { "epoch": 0.234064730907372, "grad_norm": 20.010025024414062, "learning_rate": 9.480574646898434e-06, "loss": 14.8475, "step": 115870 }, { "epoch": 0.23408493154005583, "grad_norm": 1451.508056640625, "learning_rate": 9.480419712849996e-06, "loss": 20.1901, "step": 115880 }, { "epoch": 0.23410513217273965, "grad_norm": 116.3333969116211, "learning_rate": 9.480264756964535e-06, "loss": 22.7051, "step": 115890 }, { "epoch": 0.23412533280542347, "grad_norm": 76.51915740966797, "learning_rate": 9.480109779242805e-06, "loss": 13.8775, "step": 115900 }, { "epoch": 0.2341455334381073, "grad_norm": 60.78822708129883, "learning_rate": 9.479954779685566e-06, "loss": 13.1547, "step": 115910 }, { "epoch": 0.2341657340707911, "grad_norm": 226.81393432617188, "learning_rate": 9.47979975829357e-06, "loss": 18.4371, "step": 115920 }, { "epoch": 0.2341859347034749, "grad_norm": 247.3161163330078, "learning_rate": 9.479644715067572e-06, "loss": 15.7734, "step": 115930 }, { "epoch": 0.23420613533615872, "grad_norm": 201.0279998779297, "learning_rate": 9.479489650008331e-06, "loss": 33.2515, "step": 115940 }, { "epoch": 0.23422633596884254, "grad_norm": 123.91373443603516, "learning_rate": 9.4793345631166e-06, "loss": 14.385, "step": 115950 }, { "epoch": 0.23424653660152636, "grad_norm": 192.431396484375, "learning_rate": 9.479179454393135e-06, "loss": 16.3499, "step": 115960 }, { "epoch": 0.23426673723421018, "grad_norm": 124.63282775878906, "learning_rate": 9.479024323838694e-06, "loss": 28.0819, "step": 115970 }, { "epoch": 0.234286937866894, "grad_norm": 103.68933868408203, "learning_rate": 9.478869171454031e-06, "loss": 10.7974, "step": 115980 }, { "epoch": 0.2343071384995778, "grad_norm": 295.1546325683594, "learning_rate": 9.478713997239902e-06, "loss": 35.138, "step": 115990 }, { "epoch": 0.23432733913226161, "grad_norm": 110.17132568359375, "learning_rate": 9.478558801197065e-06, "loss": 20.7887, "step": 116000 }, { "epoch": 0.23434753976494543, "grad_norm": 142.2524871826172, "learning_rate": 9.478403583326275e-06, "loss": 16.7661, "step": 116010 }, { "epoch": 0.23436774039762925, "grad_norm": 220.80076599121094, "learning_rate": 9.47824834362829e-06, "loss": 15.5966, "step": 116020 }, { "epoch": 0.23438794103031307, "grad_norm": 59.51459884643555, "learning_rate": 9.478093082103865e-06, "loss": 30.9357, "step": 116030 }, { "epoch": 0.2344081416629969, "grad_norm": 561.8474731445312, "learning_rate": 9.477937798753757e-06, "loss": 30.552, "step": 116040 }, { "epoch": 0.23442834229568071, "grad_norm": 324.6946105957031, "learning_rate": 9.477782493578724e-06, "loss": 13.6852, "step": 116050 }, { "epoch": 0.2344485429283645, "grad_norm": 262.02618408203125, "learning_rate": 9.477627166579523e-06, "loss": 21.8963, "step": 116060 }, { "epoch": 0.23446874356104833, "grad_norm": 194.013671875, "learning_rate": 9.47747181775691e-06, "loss": 39.6916, "step": 116070 }, { "epoch": 0.23448894419373215, "grad_norm": 628.759521484375, "learning_rate": 9.477316447111642e-06, "loss": 28.2137, "step": 116080 }, { "epoch": 0.23450914482641597, "grad_norm": 317.8814392089844, "learning_rate": 9.477161054644475e-06, "loss": 18.0995, "step": 116090 }, { "epoch": 0.2345293454590998, "grad_norm": 160.2928466796875, "learning_rate": 9.47700564035617e-06, "loss": 25.9985, "step": 116100 }, { "epoch": 0.2345495460917836, "grad_norm": 181.4545440673828, "learning_rate": 9.476850204247483e-06, "loss": 36.9144, "step": 116110 }, { "epoch": 0.2345697467244674, "grad_norm": 349.69781494140625, "learning_rate": 9.47669474631917e-06, "loss": 18.0826, "step": 116120 }, { "epoch": 0.23458994735715122, "grad_norm": 394.7893981933594, "learning_rate": 9.476539266571988e-06, "loss": 19.9794, "step": 116130 }, { "epoch": 0.23461014798983504, "grad_norm": 324.2250671386719, "learning_rate": 9.4763837650067e-06, "loss": 24.1869, "step": 116140 }, { "epoch": 0.23463034862251886, "grad_norm": 173.2347869873047, "learning_rate": 9.476228241624059e-06, "loss": 22.6035, "step": 116150 }, { "epoch": 0.23465054925520268, "grad_norm": 286.79046630859375, "learning_rate": 9.476072696424825e-06, "loss": 26.1696, "step": 116160 }, { "epoch": 0.2346707498878865, "grad_norm": 129.0603790283203, "learning_rate": 9.475917129409755e-06, "loss": 23.7962, "step": 116170 }, { "epoch": 0.2346909505205703, "grad_norm": 419.9548645019531, "learning_rate": 9.475761540579607e-06, "loss": 23.2985, "step": 116180 }, { "epoch": 0.2347111511532541, "grad_norm": 205.3668212890625, "learning_rate": 9.475605929935142e-06, "loss": 16.7907, "step": 116190 }, { "epoch": 0.23473135178593793, "grad_norm": 57.63746643066406, "learning_rate": 9.475450297477113e-06, "loss": 17.1005, "step": 116200 }, { "epoch": 0.23475155241862175, "grad_norm": 277.07806396484375, "learning_rate": 9.475294643206285e-06, "loss": 16.5662, "step": 116210 }, { "epoch": 0.23477175305130557, "grad_norm": 1.5893373489379883, "learning_rate": 9.475138967123414e-06, "loss": 12.0859, "step": 116220 }, { "epoch": 0.2347919536839894, "grad_norm": 166.39889526367188, "learning_rate": 9.474983269229256e-06, "loss": 14.0511, "step": 116230 }, { "epoch": 0.2348121543166732, "grad_norm": 172.66754150390625, "learning_rate": 9.474827549524574e-06, "loss": 15.6439, "step": 116240 }, { "epoch": 0.234832354949357, "grad_norm": 197.40219116210938, "learning_rate": 9.474671808010126e-06, "loss": 11.6597, "step": 116250 }, { "epoch": 0.23485255558204082, "grad_norm": 315.5557861328125, "learning_rate": 9.47451604468667e-06, "loss": 19.0722, "step": 116260 }, { "epoch": 0.23487275621472464, "grad_norm": 174.75418090820312, "learning_rate": 9.474360259554965e-06, "loss": 22.147, "step": 116270 }, { "epoch": 0.23489295684740846, "grad_norm": 103.79345703125, "learning_rate": 9.47420445261577e-06, "loss": 25.8521, "step": 116280 }, { "epoch": 0.23491315748009228, "grad_norm": 296.58953857421875, "learning_rate": 9.474048623869846e-06, "loss": 13.0831, "step": 116290 }, { "epoch": 0.2349333581127761, "grad_norm": 2281.25927734375, "learning_rate": 9.473892773317952e-06, "loss": 33.7196, "step": 116300 }, { "epoch": 0.2349535587454599, "grad_norm": 626.5997314453125, "learning_rate": 9.473736900960845e-06, "loss": 27.4872, "step": 116310 }, { "epoch": 0.23497375937814372, "grad_norm": 271.4847412109375, "learning_rate": 9.47358100679929e-06, "loss": 19.5322, "step": 116320 }, { "epoch": 0.23499396001082754, "grad_norm": 192.460205078125, "learning_rate": 9.473425090834041e-06, "loss": 26.1777, "step": 116330 }, { "epoch": 0.23501416064351136, "grad_norm": 258.49566650390625, "learning_rate": 9.473269153065863e-06, "loss": 32.0623, "step": 116340 }, { "epoch": 0.23503436127619518, "grad_norm": 164.61317443847656, "learning_rate": 9.473113193495513e-06, "loss": 24.7378, "step": 116350 }, { "epoch": 0.235054561908879, "grad_norm": 167.504150390625, "learning_rate": 9.472957212123751e-06, "loss": 9.2017, "step": 116360 }, { "epoch": 0.23507476254156282, "grad_norm": 268.6871032714844, "learning_rate": 9.472801208951339e-06, "loss": 17.6163, "step": 116370 }, { "epoch": 0.2350949631742466, "grad_norm": 385.9167175292969, "learning_rate": 9.472645183979037e-06, "loss": 28.0912, "step": 116380 }, { "epoch": 0.23511516380693043, "grad_norm": 481.64013671875, "learning_rate": 9.472489137207604e-06, "loss": 17.8221, "step": 116390 }, { "epoch": 0.23513536443961425, "grad_norm": 417.0739440917969, "learning_rate": 9.4723330686378e-06, "loss": 22.5587, "step": 116400 }, { "epoch": 0.23515556507229807, "grad_norm": 380.9157409667969, "learning_rate": 9.472176978270389e-06, "loss": 24.0513, "step": 116410 }, { "epoch": 0.2351757657049819, "grad_norm": 417.9544677734375, "learning_rate": 9.472020866106128e-06, "loss": 28.2791, "step": 116420 }, { "epoch": 0.2351959663376657, "grad_norm": 396.08917236328125, "learning_rate": 9.47186473214578e-06, "loss": 21.0597, "step": 116430 }, { "epoch": 0.2352161669703495, "grad_norm": 473.193359375, "learning_rate": 9.471708576390108e-06, "loss": 23.1658, "step": 116440 }, { "epoch": 0.23523636760303332, "grad_norm": 413.8469543457031, "learning_rate": 9.47155239883987e-06, "loss": 19.9192, "step": 116450 }, { "epoch": 0.23525656823571714, "grad_norm": 411.5003356933594, "learning_rate": 9.471396199495825e-06, "loss": 19.8897, "step": 116460 }, { "epoch": 0.23527676886840096, "grad_norm": 595.5771484375, "learning_rate": 9.471239978358741e-06, "loss": 26.3298, "step": 116470 }, { "epoch": 0.23529696950108478, "grad_norm": 292.48785400390625, "learning_rate": 9.471083735429374e-06, "loss": 40.6243, "step": 116480 }, { "epoch": 0.2353171701337686, "grad_norm": 209.79086303710938, "learning_rate": 9.470927470708486e-06, "loss": 56.8708, "step": 116490 }, { "epoch": 0.2353373707664524, "grad_norm": 352.2021789550781, "learning_rate": 9.470771184196842e-06, "loss": 43.5514, "step": 116500 }, { "epoch": 0.2353575713991362, "grad_norm": 325.6732482910156, "learning_rate": 9.4706148758952e-06, "loss": 17.2414, "step": 116510 }, { "epoch": 0.23537777203182003, "grad_norm": 204.5680694580078, "learning_rate": 9.470458545804325e-06, "loss": 33.4062, "step": 116520 }, { "epoch": 0.23539797266450385, "grad_norm": 326.3584289550781, "learning_rate": 9.470302193924975e-06, "loss": 18.4058, "step": 116530 }, { "epoch": 0.23541817329718767, "grad_norm": 292.36224365234375, "learning_rate": 9.470145820257915e-06, "loss": 27.9851, "step": 116540 }, { "epoch": 0.2354383739298715, "grad_norm": 315.6614685058594, "learning_rate": 9.469989424803907e-06, "loss": 12.8219, "step": 116550 }, { "epoch": 0.2354585745625553, "grad_norm": 193.84877014160156, "learning_rate": 9.469833007563712e-06, "loss": 15.6498, "step": 116560 }, { "epoch": 0.2354787751952391, "grad_norm": 88.86463928222656, "learning_rate": 9.469676568538094e-06, "loss": 30.0117, "step": 116570 }, { "epoch": 0.23549897582792292, "grad_norm": 201.30694580078125, "learning_rate": 9.469520107727815e-06, "loss": 22.2753, "step": 116580 }, { "epoch": 0.23551917646060674, "grad_norm": 327.0163879394531, "learning_rate": 9.469363625133634e-06, "loss": 16.5487, "step": 116590 }, { "epoch": 0.23553937709329056, "grad_norm": 185.83753967285156, "learning_rate": 9.46920712075632e-06, "loss": 31.962, "step": 116600 }, { "epoch": 0.23555957772597438, "grad_norm": 335.5920715332031, "learning_rate": 9.469050594596631e-06, "loss": 15.5151, "step": 116610 }, { "epoch": 0.2355797783586582, "grad_norm": 175.83265686035156, "learning_rate": 9.468894046655332e-06, "loss": 23.5292, "step": 116620 }, { "epoch": 0.235599978991342, "grad_norm": 252.6981658935547, "learning_rate": 9.468737476933186e-06, "loss": 19.0242, "step": 116630 }, { "epoch": 0.23562017962402582, "grad_norm": 300.33477783203125, "learning_rate": 9.468580885430953e-06, "loss": 20.014, "step": 116640 }, { "epoch": 0.23564038025670964, "grad_norm": 378.6827392578125, "learning_rate": 9.468424272149402e-06, "loss": 33.182, "step": 116650 }, { "epoch": 0.23566058088939346, "grad_norm": 0.0, "learning_rate": 9.46826763708929e-06, "loss": 17.9242, "step": 116660 }, { "epoch": 0.23568078152207728, "grad_norm": 291.97723388671875, "learning_rate": 9.468110980251386e-06, "loss": 25.2904, "step": 116670 }, { "epoch": 0.2357009821547611, "grad_norm": 339.9057922363281, "learning_rate": 9.467954301636451e-06, "loss": 23.5504, "step": 116680 }, { "epoch": 0.23572118278744492, "grad_norm": 131.88233947753906, "learning_rate": 9.467797601245246e-06, "loss": 21.179, "step": 116690 }, { "epoch": 0.2357413834201287, "grad_norm": 480.6405029296875, "learning_rate": 9.46764087907854e-06, "loss": 17.7476, "step": 116700 }, { "epoch": 0.23576158405281253, "grad_norm": 317.6056213378906, "learning_rate": 9.467484135137093e-06, "loss": 31.3481, "step": 116710 }, { "epoch": 0.23578178468549635, "grad_norm": 847.7156982421875, "learning_rate": 9.46732736942167e-06, "loss": 27.6137, "step": 116720 }, { "epoch": 0.23580198531818017, "grad_norm": 338.5979919433594, "learning_rate": 9.467170581933037e-06, "loss": 29.3574, "step": 116730 }, { "epoch": 0.235822185950864, "grad_norm": 158.7078399658203, "learning_rate": 9.467013772671953e-06, "loss": 14.0838, "step": 116740 }, { "epoch": 0.2358423865835478, "grad_norm": 277.1037292480469, "learning_rate": 9.46685694163919e-06, "loss": 19.5375, "step": 116750 }, { "epoch": 0.2358625872162316, "grad_norm": 360.1117858886719, "learning_rate": 9.466700088835505e-06, "loss": 21.8513, "step": 116760 }, { "epoch": 0.23588278784891542, "grad_norm": 0.0, "learning_rate": 9.466543214261666e-06, "loss": 19.1082, "step": 116770 }, { "epoch": 0.23590298848159924, "grad_norm": 168.9488525390625, "learning_rate": 9.466386317918436e-06, "loss": 23.645, "step": 116780 }, { "epoch": 0.23592318911428306, "grad_norm": 481.7696228027344, "learning_rate": 9.466229399806583e-06, "loss": 25.8601, "step": 116790 }, { "epoch": 0.23594338974696688, "grad_norm": 306.4312438964844, "learning_rate": 9.46607245992687e-06, "loss": 31.2524, "step": 116800 }, { "epoch": 0.2359635903796507, "grad_norm": 207.95323181152344, "learning_rate": 9.465915498280058e-06, "loss": 18.7351, "step": 116810 }, { "epoch": 0.2359837910123345, "grad_norm": 367.41192626953125, "learning_rate": 9.465758514866919e-06, "loss": 31.1703, "step": 116820 }, { "epoch": 0.2360039916450183, "grad_norm": 75.71501922607422, "learning_rate": 9.465601509688212e-06, "loss": 21.0953, "step": 116830 }, { "epoch": 0.23602419227770213, "grad_norm": 244.92633056640625, "learning_rate": 9.465444482744708e-06, "loss": 22.5941, "step": 116840 }, { "epoch": 0.23604439291038595, "grad_norm": 152.49436950683594, "learning_rate": 9.465287434037167e-06, "loss": 20.8551, "step": 116850 }, { "epoch": 0.23606459354306977, "grad_norm": 797.0574951171875, "learning_rate": 9.465130363566357e-06, "loss": 26.2382, "step": 116860 }, { "epoch": 0.2360847941757536, "grad_norm": 656.8142700195312, "learning_rate": 9.464973271333042e-06, "loss": 15.786, "step": 116870 }, { "epoch": 0.2361049948084374, "grad_norm": 308.963623046875, "learning_rate": 9.464816157337991e-06, "loss": 28.987, "step": 116880 }, { "epoch": 0.2361251954411212, "grad_norm": 363.33013916015625, "learning_rate": 9.464659021581966e-06, "loss": 20.5675, "step": 116890 }, { "epoch": 0.23614539607380503, "grad_norm": 184.370849609375, "learning_rate": 9.464501864065735e-06, "loss": 21.2541, "step": 116900 }, { "epoch": 0.23616559670648885, "grad_norm": 279.04638671875, "learning_rate": 9.464344684790063e-06, "loss": 24.7038, "step": 116910 }, { "epoch": 0.23618579733917267, "grad_norm": 360.7041320800781, "learning_rate": 9.464187483755718e-06, "loss": 30.3314, "step": 116920 }, { "epoch": 0.23620599797185649, "grad_norm": 243.63458251953125, "learning_rate": 9.464030260963463e-06, "loss": 18.6166, "step": 116930 }, { "epoch": 0.2362261986045403, "grad_norm": 369.84539794921875, "learning_rate": 9.463873016414066e-06, "loss": 20.8273, "step": 116940 }, { "epoch": 0.2362463992372241, "grad_norm": 187.4353485107422, "learning_rate": 9.463715750108293e-06, "loss": 17.9306, "step": 116950 }, { "epoch": 0.23626659986990792, "grad_norm": 32.80021667480469, "learning_rate": 9.463558462046912e-06, "loss": 10.5943, "step": 116960 }, { "epoch": 0.23628680050259174, "grad_norm": 0.0, "learning_rate": 9.463401152230688e-06, "loss": 11.2117, "step": 116970 }, { "epoch": 0.23630700113527556, "grad_norm": 274.64471435546875, "learning_rate": 9.463243820660389e-06, "loss": 21.4471, "step": 116980 }, { "epoch": 0.23632720176795938, "grad_norm": 48.2933235168457, "learning_rate": 9.463086467336779e-06, "loss": 22.1026, "step": 116990 }, { "epoch": 0.2363474024006432, "grad_norm": 188.52525329589844, "learning_rate": 9.46292909226063e-06, "loss": 21.0083, "step": 117000 }, { "epoch": 0.23636760303332702, "grad_norm": 193.25814819335938, "learning_rate": 9.462771695432702e-06, "loss": 14.1363, "step": 117010 }, { "epoch": 0.2363878036660108, "grad_norm": 244.10838317871094, "learning_rate": 9.462614276853767e-06, "loss": 31.5207, "step": 117020 }, { "epoch": 0.23640800429869463, "grad_norm": 137.51309204101562, "learning_rate": 9.462456836524593e-06, "loss": 22.6153, "step": 117030 }, { "epoch": 0.23642820493137845, "grad_norm": 64.69757843017578, "learning_rate": 9.462299374445944e-06, "loss": 19.6515, "step": 117040 }, { "epoch": 0.23644840556406227, "grad_norm": 330.66900634765625, "learning_rate": 9.46214189061859e-06, "loss": 33.7222, "step": 117050 }, { "epoch": 0.2364686061967461, "grad_norm": 134.41558837890625, "learning_rate": 9.461984385043297e-06, "loss": 21.0589, "step": 117060 }, { "epoch": 0.2364888068294299, "grad_norm": 398.85162353515625, "learning_rate": 9.461826857720835e-06, "loss": 34.7141, "step": 117070 }, { "epoch": 0.2365090074621137, "grad_norm": 866.6536865234375, "learning_rate": 9.461669308651968e-06, "loss": 32.193, "step": 117080 }, { "epoch": 0.23652920809479752, "grad_norm": 377.9045715332031, "learning_rate": 9.461511737837467e-06, "loss": 16.784, "step": 117090 }, { "epoch": 0.23654940872748134, "grad_norm": 367.3238830566406, "learning_rate": 9.461354145278098e-06, "loss": 21.5466, "step": 117100 }, { "epoch": 0.23656960936016516, "grad_norm": 213.2590789794922, "learning_rate": 9.46119653097463e-06, "loss": 19.6061, "step": 117110 }, { "epoch": 0.23658980999284898, "grad_norm": 0.0, "learning_rate": 9.461038894927833e-06, "loss": 25.6167, "step": 117120 }, { "epoch": 0.2366100106255328, "grad_norm": 314.18994140625, "learning_rate": 9.460881237138472e-06, "loss": 14.4783, "step": 117130 }, { "epoch": 0.2366302112582166, "grad_norm": 430.2128601074219, "learning_rate": 9.460723557607317e-06, "loss": 30.9133, "step": 117140 }, { "epoch": 0.23665041189090041, "grad_norm": 156.36801147460938, "learning_rate": 9.460565856335136e-06, "loss": 22.759, "step": 117150 }, { "epoch": 0.23667061252358423, "grad_norm": 198.0590057373047, "learning_rate": 9.460408133322698e-06, "loss": 36.564, "step": 117160 }, { "epoch": 0.23669081315626805, "grad_norm": 591.2015991210938, "learning_rate": 9.460250388570772e-06, "loss": 47.4942, "step": 117170 }, { "epoch": 0.23671101378895187, "grad_norm": 512.3015747070312, "learning_rate": 9.460092622080128e-06, "loss": 40.0301, "step": 117180 }, { "epoch": 0.2367312144216357, "grad_norm": 9.764747619628906, "learning_rate": 9.459934833851531e-06, "loss": 16.187, "step": 117190 }, { "epoch": 0.23675141505431951, "grad_norm": 199.7691192626953, "learning_rate": 9.459777023885754e-06, "loss": 33.236, "step": 117200 }, { "epoch": 0.2367716156870033, "grad_norm": 451.58013916015625, "learning_rate": 9.459619192183565e-06, "loss": 29.7396, "step": 117210 }, { "epoch": 0.23679181631968713, "grad_norm": 518.8576049804688, "learning_rate": 9.459461338745733e-06, "loss": 11.7279, "step": 117220 }, { "epoch": 0.23681201695237095, "grad_norm": 149.84983825683594, "learning_rate": 9.459303463573027e-06, "loss": 30.5872, "step": 117230 }, { "epoch": 0.23683221758505477, "grad_norm": 447.8902587890625, "learning_rate": 9.459145566666216e-06, "loss": 26.6792, "step": 117240 }, { "epoch": 0.2368524182177386, "grad_norm": 558.4925537109375, "learning_rate": 9.458987648026071e-06, "loss": 17.5827, "step": 117250 }, { "epoch": 0.2368726188504224, "grad_norm": 155.72071838378906, "learning_rate": 9.458829707653362e-06, "loss": 32.905, "step": 117260 }, { "epoch": 0.2368928194831062, "grad_norm": 220.75125122070312, "learning_rate": 9.458671745548855e-06, "loss": 16.1217, "step": 117270 }, { "epoch": 0.23691302011579002, "grad_norm": 376.62677001953125, "learning_rate": 9.458513761713324e-06, "loss": 20.5402, "step": 117280 }, { "epoch": 0.23693322074847384, "grad_norm": 593.46875, "learning_rate": 9.45835575614754e-06, "loss": 29.8588, "step": 117290 }, { "epoch": 0.23695342138115766, "grad_norm": 191.43443298339844, "learning_rate": 9.458197728852268e-06, "loss": 19.1071, "step": 117300 }, { "epoch": 0.23697362201384148, "grad_norm": 297.27484130859375, "learning_rate": 9.458039679828281e-06, "loss": 21.4789, "step": 117310 }, { "epoch": 0.2369938226465253, "grad_norm": 215.38121032714844, "learning_rate": 9.457881609076352e-06, "loss": 15.8397, "step": 117320 }, { "epoch": 0.23701402327920912, "grad_norm": 50.61270523071289, "learning_rate": 9.457723516597247e-06, "loss": 25.7818, "step": 117330 }, { "epoch": 0.2370342239118929, "grad_norm": 159.84866333007812, "learning_rate": 9.457565402391738e-06, "loss": 8.0159, "step": 117340 }, { "epoch": 0.23705442454457673, "grad_norm": 321.8773193359375, "learning_rate": 9.457407266460595e-06, "loss": 22.4598, "step": 117350 }, { "epoch": 0.23707462517726055, "grad_norm": 297.4977111816406, "learning_rate": 9.45724910880459e-06, "loss": 21.7997, "step": 117360 }, { "epoch": 0.23709482580994437, "grad_norm": 348.7813720703125, "learning_rate": 9.457090929424495e-06, "loss": 28.5469, "step": 117370 }, { "epoch": 0.2371150264426282, "grad_norm": 226.37010192871094, "learning_rate": 9.456932728321078e-06, "loss": 17.2521, "step": 117380 }, { "epoch": 0.237135227075312, "grad_norm": 350.39068603515625, "learning_rate": 9.456774505495112e-06, "loss": 11.5749, "step": 117390 }, { "epoch": 0.2371554277079958, "grad_norm": 408.2802734375, "learning_rate": 9.456616260947367e-06, "loss": 19.4748, "step": 117400 }, { "epoch": 0.23717562834067962, "grad_norm": 130.07420349121094, "learning_rate": 9.456457994678616e-06, "loss": 18.2044, "step": 117410 }, { "epoch": 0.23719582897336344, "grad_norm": 243.47438049316406, "learning_rate": 9.456299706689627e-06, "loss": 28.645, "step": 117420 }, { "epoch": 0.23721602960604726, "grad_norm": 50.50710678100586, "learning_rate": 9.456141396981176e-06, "loss": 21.0549, "step": 117430 }, { "epoch": 0.23723623023873108, "grad_norm": 194.81814575195312, "learning_rate": 9.455983065554032e-06, "loss": 26.0802, "step": 117440 }, { "epoch": 0.2372564308714149, "grad_norm": 414.76788330078125, "learning_rate": 9.455824712408967e-06, "loss": 30.4182, "step": 117450 }, { "epoch": 0.2372766315040987, "grad_norm": 726.7940063476562, "learning_rate": 9.455666337546751e-06, "loss": 39.4889, "step": 117460 }, { "epoch": 0.23729683213678252, "grad_norm": 372.9963684082031, "learning_rate": 9.45550794096816e-06, "loss": 18.5182, "step": 117470 }, { "epoch": 0.23731703276946634, "grad_norm": 647.8799438476562, "learning_rate": 9.455349522673962e-06, "loss": 40.8851, "step": 117480 }, { "epoch": 0.23733723340215016, "grad_norm": 290.5063781738281, "learning_rate": 9.455191082664931e-06, "loss": 18.0214, "step": 117490 }, { "epoch": 0.23735743403483398, "grad_norm": 247.71400451660156, "learning_rate": 9.45503262094184e-06, "loss": 28.55, "step": 117500 }, { "epoch": 0.2373776346675178, "grad_norm": 416.66278076171875, "learning_rate": 9.45487413750546e-06, "loss": 33.1901, "step": 117510 }, { "epoch": 0.23739783530020162, "grad_norm": 483.87030029296875, "learning_rate": 9.454715632356564e-06, "loss": 22.6464, "step": 117520 }, { "epoch": 0.2374180359328854, "grad_norm": 392.309326171875, "learning_rate": 9.454557105495922e-06, "loss": 20.4732, "step": 117530 }, { "epoch": 0.23743823656556923, "grad_norm": 322.05072021484375, "learning_rate": 9.454398556924312e-06, "loss": 23.8289, "step": 117540 }, { "epoch": 0.23745843719825305, "grad_norm": 587.4776000976562, "learning_rate": 9.454239986642503e-06, "loss": 19.7423, "step": 117550 }, { "epoch": 0.23747863783093687, "grad_norm": 227.86947631835938, "learning_rate": 9.454081394651267e-06, "loss": 14.4032, "step": 117560 }, { "epoch": 0.2374988384636207, "grad_norm": 339.7706604003906, "learning_rate": 9.453922780951382e-06, "loss": 19.9441, "step": 117570 }, { "epoch": 0.2375190390963045, "grad_norm": 44.9554557800293, "learning_rate": 9.453764145543614e-06, "loss": 24.1403, "step": 117580 }, { "epoch": 0.2375392397289883, "grad_norm": 311.1480712890625, "learning_rate": 9.453605488428741e-06, "loss": 15.1596, "step": 117590 }, { "epoch": 0.23755944036167212, "grad_norm": 337.8089904785156, "learning_rate": 9.453446809607534e-06, "loss": 31.2768, "step": 117600 }, { "epoch": 0.23757964099435594, "grad_norm": 556.7191162109375, "learning_rate": 9.453288109080768e-06, "loss": 26.3424, "step": 117610 }, { "epoch": 0.23759984162703976, "grad_norm": 224.0070037841797, "learning_rate": 9.453129386849216e-06, "loss": 29.3746, "step": 117620 }, { "epoch": 0.23762004225972358, "grad_norm": 363.7107849121094, "learning_rate": 9.452970642913652e-06, "loss": 14.2529, "step": 117630 }, { "epoch": 0.2376402428924074, "grad_norm": 288.22247314453125, "learning_rate": 9.452811877274848e-06, "loss": 16.8513, "step": 117640 }, { "epoch": 0.23766044352509122, "grad_norm": 284.9219665527344, "learning_rate": 9.45265308993358e-06, "loss": 11.8292, "step": 117650 }, { "epoch": 0.237680644157775, "grad_norm": 361.7940368652344, "learning_rate": 9.452494280890621e-06, "loss": 33.7807, "step": 117660 }, { "epoch": 0.23770084479045883, "grad_norm": 416.88372802734375, "learning_rate": 9.452335450146744e-06, "loss": 24.6006, "step": 117670 }, { "epoch": 0.23772104542314265, "grad_norm": 363.78521728515625, "learning_rate": 9.452176597702724e-06, "loss": 27.2187, "step": 117680 }, { "epoch": 0.23774124605582647, "grad_norm": 325.37945556640625, "learning_rate": 9.452017723559337e-06, "loss": 22.3384, "step": 117690 }, { "epoch": 0.2377614466885103, "grad_norm": 7.764430522918701, "learning_rate": 9.451858827717354e-06, "loss": 13.0589, "step": 117700 }, { "epoch": 0.2377816473211941, "grad_norm": 40.181312561035156, "learning_rate": 9.451699910177551e-06, "loss": 23.1554, "step": 117710 }, { "epoch": 0.2378018479538779, "grad_norm": 255.05734252929688, "learning_rate": 9.451540970940703e-06, "loss": 14.2716, "step": 117720 }, { "epoch": 0.23782204858656172, "grad_norm": 32.237266540527344, "learning_rate": 9.451382010007584e-06, "loss": 26.1946, "step": 117730 }, { "epoch": 0.23784224921924554, "grad_norm": 277.17413330078125, "learning_rate": 9.45122302737897e-06, "loss": 21.38, "step": 117740 }, { "epoch": 0.23786244985192936, "grad_norm": 331.87176513671875, "learning_rate": 9.451064023055634e-06, "loss": 18.2291, "step": 117750 }, { "epoch": 0.23788265048461318, "grad_norm": 168.07290649414062, "learning_rate": 9.450904997038351e-06, "loss": 22.9285, "step": 117760 }, { "epoch": 0.237902851117297, "grad_norm": 300.5836486816406, "learning_rate": 9.450745949327897e-06, "loss": 13.2962, "step": 117770 }, { "epoch": 0.2379230517499808, "grad_norm": 346.7259826660156, "learning_rate": 9.450586879925048e-06, "loss": 17.5449, "step": 117780 }, { "epoch": 0.23794325238266462, "grad_norm": 347.44921875, "learning_rate": 9.450427788830578e-06, "loss": 20.6787, "step": 117790 }, { "epoch": 0.23796345301534844, "grad_norm": 302.5928955078125, "learning_rate": 9.450268676045261e-06, "loss": 23.8739, "step": 117800 }, { "epoch": 0.23798365364803226, "grad_norm": 352.51922607421875, "learning_rate": 9.450109541569879e-06, "loss": 23.889, "step": 117810 }, { "epoch": 0.23800385428071608, "grad_norm": 734.048095703125, "learning_rate": 9.4499503854052e-06, "loss": 23.9263, "step": 117820 }, { "epoch": 0.2380240549133999, "grad_norm": 406.5575256347656, "learning_rate": 9.449791207552001e-06, "loss": 14.6824, "step": 117830 }, { "epoch": 0.23804425554608372, "grad_norm": 293.6494140625, "learning_rate": 9.44963200801106e-06, "loss": 13.7375, "step": 117840 }, { "epoch": 0.2380644561787675, "grad_norm": 450.2076721191406, "learning_rate": 9.449472786783153e-06, "loss": 20.1875, "step": 117850 }, { "epoch": 0.23808465681145133, "grad_norm": 343.6477355957031, "learning_rate": 9.449313543869056e-06, "loss": 27.0467, "step": 117860 }, { "epoch": 0.23810485744413515, "grad_norm": 373.0891418457031, "learning_rate": 9.449154279269543e-06, "loss": 24.3926, "step": 117870 }, { "epoch": 0.23812505807681897, "grad_norm": 349.4042663574219, "learning_rate": 9.448994992985393e-06, "loss": 17.1544, "step": 117880 }, { "epoch": 0.2381452587095028, "grad_norm": 427.3686828613281, "learning_rate": 9.44883568501738e-06, "loss": 13.7653, "step": 117890 }, { "epoch": 0.2381654593421866, "grad_norm": 203.18197631835938, "learning_rate": 9.448676355366282e-06, "loss": 26.6483, "step": 117900 }, { "epoch": 0.2381856599748704, "grad_norm": 417.5151062011719, "learning_rate": 9.448517004032876e-06, "loss": 16.2833, "step": 117910 }, { "epoch": 0.23820586060755422, "grad_norm": 391.86376953125, "learning_rate": 9.448357631017934e-06, "loss": 19.0544, "step": 117920 }, { "epoch": 0.23822606124023804, "grad_norm": 284.106201171875, "learning_rate": 9.44819823632224e-06, "loss": 21.6818, "step": 117930 }, { "epoch": 0.23824626187292186, "grad_norm": 148.77249145507812, "learning_rate": 9.448038819946566e-06, "loss": 14.9604, "step": 117940 }, { "epoch": 0.23826646250560568, "grad_norm": 554.3720703125, "learning_rate": 9.447879381891691e-06, "loss": 35.8304, "step": 117950 }, { "epoch": 0.2382866631382895, "grad_norm": 227.2013702392578, "learning_rate": 9.447719922158391e-06, "loss": 14.9789, "step": 117960 }, { "epoch": 0.23830686377097332, "grad_norm": 561.2802124023438, "learning_rate": 9.447560440747443e-06, "loss": 31.1686, "step": 117970 }, { "epoch": 0.2383270644036571, "grad_norm": 306.873779296875, "learning_rate": 9.447400937659625e-06, "loss": 20.7896, "step": 117980 }, { "epoch": 0.23834726503634093, "grad_norm": 169.5961151123047, "learning_rate": 9.447241412895714e-06, "loss": 23.5197, "step": 117990 }, { "epoch": 0.23836746566902475, "grad_norm": 614.7345581054688, "learning_rate": 9.44708186645649e-06, "loss": 35.156, "step": 118000 }, { "epoch": 0.23838766630170857, "grad_norm": 107.5583724975586, "learning_rate": 9.446922298342725e-06, "loss": 28.9668, "step": 118010 }, { "epoch": 0.2384078669343924, "grad_norm": 199.6429443359375, "learning_rate": 9.446762708555202e-06, "loss": 20.2679, "step": 118020 }, { "epoch": 0.2384280675670762, "grad_norm": 395.7974548339844, "learning_rate": 9.446603097094696e-06, "loss": 27.6805, "step": 118030 }, { "epoch": 0.23844826819976, "grad_norm": 222.95150756835938, "learning_rate": 9.446443463961986e-06, "loss": 10.8955, "step": 118040 }, { "epoch": 0.23846846883244383, "grad_norm": 176.54493713378906, "learning_rate": 9.44628380915785e-06, "loss": 8.8042, "step": 118050 }, { "epoch": 0.23848866946512765, "grad_norm": 812.7444458007812, "learning_rate": 9.446124132683066e-06, "loss": 26.1784, "step": 118060 }, { "epoch": 0.23850887009781147, "grad_norm": 362.6645202636719, "learning_rate": 9.445964434538412e-06, "loss": 23.1598, "step": 118070 }, { "epoch": 0.23852907073049529, "grad_norm": 281.22711181640625, "learning_rate": 9.445804714724667e-06, "loss": 27.3094, "step": 118080 }, { "epoch": 0.2385492713631791, "grad_norm": 6.805578708648682, "learning_rate": 9.44564497324261e-06, "loss": 29.9969, "step": 118090 }, { "epoch": 0.2385694719958629, "grad_norm": 600.4402465820312, "learning_rate": 9.445485210093018e-06, "loss": 19.3735, "step": 118100 }, { "epoch": 0.23858967262854672, "grad_norm": 187.56173706054688, "learning_rate": 9.445325425276668e-06, "loss": 13.3554, "step": 118110 }, { "epoch": 0.23860987326123054, "grad_norm": 456.8238830566406, "learning_rate": 9.445165618794343e-06, "loss": 21.8485, "step": 118120 }, { "epoch": 0.23863007389391436, "grad_norm": 239.3457794189453, "learning_rate": 9.44500579064682e-06, "loss": 17.2076, "step": 118130 }, { "epoch": 0.23865027452659818, "grad_norm": 80.32792663574219, "learning_rate": 9.444845940834876e-06, "loss": 17.7516, "step": 118140 }, { "epoch": 0.238670475159282, "grad_norm": 182.7498016357422, "learning_rate": 9.444686069359294e-06, "loss": 28.7072, "step": 118150 }, { "epoch": 0.23869067579196582, "grad_norm": 357.3796691894531, "learning_rate": 9.444526176220851e-06, "loss": 33.7402, "step": 118160 }, { "epoch": 0.2387108764246496, "grad_norm": 121.15896606445312, "learning_rate": 9.444366261420328e-06, "loss": 12.6098, "step": 118170 }, { "epoch": 0.23873107705733343, "grad_norm": 20.90473747253418, "learning_rate": 9.4442063249585e-06, "loss": 28.1215, "step": 118180 }, { "epoch": 0.23875127769001725, "grad_norm": 176.94920349121094, "learning_rate": 9.44404636683615e-06, "loss": 27.1577, "step": 118190 }, { "epoch": 0.23877147832270107, "grad_norm": 421.61492919921875, "learning_rate": 9.443886387054058e-06, "loss": 33.1294, "step": 118200 }, { "epoch": 0.2387916789553849, "grad_norm": 407.5395812988281, "learning_rate": 9.443726385613003e-06, "loss": 14.4095, "step": 118210 }, { "epoch": 0.2388118795880687, "grad_norm": 398.8727722167969, "learning_rate": 9.443566362513763e-06, "loss": 22.9406, "step": 118220 }, { "epoch": 0.2388320802207525, "grad_norm": 1023.3953857421875, "learning_rate": 9.44340631775712e-06, "loss": 46.0572, "step": 118230 }, { "epoch": 0.23885228085343632, "grad_norm": 21.568626403808594, "learning_rate": 9.443246251343855e-06, "loss": 12.39, "step": 118240 }, { "epoch": 0.23887248148612014, "grad_norm": 709.4979248046875, "learning_rate": 9.443086163274745e-06, "loss": 40.496, "step": 118250 }, { "epoch": 0.23889268211880396, "grad_norm": 202.491943359375, "learning_rate": 9.442926053550572e-06, "loss": 18.7838, "step": 118260 }, { "epoch": 0.23891288275148778, "grad_norm": 972.7313232421875, "learning_rate": 9.442765922172117e-06, "loss": 24.6157, "step": 118270 }, { "epoch": 0.2389330833841716, "grad_norm": 153.022216796875, "learning_rate": 9.442605769140159e-06, "loss": 17.5482, "step": 118280 }, { "epoch": 0.23895328401685542, "grad_norm": 193.31625366210938, "learning_rate": 9.44244559445548e-06, "loss": 17.9818, "step": 118290 }, { "epoch": 0.23897348464953921, "grad_norm": 267.36505126953125, "learning_rate": 9.44228539811886e-06, "loss": 32.4069, "step": 118300 }, { "epoch": 0.23899368528222303, "grad_norm": 311.41204833984375, "learning_rate": 9.44212518013108e-06, "loss": 21.6396, "step": 118310 }, { "epoch": 0.23901388591490685, "grad_norm": 143.06936645507812, "learning_rate": 9.44196494049292e-06, "loss": 24.7254, "step": 118320 }, { "epoch": 0.23903408654759067, "grad_norm": 306.09222412109375, "learning_rate": 9.44180467920516e-06, "loss": 13.3936, "step": 118330 }, { "epoch": 0.2390542871802745, "grad_norm": 26.182607650756836, "learning_rate": 9.441644396268586e-06, "loss": 17.833, "step": 118340 }, { "epoch": 0.23907448781295831, "grad_norm": 38.274784088134766, "learning_rate": 9.441484091683975e-06, "loss": 13.707, "step": 118350 }, { "epoch": 0.2390946884456421, "grad_norm": 293.3565368652344, "learning_rate": 9.441323765452107e-06, "loss": 11.788, "step": 118360 }, { "epoch": 0.23911488907832593, "grad_norm": 369.33099365234375, "learning_rate": 9.441163417573768e-06, "loss": 21.7304, "step": 118370 }, { "epoch": 0.23913508971100975, "grad_norm": 506.4439697265625, "learning_rate": 9.441003048049734e-06, "loss": 21.7197, "step": 118380 }, { "epoch": 0.23915529034369357, "grad_norm": 333.64599609375, "learning_rate": 9.440842656880792e-06, "loss": 22.1197, "step": 118390 }, { "epoch": 0.2391754909763774, "grad_norm": 198.65504455566406, "learning_rate": 9.440682244067724e-06, "loss": 13.9232, "step": 118400 }, { "epoch": 0.2391956916090612, "grad_norm": 154.79962158203125, "learning_rate": 9.440521809611307e-06, "loss": 17.7654, "step": 118410 }, { "epoch": 0.239215892241745, "grad_norm": 0.0, "learning_rate": 9.440361353512325e-06, "loss": 13.7585, "step": 118420 }, { "epoch": 0.23923609287442882, "grad_norm": 501.8160095214844, "learning_rate": 9.44020087577156e-06, "loss": 20.8894, "step": 118430 }, { "epoch": 0.23925629350711264, "grad_norm": 62.49715042114258, "learning_rate": 9.440040376389795e-06, "loss": 19.2416, "step": 118440 }, { "epoch": 0.23927649413979646, "grad_norm": 292.3954162597656, "learning_rate": 9.439879855367813e-06, "loss": 20.491, "step": 118450 }, { "epoch": 0.23929669477248028, "grad_norm": 928.7267456054688, "learning_rate": 9.439719312706393e-06, "loss": 16.8924, "step": 118460 }, { "epoch": 0.2393168954051641, "grad_norm": 343.5155029296875, "learning_rate": 9.43955874840632e-06, "loss": 30.8803, "step": 118470 }, { "epoch": 0.23933709603784792, "grad_norm": 517.1893310546875, "learning_rate": 9.439398162468376e-06, "loss": 24.5752, "step": 118480 }, { "epoch": 0.2393572966705317, "grad_norm": 54.659637451171875, "learning_rate": 9.439237554893344e-06, "loss": 23.0367, "step": 118490 }, { "epoch": 0.23937749730321553, "grad_norm": 210.98716735839844, "learning_rate": 9.439076925682006e-06, "loss": 28.4695, "step": 118500 }, { "epoch": 0.23939769793589935, "grad_norm": 196.0767822265625, "learning_rate": 9.438916274835148e-06, "loss": 37.2937, "step": 118510 }, { "epoch": 0.23941789856858317, "grad_norm": 207.98785400390625, "learning_rate": 9.438755602353549e-06, "loss": 11.4519, "step": 118520 }, { "epoch": 0.239438099201267, "grad_norm": 339.5860595703125, "learning_rate": 9.438594908237993e-06, "loss": 15.5868, "step": 118530 }, { "epoch": 0.2394582998339508, "grad_norm": 401.66278076171875, "learning_rate": 9.438434192489263e-06, "loss": 18.5073, "step": 118540 }, { "epoch": 0.2394785004666346, "grad_norm": 504.3293762207031, "learning_rate": 9.438273455108145e-06, "loss": 21.1141, "step": 118550 }, { "epoch": 0.23949870109931842, "grad_norm": 285.8705139160156, "learning_rate": 9.43811269609542e-06, "loss": 16.2533, "step": 118560 }, { "epoch": 0.23951890173200224, "grad_norm": 395.63616943359375, "learning_rate": 9.43795191545187e-06, "loss": 14.6873, "step": 118570 }, { "epoch": 0.23953910236468606, "grad_norm": 588.4963989257812, "learning_rate": 9.437791113178283e-06, "loss": 15.8899, "step": 118580 }, { "epoch": 0.23955930299736988, "grad_norm": 360.1812744140625, "learning_rate": 9.43763028927544e-06, "loss": 19.0557, "step": 118590 }, { "epoch": 0.2395795036300537, "grad_norm": 690.2476196289062, "learning_rate": 9.437469443744124e-06, "loss": 19.2488, "step": 118600 }, { "epoch": 0.2395997042627375, "grad_norm": 245.7722930908203, "learning_rate": 9.437308576585121e-06, "loss": 24.3936, "step": 118610 }, { "epoch": 0.23961990489542132, "grad_norm": 96.1200180053711, "learning_rate": 9.437147687799213e-06, "loss": 24.8914, "step": 118620 }, { "epoch": 0.23964010552810514, "grad_norm": 366.4589538574219, "learning_rate": 9.436986777387187e-06, "loss": 13.0576, "step": 118630 }, { "epoch": 0.23966030616078896, "grad_norm": 415.3909606933594, "learning_rate": 9.436825845349826e-06, "loss": 29.5765, "step": 118640 }, { "epoch": 0.23968050679347278, "grad_norm": 514.5730590820312, "learning_rate": 9.436664891687911e-06, "loss": 28.89, "step": 118650 }, { "epoch": 0.2397007074261566, "grad_norm": 68.4682388305664, "learning_rate": 9.436503916402234e-06, "loss": 26.0349, "step": 118660 }, { "epoch": 0.23972090805884042, "grad_norm": 514.3900146484375, "learning_rate": 9.436342919493571e-06, "loss": 34.0345, "step": 118670 }, { "epoch": 0.2397411086915242, "grad_norm": 320.9086608886719, "learning_rate": 9.436181900962713e-06, "loss": 28.5125, "step": 118680 }, { "epoch": 0.23976130932420803, "grad_norm": 252.7588653564453, "learning_rate": 9.43602086081044e-06, "loss": 14.7082, "step": 118690 }, { "epoch": 0.23978150995689185, "grad_norm": 235.26602172851562, "learning_rate": 9.435859799037541e-06, "loss": 15.1221, "step": 118700 }, { "epoch": 0.23980171058957567, "grad_norm": 678.27783203125, "learning_rate": 9.4356987156448e-06, "loss": 20.1973, "step": 118710 }, { "epoch": 0.2398219112222595, "grad_norm": 262.7927551269531, "learning_rate": 9.435537610633002e-06, "loss": 22.7828, "step": 118720 }, { "epoch": 0.2398421118549433, "grad_norm": 256.73931884765625, "learning_rate": 9.435376484002927e-06, "loss": 23.4175, "step": 118730 }, { "epoch": 0.2398623124876271, "grad_norm": 941.4635620117188, "learning_rate": 9.43521533575537e-06, "loss": 37.351, "step": 118740 }, { "epoch": 0.23988251312031092, "grad_norm": 213.60459899902344, "learning_rate": 9.43505416589111e-06, "loss": 14.1768, "step": 118750 }, { "epoch": 0.23990271375299474, "grad_norm": 457.46624755859375, "learning_rate": 9.434892974410932e-06, "loss": 27.8173, "step": 118760 }, { "epoch": 0.23992291438567856, "grad_norm": 201.59657287597656, "learning_rate": 9.434731761315625e-06, "loss": 25.6821, "step": 118770 }, { "epoch": 0.23994311501836238, "grad_norm": 265.4921569824219, "learning_rate": 9.434570526605974e-06, "loss": 25.5342, "step": 118780 }, { "epoch": 0.2399633156510462, "grad_norm": 434.61944580078125, "learning_rate": 9.434409270282762e-06, "loss": 26.8165, "step": 118790 }, { "epoch": 0.23998351628373002, "grad_norm": 356.3874206542969, "learning_rate": 9.43424799234678e-06, "loss": 26.6591, "step": 118800 }, { "epoch": 0.2400037169164138, "grad_norm": 456.2731018066406, "learning_rate": 9.43408669279881e-06, "loss": 12.1767, "step": 118810 }, { "epoch": 0.24002391754909763, "grad_norm": 96.01459503173828, "learning_rate": 9.433925371639639e-06, "loss": 19.0958, "step": 118820 }, { "epoch": 0.24004411818178145, "grad_norm": 253.6698455810547, "learning_rate": 9.433764028870053e-06, "loss": 25.6554, "step": 118830 }, { "epoch": 0.24006431881446527, "grad_norm": 257.3154296875, "learning_rate": 9.433602664490838e-06, "loss": 11.6508, "step": 118840 }, { "epoch": 0.2400845194471491, "grad_norm": 183.56983947753906, "learning_rate": 9.433441278502784e-06, "loss": 17.0794, "step": 118850 }, { "epoch": 0.2401047200798329, "grad_norm": 391.90142822265625, "learning_rate": 9.433279870906673e-06, "loss": 22.43, "step": 118860 }, { "epoch": 0.2401249207125167, "grad_norm": 426.3934631347656, "learning_rate": 9.433118441703293e-06, "loss": 26.7638, "step": 118870 }, { "epoch": 0.24014512134520052, "grad_norm": 328.7181701660156, "learning_rate": 9.432956990893434e-06, "loss": 22.9283, "step": 118880 }, { "epoch": 0.24016532197788434, "grad_norm": 304.7919006347656, "learning_rate": 9.432795518477878e-06, "loss": 14.4975, "step": 118890 }, { "epoch": 0.24018552261056816, "grad_norm": 332.95086669921875, "learning_rate": 9.432634024457414e-06, "loss": 14.9595, "step": 118900 }, { "epoch": 0.24020572324325198, "grad_norm": 171.62965393066406, "learning_rate": 9.43247250883283e-06, "loss": 18.7712, "step": 118910 }, { "epoch": 0.2402259238759358, "grad_norm": 170.53123474121094, "learning_rate": 9.432310971604914e-06, "loss": 19.4807, "step": 118920 }, { "epoch": 0.2402461245086196, "grad_norm": 258.8545837402344, "learning_rate": 9.432149412774452e-06, "loss": 21.5037, "step": 118930 }, { "epoch": 0.24026632514130342, "grad_norm": 8.17953109741211, "learning_rate": 9.431987832342228e-06, "loss": 7.1345, "step": 118940 }, { "epoch": 0.24028652577398724, "grad_norm": 232.8226776123047, "learning_rate": 9.431826230309035e-06, "loss": 20.0599, "step": 118950 }, { "epoch": 0.24030672640667106, "grad_norm": 156.0710906982422, "learning_rate": 9.431664606675659e-06, "loss": 28.7288, "step": 118960 }, { "epoch": 0.24032692703935488, "grad_norm": 330.46087646484375, "learning_rate": 9.431502961442887e-06, "loss": 19.2782, "step": 118970 }, { "epoch": 0.2403471276720387, "grad_norm": 334.182861328125, "learning_rate": 9.431341294611506e-06, "loss": 24.2004, "step": 118980 }, { "epoch": 0.24036732830472252, "grad_norm": 264.09185791015625, "learning_rate": 9.431179606182306e-06, "loss": 33.9679, "step": 118990 }, { "epoch": 0.2403875289374063, "grad_norm": 224.2251434326172, "learning_rate": 9.431017896156074e-06, "loss": 20.2358, "step": 119000 }, { "epoch": 0.24040772957009013, "grad_norm": 338.1694641113281, "learning_rate": 9.430856164533598e-06, "loss": 30.619, "step": 119010 }, { "epoch": 0.24042793020277395, "grad_norm": 129.0458221435547, "learning_rate": 9.430694411315667e-06, "loss": 22.8661, "step": 119020 }, { "epoch": 0.24044813083545777, "grad_norm": 36.140262603759766, "learning_rate": 9.430532636503067e-06, "loss": 12.8355, "step": 119030 }, { "epoch": 0.2404683314681416, "grad_norm": 233.96710205078125, "learning_rate": 9.43037084009659e-06, "loss": 15.9879, "step": 119040 }, { "epoch": 0.2404885321008254, "grad_norm": 175.3660125732422, "learning_rate": 9.430209022097024e-06, "loss": 24.3113, "step": 119050 }, { "epoch": 0.2405087327335092, "grad_norm": 154.4591064453125, "learning_rate": 9.430047182505152e-06, "loss": 18.9509, "step": 119060 }, { "epoch": 0.24052893336619302, "grad_norm": 267.5834045410156, "learning_rate": 9.429885321321772e-06, "loss": 23.5716, "step": 119070 }, { "epoch": 0.24054913399887684, "grad_norm": 288.2119445800781, "learning_rate": 9.429723438547666e-06, "loss": 13.0416, "step": 119080 }, { "epoch": 0.24056933463156066, "grad_norm": 508.619384765625, "learning_rate": 9.429561534183627e-06, "loss": 21.063, "step": 119090 }, { "epoch": 0.24058953526424448, "grad_norm": 350.1383056640625, "learning_rate": 9.429399608230441e-06, "loss": 29.3523, "step": 119100 }, { "epoch": 0.2406097358969283, "grad_norm": 226.75405883789062, "learning_rate": 9.429237660688896e-06, "loss": 16.0684, "step": 119110 }, { "epoch": 0.24062993652961212, "grad_norm": 236.6155548095703, "learning_rate": 9.429075691559788e-06, "loss": 14.9039, "step": 119120 }, { "epoch": 0.2406501371622959, "grad_norm": 307.2601013183594, "learning_rate": 9.4289137008439e-06, "loss": 27.6225, "step": 119130 }, { "epoch": 0.24067033779497973, "grad_norm": 336.33343505859375, "learning_rate": 9.428751688542025e-06, "loss": 17.3004, "step": 119140 }, { "epoch": 0.24069053842766355, "grad_norm": 135.11190795898438, "learning_rate": 9.428589654654951e-06, "loss": 24.0894, "step": 119150 }, { "epoch": 0.24071073906034737, "grad_norm": 19.425426483154297, "learning_rate": 9.428427599183467e-06, "loss": 9.8759, "step": 119160 }, { "epoch": 0.2407309396930312, "grad_norm": 214.51687622070312, "learning_rate": 9.428265522128366e-06, "loss": 14.6841, "step": 119170 }, { "epoch": 0.240751140325715, "grad_norm": 233.45635986328125, "learning_rate": 9.428103423490434e-06, "loss": 23.8779, "step": 119180 }, { "epoch": 0.2407713409583988, "grad_norm": 2.926909923553467, "learning_rate": 9.427941303270464e-06, "loss": 11.8927, "step": 119190 }, { "epoch": 0.24079154159108263, "grad_norm": 213.74078369140625, "learning_rate": 9.427779161469246e-06, "loss": 25.2592, "step": 119200 }, { "epoch": 0.24081174222376645, "grad_norm": 106.98066711425781, "learning_rate": 9.427616998087568e-06, "loss": 28.0096, "step": 119210 }, { "epoch": 0.24083194285645027, "grad_norm": 59.56482696533203, "learning_rate": 9.427454813126222e-06, "loss": 29.2036, "step": 119220 }, { "epoch": 0.24085214348913409, "grad_norm": 55.645904541015625, "learning_rate": 9.427292606585998e-06, "loss": 15.4215, "step": 119230 }, { "epoch": 0.2408723441218179, "grad_norm": 201.77101135253906, "learning_rate": 9.427130378467689e-06, "loss": 29.8479, "step": 119240 }, { "epoch": 0.2408925447545017, "grad_norm": 408.36370849609375, "learning_rate": 9.42696812877208e-06, "loss": 21.8428, "step": 119250 }, { "epoch": 0.24091274538718552, "grad_norm": 143.8035888671875, "learning_rate": 9.426805857499968e-06, "loss": 16.9094, "step": 119260 }, { "epoch": 0.24093294601986934, "grad_norm": 0.0, "learning_rate": 9.426643564652139e-06, "loss": 18.7899, "step": 119270 }, { "epoch": 0.24095314665255316, "grad_norm": 507.158203125, "learning_rate": 9.426481250229387e-06, "loss": 16.8405, "step": 119280 }, { "epoch": 0.24097334728523698, "grad_norm": 77.88159942626953, "learning_rate": 9.426318914232503e-06, "loss": 19.7075, "step": 119290 }, { "epoch": 0.2409935479179208, "grad_norm": 137.0773468017578, "learning_rate": 9.426156556662276e-06, "loss": 32.8942, "step": 119300 }, { "epoch": 0.24101374855060462, "grad_norm": 382.8200378417969, "learning_rate": 9.425994177519501e-06, "loss": 25.3018, "step": 119310 }, { "epoch": 0.2410339491832884, "grad_norm": 175.3247528076172, "learning_rate": 9.425831776804966e-06, "loss": 18.232, "step": 119320 }, { "epoch": 0.24105414981597223, "grad_norm": 39.69611740112305, "learning_rate": 9.425669354519464e-06, "loss": 9.4747, "step": 119330 }, { "epoch": 0.24107435044865605, "grad_norm": 260.4827575683594, "learning_rate": 9.425506910663785e-06, "loss": 25.6596, "step": 119340 }, { "epoch": 0.24109455108133987, "grad_norm": 201.00762939453125, "learning_rate": 9.425344445238723e-06, "loss": 11.4864, "step": 119350 }, { "epoch": 0.2411147517140237, "grad_norm": 274.724365234375, "learning_rate": 9.425181958245069e-06, "loss": 23.224, "step": 119360 }, { "epoch": 0.2411349523467075, "grad_norm": 271.5369567871094, "learning_rate": 9.425019449683614e-06, "loss": 9.4876, "step": 119370 }, { "epoch": 0.2411551529793913, "grad_norm": 29.145477294921875, "learning_rate": 9.424856919555152e-06, "loss": 17.7834, "step": 119380 }, { "epoch": 0.24117535361207512, "grad_norm": 224.85635375976562, "learning_rate": 9.424694367860475e-06, "loss": 16.8953, "step": 119390 }, { "epoch": 0.24119555424475894, "grad_norm": 381.78167724609375, "learning_rate": 9.424531794600372e-06, "loss": 14.2327, "step": 119400 }, { "epoch": 0.24121575487744276, "grad_norm": 276.1485595703125, "learning_rate": 9.424369199775639e-06, "loss": 28.1062, "step": 119410 }, { "epoch": 0.24123595551012658, "grad_norm": 342.0525207519531, "learning_rate": 9.424206583387066e-06, "loss": 26.7705, "step": 119420 }, { "epoch": 0.2412561561428104, "grad_norm": 248.26943969726562, "learning_rate": 9.424043945435449e-06, "loss": 33.6305, "step": 119430 }, { "epoch": 0.24127635677549422, "grad_norm": 217.65374755859375, "learning_rate": 9.423881285921576e-06, "loss": 14.8882, "step": 119440 }, { "epoch": 0.24129655740817801, "grad_norm": 376.2751159667969, "learning_rate": 9.423718604846243e-06, "loss": 26.1535, "step": 119450 }, { "epoch": 0.24131675804086183, "grad_norm": 286.88134765625, "learning_rate": 9.423555902210241e-06, "loss": 28.0835, "step": 119460 }, { "epoch": 0.24133695867354565, "grad_norm": 302.4078369140625, "learning_rate": 9.423393178014366e-06, "loss": 24.41, "step": 119470 }, { "epoch": 0.24135715930622947, "grad_norm": 742.1220092773438, "learning_rate": 9.423230432259409e-06, "loss": 18.1322, "step": 119480 }, { "epoch": 0.2413773599389133, "grad_norm": 468.3274841308594, "learning_rate": 9.423067664946162e-06, "loss": 16.7812, "step": 119490 }, { "epoch": 0.24139756057159711, "grad_norm": 275.4114074707031, "learning_rate": 9.42290487607542e-06, "loss": 19.0073, "step": 119500 }, { "epoch": 0.2414177612042809, "grad_norm": 215.1481475830078, "learning_rate": 9.422742065647976e-06, "loss": 13.4064, "step": 119510 }, { "epoch": 0.24143796183696473, "grad_norm": 3.9338667392730713, "learning_rate": 9.422579233664624e-06, "loss": 13.592, "step": 119520 }, { "epoch": 0.24145816246964855, "grad_norm": 494.72369384765625, "learning_rate": 9.422416380126157e-06, "loss": 32.9191, "step": 119530 }, { "epoch": 0.24147836310233237, "grad_norm": 256.0563659667969, "learning_rate": 9.42225350503337e-06, "loss": 23.6946, "step": 119540 }, { "epoch": 0.2414985637350162, "grad_norm": 216.1681671142578, "learning_rate": 9.422090608387056e-06, "loss": 37.2672, "step": 119550 }, { "epoch": 0.2415187643677, "grad_norm": 263.00750732421875, "learning_rate": 9.421927690188006e-06, "loss": 23.5019, "step": 119560 }, { "epoch": 0.2415389650003838, "grad_norm": 272.0700378417969, "learning_rate": 9.421764750437019e-06, "loss": 22.0341, "step": 119570 }, { "epoch": 0.24155916563306762, "grad_norm": 320.50030517578125, "learning_rate": 9.421601789134887e-06, "loss": 28.9372, "step": 119580 }, { "epoch": 0.24157936626575144, "grad_norm": 118.74726104736328, "learning_rate": 9.421438806282402e-06, "loss": 17.3047, "step": 119590 }, { "epoch": 0.24159956689843526, "grad_norm": 0.0, "learning_rate": 9.421275801880363e-06, "loss": 25.9638, "step": 119600 }, { "epoch": 0.24161976753111908, "grad_norm": 158.32595825195312, "learning_rate": 9.42111277592956e-06, "loss": 10.6666, "step": 119610 }, { "epoch": 0.2416399681638029, "grad_norm": 344.6068420410156, "learning_rate": 9.42094972843079e-06, "loss": 34.0057, "step": 119620 }, { "epoch": 0.24166016879648672, "grad_norm": 96.82149505615234, "learning_rate": 9.420786659384849e-06, "loss": 11.0618, "step": 119630 }, { "epoch": 0.2416803694291705, "grad_norm": 465.26446533203125, "learning_rate": 9.420623568792528e-06, "loss": 10.0618, "step": 119640 }, { "epoch": 0.24170057006185433, "grad_norm": 335.7552490234375, "learning_rate": 9.420460456654625e-06, "loss": 42.181, "step": 119650 }, { "epoch": 0.24172077069453815, "grad_norm": 153.27305603027344, "learning_rate": 9.420297322971934e-06, "loss": 37.4387, "step": 119660 }, { "epoch": 0.24174097132722197, "grad_norm": 167.6658172607422, "learning_rate": 9.420134167745249e-06, "loss": 15.8226, "step": 119670 }, { "epoch": 0.2417611719599058, "grad_norm": 355.1344299316406, "learning_rate": 9.419970990975366e-06, "loss": 33.4005, "step": 119680 }, { "epoch": 0.2417813725925896, "grad_norm": 35.95206069946289, "learning_rate": 9.41980779266308e-06, "loss": 21.7989, "step": 119690 }, { "epoch": 0.2418015732252734, "grad_norm": 100.01174926757812, "learning_rate": 9.419644572809189e-06, "loss": 23.2164, "step": 119700 }, { "epoch": 0.24182177385795722, "grad_norm": 165.1349639892578, "learning_rate": 9.419481331414485e-06, "loss": 29.5017, "step": 119710 }, { "epoch": 0.24184197449064104, "grad_norm": 415.4508972167969, "learning_rate": 9.419318068479765e-06, "loss": 24.3705, "step": 119720 }, { "epoch": 0.24186217512332486, "grad_norm": 394.56085205078125, "learning_rate": 9.419154784005826e-06, "loss": 24.004, "step": 119730 }, { "epoch": 0.24188237575600868, "grad_norm": 379.84283447265625, "learning_rate": 9.418991477993461e-06, "loss": 29.6979, "step": 119740 }, { "epoch": 0.2419025763886925, "grad_norm": 246.4163055419922, "learning_rate": 9.418828150443469e-06, "loss": 29.6626, "step": 119750 }, { "epoch": 0.24192277702137632, "grad_norm": 551.1161499023438, "learning_rate": 9.418664801356643e-06, "loss": 23.3755, "step": 119760 }, { "epoch": 0.24194297765406012, "grad_norm": 413.4835205078125, "learning_rate": 9.418501430733781e-06, "loss": 29.8083, "step": 119770 }, { "epoch": 0.24196317828674394, "grad_norm": 535.16650390625, "learning_rate": 9.418338038575678e-06, "loss": 19.3101, "step": 119780 }, { "epoch": 0.24198337891942776, "grad_norm": 179.21755981445312, "learning_rate": 9.418174624883134e-06, "loss": 19.2788, "step": 119790 }, { "epoch": 0.24200357955211158, "grad_norm": 232.50453186035156, "learning_rate": 9.418011189656942e-06, "loss": 32.1123, "step": 119800 }, { "epoch": 0.2420237801847954, "grad_norm": 282.3289489746094, "learning_rate": 9.417847732897897e-06, "loss": 25.5361, "step": 119810 }, { "epoch": 0.24204398081747922, "grad_norm": 207.13058471679688, "learning_rate": 9.4176842546068e-06, "loss": 18.5277, "step": 119820 }, { "epoch": 0.242064181450163, "grad_norm": 530.10498046875, "learning_rate": 9.417520754784445e-06, "loss": 19.2475, "step": 119830 }, { "epoch": 0.24208438208284683, "grad_norm": 422.8984069824219, "learning_rate": 9.41735723343163e-06, "loss": 30.5048, "step": 119840 }, { "epoch": 0.24210458271553065, "grad_norm": 371.456787109375, "learning_rate": 9.417193690549151e-06, "loss": 22.6655, "step": 119850 }, { "epoch": 0.24212478334821447, "grad_norm": 344.6277160644531, "learning_rate": 9.417030126137807e-06, "loss": 21.2155, "step": 119860 }, { "epoch": 0.2421449839808983, "grad_norm": 246.3647003173828, "learning_rate": 9.416866540198393e-06, "loss": 27.1905, "step": 119870 }, { "epoch": 0.2421651846135821, "grad_norm": 246.70916748046875, "learning_rate": 9.416702932731707e-06, "loss": 36.5872, "step": 119880 }, { "epoch": 0.2421853852462659, "grad_norm": 656.2944946289062, "learning_rate": 9.416539303738546e-06, "loss": 26.456, "step": 119890 }, { "epoch": 0.24220558587894972, "grad_norm": 445.89337158203125, "learning_rate": 9.41637565321971e-06, "loss": 20.4933, "step": 119900 }, { "epoch": 0.24222578651163354, "grad_norm": 376.8062438964844, "learning_rate": 9.416211981175993e-06, "loss": 41.4292, "step": 119910 }, { "epoch": 0.24224598714431736, "grad_norm": 356.6213684082031, "learning_rate": 9.416048287608195e-06, "loss": 15.9308, "step": 119920 }, { "epoch": 0.24226618777700118, "grad_norm": 172.50473022460938, "learning_rate": 9.415884572517113e-06, "loss": 19.2967, "step": 119930 }, { "epoch": 0.242286388409685, "grad_norm": 272.31610107421875, "learning_rate": 9.415720835903546e-06, "loss": 29.0887, "step": 119940 }, { "epoch": 0.24230658904236882, "grad_norm": 301.7942810058594, "learning_rate": 9.41555707776829e-06, "loss": 29.8424, "step": 119950 }, { "epoch": 0.2423267896750526, "grad_norm": 552.9899291992188, "learning_rate": 9.415393298112145e-06, "loss": 34.692, "step": 119960 }, { "epoch": 0.24234699030773643, "grad_norm": 234.46559143066406, "learning_rate": 9.415229496935909e-06, "loss": 27.4155, "step": 119970 }, { "epoch": 0.24236719094042025, "grad_norm": 262.8635559082031, "learning_rate": 9.41506567424038e-06, "loss": 24.8239, "step": 119980 }, { "epoch": 0.24238739157310407, "grad_norm": 594.6438598632812, "learning_rate": 9.414901830026355e-06, "loss": 32.8427, "step": 119990 }, { "epoch": 0.2424075922057879, "grad_norm": 335.4156799316406, "learning_rate": 9.414737964294636e-06, "loss": 21.9172, "step": 120000 }, { "epoch": 0.2424277928384717, "grad_norm": 239.851318359375, "learning_rate": 9.414574077046019e-06, "loss": 12.6644, "step": 120010 }, { "epoch": 0.2424479934711555, "grad_norm": 182.68507385253906, "learning_rate": 9.414410168281303e-06, "loss": 20.0028, "step": 120020 }, { "epoch": 0.24246819410383932, "grad_norm": 187.0391082763672, "learning_rate": 9.414246238001286e-06, "loss": 21.3896, "step": 120030 }, { "epoch": 0.24248839473652314, "grad_norm": 255.75006103515625, "learning_rate": 9.414082286206769e-06, "loss": 20.925, "step": 120040 }, { "epoch": 0.24250859536920696, "grad_norm": 390.3116760253906, "learning_rate": 9.41391831289855e-06, "loss": 19.961, "step": 120050 }, { "epoch": 0.24252879600189078, "grad_norm": 91.03765106201172, "learning_rate": 9.41375431807743e-06, "loss": 36.8115, "step": 120060 }, { "epoch": 0.2425489966345746, "grad_norm": 405.22119140625, "learning_rate": 9.413590301744207e-06, "loss": 18.8813, "step": 120070 }, { "epoch": 0.24256919726725842, "grad_norm": 299.14923095703125, "learning_rate": 9.413426263899677e-06, "loss": 16.227, "step": 120080 }, { "epoch": 0.24258939789994222, "grad_norm": 592.90087890625, "learning_rate": 9.413262204544645e-06, "loss": 13.8146, "step": 120090 }, { "epoch": 0.24260959853262604, "grad_norm": 233.53634643554688, "learning_rate": 9.41309812367991e-06, "loss": 15.2115, "step": 120100 }, { "epoch": 0.24262979916530986, "grad_norm": 697.5430908203125, "learning_rate": 9.412934021306267e-06, "loss": 27.7553, "step": 120110 }, { "epoch": 0.24264999979799368, "grad_norm": 125.15768432617188, "learning_rate": 9.412769897424519e-06, "loss": 17.6684, "step": 120120 }, { "epoch": 0.2426702004306775, "grad_norm": 1433.709716796875, "learning_rate": 9.412605752035467e-06, "loss": 31.0054, "step": 120130 }, { "epoch": 0.24269040106336132, "grad_norm": 154.92730712890625, "learning_rate": 9.412441585139908e-06, "loss": 24.1465, "step": 120140 }, { "epoch": 0.2427106016960451, "grad_norm": 405.1814880371094, "learning_rate": 9.412277396738647e-06, "loss": 22.7093, "step": 120150 }, { "epoch": 0.24273080232872893, "grad_norm": 247.89254760742188, "learning_rate": 9.41211318683248e-06, "loss": 28.5932, "step": 120160 }, { "epoch": 0.24275100296141275, "grad_norm": 301.9068298339844, "learning_rate": 9.411948955422207e-06, "loss": 25.1532, "step": 120170 }, { "epoch": 0.24277120359409657, "grad_norm": 1441.0035400390625, "learning_rate": 9.411784702508631e-06, "loss": 16.853, "step": 120180 }, { "epoch": 0.2427914042267804, "grad_norm": 49.738121032714844, "learning_rate": 9.41162042809255e-06, "loss": 8.8249, "step": 120190 }, { "epoch": 0.2428116048594642, "grad_norm": 485.3486633300781, "learning_rate": 9.411456132174768e-06, "loss": 38.4211, "step": 120200 }, { "epoch": 0.242831805492148, "grad_norm": 349.3159484863281, "learning_rate": 9.411291814756082e-06, "loss": 20.706, "step": 120210 }, { "epoch": 0.24285200612483182, "grad_norm": 649.6109619140625, "learning_rate": 9.411127475837297e-06, "loss": 24.275, "step": 120220 }, { "epoch": 0.24287220675751564, "grad_norm": 364.24188232421875, "learning_rate": 9.410963115419209e-06, "loss": 27.5562, "step": 120230 }, { "epoch": 0.24289240739019946, "grad_norm": 204.6254425048828, "learning_rate": 9.410798733502624e-06, "loss": 27.4137, "step": 120240 }, { "epoch": 0.24291260802288328, "grad_norm": 294.7472839355469, "learning_rate": 9.41063433008834e-06, "loss": 28.9704, "step": 120250 }, { "epoch": 0.2429328086555671, "grad_norm": 194.8037109375, "learning_rate": 9.410469905177159e-06, "loss": 22.5311, "step": 120260 }, { "epoch": 0.24295300928825092, "grad_norm": 502.5238342285156, "learning_rate": 9.410305458769882e-06, "loss": 26.5972, "step": 120270 }, { "epoch": 0.2429732099209347, "grad_norm": 202.7635955810547, "learning_rate": 9.410140990867313e-06, "loss": 18.6156, "step": 120280 }, { "epoch": 0.24299341055361853, "grad_norm": 72.56381225585938, "learning_rate": 9.40997650147025e-06, "loss": 11.4973, "step": 120290 }, { "epoch": 0.24301361118630235, "grad_norm": 304.3734436035156, "learning_rate": 9.409811990579498e-06, "loss": 37.2537, "step": 120300 }, { "epoch": 0.24303381181898617, "grad_norm": 260.8908996582031, "learning_rate": 9.409647458195857e-06, "loss": 18.3587, "step": 120310 }, { "epoch": 0.24305401245167, "grad_norm": 242.3203125, "learning_rate": 9.409482904320128e-06, "loss": 15.7708, "step": 120320 }, { "epoch": 0.2430742130843538, "grad_norm": 289.6940612792969, "learning_rate": 9.409318328953115e-06, "loss": 18.7303, "step": 120330 }, { "epoch": 0.2430944137170376, "grad_norm": 392.9562072753906, "learning_rate": 9.409153732095617e-06, "loss": 18.6718, "step": 120340 }, { "epoch": 0.24311461434972143, "grad_norm": 46.090721130371094, "learning_rate": 9.408989113748442e-06, "loss": 22.714, "step": 120350 }, { "epoch": 0.24313481498240525, "grad_norm": 413.3352966308594, "learning_rate": 9.408824473912387e-06, "loss": 13.9668, "step": 120360 }, { "epoch": 0.24315501561508907, "grad_norm": 261.628173828125, "learning_rate": 9.408659812588257e-06, "loss": 28.7658, "step": 120370 }, { "epoch": 0.24317521624777289, "grad_norm": 274.11456298828125, "learning_rate": 9.408495129776851e-06, "loss": 19.9927, "step": 120380 }, { "epoch": 0.2431954168804567, "grad_norm": 412.7942199707031, "learning_rate": 9.408330425478978e-06, "loss": 33.9303, "step": 120390 }, { "epoch": 0.24321561751314053, "grad_norm": 347.9386291503906, "learning_rate": 9.408165699695435e-06, "loss": 26.2526, "step": 120400 }, { "epoch": 0.24323581814582432, "grad_norm": 562.2677612304688, "learning_rate": 9.408000952427028e-06, "loss": 20.9464, "step": 120410 }, { "epoch": 0.24325601877850814, "grad_norm": 315.24951171875, "learning_rate": 9.40783618367456e-06, "loss": 12.4533, "step": 120420 }, { "epoch": 0.24327621941119196, "grad_norm": 75.31167602539062, "learning_rate": 9.40767139343883e-06, "loss": 13.3501, "step": 120430 }, { "epoch": 0.24329642004387578, "grad_norm": 82.92420959472656, "learning_rate": 9.407506581720647e-06, "loss": 19.6451, "step": 120440 }, { "epoch": 0.2433166206765596, "grad_norm": 273.968994140625, "learning_rate": 9.407341748520811e-06, "loss": 36.5831, "step": 120450 }, { "epoch": 0.24333682130924342, "grad_norm": 121.8277587890625, "learning_rate": 9.407176893840125e-06, "loss": 17.7766, "step": 120460 }, { "epoch": 0.2433570219419272, "grad_norm": 89.76295471191406, "learning_rate": 9.407012017679393e-06, "loss": 17.1449, "step": 120470 }, { "epoch": 0.24337722257461103, "grad_norm": 338.8143310546875, "learning_rate": 9.40684712003942e-06, "loss": 27.2123, "step": 120480 }, { "epoch": 0.24339742320729485, "grad_norm": 280.63568115234375, "learning_rate": 9.40668220092101e-06, "loss": 21.6199, "step": 120490 }, { "epoch": 0.24341762383997867, "grad_norm": 347.4132995605469, "learning_rate": 9.406517260324962e-06, "loss": 22.1424, "step": 120500 }, { "epoch": 0.2434378244726625, "grad_norm": 351.1105651855469, "learning_rate": 9.406352298252085e-06, "loss": 35.1136, "step": 120510 }, { "epoch": 0.2434580251053463, "grad_norm": 365.5403747558594, "learning_rate": 9.406187314703182e-06, "loss": 30.8422, "step": 120520 }, { "epoch": 0.2434782257380301, "grad_norm": 399.21185302734375, "learning_rate": 9.406022309679055e-06, "loss": 20.5746, "step": 120530 }, { "epoch": 0.24349842637071392, "grad_norm": 172.81338500976562, "learning_rate": 9.40585728318051e-06, "loss": 16.8838, "step": 120540 }, { "epoch": 0.24351862700339774, "grad_norm": 234.37033081054688, "learning_rate": 9.405692235208353e-06, "loss": 23.917, "step": 120550 }, { "epoch": 0.24353882763608156, "grad_norm": 291.97027587890625, "learning_rate": 9.405527165763384e-06, "loss": 26.7501, "step": 120560 }, { "epoch": 0.24355902826876538, "grad_norm": 491.2039794921875, "learning_rate": 9.40536207484641e-06, "loss": 21.0199, "step": 120570 }, { "epoch": 0.2435792289014492, "grad_norm": 206.25115966796875, "learning_rate": 9.405196962458235e-06, "loss": 12.1808, "step": 120580 }, { "epoch": 0.24359942953413302, "grad_norm": 561.973388671875, "learning_rate": 9.405031828599666e-06, "loss": 37.5688, "step": 120590 }, { "epoch": 0.24361963016681681, "grad_norm": 78.26828002929688, "learning_rate": 9.404866673271506e-06, "loss": 6.9708, "step": 120600 }, { "epoch": 0.24363983079950063, "grad_norm": 403.9635925292969, "learning_rate": 9.40470149647456e-06, "loss": 17.0011, "step": 120610 }, { "epoch": 0.24366003143218445, "grad_norm": 139.24371337890625, "learning_rate": 9.404536298209633e-06, "loss": 15.4379, "step": 120620 }, { "epoch": 0.24368023206486827, "grad_norm": 271.83636474609375, "learning_rate": 9.40437107847753e-06, "loss": 17.931, "step": 120630 }, { "epoch": 0.2437004326975521, "grad_norm": 334.8166198730469, "learning_rate": 9.404205837279057e-06, "loss": 15.2801, "step": 120640 }, { "epoch": 0.24372063333023591, "grad_norm": 453.80877685546875, "learning_rate": 9.404040574615018e-06, "loss": 24.6505, "step": 120650 }, { "epoch": 0.2437408339629197, "grad_norm": 243.88304138183594, "learning_rate": 9.40387529048622e-06, "loss": 12.8928, "step": 120660 }, { "epoch": 0.24376103459560353, "grad_norm": 167.43174743652344, "learning_rate": 9.403709984893469e-06, "loss": 20.0423, "step": 120670 }, { "epoch": 0.24378123522828735, "grad_norm": 145.2700958251953, "learning_rate": 9.403544657837569e-06, "loss": 15.216, "step": 120680 }, { "epoch": 0.24380143586097117, "grad_norm": 359.1965637207031, "learning_rate": 9.403379309319325e-06, "loss": 14.9416, "step": 120690 }, { "epoch": 0.243821636493655, "grad_norm": 212.04473876953125, "learning_rate": 9.403213939339546e-06, "loss": 19.9499, "step": 120700 }, { "epoch": 0.2438418371263388, "grad_norm": 330.40899658203125, "learning_rate": 9.403048547899034e-06, "loss": 21.2314, "step": 120710 }, { "epoch": 0.24386203775902263, "grad_norm": 0.0, "learning_rate": 9.402883134998601e-06, "loss": 22.9857, "step": 120720 }, { "epoch": 0.24388223839170642, "grad_norm": 118.38204956054688, "learning_rate": 9.402717700639047e-06, "loss": 17.497, "step": 120730 }, { "epoch": 0.24390243902439024, "grad_norm": 387.2480773925781, "learning_rate": 9.402552244821181e-06, "loss": 21.8154, "step": 120740 }, { "epoch": 0.24392263965707406, "grad_norm": 391.7861328125, "learning_rate": 9.40238676754581e-06, "loss": 9.7133, "step": 120750 }, { "epoch": 0.24394284028975788, "grad_norm": 507.9598388671875, "learning_rate": 9.402221268813741e-06, "loss": 27.8273, "step": 120760 }, { "epoch": 0.2439630409224417, "grad_norm": 333.6948547363281, "learning_rate": 9.402055748625779e-06, "loss": 15.8325, "step": 120770 }, { "epoch": 0.24398324155512552, "grad_norm": 342.9214172363281, "learning_rate": 9.40189020698273e-06, "loss": 19.36, "step": 120780 }, { "epoch": 0.2440034421878093, "grad_norm": 382.54254150390625, "learning_rate": 9.4017246438854e-06, "loss": 18.7334, "step": 120790 }, { "epoch": 0.24402364282049313, "grad_norm": 390.1339111328125, "learning_rate": 9.401559059334601e-06, "loss": 16.1105, "step": 120800 }, { "epoch": 0.24404384345317695, "grad_norm": 163.8233184814453, "learning_rate": 9.401393453331138e-06, "loss": 16.0933, "step": 120810 }, { "epoch": 0.24406404408586077, "grad_norm": 139.25506591796875, "learning_rate": 9.401227825875814e-06, "loss": 16.323, "step": 120820 }, { "epoch": 0.2440842447185446, "grad_norm": 200.1559295654297, "learning_rate": 9.401062176969442e-06, "loss": 17.177, "step": 120830 }, { "epoch": 0.2441044453512284, "grad_norm": 96.61769104003906, "learning_rate": 9.400896506612824e-06, "loss": 30.2775, "step": 120840 }, { "epoch": 0.2441246459839122, "grad_norm": 181.5607147216797, "learning_rate": 9.400730814806774e-06, "loss": 20.6223, "step": 120850 }, { "epoch": 0.24414484661659602, "grad_norm": 231.52061462402344, "learning_rate": 9.400565101552093e-06, "loss": 25.4999, "step": 120860 }, { "epoch": 0.24416504724927984, "grad_norm": 340.57684326171875, "learning_rate": 9.400399366849591e-06, "loss": 23.0424, "step": 120870 }, { "epoch": 0.24418524788196366, "grad_norm": 228.34629821777344, "learning_rate": 9.400233610700078e-06, "loss": 15.6797, "step": 120880 }, { "epoch": 0.24420544851464748, "grad_norm": 58.585472106933594, "learning_rate": 9.400067833104358e-06, "loss": 36.6559, "step": 120890 }, { "epoch": 0.2442256491473313, "grad_norm": 218.31689453125, "learning_rate": 9.399902034063244e-06, "loss": 18.6594, "step": 120900 }, { "epoch": 0.24424584978001512, "grad_norm": 140.39437866210938, "learning_rate": 9.399736213577537e-06, "loss": 16.9975, "step": 120910 }, { "epoch": 0.24426605041269892, "grad_norm": 267.8187255859375, "learning_rate": 9.399570371648052e-06, "loss": 30.6395, "step": 120920 }, { "epoch": 0.24428625104538274, "grad_norm": 328.7215881347656, "learning_rate": 9.399404508275596e-06, "loss": 16.0721, "step": 120930 }, { "epoch": 0.24430645167806656, "grad_norm": 186.5421142578125, "learning_rate": 9.399238623460973e-06, "loss": 10.4403, "step": 120940 }, { "epoch": 0.24432665231075038, "grad_norm": 354.9017333984375, "learning_rate": 9.399072717204995e-06, "loss": 21.2274, "step": 120950 }, { "epoch": 0.2443468529434342, "grad_norm": 0.0, "learning_rate": 9.398906789508474e-06, "loss": 11.8488, "step": 120960 }, { "epoch": 0.24436705357611802, "grad_norm": 582.15576171875, "learning_rate": 9.39874084037221e-06, "loss": 18.1882, "step": 120970 }, { "epoch": 0.2443872542088018, "grad_norm": 154.04116821289062, "learning_rate": 9.39857486979702e-06, "loss": 16.8386, "step": 120980 }, { "epoch": 0.24440745484148563, "grad_norm": 423.5422668457031, "learning_rate": 9.398408877783707e-06, "loss": 17.1731, "step": 120990 }, { "epoch": 0.24442765547416945, "grad_norm": 368.1719665527344, "learning_rate": 9.398242864333084e-06, "loss": 22.2117, "step": 121000 }, { "epoch": 0.24444785610685327, "grad_norm": 120.30873107910156, "learning_rate": 9.398076829445958e-06, "loss": 20.4903, "step": 121010 }, { "epoch": 0.2444680567395371, "grad_norm": 178.31857299804688, "learning_rate": 9.397910773123139e-06, "loss": 7.9651, "step": 121020 }, { "epoch": 0.2444882573722209, "grad_norm": 254.8719940185547, "learning_rate": 9.397744695365435e-06, "loss": 22.4121, "step": 121030 }, { "epoch": 0.24450845800490473, "grad_norm": 581.5372924804688, "learning_rate": 9.39757859617366e-06, "loss": 30.5508, "step": 121040 }, { "epoch": 0.24452865863758852, "grad_norm": 226.2692108154297, "learning_rate": 9.397412475548619e-06, "loss": 13.0596, "step": 121050 }, { "epoch": 0.24454885927027234, "grad_norm": 578.1724853515625, "learning_rate": 9.397246333491121e-06, "loss": 25.1033, "step": 121060 }, { "epoch": 0.24456905990295616, "grad_norm": 96.30291748046875, "learning_rate": 9.39708017000198e-06, "loss": 48.4035, "step": 121070 }, { "epoch": 0.24458926053563998, "grad_norm": 49.431785583496094, "learning_rate": 9.396913985082003e-06, "loss": 18.7674, "step": 121080 }, { "epoch": 0.2446094611683238, "grad_norm": 0.0, "learning_rate": 9.396747778732001e-06, "loss": 13.7805, "step": 121090 }, { "epoch": 0.24462966180100762, "grad_norm": 287.8648376464844, "learning_rate": 9.396581550952781e-06, "loss": 39.6277, "step": 121100 }, { "epoch": 0.2446498624336914, "grad_norm": 150.64413452148438, "learning_rate": 9.396415301745158e-06, "loss": 26.0459, "step": 121110 }, { "epoch": 0.24467006306637523, "grad_norm": 381.2494201660156, "learning_rate": 9.39624903110994e-06, "loss": 22.0562, "step": 121120 }, { "epoch": 0.24469026369905905, "grad_norm": 144.83619689941406, "learning_rate": 9.396082739047938e-06, "loss": 13.8725, "step": 121130 }, { "epoch": 0.24471046433174287, "grad_norm": 413.5066223144531, "learning_rate": 9.39591642555996e-06, "loss": 13.5893, "step": 121140 }, { "epoch": 0.2447306649644267, "grad_norm": 255.9199981689453, "learning_rate": 9.39575009064682e-06, "loss": 22.864, "step": 121150 }, { "epoch": 0.2447508655971105, "grad_norm": 410.5679016113281, "learning_rate": 9.395583734309327e-06, "loss": 13.4904, "step": 121160 }, { "epoch": 0.2447710662297943, "grad_norm": 219.406982421875, "learning_rate": 9.39541735654829e-06, "loss": 35.7142, "step": 121170 }, { "epoch": 0.24479126686247812, "grad_norm": 246.956787109375, "learning_rate": 9.395250957364526e-06, "loss": 13.3119, "step": 121180 }, { "epoch": 0.24481146749516194, "grad_norm": 524.91064453125, "learning_rate": 9.395084536758838e-06, "loss": 15.0943, "step": 121190 }, { "epoch": 0.24483166812784576, "grad_norm": 110.49063110351562, "learning_rate": 9.394918094732044e-06, "loss": 11.6117, "step": 121200 }, { "epoch": 0.24485186876052958, "grad_norm": 22.176515579223633, "learning_rate": 9.394751631284951e-06, "loss": 14.1694, "step": 121210 }, { "epoch": 0.2448720693932134, "grad_norm": 191.9032745361328, "learning_rate": 9.39458514641837e-06, "loss": 26.7247, "step": 121220 }, { "epoch": 0.24489227002589722, "grad_norm": 146.64915466308594, "learning_rate": 9.394418640133116e-06, "loss": 8.9537, "step": 121230 }, { "epoch": 0.24491247065858102, "grad_norm": 99.15907287597656, "learning_rate": 9.394252112429998e-06, "loss": 12.3838, "step": 121240 }, { "epoch": 0.24493267129126484, "grad_norm": 440.89471435546875, "learning_rate": 9.394085563309827e-06, "loss": 34.8588, "step": 121250 }, { "epoch": 0.24495287192394866, "grad_norm": 154.68734741210938, "learning_rate": 9.393918992773418e-06, "loss": 26.8893, "step": 121260 }, { "epoch": 0.24497307255663248, "grad_norm": 80.12486267089844, "learning_rate": 9.393752400821578e-06, "loss": 29.6035, "step": 121270 }, { "epoch": 0.2449932731893163, "grad_norm": 166.67759704589844, "learning_rate": 9.393585787455125e-06, "loss": 23.5264, "step": 121280 }, { "epoch": 0.24501347382200012, "grad_norm": 604.5081176757812, "learning_rate": 9.393419152674866e-06, "loss": 14.9049, "step": 121290 }, { "epoch": 0.2450336744546839, "grad_norm": 222.1289520263672, "learning_rate": 9.393252496481615e-06, "loss": 18.5054, "step": 121300 }, { "epoch": 0.24505387508736773, "grad_norm": 70.9024887084961, "learning_rate": 9.393085818876184e-06, "loss": 14.2175, "step": 121310 }, { "epoch": 0.24507407572005155, "grad_norm": 198.43609619140625, "learning_rate": 9.392919119859387e-06, "loss": 9.1978, "step": 121320 }, { "epoch": 0.24509427635273537, "grad_norm": 686.9952392578125, "learning_rate": 9.392752399432032e-06, "loss": 32.1844, "step": 121330 }, { "epoch": 0.2451144769854192, "grad_norm": 263.3272399902344, "learning_rate": 9.392585657594938e-06, "loss": 17.0281, "step": 121340 }, { "epoch": 0.245134677618103, "grad_norm": 386.4941711425781, "learning_rate": 9.392418894348912e-06, "loss": 24.2318, "step": 121350 }, { "epoch": 0.24515487825078683, "grad_norm": 217.57481384277344, "learning_rate": 9.39225210969477e-06, "loss": 21.9365, "step": 121360 }, { "epoch": 0.24517507888347062, "grad_norm": 98.60443878173828, "learning_rate": 9.392085303633322e-06, "loss": 20.9365, "step": 121370 }, { "epoch": 0.24519527951615444, "grad_norm": 471.8418273925781, "learning_rate": 9.391918476165385e-06, "loss": 25.0106, "step": 121380 }, { "epoch": 0.24521548014883826, "grad_norm": 127.27840423583984, "learning_rate": 9.39175162729177e-06, "loss": 16.1551, "step": 121390 }, { "epoch": 0.24523568078152208, "grad_norm": 621.3473510742188, "learning_rate": 9.39158475701329e-06, "loss": 32.8492, "step": 121400 }, { "epoch": 0.2452558814142059, "grad_norm": 432.1988830566406, "learning_rate": 9.391417865330759e-06, "loss": 24.7556, "step": 121410 }, { "epoch": 0.24527608204688972, "grad_norm": 364.0677490234375, "learning_rate": 9.391250952244987e-06, "loss": 14.576, "step": 121420 }, { "epoch": 0.2452962826795735, "grad_norm": 396.62530517578125, "learning_rate": 9.391084017756794e-06, "loss": 23.3051, "step": 121430 }, { "epoch": 0.24531648331225733, "grad_norm": 496.9324035644531, "learning_rate": 9.390917061866988e-06, "loss": 30.9506, "step": 121440 }, { "epoch": 0.24533668394494115, "grad_norm": 338.2589111328125, "learning_rate": 9.390750084576387e-06, "loss": 36.1758, "step": 121450 }, { "epoch": 0.24535688457762497, "grad_norm": 168.20997619628906, "learning_rate": 9.3905830858858e-06, "loss": 15.8816, "step": 121460 }, { "epoch": 0.2453770852103088, "grad_norm": 544.0722045898438, "learning_rate": 9.390416065796045e-06, "loss": 34.6447, "step": 121470 }, { "epoch": 0.2453972858429926, "grad_norm": 475.751220703125, "learning_rate": 9.390249024307934e-06, "loss": 19.0071, "step": 121480 }, { "epoch": 0.2454174864756764, "grad_norm": 295.1993408203125, "learning_rate": 9.390081961422283e-06, "loss": 15.9255, "step": 121490 }, { "epoch": 0.24543768710836023, "grad_norm": 622.0045166015625, "learning_rate": 9.389914877139903e-06, "loss": 28.3303, "step": 121500 }, { "epoch": 0.24545788774104405, "grad_norm": 135.67210388183594, "learning_rate": 9.389747771461612e-06, "loss": 19.7822, "step": 121510 }, { "epoch": 0.24547808837372787, "grad_norm": 352.0944519042969, "learning_rate": 9.389580644388222e-06, "loss": 15.5475, "step": 121520 }, { "epoch": 0.24549828900641169, "grad_norm": 914.2173461914062, "learning_rate": 9.38941349592055e-06, "loss": 45.046, "step": 121530 }, { "epoch": 0.2455184896390955, "grad_norm": 89.35135650634766, "learning_rate": 9.389246326059406e-06, "loss": 21.4963, "step": 121540 }, { "epoch": 0.24553869027177933, "grad_norm": 183.7488555908203, "learning_rate": 9.38907913480561e-06, "loss": 22.2537, "step": 121550 }, { "epoch": 0.24555889090446312, "grad_norm": 426.1815490722656, "learning_rate": 9.388911922159973e-06, "loss": 20.6709, "step": 121560 }, { "epoch": 0.24557909153714694, "grad_norm": 454.4010925292969, "learning_rate": 9.388744688123313e-06, "loss": 22.4055, "step": 121570 }, { "epoch": 0.24559929216983076, "grad_norm": 282.8498840332031, "learning_rate": 9.388577432696441e-06, "loss": 16.691, "step": 121580 }, { "epoch": 0.24561949280251458, "grad_norm": 453.11187744140625, "learning_rate": 9.388410155880178e-06, "loss": 22.0153, "step": 121590 }, { "epoch": 0.2456396934351984, "grad_norm": 188.14837646484375, "learning_rate": 9.388242857675336e-06, "loss": 18.0904, "step": 121600 }, { "epoch": 0.24565989406788222, "grad_norm": 42.5057487487793, "learning_rate": 9.388075538082729e-06, "loss": 18.1115, "step": 121610 }, { "epoch": 0.245680094700566, "grad_norm": 211.0455780029297, "learning_rate": 9.387908197103175e-06, "loss": 14.2646, "step": 121620 }, { "epoch": 0.24570029533324983, "grad_norm": 166.1518096923828, "learning_rate": 9.38774083473749e-06, "loss": 19.252, "step": 121630 }, { "epoch": 0.24572049596593365, "grad_norm": 102.5889892578125, "learning_rate": 9.387573450986485e-06, "loss": 21.5408, "step": 121640 }, { "epoch": 0.24574069659861747, "grad_norm": 122.47067260742188, "learning_rate": 9.38740604585098e-06, "loss": 16.1773, "step": 121650 }, { "epoch": 0.2457608972313013, "grad_norm": 169.6429901123047, "learning_rate": 9.387238619331791e-06, "loss": 27.0278, "step": 121660 }, { "epoch": 0.2457810978639851, "grad_norm": 216.02561950683594, "learning_rate": 9.387071171429734e-06, "loss": 10.4282, "step": 121670 }, { "epoch": 0.2458012984966689, "grad_norm": 20.382822036743164, "learning_rate": 9.386903702145622e-06, "loss": 25.5732, "step": 121680 }, { "epoch": 0.24582149912935272, "grad_norm": 369.7774658203125, "learning_rate": 9.386736211480276e-06, "loss": 23.5115, "step": 121690 }, { "epoch": 0.24584169976203654, "grad_norm": 451.6445617675781, "learning_rate": 9.386568699434509e-06, "loss": 16.9501, "step": 121700 }, { "epoch": 0.24586190039472036, "grad_norm": 291.1457824707031, "learning_rate": 9.386401166009135e-06, "loss": 23.7284, "step": 121710 }, { "epoch": 0.24588210102740418, "grad_norm": 5.184332370758057, "learning_rate": 9.386233611204979e-06, "loss": 41.9864, "step": 121720 }, { "epoch": 0.245902301660088, "grad_norm": 277.9345397949219, "learning_rate": 9.386066035022849e-06, "loss": 27.9085, "step": 121730 }, { "epoch": 0.24592250229277182, "grad_norm": 93.23077392578125, "learning_rate": 9.385898437463565e-06, "loss": 16.2392, "step": 121740 }, { "epoch": 0.24594270292545561, "grad_norm": 516.2257690429688, "learning_rate": 9.385730818527945e-06, "loss": 22.5362, "step": 121750 }, { "epoch": 0.24596290355813943, "grad_norm": 271.58746337890625, "learning_rate": 9.385563178216804e-06, "loss": 38.836, "step": 121760 }, { "epoch": 0.24598310419082325, "grad_norm": 485.14202880859375, "learning_rate": 9.38539551653096e-06, "loss": 24.078, "step": 121770 }, { "epoch": 0.24600330482350707, "grad_norm": 366.55657958984375, "learning_rate": 9.385227833471232e-06, "loss": 18.2928, "step": 121780 }, { "epoch": 0.2460235054561909, "grad_norm": 404.7623291015625, "learning_rate": 9.385060129038434e-06, "loss": 19.2057, "step": 121790 }, { "epoch": 0.24604370608887471, "grad_norm": 660.3389892578125, "learning_rate": 9.384892403233384e-06, "loss": 35.6633, "step": 121800 }, { "epoch": 0.2460639067215585, "grad_norm": 528.0242309570312, "learning_rate": 9.384724656056902e-06, "loss": 29.6892, "step": 121810 }, { "epoch": 0.24608410735424233, "grad_norm": 252.36282348632812, "learning_rate": 9.384556887509802e-06, "loss": 25.3004, "step": 121820 }, { "epoch": 0.24610430798692615, "grad_norm": 121.01582336425781, "learning_rate": 9.384389097592904e-06, "loss": 17.8493, "step": 121830 }, { "epoch": 0.24612450861960997, "grad_norm": 595.999267578125, "learning_rate": 9.384221286307028e-06, "loss": 18.1091, "step": 121840 }, { "epoch": 0.2461447092522938, "grad_norm": 737.8707885742188, "learning_rate": 9.384053453652986e-06, "loss": 29.4458, "step": 121850 }, { "epoch": 0.2461649098849776, "grad_norm": 144.3780517578125, "learning_rate": 9.3838855996316e-06, "loss": 15.2704, "step": 121860 }, { "epoch": 0.24618511051766143, "grad_norm": 348.924072265625, "learning_rate": 9.383717724243688e-06, "loss": 15.6098, "step": 121870 }, { "epoch": 0.24620531115034522, "grad_norm": 314.0735778808594, "learning_rate": 9.383549827490066e-06, "loss": 37.3985, "step": 121880 }, { "epoch": 0.24622551178302904, "grad_norm": 280.85693359375, "learning_rate": 9.383381909371555e-06, "loss": 28.0586, "step": 121890 }, { "epoch": 0.24624571241571286, "grad_norm": 193.33807373046875, "learning_rate": 9.383213969888972e-06, "loss": 24.7451, "step": 121900 }, { "epoch": 0.24626591304839668, "grad_norm": 233.02496337890625, "learning_rate": 9.383046009043134e-06, "loss": 23.71, "step": 121910 }, { "epoch": 0.2462861136810805, "grad_norm": 227.35826110839844, "learning_rate": 9.382878026834865e-06, "loss": 15.6972, "step": 121920 }, { "epoch": 0.24630631431376432, "grad_norm": 285.46063232421875, "learning_rate": 9.382710023264978e-06, "loss": 9.8602, "step": 121930 }, { "epoch": 0.2463265149464481, "grad_norm": 396.2301330566406, "learning_rate": 9.382541998334293e-06, "loss": 24.719, "step": 121940 }, { "epoch": 0.24634671557913193, "grad_norm": 161.89984130859375, "learning_rate": 9.382373952043631e-06, "loss": 22.7492, "step": 121950 }, { "epoch": 0.24636691621181575, "grad_norm": 103.46430206298828, "learning_rate": 9.38220588439381e-06, "loss": 17.3273, "step": 121960 }, { "epoch": 0.24638711684449957, "grad_norm": 468.0312194824219, "learning_rate": 9.38203779538565e-06, "loss": 16.1716, "step": 121970 }, { "epoch": 0.2464073174771834, "grad_norm": 297.6427307128906, "learning_rate": 9.381869685019967e-06, "loss": 25.3695, "step": 121980 }, { "epoch": 0.2464275181098672, "grad_norm": 295.02960205078125, "learning_rate": 9.381701553297584e-06, "loss": 10.092, "step": 121990 }, { "epoch": 0.246447718742551, "grad_norm": 4.670551300048828, "learning_rate": 9.381533400219319e-06, "loss": 30.9343, "step": 122000 }, { "epoch": 0.24646791937523482, "grad_norm": 184.30104064941406, "learning_rate": 9.38136522578599e-06, "loss": 24.176, "step": 122010 }, { "epoch": 0.24648812000791864, "grad_norm": 731.8292236328125, "learning_rate": 9.381197029998422e-06, "loss": 57.9426, "step": 122020 }, { "epoch": 0.24650832064060246, "grad_norm": 867.0030517578125, "learning_rate": 9.381028812857426e-06, "loss": 35.8635, "step": 122030 }, { "epoch": 0.24652852127328628, "grad_norm": 219.7139892578125, "learning_rate": 9.38086057436383e-06, "loss": 28.6944, "step": 122040 }, { "epoch": 0.2465487219059701, "grad_norm": 514.936767578125, "learning_rate": 9.38069231451845e-06, "loss": 30.1731, "step": 122050 }, { "epoch": 0.24656892253865392, "grad_norm": 67.72396850585938, "learning_rate": 9.380524033322108e-06, "loss": 16.2946, "step": 122060 }, { "epoch": 0.24658912317133772, "grad_norm": 148.10015869140625, "learning_rate": 9.380355730775623e-06, "loss": 26.3807, "step": 122070 }, { "epoch": 0.24660932380402154, "grad_norm": 592.2023315429688, "learning_rate": 9.380187406879815e-06, "loss": 41.1483, "step": 122080 }, { "epoch": 0.24662952443670536, "grad_norm": 250.60169982910156, "learning_rate": 9.380019061635506e-06, "loss": 17.1958, "step": 122090 }, { "epoch": 0.24664972506938918, "grad_norm": 373.3446350097656, "learning_rate": 9.379850695043513e-06, "loss": 14.0837, "step": 122100 }, { "epoch": 0.246669925702073, "grad_norm": 391.5966796875, "learning_rate": 9.37968230710466e-06, "loss": 12.887, "step": 122110 }, { "epoch": 0.24669012633475682, "grad_norm": 72.04792785644531, "learning_rate": 9.379513897819768e-06, "loss": 14.6065, "step": 122120 }, { "epoch": 0.2467103269674406, "grad_norm": 148.82769775390625, "learning_rate": 9.379345467189655e-06, "loss": 15.1996, "step": 122130 }, { "epoch": 0.24673052760012443, "grad_norm": 389.731201171875, "learning_rate": 9.379177015215145e-06, "loss": 24.2729, "step": 122140 }, { "epoch": 0.24675072823280825, "grad_norm": 336.28887939453125, "learning_rate": 9.379008541897054e-06, "loss": 23.7196, "step": 122150 }, { "epoch": 0.24677092886549207, "grad_norm": 389.7393493652344, "learning_rate": 9.378840047236209e-06, "loss": 21.0888, "step": 122160 }, { "epoch": 0.2467911294981759, "grad_norm": 417.93414306640625, "learning_rate": 9.378671531233428e-06, "loss": 30.7289, "step": 122170 }, { "epoch": 0.2468113301308597, "grad_norm": 434.58026123046875, "learning_rate": 9.378502993889533e-06, "loss": 15.342, "step": 122180 }, { "epoch": 0.24683153076354353, "grad_norm": 192.63336181640625, "learning_rate": 9.378334435205345e-06, "loss": 15.8853, "step": 122190 }, { "epoch": 0.24685173139622732, "grad_norm": 246.4895782470703, "learning_rate": 9.378165855181687e-06, "loss": 27.2555, "step": 122200 }, { "epoch": 0.24687193202891114, "grad_norm": 440.0880126953125, "learning_rate": 9.377997253819378e-06, "loss": 28.1729, "step": 122210 }, { "epoch": 0.24689213266159496, "grad_norm": 464.169189453125, "learning_rate": 9.377828631119243e-06, "loss": 29.9495, "step": 122220 }, { "epoch": 0.24691233329427878, "grad_norm": 190.8328857421875, "learning_rate": 9.377659987082101e-06, "loss": 13.6483, "step": 122230 }, { "epoch": 0.2469325339269626, "grad_norm": 460.78515625, "learning_rate": 9.377491321708777e-06, "loss": 19.3226, "step": 122240 }, { "epoch": 0.24695273455964642, "grad_norm": 193.47674560546875, "learning_rate": 9.37732263500009e-06, "loss": 32.7641, "step": 122250 }, { "epoch": 0.2469729351923302, "grad_norm": 438.4677734375, "learning_rate": 9.377153926956864e-06, "loss": 13.8252, "step": 122260 }, { "epoch": 0.24699313582501403, "grad_norm": 321.5914001464844, "learning_rate": 9.376985197579919e-06, "loss": 33.8653, "step": 122270 }, { "epoch": 0.24701333645769785, "grad_norm": 29.87141227722168, "learning_rate": 9.37681644687008e-06, "loss": 11.5188, "step": 122280 }, { "epoch": 0.24703353709038167, "grad_norm": 429.9920349121094, "learning_rate": 9.37664767482817e-06, "loss": 18.7175, "step": 122290 }, { "epoch": 0.2470537377230655, "grad_norm": 250.72093200683594, "learning_rate": 9.376478881455008e-06, "loss": 48.1337, "step": 122300 }, { "epoch": 0.2470739383557493, "grad_norm": 72.3940200805664, "learning_rate": 9.37631006675142e-06, "loss": 27.225, "step": 122310 }, { "epoch": 0.2470941389884331, "grad_norm": 233.46595764160156, "learning_rate": 9.376141230718228e-06, "loss": 32.6905, "step": 122320 }, { "epoch": 0.24711433962111692, "grad_norm": 230.80564880371094, "learning_rate": 9.375972373356253e-06, "loss": 25.3183, "step": 122330 }, { "epoch": 0.24713454025380074, "grad_norm": 268.08770751953125, "learning_rate": 9.375803494666319e-06, "loss": 20.5242, "step": 122340 }, { "epoch": 0.24715474088648456, "grad_norm": 131.09878540039062, "learning_rate": 9.37563459464925e-06, "loss": 12.4933, "step": 122350 }, { "epoch": 0.24717494151916838, "grad_norm": 351.6463928222656, "learning_rate": 9.37546567330587e-06, "loss": 28.0411, "step": 122360 }, { "epoch": 0.2471951421518522, "grad_norm": 236.28448486328125, "learning_rate": 9.375296730636999e-06, "loss": 18.2322, "step": 122370 }, { "epoch": 0.24721534278453602, "grad_norm": 240.4258575439453, "learning_rate": 9.375127766643464e-06, "loss": 25.0062, "step": 122380 }, { "epoch": 0.24723554341721982, "grad_norm": 415.5560607910156, "learning_rate": 9.374958781326085e-06, "loss": 19.2733, "step": 122390 }, { "epoch": 0.24725574404990364, "grad_norm": 280.2322998046875, "learning_rate": 9.37478977468569e-06, "loss": 22.2951, "step": 122400 }, { "epoch": 0.24727594468258746, "grad_norm": 305.5260009765625, "learning_rate": 9.374620746723097e-06, "loss": 15.3536, "step": 122410 }, { "epoch": 0.24729614531527128, "grad_norm": 49.19283676147461, "learning_rate": 9.374451697439137e-06, "loss": 27.83, "step": 122420 }, { "epoch": 0.2473163459479551, "grad_norm": 249.2811279296875, "learning_rate": 9.374282626834627e-06, "loss": 29.2883, "step": 122430 }, { "epoch": 0.24733654658063892, "grad_norm": 439.9292297363281, "learning_rate": 9.374113534910396e-06, "loss": 12.7591, "step": 122440 }, { "epoch": 0.2473567472133227, "grad_norm": 35.78599166870117, "learning_rate": 9.373944421667264e-06, "loss": 16.2186, "step": 122450 }, { "epoch": 0.24737694784600653, "grad_norm": 417.87945556640625, "learning_rate": 9.37377528710606e-06, "loss": 33.5259, "step": 122460 }, { "epoch": 0.24739714847869035, "grad_norm": 131.766357421875, "learning_rate": 9.373606131227604e-06, "loss": 25.682, "step": 122470 }, { "epoch": 0.24741734911137417, "grad_norm": 250.87960815429688, "learning_rate": 9.373436954032722e-06, "loss": 19.8886, "step": 122480 }, { "epoch": 0.247437549744058, "grad_norm": 628.5338134765625, "learning_rate": 9.373267755522239e-06, "loss": 28.6446, "step": 122490 }, { "epoch": 0.2474577503767418, "grad_norm": 393.8164978027344, "learning_rate": 9.37309853569698e-06, "loss": 13.5007, "step": 122500 }, { "epoch": 0.24747795100942563, "grad_norm": 517.8650512695312, "learning_rate": 9.372929294557768e-06, "loss": 27.8941, "step": 122510 }, { "epoch": 0.24749815164210942, "grad_norm": 625.7908325195312, "learning_rate": 9.37276003210543e-06, "loss": 18.7876, "step": 122520 }, { "epoch": 0.24751835227479324, "grad_norm": 382.54351806640625, "learning_rate": 9.37259074834079e-06, "loss": 26.2728, "step": 122530 }, { "epoch": 0.24753855290747706, "grad_norm": 24.26190948486328, "learning_rate": 9.372421443264672e-06, "loss": 14.259, "step": 122540 }, { "epoch": 0.24755875354016088, "grad_norm": 267.2773742675781, "learning_rate": 9.372252116877904e-06, "loss": 18.6299, "step": 122550 }, { "epoch": 0.2475789541728447, "grad_norm": 263.0425109863281, "learning_rate": 9.372082769181307e-06, "loss": 18.9713, "step": 122560 }, { "epoch": 0.24759915480552852, "grad_norm": 131.89404296875, "learning_rate": 9.371913400175711e-06, "loss": 12.7858, "step": 122570 }, { "epoch": 0.2476193554382123, "grad_norm": 117.1008529663086, "learning_rate": 9.371744009861938e-06, "loss": 19.392, "step": 122580 }, { "epoch": 0.24763955607089613, "grad_norm": 177.46963500976562, "learning_rate": 9.371574598240816e-06, "loss": 16.1147, "step": 122590 }, { "epoch": 0.24765975670357995, "grad_norm": 144.79193115234375, "learning_rate": 9.371405165313169e-06, "loss": 13.0362, "step": 122600 }, { "epoch": 0.24767995733626377, "grad_norm": 127.48401641845703, "learning_rate": 9.371235711079824e-06, "loss": 26.6757, "step": 122610 }, { "epoch": 0.2477001579689476, "grad_norm": 557.9957275390625, "learning_rate": 9.371066235541607e-06, "loss": 18.3614, "step": 122620 }, { "epoch": 0.2477203586016314, "grad_norm": 256.4150695800781, "learning_rate": 9.37089673869934e-06, "loss": 29.4805, "step": 122630 }, { "epoch": 0.2477405592343152, "grad_norm": 421.565185546875, "learning_rate": 9.370727220553854e-06, "loss": 31.1976, "step": 122640 }, { "epoch": 0.24776075986699903, "grad_norm": 802.7249755859375, "learning_rate": 9.370557681105975e-06, "loss": 23.2269, "step": 122650 }, { "epoch": 0.24778096049968285, "grad_norm": 261.2586975097656, "learning_rate": 9.370388120356527e-06, "loss": 28.0082, "step": 122660 }, { "epoch": 0.24780116113236667, "grad_norm": 363.9118957519531, "learning_rate": 9.370218538306338e-06, "loss": 21.6587, "step": 122670 }, { "epoch": 0.24782136176505049, "grad_norm": 168.5192413330078, "learning_rate": 9.370048934956232e-06, "loss": 24.1247, "step": 122680 }, { "epoch": 0.2478415623977343, "grad_norm": 548.4960327148438, "learning_rate": 9.36987931030704e-06, "loss": 34.2736, "step": 122690 }, { "epoch": 0.24786176303041813, "grad_norm": 223.31068420410156, "learning_rate": 9.369709664359585e-06, "loss": 19.4314, "step": 122700 }, { "epoch": 0.24788196366310192, "grad_norm": 40.617652893066406, "learning_rate": 9.369539997114694e-06, "loss": 20.9162, "step": 122710 }, { "epoch": 0.24790216429578574, "grad_norm": 115.05989837646484, "learning_rate": 9.369370308573198e-06, "loss": 16.1978, "step": 122720 }, { "epoch": 0.24792236492846956, "grad_norm": 219.8512725830078, "learning_rate": 9.36920059873592e-06, "loss": 18.2775, "step": 122730 }, { "epoch": 0.24794256556115338, "grad_norm": 191.3773651123047, "learning_rate": 9.369030867603686e-06, "loss": 19.4027, "step": 122740 }, { "epoch": 0.2479627661938372, "grad_norm": 32.820186614990234, "learning_rate": 9.368861115177327e-06, "loss": 18.4969, "step": 122750 }, { "epoch": 0.24798296682652102, "grad_norm": 805.6445922851562, "learning_rate": 9.36869134145767e-06, "loss": 31.723, "step": 122760 }, { "epoch": 0.2480031674592048, "grad_norm": 40.24922180175781, "learning_rate": 9.36852154644554e-06, "loss": 24.4859, "step": 122770 }, { "epoch": 0.24802336809188863, "grad_norm": 167.02395629882812, "learning_rate": 9.368351730141764e-06, "loss": 15.0863, "step": 122780 }, { "epoch": 0.24804356872457245, "grad_norm": 430.74090576171875, "learning_rate": 9.368181892547174e-06, "loss": 30.2307, "step": 122790 }, { "epoch": 0.24806376935725627, "grad_norm": 319.8312683105469, "learning_rate": 9.368012033662594e-06, "loss": 25.9489, "step": 122800 }, { "epoch": 0.2480839699899401, "grad_norm": 287.0397033691406, "learning_rate": 9.367842153488853e-06, "loss": 13.7859, "step": 122810 }, { "epoch": 0.2481041706226239, "grad_norm": 103.63453674316406, "learning_rate": 9.36767225202678e-06, "loss": 26.7782, "step": 122820 }, { "epoch": 0.24812437125530773, "grad_norm": 196.89942932128906, "learning_rate": 9.367502329277203e-06, "loss": 15.3531, "step": 122830 }, { "epoch": 0.24814457188799152, "grad_norm": 690.8020629882812, "learning_rate": 9.367332385240949e-06, "loss": 17.7495, "step": 122840 }, { "epoch": 0.24816477252067534, "grad_norm": 932.5863037109375, "learning_rate": 9.367162419918845e-06, "loss": 31.1508, "step": 122850 }, { "epoch": 0.24818497315335916, "grad_norm": 2194.69921875, "learning_rate": 9.366992433311722e-06, "loss": 23.4282, "step": 122860 }, { "epoch": 0.24820517378604298, "grad_norm": 283.1418762207031, "learning_rate": 9.366822425420407e-06, "loss": 16.7128, "step": 122870 }, { "epoch": 0.2482253744187268, "grad_norm": 591.3319702148438, "learning_rate": 9.36665239624573e-06, "loss": 34.3464, "step": 122880 }, { "epoch": 0.24824557505141062, "grad_norm": 96.37638092041016, "learning_rate": 9.366482345788519e-06, "loss": 27.6027, "step": 122890 }, { "epoch": 0.24826577568409441, "grad_norm": 179.017822265625, "learning_rate": 9.366312274049602e-06, "loss": 21.9747, "step": 122900 }, { "epoch": 0.24828597631677823, "grad_norm": 49.534324645996094, "learning_rate": 9.366142181029808e-06, "loss": 16.8947, "step": 122910 }, { "epoch": 0.24830617694946205, "grad_norm": 232.26951599121094, "learning_rate": 9.365972066729967e-06, "loss": 21.8574, "step": 122920 }, { "epoch": 0.24832637758214587, "grad_norm": 143.52928161621094, "learning_rate": 9.365801931150909e-06, "loss": 12.3172, "step": 122930 }, { "epoch": 0.2483465782148297, "grad_norm": 437.6436462402344, "learning_rate": 9.36563177429346e-06, "loss": 21.4025, "step": 122940 }, { "epoch": 0.24836677884751351, "grad_norm": 212.8344268798828, "learning_rate": 9.365461596158451e-06, "loss": 19.2729, "step": 122950 }, { "epoch": 0.2483869794801973, "grad_norm": 338.8185119628906, "learning_rate": 9.365291396746714e-06, "loss": 17.1719, "step": 122960 }, { "epoch": 0.24840718011288113, "grad_norm": 302.4955749511719, "learning_rate": 9.365121176059075e-06, "loss": 14.3824, "step": 122970 }, { "epoch": 0.24842738074556495, "grad_norm": 373.9198913574219, "learning_rate": 9.364950934096365e-06, "loss": 16.4941, "step": 122980 }, { "epoch": 0.24844758137824877, "grad_norm": 604.7946166992188, "learning_rate": 9.364780670859412e-06, "loss": 27.9342, "step": 122990 }, { "epoch": 0.2484677820109326, "grad_norm": 381.65887451171875, "learning_rate": 9.364610386349048e-06, "loss": 26.3774, "step": 123000 }, { "epoch": 0.2484879826436164, "grad_norm": 132.8224334716797, "learning_rate": 9.364440080566104e-06, "loss": 16.8638, "step": 123010 }, { "epoch": 0.24850818327630023, "grad_norm": 272.595947265625, "learning_rate": 9.364269753511407e-06, "loss": 32.2958, "step": 123020 }, { "epoch": 0.24852838390898402, "grad_norm": 323.96868896484375, "learning_rate": 9.36409940518579e-06, "loss": 33.3456, "step": 123030 }, { "epoch": 0.24854858454166784, "grad_norm": 454.6589660644531, "learning_rate": 9.363929035590081e-06, "loss": 26.3727, "step": 123040 }, { "epoch": 0.24856878517435166, "grad_norm": 261.1114196777344, "learning_rate": 9.36375864472511e-06, "loss": 37.1779, "step": 123050 }, { "epoch": 0.24858898580703548, "grad_norm": 135.88600158691406, "learning_rate": 9.363588232591709e-06, "loss": 44.2261, "step": 123060 }, { "epoch": 0.2486091864397193, "grad_norm": 0.0, "learning_rate": 9.363417799190708e-06, "loss": 26.4405, "step": 123070 }, { "epoch": 0.24862938707240312, "grad_norm": 153.91104125976562, "learning_rate": 9.363247344522939e-06, "loss": 18.9749, "step": 123080 }, { "epoch": 0.2486495877050869, "grad_norm": 256.20318603515625, "learning_rate": 9.363076868589232e-06, "loss": 11.7329, "step": 123090 }, { "epoch": 0.24866978833777073, "grad_norm": 191.7756805419922, "learning_rate": 9.362906371390416e-06, "loss": 21.2067, "step": 123100 }, { "epoch": 0.24868998897045455, "grad_norm": 924.91845703125, "learning_rate": 9.362735852927324e-06, "loss": 42.3433, "step": 123110 }, { "epoch": 0.24871018960313837, "grad_norm": 327.1133117675781, "learning_rate": 9.362565313200786e-06, "loss": 28.4112, "step": 123120 }, { "epoch": 0.2487303902358222, "grad_norm": 223.49862670898438, "learning_rate": 9.362394752211636e-06, "loss": 9.1256, "step": 123130 }, { "epoch": 0.248750590868506, "grad_norm": 343.7049560546875, "learning_rate": 9.3622241699607e-06, "loss": 17.278, "step": 123140 }, { "epoch": 0.24877079150118983, "grad_norm": 530.9073486328125, "learning_rate": 9.362053566448816e-06, "loss": 26.0197, "step": 123150 }, { "epoch": 0.24879099213387362, "grad_norm": 195.9426727294922, "learning_rate": 9.36188294167681e-06, "loss": 16.7225, "step": 123160 }, { "epoch": 0.24881119276655744, "grad_norm": 104.44295501708984, "learning_rate": 9.361712295645515e-06, "loss": 17.6279, "step": 123170 }, { "epoch": 0.24883139339924126, "grad_norm": 300.3389892578125, "learning_rate": 9.361541628355763e-06, "loss": 24.925, "step": 123180 }, { "epoch": 0.24885159403192508, "grad_norm": 172.18397521972656, "learning_rate": 9.361370939808387e-06, "loss": 12.0543, "step": 123190 }, { "epoch": 0.2488717946646089, "grad_norm": 454.7204284667969, "learning_rate": 9.361200230004219e-06, "loss": 33.4985, "step": 123200 }, { "epoch": 0.24889199529729272, "grad_norm": 115.46682739257812, "learning_rate": 9.36102949894409e-06, "loss": 12.5061, "step": 123210 }, { "epoch": 0.24891219592997652, "grad_norm": 598.9180297851562, "learning_rate": 9.36085874662883e-06, "loss": 25.8026, "step": 123220 }, { "epoch": 0.24893239656266034, "grad_norm": 487.46759033203125, "learning_rate": 9.360687973059274e-06, "loss": 26.749, "step": 123230 }, { "epoch": 0.24895259719534416, "grad_norm": 238.48448181152344, "learning_rate": 9.360517178236255e-06, "loss": 24.21, "step": 123240 }, { "epoch": 0.24897279782802798, "grad_norm": 368.4016418457031, "learning_rate": 9.360346362160604e-06, "loss": 18.1705, "step": 123250 }, { "epoch": 0.2489929984607118, "grad_norm": 250.23214721679688, "learning_rate": 9.360175524833153e-06, "loss": 37.1736, "step": 123260 }, { "epoch": 0.24901319909339562, "grad_norm": 15.857328414916992, "learning_rate": 9.360004666254735e-06, "loss": 23.373, "step": 123270 }, { "epoch": 0.2490333997260794, "grad_norm": 106.97638702392578, "learning_rate": 9.359833786426183e-06, "loss": 19.6804, "step": 123280 }, { "epoch": 0.24905360035876323, "grad_norm": 293.2815856933594, "learning_rate": 9.35966288534833e-06, "loss": 11.924, "step": 123290 }, { "epoch": 0.24907380099144705, "grad_norm": 411.2304992675781, "learning_rate": 9.35949196302201e-06, "loss": 26.7655, "step": 123300 }, { "epoch": 0.24909400162413087, "grad_norm": 210.4949493408203, "learning_rate": 9.359321019448054e-06, "loss": 21.2699, "step": 123310 }, { "epoch": 0.2491142022568147, "grad_norm": 431.54278564453125, "learning_rate": 9.359150054627298e-06, "loss": 17.6999, "step": 123320 }, { "epoch": 0.2491344028894985, "grad_norm": 359.3699951171875, "learning_rate": 9.35897906856057e-06, "loss": 11.3862, "step": 123330 }, { "epoch": 0.24915460352218233, "grad_norm": 589.803466796875, "learning_rate": 9.358808061248708e-06, "loss": 16.8608, "step": 123340 }, { "epoch": 0.24917480415486612, "grad_norm": 233.84031677246094, "learning_rate": 9.358637032692546e-06, "loss": 12.8964, "step": 123350 }, { "epoch": 0.24919500478754994, "grad_norm": 360.8388366699219, "learning_rate": 9.358465982892913e-06, "loss": 21.7194, "step": 123360 }, { "epoch": 0.24921520542023376, "grad_norm": 39.51784133911133, "learning_rate": 9.358294911850648e-06, "loss": 11.5187, "step": 123370 }, { "epoch": 0.24923540605291758, "grad_norm": 245.67733764648438, "learning_rate": 9.35812381956658e-06, "loss": 10.2482, "step": 123380 }, { "epoch": 0.2492556066856014, "grad_norm": 253.69053649902344, "learning_rate": 9.357952706041545e-06, "loss": 31.2953, "step": 123390 }, { "epoch": 0.24927580731828522, "grad_norm": 114.39710998535156, "learning_rate": 9.357781571276379e-06, "loss": 35.82, "step": 123400 }, { "epoch": 0.249296007950969, "grad_norm": 551.749755859375, "learning_rate": 9.357610415271913e-06, "loss": 26.747, "step": 123410 }, { "epoch": 0.24931620858365283, "grad_norm": 179.013427734375, "learning_rate": 9.357439238028982e-06, "loss": 24.2434, "step": 123420 }, { "epoch": 0.24933640921633665, "grad_norm": 480.8097839355469, "learning_rate": 9.357268039548422e-06, "loss": 24.0855, "step": 123430 }, { "epoch": 0.24935660984902047, "grad_norm": 54.79731369018555, "learning_rate": 9.357096819831065e-06, "loss": 16.7066, "step": 123440 }, { "epoch": 0.2493768104817043, "grad_norm": 641.5457763671875, "learning_rate": 9.356925578877748e-06, "loss": 26.33, "step": 123450 }, { "epoch": 0.2493970111143881, "grad_norm": 90.40922546386719, "learning_rate": 9.3567543166893e-06, "loss": 26.7114, "step": 123460 }, { "epoch": 0.24941721174707193, "grad_norm": 316.8450927734375, "learning_rate": 9.356583033266565e-06, "loss": 25.1294, "step": 123470 }, { "epoch": 0.24943741237975572, "grad_norm": 373.0270690917969, "learning_rate": 9.356411728610368e-06, "loss": 21.8709, "step": 123480 }, { "epoch": 0.24945761301243954, "grad_norm": 164.14083862304688, "learning_rate": 9.356240402721552e-06, "loss": 24.8697, "step": 123490 }, { "epoch": 0.24947781364512336, "grad_norm": 12.21251106262207, "learning_rate": 9.356069055600949e-06, "loss": 20.3257, "step": 123500 }, { "epoch": 0.24949801427780718, "grad_norm": 435.3240661621094, "learning_rate": 9.35589768724939e-06, "loss": 24.8401, "step": 123510 }, { "epoch": 0.249518214910491, "grad_norm": 509.5332336425781, "learning_rate": 9.355726297667717e-06, "loss": 31.7652, "step": 123520 }, { "epoch": 0.24953841554317482, "grad_norm": 188.9264373779297, "learning_rate": 9.355554886856762e-06, "loss": 19.4416, "step": 123530 }, { "epoch": 0.24955861617585862, "grad_norm": 225.8197021484375, "learning_rate": 9.355383454817362e-06, "loss": 17.1683, "step": 123540 }, { "epoch": 0.24957881680854244, "grad_norm": 130.89111328125, "learning_rate": 9.355212001550349e-06, "loss": 33.8624, "step": 123550 }, { "epoch": 0.24959901744122626, "grad_norm": 163.8568878173828, "learning_rate": 9.35504052705656e-06, "loss": 22.8477, "step": 123560 }, { "epoch": 0.24961921807391008, "grad_norm": 338.4559020996094, "learning_rate": 9.354869031336835e-06, "loss": 22.3504, "step": 123570 }, { "epoch": 0.2496394187065939, "grad_norm": 162.35919189453125, "learning_rate": 9.354697514392005e-06, "loss": 35.0757, "step": 123580 }, { "epoch": 0.24965961933927772, "grad_norm": 259.9451904296875, "learning_rate": 9.354525976222907e-06, "loss": 23.1219, "step": 123590 }, { "epoch": 0.2496798199719615, "grad_norm": 125.93836212158203, "learning_rate": 9.354354416830377e-06, "loss": 29.6408, "step": 123600 }, { "epoch": 0.24970002060464533, "grad_norm": 236.37745666503906, "learning_rate": 9.354182836215252e-06, "loss": 19.7488, "step": 123610 }, { "epoch": 0.24972022123732915, "grad_norm": 290.1505432128906, "learning_rate": 9.35401123437837e-06, "loss": 27.0128, "step": 123620 }, { "epoch": 0.24974042187001297, "grad_norm": 0.0, "learning_rate": 9.353839611320563e-06, "loss": 30.1252, "step": 123630 }, { "epoch": 0.2497606225026968, "grad_norm": 513.1502685546875, "learning_rate": 9.35366796704267e-06, "loss": 61.6449, "step": 123640 }, { "epoch": 0.2497808231353806, "grad_norm": 307.8948974609375, "learning_rate": 9.353496301545529e-06, "loss": 34.7093, "step": 123650 }, { "epoch": 0.24980102376806443, "grad_norm": 631.41796875, "learning_rate": 9.353324614829974e-06, "loss": 29.1964, "step": 123660 }, { "epoch": 0.24982122440074822, "grad_norm": 329.22991943359375, "learning_rate": 9.353152906896842e-06, "loss": 9.7159, "step": 123670 }, { "epoch": 0.24984142503343204, "grad_norm": 672.3096923828125, "learning_rate": 9.352981177746972e-06, "loss": 32.4797, "step": 123680 }, { "epoch": 0.24986162566611586, "grad_norm": 343.5790710449219, "learning_rate": 9.3528094273812e-06, "loss": 15.785, "step": 123690 }, { "epoch": 0.24988182629879968, "grad_norm": 250.75917053222656, "learning_rate": 9.352637655800362e-06, "loss": 7.4175, "step": 123700 }, { "epoch": 0.2499020269314835, "grad_norm": 195.49691772460938, "learning_rate": 9.352465863005295e-06, "loss": 23.4509, "step": 123710 }, { "epoch": 0.24992222756416732, "grad_norm": 334.6227111816406, "learning_rate": 9.35229404899684e-06, "loss": 20.8433, "step": 123720 }, { "epoch": 0.2499424281968511, "grad_norm": 325.35418701171875, "learning_rate": 9.352122213775829e-06, "loss": 27.6399, "step": 123730 }, { "epoch": 0.24996262882953493, "grad_norm": 251.87184143066406, "learning_rate": 9.351950357343103e-06, "loss": 7.0904, "step": 123740 }, { "epoch": 0.24998282946221875, "grad_norm": 616.6466674804688, "learning_rate": 9.351778479699499e-06, "loss": 35.509, "step": 123750 }, { "epoch": 0.25000303009490255, "grad_norm": 230.53585815429688, "learning_rate": 9.351606580845854e-06, "loss": 16.0929, "step": 123760 }, { "epoch": 0.25002323072758637, "grad_norm": 369.78643798828125, "learning_rate": 9.351434660783007e-06, "loss": 18.3902, "step": 123770 }, { "epoch": 0.2500434313602702, "grad_norm": 173.87010192871094, "learning_rate": 9.351262719511796e-06, "loss": 11.7529, "step": 123780 }, { "epoch": 0.250063631992954, "grad_norm": 191.7705078125, "learning_rate": 9.351090757033056e-06, "loss": 21.3356, "step": 123790 }, { "epoch": 0.2500838326256378, "grad_norm": 181.66111755371094, "learning_rate": 9.35091877334763e-06, "loss": 28.725, "step": 123800 }, { "epoch": 0.25010403325832165, "grad_norm": 242.77456665039062, "learning_rate": 9.350746768456351e-06, "loss": 17.9965, "step": 123810 }, { "epoch": 0.25012423389100547, "grad_norm": 415.34228515625, "learning_rate": 9.350574742360062e-06, "loss": 21.5591, "step": 123820 }, { "epoch": 0.2501444345236893, "grad_norm": 413.66143798828125, "learning_rate": 9.350402695059597e-06, "loss": 27.6494, "step": 123830 }, { "epoch": 0.2501646351563731, "grad_norm": 437.8022766113281, "learning_rate": 9.3502306265558e-06, "loss": 35.0518, "step": 123840 }, { "epoch": 0.2501848357890569, "grad_norm": 200.77239990234375, "learning_rate": 9.350058536849505e-06, "loss": 14.55, "step": 123850 }, { "epoch": 0.25020503642174075, "grad_norm": 255.29359436035156, "learning_rate": 9.349886425941553e-06, "loss": 16.4505, "step": 123860 }, { "epoch": 0.25022523705442457, "grad_norm": 668.2625732421875, "learning_rate": 9.34971429383278e-06, "loss": 51.0935, "step": 123870 }, { "epoch": 0.2502454376871084, "grad_norm": 384.2834777832031, "learning_rate": 9.349542140524029e-06, "loss": 31.5052, "step": 123880 }, { "epoch": 0.25026563831979215, "grad_norm": 179.54202270507812, "learning_rate": 9.349369966016135e-06, "loss": 18.6849, "step": 123890 }, { "epoch": 0.25028583895247597, "grad_norm": 597.1260375976562, "learning_rate": 9.349197770309942e-06, "loss": 20.8699, "step": 123900 }, { "epoch": 0.2503060395851598, "grad_norm": 224.730712890625, "learning_rate": 9.349025553406286e-06, "loss": 16.6768, "step": 123910 }, { "epoch": 0.2503262402178436, "grad_norm": 364.6436462402344, "learning_rate": 9.348853315306006e-06, "loss": 16.8055, "step": 123920 }, { "epoch": 0.25034644085052743, "grad_norm": 214.30043029785156, "learning_rate": 9.348681056009942e-06, "loss": 19.484, "step": 123930 }, { "epoch": 0.25036664148321125, "grad_norm": 159.8651123046875, "learning_rate": 9.348508775518935e-06, "loss": 13.1827, "step": 123940 }, { "epoch": 0.25038684211589507, "grad_norm": 244.53436279296875, "learning_rate": 9.348336473833824e-06, "loss": 23.7708, "step": 123950 }, { "epoch": 0.2504070427485789, "grad_norm": 45.33531188964844, "learning_rate": 9.348164150955448e-06, "loss": 18.3475, "step": 123960 }, { "epoch": 0.2504272433812627, "grad_norm": 219.2253875732422, "learning_rate": 9.347991806884646e-06, "loss": 19.6727, "step": 123970 }, { "epoch": 0.25044744401394653, "grad_norm": 287.69287109375, "learning_rate": 9.347819441622261e-06, "loss": 9.165, "step": 123980 }, { "epoch": 0.25046764464663035, "grad_norm": 707.6580810546875, "learning_rate": 9.347647055169132e-06, "loss": 32.5039, "step": 123990 }, { "epoch": 0.25048784527931417, "grad_norm": 296.4914855957031, "learning_rate": 9.347474647526095e-06, "loss": 24.5315, "step": 124000 }, { "epoch": 0.250508045911998, "grad_norm": 0.0, "learning_rate": 9.347302218693997e-06, "loss": 20.1231, "step": 124010 }, { "epoch": 0.25052824654468175, "grad_norm": 3435.32177734375, "learning_rate": 9.347129768673675e-06, "loss": 40.0886, "step": 124020 }, { "epoch": 0.2505484471773656, "grad_norm": 285.192626953125, "learning_rate": 9.346957297465968e-06, "loss": 14.1437, "step": 124030 }, { "epoch": 0.2505686478100494, "grad_norm": 24.632963180541992, "learning_rate": 9.34678480507172e-06, "loss": 20.2209, "step": 124040 }, { "epoch": 0.2505888484427332, "grad_norm": 178.5182647705078, "learning_rate": 9.34661229149177e-06, "loss": 10.0709, "step": 124050 }, { "epoch": 0.25060904907541703, "grad_norm": 290.93780517578125, "learning_rate": 9.346439756726959e-06, "loss": 30.1309, "step": 124060 }, { "epoch": 0.25062924970810085, "grad_norm": 268.833251953125, "learning_rate": 9.346267200778127e-06, "loss": 25.3874, "step": 124070 }, { "epoch": 0.2506494503407847, "grad_norm": 247.85108947753906, "learning_rate": 9.346094623646116e-06, "loss": 34.4358, "step": 124080 }, { "epoch": 0.2506696509734685, "grad_norm": 358.12646484375, "learning_rate": 9.345922025331765e-06, "loss": 19.616, "step": 124090 }, { "epoch": 0.2506898516061523, "grad_norm": 429.3909606933594, "learning_rate": 9.34574940583592e-06, "loss": 41.686, "step": 124100 }, { "epoch": 0.25071005223883613, "grad_norm": 593.8930053710938, "learning_rate": 9.345576765159419e-06, "loss": 12.9103, "step": 124110 }, { "epoch": 0.25073025287151995, "grad_norm": 133.08767700195312, "learning_rate": 9.345404103303104e-06, "loss": 22.221, "step": 124120 }, { "epoch": 0.2507504535042038, "grad_norm": 178.56536865234375, "learning_rate": 9.345231420267816e-06, "loss": 28.2989, "step": 124130 }, { "epoch": 0.2507706541368876, "grad_norm": 190.33303833007812, "learning_rate": 9.345058716054396e-06, "loss": 25.6046, "step": 124140 }, { "epoch": 0.25079085476957136, "grad_norm": 281.7882995605469, "learning_rate": 9.344885990663689e-06, "loss": 16.1992, "step": 124150 }, { "epoch": 0.2508110554022552, "grad_norm": 132.10134887695312, "learning_rate": 9.344713244096533e-06, "loss": 28.9698, "step": 124160 }, { "epoch": 0.250831256034939, "grad_norm": 474.2137145996094, "learning_rate": 9.344540476353772e-06, "loss": 26.2226, "step": 124170 }, { "epoch": 0.2508514566676228, "grad_norm": 408.5567626953125, "learning_rate": 9.344367687436246e-06, "loss": 18.6953, "step": 124180 }, { "epoch": 0.25087165730030664, "grad_norm": 157.7684783935547, "learning_rate": 9.344194877344802e-06, "loss": 18.0561, "step": 124190 }, { "epoch": 0.25089185793299046, "grad_norm": 225.82508850097656, "learning_rate": 9.344022046080277e-06, "loss": 12.7785, "step": 124200 }, { "epoch": 0.2509120585656743, "grad_norm": 105.58959197998047, "learning_rate": 9.343849193643517e-06, "loss": 15.6806, "step": 124210 }, { "epoch": 0.2509322591983581, "grad_norm": 248.32456970214844, "learning_rate": 9.343676320035362e-06, "loss": 14.6341, "step": 124220 }, { "epoch": 0.2509524598310419, "grad_norm": 494.2822265625, "learning_rate": 9.343503425256655e-06, "loss": 29.2746, "step": 124230 }, { "epoch": 0.25097266046372574, "grad_norm": 70.2621841430664, "learning_rate": 9.343330509308239e-06, "loss": 16.7526, "step": 124240 }, { "epoch": 0.25099286109640956, "grad_norm": 84.9740982055664, "learning_rate": 9.343157572190957e-06, "loss": 18.4734, "step": 124250 }, { "epoch": 0.2510130617290934, "grad_norm": 239.07630920410156, "learning_rate": 9.342984613905653e-06, "loss": 21.9763, "step": 124260 }, { "epoch": 0.2510332623617772, "grad_norm": 201.55491638183594, "learning_rate": 9.342811634453168e-06, "loss": 12.0071, "step": 124270 }, { "epoch": 0.25105346299446096, "grad_norm": 237.51779174804688, "learning_rate": 9.342638633834344e-06, "loss": 16.2464, "step": 124280 }, { "epoch": 0.2510736636271448, "grad_norm": 302.1296081542969, "learning_rate": 9.342465612050028e-06, "loss": 18.1939, "step": 124290 }, { "epoch": 0.2510938642598286, "grad_norm": 783.841552734375, "learning_rate": 9.342292569101061e-06, "loss": 30.8615, "step": 124300 }, { "epoch": 0.2511140648925124, "grad_norm": 562.7561645507812, "learning_rate": 9.342119504988287e-06, "loss": 27.7818, "step": 124310 }, { "epoch": 0.25113426552519624, "grad_norm": 267.5962219238281, "learning_rate": 9.341946419712549e-06, "loss": 22.4537, "step": 124320 }, { "epoch": 0.25115446615788006, "grad_norm": 556.510498046875, "learning_rate": 9.341773313274689e-06, "loss": 13.7982, "step": 124330 }, { "epoch": 0.2511746667905639, "grad_norm": 456.75445556640625, "learning_rate": 9.341600185675555e-06, "loss": 26.6828, "step": 124340 }, { "epoch": 0.2511948674232477, "grad_norm": 175.25938415527344, "learning_rate": 9.341427036915987e-06, "loss": 28.2716, "step": 124350 }, { "epoch": 0.2512150680559315, "grad_norm": 299.0726013183594, "learning_rate": 9.34125386699683e-06, "loss": 19.2651, "step": 124360 }, { "epoch": 0.25123526868861534, "grad_norm": 360.4134826660156, "learning_rate": 9.341080675918927e-06, "loss": 15.0368, "step": 124370 }, { "epoch": 0.25125546932129916, "grad_norm": 270.75042724609375, "learning_rate": 9.340907463683126e-06, "loss": 29.4371, "step": 124380 }, { "epoch": 0.251275669953983, "grad_norm": 340.90972900390625, "learning_rate": 9.340734230290267e-06, "loss": 18.6235, "step": 124390 }, { "epoch": 0.25129587058666675, "grad_norm": 371.0257873535156, "learning_rate": 9.340560975741198e-06, "loss": 25.8623, "step": 124400 }, { "epoch": 0.25131607121935057, "grad_norm": 75.68794250488281, "learning_rate": 9.340387700036758e-06, "loss": 23.008, "step": 124410 }, { "epoch": 0.2513362718520344, "grad_norm": 350.9789733886719, "learning_rate": 9.340214403177797e-06, "loss": 20.2839, "step": 124420 }, { "epoch": 0.2513564724847182, "grad_norm": 182.20391845703125, "learning_rate": 9.340041085165157e-06, "loss": 20.1371, "step": 124430 }, { "epoch": 0.251376673117402, "grad_norm": 279.5919494628906, "learning_rate": 9.339867745999682e-06, "loss": 19.0888, "step": 124440 }, { "epoch": 0.25139687375008585, "grad_norm": 383.01837158203125, "learning_rate": 9.339694385682219e-06, "loss": 12.1423, "step": 124450 }, { "epoch": 0.25141707438276967, "grad_norm": 317.1790466308594, "learning_rate": 9.339521004213611e-06, "loss": 19.7499, "step": 124460 }, { "epoch": 0.2514372750154535, "grad_norm": 229.35076904296875, "learning_rate": 9.339347601594704e-06, "loss": 23.5814, "step": 124470 }, { "epoch": 0.2514574756481373, "grad_norm": 480.51361083984375, "learning_rate": 9.339174177826345e-06, "loss": 16.0234, "step": 124480 }, { "epoch": 0.2514776762808211, "grad_norm": 443.734619140625, "learning_rate": 9.339000732909376e-06, "loss": 17.2467, "step": 124490 }, { "epoch": 0.25149787691350495, "grad_norm": 198.60899353027344, "learning_rate": 9.338827266844643e-06, "loss": 24.2906, "step": 124500 }, { "epoch": 0.25151807754618877, "grad_norm": 255.68441772460938, "learning_rate": 9.338653779632993e-06, "loss": 18.0378, "step": 124510 }, { "epoch": 0.2515382781788726, "grad_norm": 638.0189208984375, "learning_rate": 9.33848027127527e-06, "loss": 28.8514, "step": 124520 }, { "epoch": 0.25155847881155635, "grad_norm": 290.4230651855469, "learning_rate": 9.33830674177232e-06, "loss": 14.4071, "step": 124530 }, { "epoch": 0.25157867944424017, "grad_norm": 232.5870361328125, "learning_rate": 9.338133191124992e-06, "loss": 52.2937, "step": 124540 }, { "epoch": 0.251598880076924, "grad_norm": 0.0, "learning_rate": 9.337959619334125e-06, "loss": 20.8495, "step": 124550 }, { "epoch": 0.2516190807096078, "grad_norm": 435.4974670410156, "learning_rate": 9.337786026400573e-06, "loss": 21.0717, "step": 124560 }, { "epoch": 0.25163928134229163, "grad_norm": 92.65298461914062, "learning_rate": 9.337612412325174e-06, "loss": 27.3026, "step": 124570 }, { "epoch": 0.25165948197497545, "grad_norm": 447.2137756347656, "learning_rate": 9.33743877710878e-06, "loss": 20.9607, "step": 124580 }, { "epoch": 0.25167968260765927, "grad_norm": 365.7508239746094, "learning_rate": 9.337265120752236e-06, "loss": 21.5499, "step": 124590 }, { "epoch": 0.2516998832403431, "grad_norm": 234.18800354003906, "learning_rate": 9.337091443256388e-06, "loss": 13.2347, "step": 124600 }, { "epoch": 0.2517200838730269, "grad_norm": 367.9451904296875, "learning_rate": 9.336917744622083e-06, "loss": 25.392, "step": 124610 }, { "epoch": 0.25174028450571073, "grad_norm": 128.454833984375, "learning_rate": 9.336744024850165e-06, "loss": 25.4457, "step": 124620 }, { "epoch": 0.25176048513839455, "grad_norm": 53.5908203125, "learning_rate": 9.336570283941483e-06, "loss": 11.9431, "step": 124630 }, { "epoch": 0.25178068577107837, "grad_norm": 136.16563415527344, "learning_rate": 9.336396521896884e-06, "loss": 17.966, "step": 124640 }, { "epoch": 0.2518008864037622, "grad_norm": 573.5731201171875, "learning_rate": 9.336222738717214e-06, "loss": 21.4341, "step": 124650 }, { "epoch": 0.25182108703644596, "grad_norm": 455.7706604003906, "learning_rate": 9.33604893440332e-06, "loss": 22.2333, "step": 124660 }, { "epoch": 0.2518412876691298, "grad_norm": 266.3859558105469, "learning_rate": 9.33587510895605e-06, "loss": 28.805, "step": 124670 }, { "epoch": 0.2518614883018136, "grad_norm": 294.3205261230469, "learning_rate": 9.335701262376249e-06, "loss": 38.9596, "step": 124680 }, { "epoch": 0.2518816889344974, "grad_norm": 198.85488891601562, "learning_rate": 9.335527394664767e-06, "loss": 30.3623, "step": 124690 }, { "epoch": 0.25190188956718124, "grad_norm": 41.94761657714844, "learning_rate": 9.33535350582245e-06, "loss": 10.7231, "step": 124700 }, { "epoch": 0.25192209019986506, "grad_norm": 461.37457275390625, "learning_rate": 9.335179595850147e-06, "loss": 34.2405, "step": 124710 }, { "epoch": 0.2519422908325489, "grad_norm": 102.87339782714844, "learning_rate": 9.335005664748704e-06, "loss": 14.3101, "step": 124720 }, { "epoch": 0.2519624914652327, "grad_norm": 285.0992126464844, "learning_rate": 9.33483171251897e-06, "loss": 19.6407, "step": 124730 }, { "epoch": 0.2519826920979165, "grad_norm": 193.82872009277344, "learning_rate": 9.33465773916179e-06, "loss": 14.6809, "step": 124740 }, { "epoch": 0.25200289273060034, "grad_norm": 241.91380310058594, "learning_rate": 9.334483744678015e-06, "loss": 13.8757, "step": 124750 }, { "epoch": 0.25202309336328416, "grad_norm": 213.15020751953125, "learning_rate": 9.33430972906849e-06, "loss": 19.0529, "step": 124760 }, { "epoch": 0.252043293995968, "grad_norm": 625.1232299804688, "learning_rate": 9.334135692334067e-06, "loss": 29.7594, "step": 124770 }, { "epoch": 0.2520634946286518, "grad_norm": 682.9527587890625, "learning_rate": 9.333961634475593e-06, "loss": 29.5846, "step": 124780 }, { "epoch": 0.25208369526133556, "grad_norm": 543.7948608398438, "learning_rate": 9.333787555493915e-06, "loss": 20.8636, "step": 124790 }, { "epoch": 0.2521038958940194, "grad_norm": 319.0923767089844, "learning_rate": 9.333613455389883e-06, "loss": 13.624, "step": 124800 }, { "epoch": 0.2521240965267032, "grad_norm": 330.8236389160156, "learning_rate": 9.333439334164343e-06, "loss": 16.0879, "step": 124810 }, { "epoch": 0.252144297159387, "grad_norm": 435.4040832519531, "learning_rate": 9.333265191818145e-06, "loss": 27.631, "step": 124820 }, { "epoch": 0.25216449779207084, "grad_norm": 119.6236801147461, "learning_rate": 9.333091028352138e-06, "loss": 22.3374, "step": 124830 }, { "epoch": 0.25218469842475466, "grad_norm": 547.5093383789062, "learning_rate": 9.332916843767173e-06, "loss": 23.8678, "step": 124840 }, { "epoch": 0.2522048990574385, "grad_norm": 481.7539978027344, "learning_rate": 9.332742638064094e-06, "loss": 27.8122, "step": 124850 }, { "epoch": 0.2522250996901223, "grad_norm": 143.02764892578125, "learning_rate": 9.332568411243754e-06, "loss": 20.6971, "step": 124860 }, { "epoch": 0.2522453003228061, "grad_norm": 345.7292785644531, "learning_rate": 9.332394163307003e-06, "loss": 28.7941, "step": 124870 }, { "epoch": 0.25226550095548994, "grad_norm": 99.85334014892578, "learning_rate": 9.332219894254686e-06, "loss": 18.363, "step": 124880 }, { "epoch": 0.25228570158817376, "grad_norm": 112.85008239746094, "learning_rate": 9.332045604087656e-06, "loss": 20.632, "step": 124890 }, { "epoch": 0.2523059022208576, "grad_norm": 35.610538482666016, "learning_rate": 9.33187129280676e-06, "loss": 11.4066, "step": 124900 }, { "epoch": 0.25232610285354135, "grad_norm": 3.574336528778076, "learning_rate": 9.331696960412849e-06, "loss": 17.2343, "step": 124910 }, { "epoch": 0.25234630348622517, "grad_norm": 254.26499938964844, "learning_rate": 9.331522606906773e-06, "loss": 15.6364, "step": 124920 }, { "epoch": 0.252366504118909, "grad_norm": 50.86741638183594, "learning_rate": 9.331348232289382e-06, "loss": 15.2069, "step": 124930 }, { "epoch": 0.2523867047515928, "grad_norm": 395.1269226074219, "learning_rate": 9.331173836561522e-06, "loss": 50.2333, "step": 124940 }, { "epoch": 0.2524069053842766, "grad_norm": 354.30511474609375, "learning_rate": 9.330999419724048e-06, "loss": 32.1957, "step": 124950 }, { "epoch": 0.25242710601696045, "grad_norm": 475.2186584472656, "learning_rate": 9.330824981777808e-06, "loss": 20.7626, "step": 124960 }, { "epoch": 0.25244730664964427, "grad_norm": 270.8267822265625, "learning_rate": 9.330650522723653e-06, "loss": 26.7124, "step": 124970 }, { "epoch": 0.2524675072823281, "grad_norm": 494.4764099121094, "learning_rate": 9.33047604256243e-06, "loss": 32.1041, "step": 124980 }, { "epoch": 0.2524877079150119, "grad_norm": 240.25283813476562, "learning_rate": 9.330301541294994e-06, "loss": 33.3145, "step": 124990 }, { "epoch": 0.2525079085476957, "grad_norm": 229.66317749023438, "learning_rate": 9.330127018922195e-06, "loss": 16.9772, "step": 125000 }, { "epoch": 0.25252810918037955, "grad_norm": 213.88954162597656, "learning_rate": 9.32995247544488e-06, "loss": 24.5529, "step": 125010 }, { "epoch": 0.25254830981306337, "grad_norm": 232.55686950683594, "learning_rate": 9.329777910863902e-06, "loss": 12.5827, "step": 125020 }, { "epoch": 0.2525685104457472, "grad_norm": 255.99456787109375, "learning_rate": 9.32960332518011e-06, "loss": 22.7976, "step": 125030 }, { "epoch": 0.25258871107843095, "grad_norm": 94.27106475830078, "learning_rate": 9.32942871839436e-06, "loss": 13.7238, "step": 125040 }, { "epoch": 0.25260891171111477, "grad_norm": 621.2156372070312, "learning_rate": 9.329254090507498e-06, "loss": 16.8065, "step": 125050 }, { "epoch": 0.2526291123437986, "grad_norm": 377.02862548828125, "learning_rate": 9.329079441520377e-06, "loss": 25.6329, "step": 125060 }, { "epoch": 0.2526493129764824, "grad_norm": 228.85670471191406, "learning_rate": 9.328904771433846e-06, "loss": 46.8932, "step": 125070 }, { "epoch": 0.25266951360916623, "grad_norm": 46.960533142089844, "learning_rate": 9.32873008024876e-06, "loss": 15.0957, "step": 125080 }, { "epoch": 0.25268971424185005, "grad_norm": 117.58808898925781, "learning_rate": 9.328555367965969e-06, "loss": 15.3491, "step": 125090 }, { "epoch": 0.25270991487453387, "grad_norm": 461.1294860839844, "learning_rate": 9.328380634586322e-06, "loss": 47.3119, "step": 125100 }, { "epoch": 0.2527301155072177, "grad_norm": 472.2579040527344, "learning_rate": 9.328205880110675e-06, "loss": 26.5487, "step": 125110 }, { "epoch": 0.2527503161399015, "grad_norm": 229.46180725097656, "learning_rate": 9.328031104539876e-06, "loss": 29.1069, "step": 125120 }, { "epoch": 0.25277051677258533, "grad_norm": 4997.09716796875, "learning_rate": 9.32785630787478e-06, "loss": 29.8614, "step": 125130 }, { "epoch": 0.25279071740526915, "grad_norm": 1494.757568359375, "learning_rate": 9.327681490116233e-06, "loss": 31.6342, "step": 125140 }, { "epoch": 0.25281091803795297, "grad_norm": 522.27197265625, "learning_rate": 9.327506651265096e-06, "loss": 19.4945, "step": 125150 }, { "epoch": 0.2528311186706368, "grad_norm": 195.45606994628906, "learning_rate": 9.327331791322214e-06, "loss": 32.5437, "step": 125160 }, { "epoch": 0.25285131930332055, "grad_norm": 391.0869445800781, "learning_rate": 9.327156910288444e-06, "loss": 16.7898, "step": 125170 }, { "epoch": 0.2528715199360044, "grad_norm": 328.578369140625, "learning_rate": 9.326982008164633e-06, "loss": 10.8271, "step": 125180 }, { "epoch": 0.2528917205686882, "grad_norm": 492.76580810546875, "learning_rate": 9.326807084951639e-06, "loss": 25.2929, "step": 125190 }, { "epoch": 0.252911921201372, "grad_norm": 217.06048583984375, "learning_rate": 9.326632140650311e-06, "loss": 15.012, "step": 125200 }, { "epoch": 0.25293212183405583, "grad_norm": 170.3761749267578, "learning_rate": 9.326457175261503e-06, "loss": 37.3026, "step": 125210 }, { "epoch": 0.25295232246673965, "grad_norm": 490.0623779296875, "learning_rate": 9.326282188786066e-06, "loss": 30.7842, "step": 125220 }, { "epoch": 0.2529725230994235, "grad_norm": 185.15028381347656, "learning_rate": 9.326107181224857e-06, "loss": 31.3066, "step": 125230 }, { "epoch": 0.2529927237321073, "grad_norm": 286.67620849609375, "learning_rate": 9.325932152578726e-06, "loss": 32.0695, "step": 125240 }, { "epoch": 0.2530129243647911, "grad_norm": 201.3325958251953, "learning_rate": 9.325757102848523e-06, "loss": 26.4767, "step": 125250 }, { "epoch": 0.25303312499747493, "grad_norm": 199.63897705078125, "learning_rate": 9.325582032035108e-06, "loss": 19.9069, "step": 125260 }, { "epoch": 0.25305332563015875, "grad_norm": 187.22113037109375, "learning_rate": 9.325406940139327e-06, "loss": 19.9593, "step": 125270 }, { "epoch": 0.2530735262628426, "grad_norm": 461.49468994140625, "learning_rate": 9.32523182716204e-06, "loss": 22.8387, "step": 125280 }, { "epoch": 0.2530937268955264, "grad_norm": 159.59201049804688, "learning_rate": 9.325056693104099e-06, "loss": 26.8636, "step": 125290 }, { "epoch": 0.25311392752821016, "grad_norm": 578.2651977539062, "learning_rate": 9.324881537966355e-06, "loss": 18.7227, "step": 125300 }, { "epoch": 0.253134128160894, "grad_norm": 255.2460479736328, "learning_rate": 9.324706361749663e-06, "loss": 30.0312, "step": 125310 }, { "epoch": 0.2531543287935778, "grad_norm": 287.5524597167969, "learning_rate": 9.324531164454876e-06, "loss": 28.6109, "step": 125320 }, { "epoch": 0.2531745294262616, "grad_norm": 265.4790954589844, "learning_rate": 9.324355946082848e-06, "loss": 26.3125, "step": 125330 }, { "epoch": 0.25319473005894544, "grad_norm": 671.5846557617188, "learning_rate": 9.324180706634434e-06, "loss": 29.5127, "step": 125340 }, { "epoch": 0.25321493069162926, "grad_norm": 230.92584228515625, "learning_rate": 9.32400544611049e-06, "loss": 32.8245, "step": 125350 }, { "epoch": 0.2532351313243131, "grad_norm": 468.289794921875, "learning_rate": 9.323830164511865e-06, "loss": 31.5627, "step": 125360 }, { "epoch": 0.2532553319569969, "grad_norm": 346.8731384277344, "learning_rate": 9.323654861839418e-06, "loss": 15.6439, "step": 125370 }, { "epoch": 0.2532755325896807, "grad_norm": 337.8139953613281, "learning_rate": 9.323479538094001e-06, "loss": 25.2633, "step": 125380 }, { "epoch": 0.25329573322236454, "grad_norm": 178.82675170898438, "learning_rate": 9.323304193276468e-06, "loss": 14.5535, "step": 125390 }, { "epoch": 0.25331593385504836, "grad_norm": 189.02520751953125, "learning_rate": 9.323128827387675e-06, "loss": 28.925, "step": 125400 }, { "epoch": 0.2533361344877322, "grad_norm": 144.18157958984375, "learning_rate": 9.322953440428478e-06, "loss": 22.1393, "step": 125410 }, { "epoch": 0.253356335120416, "grad_norm": 130.12078857421875, "learning_rate": 9.322778032399728e-06, "loss": 9.6073, "step": 125420 }, { "epoch": 0.25337653575309976, "grad_norm": 81.38065338134766, "learning_rate": 9.322602603302285e-06, "loss": 17.1809, "step": 125430 }, { "epoch": 0.2533967363857836, "grad_norm": 97.88333892822266, "learning_rate": 9.322427153136999e-06, "loss": 14.1331, "step": 125440 }, { "epoch": 0.2534169370184674, "grad_norm": 273.32757568359375, "learning_rate": 9.322251681904728e-06, "loss": 16.5203, "step": 125450 }, { "epoch": 0.2534371376511512, "grad_norm": 301.1682434082031, "learning_rate": 9.322076189606326e-06, "loss": 23.1215, "step": 125460 }, { "epoch": 0.25345733828383504, "grad_norm": 100.85980224609375, "learning_rate": 9.32190067624265e-06, "loss": 25.5154, "step": 125470 }, { "epoch": 0.25347753891651886, "grad_norm": 542.1154174804688, "learning_rate": 9.321725141814553e-06, "loss": 26.0731, "step": 125480 }, { "epoch": 0.2534977395492027, "grad_norm": 199.27947998046875, "learning_rate": 9.321549586322894e-06, "loss": 31.1303, "step": 125490 }, { "epoch": 0.2535179401818865, "grad_norm": 262.4306945800781, "learning_rate": 9.321374009768525e-06, "loss": 9.1734, "step": 125500 }, { "epoch": 0.2535381408145703, "grad_norm": 289.0489501953125, "learning_rate": 9.321198412152303e-06, "loss": 15.0016, "step": 125510 }, { "epoch": 0.25355834144725414, "grad_norm": 143.01190185546875, "learning_rate": 9.321022793475082e-06, "loss": 21.2879, "step": 125520 }, { "epoch": 0.25357854207993796, "grad_norm": 460.72564697265625, "learning_rate": 9.320847153737724e-06, "loss": 20.2396, "step": 125530 }, { "epoch": 0.2535987427126218, "grad_norm": 1724.53173828125, "learning_rate": 9.320671492941079e-06, "loss": 90.0066, "step": 125540 }, { "epoch": 0.25361894334530555, "grad_norm": 107.61517333984375, "learning_rate": 9.320495811086005e-06, "loss": 29.6225, "step": 125550 }, { "epoch": 0.25363914397798937, "grad_norm": 113.27887725830078, "learning_rate": 9.320320108173359e-06, "loss": 12.2463, "step": 125560 }, { "epoch": 0.2536593446106732, "grad_norm": 21.763946533203125, "learning_rate": 9.320144384203997e-06, "loss": 16.5401, "step": 125570 }, { "epoch": 0.253679545243357, "grad_norm": 16.3815860748291, "learning_rate": 9.319968639178775e-06, "loss": 22.0476, "step": 125580 }, { "epoch": 0.2536997458760408, "grad_norm": 398.5463562011719, "learning_rate": 9.31979287309855e-06, "loss": 38.4361, "step": 125590 }, { "epoch": 0.25371994650872465, "grad_norm": 462.78643798828125, "learning_rate": 9.319617085964177e-06, "loss": 29.8813, "step": 125600 }, { "epoch": 0.25374014714140847, "grad_norm": 261.16448974609375, "learning_rate": 9.319441277776515e-06, "loss": 17.358, "step": 125610 }, { "epoch": 0.2537603477740923, "grad_norm": 342.7757568359375, "learning_rate": 9.31926544853642e-06, "loss": 23.6639, "step": 125620 }, { "epoch": 0.2537805484067761, "grad_norm": 190.1713104248047, "learning_rate": 9.319089598244751e-06, "loss": 19.1352, "step": 125630 }, { "epoch": 0.2538007490394599, "grad_norm": 294.1415710449219, "learning_rate": 9.318913726902361e-06, "loss": 11.6312, "step": 125640 }, { "epoch": 0.25382094967214375, "grad_norm": 271.00311279296875, "learning_rate": 9.31873783451011e-06, "loss": 25.0102, "step": 125650 }, { "epoch": 0.25384115030482757, "grad_norm": 141.09161376953125, "learning_rate": 9.318561921068856e-06, "loss": 22.6999, "step": 125660 }, { "epoch": 0.2538613509375114, "grad_norm": 381.7094421386719, "learning_rate": 9.318385986579453e-06, "loss": 28.2041, "step": 125670 }, { "epoch": 0.25388155157019515, "grad_norm": 181.6260986328125, "learning_rate": 9.31821003104276e-06, "loss": 14.2721, "step": 125680 }, { "epoch": 0.25390175220287897, "grad_norm": 252.18710327148438, "learning_rate": 9.318034054459637e-06, "loss": 20.9568, "step": 125690 }, { "epoch": 0.2539219528355628, "grad_norm": 187.28689575195312, "learning_rate": 9.317858056830938e-06, "loss": 24.6076, "step": 125700 }, { "epoch": 0.2539421534682466, "grad_norm": 386.4512634277344, "learning_rate": 9.317682038157523e-06, "loss": 25.9638, "step": 125710 }, { "epoch": 0.25396235410093043, "grad_norm": 440.57318115234375, "learning_rate": 9.31750599844025e-06, "loss": 40.7942, "step": 125720 }, { "epoch": 0.25398255473361425, "grad_norm": 188.8805389404297, "learning_rate": 9.317329937679976e-06, "loss": 14.4673, "step": 125730 }, { "epoch": 0.25400275536629807, "grad_norm": 434.0238037109375, "learning_rate": 9.31715385587756e-06, "loss": 20.5852, "step": 125740 }, { "epoch": 0.2540229559989819, "grad_norm": 0.0, "learning_rate": 9.316977753033858e-06, "loss": 26.7747, "step": 125750 }, { "epoch": 0.2540431566316657, "grad_norm": 288.9519958496094, "learning_rate": 9.316801629149732e-06, "loss": 23.3386, "step": 125760 }, { "epoch": 0.25406335726434953, "grad_norm": 247.3369140625, "learning_rate": 9.316625484226039e-06, "loss": 19.8987, "step": 125770 }, { "epoch": 0.25408355789703335, "grad_norm": 310.08734130859375, "learning_rate": 9.316449318263635e-06, "loss": 11.6422, "step": 125780 }, { "epoch": 0.25410375852971717, "grad_norm": 343.566162109375, "learning_rate": 9.316273131263382e-06, "loss": 28.2791, "step": 125790 }, { "epoch": 0.254123959162401, "grad_norm": 242.37730407714844, "learning_rate": 9.316096923226135e-06, "loss": 17.7466, "step": 125800 }, { "epoch": 0.25414415979508476, "grad_norm": 320.95953369140625, "learning_rate": 9.315920694152758e-06, "loss": 37.231, "step": 125810 }, { "epoch": 0.2541643604277686, "grad_norm": 226.88107299804688, "learning_rate": 9.315744444044105e-06, "loss": 27.4542, "step": 125820 }, { "epoch": 0.2541845610604524, "grad_norm": 307.2884826660156, "learning_rate": 9.315568172901038e-06, "loss": 34.3531, "step": 125830 }, { "epoch": 0.2542047616931362, "grad_norm": 131.5980682373047, "learning_rate": 9.315391880724414e-06, "loss": 29.8471, "step": 125840 }, { "epoch": 0.25422496232582004, "grad_norm": 205.1999053955078, "learning_rate": 9.315215567515095e-06, "loss": 11.8221, "step": 125850 }, { "epoch": 0.25424516295850386, "grad_norm": 230.94229125976562, "learning_rate": 9.315039233273937e-06, "loss": 28.4515, "step": 125860 }, { "epoch": 0.2542653635911877, "grad_norm": 422.6981506347656, "learning_rate": 9.314862878001802e-06, "loss": 22.4447, "step": 125870 }, { "epoch": 0.2542855642238715, "grad_norm": 16.488677978515625, "learning_rate": 9.314686501699548e-06, "loss": 27.791, "step": 125880 }, { "epoch": 0.2543057648565553, "grad_norm": 583.900390625, "learning_rate": 9.314510104368036e-06, "loss": 34.1691, "step": 125890 }, { "epoch": 0.25432596548923914, "grad_norm": 327.4513854980469, "learning_rate": 9.314333686008125e-06, "loss": 8.2879, "step": 125900 }, { "epoch": 0.25434616612192296, "grad_norm": 219.04054260253906, "learning_rate": 9.314157246620677e-06, "loss": 27.4592, "step": 125910 }, { "epoch": 0.2543663667546068, "grad_norm": 361.10211181640625, "learning_rate": 9.313980786206547e-06, "loss": 14.5885, "step": 125920 }, { "epoch": 0.2543865673872906, "grad_norm": 375.0379638671875, "learning_rate": 9.313804304766598e-06, "loss": 24.6759, "step": 125930 }, { "epoch": 0.25440676801997436, "grad_norm": 354.3824768066406, "learning_rate": 9.31362780230169e-06, "loss": 23.1383, "step": 125940 }, { "epoch": 0.2544269686526582, "grad_norm": 52.6887321472168, "learning_rate": 9.313451278812684e-06, "loss": 23.3823, "step": 125950 }, { "epoch": 0.254447169285342, "grad_norm": 241.7565155029297, "learning_rate": 9.31327473430044e-06, "loss": 14.7425, "step": 125960 }, { "epoch": 0.2544673699180258, "grad_norm": 199.9589385986328, "learning_rate": 9.313098168765818e-06, "loss": 17.7279, "step": 125970 }, { "epoch": 0.25448757055070964, "grad_norm": 10.749573707580566, "learning_rate": 9.312921582209678e-06, "loss": 22.7178, "step": 125980 }, { "epoch": 0.25450777118339346, "grad_norm": 297.8114318847656, "learning_rate": 9.31274497463288e-06, "loss": 17.81, "step": 125990 }, { "epoch": 0.2545279718160773, "grad_norm": 740.4017333984375, "learning_rate": 9.312568346036288e-06, "loss": 37.8648, "step": 126000 }, { "epoch": 0.2545481724487611, "grad_norm": 207.96878051757812, "learning_rate": 9.31239169642076e-06, "loss": 18.1566, "step": 126010 }, { "epoch": 0.2545683730814449, "grad_norm": 435.4790344238281, "learning_rate": 9.312215025787159e-06, "loss": 28.2671, "step": 126020 }, { "epoch": 0.25458857371412874, "grad_norm": 107.63961791992188, "learning_rate": 9.312038334136345e-06, "loss": 11.311, "step": 126030 }, { "epoch": 0.25460877434681256, "grad_norm": 210.0865478515625, "learning_rate": 9.311861621469178e-06, "loss": 14.7019, "step": 126040 }, { "epoch": 0.2546289749794964, "grad_norm": 354.4222106933594, "learning_rate": 9.31168488778652e-06, "loss": 16.2684, "step": 126050 }, { "epoch": 0.2546491756121802, "grad_norm": 331.6523132324219, "learning_rate": 9.311508133089234e-06, "loss": 18.4229, "step": 126060 }, { "epoch": 0.25466937624486397, "grad_norm": 370.1590270996094, "learning_rate": 9.311331357378181e-06, "loss": 30.8924, "step": 126070 }, { "epoch": 0.2546895768775478, "grad_norm": 329.59100341796875, "learning_rate": 9.31115456065422e-06, "loss": 13.7175, "step": 126080 }, { "epoch": 0.2547097775102316, "grad_norm": 119.8864974975586, "learning_rate": 9.310977742918215e-06, "loss": 25.3624, "step": 126090 }, { "epoch": 0.2547299781429154, "grad_norm": 223.53639221191406, "learning_rate": 9.31080090417103e-06, "loss": 18.1706, "step": 126100 }, { "epoch": 0.25475017877559925, "grad_norm": 352.60400390625, "learning_rate": 9.310624044413521e-06, "loss": 19.2033, "step": 126110 }, { "epoch": 0.25477037940828307, "grad_norm": 154.44265747070312, "learning_rate": 9.310447163646554e-06, "loss": 25.538, "step": 126120 }, { "epoch": 0.2547905800409669, "grad_norm": 264.8034362792969, "learning_rate": 9.31027026187099e-06, "loss": 17.9322, "step": 126130 }, { "epoch": 0.2548107806736507, "grad_norm": 39.84322738647461, "learning_rate": 9.31009333908769e-06, "loss": 19.6244, "step": 126140 }, { "epoch": 0.2548309813063345, "grad_norm": 223.84596252441406, "learning_rate": 9.309916395297523e-06, "loss": 23.4529, "step": 126150 }, { "epoch": 0.25485118193901835, "grad_norm": 535.3505859375, "learning_rate": 9.309739430501341e-06, "loss": 20.1318, "step": 126160 }, { "epoch": 0.25487138257170217, "grad_norm": 301.2459411621094, "learning_rate": 9.309562444700016e-06, "loss": 16.6626, "step": 126170 }, { "epoch": 0.254891583204386, "grad_norm": 52.512786865234375, "learning_rate": 9.309385437894402e-06, "loss": 12.5871, "step": 126180 }, { "epoch": 0.25491178383706975, "grad_norm": 188.44049072265625, "learning_rate": 9.309208410085368e-06, "loss": 28.2221, "step": 126190 }, { "epoch": 0.25493198446975357, "grad_norm": 467.5872802734375, "learning_rate": 9.309031361273775e-06, "loss": 24.3183, "step": 126200 }, { "epoch": 0.2549521851024374, "grad_norm": 211.31349182128906, "learning_rate": 9.308854291460487e-06, "loss": 17.392, "step": 126210 }, { "epoch": 0.2549723857351212, "grad_norm": 498.9366455078125, "learning_rate": 9.308677200646364e-06, "loss": 26.5786, "step": 126220 }, { "epoch": 0.25499258636780503, "grad_norm": 313.9642333984375, "learning_rate": 9.308500088832271e-06, "loss": 31.6491, "step": 126230 }, { "epoch": 0.25501278700048885, "grad_norm": 63.362548828125, "learning_rate": 9.308322956019073e-06, "loss": 14.7855, "step": 126240 }, { "epoch": 0.25503298763317267, "grad_norm": 224.39247131347656, "learning_rate": 9.30814580220763e-06, "loss": 21.3469, "step": 126250 }, { "epoch": 0.2550531882658565, "grad_norm": 46.55403518676758, "learning_rate": 9.307968627398807e-06, "loss": 15.9684, "step": 126260 }, { "epoch": 0.2550733888985403, "grad_norm": 140.37899780273438, "learning_rate": 9.307791431593468e-06, "loss": 33.4488, "step": 126270 }, { "epoch": 0.25509358953122413, "grad_norm": 357.38299560546875, "learning_rate": 9.307614214792474e-06, "loss": 21.8978, "step": 126280 }, { "epoch": 0.25511379016390795, "grad_norm": 374.3791809082031, "learning_rate": 9.307436976996692e-06, "loss": 24.2551, "step": 126290 }, { "epoch": 0.25513399079659177, "grad_norm": 518.65380859375, "learning_rate": 9.307259718206984e-06, "loss": 16.9555, "step": 126300 }, { "epoch": 0.2551541914292756, "grad_norm": 282.0771179199219, "learning_rate": 9.307082438424216e-06, "loss": 23.3983, "step": 126310 }, { "epoch": 0.25517439206195935, "grad_norm": 104.3943862915039, "learning_rate": 9.30690513764925e-06, "loss": 15.1709, "step": 126320 }, { "epoch": 0.2551945926946432, "grad_norm": 298.73016357421875, "learning_rate": 9.30672781588295e-06, "loss": 35.8566, "step": 126330 }, { "epoch": 0.255214793327327, "grad_norm": 280.745849609375, "learning_rate": 9.306550473126182e-06, "loss": 16.2384, "step": 126340 }, { "epoch": 0.2552349939600108, "grad_norm": 223.52572631835938, "learning_rate": 9.30637310937981e-06, "loss": 11.3731, "step": 126350 }, { "epoch": 0.25525519459269463, "grad_norm": 56.45671844482422, "learning_rate": 9.306195724644695e-06, "loss": 15.8941, "step": 126360 }, { "epoch": 0.25527539522537845, "grad_norm": 64.4088363647461, "learning_rate": 9.306018318921707e-06, "loss": 26.7766, "step": 126370 }, { "epoch": 0.2552955958580623, "grad_norm": 112.83087921142578, "learning_rate": 9.305840892211705e-06, "loss": 32.7322, "step": 126380 }, { "epoch": 0.2553157964907461, "grad_norm": 324.2310485839844, "learning_rate": 9.30566344451556e-06, "loss": 30.024, "step": 126390 }, { "epoch": 0.2553359971234299, "grad_norm": 284.4088439941406, "learning_rate": 9.305485975834132e-06, "loss": 24.54, "step": 126400 }, { "epoch": 0.25535619775611373, "grad_norm": 257.8036804199219, "learning_rate": 9.305308486168288e-06, "loss": 24.606, "step": 126410 }, { "epoch": 0.25537639838879755, "grad_norm": 5.635014533996582, "learning_rate": 9.305130975518893e-06, "loss": 11.7667, "step": 126420 }, { "epoch": 0.2553965990214814, "grad_norm": 408.3467102050781, "learning_rate": 9.304953443886811e-06, "loss": 40.8317, "step": 126430 }, { "epoch": 0.2554167996541652, "grad_norm": 232.42335510253906, "learning_rate": 9.304775891272908e-06, "loss": 26.212, "step": 126440 }, { "epoch": 0.25543700028684896, "grad_norm": 286.3448486328125, "learning_rate": 9.30459831767805e-06, "loss": 24.4721, "step": 126450 }, { "epoch": 0.2554572009195328, "grad_norm": 122.07353973388672, "learning_rate": 9.304420723103101e-06, "loss": 16.2988, "step": 126460 }, { "epoch": 0.2554774015522166, "grad_norm": 95.06617736816406, "learning_rate": 9.304243107548928e-06, "loss": 31.1577, "step": 126470 }, { "epoch": 0.2554976021849004, "grad_norm": 213.85702514648438, "learning_rate": 9.304065471016396e-06, "loss": 23.6866, "step": 126480 }, { "epoch": 0.25551780281758424, "grad_norm": 305.0613098144531, "learning_rate": 9.303887813506372e-06, "loss": 14.273, "step": 126490 }, { "epoch": 0.25553800345026806, "grad_norm": 284.95831298828125, "learning_rate": 9.30371013501972e-06, "loss": 14.3729, "step": 126500 }, { "epoch": 0.2555582040829519, "grad_norm": 180.46234130859375, "learning_rate": 9.303532435557305e-06, "loss": 18.6997, "step": 126510 }, { "epoch": 0.2555784047156357, "grad_norm": 400.1902160644531, "learning_rate": 9.303354715119997e-06, "loss": 31.21, "step": 126520 }, { "epoch": 0.2555986053483195, "grad_norm": 172.0705108642578, "learning_rate": 9.30317697370866e-06, "loss": 27.2764, "step": 126530 }, { "epoch": 0.25561880598100334, "grad_norm": 449.38702392578125, "learning_rate": 9.302999211324159e-06, "loss": 23.0268, "step": 126540 }, { "epoch": 0.25563900661368716, "grad_norm": 417.41259765625, "learning_rate": 9.302821427967363e-06, "loss": 44.2628, "step": 126550 }, { "epoch": 0.255659207246371, "grad_norm": 50.55282211303711, "learning_rate": 9.302643623639136e-06, "loss": 30.515, "step": 126560 }, { "epoch": 0.2556794078790548, "grad_norm": 1.9983412027359009, "learning_rate": 9.302465798340347e-06, "loss": 20.8452, "step": 126570 }, { "epoch": 0.25569960851173856, "grad_norm": 531.14208984375, "learning_rate": 9.302287952071862e-06, "loss": 36.3822, "step": 126580 }, { "epoch": 0.2557198091444224, "grad_norm": 280.7905578613281, "learning_rate": 9.302110084834545e-06, "loss": 21.6177, "step": 126590 }, { "epoch": 0.2557400097771062, "grad_norm": 249.60623168945312, "learning_rate": 9.301932196629267e-06, "loss": 29.4346, "step": 126600 }, { "epoch": 0.25576021040979, "grad_norm": 306.4937744140625, "learning_rate": 9.301754287456894e-06, "loss": 21.3279, "step": 126610 }, { "epoch": 0.25578041104247384, "grad_norm": 891.42333984375, "learning_rate": 9.301576357318291e-06, "loss": 24.801, "step": 126620 }, { "epoch": 0.25580061167515766, "grad_norm": 245.6976776123047, "learning_rate": 9.301398406214326e-06, "loss": 32.4785, "step": 126630 }, { "epoch": 0.2558208123078415, "grad_norm": 269.7202453613281, "learning_rate": 9.301220434145868e-06, "loss": 21.4803, "step": 126640 }, { "epoch": 0.2558410129405253, "grad_norm": 305.4909362792969, "learning_rate": 9.301042441113784e-06, "loss": 19.2593, "step": 126650 }, { "epoch": 0.2558612135732091, "grad_norm": 421.9887390136719, "learning_rate": 9.300864427118938e-06, "loss": 20.2981, "step": 126660 }, { "epoch": 0.25588141420589294, "grad_norm": 685.07763671875, "learning_rate": 9.300686392162203e-06, "loss": 21.6444, "step": 126670 }, { "epoch": 0.25590161483857676, "grad_norm": 0.0, "learning_rate": 9.300508336244443e-06, "loss": 13.2913, "step": 126680 }, { "epoch": 0.2559218154712606, "grad_norm": 335.9877014160156, "learning_rate": 9.300330259366528e-06, "loss": 8.0545, "step": 126690 }, { "epoch": 0.2559420161039444, "grad_norm": 625.8668823242188, "learning_rate": 9.300152161529325e-06, "loss": 20.793, "step": 126700 }, { "epoch": 0.25596221673662817, "grad_norm": 175.83914184570312, "learning_rate": 9.299974042733701e-06, "loss": 20.1509, "step": 126710 }, { "epoch": 0.255982417369312, "grad_norm": 313.64495849609375, "learning_rate": 9.299795902980524e-06, "loss": 32.7205, "step": 126720 }, { "epoch": 0.2560026180019958, "grad_norm": 548.6371459960938, "learning_rate": 9.299617742270665e-06, "loss": 14.3827, "step": 126730 }, { "epoch": 0.2560228186346796, "grad_norm": 775.5892333984375, "learning_rate": 9.29943956060499e-06, "loss": 24.0056, "step": 126740 }, { "epoch": 0.25604301926736345, "grad_norm": 372.0305480957031, "learning_rate": 9.299261357984368e-06, "loss": 29.3446, "step": 126750 }, { "epoch": 0.25606321990004727, "grad_norm": 312.3209228515625, "learning_rate": 9.299083134409667e-06, "loss": 19.075, "step": 126760 }, { "epoch": 0.2560834205327311, "grad_norm": 759.9981079101562, "learning_rate": 9.298904889881757e-06, "loss": 25.4314, "step": 126770 }, { "epoch": 0.2561036211654149, "grad_norm": 74.96871185302734, "learning_rate": 9.298726624401507e-06, "loss": 15.698, "step": 126780 }, { "epoch": 0.2561238217980987, "grad_norm": 130.21954345703125, "learning_rate": 9.298548337969784e-06, "loss": 23.6795, "step": 126790 }, { "epoch": 0.25614402243078255, "grad_norm": 0.0, "learning_rate": 9.298370030587456e-06, "loss": 19.6005, "step": 126800 }, { "epoch": 0.25616422306346637, "grad_norm": 196.06455993652344, "learning_rate": 9.298191702255395e-06, "loss": 22.8001, "step": 126810 }, { "epoch": 0.2561844236961502, "grad_norm": 161.98635864257812, "learning_rate": 9.298013352974469e-06, "loss": 22.5654, "step": 126820 }, { "epoch": 0.25620462432883395, "grad_norm": 194.8225555419922, "learning_rate": 9.297834982745548e-06, "loss": 28.5489, "step": 126830 }, { "epoch": 0.25622482496151777, "grad_norm": 208.1326446533203, "learning_rate": 9.2976565915695e-06, "loss": 16.5274, "step": 126840 }, { "epoch": 0.2562450255942016, "grad_norm": 213.13641357421875, "learning_rate": 9.297478179447195e-06, "loss": 21.4055, "step": 126850 }, { "epoch": 0.2562652262268854, "grad_norm": 485.0968017578125, "learning_rate": 9.297299746379503e-06, "loss": 24.6399, "step": 126860 }, { "epoch": 0.25628542685956923, "grad_norm": 802.504638671875, "learning_rate": 9.297121292367293e-06, "loss": 21.6382, "step": 126870 }, { "epoch": 0.25630562749225305, "grad_norm": 323.2547912597656, "learning_rate": 9.296942817411433e-06, "loss": 16.8011, "step": 126880 }, { "epoch": 0.25632582812493687, "grad_norm": 120.02050018310547, "learning_rate": 9.296764321512797e-06, "loss": 33.9419, "step": 126890 }, { "epoch": 0.2563460287576207, "grad_norm": 253.41372680664062, "learning_rate": 9.296585804672253e-06, "loss": 15.5436, "step": 126900 }, { "epoch": 0.2563662293903045, "grad_norm": 337.2938537597656, "learning_rate": 9.29640726689067e-06, "loss": 42.0643, "step": 126910 }, { "epoch": 0.25638643002298833, "grad_norm": 746.387939453125, "learning_rate": 9.29622870816892e-06, "loss": 48.7629, "step": 126920 }, { "epoch": 0.25640663065567215, "grad_norm": 349.1642150878906, "learning_rate": 9.29605012850787e-06, "loss": 32.4375, "step": 126930 }, { "epoch": 0.25642683128835597, "grad_norm": 230.43325805664062, "learning_rate": 9.295871527908396e-06, "loss": 26.1364, "step": 126940 }, { "epoch": 0.2564470319210398, "grad_norm": 0.0, "learning_rate": 9.295692906371362e-06, "loss": 34.9863, "step": 126950 }, { "epoch": 0.25646723255372356, "grad_norm": 295.1181945800781, "learning_rate": 9.295514263897644e-06, "loss": 14.4285, "step": 126960 }, { "epoch": 0.2564874331864074, "grad_norm": 602.3079833984375, "learning_rate": 9.29533560048811e-06, "loss": 28.6545, "step": 126970 }, { "epoch": 0.2565076338190912, "grad_norm": 167.2379608154297, "learning_rate": 9.295156916143631e-06, "loss": 31.8269, "step": 126980 }, { "epoch": 0.256527834451775, "grad_norm": 333.9062805175781, "learning_rate": 9.294978210865078e-06, "loss": 22.9826, "step": 126990 }, { "epoch": 0.25654803508445884, "grad_norm": 269.784423828125, "learning_rate": 9.294799484653323e-06, "loss": 21.5111, "step": 127000 }, { "epoch": 0.25656823571714266, "grad_norm": 345.1800842285156, "learning_rate": 9.294620737509235e-06, "loss": 24.4462, "step": 127010 }, { "epoch": 0.2565884363498265, "grad_norm": 360.3450927734375, "learning_rate": 9.294441969433688e-06, "loss": 25.2911, "step": 127020 }, { "epoch": 0.2566086369825103, "grad_norm": 226.18255615234375, "learning_rate": 9.294263180427549e-06, "loss": 21.6256, "step": 127030 }, { "epoch": 0.2566288376151941, "grad_norm": 202.57342529296875, "learning_rate": 9.294084370491695e-06, "loss": 23.3522, "step": 127040 }, { "epoch": 0.25664903824787794, "grad_norm": 666.13818359375, "learning_rate": 9.293905539626992e-06, "loss": 30.7116, "step": 127050 }, { "epoch": 0.25666923888056176, "grad_norm": 210.373779296875, "learning_rate": 9.293726687834318e-06, "loss": 22.6694, "step": 127060 }, { "epoch": 0.2566894395132456, "grad_norm": 385.1900939941406, "learning_rate": 9.293547815114537e-06, "loss": 16.4625, "step": 127070 }, { "epoch": 0.2567096401459294, "grad_norm": 199.24002075195312, "learning_rate": 9.293368921468526e-06, "loss": 20.3427, "step": 127080 }, { "epoch": 0.25672984077861316, "grad_norm": 373.090087890625, "learning_rate": 9.293190006897156e-06, "loss": 23.6768, "step": 127090 }, { "epoch": 0.256750041411297, "grad_norm": 309.4405212402344, "learning_rate": 9.293011071401299e-06, "loss": 17.2156, "step": 127100 }, { "epoch": 0.2567702420439808, "grad_norm": 197.38519287109375, "learning_rate": 9.292832114981825e-06, "loss": 25.5827, "step": 127110 }, { "epoch": 0.2567904426766646, "grad_norm": 338.5206298828125, "learning_rate": 9.29265313763961e-06, "loss": 24.8134, "step": 127120 }, { "epoch": 0.25681064330934844, "grad_norm": 99.86254119873047, "learning_rate": 9.292474139375523e-06, "loss": 19.7025, "step": 127130 }, { "epoch": 0.25683084394203226, "grad_norm": 254.98605346679688, "learning_rate": 9.292295120190438e-06, "loss": 14.9091, "step": 127140 }, { "epoch": 0.2568510445747161, "grad_norm": 299.9100036621094, "learning_rate": 9.292116080085226e-06, "loss": 12.9566, "step": 127150 }, { "epoch": 0.2568712452073999, "grad_norm": 249.32562255859375, "learning_rate": 9.291937019060762e-06, "loss": 30.5472, "step": 127160 }, { "epoch": 0.2568914458400837, "grad_norm": 316.0506896972656, "learning_rate": 9.291757937117917e-06, "loss": 19.3924, "step": 127170 }, { "epoch": 0.25691164647276754, "grad_norm": 277.75518798828125, "learning_rate": 9.291578834257565e-06, "loss": 27.2077, "step": 127180 }, { "epoch": 0.25693184710545136, "grad_norm": 140.58888244628906, "learning_rate": 9.291399710480578e-06, "loss": 20.5211, "step": 127190 }, { "epoch": 0.2569520477381352, "grad_norm": 1228.306396484375, "learning_rate": 9.291220565787829e-06, "loss": 27.2712, "step": 127200 }, { "epoch": 0.256972248370819, "grad_norm": 98.28571319580078, "learning_rate": 9.291041400180193e-06, "loss": 19.9125, "step": 127210 }, { "epoch": 0.25699244900350277, "grad_norm": 220.2958526611328, "learning_rate": 9.29086221365854e-06, "loss": 36.9459, "step": 127220 }, { "epoch": 0.2570126496361866, "grad_norm": 36.49966049194336, "learning_rate": 9.290683006223745e-06, "loss": 16.9539, "step": 127230 }, { "epoch": 0.2570328502688704, "grad_norm": 278.6464538574219, "learning_rate": 9.290503777876683e-06, "loss": 24.527, "step": 127240 }, { "epoch": 0.2570530509015542, "grad_norm": 205.2666473388672, "learning_rate": 9.290324528618225e-06, "loss": 18.1415, "step": 127250 }, { "epoch": 0.25707325153423805, "grad_norm": 254.75421142578125, "learning_rate": 9.290145258449243e-06, "loss": 20.8729, "step": 127260 }, { "epoch": 0.25709345216692187, "grad_norm": 502.50726318359375, "learning_rate": 9.289965967370617e-06, "loss": 31.9485, "step": 127270 }, { "epoch": 0.2571136527996057, "grad_norm": 333.1544189453125, "learning_rate": 9.289786655383215e-06, "loss": 18.0194, "step": 127280 }, { "epoch": 0.2571338534322895, "grad_norm": 313.12554931640625, "learning_rate": 9.289607322487914e-06, "loss": 21.4747, "step": 127290 }, { "epoch": 0.2571540540649733, "grad_norm": 479.302001953125, "learning_rate": 9.289427968685588e-06, "loss": 19.0917, "step": 127300 }, { "epoch": 0.25717425469765715, "grad_norm": 390.15594482421875, "learning_rate": 9.28924859397711e-06, "loss": 22.7473, "step": 127310 }, { "epoch": 0.25719445533034097, "grad_norm": 218.55368041992188, "learning_rate": 9.289069198363353e-06, "loss": 17.2089, "step": 127320 }, { "epoch": 0.2572146559630248, "grad_norm": 505.6830139160156, "learning_rate": 9.288889781845193e-06, "loss": 25.2366, "step": 127330 }, { "epoch": 0.2572348565957086, "grad_norm": 219.81788635253906, "learning_rate": 9.288710344423505e-06, "loss": 20.0065, "step": 127340 }, { "epoch": 0.25725505722839237, "grad_norm": 0.0, "learning_rate": 9.288530886099165e-06, "loss": 17.1507, "step": 127350 }, { "epoch": 0.2572752578610762, "grad_norm": 237.62774658203125, "learning_rate": 9.288351406873044e-06, "loss": 18.2165, "step": 127360 }, { "epoch": 0.25729545849376, "grad_norm": 343.670166015625, "learning_rate": 9.288171906746018e-06, "loss": 9.9755, "step": 127370 }, { "epoch": 0.25731565912644383, "grad_norm": 375.6775207519531, "learning_rate": 9.287992385718963e-06, "loss": 15.9099, "step": 127380 }, { "epoch": 0.25733585975912765, "grad_norm": 145.68392944335938, "learning_rate": 9.287812843792752e-06, "loss": 25.6121, "step": 127390 }, { "epoch": 0.25735606039181147, "grad_norm": 395.79608154296875, "learning_rate": 9.287633280968263e-06, "loss": 30.4517, "step": 127400 }, { "epoch": 0.2573762610244953, "grad_norm": 186.7076416015625, "learning_rate": 9.287453697246367e-06, "loss": 20.6873, "step": 127410 }, { "epoch": 0.2573964616571791, "grad_norm": 263.7507629394531, "learning_rate": 9.287274092627944e-06, "loss": 31.5815, "step": 127420 }, { "epoch": 0.25741666228986293, "grad_norm": 493.2078552246094, "learning_rate": 9.287094467113866e-06, "loss": 50.3702, "step": 127430 }, { "epoch": 0.25743686292254675, "grad_norm": 354.9422912597656, "learning_rate": 9.28691482070501e-06, "loss": 35.5258, "step": 127440 }, { "epoch": 0.25745706355523057, "grad_norm": 1116.931640625, "learning_rate": 9.28673515340225e-06, "loss": 36.0058, "step": 127450 }, { "epoch": 0.2574772641879144, "grad_norm": 300.0147705078125, "learning_rate": 9.286555465206463e-06, "loss": 17.5154, "step": 127460 }, { "epoch": 0.25749746482059815, "grad_norm": 147.80734252929688, "learning_rate": 9.286375756118526e-06, "loss": 18.405, "step": 127470 }, { "epoch": 0.257517665453282, "grad_norm": 223.7917022705078, "learning_rate": 9.286196026139311e-06, "loss": 30.2837, "step": 127480 }, { "epoch": 0.2575378660859658, "grad_norm": 292.1416931152344, "learning_rate": 9.2860162752697e-06, "loss": 14.248, "step": 127490 }, { "epoch": 0.2575580667186496, "grad_norm": 273.2431945800781, "learning_rate": 9.285836503510562e-06, "loss": 21.4986, "step": 127500 }, { "epoch": 0.25757826735133343, "grad_norm": 65.00953674316406, "learning_rate": 9.285656710862778e-06, "loss": 33.3382, "step": 127510 }, { "epoch": 0.25759846798401725, "grad_norm": 481.978515625, "learning_rate": 9.285476897327223e-06, "loss": 20.5869, "step": 127520 }, { "epoch": 0.2576186686167011, "grad_norm": 188.5455322265625, "learning_rate": 9.285297062904774e-06, "loss": 20.2468, "step": 127530 }, { "epoch": 0.2576388692493849, "grad_norm": 110.42162322998047, "learning_rate": 9.285117207596307e-06, "loss": 17.174, "step": 127540 }, { "epoch": 0.2576590698820687, "grad_norm": 260.04058837890625, "learning_rate": 9.284937331402697e-06, "loss": 31.7702, "step": 127550 }, { "epoch": 0.25767927051475253, "grad_norm": 818.1154174804688, "learning_rate": 9.284757434324823e-06, "loss": 22.5121, "step": 127560 }, { "epoch": 0.25769947114743635, "grad_norm": 23.35238265991211, "learning_rate": 9.284577516363561e-06, "loss": 33.836, "step": 127570 }, { "epoch": 0.2577196717801202, "grad_norm": 95.08285522460938, "learning_rate": 9.284397577519788e-06, "loss": 24.6757, "step": 127580 }, { "epoch": 0.257739872412804, "grad_norm": 431.515380859375, "learning_rate": 9.28421761779438e-06, "loss": 30.5624, "step": 127590 }, { "epoch": 0.25776007304548776, "grad_norm": 286.0390930175781, "learning_rate": 9.284037637188215e-06, "loss": 20.4114, "step": 127600 }, { "epoch": 0.2577802736781716, "grad_norm": 129.003662109375, "learning_rate": 9.28385763570217e-06, "loss": 21.9735, "step": 127610 }, { "epoch": 0.2578004743108554, "grad_norm": 462.7196044921875, "learning_rate": 9.283677613337124e-06, "loss": 23.1079, "step": 127620 }, { "epoch": 0.2578206749435392, "grad_norm": 258.5696716308594, "learning_rate": 9.283497570093952e-06, "loss": 13.3749, "step": 127630 }, { "epoch": 0.25784087557622304, "grad_norm": 210.30979919433594, "learning_rate": 9.283317505973533e-06, "loss": 25.944, "step": 127640 }, { "epoch": 0.25786107620890686, "grad_norm": 211.7775115966797, "learning_rate": 9.283137420976742e-06, "loss": 24.8141, "step": 127650 }, { "epoch": 0.2578812768415907, "grad_norm": 666.1100463867188, "learning_rate": 9.282957315104462e-06, "loss": 24.9242, "step": 127660 }, { "epoch": 0.2579014774742745, "grad_norm": 152.2067108154297, "learning_rate": 9.282777188357563e-06, "loss": 19.9921, "step": 127670 }, { "epoch": 0.2579216781069583, "grad_norm": 321.222900390625, "learning_rate": 9.282597040736932e-06, "loss": 22.8883, "step": 127680 }, { "epoch": 0.25794187873964214, "grad_norm": 12.15452766418457, "learning_rate": 9.282416872243441e-06, "loss": 21.7656, "step": 127690 }, { "epoch": 0.25796207937232596, "grad_norm": 209.8081512451172, "learning_rate": 9.282236682877968e-06, "loss": 24.1595, "step": 127700 }, { "epoch": 0.2579822800050098, "grad_norm": 213.8217010498047, "learning_rate": 9.282056472641393e-06, "loss": 18.6886, "step": 127710 }, { "epoch": 0.2580024806376936, "grad_norm": 226.94493103027344, "learning_rate": 9.281876241534595e-06, "loss": 19.3561, "step": 127720 }, { "epoch": 0.25802268127037736, "grad_norm": 320.43756103515625, "learning_rate": 9.28169598955845e-06, "loss": 33.8208, "step": 127730 }, { "epoch": 0.2580428819030612, "grad_norm": 353.1011047363281, "learning_rate": 9.28151571671384e-06, "loss": 15.0358, "step": 127740 }, { "epoch": 0.258063082535745, "grad_norm": 491.0283203125, "learning_rate": 9.281335423001641e-06, "loss": 20.2493, "step": 127750 }, { "epoch": 0.2580832831684288, "grad_norm": 404.0115966796875, "learning_rate": 9.281155108422732e-06, "loss": 18.925, "step": 127760 }, { "epoch": 0.25810348380111264, "grad_norm": 59.60758590698242, "learning_rate": 9.280974772977994e-06, "loss": 14.2255, "step": 127770 }, { "epoch": 0.25812368443379646, "grad_norm": 415.9898986816406, "learning_rate": 9.280794416668303e-06, "loss": 28.0694, "step": 127780 }, { "epoch": 0.2581438850664803, "grad_norm": 187.86610412597656, "learning_rate": 9.280614039494538e-06, "loss": 14.5083, "step": 127790 }, { "epoch": 0.2581640856991641, "grad_norm": 186.03109741210938, "learning_rate": 9.280433641457582e-06, "loss": 15.1859, "step": 127800 }, { "epoch": 0.2581842863318479, "grad_norm": 241.4952392578125, "learning_rate": 9.28025322255831e-06, "loss": 17.6444, "step": 127810 }, { "epoch": 0.25820448696453174, "grad_norm": 117.89322662353516, "learning_rate": 9.280072782797602e-06, "loss": 9.487, "step": 127820 }, { "epoch": 0.25822468759721556, "grad_norm": 228.4276123046875, "learning_rate": 9.279892322176341e-06, "loss": 10.5574, "step": 127830 }, { "epoch": 0.2582448882298994, "grad_norm": 127.33572387695312, "learning_rate": 9.279711840695401e-06, "loss": 44.3011, "step": 127840 }, { "epoch": 0.2582650888625832, "grad_norm": 169.79583740234375, "learning_rate": 9.279531338355666e-06, "loss": 15.3789, "step": 127850 }, { "epoch": 0.25828528949526697, "grad_norm": 402.23919677734375, "learning_rate": 9.279350815158014e-06, "loss": 28.2176, "step": 127860 }, { "epoch": 0.2583054901279508, "grad_norm": 374.6882019042969, "learning_rate": 9.279170271103326e-06, "loss": 15.5858, "step": 127870 }, { "epoch": 0.2583256907606346, "grad_norm": 113.90262603759766, "learning_rate": 9.27898970619248e-06, "loss": 16.9948, "step": 127880 }, { "epoch": 0.2583458913933184, "grad_norm": 269.4488220214844, "learning_rate": 9.278809120426358e-06, "loss": 17.2685, "step": 127890 }, { "epoch": 0.25836609202600225, "grad_norm": 81.96720123291016, "learning_rate": 9.278628513805838e-06, "loss": 16.9459, "step": 127900 }, { "epoch": 0.25838629265868607, "grad_norm": 590.1903076171875, "learning_rate": 9.278447886331803e-06, "loss": 28.6916, "step": 127910 }, { "epoch": 0.2584064932913699, "grad_norm": 199.18260192871094, "learning_rate": 9.27826723800513e-06, "loss": 18.8569, "step": 127920 }, { "epoch": 0.2584266939240537, "grad_norm": 209.3103485107422, "learning_rate": 9.278086568826702e-06, "loss": 22.4834, "step": 127930 }, { "epoch": 0.2584468945567375, "grad_norm": 144.51876831054688, "learning_rate": 9.277905878797401e-06, "loss": 7.8778, "step": 127940 }, { "epoch": 0.25846709518942135, "grad_norm": 40.468017578125, "learning_rate": 9.277725167918103e-06, "loss": 22.5541, "step": 127950 }, { "epoch": 0.25848729582210517, "grad_norm": 175.1702117919922, "learning_rate": 9.277544436189693e-06, "loss": 22.9078, "step": 127960 }, { "epoch": 0.258507496454789, "grad_norm": 486.73822021484375, "learning_rate": 9.27736368361305e-06, "loss": 13.4163, "step": 127970 }, { "epoch": 0.25852769708747275, "grad_norm": 204.7146453857422, "learning_rate": 9.277182910189056e-06, "loss": 20.4761, "step": 127980 }, { "epoch": 0.25854789772015657, "grad_norm": 77.42448425292969, "learning_rate": 9.27700211591859e-06, "loss": 22.5896, "step": 127990 }, { "epoch": 0.2585680983528404, "grad_norm": 326.8564147949219, "learning_rate": 9.276821300802535e-06, "loss": 16.4379, "step": 128000 }, { "epoch": 0.2585882989855242, "grad_norm": 365.43048095703125, "learning_rate": 9.27664046484177e-06, "loss": 23.4927, "step": 128010 }, { "epoch": 0.25860849961820803, "grad_norm": 301.95782470703125, "learning_rate": 9.27645960803718e-06, "loss": 17.233, "step": 128020 }, { "epoch": 0.25862870025089185, "grad_norm": 382.4001770019531, "learning_rate": 9.276278730389642e-06, "loss": 21.6029, "step": 128030 }, { "epoch": 0.25864890088357567, "grad_norm": 515.5791625976562, "learning_rate": 9.276097831900044e-06, "loss": 25.6192, "step": 128040 }, { "epoch": 0.2586691015162595, "grad_norm": 592.4180908203125, "learning_rate": 9.275916912569261e-06, "loss": 15.0422, "step": 128050 }, { "epoch": 0.2586893021489433, "grad_norm": 456.9488220214844, "learning_rate": 9.27573597239818e-06, "loss": 24.6188, "step": 128060 }, { "epoch": 0.25870950278162713, "grad_norm": 486.4844665527344, "learning_rate": 9.275555011387679e-06, "loss": 9.8673, "step": 128070 }, { "epoch": 0.25872970341431095, "grad_norm": 75.38780975341797, "learning_rate": 9.275374029538639e-06, "loss": 31.3118, "step": 128080 }, { "epoch": 0.25874990404699477, "grad_norm": 387.5243835449219, "learning_rate": 9.275193026851947e-06, "loss": 18.9463, "step": 128090 }, { "epoch": 0.2587701046796786, "grad_norm": 160.3678436279297, "learning_rate": 9.275012003328483e-06, "loss": 21.3683, "step": 128100 }, { "epoch": 0.25879030531236236, "grad_norm": 221.3370361328125, "learning_rate": 9.274830958969129e-06, "loss": 25.3137, "step": 128110 }, { "epoch": 0.2588105059450462, "grad_norm": 251.27734375, "learning_rate": 9.274649893774768e-06, "loss": 35.0544, "step": 128120 }, { "epoch": 0.25883070657773, "grad_norm": 176.3927459716797, "learning_rate": 9.27446880774628e-06, "loss": 27.3233, "step": 128130 }, { "epoch": 0.2588509072104138, "grad_norm": 176.95066833496094, "learning_rate": 9.27428770088455e-06, "loss": 15.4779, "step": 128140 }, { "epoch": 0.25887110784309764, "grad_norm": 6.6894755363464355, "learning_rate": 9.27410657319046e-06, "loss": 25.7905, "step": 128150 }, { "epoch": 0.25889130847578146, "grad_norm": 274.3800964355469, "learning_rate": 9.273925424664894e-06, "loss": 15.4682, "step": 128160 }, { "epoch": 0.2589115091084653, "grad_norm": 267.9763488769531, "learning_rate": 9.273744255308733e-06, "loss": 23.0983, "step": 128170 }, { "epoch": 0.2589317097411491, "grad_norm": 255.2089080810547, "learning_rate": 9.273563065122862e-06, "loss": 23.3425, "step": 128180 }, { "epoch": 0.2589519103738329, "grad_norm": 424.24114990234375, "learning_rate": 9.27338185410816e-06, "loss": 26.2336, "step": 128190 }, { "epoch": 0.25897211100651674, "grad_norm": 454.7315979003906, "learning_rate": 9.273200622265516e-06, "loss": 21.8907, "step": 128200 }, { "epoch": 0.25899231163920056, "grad_norm": 235.7766876220703, "learning_rate": 9.27301936959581e-06, "loss": 28.2182, "step": 128210 }, { "epoch": 0.2590125122718844, "grad_norm": 442.6334228515625, "learning_rate": 9.272838096099926e-06, "loss": 36.5304, "step": 128220 }, { "epoch": 0.2590327129045682, "grad_norm": 207.7319793701172, "learning_rate": 9.272656801778745e-06, "loss": 20.6773, "step": 128230 }, { "epoch": 0.25905291353725196, "grad_norm": 48.93697738647461, "learning_rate": 9.272475486633155e-06, "loss": 19.3901, "step": 128240 }, { "epoch": 0.2590731141699358, "grad_norm": 242.65538024902344, "learning_rate": 9.272294150664039e-06, "loss": 27.5011, "step": 128250 }, { "epoch": 0.2590933148026196, "grad_norm": 338.1984558105469, "learning_rate": 9.272112793872277e-06, "loss": 17.1308, "step": 128260 }, { "epoch": 0.2591135154353034, "grad_norm": 186.8594970703125, "learning_rate": 9.271931416258756e-06, "loss": 29.2522, "step": 128270 }, { "epoch": 0.25913371606798724, "grad_norm": 153.88525390625, "learning_rate": 9.27175001782436e-06, "loss": 17.6577, "step": 128280 }, { "epoch": 0.25915391670067106, "grad_norm": 559.4411010742188, "learning_rate": 9.271568598569971e-06, "loss": 23.5331, "step": 128290 }, { "epoch": 0.2591741173333549, "grad_norm": 231.84117126464844, "learning_rate": 9.271387158496477e-06, "loss": 15.4951, "step": 128300 }, { "epoch": 0.2591943179660387, "grad_norm": 186.703125, "learning_rate": 9.271205697604759e-06, "loss": 10.9606, "step": 128310 }, { "epoch": 0.2592145185987225, "grad_norm": 551.7407836914062, "learning_rate": 9.271024215895702e-06, "loss": 26.6286, "step": 128320 }, { "epoch": 0.25923471923140634, "grad_norm": 372.9087219238281, "learning_rate": 9.270842713370192e-06, "loss": 13.7775, "step": 128330 }, { "epoch": 0.25925491986409016, "grad_norm": 236.16964721679688, "learning_rate": 9.270661190029112e-06, "loss": 15.932, "step": 128340 }, { "epoch": 0.259275120496774, "grad_norm": 221.2362060546875, "learning_rate": 9.270479645873347e-06, "loss": 25.0616, "step": 128350 }, { "epoch": 0.2592953211294578, "grad_norm": 315.8817138671875, "learning_rate": 9.270298080903782e-06, "loss": 23.9639, "step": 128360 }, { "epoch": 0.25931552176214157, "grad_norm": 242.3975067138672, "learning_rate": 9.270116495121303e-06, "loss": 31.4567, "step": 128370 }, { "epoch": 0.2593357223948254, "grad_norm": 291.0304260253906, "learning_rate": 9.269934888526793e-06, "loss": 23.6571, "step": 128380 }, { "epoch": 0.2593559230275092, "grad_norm": 104.78266906738281, "learning_rate": 9.269753261121139e-06, "loss": 21.5287, "step": 128390 }, { "epoch": 0.259376123660193, "grad_norm": 265.99859619140625, "learning_rate": 9.269571612905227e-06, "loss": 15.3739, "step": 128400 }, { "epoch": 0.25939632429287685, "grad_norm": 0.0, "learning_rate": 9.269389943879938e-06, "loss": 13.0665, "step": 128410 }, { "epoch": 0.25941652492556067, "grad_norm": 54.66908645629883, "learning_rate": 9.269208254046161e-06, "loss": 21.5474, "step": 128420 }, { "epoch": 0.2594367255582445, "grad_norm": 337.4878845214844, "learning_rate": 9.269026543404782e-06, "loss": 18.1727, "step": 128430 }, { "epoch": 0.2594569261909283, "grad_norm": 325.6561584472656, "learning_rate": 9.268844811956683e-06, "loss": 11.6633, "step": 128440 }, { "epoch": 0.2594771268236121, "grad_norm": 400.37750244140625, "learning_rate": 9.268663059702753e-06, "loss": 28.2356, "step": 128450 }, { "epoch": 0.25949732745629595, "grad_norm": 59.21805191040039, "learning_rate": 9.268481286643878e-06, "loss": 24.525, "step": 128460 }, { "epoch": 0.25951752808897977, "grad_norm": 308.402587890625, "learning_rate": 9.268299492780942e-06, "loss": 11.6304, "step": 128470 }, { "epoch": 0.2595377287216636, "grad_norm": 944.167236328125, "learning_rate": 9.268117678114833e-06, "loss": 36.4336, "step": 128480 }, { "epoch": 0.2595579293543474, "grad_norm": 155.8727264404297, "learning_rate": 9.267935842646437e-06, "loss": 22.4917, "step": 128490 }, { "epoch": 0.25957812998703117, "grad_norm": 176.15365600585938, "learning_rate": 9.267753986376638e-06, "loss": 29.0157, "step": 128500 }, { "epoch": 0.259598330619715, "grad_norm": 452.32330322265625, "learning_rate": 9.267572109306325e-06, "loss": 29.3463, "step": 128510 }, { "epoch": 0.2596185312523988, "grad_norm": 208.19483947753906, "learning_rate": 9.26739021143638e-06, "loss": 21.5019, "step": 128520 }, { "epoch": 0.25963873188508263, "grad_norm": 373.2658386230469, "learning_rate": 9.267208292767696e-06, "loss": 25.9556, "step": 128530 }, { "epoch": 0.25965893251776645, "grad_norm": 379.7319641113281, "learning_rate": 9.267026353301155e-06, "loss": 16.9431, "step": 128540 }, { "epoch": 0.25967913315045027, "grad_norm": 964.3709716796875, "learning_rate": 9.266844393037644e-06, "loss": 34.2584, "step": 128550 }, { "epoch": 0.2596993337831341, "grad_norm": 473.113525390625, "learning_rate": 9.266662411978052e-06, "loss": 16.2447, "step": 128560 }, { "epoch": 0.2597195344158179, "grad_norm": 0.0, "learning_rate": 9.266480410123264e-06, "loss": 23.0101, "step": 128570 }, { "epoch": 0.25973973504850173, "grad_norm": 165.73760986328125, "learning_rate": 9.266298387474169e-06, "loss": 21.0204, "step": 128580 }, { "epoch": 0.25975993568118555, "grad_norm": 512.442138671875, "learning_rate": 9.266116344031652e-06, "loss": 13.9845, "step": 128590 }, { "epoch": 0.25978013631386937, "grad_norm": 284.4760437011719, "learning_rate": 9.265934279796602e-06, "loss": 20.7353, "step": 128600 }, { "epoch": 0.2598003369465532, "grad_norm": 290.6150207519531, "learning_rate": 9.265752194769906e-06, "loss": 18.945, "step": 128610 }, { "epoch": 0.25982053757923695, "grad_norm": 0.0, "learning_rate": 9.265570088952452e-06, "loss": 14.6949, "step": 128620 }, { "epoch": 0.2598407382119208, "grad_norm": 252.33758544921875, "learning_rate": 9.265387962345125e-06, "loss": 37.6536, "step": 128630 }, { "epoch": 0.2598609388446046, "grad_norm": 146.20399475097656, "learning_rate": 9.265205814948814e-06, "loss": 21.3251, "step": 128640 }, { "epoch": 0.2598811394772884, "grad_norm": 166.13697814941406, "learning_rate": 9.265023646764409e-06, "loss": 26.4858, "step": 128650 }, { "epoch": 0.25990134010997223, "grad_norm": 791.9387817382812, "learning_rate": 9.264841457792795e-06, "loss": 31.5565, "step": 128660 }, { "epoch": 0.25992154074265605, "grad_norm": 142.35687255859375, "learning_rate": 9.264659248034861e-06, "loss": 43.9615, "step": 128670 }, { "epoch": 0.2599417413753399, "grad_norm": 128.25233459472656, "learning_rate": 9.264477017491496e-06, "loss": 8.6163, "step": 128680 }, { "epoch": 0.2599619420080237, "grad_norm": 162.348876953125, "learning_rate": 9.264294766163587e-06, "loss": 25.7741, "step": 128690 }, { "epoch": 0.2599821426407075, "grad_norm": 332.7527160644531, "learning_rate": 9.264112494052022e-06, "loss": 20.436, "step": 128700 }, { "epoch": 0.26000234327339133, "grad_norm": 210.2703399658203, "learning_rate": 9.26393020115769e-06, "loss": 24.8616, "step": 128710 }, { "epoch": 0.26002254390607515, "grad_norm": 160.9236297607422, "learning_rate": 9.26374788748148e-06, "loss": 14.5966, "step": 128720 }, { "epoch": 0.260042744538759, "grad_norm": 44.11042785644531, "learning_rate": 9.263565553024279e-06, "loss": 29.1287, "step": 128730 }, { "epoch": 0.2600629451714428, "grad_norm": 302.0043029785156, "learning_rate": 9.263383197786978e-06, "loss": 19.1033, "step": 128740 }, { "epoch": 0.26008314580412656, "grad_norm": 466.2555236816406, "learning_rate": 9.263200821770462e-06, "loss": 22.9347, "step": 128750 }, { "epoch": 0.2601033464368104, "grad_norm": 183.08103942871094, "learning_rate": 9.263018424975624e-06, "loss": 21.3875, "step": 128760 }, { "epoch": 0.2601235470694942, "grad_norm": 211.10919189453125, "learning_rate": 9.262836007403352e-06, "loss": 18.6941, "step": 128770 }, { "epoch": 0.260143747702178, "grad_norm": 240.9994659423828, "learning_rate": 9.262653569054532e-06, "loss": 24.9768, "step": 128780 }, { "epoch": 0.26016394833486184, "grad_norm": 178.66665649414062, "learning_rate": 9.262471109930056e-06, "loss": 13.8389, "step": 128790 }, { "epoch": 0.26018414896754566, "grad_norm": 291.5282287597656, "learning_rate": 9.262288630030814e-06, "loss": 31.6664, "step": 128800 }, { "epoch": 0.2602043496002295, "grad_norm": 458.1502990722656, "learning_rate": 9.262106129357693e-06, "loss": 22.6433, "step": 128810 }, { "epoch": 0.2602245502329133, "grad_norm": 211.512939453125, "learning_rate": 9.261923607911584e-06, "loss": 16.9892, "step": 128820 }, { "epoch": 0.2602447508655971, "grad_norm": 249.39968872070312, "learning_rate": 9.261741065693377e-06, "loss": 20.0096, "step": 128830 }, { "epoch": 0.26026495149828094, "grad_norm": 340.2354736328125, "learning_rate": 9.26155850270396e-06, "loss": 11.8054, "step": 128840 }, { "epoch": 0.26028515213096476, "grad_norm": 286.95159912109375, "learning_rate": 9.261375918944224e-06, "loss": 18.7303, "step": 128850 }, { "epoch": 0.2603053527636486, "grad_norm": 290.9115295410156, "learning_rate": 9.261193314415058e-06, "loss": 19.2075, "step": 128860 }, { "epoch": 0.2603255533963324, "grad_norm": 343.53790283203125, "learning_rate": 9.261010689117353e-06, "loss": 38.4281, "step": 128870 }, { "epoch": 0.26034575402901616, "grad_norm": 233.9909210205078, "learning_rate": 9.260828043051999e-06, "loss": 24.1526, "step": 128880 }, { "epoch": 0.2603659546617, "grad_norm": 292.89056396484375, "learning_rate": 9.260645376219887e-06, "loss": 14.5594, "step": 128890 }, { "epoch": 0.2603861552943838, "grad_norm": 488.0834045410156, "learning_rate": 9.260462688621906e-06, "loss": 18.8947, "step": 128900 }, { "epoch": 0.2604063559270676, "grad_norm": 344.52978515625, "learning_rate": 9.260279980258945e-06, "loss": 21.5654, "step": 128910 }, { "epoch": 0.26042655655975144, "grad_norm": 228.8309326171875, "learning_rate": 9.260097251131896e-06, "loss": 12.8023, "step": 128920 }, { "epoch": 0.26044675719243526, "grad_norm": 311.6656799316406, "learning_rate": 9.259914501241651e-06, "loss": 21.6081, "step": 128930 }, { "epoch": 0.2604669578251191, "grad_norm": 162.35426330566406, "learning_rate": 9.259731730589099e-06, "loss": 21.7845, "step": 128940 }, { "epoch": 0.2604871584578029, "grad_norm": 290.9582824707031, "learning_rate": 9.25954893917513e-06, "loss": 21.5479, "step": 128950 }, { "epoch": 0.2605073590904867, "grad_norm": 722.1417846679688, "learning_rate": 9.259366127000637e-06, "loss": 14.5525, "step": 128960 }, { "epoch": 0.26052755972317054, "grad_norm": 588.9274291992188, "learning_rate": 9.259183294066512e-06, "loss": 25.6636, "step": 128970 }, { "epoch": 0.26054776035585436, "grad_norm": 321.6412658691406, "learning_rate": 9.259000440373643e-06, "loss": 18.7321, "step": 128980 }, { "epoch": 0.2605679609885382, "grad_norm": 383.409423828125, "learning_rate": 9.258817565922919e-06, "loss": 24.3152, "step": 128990 }, { "epoch": 0.260588161621222, "grad_norm": 206.9812774658203, "learning_rate": 9.25863467071524e-06, "loss": 13.0095, "step": 129000 }, { "epoch": 0.26060836225390577, "grad_norm": 0.0, "learning_rate": 9.258451754751488e-06, "loss": 20.3891, "step": 129010 }, { "epoch": 0.2606285628865896, "grad_norm": 247.02540588378906, "learning_rate": 9.25826881803256e-06, "loss": 21.5678, "step": 129020 }, { "epoch": 0.2606487635192734, "grad_norm": 668.8010864257812, "learning_rate": 9.258085860559348e-06, "loss": 36.2242, "step": 129030 }, { "epoch": 0.2606689641519572, "grad_norm": 502.24609375, "learning_rate": 9.257902882332739e-06, "loss": 17.0187, "step": 129040 }, { "epoch": 0.26068916478464105, "grad_norm": 241.14012145996094, "learning_rate": 9.25771988335363e-06, "loss": 27.748, "step": 129050 }, { "epoch": 0.26070936541732487, "grad_norm": 292.7734680175781, "learning_rate": 9.25753686362291e-06, "loss": 34.3785, "step": 129060 }, { "epoch": 0.2607295660500087, "grad_norm": 192.518310546875, "learning_rate": 9.257353823141472e-06, "loss": 17.7967, "step": 129070 }, { "epoch": 0.2607497666826925, "grad_norm": 252.06021118164062, "learning_rate": 9.257170761910208e-06, "loss": 20.9462, "step": 129080 }, { "epoch": 0.2607699673153763, "grad_norm": 1118.197998046875, "learning_rate": 9.25698767993001e-06, "loss": 43.837, "step": 129090 }, { "epoch": 0.26079016794806015, "grad_norm": 393.6368408203125, "learning_rate": 9.256804577201768e-06, "loss": 37.4798, "step": 129100 }, { "epoch": 0.26081036858074397, "grad_norm": 546.433349609375, "learning_rate": 9.25662145372638e-06, "loss": 33.3348, "step": 129110 }, { "epoch": 0.2608305692134278, "grad_norm": 464.1316833496094, "learning_rate": 9.256438309504733e-06, "loss": 31.2265, "step": 129120 }, { "epoch": 0.2608507698461116, "grad_norm": 600.8143920898438, "learning_rate": 9.256255144537724e-06, "loss": 17.5693, "step": 129130 }, { "epoch": 0.26087097047879537, "grad_norm": 256.9996032714844, "learning_rate": 9.256071958826243e-06, "loss": 21.6371, "step": 129140 }, { "epoch": 0.2608911711114792, "grad_norm": 443.5301513671875, "learning_rate": 9.255888752371182e-06, "loss": 19.4419, "step": 129150 }, { "epoch": 0.260911371744163, "grad_norm": 329.8979797363281, "learning_rate": 9.255705525173437e-06, "loss": 24.1968, "step": 129160 }, { "epoch": 0.26093157237684683, "grad_norm": 321.75384521484375, "learning_rate": 9.255522277233899e-06, "loss": 28.3309, "step": 129170 }, { "epoch": 0.26095177300953065, "grad_norm": 154.47694396972656, "learning_rate": 9.255339008553462e-06, "loss": 11.8556, "step": 129180 }, { "epoch": 0.26097197364221447, "grad_norm": 528.103271484375, "learning_rate": 9.255155719133016e-06, "loss": 40.6927, "step": 129190 }, { "epoch": 0.2609921742748983, "grad_norm": 294.3609313964844, "learning_rate": 9.25497240897346e-06, "loss": 25.242, "step": 129200 }, { "epoch": 0.2610123749075821, "grad_norm": 185.53482055664062, "learning_rate": 9.254789078075684e-06, "loss": 23.1958, "step": 129210 }, { "epoch": 0.26103257554026593, "grad_norm": 260.7892150878906, "learning_rate": 9.254605726440582e-06, "loss": 28.8577, "step": 129220 }, { "epoch": 0.26105277617294975, "grad_norm": 8.216277122497559, "learning_rate": 9.254422354069048e-06, "loss": 29.1133, "step": 129230 }, { "epoch": 0.26107297680563357, "grad_norm": 514.9346313476562, "learning_rate": 9.254238960961975e-06, "loss": 22.4837, "step": 129240 }, { "epoch": 0.2610931774383174, "grad_norm": 284.17840576171875, "learning_rate": 9.254055547120258e-06, "loss": 19.875, "step": 129250 }, { "epoch": 0.26111337807100116, "grad_norm": 178.31358337402344, "learning_rate": 9.253872112544788e-06, "loss": 16.048, "step": 129260 }, { "epoch": 0.261133578703685, "grad_norm": 129.30117797851562, "learning_rate": 9.253688657236463e-06, "loss": 28.9053, "step": 129270 }, { "epoch": 0.2611537793363688, "grad_norm": 206.85975646972656, "learning_rate": 9.253505181196176e-06, "loss": 32.7466, "step": 129280 }, { "epoch": 0.2611739799690526, "grad_norm": 503.9642028808594, "learning_rate": 9.25332168442482e-06, "loss": 15.446, "step": 129290 }, { "epoch": 0.26119418060173644, "grad_norm": 506.5318908691406, "learning_rate": 9.25313816692329e-06, "loss": 15.5176, "step": 129300 }, { "epoch": 0.26121438123442026, "grad_norm": 319.416259765625, "learning_rate": 9.252954628692479e-06, "loss": 18.9115, "step": 129310 }, { "epoch": 0.2612345818671041, "grad_norm": 93.98925018310547, "learning_rate": 9.252771069733285e-06, "loss": 16.7842, "step": 129320 }, { "epoch": 0.2612547824997879, "grad_norm": 181.29489135742188, "learning_rate": 9.2525874900466e-06, "loss": 15.093, "step": 129330 }, { "epoch": 0.2612749831324717, "grad_norm": 464.2966003417969, "learning_rate": 9.252403889633319e-06, "loss": 14.8134, "step": 129340 }, { "epoch": 0.26129518376515554, "grad_norm": 302.3315124511719, "learning_rate": 9.252220268494336e-06, "loss": 23.623, "step": 129350 }, { "epoch": 0.26131538439783936, "grad_norm": 381.8354797363281, "learning_rate": 9.25203662663055e-06, "loss": 24.2987, "step": 129360 }, { "epoch": 0.2613355850305232, "grad_norm": 254.19187927246094, "learning_rate": 9.251852964042852e-06, "loss": 16.1889, "step": 129370 }, { "epoch": 0.261355785663207, "grad_norm": 213.75941467285156, "learning_rate": 9.251669280732137e-06, "loss": 38.6299, "step": 129380 }, { "epoch": 0.26137598629589076, "grad_norm": 237.70010375976562, "learning_rate": 9.251485576699302e-06, "loss": 44.7851, "step": 129390 }, { "epoch": 0.2613961869285746, "grad_norm": 284.2520751953125, "learning_rate": 9.251301851945244e-06, "loss": 17.3535, "step": 129400 }, { "epoch": 0.2614163875612584, "grad_norm": 420.3191833496094, "learning_rate": 9.251118106470855e-06, "loss": 30.9602, "step": 129410 }, { "epoch": 0.2614365881939422, "grad_norm": 354.6914367675781, "learning_rate": 9.250934340277031e-06, "loss": 18.8493, "step": 129420 }, { "epoch": 0.26145678882662604, "grad_norm": 167.4217529296875, "learning_rate": 9.250750553364669e-06, "loss": 19.9885, "step": 129430 }, { "epoch": 0.26147698945930986, "grad_norm": 115.02622985839844, "learning_rate": 9.250566745734666e-06, "loss": 12.9268, "step": 129440 }, { "epoch": 0.2614971900919937, "grad_norm": 400.38232421875, "learning_rate": 9.250382917387915e-06, "loss": 26.7287, "step": 129450 }, { "epoch": 0.2615173907246775, "grad_norm": 552.4902954101562, "learning_rate": 9.250199068325314e-06, "loss": 22.9631, "step": 129460 }, { "epoch": 0.2615375913573613, "grad_norm": 332.2176208496094, "learning_rate": 9.250015198547757e-06, "loss": 14.1949, "step": 129470 }, { "epoch": 0.26155779199004514, "grad_norm": 281.7771911621094, "learning_rate": 9.249831308056141e-06, "loss": 31.2369, "step": 129480 }, { "epoch": 0.26157799262272896, "grad_norm": 202.70440673828125, "learning_rate": 9.249647396851364e-06, "loss": 25.7474, "step": 129490 }, { "epoch": 0.2615981932554128, "grad_norm": 147.0850830078125, "learning_rate": 9.24946346493432e-06, "loss": 22.3523, "step": 129500 }, { "epoch": 0.2616183938880966, "grad_norm": 210.63427734375, "learning_rate": 9.249279512305907e-06, "loss": 9.1871, "step": 129510 }, { "epoch": 0.26163859452078037, "grad_norm": 102.7641372680664, "learning_rate": 9.249095538967021e-06, "loss": 19.4803, "step": 129520 }, { "epoch": 0.2616587951534642, "grad_norm": 507.70489501953125, "learning_rate": 9.248911544918559e-06, "loss": 31.8843, "step": 129530 }, { "epoch": 0.261678995786148, "grad_norm": 392.380859375, "learning_rate": 9.248727530161417e-06, "loss": 17.4028, "step": 129540 }, { "epoch": 0.2616991964188318, "grad_norm": 419.91314697265625, "learning_rate": 9.248543494696493e-06, "loss": 33.5558, "step": 129550 }, { "epoch": 0.26171939705151565, "grad_norm": 135.6229705810547, "learning_rate": 9.248359438524683e-06, "loss": 15.2087, "step": 129560 }, { "epoch": 0.26173959768419947, "grad_norm": 302.4566955566406, "learning_rate": 9.248175361646884e-06, "loss": 20.0454, "step": 129570 }, { "epoch": 0.2617597983168833, "grad_norm": 116.71344757080078, "learning_rate": 9.247991264063994e-06, "loss": 13.0094, "step": 129580 }, { "epoch": 0.2617799989495671, "grad_norm": 278.9741516113281, "learning_rate": 9.247807145776909e-06, "loss": 19.1934, "step": 129590 }, { "epoch": 0.2618001995822509, "grad_norm": 61.36634826660156, "learning_rate": 9.247623006786529e-06, "loss": 14.7251, "step": 129600 }, { "epoch": 0.26182040021493475, "grad_norm": 275.9872131347656, "learning_rate": 9.247438847093747e-06, "loss": 23.9599, "step": 129610 }, { "epoch": 0.26184060084761857, "grad_norm": 341.7347717285156, "learning_rate": 9.247254666699465e-06, "loss": 21.6643, "step": 129620 }, { "epoch": 0.2618608014803024, "grad_norm": 165.0023651123047, "learning_rate": 9.247070465604578e-06, "loss": 20.1014, "step": 129630 }, { "epoch": 0.2618810021129862, "grad_norm": 0.0, "learning_rate": 9.246886243809985e-06, "loss": 24.0572, "step": 129640 }, { "epoch": 0.26190120274566997, "grad_norm": 407.7612609863281, "learning_rate": 9.246702001316584e-06, "loss": 23.1058, "step": 129650 }, { "epoch": 0.2619214033783538, "grad_norm": 199.06809997558594, "learning_rate": 9.246517738125271e-06, "loss": 16.5822, "step": 129660 }, { "epoch": 0.2619416040110376, "grad_norm": 540.3873291015625, "learning_rate": 9.246333454236946e-06, "loss": 31.897, "step": 129670 }, { "epoch": 0.26196180464372143, "grad_norm": 116.72136688232422, "learning_rate": 9.246149149652507e-06, "loss": 23.3436, "step": 129680 }, { "epoch": 0.26198200527640525, "grad_norm": 324.6308288574219, "learning_rate": 9.245964824372855e-06, "loss": 17.7542, "step": 129690 }, { "epoch": 0.26200220590908907, "grad_norm": 287.99407958984375, "learning_rate": 9.245780478398883e-06, "loss": 31.6328, "step": 129700 }, { "epoch": 0.2620224065417729, "grad_norm": 249.7967987060547, "learning_rate": 9.245596111731492e-06, "loss": 16.5126, "step": 129710 }, { "epoch": 0.2620426071744567, "grad_norm": 74.83383178710938, "learning_rate": 9.245411724371578e-06, "loss": 15.8808, "step": 129720 }, { "epoch": 0.26206280780714053, "grad_norm": 378.4853210449219, "learning_rate": 9.245227316320046e-06, "loss": 14.944, "step": 129730 }, { "epoch": 0.26208300843982435, "grad_norm": 371.9002380371094, "learning_rate": 9.245042887577789e-06, "loss": 22.4586, "step": 129740 }, { "epoch": 0.26210320907250817, "grad_norm": 748.1837158203125, "learning_rate": 9.244858438145709e-06, "loss": 45.0356, "step": 129750 }, { "epoch": 0.262123409705192, "grad_norm": 421.13226318359375, "learning_rate": 9.244673968024701e-06, "loss": 27.6006, "step": 129760 }, { "epoch": 0.2621436103378758, "grad_norm": 195.1837615966797, "learning_rate": 9.24448947721567e-06, "loss": 21.2427, "step": 129770 }, { "epoch": 0.2621638109705596, "grad_norm": 245.95765686035156, "learning_rate": 9.24430496571951e-06, "loss": 13.3889, "step": 129780 }, { "epoch": 0.2621840116032434, "grad_norm": 169.9891357421875, "learning_rate": 9.244120433537126e-06, "loss": 12.4955, "step": 129790 }, { "epoch": 0.2622042122359272, "grad_norm": 454.4453430175781, "learning_rate": 9.24393588066941e-06, "loss": 28.2729, "step": 129800 }, { "epoch": 0.26222441286861103, "grad_norm": 834.25830078125, "learning_rate": 9.243751307117266e-06, "loss": 21.3751, "step": 129810 }, { "epoch": 0.26224461350129485, "grad_norm": 113.78535461425781, "learning_rate": 9.243566712881593e-06, "loss": 15.1413, "step": 129820 }, { "epoch": 0.2622648141339787, "grad_norm": 354.5184326171875, "learning_rate": 9.243382097963292e-06, "loss": 18.4215, "step": 129830 }, { "epoch": 0.2622850147666625, "grad_norm": 0.0, "learning_rate": 9.24319746236326e-06, "loss": 14.6979, "step": 129840 }, { "epoch": 0.2623052153993463, "grad_norm": 121.40631866455078, "learning_rate": 9.243012806082398e-06, "loss": 20.9623, "step": 129850 }, { "epoch": 0.26232541603203013, "grad_norm": 278.96514892578125, "learning_rate": 9.242828129121607e-06, "loss": 13.1778, "step": 129860 }, { "epoch": 0.26234561666471395, "grad_norm": 414.690185546875, "learning_rate": 9.242643431481783e-06, "loss": 24.5263, "step": 129870 }, { "epoch": 0.2623658172973978, "grad_norm": 211.73695373535156, "learning_rate": 9.242458713163834e-06, "loss": 20.1864, "step": 129880 }, { "epoch": 0.2623860179300816, "grad_norm": 91.06668853759766, "learning_rate": 9.242273974168655e-06, "loss": 29.0181, "step": 129890 }, { "epoch": 0.26240621856276536, "grad_norm": 373.81597900390625, "learning_rate": 9.242089214497146e-06, "loss": 34.6583, "step": 129900 }, { "epoch": 0.2624264191954492, "grad_norm": 195.44522094726562, "learning_rate": 9.241904434150208e-06, "loss": 27.0003, "step": 129910 }, { "epoch": 0.262446619828133, "grad_norm": 777.9647827148438, "learning_rate": 9.241719633128743e-06, "loss": 67.0908, "step": 129920 }, { "epoch": 0.2624668204608168, "grad_norm": 373.15252685546875, "learning_rate": 9.241534811433651e-06, "loss": 28.1559, "step": 129930 }, { "epoch": 0.26248702109350064, "grad_norm": 145.73133850097656, "learning_rate": 9.241349969065834e-06, "loss": 14.8453, "step": 129940 }, { "epoch": 0.26250722172618446, "grad_norm": 429.2937316894531, "learning_rate": 9.241165106026189e-06, "loss": 28.7496, "step": 129950 }, { "epoch": 0.2625274223588683, "grad_norm": 262.8624572753906, "learning_rate": 9.24098022231562e-06, "loss": 28.8341, "step": 129960 }, { "epoch": 0.2625476229915521, "grad_norm": 199.19415283203125, "learning_rate": 9.24079531793503e-06, "loss": 24.7117, "step": 129970 }, { "epoch": 0.2625678236242359, "grad_norm": 22.98516273498535, "learning_rate": 9.24061039288532e-06, "loss": 13.8059, "step": 129980 }, { "epoch": 0.26258802425691974, "grad_norm": 2.703617572784424, "learning_rate": 9.240425447167384e-06, "loss": 21.1812, "step": 129990 }, { "epoch": 0.26260822488960356, "grad_norm": 346.6095275878906, "learning_rate": 9.24024048078213e-06, "loss": 32.1575, "step": 130000 }, { "epoch": 0.2626284255222874, "grad_norm": 131.34286499023438, "learning_rate": 9.24005549373046e-06, "loss": 11.1799, "step": 130010 }, { "epoch": 0.2626486261549712, "grad_norm": 308.6712341308594, "learning_rate": 9.239870486013272e-06, "loss": 31.1478, "step": 130020 }, { "epoch": 0.26266882678765496, "grad_norm": 701.81591796875, "learning_rate": 9.23968545763147e-06, "loss": 24.908, "step": 130030 }, { "epoch": 0.2626890274203388, "grad_norm": 200.5743865966797, "learning_rate": 9.239500408585956e-06, "loss": 17.0382, "step": 130040 }, { "epoch": 0.2627092280530226, "grad_norm": 92.90514373779297, "learning_rate": 9.239315338877632e-06, "loss": 15.775, "step": 130050 }, { "epoch": 0.2627294286857064, "grad_norm": 167.89163208007812, "learning_rate": 9.239130248507398e-06, "loss": 9.3582, "step": 130060 }, { "epoch": 0.26274962931839024, "grad_norm": 334.10107421875, "learning_rate": 9.238945137476157e-06, "loss": 22.8912, "step": 130070 }, { "epoch": 0.26276982995107406, "grad_norm": 294.6474304199219, "learning_rate": 9.23876000578481e-06, "loss": 26.47, "step": 130080 }, { "epoch": 0.2627900305837579, "grad_norm": 820.2103881835938, "learning_rate": 9.238574853434264e-06, "loss": 18.3633, "step": 130090 }, { "epoch": 0.2628102312164417, "grad_norm": 315.1759033203125, "learning_rate": 9.238389680425417e-06, "loss": 29.6387, "step": 130100 }, { "epoch": 0.2628304318491255, "grad_norm": 322.38299560546875, "learning_rate": 9.238204486759172e-06, "loss": 27.3251, "step": 130110 }, { "epoch": 0.26285063248180934, "grad_norm": 223.71653747558594, "learning_rate": 9.238019272436434e-06, "loss": 21.2153, "step": 130120 }, { "epoch": 0.26287083311449316, "grad_norm": 331.0341796875, "learning_rate": 9.237834037458102e-06, "loss": 16.9589, "step": 130130 }, { "epoch": 0.262891033747177, "grad_norm": 576.6720581054688, "learning_rate": 9.237648781825082e-06, "loss": 31.9558, "step": 130140 }, { "epoch": 0.2629112343798608, "grad_norm": 273.04437255859375, "learning_rate": 9.237463505538277e-06, "loss": 30.366, "step": 130150 }, { "epoch": 0.26293143501254457, "grad_norm": 168.84811401367188, "learning_rate": 9.237278208598587e-06, "loss": 20.8242, "step": 130160 }, { "epoch": 0.2629516356452284, "grad_norm": 154.12351989746094, "learning_rate": 9.237092891006918e-06, "loss": 16.7077, "step": 130170 }, { "epoch": 0.2629718362779122, "grad_norm": 265.9403381347656, "learning_rate": 9.236907552764171e-06, "loss": 17.8871, "step": 130180 }, { "epoch": 0.262992036910596, "grad_norm": 29.4339599609375, "learning_rate": 9.236722193871252e-06, "loss": 16.74, "step": 130190 }, { "epoch": 0.26301223754327985, "grad_norm": 462.0174865722656, "learning_rate": 9.236536814329062e-06, "loss": 21.9085, "step": 130200 }, { "epoch": 0.26303243817596367, "grad_norm": 144.95790100097656, "learning_rate": 9.236351414138505e-06, "loss": 13.7305, "step": 130210 }, { "epoch": 0.2630526388086475, "grad_norm": 214.11367797851562, "learning_rate": 9.236165993300486e-06, "loss": 15.7088, "step": 130220 }, { "epoch": 0.2630728394413313, "grad_norm": 219.56085205078125, "learning_rate": 9.235980551815907e-06, "loss": 13.8076, "step": 130230 }, { "epoch": 0.2630930400740151, "grad_norm": 182.3064727783203, "learning_rate": 9.235795089685673e-06, "loss": 23.6947, "step": 130240 }, { "epoch": 0.26311324070669895, "grad_norm": 238.17941284179688, "learning_rate": 9.235609606910687e-06, "loss": 24.5757, "step": 130250 }, { "epoch": 0.26313344133938277, "grad_norm": 111.83209991455078, "learning_rate": 9.235424103491853e-06, "loss": 19.6934, "step": 130260 }, { "epoch": 0.2631536419720666, "grad_norm": 435.9765319824219, "learning_rate": 9.235238579430077e-06, "loss": 19.353, "step": 130270 }, { "epoch": 0.2631738426047504, "grad_norm": 314.8712463378906, "learning_rate": 9.235053034726261e-06, "loss": 18.3918, "step": 130280 }, { "epoch": 0.26319404323743417, "grad_norm": 344.42156982421875, "learning_rate": 9.23486746938131e-06, "loss": 22.597, "step": 130290 }, { "epoch": 0.263214243870118, "grad_norm": 707.9059448242188, "learning_rate": 9.234681883396129e-06, "loss": 17.2105, "step": 130300 }, { "epoch": 0.2632344445028018, "grad_norm": 267.44073486328125, "learning_rate": 9.234496276771622e-06, "loss": 12.8405, "step": 130310 }, { "epoch": 0.26325464513548563, "grad_norm": 229.5823974609375, "learning_rate": 9.234310649508694e-06, "loss": 18.3125, "step": 130320 }, { "epoch": 0.26327484576816945, "grad_norm": 368.01629638671875, "learning_rate": 9.23412500160825e-06, "loss": 31.1658, "step": 130330 }, { "epoch": 0.26329504640085327, "grad_norm": 0.0, "learning_rate": 9.233939333071193e-06, "loss": 23.0912, "step": 130340 }, { "epoch": 0.2633152470335371, "grad_norm": 227.6385040283203, "learning_rate": 9.233753643898428e-06, "loss": 20.806, "step": 130350 }, { "epoch": 0.2633354476662209, "grad_norm": 160.9967803955078, "learning_rate": 9.233567934090864e-06, "loss": 10.7392, "step": 130360 }, { "epoch": 0.26335564829890473, "grad_norm": 409.976318359375, "learning_rate": 9.233382203649402e-06, "loss": 41.9382, "step": 130370 }, { "epoch": 0.26337584893158855, "grad_norm": 184.9346466064453, "learning_rate": 9.23319645257495e-06, "loss": 19.929, "step": 130380 }, { "epoch": 0.26339604956427237, "grad_norm": 164.72219848632812, "learning_rate": 9.233010680868409e-06, "loss": 20.3287, "step": 130390 }, { "epoch": 0.2634162501969562, "grad_norm": 561.0146484375, "learning_rate": 9.232824888530689e-06, "loss": 19.8383, "step": 130400 }, { "epoch": 0.26343645082964, "grad_norm": 514.4578857421875, "learning_rate": 9.232639075562695e-06, "loss": 17.6474, "step": 130410 }, { "epoch": 0.2634566514623238, "grad_norm": 352.5010986328125, "learning_rate": 9.23245324196533e-06, "loss": 16.6652, "step": 130420 }, { "epoch": 0.2634768520950076, "grad_norm": 272.9388427734375, "learning_rate": 9.232267387739502e-06, "loss": 23.3731, "step": 130430 }, { "epoch": 0.2634970527276914, "grad_norm": 384.89190673828125, "learning_rate": 9.232081512886116e-06, "loss": 16.2517, "step": 130440 }, { "epoch": 0.26351725336037524, "grad_norm": 1004.5955200195312, "learning_rate": 9.231895617406076e-06, "loss": 20.1944, "step": 130450 }, { "epoch": 0.26353745399305906, "grad_norm": 685.2216796875, "learning_rate": 9.231709701300293e-06, "loss": 36.5053, "step": 130460 }, { "epoch": 0.2635576546257429, "grad_norm": 535.05810546875, "learning_rate": 9.23152376456967e-06, "loss": 24.9702, "step": 130470 }, { "epoch": 0.2635778552584267, "grad_norm": 307.9693908691406, "learning_rate": 9.231337807215111e-06, "loss": 22.3798, "step": 130480 }, { "epoch": 0.2635980558911105, "grad_norm": 157.90228271484375, "learning_rate": 9.231151829237527e-06, "loss": 31.5769, "step": 130490 }, { "epoch": 0.26361825652379434, "grad_norm": 264.05078125, "learning_rate": 9.230965830637821e-06, "loss": 16.9722, "step": 130500 }, { "epoch": 0.26363845715647816, "grad_norm": 321.2132263183594, "learning_rate": 9.230779811416901e-06, "loss": 17.4888, "step": 130510 }, { "epoch": 0.263658657789162, "grad_norm": 206.08302307128906, "learning_rate": 9.230593771575673e-06, "loss": 17.2631, "step": 130520 }, { "epoch": 0.2636788584218458, "grad_norm": 167.89801025390625, "learning_rate": 9.230407711115043e-06, "loss": 14.367, "step": 130530 }, { "epoch": 0.26369905905452956, "grad_norm": 535.6409301757812, "learning_rate": 9.230221630035921e-06, "loss": 18.8058, "step": 130540 }, { "epoch": 0.2637192596872134, "grad_norm": 168.85130310058594, "learning_rate": 9.230035528339212e-06, "loss": 16.9215, "step": 130550 }, { "epoch": 0.2637394603198972, "grad_norm": 198.4930877685547, "learning_rate": 9.229849406025821e-06, "loss": 20.545, "step": 130560 }, { "epoch": 0.263759660952581, "grad_norm": 656.6640014648438, "learning_rate": 9.22966326309666e-06, "loss": 21.3638, "step": 130570 }, { "epoch": 0.26377986158526484, "grad_norm": 139.74871826171875, "learning_rate": 9.22947709955263e-06, "loss": 8.4026, "step": 130580 }, { "epoch": 0.26380006221794866, "grad_norm": 278.6380310058594, "learning_rate": 9.229290915394643e-06, "loss": 15.1224, "step": 130590 }, { "epoch": 0.2638202628506325, "grad_norm": 105.4938735961914, "learning_rate": 9.229104710623604e-06, "loss": 17.8322, "step": 130600 }, { "epoch": 0.2638404634833163, "grad_norm": 196.124755859375, "learning_rate": 9.228918485240423e-06, "loss": 13.5918, "step": 130610 }, { "epoch": 0.2638606641160001, "grad_norm": 74.81546020507812, "learning_rate": 9.228732239246005e-06, "loss": 16.3609, "step": 130620 }, { "epoch": 0.26388086474868394, "grad_norm": 131.96311950683594, "learning_rate": 9.22854597264126e-06, "loss": 14.061, "step": 130630 }, { "epoch": 0.26390106538136776, "grad_norm": 388.83050537109375, "learning_rate": 9.228359685427095e-06, "loss": 42.7274, "step": 130640 }, { "epoch": 0.2639212660140516, "grad_norm": 242.0411376953125, "learning_rate": 9.228173377604417e-06, "loss": 18.447, "step": 130650 }, { "epoch": 0.2639414666467354, "grad_norm": 94.57605743408203, "learning_rate": 9.227987049174133e-06, "loss": 15.4147, "step": 130660 }, { "epoch": 0.26396166727941917, "grad_norm": 160.47158813476562, "learning_rate": 9.227800700137156e-06, "loss": 10.5492, "step": 130670 }, { "epoch": 0.263981867912103, "grad_norm": 460.70953369140625, "learning_rate": 9.22761433049439e-06, "loss": 16.2835, "step": 130680 }, { "epoch": 0.2640020685447868, "grad_norm": 225.00790405273438, "learning_rate": 9.227427940246744e-06, "loss": 16.0712, "step": 130690 }, { "epoch": 0.2640222691774706, "grad_norm": 350.09539794921875, "learning_rate": 9.227241529395127e-06, "loss": 19.2027, "step": 130700 }, { "epoch": 0.26404246981015445, "grad_norm": 377.72186279296875, "learning_rate": 9.22705509794045e-06, "loss": 17.673, "step": 130710 }, { "epoch": 0.26406267044283827, "grad_norm": 867.4427490234375, "learning_rate": 9.226868645883616e-06, "loss": 31.2545, "step": 130720 }, { "epoch": 0.2640828710755221, "grad_norm": 355.70794677734375, "learning_rate": 9.226682173225537e-06, "loss": 17.7609, "step": 130730 }, { "epoch": 0.2641030717082059, "grad_norm": 238.36830139160156, "learning_rate": 9.226495679967123e-06, "loss": 29.4404, "step": 130740 }, { "epoch": 0.2641232723408897, "grad_norm": 497.3797912597656, "learning_rate": 9.226309166109281e-06, "loss": 35.4831, "step": 130750 }, { "epoch": 0.26414347297357355, "grad_norm": 383.9061279296875, "learning_rate": 9.226122631652921e-06, "loss": 28.0227, "step": 130760 }, { "epoch": 0.26416367360625737, "grad_norm": 215.87266540527344, "learning_rate": 9.225936076598952e-06, "loss": 14.3393, "step": 130770 }, { "epoch": 0.2641838742389412, "grad_norm": 189.21929931640625, "learning_rate": 9.225749500948283e-06, "loss": 18.4508, "step": 130780 }, { "epoch": 0.264204074871625, "grad_norm": 247.8761444091797, "learning_rate": 9.225562904701823e-06, "loss": 34.6637, "step": 130790 }, { "epoch": 0.26422427550430877, "grad_norm": 358.6277770996094, "learning_rate": 9.225376287860484e-06, "loss": 25.7525, "step": 130800 }, { "epoch": 0.2642444761369926, "grad_norm": 813.1657104492188, "learning_rate": 9.22518965042517e-06, "loss": 29.516, "step": 130810 }, { "epoch": 0.2642646767696764, "grad_norm": 295.0511474609375, "learning_rate": 9.225002992396797e-06, "loss": 23.8186, "step": 130820 }, { "epoch": 0.26428487740236023, "grad_norm": 147.18861389160156, "learning_rate": 9.22481631377627e-06, "loss": 13.8016, "step": 130830 }, { "epoch": 0.26430507803504405, "grad_norm": 348.0419006347656, "learning_rate": 9.224629614564502e-06, "loss": 15.0785, "step": 130840 }, { "epoch": 0.26432527866772787, "grad_norm": 55.73855209350586, "learning_rate": 9.224442894762401e-06, "loss": 26.4671, "step": 130850 }, { "epoch": 0.2643454793004117, "grad_norm": 4.948325157165527, "learning_rate": 9.224256154370878e-06, "loss": 17.6347, "step": 130860 }, { "epoch": 0.2643656799330955, "grad_norm": 226.4357147216797, "learning_rate": 9.224069393390843e-06, "loss": 25.6598, "step": 130870 }, { "epoch": 0.26438588056577933, "grad_norm": 361.1082458496094, "learning_rate": 9.223882611823205e-06, "loss": 31.9383, "step": 130880 }, { "epoch": 0.26440608119846315, "grad_norm": 303.35662841796875, "learning_rate": 9.223695809668876e-06, "loss": 18.953, "step": 130890 }, { "epoch": 0.26442628183114697, "grad_norm": 141.02706909179688, "learning_rate": 9.223508986928766e-06, "loss": 19.1096, "step": 130900 }, { "epoch": 0.2644464824638308, "grad_norm": 120.77274322509766, "learning_rate": 9.223322143603786e-06, "loss": 28.3943, "step": 130910 }, { "epoch": 0.2644666830965146, "grad_norm": 636.3385009765625, "learning_rate": 9.223135279694845e-06, "loss": 17.5156, "step": 130920 }, { "epoch": 0.2644868837291984, "grad_norm": 212.68528747558594, "learning_rate": 9.222948395202855e-06, "loss": 17.3563, "step": 130930 }, { "epoch": 0.2645070843618822, "grad_norm": 566.427490234375, "learning_rate": 9.222761490128726e-06, "loss": 25.8142, "step": 130940 }, { "epoch": 0.264527284994566, "grad_norm": 193.68385314941406, "learning_rate": 9.222574564473372e-06, "loss": 22.8485, "step": 130950 }, { "epoch": 0.26454748562724983, "grad_norm": 484.2875061035156, "learning_rate": 9.222387618237701e-06, "loss": 23.8325, "step": 130960 }, { "epoch": 0.26456768625993365, "grad_norm": 303.17279052734375, "learning_rate": 9.222200651422624e-06, "loss": 18.7769, "step": 130970 }, { "epoch": 0.2645878868926175, "grad_norm": 279.4579162597656, "learning_rate": 9.222013664029053e-06, "loss": 17.3292, "step": 130980 }, { "epoch": 0.2646080875253013, "grad_norm": 145.6254425048828, "learning_rate": 9.2218266560579e-06, "loss": 17.7783, "step": 130990 }, { "epoch": 0.2646282881579851, "grad_norm": 361.9292907714844, "learning_rate": 9.221639627510076e-06, "loss": 13.5815, "step": 131000 }, { "epoch": 0.26464848879066893, "grad_norm": 201.07464599609375, "learning_rate": 9.221452578386492e-06, "loss": 25.1806, "step": 131010 }, { "epoch": 0.26466868942335275, "grad_norm": 284.2518005371094, "learning_rate": 9.221265508688061e-06, "loss": 8.4773, "step": 131020 }, { "epoch": 0.2646888900560366, "grad_norm": 236.3583221435547, "learning_rate": 9.221078418415692e-06, "loss": 20.3154, "step": 131030 }, { "epoch": 0.2647090906887204, "grad_norm": 205.55435180664062, "learning_rate": 9.220891307570301e-06, "loss": 11.8411, "step": 131040 }, { "epoch": 0.26472929132140416, "grad_norm": 262.3857116699219, "learning_rate": 9.220704176152798e-06, "loss": 16.5035, "step": 131050 }, { "epoch": 0.264749491954088, "grad_norm": 115.07047271728516, "learning_rate": 9.220517024164092e-06, "loss": 16.5697, "step": 131060 }, { "epoch": 0.2647696925867718, "grad_norm": 455.1586608886719, "learning_rate": 9.2203298516051e-06, "loss": 16.019, "step": 131070 }, { "epoch": 0.2647898932194556, "grad_norm": 170.8802032470703, "learning_rate": 9.220142658476732e-06, "loss": 22.5031, "step": 131080 }, { "epoch": 0.26481009385213944, "grad_norm": 841.9189453125, "learning_rate": 9.2199554447799e-06, "loss": 15.0185, "step": 131090 }, { "epoch": 0.26483029448482326, "grad_norm": 358.7441711425781, "learning_rate": 9.219768210515518e-06, "loss": 20.8689, "step": 131100 }, { "epoch": 0.2648504951175071, "grad_norm": 382.5992736816406, "learning_rate": 9.219580955684495e-06, "loss": 28.0323, "step": 131110 }, { "epoch": 0.2648706957501909, "grad_norm": 49.77172088623047, "learning_rate": 9.21939368028775e-06, "loss": 26.6038, "step": 131120 }, { "epoch": 0.2648908963828747, "grad_norm": 396.30169677734375, "learning_rate": 9.21920638432619e-06, "loss": 17.7471, "step": 131130 }, { "epoch": 0.26491109701555854, "grad_norm": 356.1116943359375, "learning_rate": 9.219019067800728e-06, "loss": 29.8889, "step": 131140 }, { "epoch": 0.26493129764824236, "grad_norm": 232.90086364746094, "learning_rate": 9.218831730712281e-06, "loss": 27.4981, "step": 131150 }, { "epoch": 0.2649514982809262, "grad_norm": 302.7195739746094, "learning_rate": 9.218644373061759e-06, "loss": 30.8061, "step": 131160 }, { "epoch": 0.26497169891361, "grad_norm": 290.3616943359375, "learning_rate": 9.218456994850076e-06, "loss": 26.8306, "step": 131170 }, { "epoch": 0.26499189954629376, "grad_norm": 212.21755981445312, "learning_rate": 9.218269596078145e-06, "loss": 18.7591, "step": 131180 }, { "epoch": 0.2650121001789776, "grad_norm": 354.3333740234375, "learning_rate": 9.21808217674688e-06, "loss": 27.855, "step": 131190 }, { "epoch": 0.2650323008116614, "grad_norm": 324.7735900878906, "learning_rate": 9.217894736857195e-06, "loss": 31.2724, "step": 131200 }, { "epoch": 0.2650525014443452, "grad_norm": 186.60775756835938, "learning_rate": 9.217707276410002e-06, "loss": 18.6625, "step": 131210 }, { "epoch": 0.26507270207702904, "grad_norm": 295.36846923828125, "learning_rate": 9.217519795406214e-06, "loss": 16.2716, "step": 131220 }, { "epoch": 0.26509290270971286, "grad_norm": 185.4807891845703, "learning_rate": 9.217332293846747e-06, "loss": 16.9828, "step": 131230 }, { "epoch": 0.2651131033423967, "grad_norm": 366.7891540527344, "learning_rate": 9.217144771732515e-06, "loss": 18.7757, "step": 131240 }, { "epoch": 0.2651333039750805, "grad_norm": 523.4879760742188, "learning_rate": 9.21695722906443e-06, "loss": 36.4144, "step": 131250 }, { "epoch": 0.2651535046077643, "grad_norm": 365.41802978515625, "learning_rate": 9.216769665843406e-06, "loss": 20.6369, "step": 131260 }, { "epoch": 0.26517370524044814, "grad_norm": 223.75750732421875, "learning_rate": 9.216582082070359e-06, "loss": 24.4276, "step": 131270 }, { "epoch": 0.26519390587313196, "grad_norm": 134.6510009765625, "learning_rate": 9.216394477746202e-06, "loss": 22.7861, "step": 131280 }, { "epoch": 0.2652141065058158, "grad_norm": 321.4700622558594, "learning_rate": 9.21620685287185e-06, "loss": 10.7645, "step": 131290 }, { "epoch": 0.2652343071384996, "grad_norm": 261.88531494140625, "learning_rate": 9.216019207448216e-06, "loss": 25.7997, "step": 131300 }, { "epoch": 0.26525450777118337, "grad_norm": 145.2380828857422, "learning_rate": 9.215831541476217e-06, "loss": 27.6914, "step": 131310 }, { "epoch": 0.2652747084038672, "grad_norm": 269.56219482421875, "learning_rate": 9.215643854956766e-06, "loss": 35.4358, "step": 131320 }, { "epoch": 0.265294909036551, "grad_norm": 32.369441986083984, "learning_rate": 9.215456147890778e-06, "loss": 24.3982, "step": 131330 }, { "epoch": 0.2653151096692348, "grad_norm": 277.0848388671875, "learning_rate": 9.215268420279168e-06, "loss": 24.683, "step": 131340 }, { "epoch": 0.26533531030191865, "grad_norm": 38.84209060668945, "learning_rate": 9.215080672122854e-06, "loss": 14.8635, "step": 131350 }, { "epoch": 0.26535551093460247, "grad_norm": 32.338401794433594, "learning_rate": 9.214892903422745e-06, "loss": 24.1664, "step": 131360 }, { "epoch": 0.2653757115672863, "grad_norm": 55.134517669677734, "learning_rate": 9.214705114179759e-06, "loss": 21.923, "step": 131370 }, { "epoch": 0.2653959121999701, "grad_norm": 590.6253051757812, "learning_rate": 9.214517304394813e-06, "loss": 22.8793, "step": 131380 }, { "epoch": 0.2654161128326539, "grad_norm": 37.100582122802734, "learning_rate": 9.214329474068818e-06, "loss": 15.9547, "step": 131390 }, { "epoch": 0.26543631346533775, "grad_norm": 460.7122497558594, "learning_rate": 9.214141623202694e-06, "loss": 29.4425, "step": 131400 }, { "epoch": 0.26545651409802157, "grad_norm": 215.329345703125, "learning_rate": 9.213953751797355e-06, "loss": 14.2494, "step": 131410 }, { "epoch": 0.2654767147307054, "grad_norm": 336.56671142578125, "learning_rate": 9.213765859853717e-06, "loss": 18.0861, "step": 131420 }, { "epoch": 0.2654969153633892, "grad_norm": 426.3466796875, "learning_rate": 9.213577947372694e-06, "loss": 21.4584, "step": 131430 }, { "epoch": 0.26551711599607297, "grad_norm": 177.53062438964844, "learning_rate": 9.213390014355204e-06, "loss": 28.7605, "step": 131440 }, { "epoch": 0.2655373166287568, "grad_norm": 209.9029541015625, "learning_rate": 9.213202060802162e-06, "loss": 51.2386, "step": 131450 }, { "epoch": 0.2655575172614406, "grad_norm": 44.30344772338867, "learning_rate": 9.213014086714484e-06, "loss": 18.7887, "step": 131460 }, { "epoch": 0.26557771789412443, "grad_norm": 203.44947814941406, "learning_rate": 9.212826092093085e-06, "loss": 16.0005, "step": 131470 }, { "epoch": 0.26559791852680825, "grad_norm": 267.8418884277344, "learning_rate": 9.212638076938885e-06, "loss": 19.842, "step": 131480 }, { "epoch": 0.26561811915949207, "grad_norm": 288.79534912109375, "learning_rate": 9.212450041252797e-06, "loss": 24.4326, "step": 131490 }, { "epoch": 0.2656383197921759, "grad_norm": 258.497802734375, "learning_rate": 9.21226198503574e-06, "loss": 21.3712, "step": 131500 }, { "epoch": 0.2656585204248597, "grad_norm": 443.02423095703125, "learning_rate": 9.212073908288626e-06, "loss": 14.8822, "step": 131510 }, { "epoch": 0.26567872105754353, "grad_norm": 791.5526123046875, "learning_rate": 9.211885811012376e-06, "loss": 19.883, "step": 131520 }, { "epoch": 0.26569892169022735, "grad_norm": 296.625244140625, "learning_rate": 9.211697693207905e-06, "loss": 23.1266, "step": 131530 }, { "epoch": 0.26571912232291117, "grad_norm": 265.65771484375, "learning_rate": 9.21150955487613e-06, "loss": 19.1879, "step": 131540 }, { "epoch": 0.265739322955595, "grad_norm": 336.53302001953125, "learning_rate": 9.21132139601797e-06, "loss": 38.7844, "step": 131550 }, { "epoch": 0.2657595235882788, "grad_norm": 351.2852478027344, "learning_rate": 9.211133216634339e-06, "loss": 24.5874, "step": 131560 }, { "epoch": 0.2657797242209626, "grad_norm": 361.46539306640625, "learning_rate": 9.210945016726155e-06, "loss": 16.4213, "step": 131570 }, { "epoch": 0.2657999248536464, "grad_norm": 192.3424835205078, "learning_rate": 9.210756796294335e-06, "loss": 27.0096, "step": 131580 }, { "epoch": 0.2658201254863302, "grad_norm": 272.88519287109375, "learning_rate": 9.2105685553398e-06, "loss": 34.5145, "step": 131590 }, { "epoch": 0.26584032611901404, "grad_norm": 218.7504425048828, "learning_rate": 9.210380293863462e-06, "loss": 24.0416, "step": 131600 }, { "epoch": 0.26586052675169786, "grad_norm": 131.02207946777344, "learning_rate": 9.210192011866242e-06, "loss": 25.5761, "step": 131610 }, { "epoch": 0.2658807273843817, "grad_norm": 315.1550598144531, "learning_rate": 9.210003709349058e-06, "loss": 25.0471, "step": 131620 }, { "epoch": 0.2659009280170655, "grad_norm": 614.7770385742188, "learning_rate": 9.209815386312824e-06, "loss": 27.8007, "step": 131630 }, { "epoch": 0.2659211286497493, "grad_norm": 23.959182739257812, "learning_rate": 9.209627042758462e-06, "loss": 16.335, "step": 131640 }, { "epoch": 0.26594132928243314, "grad_norm": 109.56261444091797, "learning_rate": 9.209438678686888e-06, "loss": 18.5758, "step": 131650 }, { "epoch": 0.26596152991511696, "grad_norm": 164.66470336914062, "learning_rate": 9.209250294099021e-06, "loss": 22.8213, "step": 131660 }, { "epoch": 0.2659817305478008, "grad_norm": 515.708251953125, "learning_rate": 9.209061888995777e-06, "loss": 13.3993, "step": 131670 }, { "epoch": 0.2660019311804846, "grad_norm": 726.3757934570312, "learning_rate": 9.208873463378078e-06, "loss": 22.0186, "step": 131680 }, { "epoch": 0.26602213181316836, "grad_norm": 138.14413452148438, "learning_rate": 9.208685017246839e-06, "loss": 19.2736, "step": 131690 }, { "epoch": 0.2660423324458522, "grad_norm": 394.1117858886719, "learning_rate": 9.208496550602979e-06, "loss": 23.9208, "step": 131700 }, { "epoch": 0.266062533078536, "grad_norm": 131.0630645751953, "learning_rate": 9.208308063447418e-06, "loss": 15.5976, "step": 131710 }, { "epoch": 0.2660827337112198, "grad_norm": 198.0703582763672, "learning_rate": 9.208119555781074e-06, "loss": 25.5678, "step": 131720 }, { "epoch": 0.26610293434390364, "grad_norm": 111.7975845336914, "learning_rate": 9.207931027604867e-06, "loss": 15.8477, "step": 131730 }, { "epoch": 0.26612313497658746, "grad_norm": 286.3114013671875, "learning_rate": 9.207742478919713e-06, "loss": 17.3215, "step": 131740 }, { "epoch": 0.2661433356092713, "grad_norm": 145.40310668945312, "learning_rate": 9.207553909726532e-06, "loss": 19.5132, "step": 131750 }, { "epoch": 0.2661635362419551, "grad_norm": 0.0, "learning_rate": 9.207365320026244e-06, "loss": 17.6981, "step": 131760 }, { "epoch": 0.2661837368746389, "grad_norm": 95.64360046386719, "learning_rate": 9.207176709819768e-06, "loss": 15.9734, "step": 131770 }, { "epoch": 0.26620393750732274, "grad_norm": 387.3345642089844, "learning_rate": 9.206988079108023e-06, "loss": 27.1973, "step": 131780 }, { "epoch": 0.26622413814000656, "grad_norm": 315.1808166503906, "learning_rate": 9.206799427891928e-06, "loss": 15.3424, "step": 131790 }, { "epoch": 0.2662443387726904, "grad_norm": 811.95361328125, "learning_rate": 9.206610756172402e-06, "loss": 37.6552, "step": 131800 }, { "epoch": 0.2662645394053742, "grad_norm": 287.294677734375, "learning_rate": 9.206422063950368e-06, "loss": 40.5785, "step": 131810 }, { "epoch": 0.26628474003805797, "grad_norm": 90.23150634765625, "learning_rate": 9.206233351226742e-06, "loss": 23.8886, "step": 131820 }, { "epoch": 0.2663049406707418, "grad_norm": 223.1604461669922, "learning_rate": 9.206044618002443e-06, "loss": 22.9928, "step": 131830 }, { "epoch": 0.2663251413034256, "grad_norm": 1019.6112060546875, "learning_rate": 9.205855864278394e-06, "loss": 25.3168, "step": 131840 }, { "epoch": 0.2663453419361094, "grad_norm": 328.9830322265625, "learning_rate": 9.205667090055513e-06, "loss": 14.5425, "step": 131850 }, { "epoch": 0.26636554256879325, "grad_norm": 396.987060546875, "learning_rate": 9.205478295334722e-06, "loss": 24.8156, "step": 131860 }, { "epoch": 0.26638574320147707, "grad_norm": 63.40408706665039, "learning_rate": 9.20528948011694e-06, "loss": 16.3502, "step": 131870 }, { "epoch": 0.2664059438341609, "grad_norm": 85.84463500976562, "learning_rate": 9.205100644403084e-06, "loss": 23.3627, "step": 131880 }, { "epoch": 0.2664261444668447, "grad_norm": 176.727294921875, "learning_rate": 9.20491178819408e-06, "loss": 13.5899, "step": 131890 }, { "epoch": 0.2664463450995285, "grad_norm": 183.9629364013672, "learning_rate": 9.204722911490847e-06, "loss": 14.3959, "step": 131900 }, { "epoch": 0.26646654573221235, "grad_norm": 64.25181579589844, "learning_rate": 9.204534014294302e-06, "loss": 14.7777, "step": 131910 }, { "epoch": 0.26648674636489617, "grad_norm": 355.75439453125, "learning_rate": 9.204345096605369e-06, "loss": 19.5682, "step": 131920 }, { "epoch": 0.26650694699758, "grad_norm": 366.1025390625, "learning_rate": 9.204156158424969e-06, "loss": 28.5282, "step": 131930 }, { "epoch": 0.2665271476302638, "grad_norm": 381.6898193359375, "learning_rate": 9.20396719975402e-06, "loss": 19.394, "step": 131940 }, { "epoch": 0.26654734826294757, "grad_norm": 387.0378112792969, "learning_rate": 9.203778220593447e-06, "loss": 23.5318, "step": 131950 }, { "epoch": 0.2665675488956314, "grad_norm": 408.1318054199219, "learning_rate": 9.203589220944166e-06, "loss": 12.3876, "step": 131960 }, { "epoch": 0.2665877495283152, "grad_norm": 115.05711364746094, "learning_rate": 9.203400200807104e-06, "loss": 22.9614, "step": 131970 }, { "epoch": 0.26660795016099903, "grad_norm": 528.1298828125, "learning_rate": 9.203211160183177e-06, "loss": 24.5819, "step": 131980 }, { "epoch": 0.26662815079368285, "grad_norm": 234.77857971191406, "learning_rate": 9.20302209907331e-06, "loss": 15.0943, "step": 131990 }, { "epoch": 0.26664835142636667, "grad_norm": 145.06007385253906, "learning_rate": 9.202833017478421e-06, "loss": 8.3666, "step": 132000 }, { "epoch": 0.2666685520590505, "grad_norm": 148.48989868164062, "learning_rate": 9.202643915399436e-06, "loss": 27.2527, "step": 132010 }, { "epoch": 0.2666887526917343, "grad_norm": 437.6144714355469, "learning_rate": 9.202454792837273e-06, "loss": 16.6817, "step": 132020 }, { "epoch": 0.26670895332441813, "grad_norm": 253.79168701171875, "learning_rate": 9.202265649792856e-06, "loss": 26.7523, "step": 132030 }, { "epoch": 0.26672915395710195, "grad_norm": 568.609619140625, "learning_rate": 9.202076486267106e-06, "loss": 39.5765, "step": 132040 }, { "epoch": 0.26674935458978577, "grad_norm": 184.16366577148438, "learning_rate": 9.201887302260943e-06, "loss": 38.3333, "step": 132050 }, { "epoch": 0.2667695552224696, "grad_norm": 270.7777404785156, "learning_rate": 9.201698097775291e-06, "loss": 10.7028, "step": 132060 }, { "epoch": 0.2667897558551534, "grad_norm": 432.274658203125, "learning_rate": 9.201508872811074e-06, "loss": 21.4385, "step": 132070 }, { "epoch": 0.2668099564878372, "grad_norm": 209.89505004882812, "learning_rate": 9.201319627369211e-06, "loss": 24.6184, "step": 132080 }, { "epoch": 0.266830157120521, "grad_norm": 226.97816467285156, "learning_rate": 9.201130361450627e-06, "loss": 15.8131, "step": 132090 }, { "epoch": 0.2668503577532048, "grad_norm": 395.8572998046875, "learning_rate": 9.200941075056242e-06, "loss": 15.1805, "step": 132100 }, { "epoch": 0.26687055838588863, "grad_norm": 518.3175659179688, "learning_rate": 9.20075176818698e-06, "loss": 20.0201, "step": 132110 }, { "epoch": 0.26689075901857245, "grad_norm": 272.2595520019531, "learning_rate": 9.200562440843763e-06, "loss": 24.8606, "step": 132120 }, { "epoch": 0.2669109596512563, "grad_norm": 429.3791198730469, "learning_rate": 9.200373093027515e-06, "loss": 24.6751, "step": 132130 }, { "epoch": 0.2669311602839401, "grad_norm": 310.9672546386719, "learning_rate": 9.200183724739158e-06, "loss": 18.1408, "step": 132140 }, { "epoch": 0.2669513609166239, "grad_norm": 37.14625930786133, "learning_rate": 9.199994335979613e-06, "loss": 18.0082, "step": 132150 }, { "epoch": 0.26697156154930773, "grad_norm": 326.14996337890625, "learning_rate": 9.199804926749807e-06, "loss": 22.1183, "step": 132160 }, { "epoch": 0.26699176218199155, "grad_norm": 428.1421203613281, "learning_rate": 9.19961549705066e-06, "loss": 12.698, "step": 132170 }, { "epoch": 0.2670119628146754, "grad_norm": 213.8477325439453, "learning_rate": 9.199426046883097e-06, "loss": 26.3443, "step": 132180 }, { "epoch": 0.2670321634473592, "grad_norm": 416.07574462890625, "learning_rate": 9.19923657624804e-06, "loss": 22.351, "step": 132190 }, { "epoch": 0.267052364080043, "grad_norm": 17.453109741210938, "learning_rate": 9.199047085146415e-06, "loss": 17.3136, "step": 132200 }, { "epoch": 0.2670725647127268, "grad_norm": 1007.7659912109375, "learning_rate": 9.198857573579143e-06, "loss": 17.0352, "step": 132210 }, { "epoch": 0.2670927653454106, "grad_norm": 205.6129913330078, "learning_rate": 9.198668041547149e-06, "loss": 30.2993, "step": 132220 }, { "epoch": 0.2671129659780944, "grad_norm": 190.1449737548828, "learning_rate": 9.198478489051355e-06, "loss": 21.8533, "step": 132230 }, { "epoch": 0.26713316661077824, "grad_norm": 248.04788208007812, "learning_rate": 9.198288916092685e-06, "loss": 15.9535, "step": 132240 }, { "epoch": 0.26715336724346206, "grad_norm": 211.1146240234375, "learning_rate": 9.198099322672066e-06, "loss": 27.3251, "step": 132250 }, { "epoch": 0.2671735678761459, "grad_norm": 43.873775482177734, "learning_rate": 9.19790970879042e-06, "loss": 23.2796, "step": 132260 }, { "epoch": 0.2671937685088297, "grad_norm": 354.9283752441406, "learning_rate": 9.19772007444867e-06, "loss": 19.3009, "step": 132270 }, { "epoch": 0.2672139691415135, "grad_norm": 971.1929321289062, "learning_rate": 9.197530419647744e-06, "loss": 26.6245, "step": 132280 }, { "epoch": 0.26723416977419734, "grad_norm": 361.4471130371094, "learning_rate": 9.197340744388562e-06, "loss": 17.2705, "step": 132290 }, { "epoch": 0.26725437040688116, "grad_norm": 222.39024353027344, "learning_rate": 9.197151048672051e-06, "loss": 19.2487, "step": 132300 }, { "epoch": 0.267274571039565, "grad_norm": 317.63153076171875, "learning_rate": 9.196961332499133e-06, "loss": 18.2908, "step": 132310 }, { "epoch": 0.2672947716722488, "grad_norm": 238.85208129882812, "learning_rate": 9.196771595870736e-06, "loss": 21.2914, "step": 132320 }, { "epoch": 0.26731497230493256, "grad_norm": 205.6937713623047, "learning_rate": 9.196581838787784e-06, "loss": 17.7515, "step": 132330 }, { "epoch": 0.2673351729376164, "grad_norm": 110.98303985595703, "learning_rate": 9.196392061251199e-06, "loss": 14.6956, "step": 132340 }, { "epoch": 0.2673553735703002, "grad_norm": 187.3257293701172, "learning_rate": 9.196202263261908e-06, "loss": 11.0825, "step": 132350 }, { "epoch": 0.267375574202984, "grad_norm": 362.7536926269531, "learning_rate": 9.196012444820839e-06, "loss": 18.2864, "step": 132360 }, { "epoch": 0.26739577483566784, "grad_norm": 373.6578674316406, "learning_rate": 9.195822605928913e-06, "loss": 25.9045, "step": 132370 }, { "epoch": 0.26741597546835166, "grad_norm": 334.4429931640625, "learning_rate": 9.195632746587055e-06, "loss": 20.0686, "step": 132380 }, { "epoch": 0.2674361761010355, "grad_norm": 181.1815948486328, "learning_rate": 9.195442866796194e-06, "loss": 22.2578, "step": 132390 }, { "epoch": 0.2674563767337193, "grad_norm": 158.57501220703125, "learning_rate": 9.195252966557252e-06, "loss": 22.0235, "step": 132400 }, { "epoch": 0.2674765773664031, "grad_norm": 240.96176147460938, "learning_rate": 9.195063045871156e-06, "loss": 14.4997, "step": 132410 }, { "epoch": 0.26749677799908694, "grad_norm": 399.24884033203125, "learning_rate": 9.194873104738831e-06, "loss": 17.933, "step": 132420 }, { "epoch": 0.26751697863177076, "grad_norm": 292.900634765625, "learning_rate": 9.194683143161205e-06, "loss": 30.6744, "step": 132430 }, { "epoch": 0.2675371792644546, "grad_norm": 104.73880004882812, "learning_rate": 9.1944931611392e-06, "loss": 30.532, "step": 132440 }, { "epoch": 0.2675573798971384, "grad_norm": 98.16401672363281, "learning_rate": 9.194303158673744e-06, "loss": 19.744, "step": 132450 }, { "epoch": 0.26757758052982217, "grad_norm": 259.1365966796875, "learning_rate": 9.194113135765766e-06, "loss": 14.2575, "step": 132460 }, { "epoch": 0.267597781162506, "grad_norm": 503.4309997558594, "learning_rate": 9.193923092416187e-06, "loss": 32.503, "step": 132470 }, { "epoch": 0.2676179817951898, "grad_norm": 541.65869140625, "learning_rate": 9.193733028625936e-06, "loss": 13.4467, "step": 132480 }, { "epoch": 0.2676381824278736, "grad_norm": 564.7096557617188, "learning_rate": 9.193542944395938e-06, "loss": 30.5852, "step": 132490 }, { "epoch": 0.26765838306055745, "grad_norm": 498.1608581542969, "learning_rate": 9.193352839727122e-06, "loss": 21.075, "step": 132500 }, { "epoch": 0.26767858369324127, "grad_norm": 279.5301513671875, "learning_rate": 9.193162714620411e-06, "loss": 11.6294, "step": 132510 }, { "epoch": 0.2676987843259251, "grad_norm": 406.45526123046875, "learning_rate": 9.192972569076734e-06, "loss": 16.3702, "step": 132520 }, { "epoch": 0.2677189849586089, "grad_norm": 297.8812561035156, "learning_rate": 9.192782403097018e-06, "loss": 11.4888, "step": 132530 }, { "epoch": 0.2677391855912927, "grad_norm": 303.8533630371094, "learning_rate": 9.192592216682189e-06, "loss": 32.7983, "step": 132540 }, { "epoch": 0.26775938622397655, "grad_norm": 234.4134979248047, "learning_rate": 9.192402009833174e-06, "loss": 14.8128, "step": 132550 }, { "epoch": 0.26777958685666037, "grad_norm": 1049.6341552734375, "learning_rate": 9.192211782550899e-06, "loss": 31.9735, "step": 132560 }, { "epoch": 0.2677997874893442, "grad_norm": 965.6641235351562, "learning_rate": 9.192021534836293e-06, "loss": 30.6306, "step": 132570 }, { "epoch": 0.267819988122028, "grad_norm": 479.67266845703125, "learning_rate": 9.191831266690284e-06, "loss": 26.9133, "step": 132580 }, { "epoch": 0.26784018875471177, "grad_norm": 370.476806640625, "learning_rate": 9.191640978113796e-06, "loss": 16.8071, "step": 132590 }, { "epoch": 0.2678603893873956, "grad_norm": 266.4512634277344, "learning_rate": 9.191450669107758e-06, "loss": 51.2621, "step": 132600 }, { "epoch": 0.2678805900200794, "grad_norm": 0.0, "learning_rate": 9.191260339673099e-06, "loss": 10.5381, "step": 132610 }, { "epoch": 0.26790079065276323, "grad_norm": 223.70909118652344, "learning_rate": 9.191069989810743e-06, "loss": 20.0077, "step": 132620 }, { "epoch": 0.26792099128544705, "grad_norm": 208.69642639160156, "learning_rate": 9.190879619521623e-06, "loss": 23.237, "step": 132630 }, { "epoch": 0.26794119191813087, "grad_norm": 0.0, "learning_rate": 9.190689228806664e-06, "loss": 18.4919, "step": 132640 }, { "epoch": 0.2679613925508147, "grad_norm": 0.0, "learning_rate": 9.190498817666793e-06, "loss": 18.136, "step": 132650 }, { "epoch": 0.2679815931834985, "grad_norm": 92.2044677734375, "learning_rate": 9.190308386102938e-06, "loss": 14.0786, "step": 132660 }, { "epoch": 0.26800179381618233, "grad_norm": 388.5872497558594, "learning_rate": 9.19011793411603e-06, "loss": 26.3087, "step": 132670 }, { "epoch": 0.26802199444886615, "grad_norm": 177.7222137451172, "learning_rate": 9.189927461706994e-06, "loss": 22.7198, "step": 132680 }, { "epoch": 0.26804219508154997, "grad_norm": 609.1795654296875, "learning_rate": 9.189736968876761e-06, "loss": 23.8394, "step": 132690 }, { "epoch": 0.2680623957142338, "grad_norm": 368.980712890625, "learning_rate": 9.189546455626258e-06, "loss": 25.7562, "step": 132700 }, { "epoch": 0.2680825963469176, "grad_norm": 7.495061874389648, "learning_rate": 9.189355921956414e-06, "loss": 26.5232, "step": 132710 }, { "epoch": 0.2681027969796014, "grad_norm": 179.25186157226562, "learning_rate": 9.189165367868157e-06, "loss": 23.5647, "step": 132720 }, { "epoch": 0.2681229976122852, "grad_norm": 160.0326690673828, "learning_rate": 9.188974793362412e-06, "loss": 26.4754, "step": 132730 }, { "epoch": 0.268143198244969, "grad_norm": 197.23825073242188, "learning_rate": 9.188784198440118e-06, "loss": 12.0468, "step": 132740 }, { "epoch": 0.26816339887765284, "grad_norm": 273.7117614746094, "learning_rate": 9.188593583102195e-06, "loss": 17.5146, "step": 132750 }, { "epoch": 0.26818359951033666, "grad_norm": 285.4767761230469, "learning_rate": 9.188402947349575e-06, "loss": 14.888, "step": 132760 }, { "epoch": 0.2682038001430205, "grad_norm": 248.12193298339844, "learning_rate": 9.188212291183187e-06, "loss": 16.4195, "step": 132770 }, { "epoch": 0.2682240007757043, "grad_norm": 117.16590118408203, "learning_rate": 9.18802161460396e-06, "loss": 27.7209, "step": 132780 }, { "epoch": 0.2682442014083881, "grad_norm": 107.06631469726562, "learning_rate": 9.187830917612826e-06, "loss": 20.7498, "step": 132790 }, { "epoch": 0.26826440204107194, "grad_norm": 53.5087776184082, "learning_rate": 9.18764020021071e-06, "loss": 57.9235, "step": 132800 }, { "epoch": 0.26828460267375576, "grad_norm": 75.28845977783203, "learning_rate": 9.187449462398544e-06, "loss": 18.6495, "step": 132810 }, { "epoch": 0.2683048033064396, "grad_norm": 382.52789306640625, "learning_rate": 9.187258704177255e-06, "loss": 35.6245, "step": 132820 }, { "epoch": 0.2683250039391234, "grad_norm": 9.600459098815918, "learning_rate": 9.187067925547779e-06, "loss": 15.1469, "step": 132830 }, { "epoch": 0.2683452045718072, "grad_norm": 269.2230224609375, "learning_rate": 9.186877126511039e-06, "loss": 17.8122, "step": 132840 }, { "epoch": 0.268365405204491, "grad_norm": 30.430578231811523, "learning_rate": 9.186686307067968e-06, "loss": 23.8872, "step": 132850 }, { "epoch": 0.2683856058371748, "grad_norm": 280.35089111328125, "learning_rate": 9.186495467219497e-06, "loss": 24.4237, "step": 132860 }, { "epoch": 0.2684058064698586, "grad_norm": 159.50729370117188, "learning_rate": 9.186304606966554e-06, "loss": 27.3946, "step": 132870 }, { "epoch": 0.26842600710254244, "grad_norm": 439.1177978515625, "learning_rate": 9.186113726310068e-06, "loss": 23.4099, "step": 132880 }, { "epoch": 0.26844620773522626, "grad_norm": 575.4057006835938, "learning_rate": 9.185922825250975e-06, "loss": 20.2156, "step": 132890 }, { "epoch": 0.2684664083679101, "grad_norm": 136.48841857910156, "learning_rate": 9.1857319037902e-06, "loss": 18.913, "step": 132900 }, { "epoch": 0.2684866090005939, "grad_norm": 222.27105712890625, "learning_rate": 9.185540961928677e-06, "loss": 21.2555, "step": 132910 }, { "epoch": 0.2685068096332777, "grad_norm": 485.3514709472656, "learning_rate": 9.185349999667333e-06, "loss": 17.8538, "step": 132920 }, { "epoch": 0.26852701026596154, "grad_norm": 224.91043090820312, "learning_rate": 9.185159017007101e-06, "loss": 12.2962, "step": 132930 }, { "epoch": 0.26854721089864536, "grad_norm": 542.1920776367188, "learning_rate": 9.184968013948912e-06, "loss": 19.857, "step": 132940 }, { "epoch": 0.2685674115313292, "grad_norm": 254.21302795410156, "learning_rate": 9.184776990493696e-06, "loss": 18.0943, "step": 132950 }, { "epoch": 0.268587612164013, "grad_norm": 364.07977294921875, "learning_rate": 9.184585946642384e-06, "loss": 16.3364, "step": 132960 }, { "epoch": 0.26860781279669677, "grad_norm": 128.91323852539062, "learning_rate": 9.184394882395909e-06, "loss": 33.2618, "step": 132970 }, { "epoch": 0.2686280134293806, "grad_norm": 162.90670776367188, "learning_rate": 9.1842037977552e-06, "loss": 23.8781, "step": 132980 }, { "epoch": 0.2686482140620644, "grad_norm": 86.40279388427734, "learning_rate": 9.18401269272119e-06, "loss": 16.8672, "step": 132990 }, { "epoch": 0.2686684146947482, "grad_norm": 365.31561279296875, "learning_rate": 9.18382156729481e-06, "loss": 16.4353, "step": 133000 }, { "epoch": 0.26868861532743205, "grad_norm": 477.6526184082031, "learning_rate": 9.18363042147699e-06, "loss": 14.2922, "step": 133010 }, { "epoch": 0.26870881596011587, "grad_norm": 161.3505859375, "learning_rate": 9.183439255268662e-06, "loss": 31.4242, "step": 133020 }, { "epoch": 0.2687290165927997, "grad_norm": 157.21435546875, "learning_rate": 9.18324806867076e-06, "loss": 25.3449, "step": 133030 }, { "epoch": 0.2687492172254835, "grad_norm": 158.5929412841797, "learning_rate": 9.183056861684215e-06, "loss": 12.2123, "step": 133040 }, { "epoch": 0.2687694178581673, "grad_norm": 346.0450744628906, "learning_rate": 9.182865634309956e-06, "loss": 11.7307, "step": 133050 }, { "epoch": 0.26878961849085115, "grad_norm": 436.7522277832031, "learning_rate": 9.182674386548918e-06, "loss": 38.8923, "step": 133060 }, { "epoch": 0.26880981912353497, "grad_norm": 371.28411865234375, "learning_rate": 9.182483118402032e-06, "loss": 15.6076, "step": 133070 }, { "epoch": 0.2688300197562188, "grad_norm": 395.6809387207031, "learning_rate": 9.18229182987023e-06, "loss": 15.4543, "step": 133080 }, { "epoch": 0.2688502203889026, "grad_norm": 192.57028198242188, "learning_rate": 9.182100520954446e-06, "loss": 29.3794, "step": 133090 }, { "epoch": 0.26887042102158637, "grad_norm": 481.9129943847656, "learning_rate": 9.181909191655613e-06, "loss": 21.072, "step": 133100 }, { "epoch": 0.2688906216542702, "grad_norm": 284.41387939453125, "learning_rate": 9.18171784197466e-06, "loss": 35.763, "step": 133110 }, { "epoch": 0.268910822286954, "grad_norm": 214.3193359375, "learning_rate": 9.18152647191252e-06, "loss": 22.6472, "step": 133120 }, { "epoch": 0.26893102291963783, "grad_norm": 264.21551513671875, "learning_rate": 9.181335081470128e-06, "loss": 21.1529, "step": 133130 }, { "epoch": 0.26895122355232165, "grad_norm": 373.3157958984375, "learning_rate": 9.181143670648418e-06, "loss": 21.5126, "step": 133140 }, { "epoch": 0.26897142418500547, "grad_norm": 430.1833190917969, "learning_rate": 9.18095223944832e-06, "loss": 22.4942, "step": 133150 }, { "epoch": 0.2689916248176893, "grad_norm": 98.78097534179688, "learning_rate": 9.180760787870766e-06, "loss": 22.4513, "step": 133160 }, { "epoch": 0.2690118254503731, "grad_norm": 335.7779235839844, "learning_rate": 9.180569315916693e-06, "loss": 27.1377, "step": 133170 }, { "epoch": 0.26903202608305693, "grad_norm": 1433.2869873046875, "learning_rate": 9.180377823587031e-06, "loss": 32.2744, "step": 133180 }, { "epoch": 0.26905222671574075, "grad_norm": 382.0848388671875, "learning_rate": 9.180186310882715e-06, "loss": 27.5331, "step": 133190 }, { "epoch": 0.26907242734842457, "grad_norm": 779.1260375976562, "learning_rate": 9.179994777804677e-06, "loss": 25.6874, "step": 133200 }, { "epoch": 0.2690926279811084, "grad_norm": 207.5975341796875, "learning_rate": 9.179803224353854e-06, "loss": 23.4724, "step": 133210 }, { "epoch": 0.2691128286137922, "grad_norm": 457.01654052734375, "learning_rate": 9.179611650531174e-06, "loss": 18.8569, "step": 133220 }, { "epoch": 0.269133029246476, "grad_norm": 388.2160339355469, "learning_rate": 9.179420056337576e-06, "loss": 18.2812, "step": 133230 }, { "epoch": 0.2691532298791598, "grad_norm": 230.99644470214844, "learning_rate": 9.179228441773991e-06, "loss": 17.6737, "step": 133240 }, { "epoch": 0.2691734305118436, "grad_norm": 450.4414978027344, "learning_rate": 9.179036806841352e-06, "loss": 12.5032, "step": 133250 }, { "epoch": 0.26919363114452743, "grad_norm": 170.10377502441406, "learning_rate": 9.178845151540597e-06, "loss": 17.3016, "step": 133260 }, { "epoch": 0.26921383177721125, "grad_norm": 82.92369079589844, "learning_rate": 9.178653475872655e-06, "loss": 14.6605, "step": 133270 }, { "epoch": 0.2692340324098951, "grad_norm": 560.9869384765625, "learning_rate": 9.178461779838464e-06, "loss": 32.7731, "step": 133280 }, { "epoch": 0.2692542330425789, "grad_norm": 316.4130859375, "learning_rate": 9.178270063438957e-06, "loss": 25.2596, "step": 133290 }, { "epoch": 0.2692744336752627, "grad_norm": 390.6031494140625, "learning_rate": 9.178078326675069e-06, "loss": 34.1917, "step": 133300 }, { "epoch": 0.26929463430794653, "grad_norm": 223.0550079345703, "learning_rate": 9.177886569547731e-06, "loss": 23.9244, "step": 133310 }, { "epoch": 0.26931483494063035, "grad_norm": 504.6650695800781, "learning_rate": 9.177694792057884e-06, "loss": 22.7816, "step": 133320 }, { "epoch": 0.2693350355733142, "grad_norm": 224.6036376953125, "learning_rate": 9.177502994206457e-06, "loss": 15.1651, "step": 133330 }, { "epoch": 0.269355236205998, "grad_norm": 267.32427978515625, "learning_rate": 9.17731117599439e-06, "loss": 12.4133, "step": 133340 }, { "epoch": 0.2693754368386818, "grad_norm": 236.76768493652344, "learning_rate": 9.177119337422613e-06, "loss": 20.1599, "step": 133350 }, { "epoch": 0.2693956374713656, "grad_norm": 231.28042602539062, "learning_rate": 9.176927478492064e-06, "loss": 23.4146, "step": 133360 }, { "epoch": 0.2694158381040494, "grad_norm": 333.02850341796875, "learning_rate": 9.176735599203676e-06, "loss": 21.8672, "step": 133370 }, { "epoch": 0.2694360387367332, "grad_norm": 204.9567413330078, "learning_rate": 9.176543699558384e-06, "loss": 18.3534, "step": 133380 }, { "epoch": 0.26945623936941704, "grad_norm": 433.06829833984375, "learning_rate": 9.176351779557127e-06, "loss": 27.7429, "step": 133390 }, { "epoch": 0.26947644000210086, "grad_norm": 373.3736572265625, "learning_rate": 9.176159839200838e-06, "loss": 20.2198, "step": 133400 }, { "epoch": 0.2694966406347847, "grad_norm": 159.3482208251953, "learning_rate": 9.175967878490451e-06, "loss": 14.4216, "step": 133410 }, { "epoch": 0.2695168412674685, "grad_norm": 116.9761734008789, "learning_rate": 9.175775897426904e-06, "loss": 19.1808, "step": 133420 }, { "epoch": 0.2695370419001523, "grad_norm": 362.33868408203125, "learning_rate": 9.17558389601113e-06, "loss": 25.2172, "step": 133430 }, { "epoch": 0.26955724253283614, "grad_norm": 100.86964416503906, "learning_rate": 9.175391874244068e-06, "loss": 14.6104, "step": 133440 }, { "epoch": 0.26957744316551996, "grad_norm": 203.78346252441406, "learning_rate": 9.175199832126654e-06, "loss": 22.5774, "step": 133450 }, { "epoch": 0.2695976437982038, "grad_norm": 350.2778015136719, "learning_rate": 9.17500776965982e-06, "loss": 18.9959, "step": 133460 }, { "epoch": 0.2696178444308876, "grad_norm": 64.03233337402344, "learning_rate": 9.174815686844506e-06, "loss": 32.2294, "step": 133470 }, { "epoch": 0.2696380450635714, "grad_norm": 554.240478515625, "learning_rate": 9.174623583681644e-06, "loss": 35.6224, "step": 133480 }, { "epoch": 0.2696582456962552, "grad_norm": 320.8109436035156, "learning_rate": 9.174431460172175e-06, "loss": 10.4794, "step": 133490 }, { "epoch": 0.269678446328939, "grad_norm": 107.67835998535156, "learning_rate": 9.174239316317034e-06, "loss": 18.3269, "step": 133500 }, { "epoch": 0.2696986469616228, "grad_norm": 171.8280792236328, "learning_rate": 9.174047152117155e-06, "loss": 19.2402, "step": 133510 }, { "epoch": 0.26971884759430664, "grad_norm": 378.034912109375, "learning_rate": 9.173854967573479e-06, "loss": 31.218, "step": 133520 }, { "epoch": 0.26973904822699046, "grad_norm": 288.087158203125, "learning_rate": 9.173662762686937e-06, "loss": 15.2851, "step": 133530 }, { "epoch": 0.2697592488596743, "grad_norm": 467.5207824707031, "learning_rate": 9.17347053745847e-06, "loss": 23.7271, "step": 133540 }, { "epoch": 0.2697794494923581, "grad_norm": 573.9155883789062, "learning_rate": 9.173278291889016e-06, "loss": 16.6518, "step": 133550 }, { "epoch": 0.2697996501250419, "grad_norm": 290.9129638671875, "learning_rate": 9.173086025979507e-06, "loss": 11.2003, "step": 133560 }, { "epoch": 0.26981985075772574, "grad_norm": 269.2447204589844, "learning_rate": 9.172893739730884e-06, "loss": 19.0861, "step": 133570 }, { "epoch": 0.26984005139040956, "grad_norm": 278.99310302734375, "learning_rate": 9.172701433144083e-06, "loss": 23.591, "step": 133580 }, { "epoch": 0.2698602520230934, "grad_norm": 227.37892150878906, "learning_rate": 9.172509106220041e-06, "loss": 17.7537, "step": 133590 }, { "epoch": 0.2698804526557772, "grad_norm": 178.65574645996094, "learning_rate": 9.172316758959695e-06, "loss": 15.351, "step": 133600 }, { "epoch": 0.26990065328846097, "grad_norm": 351.1447448730469, "learning_rate": 9.172124391363986e-06, "loss": 26.6739, "step": 133610 }, { "epoch": 0.2699208539211448, "grad_norm": 81.51409912109375, "learning_rate": 9.171932003433845e-06, "loss": 19.5508, "step": 133620 }, { "epoch": 0.2699410545538286, "grad_norm": 640.6609497070312, "learning_rate": 9.171739595170217e-06, "loss": 34.9931, "step": 133630 }, { "epoch": 0.2699612551865124, "grad_norm": 293.3099060058594, "learning_rate": 9.171547166574034e-06, "loss": 12.9603, "step": 133640 }, { "epoch": 0.26998145581919625, "grad_norm": 122.59895324707031, "learning_rate": 9.171354717646238e-06, "loss": 29.8778, "step": 133650 }, { "epoch": 0.27000165645188007, "grad_norm": 4.375693321228027, "learning_rate": 9.171162248387762e-06, "loss": 15.977, "step": 133660 }, { "epoch": 0.2700218570845639, "grad_norm": 131.79083251953125, "learning_rate": 9.170969758799549e-06, "loss": 21.4054, "step": 133670 }, { "epoch": 0.2700420577172477, "grad_norm": 316.64263916015625, "learning_rate": 9.170777248882535e-06, "loss": 16.7775, "step": 133680 }, { "epoch": 0.2700622583499315, "grad_norm": 303.68572998046875, "learning_rate": 9.170584718637658e-06, "loss": 22.4445, "step": 133690 }, { "epoch": 0.27008245898261535, "grad_norm": 544.8724975585938, "learning_rate": 9.170392168065858e-06, "loss": 11.5178, "step": 133700 }, { "epoch": 0.27010265961529917, "grad_norm": 560.303466796875, "learning_rate": 9.17019959716807e-06, "loss": 18.0215, "step": 133710 }, { "epoch": 0.270122860247983, "grad_norm": 515.4376220703125, "learning_rate": 9.170007005945236e-06, "loss": 32.4402, "step": 133720 }, { "epoch": 0.2701430608806668, "grad_norm": 27.365123748779297, "learning_rate": 9.169814394398295e-06, "loss": 16.3249, "step": 133730 }, { "epoch": 0.27016326151335057, "grad_norm": 144.08935546875, "learning_rate": 9.169621762528184e-06, "loss": 25.41, "step": 133740 }, { "epoch": 0.2701834621460344, "grad_norm": 442.22686767578125, "learning_rate": 9.169429110335842e-06, "loss": 21.7591, "step": 133750 }, { "epoch": 0.2702036627787182, "grad_norm": 90.27652740478516, "learning_rate": 9.169236437822208e-06, "loss": 32.7758, "step": 133760 }, { "epoch": 0.27022386341140203, "grad_norm": 238.51576232910156, "learning_rate": 9.169043744988222e-06, "loss": 12.7668, "step": 133770 }, { "epoch": 0.27024406404408585, "grad_norm": 212.661376953125, "learning_rate": 9.16885103183482e-06, "loss": 13.643, "step": 133780 }, { "epoch": 0.27026426467676967, "grad_norm": 141.43858337402344, "learning_rate": 9.168658298362947e-06, "loss": 19.0997, "step": 133790 }, { "epoch": 0.2702844653094535, "grad_norm": 218.21783447265625, "learning_rate": 9.168465544573538e-06, "loss": 7.8951, "step": 133800 }, { "epoch": 0.2703046659421373, "grad_norm": 308.9769592285156, "learning_rate": 9.168272770467532e-06, "loss": 22.6926, "step": 133810 }, { "epoch": 0.27032486657482113, "grad_norm": 405.4007263183594, "learning_rate": 9.16807997604587e-06, "loss": 16.8012, "step": 133820 }, { "epoch": 0.27034506720750495, "grad_norm": 293.6951904296875, "learning_rate": 9.167887161309495e-06, "loss": 19.8761, "step": 133830 }, { "epoch": 0.27036526784018877, "grad_norm": 140.23353576660156, "learning_rate": 9.16769432625934e-06, "loss": 19.1617, "step": 133840 }, { "epoch": 0.2703854684728726, "grad_norm": 459.6859130859375, "learning_rate": 9.16750147089635e-06, "loss": 14.8451, "step": 133850 }, { "epoch": 0.2704056691055564, "grad_norm": 204.9563751220703, "learning_rate": 9.167308595221463e-06, "loss": 16.3916, "step": 133860 }, { "epoch": 0.2704258697382402, "grad_norm": 756.9768676757812, "learning_rate": 9.167115699235618e-06, "loss": 16.1417, "step": 133870 }, { "epoch": 0.270446070370924, "grad_norm": 143.90719604492188, "learning_rate": 9.166922782939759e-06, "loss": 23.2425, "step": 133880 }, { "epoch": 0.2704662710036078, "grad_norm": 551.8272094726562, "learning_rate": 9.166729846334822e-06, "loss": 38.7705, "step": 133890 }, { "epoch": 0.27048647163629164, "grad_norm": 544.5302734375, "learning_rate": 9.16653688942175e-06, "loss": 35.1314, "step": 133900 }, { "epoch": 0.27050667226897546, "grad_norm": 264.9273986816406, "learning_rate": 9.166343912201482e-06, "loss": 16.7368, "step": 133910 }, { "epoch": 0.2705268729016593, "grad_norm": 382.8266296386719, "learning_rate": 9.166150914674959e-06, "loss": 35.2651, "step": 133920 }, { "epoch": 0.2705470735343431, "grad_norm": 599.8588256835938, "learning_rate": 9.165957896843122e-06, "loss": 17.3055, "step": 133930 }, { "epoch": 0.2705672741670269, "grad_norm": 127.27802276611328, "learning_rate": 9.165764858706912e-06, "loss": 23.0175, "step": 133940 }, { "epoch": 0.27058747479971074, "grad_norm": 227.96299743652344, "learning_rate": 9.165571800267267e-06, "loss": 19.4146, "step": 133950 }, { "epoch": 0.27060767543239456, "grad_norm": 474.1632080078125, "learning_rate": 9.165378721525133e-06, "loss": 35.0007, "step": 133960 }, { "epoch": 0.2706278760650784, "grad_norm": 716.2421875, "learning_rate": 9.165185622481447e-06, "loss": 27.159, "step": 133970 }, { "epoch": 0.2706480766977622, "grad_norm": 331.5572509765625, "learning_rate": 9.164992503137152e-06, "loss": 18.0506, "step": 133980 }, { "epoch": 0.270668277330446, "grad_norm": 5.3651347160339355, "learning_rate": 9.16479936349319e-06, "loss": 23.6002, "step": 133990 }, { "epoch": 0.2706884779631298, "grad_norm": 519.8292846679688, "learning_rate": 9.164606203550498e-06, "loss": 24.7114, "step": 134000 }, { "epoch": 0.2707086785958136, "grad_norm": 334.0654602050781, "learning_rate": 9.164413023310022e-06, "loss": 26.17, "step": 134010 }, { "epoch": 0.2707288792284974, "grad_norm": 350.2816162109375, "learning_rate": 9.164219822772702e-06, "loss": 16.3586, "step": 134020 }, { "epoch": 0.27074907986118124, "grad_norm": 140.15565490722656, "learning_rate": 9.16402660193948e-06, "loss": 24.9522, "step": 134030 }, { "epoch": 0.27076928049386506, "grad_norm": 528.6060791015625, "learning_rate": 9.163833360811296e-06, "loss": 17.5522, "step": 134040 }, { "epoch": 0.2707894811265489, "grad_norm": 138.0458526611328, "learning_rate": 9.163640099389095e-06, "loss": 23.0809, "step": 134050 }, { "epoch": 0.2708096817592327, "grad_norm": 248.37319946289062, "learning_rate": 9.163446817673817e-06, "loss": 12.0006, "step": 134060 }, { "epoch": 0.2708298823919165, "grad_norm": 88.36864471435547, "learning_rate": 9.163253515666403e-06, "loss": 20.1978, "step": 134070 }, { "epoch": 0.27085008302460034, "grad_norm": 645.4788208007812, "learning_rate": 9.163060193367797e-06, "loss": 20.0943, "step": 134080 }, { "epoch": 0.27087028365728416, "grad_norm": 135.09487915039062, "learning_rate": 9.16286685077894e-06, "loss": 17.9274, "step": 134090 }, { "epoch": 0.270890484289968, "grad_norm": 452.74847412109375, "learning_rate": 9.162673487900775e-06, "loss": 10.2454, "step": 134100 }, { "epoch": 0.2709106849226518, "grad_norm": 139.68211364746094, "learning_rate": 9.162480104734244e-06, "loss": 13.1204, "step": 134110 }, { "epoch": 0.27093088555533557, "grad_norm": 531.355224609375, "learning_rate": 9.162286701280292e-06, "loss": 20.1981, "step": 134120 }, { "epoch": 0.2709510861880194, "grad_norm": 513.79833984375, "learning_rate": 9.162093277539856e-06, "loss": 17.2434, "step": 134130 }, { "epoch": 0.2709712868207032, "grad_norm": 148.5, "learning_rate": 9.161899833513885e-06, "loss": 17.7689, "step": 134140 }, { "epoch": 0.270991487453387, "grad_norm": 206.4996337890625, "learning_rate": 9.161706369203319e-06, "loss": 14.7884, "step": 134150 }, { "epoch": 0.27101168808607085, "grad_norm": 306.7919616699219, "learning_rate": 9.161512884609099e-06, "loss": 13.5426, "step": 134160 }, { "epoch": 0.27103188871875467, "grad_norm": 568.8477783203125, "learning_rate": 9.16131937973217e-06, "loss": 21.6111, "step": 134170 }, { "epoch": 0.2710520893514385, "grad_norm": 397.7049865722656, "learning_rate": 9.161125854573476e-06, "loss": 32.6385, "step": 134180 }, { "epoch": 0.2710722899841223, "grad_norm": 412.57763671875, "learning_rate": 9.160932309133959e-06, "loss": 28.8624, "step": 134190 }, { "epoch": 0.2710924906168061, "grad_norm": 0.0, "learning_rate": 9.160738743414564e-06, "loss": 27.2116, "step": 134200 }, { "epoch": 0.27111269124948995, "grad_norm": 591.7841796875, "learning_rate": 9.160545157416231e-06, "loss": 19.2173, "step": 134210 }, { "epoch": 0.27113289188217377, "grad_norm": 440.88177490234375, "learning_rate": 9.160351551139906e-06, "loss": 21.5234, "step": 134220 }, { "epoch": 0.2711530925148576, "grad_norm": 193.73715209960938, "learning_rate": 9.160157924586532e-06, "loss": 20.8952, "step": 134230 }, { "epoch": 0.2711732931475414, "grad_norm": 251.74325561523438, "learning_rate": 9.159964277757054e-06, "loss": 16.7119, "step": 134240 }, { "epoch": 0.27119349378022517, "grad_norm": 195.9222412109375, "learning_rate": 9.159770610652413e-06, "loss": 33.0847, "step": 134250 }, { "epoch": 0.271213694412909, "grad_norm": 49.185420989990234, "learning_rate": 9.159576923273557e-06, "loss": 28.8908, "step": 134260 }, { "epoch": 0.2712338950455928, "grad_norm": 331.3103332519531, "learning_rate": 9.159383215621425e-06, "loss": 46.6736, "step": 134270 }, { "epoch": 0.27125409567827663, "grad_norm": 149.8839569091797, "learning_rate": 9.159189487696965e-06, "loss": 26.5099, "step": 134280 }, { "epoch": 0.27127429631096045, "grad_norm": 490.6385192871094, "learning_rate": 9.15899573950112e-06, "loss": 24.3452, "step": 134290 }, { "epoch": 0.27129449694364427, "grad_norm": 535.9559936523438, "learning_rate": 9.158801971034832e-06, "loss": 35.2812, "step": 134300 }, { "epoch": 0.2713146975763281, "grad_norm": 31.504541397094727, "learning_rate": 9.15860818229905e-06, "loss": 19.4527, "step": 134310 }, { "epoch": 0.2713348982090119, "grad_norm": 289.06964111328125, "learning_rate": 9.158414373294715e-06, "loss": 14.0955, "step": 134320 }, { "epoch": 0.27135509884169573, "grad_norm": 167.39085388183594, "learning_rate": 9.158220544022773e-06, "loss": 19.0045, "step": 134330 }, { "epoch": 0.27137529947437955, "grad_norm": 163.84657287597656, "learning_rate": 9.15802669448417e-06, "loss": 16.1528, "step": 134340 }, { "epoch": 0.27139550010706337, "grad_norm": 398.45611572265625, "learning_rate": 9.157832824679846e-06, "loss": 18.3093, "step": 134350 }, { "epoch": 0.2714157007397472, "grad_norm": 260.1958312988281, "learning_rate": 9.15763893461075e-06, "loss": 16.0223, "step": 134360 }, { "epoch": 0.271435901372431, "grad_norm": 319.2218933105469, "learning_rate": 9.157445024277829e-06, "loss": 15.2392, "step": 134370 }, { "epoch": 0.2714561020051148, "grad_norm": 349.0220947265625, "learning_rate": 9.157251093682022e-06, "loss": 17.3932, "step": 134380 }, { "epoch": 0.2714763026377986, "grad_norm": 265.95269775390625, "learning_rate": 9.15705714282428e-06, "loss": 13.6521, "step": 134390 }, { "epoch": 0.2714965032704824, "grad_norm": 313.08251953125, "learning_rate": 9.156863171705543e-06, "loss": 28.9001, "step": 134400 }, { "epoch": 0.27151670390316623, "grad_norm": 396.9394226074219, "learning_rate": 9.15666918032676e-06, "loss": 25.4406, "step": 134410 }, { "epoch": 0.27153690453585005, "grad_norm": 283.1243591308594, "learning_rate": 9.156475168688876e-06, "loss": 21.1988, "step": 134420 }, { "epoch": 0.2715571051685339, "grad_norm": 26.426612854003906, "learning_rate": 9.156281136792836e-06, "loss": 23.5588, "step": 134430 }, { "epoch": 0.2715773058012177, "grad_norm": 249.1089630126953, "learning_rate": 9.156087084639587e-06, "loss": 17.9957, "step": 134440 }, { "epoch": 0.2715975064339015, "grad_norm": 236.09666442871094, "learning_rate": 9.155893012230072e-06, "loss": 19.4876, "step": 134450 }, { "epoch": 0.27161770706658533, "grad_norm": 397.0230712890625, "learning_rate": 9.15569891956524e-06, "loss": 19.0526, "step": 134460 }, { "epoch": 0.27163790769926915, "grad_norm": 330.12353515625, "learning_rate": 9.155504806646033e-06, "loss": 25.0344, "step": 134470 }, { "epoch": 0.271658108331953, "grad_norm": 324.0494689941406, "learning_rate": 9.1553106734734e-06, "loss": 23.0194, "step": 134480 }, { "epoch": 0.2716783089646368, "grad_norm": 111.031005859375, "learning_rate": 9.155116520048289e-06, "loss": 14.7076, "step": 134490 }, { "epoch": 0.2716985095973206, "grad_norm": 677.87841796875, "learning_rate": 9.154922346371641e-06, "loss": 32.3, "step": 134500 }, { "epoch": 0.2717187102300044, "grad_norm": 444.1622619628906, "learning_rate": 9.154728152444408e-06, "loss": 15.517, "step": 134510 }, { "epoch": 0.2717389108626882, "grad_norm": 240.61361694335938, "learning_rate": 9.154533938267534e-06, "loss": 18.4269, "step": 134520 }, { "epoch": 0.271759111495372, "grad_norm": 44.22685241699219, "learning_rate": 9.154339703841963e-06, "loss": 30.4758, "step": 134530 }, { "epoch": 0.27177931212805584, "grad_norm": 687.1217651367188, "learning_rate": 9.154145449168647e-06, "loss": 18.762, "step": 134540 }, { "epoch": 0.27179951276073966, "grad_norm": 93.54328918457031, "learning_rate": 9.153951174248528e-06, "loss": 32.2966, "step": 134550 }, { "epoch": 0.2718197133934235, "grad_norm": 422.3797302246094, "learning_rate": 9.153756879082556e-06, "loss": 24.8442, "step": 134560 }, { "epoch": 0.2718399140261073, "grad_norm": 83.99479675292969, "learning_rate": 9.153562563671676e-06, "loss": 13.7115, "step": 134570 }, { "epoch": 0.2718601146587911, "grad_norm": 83.5513687133789, "learning_rate": 9.153368228016836e-06, "loss": 13.3339, "step": 134580 }, { "epoch": 0.27188031529147494, "grad_norm": 315.4933776855469, "learning_rate": 9.153173872118984e-06, "loss": 23.1513, "step": 134590 }, { "epoch": 0.27190051592415876, "grad_norm": 289.7809143066406, "learning_rate": 9.152979495979064e-06, "loss": 14.7539, "step": 134600 }, { "epoch": 0.2719207165568426, "grad_norm": 118.87428283691406, "learning_rate": 9.152785099598027e-06, "loss": 24.196, "step": 134610 }, { "epoch": 0.2719409171895264, "grad_norm": 396.13690185546875, "learning_rate": 9.152590682976818e-06, "loss": 17.8267, "step": 134620 }, { "epoch": 0.2719611178222102, "grad_norm": 173.28421020507812, "learning_rate": 9.152396246116387e-06, "loss": 44.6559, "step": 134630 }, { "epoch": 0.271981318454894, "grad_norm": 298.81768798828125, "learning_rate": 9.15220178901768e-06, "loss": 18.7226, "step": 134640 }, { "epoch": 0.2720015190875778, "grad_norm": 317.8538818359375, "learning_rate": 9.152007311681645e-06, "loss": 20.6329, "step": 134650 }, { "epoch": 0.2720217197202616, "grad_norm": 192.2227325439453, "learning_rate": 9.151812814109231e-06, "loss": 21.3576, "step": 134660 }, { "epoch": 0.27204192035294544, "grad_norm": 183.93666076660156, "learning_rate": 9.151618296301383e-06, "loss": 13.1812, "step": 134670 }, { "epoch": 0.27206212098562926, "grad_norm": 273.922119140625, "learning_rate": 9.151423758259052e-06, "loss": 25.4381, "step": 134680 }, { "epoch": 0.2720823216183131, "grad_norm": 510.212646484375, "learning_rate": 9.151229199983185e-06, "loss": 27.9904, "step": 134690 }, { "epoch": 0.2721025222509969, "grad_norm": 105.03913879394531, "learning_rate": 9.15103462147473e-06, "loss": 15.6443, "step": 134700 }, { "epoch": 0.2721227228836807, "grad_norm": 608.013671875, "learning_rate": 9.150840022734637e-06, "loss": 23.3653, "step": 134710 }, { "epoch": 0.27214292351636454, "grad_norm": 118.21414184570312, "learning_rate": 9.150645403763852e-06, "loss": 17.9401, "step": 134720 }, { "epoch": 0.27216312414904836, "grad_norm": 580.4991455078125, "learning_rate": 9.150450764563324e-06, "loss": 41.7232, "step": 134730 }, { "epoch": 0.2721833247817322, "grad_norm": 253.423583984375, "learning_rate": 9.150256105134003e-06, "loss": 9.7174, "step": 134740 }, { "epoch": 0.272203525414416, "grad_norm": 346.1338195800781, "learning_rate": 9.150061425476839e-06, "loss": 21.0342, "step": 134750 }, { "epoch": 0.27222372604709977, "grad_norm": 247.4870147705078, "learning_rate": 9.149866725592777e-06, "loss": 29.5602, "step": 134760 }, { "epoch": 0.2722439266797836, "grad_norm": 200.5161590576172, "learning_rate": 9.149672005482768e-06, "loss": 22.8228, "step": 134770 }, { "epoch": 0.2722641273124674, "grad_norm": 550.3970947265625, "learning_rate": 9.149477265147762e-06, "loss": 25.6845, "step": 134780 }, { "epoch": 0.2722843279451512, "grad_norm": 282.61895751953125, "learning_rate": 9.149282504588706e-06, "loss": 34.6832, "step": 134790 }, { "epoch": 0.27230452857783505, "grad_norm": 186.24920654296875, "learning_rate": 9.14908772380655e-06, "loss": 17.0082, "step": 134800 }, { "epoch": 0.27232472921051887, "grad_norm": 179.73207092285156, "learning_rate": 9.148892922802245e-06, "loss": 40.943, "step": 134810 }, { "epoch": 0.2723449298432027, "grad_norm": 248.38430786132812, "learning_rate": 9.148698101576737e-06, "loss": 26.5079, "step": 134820 }, { "epoch": 0.2723651304758865, "grad_norm": 506.23529052734375, "learning_rate": 9.14850326013098e-06, "loss": 23.6154, "step": 134830 }, { "epoch": 0.2723853311085703, "grad_norm": 318.0729064941406, "learning_rate": 9.14830839846592e-06, "loss": 19.8445, "step": 134840 }, { "epoch": 0.27240553174125415, "grad_norm": 222.7960968017578, "learning_rate": 9.148113516582508e-06, "loss": 11.4272, "step": 134850 }, { "epoch": 0.27242573237393797, "grad_norm": 428.57989501953125, "learning_rate": 9.147918614481693e-06, "loss": 29.9686, "step": 134860 }, { "epoch": 0.2724459330066218, "grad_norm": 177.92640686035156, "learning_rate": 9.147723692164428e-06, "loss": 13.8243, "step": 134870 }, { "epoch": 0.2724661336393056, "grad_norm": 229.82870483398438, "learning_rate": 9.14752874963166e-06, "loss": 20.8513, "step": 134880 }, { "epoch": 0.27248633427198937, "grad_norm": 293.23382568359375, "learning_rate": 9.147333786884338e-06, "loss": 20.5982, "step": 134890 }, { "epoch": 0.2725065349046732, "grad_norm": 203.52735900878906, "learning_rate": 9.147138803923417e-06, "loss": 19.7749, "step": 134900 }, { "epoch": 0.272526735537357, "grad_norm": 372.1394958496094, "learning_rate": 9.146943800749842e-06, "loss": 24.552, "step": 134910 }, { "epoch": 0.27254693617004083, "grad_norm": 233.8894805908203, "learning_rate": 9.146748777364567e-06, "loss": 20.0635, "step": 134920 }, { "epoch": 0.27256713680272465, "grad_norm": 519.9957275390625, "learning_rate": 9.146553733768541e-06, "loss": 13.4945, "step": 134930 }, { "epoch": 0.27258733743540847, "grad_norm": 187.00514221191406, "learning_rate": 9.146358669962717e-06, "loss": 13.5928, "step": 134940 }, { "epoch": 0.2726075380680923, "grad_norm": 272.51483154296875, "learning_rate": 9.146163585948041e-06, "loss": 24.1664, "step": 134950 }, { "epoch": 0.2726277387007761, "grad_norm": 293.95367431640625, "learning_rate": 9.145968481725466e-06, "loss": 23.1515, "step": 134960 }, { "epoch": 0.27264793933345993, "grad_norm": 206.09597778320312, "learning_rate": 9.145773357295947e-06, "loss": 24.2326, "step": 134970 }, { "epoch": 0.27266813996614375, "grad_norm": 291.83465576171875, "learning_rate": 9.14557821266043e-06, "loss": 14.0083, "step": 134980 }, { "epoch": 0.27268834059882757, "grad_norm": 75.78569793701172, "learning_rate": 9.145383047819867e-06, "loss": 19.462, "step": 134990 }, { "epoch": 0.2727085412315114, "grad_norm": 192.17611694335938, "learning_rate": 9.145187862775208e-06, "loss": 17.1606, "step": 135000 }, { "epoch": 0.2727287418641952, "grad_norm": 946.3507690429688, "learning_rate": 9.144992657527409e-06, "loss": 27.5972, "step": 135010 }, { "epoch": 0.272748942496879, "grad_norm": 187.5237274169922, "learning_rate": 9.144797432077418e-06, "loss": 16.6804, "step": 135020 }, { "epoch": 0.2727691431295628, "grad_norm": 16.191335678100586, "learning_rate": 9.144602186426186e-06, "loss": 10.5523, "step": 135030 }, { "epoch": 0.2727893437622466, "grad_norm": 267.5987243652344, "learning_rate": 9.144406920574667e-06, "loss": 19.9158, "step": 135040 }, { "epoch": 0.27280954439493044, "grad_norm": 260.53521728515625, "learning_rate": 9.14421163452381e-06, "loss": 23.0881, "step": 135050 }, { "epoch": 0.27282974502761426, "grad_norm": 24.70609474182129, "learning_rate": 9.144016328274569e-06, "loss": 18.389, "step": 135060 }, { "epoch": 0.2728499456602981, "grad_norm": 317.2202453613281, "learning_rate": 9.143821001827895e-06, "loss": 17.0582, "step": 135070 }, { "epoch": 0.2728701462929819, "grad_norm": 157.8903350830078, "learning_rate": 9.14362565518474e-06, "loss": 14.0681, "step": 135080 }, { "epoch": 0.2728903469256657, "grad_norm": 185.28720092773438, "learning_rate": 9.143430288346056e-06, "loss": 13.307, "step": 135090 }, { "epoch": 0.27291054755834954, "grad_norm": 361.53857421875, "learning_rate": 9.143234901312794e-06, "loss": 33.7093, "step": 135100 }, { "epoch": 0.27293074819103336, "grad_norm": 220.26687622070312, "learning_rate": 9.143039494085911e-06, "loss": 12.7707, "step": 135110 }, { "epoch": 0.2729509488237172, "grad_norm": 97.61239624023438, "learning_rate": 9.142844066666352e-06, "loss": 15.3189, "step": 135120 }, { "epoch": 0.272971149456401, "grad_norm": 454.5251159667969, "learning_rate": 9.142648619055074e-06, "loss": 21.3636, "step": 135130 }, { "epoch": 0.2729913500890848, "grad_norm": 315.7205810546875, "learning_rate": 9.142453151253033e-06, "loss": 21.5874, "step": 135140 }, { "epoch": 0.2730115507217686, "grad_norm": 257.26416015625, "learning_rate": 9.142257663261173e-06, "loss": 11.5809, "step": 135150 }, { "epoch": 0.2730317513544524, "grad_norm": 335.3143310546875, "learning_rate": 9.142062155080455e-06, "loss": 30.9142, "step": 135160 }, { "epoch": 0.2730519519871362, "grad_norm": 129.1077423095703, "learning_rate": 9.141866626711826e-06, "loss": 24.1732, "step": 135170 }, { "epoch": 0.27307215261982004, "grad_norm": 196.90745544433594, "learning_rate": 9.141671078156242e-06, "loss": 15.8773, "step": 135180 }, { "epoch": 0.27309235325250386, "grad_norm": 172.0865478515625, "learning_rate": 9.141475509414656e-06, "loss": 12.4997, "step": 135190 }, { "epoch": 0.2731125538851877, "grad_norm": 145.44033813476562, "learning_rate": 9.141279920488021e-06, "loss": 27.0318, "step": 135200 }, { "epoch": 0.2731327545178715, "grad_norm": 490.21417236328125, "learning_rate": 9.141084311377291e-06, "loss": 16.3637, "step": 135210 }, { "epoch": 0.2731529551505553, "grad_norm": 121.04666137695312, "learning_rate": 9.140888682083416e-06, "loss": 19.762, "step": 135220 }, { "epoch": 0.27317315578323914, "grad_norm": 563.986328125, "learning_rate": 9.140693032607353e-06, "loss": 22.3723, "step": 135230 }, { "epoch": 0.27319335641592296, "grad_norm": 159.78668212890625, "learning_rate": 9.140497362950054e-06, "loss": 17.6891, "step": 135240 }, { "epoch": 0.2732135570486068, "grad_norm": 369.7901306152344, "learning_rate": 9.140301673112472e-06, "loss": 20.5995, "step": 135250 }, { "epoch": 0.2732337576812906, "grad_norm": 107.43509674072266, "learning_rate": 9.140105963095563e-06, "loss": 20.499, "step": 135260 }, { "epoch": 0.2732539583139744, "grad_norm": 42.13541030883789, "learning_rate": 9.13991023290028e-06, "loss": 51.4101, "step": 135270 }, { "epoch": 0.2732741589466582, "grad_norm": 150.58364868164062, "learning_rate": 9.139714482527577e-06, "loss": 16.7237, "step": 135280 }, { "epoch": 0.273294359579342, "grad_norm": 149.3505401611328, "learning_rate": 9.139518711978406e-06, "loss": 28.4189, "step": 135290 }, { "epoch": 0.2733145602120258, "grad_norm": 147.408447265625, "learning_rate": 9.139322921253724e-06, "loss": 32.1018, "step": 135300 }, { "epoch": 0.27333476084470965, "grad_norm": 651.6652221679688, "learning_rate": 9.139127110354484e-06, "loss": 25.5357, "step": 135310 }, { "epoch": 0.27335496147739347, "grad_norm": 650.05859375, "learning_rate": 9.13893127928164e-06, "loss": 29.5509, "step": 135320 }, { "epoch": 0.2733751621100773, "grad_norm": 315.73297119140625, "learning_rate": 9.138735428036147e-06, "loss": 20.052, "step": 135330 }, { "epoch": 0.2733953627427611, "grad_norm": 387.9278259277344, "learning_rate": 9.13853955661896e-06, "loss": 35.7799, "step": 135340 }, { "epoch": 0.2734155633754449, "grad_norm": 512.8873291015625, "learning_rate": 9.138343665031033e-06, "loss": 26.8344, "step": 135350 }, { "epoch": 0.27343576400812875, "grad_norm": 500.2298583984375, "learning_rate": 9.13814775327332e-06, "loss": 24.1532, "step": 135360 }, { "epoch": 0.27345596464081257, "grad_norm": 363.7374267578125, "learning_rate": 9.137951821346779e-06, "loss": 28.6543, "step": 135370 }, { "epoch": 0.2734761652734964, "grad_norm": 89.28365325927734, "learning_rate": 9.13775586925236e-06, "loss": 28.1531, "step": 135380 }, { "epoch": 0.2734963659061802, "grad_norm": 258.1756896972656, "learning_rate": 9.137559896991023e-06, "loss": 22.5675, "step": 135390 }, { "epoch": 0.27351656653886397, "grad_norm": 86.53102111816406, "learning_rate": 9.13736390456372e-06, "loss": 23.8051, "step": 135400 }, { "epoch": 0.2735367671715478, "grad_norm": 351.3728332519531, "learning_rate": 9.137167891971407e-06, "loss": 20.5367, "step": 135410 }, { "epoch": 0.2735569678042316, "grad_norm": 274.0627136230469, "learning_rate": 9.136971859215041e-06, "loss": 32.0785, "step": 135420 }, { "epoch": 0.27357716843691543, "grad_norm": 292.0363464355469, "learning_rate": 9.136775806295572e-06, "loss": 21.2707, "step": 135430 }, { "epoch": 0.27359736906959925, "grad_norm": 359.918701171875, "learning_rate": 9.136579733213963e-06, "loss": 29.1125, "step": 135440 }, { "epoch": 0.27361756970228307, "grad_norm": 322.2874755859375, "learning_rate": 9.136383639971166e-06, "loss": 10.8115, "step": 135450 }, { "epoch": 0.2736377703349669, "grad_norm": 174.68386840820312, "learning_rate": 9.136187526568134e-06, "loss": 17.655, "step": 135460 }, { "epoch": 0.2736579709676507, "grad_norm": 190.86843872070312, "learning_rate": 9.135991393005827e-06, "loss": 29.5045, "step": 135470 }, { "epoch": 0.27367817160033453, "grad_norm": 455.5876770019531, "learning_rate": 9.135795239285201e-06, "loss": 15.229, "step": 135480 }, { "epoch": 0.27369837223301835, "grad_norm": 484.1663513183594, "learning_rate": 9.13559906540721e-06, "loss": 36.8851, "step": 135490 }, { "epoch": 0.27371857286570217, "grad_norm": 496.6761169433594, "learning_rate": 9.13540287137281e-06, "loss": 25.5116, "step": 135500 }, { "epoch": 0.273738773498386, "grad_norm": 473.279052734375, "learning_rate": 9.135206657182958e-06, "loss": 13.2843, "step": 135510 }, { "epoch": 0.2737589741310698, "grad_norm": 336.5594177246094, "learning_rate": 9.13501042283861e-06, "loss": 13.3825, "step": 135520 }, { "epoch": 0.2737791747637536, "grad_norm": 325.00201416015625, "learning_rate": 9.134814168340725e-06, "loss": 19.3863, "step": 135530 }, { "epoch": 0.2737993753964374, "grad_norm": 177.6443634033203, "learning_rate": 9.134617893690253e-06, "loss": 11.9709, "step": 135540 }, { "epoch": 0.2738195760291212, "grad_norm": 282.3763122558594, "learning_rate": 9.13442159888816e-06, "loss": 30.4475, "step": 135550 }, { "epoch": 0.27383977666180503, "grad_norm": 438.1059875488281, "learning_rate": 9.134225283935395e-06, "loss": 18.5865, "step": 135560 }, { "epoch": 0.27385997729448885, "grad_norm": 88.08883666992188, "learning_rate": 9.134028948832915e-06, "loss": 19.9568, "step": 135570 }, { "epoch": 0.2738801779271727, "grad_norm": 187.17410278320312, "learning_rate": 9.133832593581683e-06, "loss": 19.4337, "step": 135580 }, { "epoch": 0.2739003785598565, "grad_norm": 504.54364013671875, "learning_rate": 9.133636218182652e-06, "loss": 44.9807, "step": 135590 }, { "epoch": 0.2739205791925403, "grad_norm": 239.6408233642578, "learning_rate": 9.133439822636779e-06, "loss": 24.2312, "step": 135600 }, { "epoch": 0.27394077982522413, "grad_norm": 241.5808868408203, "learning_rate": 9.13324340694502e-06, "loss": 18.414, "step": 135610 }, { "epoch": 0.27396098045790795, "grad_norm": 162.88381958007812, "learning_rate": 9.133046971108335e-06, "loss": 8.0572, "step": 135620 }, { "epoch": 0.2739811810905918, "grad_norm": 243.66046142578125, "learning_rate": 9.13285051512768e-06, "loss": 18.3248, "step": 135630 }, { "epoch": 0.2740013817232756, "grad_norm": 329.8098449707031, "learning_rate": 9.132654039004015e-06, "loss": 16.306, "step": 135640 }, { "epoch": 0.2740215823559594, "grad_norm": 229.34872436523438, "learning_rate": 9.132457542738292e-06, "loss": 11.1831, "step": 135650 }, { "epoch": 0.2740417829886432, "grad_norm": 429.9995422363281, "learning_rate": 9.132261026331473e-06, "loss": 21.8748, "step": 135660 }, { "epoch": 0.274061983621327, "grad_norm": 350.5527648925781, "learning_rate": 9.132064489784516e-06, "loss": 18.8669, "step": 135670 }, { "epoch": 0.2740821842540108, "grad_norm": 91.44134521484375, "learning_rate": 9.131867933098379e-06, "loss": 24.5496, "step": 135680 }, { "epoch": 0.27410238488669464, "grad_norm": 247.1023406982422, "learning_rate": 9.131671356274016e-06, "loss": 12.6186, "step": 135690 }, { "epoch": 0.27412258551937846, "grad_norm": 343.9804992675781, "learning_rate": 9.13147475931239e-06, "loss": 12.7816, "step": 135700 }, { "epoch": 0.2741427861520623, "grad_norm": 548.723876953125, "learning_rate": 9.131278142214457e-06, "loss": 21.3094, "step": 135710 }, { "epoch": 0.2741629867847461, "grad_norm": 398.4631042480469, "learning_rate": 9.131081504981175e-06, "loss": 14.246, "step": 135720 }, { "epoch": 0.2741831874174299, "grad_norm": 238.4674835205078, "learning_rate": 9.1308848476135e-06, "loss": 40.5508, "step": 135730 }, { "epoch": 0.27420338805011374, "grad_norm": 160.64645385742188, "learning_rate": 9.130688170112398e-06, "loss": 28.2749, "step": 135740 }, { "epoch": 0.27422358868279756, "grad_norm": 299.0699462890625, "learning_rate": 9.130491472478819e-06, "loss": 18.4785, "step": 135750 }, { "epoch": 0.2742437893154814, "grad_norm": 1032.4464111328125, "learning_rate": 9.130294754713728e-06, "loss": 22.8142, "step": 135760 }, { "epoch": 0.2742639899481652, "grad_norm": 10.837931632995605, "learning_rate": 9.13009801681808e-06, "loss": 30.4852, "step": 135770 }, { "epoch": 0.274284190580849, "grad_norm": 251.04092407226562, "learning_rate": 9.129901258792838e-06, "loss": 38.7598, "step": 135780 }, { "epoch": 0.2743043912135328, "grad_norm": 38.67740249633789, "learning_rate": 9.129704480638955e-06, "loss": 15.5512, "step": 135790 }, { "epoch": 0.2743245918462166, "grad_norm": 145.26072692871094, "learning_rate": 9.129507682357393e-06, "loss": 11.1561, "step": 135800 }, { "epoch": 0.2743447924789004, "grad_norm": 563.4730834960938, "learning_rate": 9.129310863949113e-06, "loss": 34.9121, "step": 135810 }, { "epoch": 0.27436499311158424, "grad_norm": 622.657958984375, "learning_rate": 9.129114025415071e-06, "loss": 27.2554, "step": 135820 }, { "epoch": 0.27438519374426806, "grad_norm": 168.85089111328125, "learning_rate": 9.12891716675623e-06, "loss": 27.584, "step": 135830 }, { "epoch": 0.2744053943769519, "grad_norm": 364.8684387207031, "learning_rate": 9.128720287973547e-06, "loss": 20.9772, "step": 135840 }, { "epoch": 0.2744255950096357, "grad_norm": 905.5559692382812, "learning_rate": 9.128523389067983e-06, "loss": 31.2199, "step": 135850 }, { "epoch": 0.2744457956423195, "grad_norm": 619.4298706054688, "learning_rate": 9.128326470040495e-06, "loss": 22.9515, "step": 135860 }, { "epoch": 0.27446599627500334, "grad_norm": 173.2970428466797, "learning_rate": 9.128129530892044e-06, "loss": 18.1661, "step": 135870 }, { "epoch": 0.27448619690768716, "grad_norm": 269.26239013671875, "learning_rate": 9.127932571623592e-06, "loss": 18.1101, "step": 135880 }, { "epoch": 0.274506397540371, "grad_norm": 252.97994995117188, "learning_rate": 9.127735592236097e-06, "loss": 15.0443, "step": 135890 }, { "epoch": 0.2745265981730548, "grad_norm": 240.75164794921875, "learning_rate": 9.12753859273052e-06, "loss": 24.491, "step": 135900 }, { "epoch": 0.2745467988057386, "grad_norm": 86.73409271240234, "learning_rate": 9.127341573107819e-06, "loss": 22.6683, "step": 135910 }, { "epoch": 0.2745669994384224, "grad_norm": 260.99151611328125, "learning_rate": 9.127144533368956e-06, "loss": 19.2789, "step": 135920 }, { "epoch": 0.2745872000711062, "grad_norm": 489.68707275390625, "learning_rate": 9.126947473514891e-06, "loss": 46.9927, "step": 135930 }, { "epoch": 0.27460740070379, "grad_norm": 222.9585723876953, "learning_rate": 9.126750393546587e-06, "loss": 26.3767, "step": 135940 }, { "epoch": 0.27462760133647385, "grad_norm": 227.23863220214844, "learning_rate": 9.126553293465e-06, "loss": 23.4759, "step": 135950 }, { "epoch": 0.27464780196915767, "grad_norm": 207.5629119873047, "learning_rate": 9.126356173271092e-06, "loss": 26.3601, "step": 135960 }, { "epoch": 0.2746680026018415, "grad_norm": 656.0303955078125, "learning_rate": 9.126159032965825e-06, "loss": 33.0032, "step": 135970 }, { "epoch": 0.2746882032345253, "grad_norm": 465.453125, "learning_rate": 9.125961872550159e-06, "loss": 32.6768, "step": 135980 }, { "epoch": 0.2747084038672091, "grad_norm": 35.71929931640625, "learning_rate": 9.125764692025057e-06, "loss": 15.4815, "step": 135990 }, { "epoch": 0.27472860449989295, "grad_norm": 253.49046325683594, "learning_rate": 9.125567491391476e-06, "loss": 18.0229, "step": 136000 }, { "epoch": 0.27474880513257677, "grad_norm": 86.10710906982422, "learning_rate": 9.125370270650381e-06, "loss": 11.3164, "step": 136010 }, { "epoch": 0.2747690057652606, "grad_norm": 187.6802215576172, "learning_rate": 9.125173029802732e-06, "loss": 21.9044, "step": 136020 }, { "epoch": 0.2747892063979444, "grad_norm": 33.948482513427734, "learning_rate": 9.124975768849489e-06, "loss": 13.5205, "step": 136030 }, { "epoch": 0.27480940703062817, "grad_norm": 300.157958984375, "learning_rate": 9.124778487791615e-06, "loss": 15.6275, "step": 136040 }, { "epoch": 0.274829607663312, "grad_norm": 202.37393188476562, "learning_rate": 9.12458118663007e-06, "loss": 16.9319, "step": 136050 }, { "epoch": 0.2748498082959958, "grad_norm": 256.3176574707031, "learning_rate": 9.124383865365817e-06, "loss": 16.8834, "step": 136060 }, { "epoch": 0.27487000892867963, "grad_norm": 472.9837951660156, "learning_rate": 9.124186523999818e-06, "loss": 34.0486, "step": 136070 }, { "epoch": 0.27489020956136345, "grad_norm": 296.5820007324219, "learning_rate": 9.123989162533032e-06, "loss": 21.7236, "step": 136080 }, { "epoch": 0.27491041019404727, "grad_norm": 194.70814514160156, "learning_rate": 9.123791780966425e-06, "loss": 23.6383, "step": 136090 }, { "epoch": 0.2749306108267311, "grad_norm": 157.523193359375, "learning_rate": 9.123594379300956e-06, "loss": 24.3338, "step": 136100 }, { "epoch": 0.2749508114594149, "grad_norm": 188.98179626464844, "learning_rate": 9.12339695753759e-06, "loss": 20.7807, "step": 136110 }, { "epoch": 0.27497101209209873, "grad_norm": 165.13113403320312, "learning_rate": 9.123199515677285e-06, "loss": 18.387, "step": 136120 }, { "epoch": 0.27499121272478255, "grad_norm": 438.7861633300781, "learning_rate": 9.123002053721006e-06, "loss": 18.9675, "step": 136130 }, { "epoch": 0.27501141335746637, "grad_norm": 113.10542297363281, "learning_rate": 9.122804571669717e-06, "loss": 23.3801, "step": 136140 }, { "epoch": 0.2750316139901502, "grad_norm": 454.96417236328125, "learning_rate": 9.122607069524377e-06, "loss": 14.8794, "step": 136150 }, { "epoch": 0.275051814622834, "grad_norm": 606.8968505859375, "learning_rate": 9.122409547285948e-06, "loss": 16.5801, "step": 136160 }, { "epoch": 0.2750720152555178, "grad_norm": 236.1368408203125, "learning_rate": 9.122212004955399e-06, "loss": 13.548, "step": 136170 }, { "epoch": 0.2750922158882016, "grad_norm": 283.3677062988281, "learning_rate": 9.122014442533686e-06, "loss": 21.3762, "step": 136180 }, { "epoch": 0.2751124165208854, "grad_norm": 382.8354187011719, "learning_rate": 9.121816860021776e-06, "loss": 21.5891, "step": 136190 }, { "epoch": 0.27513261715356924, "grad_norm": 363.67169189453125, "learning_rate": 9.12161925742063e-06, "loss": 23.8355, "step": 136200 }, { "epoch": 0.27515281778625306, "grad_norm": 399.4233093261719, "learning_rate": 9.121421634731211e-06, "loss": 20.1671, "step": 136210 }, { "epoch": 0.2751730184189369, "grad_norm": 174.61634826660156, "learning_rate": 9.121223991954484e-06, "loss": 25.535, "step": 136220 }, { "epoch": 0.2751932190516207, "grad_norm": 238.01441955566406, "learning_rate": 9.12102632909141e-06, "loss": 13.9983, "step": 136230 }, { "epoch": 0.2752134196843045, "grad_norm": 33.236846923828125, "learning_rate": 9.120828646142954e-06, "loss": 12.631, "step": 136240 }, { "epoch": 0.27523362031698834, "grad_norm": 53.04288864135742, "learning_rate": 9.120630943110078e-06, "loss": 25.2703, "step": 136250 }, { "epoch": 0.27525382094967216, "grad_norm": 136.6112518310547, "learning_rate": 9.120433219993749e-06, "loss": 20.5344, "step": 136260 }, { "epoch": 0.275274021582356, "grad_norm": 262.7619323730469, "learning_rate": 9.120235476794926e-06, "loss": 18.3843, "step": 136270 }, { "epoch": 0.2752942222150398, "grad_norm": 86.09542846679688, "learning_rate": 9.120037713514575e-06, "loss": 21.5987, "step": 136280 }, { "epoch": 0.2753144228477236, "grad_norm": 190.63800048828125, "learning_rate": 9.119839930153663e-06, "loss": 23.4929, "step": 136290 }, { "epoch": 0.2753346234804074, "grad_norm": 65.09477233886719, "learning_rate": 9.119642126713147e-06, "loss": 34.0589, "step": 136300 }, { "epoch": 0.2753548241130912, "grad_norm": 448.7789001464844, "learning_rate": 9.119444303193997e-06, "loss": 18.1482, "step": 136310 }, { "epoch": 0.275375024745775, "grad_norm": 654.6657104492188, "learning_rate": 9.119246459597173e-06, "loss": 26.2771, "step": 136320 }, { "epoch": 0.27539522537845884, "grad_norm": 416.85601806640625, "learning_rate": 9.119048595923643e-06, "loss": 17.2528, "step": 136330 }, { "epoch": 0.27541542601114266, "grad_norm": 243.1229248046875, "learning_rate": 9.11885071217437e-06, "loss": 34.4779, "step": 136340 }, { "epoch": 0.2754356266438265, "grad_norm": 731.1221923828125, "learning_rate": 9.11865280835032e-06, "loss": 27.101, "step": 136350 }, { "epoch": 0.2754558272765103, "grad_norm": 387.3265686035156, "learning_rate": 9.118454884452452e-06, "loss": 35.797, "step": 136360 }, { "epoch": 0.2754760279091941, "grad_norm": 179.68646240234375, "learning_rate": 9.118256940481735e-06, "loss": 15.4651, "step": 136370 }, { "epoch": 0.27549622854187794, "grad_norm": 7.022815704345703, "learning_rate": 9.118058976439134e-06, "loss": 15.6538, "step": 136380 }, { "epoch": 0.27551642917456176, "grad_norm": 1010.45263671875, "learning_rate": 9.117860992325613e-06, "loss": 35.3664, "step": 136390 }, { "epoch": 0.2755366298072456, "grad_norm": 181.1259765625, "learning_rate": 9.117662988142138e-06, "loss": 19.2377, "step": 136400 }, { "epoch": 0.2755568304399294, "grad_norm": 351.9560546875, "learning_rate": 9.117464963889672e-06, "loss": 25.148, "step": 136410 }, { "epoch": 0.2755770310726132, "grad_norm": 335.7454528808594, "learning_rate": 9.117266919569181e-06, "loss": 25.4104, "step": 136420 }, { "epoch": 0.275597231705297, "grad_norm": 339.6723327636719, "learning_rate": 9.11706885518163e-06, "loss": 19.2103, "step": 136430 }, { "epoch": 0.2756174323379808, "grad_norm": 78.57444763183594, "learning_rate": 9.116870770727986e-06, "loss": 16.9073, "step": 136440 }, { "epoch": 0.2756376329706646, "grad_norm": 163.0300750732422, "learning_rate": 9.116672666209211e-06, "loss": 20.2456, "step": 136450 }, { "epoch": 0.27565783360334845, "grad_norm": 179.90283203125, "learning_rate": 9.116474541626277e-06, "loss": 31.6315, "step": 136460 }, { "epoch": 0.27567803423603227, "grad_norm": 211.01416015625, "learning_rate": 9.116276396980141e-06, "loss": 15.0668, "step": 136470 }, { "epoch": 0.2756982348687161, "grad_norm": 11.283744812011719, "learning_rate": 9.116078232271774e-06, "loss": 27.5693, "step": 136480 }, { "epoch": 0.2757184355013999, "grad_norm": 216.7115936279297, "learning_rate": 9.115880047502142e-06, "loss": 11.291, "step": 136490 }, { "epoch": 0.2757386361340837, "grad_norm": 218.44058227539062, "learning_rate": 9.115681842672211e-06, "loss": 18.5517, "step": 136500 }, { "epoch": 0.27575883676676755, "grad_norm": 403.8744812011719, "learning_rate": 9.115483617782943e-06, "loss": 30.6344, "step": 136510 }, { "epoch": 0.27577903739945137, "grad_norm": 261.73138427734375, "learning_rate": 9.115285372835309e-06, "loss": 23.3491, "step": 136520 }, { "epoch": 0.2757992380321352, "grad_norm": 332.28240966796875, "learning_rate": 9.115087107830272e-06, "loss": 31.5415, "step": 136530 }, { "epoch": 0.275819438664819, "grad_norm": 241.9741973876953, "learning_rate": 9.114888822768801e-06, "loss": 13.3893, "step": 136540 }, { "epoch": 0.2758396392975028, "grad_norm": 220.01419067382812, "learning_rate": 9.11469051765186e-06, "loss": 20.0902, "step": 136550 }, { "epoch": 0.2758598399301866, "grad_norm": 397.5327453613281, "learning_rate": 9.114492192480416e-06, "loss": 16.6926, "step": 136560 }, { "epoch": 0.2758800405628704, "grad_norm": 314.93231201171875, "learning_rate": 9.114293847255437e-06, "loss": 23.5176, "step": 136570 }, { "epoch": 0.27590024119555423, "grad_norm": 353.68804931640625, "learning_rate": 9.114095481977887e-06, "loss": 28.2914, "step": 136580 }, { "epoch": 0.27592044182823805, "grad_norm": 333.72772216796875, "learning_rate": 9.113897096648735e-06, "loss": 17.8621, "step": 136590 }, { "epoch": 0.27594064246092187, "grad_norm": 269.9982604980469, "learning_rate": 9.11369869126895e-06, "loss": 24.219, "step": 136600 }, { "epoch": 0.2759608430936057, "grad_norm": 420.6968994140625, "learning_rate": 9.113500265839495e-06, "loss": 29.1386, "step": 136610 }, { "epoch": 0.2759810437262895, "grad_norm": 646.2324829101562, "learning_rate": 9.113301820361339e-06, "loss": 39.7209, "step": 136620 }, { "epoch": 0.27600124435897333, "grad_norm": 360.3862609863281, "learning_rate": 9.113103354835448e-06, "loss": 30.1616, "step": 136630 }, { "epoch": 0.27602144499165715, "grad_norm": 421.1723937988281, "learning_rate": 9.112904869262791e-06, "loss": 25.3559, "step": 136640 }, { "epoch": 0.27604164562434097, "grad_norm": 327.76715087890625, "learning_rate": 9.112706363644334e-06, "loss": 42.6995, "step": 136650 }, { "epoch": 0.2760618462570248, "grad_norm": 412.6033020019531, "learning_rate": 9.112507837981045e-06, "loss": 27.7484, "step": 136660 }, { "epoch": 0.2760820468897086, "grad_norm": 221.56723022460938, "learning_rate": 9.112309292273891e-06, "loss": 17.4118, "step": 136670 }, { "epoch": 0.2761022475223924, "grad_norm": 427.3962707519531, "learning_rate": 9.112110726523841e-06, "loss": 28.584, "step": 136680 }, { "epoch": 0.2761224481550762, "grad_norm": 219.7229461669922, "learning_rate": 9.111912140731862e-06, "loss": 21.8014, "step": 136690 }, { "epoch": 0.27614264878776, "grad_norm": 181.25672912597656, "learning_rate": 9.111713534898923e-06, "loss": 14.6018, "step": 136700 }, { "epoch": 0.27616284942044383, "grad_norm": 126.04016876220703, "learning_rate": 9.111514909025988e-06, "loss": 28.6469, "step": 136710 }, { "epoch": 0.27618305005312765, "grad_norm": 424.3619079589844, "learning_rate": 9.11131626311403e-06, "loss": 16.5054, "step": 136720 }, { "epoch": 0.2762032506858115, "grad_norm": 487.2112731933594, "learning_rate": 9.111117597164016e-06, "loss": 26.2044, "step": 136730 }, { "epoch": 0.2762234513184953, "grad_norm": 206.2068328857422, "learning_rate": 9.110918911176911e-06, "loss": 12.9662, "step": 136740 }, { "epoch": 0.2762436519511791, "grad_norm": 361.41815185546875, "learning_rate": 9.110720205153688e-06, "loss": 17.5148, "step": 136750 }, { "epoch": 0.27626385258386293, "grad_norm": 383.0902404785156, "learning_rate": 9.110521479095314e-06, "loss": 17.2572, "step": 136760 }, { "epoch": 0.27628405321654675, "grad_norm": 138.79258728027344, "learning_rate": 9.110322733002755e-06, "loss": 32.6162, "step": 136770 }, { "epoch": 0.2763042538492306, "grad_norm": 324.50634765625, "learning_rate": 9.11012396687698e-06, "loss": 20.8859, "step": 136780 }, { "epoch": 0.2763244544819144, "grad_norm": 350.0141296386719, "learning_rate": 9.10992518071896e-06, "loss": 37.1248, "step": 136790 }, { "epoch": 0.2763446551145982, "grad_norm": 149.31185913085938, "learning_rate": 9.109726374529666e-06, "loss": 14.7918, "step": 136800 }, { "epoch": 0.276364855747282, "grad_norm": 318.52142333984375, "learning_rate": 9.109527548310062e-06, "loss": 10.1969, "step": 136810 }, { "epoch": 0.2763850563799658, "grad_norm": 235.1845245361328, "learning_rate": 9.10932870206112e-06, "loss": 23.816, "step": 136820 }, { "epoch": 0.2764052570126496, "grad_norm": 168.22830200195312, "learning_rate": 9.109129835783808e-06, "loss": 14.9703, "step": 136830 }, { "epoch": 0.27642545764533344, "grad_norm": 422.3341369628906, "learning_rate": 9.108930949479096e-06, "loss": 20.3724, "step": 136840 }, { "epoch": 0.27644565827801726, "grad_norm": 368.4128112792969, "learning_rate": 9.108732043147952e-06, "loss": 15.9503, "step": 136850 }, { "epoch": 0.2764658589107011, "grad_norm": 178.6656494140625, "learning_rate": 9.108533116791347e-06, "loss": 8.8596, "step": 136860 }, { "epoch": 0.2764860595433849, "grad_norm": 146.42971801757812, "learning_rate": 9.108334170410249e-06, "loss": 18.063, "step": 136870 }, { "epoch": 0.2765062601760687, "grad_norm": 32.714759826660156, "learning_rate": 9.10813520400563e-06, "loss": 12.7061, "step": 136880 }, { "epoch": 0.27652646080875254, "grad_norm": 96.61821746826172, "learning_rate": 9.107936217578457e-06, "loss": 16.2141, "step": 136890 }, { "epoch": 0.27654666144143636, "grad_norm": 677.0598754882812, "learning_rate": 9.107737211129702e-06, "loss": 43.0939, "step": 136900 }, { "epoch": 0.2765668620741202, "grad_norm": 280.5899353027344, "learning_rate": 9.107538184660333e-06, "loss": 14.5356, "step": 136910 }, { "epoch": 0.276587062706804, "grad_norm": 183.5652618408203, "learning_rate": 9.107339138171321e-06, "loss": 15.9356, "step": 136920 }, { "epoch": 0.2766072633394878, "grad_norm": 316.7225646972656, "learning_rate": 9.107140071663637e-06, "loss": 22.3141, "step": 136930 }, { "epoch": 0.2766274639721716, "grad_norm": 310.81817626953125, "learning_rate": 9.106940985138251e-06, "loss": 19.0601, "step": 136940 }, { "epoch": 0.2766476646048554, "grad_norm": 468.9579162597656, "learning_rate": 9.106741878596132e-06, "loss": 26.4934, "step": 136950 }, { "epoch": 0.2766678652375392, "grad_norm": 145.86526489257812, "learning_rate": 9.10654275203825e-06, "loss": 12.8207, "step": 136960 }, { "epoch": 0.27668806587022304, "grad_norm": 105.90185546875, "learning_rate": 9.106343605465578e-06, "loss": 24.7032, "step": 136970 }, { "epoch": 0.27670826650290686, "grad_norm": 543.64111328125, "learning_rate": 9.106144438879086e-06, "loss": 25.7078, "step": 136980 }, { "epoch": 0.2767284671355907, "grad_norm": 471.9919128417969, "learning_rate": 9.105945252279743e-06, "loss": 16.7289, "step": 136990 }, { "epoch": 0.2767486677682745, "grad_norm": 328.0264587402344, "learning_rate": 9.10574604566852e-06, "loss": 24.1369, "step": 137000 }, { "epoch": 0.2767688684009583, "grad_norm": 436.443359375, "learning_rate": 9.105546819046391e-06, "loss": 14.0349, "step": 137010 }, { "epoch": 0.27678906903364214, "grad_norm": 394.4882507324219, "learning_rate": 9.105347572414323e-06, "loss": 14.4574, "step": 137020 }, { "epoch": 0.27680926966632596, "grad_norm": 136.3496856689453, "learning_rate": 9.10514830577329e-06, "loss": 17.8026, "step": 137030 }, { "epoch": 0.2768294702990098, "grad_norm": 568.7249755859375, "learning_rate": 9.104949019124261e-06, "loss": 25.8645, "step": 137040 }, { "epoch": 0.2768496709316936, "grad_norm": 444.3772888183594, "learning_rate": 9.104749712468208e-06, "loss": 22.8513, "step": 137050 }, { "epoch": 0.2768698715643774, "grad_norm": 9866.7158203125, "learning_rate": 9.104550385806103e-06, "loss": 35.4124, "step": 137060 }, { "epoch": 0.2768900721970612, "grad_norm": 663.0956420898438, "learning_rate": 9.104351039138919e-06, "loss": 26.8299, "step": 137070 }, { "epoch": 0.276910272829745, "grad_norm": 447.0508728027344, "learning_rate": 9.104151672467625e-06, "loss": 25.423, "step": 137080 }, { "epoch": 0.27693047346242883, "grad_norm": 193.38394165039062, "learning_rate": 9.103952285793192e-06, "loss": 21.4941, "step": 137090 }, { "epoch": 0.27695067409511265, "grad_norm": 284.2653503417969, "learning_rate": 9.103752879116595e-06, "loss": 28.8424, "step": 137100 }, { "epoch": 0.27697087472779647, "grad_norm": 497.0201416015625, "learning_rate": 9.103553452438803e-06, "loss": 21.2437, "step": 137110 }, { "epoch": 0.2769910753604803, "grad_norm": 564.9322509765625, "learning_rate": 9.10335400576079e-06, "loss": 15.1508, "step": 137120 }, { "epoch": 0.2770112759931641, "grad_norm": 377.7366027832031, "learning_rate": 9.103154539083527e-06, "loss": 22.5581, "step": 137130 }, { "epoch": 0.27703147662584793, "grad_norm": 429.3810729980469, "learning_rate": 9.102955052407986e-06, "loss": 23.5189, "step": 137140 }, { "epoch": 0.27705167725853175, "grad_norm": 271.2933349609375, "learning_rate": 9.102755545735141e-06, "loss": 19.8338, "step": 137150 }, { "epoch": 0.27707187789121557, "grad_norm": 359.70074462890625, "learning_rate": 9.102556019065962e-06, "loss": 22.4638, "step": 137160 }, { "epoch": 0.2770920785238994, "grad_norm": 302.98150634765625, "learning_rate": 9.102356472401424e-06, "loss": 8.9129, "step": 137170 }, { "epoch": 0.2771122791565832, "grad_norm": 583.4323120117188, "learning_rate": 9.102156905742497e-06, "loss": 27.2483, "step": 137180 }, { "epoch": 0.27713247978926697, "grad_norm": 370.04364013671875, "learning_rate": 9.101957319090153e-06, "loss": 24.7466, "step": 137190 }, { "epoch": 0.2771526804219508, "grad_norm": 408.0619201660156, "learning_rate": 9.101757712445369e-06, "loss": 24.3498, "step": 137200 }, { "epoch": 0.2771728810546346, "grad_norm": 291.4731750488281, "learning_rate": 9.101558085809114e-06, "loss": 20.5389, "step": 137210 }, { "epoch": 0.27719308168731843, "grad_norm": 178.28631591796875, "learning_rate": 9.101358439182364e-06, "loss": 11.3219, "step": 137220 }, { "epoch": 0.27721328232000225, "grad_norm": 206.64212036132812, "learning_rate": 9.101158772566088e-06, "loss": 13.0218, "step": 137230 }, { "epoch": 0.27723348295268607, "grad_norm": 256.3148193359375, "learning_rate": 9.100959085961263e-06, "loss": 27.4775, "step": 137240 }, { "epoch": 0.2772536835853699, "grad_norm": 394.78143310546875, "learning_rate": 9.100759379368863e-06, "loss": 25.5328, "step": 137250 }, { "epoch": 0.2772738842180537, "grad_norm": 40.53182601928711, "learning_rate": 9.100559652789856e-06, "loss": 19.7622, "step": 137260 }, { "epoch": 0.27729408485073753, "grad_norm": 211.41595458984375, "learning_rate": 9.100359906225219e-06, "loss": 22.9857, "step": 137270 }, { "epoch": 0.27731428548342135, "grad_norm": 250.53341674804688, "learning_rate": 9.100160139675925e-06, "loss": 15.0865, "step": 137280 }, { "epoch": 0.27733448611610517, "grad_norm": 362.9903259277344, "learning_rate": 9.099960353142948e-06, "loss": 16.6672, "step": 137290 }, { "epoch": 0.277354686748789, "grad_norm": 230.56898498535156, "learning_rate": 9.099760546627262e-06, "loss": 20.8953, "step": 137300 }, { "epoch": 0.2773748873814728, "grad_norm": 288.8524169921875, "learning_rate": 9.099560720129842e-06, "loss": 17.4811, "step": 137310 }, { "epoch": 0.2773950880141566, "grad_norm": 430.862060546875, "learning_rate": 9.099360873651658e-06, "loss": 24.9898, "step": 137320 }, { "epoch": 0.2774152886468404, "grad_norm": 415.2790222167969, "learning_rate": 9.099161007193686e-06, "loss": 18.3432, "step": 137330 }, { "epoch": 0.2774354892795242, "grad_norm": 249.9918212890625, "learning_rate": 9.098961120756902e-06, "loss": 20.2841, "step": 137340 }, { "epoch": 0.27745568991220804, "grad_norm": 609.708251953125, "learning_rate": 9.098761214342277e-06, "loss": 19.6855, "step": 137350 }, { "epoch": 0.27747589054489186, "grad_norm": 95.56465911865234, "learning_rate": 9.098561287950788e-06, "loss": 20.0175, "step": 137360 }, { "epoch": 0.2774960911775757, "grad_norm": 500.98065185546875, "learning_rate": 9.098361341583408e-06, "loss": 29.8389, "step": 137370 }, { "epoch": 0.2775162918102595, "grad_norm": 261.7269287109375, "learning_rate": 9.098161375241112e-06, "loss": 24.7024, "step": 137380 }, { "epoch": 0.2775364924429433, "grad_norm": 0.0, "learning_rate": 9.097961388924875e-06, "loss": 19.2809, "step": 137390 }, { "epoch": 0.27755669307562714, "grad_norm": 255.2352752685547, "learning_rate": 9.09776138263567e-06, "loss": 32.2676, "step": 137400 }, { "epoch": 0.27757689370831096, "grad_norm": 124.65665435791016, "learning_rate": 9.097561356374473e-06, "loss": 17.3353, "step": 137410 }, { "epoch": 0.2775970943409948, "grad_norm": 658.8836669921875, "learning_rate": 9.097361310142261e-06, "loss": 33.1404, "step": 137420 }, { "epoch": 0.2776172949736786, "grad_norm": 166.06393432617188, "learning_rate": 9.097161243940005e-06, "loss": 17.9978, "step": 137430 }, { "epoch": 0.2776374956063624, "grad_norm": 554.9646606445312, "learning_rate": 9.096961157768681e-06, "loss": 24.8946, "step": 137440 }, { "epoch": 0.2776576962390462, "grad_norm": 199.4671630859375, "learning_rate": 9.096761051629268e-06, "loss": 20.9673, "step": 137450 }, { "epoch": 0.27767789687173, "grad_norm": 71.43617248535156, "learning_rate": 9.096560925522738e-06, "loss": 22.1613, "step": 137460 }, { "epoch": 0.2776980975044138, "grad_norm": 118.2379150390625, "learning_rate": 9.096360779450066e-06, "loss": 32.5171, "step": 137470 }, { "epoch": 0.27771829813709764, "grad_norm": 130.0936279296875, "learning_rate": 9.096160613412228e-06, "loss": 15.5959, "step": 137480 }, { "epoch": 0.27773849876978146, "grad_norm": 269.11578369140625, "learning_rate": 9.095960427410202e-06, "loss": 11.9496, "step": 137490 }, { "epoch": 0.2777586994024653, "grad_norm": 784.8992919921875, "learning_rate": 9.09576022144496e-06, "loss": 25.0486, "step": 137500 }, { "epoch": 0.2777789000351491, "grad_norm": 12.46846866607666, "learning_rate": 9.09555999551748e-06, "loss": 15.8123, "step": 137510 }, { "epoch": 0.2777991006678329, "grad_norm": 348.6684265136719, "learning_rate": 9.095359749628736e-06, "loss": 16.7281, "step": 137520 }, { "epoch": 0.27781930130051674, "grad_norm": 239.71969604492188, "learning_rate": 9.095159483779707e-06, "loss": 30.2913, "step": 137530 }, { "epoch": 0.27783950193320056, "grad_norm": 238.5032501220703, "learning_rate": 9.094959197971367e-06, "loss": 27.1816, "step": 137540 }, { "epoch": 0.2778597025658844, "grad_norm": 176.27224731445312, "learning_rate": 9.09475889220469e-06, "loss": 16.9475, "step": 137550 }, { "epoch": 0.2778799031985682, "grad_norm": 188.90481567382812, "learning_rate": 9.094558566480659e-06, "loss": 15.4587, "step": 137560 }, { "epoch": 0.277900103831252, "grad_norm": 253.3138427734375, "learning_rate": 9.094358220800243e-06, "loss": 24.1758, "step": 137570 }, { "epoch": 0.2779203044639358, "grad_norm": 160.6217803955078, "learning_rate": 9.094157855164424e-06, "loss": 28.3044, "step": 137580 }, { "epoch": 0.2779405050966196, "grad_norm": 251.66065979003906, "learning_rate": 9.093957469574175e-06, "loss": 19.1041, "step": 137590 }, { "epoch": 0.2779607057293034, "grad_norm": 326.3103942871094, "learning_rate": 9.093757064030473e-06, "loss": 29.3865, "step": 137600 }, { "epoch": 0.27798090636198725, "grad_norm": 646.412353515625, "learning_rate": 9.093556638534298e-06, "loss": 21.1252, "step": 137610 }, { "epoch": 0.27800110699467107, "grad_norm": 283.00537109375, "learning_rate": 9.093356193086622e-06, "loss": 15.3614, "step": 137620 }, { "epoch": 0.2780213076273549, "grad_norm": 173.00047302246094, "learning_rate": 9.093155727688426e-06, "loss": 28.8273, "step": 137630 }, { "epoch": 0.2780415082600387, "grad_norm": 429.89923095703125, "learning_rate": 9.092955242340684e-06, "loss": 27.4985, "step": 137640 }, { "epoch": 0.2780617088927225, "grad_norm": 423.6048278808594, "learning_rate": 9.092754737044375e-06, "loss": 13.9779, "step": 137650 }, { "epoch": 0.27808190952540635, "grad_norm": 510.1482849121094, "learning_rate": 9.092554211800476e-06, "loss": 28.9534, "step": 137660 }, { "epoch": 0.27810211015809017, "grad_norm": 255.487548828125, "learning_rate": 9.092353666609963e-06, "loss": 24.9318, "step": 137670 }, { "epoch": 0.278122310790774, "grad_norm": 570.6677856445312, "learning_rate": 9.092153101473818e-06, "loss": 18.8043, "step": 137680 }, { "epoch": 0.2781425114234578, "grad_norm": 272.9458923339844, "learning_rate": 9.091952516393012e-06, "loss": 21.735, "step": 137690 }, { "epoch": 0.2781627120561416, "grad_norm": 498.09344482421875, "learning_rate": 9.091751911368524e-06, "loss": 28.111, "step": 137700 }, { "epoch": 0.2781829126888254, "grad_norm": 59.90256881713867, "learning_rate": 9.091551286401337e-06, "loss": 11.9373, "step": 137710 }, { "epoch": 0.2782031133215092, "grad_norm": 279.5541076660156, "learning_rate": 9.091350641492424e-06, "loss": 31.1292, "step": 137720 }, { "epoch": 0.27822331395419303, "grad_norm": 225.56297302246094, "learning_rate": 9.091149976642765e-06, "loss": 16.9825, "step": 137730 }, { "epoch": 0.27824351458687685, "grad_norm": 340.2207336425781, "learning_rate": 9.090949291853337e-06, "loss": 18.2082, "step": 137740 }, { "epoch": 0.27826371521956067, "grad_norm": 265.1539306640625, "learning_rate": 9.090748587125118e-06, "loss": 15.622, "step": 137750 }, { "epoch": 0.2782839158522445, "grad_norm": 363.0420837402344, "learning_rate": 9.090547862459087e-06, "loss": 25.7088, "step": 137760 }, { "epoch": 0.2783041164849283, "grad_norm": 296.7699890136719, "learning_rate": 9.09034711785622e-06, "loss": 18.4701, "step": 137770 }, { "epoch": 0.27832431711761213, "grad_norm": 299.47027587890625, "learning_rate": 9.090146353317499e-06, "loss": 17.1664, "step": 137780 }, { "epoch": 0.27834451775029595, "grad_norm": 488.3904724121094, "learning_rate": 9.0899455688439e-06, "loss": 15.8987, "step": 137790 }, { "epoch": 0.27836471838297977, "grad_norm": 463.36480712890625, "learning_rate": 9.089744764436404e-06, "loss": 20.0385, "step": 137800 }, { "epoch": 0.2783849190156636, "grad_norm": 753.0225219726562, "learning_rate": 9.089543940095985e-06, "loss": 23.7006, "step": 137810 }, { "epoch": 0.2784051196483474, "grad_norm": 349.9093322753906, "learning_rate": 9.089343095823628e-06, "loss": 23.9177, "step": 137820 }, { "epoch": 0.2784253202810312, "grad_norm": 0.0, "learning_rate": 9.089142231620306e-06, "loss": 15.6949, "step": 137830 }, { "epoch": 0.278445520913715, "grad_norm": 382.9058837890625, "learning_rate": 9.088941347487004e-06, "loss": 16.5551, "step": 137840 }, { "epoch": 0.2784657215463988, "grad_norm": 560.7875366210938, "learning_rate": 9.088740443424695e-06, "loss": 28.7601, "step": 137850 }, { "epoch": 0.27848592217908263, "grad_norm": 545.6184692382812, "learning_rate": 9.088539519434362e-06, "loss": 24.8555, "step": 137860 }, { "epoch": 0.27850612281176645, "grad_norm": 230.47601318359375, "learning_rate": 9.088338575516983e-06, "loss": 11.0313, "step": 137870 }, { "epoch": 0.2785263234444503, "grad_norm": 359.2557678222656, "learning_rate": 9.088137611673538e-06, "loss": 34.7489, "step": 137880 }, { "epoch": 0.2785465240771341, "grad_norm": 167.1384735107422, "learning_rate": 9.087936627905005e-06, "loss": 20.1526, "step": 137890 }, { "epoch": 0.2785667247098179, "grad_norm": 592.141357421875, "learning_rate": 9.087735624212365e-06, "loss": 19.9548, "step": 137900 }, { "epoch": 0.27858692534250173, "grad_norm": 1444.881103515625, "learning_rate": 9.087534600596599e-06, "loss": 22.8453, "step": 137910 }, { "epoch": 0.27860712597518555, "grad_norm": 195.19361877441406, "learning_rate": 9.087333557058684e-06, "loss": 18.9932, "step": 137920 }, { "epoch": 0.2786273266078694, "grad_norm": 226.70822143554688, "learning_rate": 9.087132493599601e-06, "loss": 12.863, "step": 137930 }, { "epoch": 0.2786475272405532, "grad_norm": 220.99586486816406, "learning_rate": 9.08693141022033e-06, "loss": 20.09, "step": 137940 }, { "epoch": 0.278667727873237, "grad_norm": 346.5348205566406, "learning_rate": 9.08673030692185e-06, "loss": 18.6805, "step": 137950 }, { "epoch": 0.2786879285059208, "grad_norm": 106.3956069946289, "learning_rate": 9.086529183705144e-06, "loss": 16.0761, "step": 137960 }, { "epoch": 0.2787081291386046, "grad_norm": 27.532039642333984, "learning_rate": 9.086328040571189e-06, "loss": 17.515, "step": 137970 }, { "epoch": 0.2787283297712884, "grad_norm": 296.3559265136719, "learning_rate": 9.086126877520967e-06, "loss": 14.1914, "step": 137980 }, { "epoch": 0.27874853040397224, "grad_norm": 243.87644958496094, "learning_rate": 9.085925694555457e-06, "loss": 16.9908, "step": 137990 }, { "epoch": 0.27876873103665606, "grad_norm": 70.779052734375, "learning_rate": 9.085724491675642e-06, "loss": 24.2836, "step": 138000 }, { "epoch": 0.2787889316693399, "grad_norm": 457.239501953125, "learning_rate": 9.085523268882504e-06, "loss": 19.5936, "step": 138010 }, { "epoch": 0.2788091323020237, "grad_norm": 593.7902221679688, "learning_rate": 9.085322026177017e-06, "loss": 32.3178, "step": 138020 }, { "epoch": 0.2788293329347075, "grad_norm": 274.9157409667969, "learning_rate": 9.085120763560168e-06, "loss": 14.4067, "step": 138030 }, { "epoch": 0.27884953356739134, "grad_norm": 327.98248291015625, "learning_rate": 9.084919481032935e-06, "loss": 32.2911, "step": 138040 }, { "epoch": 0.27886973420007516, "grad_norm": 445.86004638671875, "learning_rate": 9.084718178596301e-06, "loss": 39.0631, "step": 138050 }, { "epoch": 0.278889934832759, "grad_norm": 279.5096435546875, "learning_rate": 9.084516856251244e-06, "loss": 26.581, "step": 138060 }, { "epoch": 0.2789101354654428, "grad_norm": 181.05752563476562, "learning_rate": 9.084315513998749e-06, "loss": 22.5935, "step": 138070 }, { "epoch": 0.2789303360981266, "grad_norm": 242.31100463867188, "learning_rate": 9.084114151839795e-06, "loss": 27.9621, "step": 138080 }, { "epoch": 0.2789505367308104, "grad_norm": 410.84576416015625, "learning_rate": 9.083912769775364e-06, "loss": 19.4575, "step": 138090 }, { "epoch": 0.2789707373634942, "grad_norm": 393.5494689941406, "learning_rate": 9.083711367806438e-06, "loss": 17.3226, "step": 138100 }, { "epoch": 0.278990937996178, "grad_norm": 93.6767349243164, "learning_rate": 9.083509945933996e-06, "loss": 17.9454, "step": 138110 }, { "epoch": 0.27901113862886184, "grad_norm": 205.94752502441406, "learning_rate": 9.083308504159025e-06, "loss": 17.895, "step": 138120 }, { "epoch": 0.27903133926154566, "grad_norm": 335.52740478515625, "learning_rate": 9.083107042482502e-06, "loss": 10.86, "step": 138130 }, { "epoch": 0.2790515398942295, "grad_norm": 375.3155212402344, "learning_rate": 9.08290556090541e-06, "loss": 16.2072, "step": 138140 }, { "epoch": 0.2790717405269133, "grad_norm": 294.83953857421875, "learning_rate": 9.082704059428732e-06, "loss": 21.8988, "step": 138150 }, { "epoch": 0.2790919411595971, "grad_norm": 354.1148986816406, "learning_rate": 9.08250253805345e-06, "loss": 38.3446, "step": 138160 }, { "epoch": 0.27911214179228094, "grad_norm": 115.9286880493164, "learning_rate": 9.082300996780543e-06, "loss": 16.2133, "step": 138170 }, { "epoch": 0.27913234242496476, "grad_norm": 481.7546691894531, "learning_rate": 9.082099435611001e-06, "loss": 22.6997, "step": 138180 }, { "epoch": 0.2791525430576486, "grad_norm": 527.4861450195312, "learning_rate": 9.081897854545798e-06, "loss": 18.4962, "step": 138190 }, { "epoch": 0.2791727436903324, "grad_norm": 133.0523681640625, "learning_rate": 9.08169625358592e-06, "loss": 27.2517, "step": 138200 }, { "epoch": 0.2791929443230162, "grad_norm": 279.8306579589844, "learning_rate": 9.08149463273235e-06, "loss": 18.8246, "step": 138210 }, { "epoch": 0.2792131449557, "grad_norm": 393.2950134277344, "learning_rate": 9.081292991986072e-06, "loss": 39.5495, "step": 138220 }, { "epoch": 0.2792333455883838, "grad_norm": 224.7131805419922, "learning_rate": 9.081091331348065e-06, "loss": 15.5622, "step": 138230 }, { "epoch": 0.27925354622106763, "grad_norm": 314.0574645996094, "learning_rate": 9.080889650819313e-06, "loss": 20.8301, "step": 138240 }, { "epoch": 0.27927374685375145, "grad_norm": 468.055419921875, "learning_rate": 9.080687950400801e-06, "loss": 22.9352, "step": 138250 }, { "epoch": 0.27929394748643527, "grad_norm": 74.03951263427734, "learning_rate": 9.08048623009351e-06, "loss": 19.4959, "step": 138260 }, { "epoch": 0.2793141481191191, "grad_norm": 342.89990234375, "learning_rate": 9.080284489898428e-06, "loss": 15.1327, "step": 138270 }, { "epoch": 0.2793343487518029, "grad_norm": 161.39486694335938, "learning_rate": 9.08008272981653e-06, "loss": 26.5279, "step": 138280 }, { "epoch": 0.27935454938448673, "grad_norm": 86.74505615234375, "learning_rate": 9.079880949848804e-06, "loss": 25.7377, "step": 138290 }, { "epoch": 0.27937475001717055, "grad_norm": 328.0160217285156, "learning_rate": 9.079679149996235e-06, "loss": 27.4618, "step": 138300 }, { "epoch": 0.27939495064985437, "grad_norm": 119.8820571899414, "learning_rate": 9.079477330259803e-06, "loss": 13.988, "step": 138310 }, { "epoch": 0.2794151512825382, "grad_norm": 742.2996215820312, "learning_rate": 9.079275490640494e-06, "loss": 29.2879, "step": 138320 }, { "epoch": 0.279435351915222, "grad_norm": 1539.135986328125, "learning_rate": 9.079073631139291e-06, "loss": 29.7738, "step": 138330 }, { "epoch": 0.27945555254790583, "grad_norm": 248.878173828125, "learning_rate": 9.078871751757176e-06, "loss": 24.0815, "step": 138340 }, { "epoch": 0.2794757531805896, "grad_norm": 390.21905517578125, "learning_rate": 9.078669852495138e-06, "loss": 25.6902, "step": 138350 }, { "epoch": 0.2794959538132734, "grad_norm": 561.819580078125, "learning_rate": 9.078467933354156e-06, "loss": 31.7242, "step": 138360 }, { "epoch": 0.27951615444595723, "grad_norm": 207.9049072265625, "learning_rate": 9.078265994335216e-06, "loss": 19.8654, "step": 138370 }, { "epoch": 0.27953635507864105, "grad_norm": 346.0173034667969, "learning_rate": 9.078064035439303e-06, "loss": 22.1778, "step": 138380 }, { "epoch": 0.2795565557113249, "grad_norm": 446.6361999511719, "learning_rate": 9.0778620566674e-06, "loss": 25.7022, "step": 138390 }, { "epoch": 0.2795767563440087, "grad_norm": 215.0101776123047, "learning_rate": 9.077660058020492e-06, "loss": 41.9168, "step": 138400 }, { "epoch": 0.2795969569766925, "grad_norm": 339.387939453125, "learning_rate": 9.077458039499563e-06, "loss": 14.5169, "step": 138410 }, { "epoch": 0.27961715760937633, "grad_norm": 245.2791748046875, "learning_rate": 9.077256001105598e-06, "loss": 29.6878, "step": 138420 }, { "epoch": 0.27963735824206015, "grad_norm": 464.0594787597656, "learning_rate": 9.077053942839581e-06, "loss": 25.3462, "step": 138430 }, { "epoch": 0.27965755887474397, "grad_norm": 213.3262481689453, "learning_rate": 9.0768518647025e-06, "loss": 26.5458, "step": 138440 }, { "epoch": 0.2796777595074278, "grad_norm": 6.025686740875244, "learning_rate": 9.076649766695336e-06, "loss": 22.7142, "step": 138450 }, { "epoch": 0.2796979601401116, "grad_norm": 199.73931884765625, "learning_rate": 9.076447648819076e-06, "loss": 18.8293, "step": 138460 }, { "epoch": 0.2797181607727954, "grad_norm": 287.19342041015625, "learning_rate": 9.076245511074704e-06, "loss": 21.3862, "step": 138470 }, { "epoch": 0.2797383614054792, "grad_norm": 599.4028930664062, "learning_rate": 9.076043353463205e-06, "loss": 15.9244, "step": 138480 }, { "epoch": 0.279758562038163, "grad_norm": 333.3573913574219, "learning_rate": 9.075841175985566e-06, "loss": 34.1307, "step": 138490 }, { "epoch": 0.27977876267084684, "grad_norm": 363.70025634765625, "learning_rate": 9.07563897864277e-06, "loss": 25.902, "step": 138500 }, { "epoch": 0.27979896330353066, "grad_norm": 277.92535400390625, "learning_rate": 9.075436761435807e-06, "loss": 19.338, "step": 138510 }, { "epoch": 0.2798191639362145, "grad_norm": 880.1859741210938, "learning_rate": 9.075234524365658e-06, "loss": 21.7034, "step": 138520 }, { "epoch": 0.2798393645688983, "grad_norm": 538.4346923828125, "learning_rate": 9.07503226743331e-06, "loss": 32.9375, "step": 138530 }, { "epoch": 0.2798595652015821, "grad_norm": 635.5220336914062, "learning_rate": 9.07482999063975e-06, "loss": 29.6993, "step": 138540 }, { "epoch": 0.27987976583426594, "grad_norm": 107.75138092041016, "learning_rate": 9.07462769398596e-06, "loss": 29.7273, "step": 138550 }, { "epoch": 0.27989996646694976, "grad_norm": 348.2410583496094, "learning_rate": 9.074425377472932e-06, "loss": 26.5089, "step": 138560 }, { "epoch": 0.2799201670996336, "grad_norm": 213.3802490234375, "learning_rate": 9.074223041101647e-06, "loss": 23.8414, "step": 138570 }, { "epoch": 0.2799403677323174, "grad_norm": 291.2004089355469, "learning_rate": 9.074020684873095e-06, "loss": 36.4152, "step": 138580 }, { "epoch": 0.2799605683650012, "grad_norm": 328.1360168457031, "learning_rate": 9.073818308788258e-06, "loss": 15.6215, "step": 138590 }, { "epoch": 0.279980768997685, "grad_norm": 298.0921630859375, "learning_rate": 9.073615912848126e-06, "loss": 20.0032, "step": 138600 }, { "epoch": 0.2800009696303688, "grad_norm": 467.7037048339844, "learning_rate": 9.073413497053683e-06, "loss": 28.6562, "step": 138610 }, { "epoch": 0.2800211702630526, "grad_norm": 92.34696960449219, "learning_rate": 9.073211061405918e-06, "loss": 20.1292, "step": 138620 }, { "epoch": 0.28004137089573644, "grad_norm": 217.17066955566406, "learning_rate": 9.073008605905816e-06, "loss": 22.1326, "step": 138630 }, { "epoch": 0.28006157152842026, "grad_norm": 71.62554168701172, "learning_rate": 9.072806130554364e-06, "loss": 15.9018, "step": 138640 }, { "epoch": 0.2800817721611041, "grad_norm": 189.93666076660156, "learning_rate": 9.072603635352548e-06, "loss": 32.2952, "step": 138650 }, { "epoch": 0.2801019727937879, "grad_norm": 219.73167419433594, "learning_rate": 9.072401120301356e-06, "loss": 16.8916, "step": 138660 }, { "epoch": 0.2801221734264717, "grad_norm": 191.15286254882812, "learning_rate": 9.072198585401775e-06, "loss": 10.7827, "step": 138670 }, { "epoch": 0.28014237405915554, "grad_norm": 369.2118225097656, "learning_rate": 9.071996030654793e-06, "loss": 28.4618, "step": 138680 }, { "epoch": 0.28016257469183936, "grad_norm": 0.0, "learning_rate": 9.071793456061395e-06, "loss": 29.8487, "step": 138690 }, { "epoch": 0.2801827753245232, "grad_norm": 23.479108810424805, "learning_rate": 9.07159086162257e-06, "loss": 30.1705, "step": 138700 }, { "epoch": 0.280202975957207, "grad_norm": 301.7738037109375, "learning_rate": 9.071388247339306e-06, "loss": 11.1805, "step": 138710 }, { "epoch": 0.2802231765898908, "grad_norm": 269.5447082519531, "learning_rate": 9.071185613212588e-06, "loss": 29.9778, "step": 138720 }, { "epoch": 0.2802433772225746, "grad_norm": 458.8359375, "learning_rate": 9.070982959243406e-06, "loss": 16.6486, "step": 138730 }, { "epoch": 0.2802635778552584, "grad_norm": 89.85116577148438, "learning_rate": 9.070780285432746e-06, "loss": 20.7392, "step": 138740 }, { "epoch": 0.2802837784879422, "grad_norm": 531.9598388671875, "learning_rate": 9.070577591781598e-06, "loss": 19.1308, "step": 138750 }, { "epoch": 0.28030397912062605, "grad_norm": 301.63177490234375, "learning_rate": 9.070374878290946e-06, "loss": 14.2291, "step": 138760 }, { "epoch": 0.28032417975330987, "grad_norm": 172.4068603515625, "learning_rate": 9.070172144961781e-06, "loss": 17.1716, "step": 138770 }, { "epoch": 0.2803443803859937, "grad_norm": 199.46389770507812, "learning_rate": 9.069969391795093e-06, "loss": 28.6465, "step": 138780 }, { "epoch": 0.2803645810186775, "grad_norm": 553.096923828125, "learning_rate": 9.069766618791867e-06, "loss": 20.4486, "step": 138790 }, { "epoch": 0.2803847816513613, "grad_norm": 622.2401733398438, "learning_rate": 9.069563825953092e-06, "loss": 28.7008, "step": 138800 }, { "epoch": 0.28040498228404515, "grad_norm": 417.2597351074219, "learning_rate": 9.069361013279755e-06, "loss": 37.5973, "step": 138810 }, { "epoch": 0.28042518291672897, "grad_norm": 117.16934204101562, "learning_rate": 9.069158180772848e-06, "loss": 8.0126, "step": 138820 }, { "epoch": 0.2804453835494128, "grad_norm": 293.9488830566406, "learning_rate": 9.068955328433355e-06, "loss": 17.7238, "step": 138830 }, { "epoch": 0.2804655841820966, "grad_norm": 253.70606994628906, "learning_rate": 9.06875245626227e-06, "loss": 33.5301, "step": 138840 }, { "epoch": 0.2804857848147804, "grad_norm": 394.3919372558594, "learning_rate": 9.068549564260578e-06, "loss": 20.0668, "step": 138850 }, { "epoch": 0.2805059854474642, "grad_norm": 243.83009338378906, "learning_rate": 9.068346652429269e-06, "loss": 31.5401, "step": 138860 }, { "epoch": 0.280526186080148, "grad_norm": 149.8212127685547, "learning_rate": 9.068143720769332e-06, "loss": 10.1022, "step": 138870 }, { "epoch": 0.28054638671283183, "grad_norm": 323.6412658691406, "learning_rate": 9.067940769281755e-06, "loss": 21.3436, "step": 138880 }, { "epoch": 0.28056658734551565, "grad_norm": 237.26832580566406, "learning_rate": 9.067737797967528e-06, "loss": 27.0756, "step": 138890 }, { "epoch": 0.28058678797819947, "grad_norm": 267.0513610839844, "learning_rate": 9.06753480682764e-06, "loss": 14.25, "step": 138900 }, { "epoch": 0.2806069886108833, "grad_norm": 184.27468872070312, "learning_rate": 9.067331795863084e-06, "loss": 15.9609, "step": 138910 }, { "epoch": 0.2806271892435671, "grad_norm": 471.6550598144531, "learning_rate": 9.067128765074842e-06, "loss": 20.2987, "step": 138920 }, { "epoch": 0.28064738987625093, "grad_norm": 38.56666564941406, "learning_rate": 9.06692571446391e-06, "loss": 13.4738, "step": 138930 }, { "epoch": 0.28066759050893475, "grad_norm": 609.0213012695312, "learning_rate": 9.066722644031274e-06, "loss": 27.2051, "step": 138940 }, { "epoch": 0.28068779114161857, "grad_norm": 508.7875671386719, "learning_rate": 9.066519553777926e-06, "loss": 21.1414, "step": 138950 }, { "epoch": 0.2807079917743024, "grad_norm": 369.17401123046875, "learning_rate": 9.066316443704854e-06, "loss": 27.4233, "step": 138960 }, { "epoch": 0.2807281924069862, "grad_norm": 74.77613830566406, "learning_rate": 9.06611331381305e-06, "loss": 12.5636, "step": 138970 }, { "epoch": 0.28074839303967003, "grad_norm": 274.5867614746094, "learning_rate": 9.065910164103502e-06, "loss": 16.7559, "step": 138980 }, { "epoch": 0.2807685936723538, "grad_norm": 183.55091857910156, "learning_rate": 9.065706994577203e-06, "loss": 62.4875, "step": 138990 }, { "epoch": 0.2807887943050376, "grad_norm": 339.7176818847656, "learning_rate": 9.065503805235139e-06, "loss": 21.6643, "step": 139000 }, { "epoch": 0.28080899493772143, "grad_norm": 265.5896301269531, "learning_rate": 9.065300596078304e-06, "loss": 25.1362, "step": 139010 }, { "epoch": 0.28082919557040525, "grad_norm": 191.6724853515625, "learning_rate": 9.065097367107685e-06, "loss": 19.3614, "step": 139020 }, { "epoch": 0.2808493962030891, "grad_norm": 496.17889404296875, "learning_rate": 9.064894118324276e-06, "loss": 33.497, "step": 139030 }, { "epoch": 0.2808695968357729, "grad_norm": 228.84031677246094, "learning_rate": 9.064690849729066e-06, "loss": 21.227, "step": 139040 }, { "epoch": 0.2808897974684567, "grad_norm": 146.29110717773438, "learning_rate": 9.064487561323046e-06, "loss": 19.3904, "step": 139050 }, { "epoch": 0.28090999810114053, "grad_norm": 214.057861328125, "learning_rate": 9.064284253107206e-06, "loss": 22.8663, "step": 139060 }, { "epoch": 0.28093019873382435, "grad_norm": 152.74378967285156, "learning_rate": 9.064080925082536e-06, "loss": 18.7189, "step": 139070 }, { "epoch": 0.2809503993665082, "grad_norm": 362.5931396484375, "learning_rate": 9.063877577250031e-06, "loss": 21.7558, "step": 139080 }, { "epoch": 0.280970599999192, "grad_norm": 496.97216796875, "learning_rate": 9.063674209610678e-06, "loss": 21.5603, "step": 139090 }, { "epoch": 0.2809908006318758, "grad_norm": 157.3998260498047, "learning_rate": 9.06347082216547e-06, "loss": 16.7457, "step": 139100 }, { "epoch": 0.2810110012645596, "grad_norm": 532.96728515625, "learning_rate": 9.0632674149154e-06, "loss": 46.7179, "step": 139110 }, { "epoch": 0.2810312018972434, "grad_norm": 6.090430736541748, "learning_rate": 9.063063987861455e-06, "loss": 12.7118, "step": 139120 }, { "epoch": 0.2810514025299272, "grad_norm": 9.551039695739746, "learning_rate": 9.06286054100463e-06, "loss": 22.6085, "step": 139130 }, { "epoch": 0.28107160316261104, "grad_norm": 125.98015594482422, "learning_rate": 9.062657074345916e-06, "loss": 20.1985, "step": 139140 }, { "epoch": 0.28109180379529486, "grad_norm": 269.7104797363281, "learning_rate": 9.062453587886302e-06, "loss": 17.9826, "step": 139150 }, { "epoch": 0.2811120044279787, "grad_norm": 170.50820922851562, "learning_rate": 9.062250081626784e-06, "loss": 28.4922, "step": 139160 }, { "epoch": 0.2811322050606625, "grad_norm": 399.3626708984375, "learning_rate": 9.062046555568351e-06, "loss": 11.3934, "step": 139170 }, { "epoch": 0.2811524056933463, "grad_norm": 203.05174255371094, "learning_rate": 9.061843009711995e-06, "loss": 14.2657, "step": 139180 }, { "epoch": 0.28117260632603014, "grad_norm": 57.492191314697266, "learning_rate": 9.06163944405871e-06, "loss": 21.2791, "step": 139190 }, { "epoch": 0.28119280695871396, "grad_norm": 751.98095703125, "learning_rate": 9.061435858609486e-06, "loss": 45.4907, "step": 139200 }, { "epoch": 0.2812130075913978, "grad_norm": 771.2106323242188, "learning_rate": 9.061232253365317e-06, "loss": 25.2831, "step": 139210 }, { "epoch": 0.2812332082240816, "grad_norm": 503.2507019042969, "learning_rate": 9.061028628327196e-06, "loss": 24.5738, "step": 139220 }, { "epoch": 0.2812534088567654, "grad_norm": 380.7889709472656, "learning_rate": 9.060824983496113e-06, "loss": 18.1518, "step": 139230 }, { "epoch": 0.2812736094894492, "grad_norm": 178.71995544433594, "learning_rate": 9.06062131887306e-06, "loss": 17.0123, "step": 139240 }, { "epoch": 0.281293810122133, "grad_norm": 687.5219116210938, "learning_rate": 9.060417634459032e-06, "loss": 17.9673, "step": 139250 }, { "epoch": 0.2813140107548168, "grad_norm": 420.3254089355469, "learning_rate": 9.060213930255023e-06, "loss": 26.4269, "step": 139260 }, { "epoch": 0.28133421138750064, "grad_norm": 431.1579895019531, "learning_rate": 9.06001020626202e-06, "loss": 18.6419, "step": 139270 }, { "epoch": 0.28135441202018446, "grad_norm": 524.9252319335938, "learning_rate": 9.059806462481022e-06, "loss": 26.9111, "step": 139280 }, { "epoch": 0.2813746126528683, "grad_norm": 438.450439453125, "learning_rate": 9.05960269891302e-06, "loss": 12.6119, "step": 139290 }, { "epoch": 0.2813948132855521, "grad_norm": 235.75723266601562, "learning_rate": 9.059398915559005e-06, "loss": 16.5319, "step": 139300 }, { "epoch": 0.2814150139182359, "grad_norm": 339.1556396484375, "learning_rate": 9.059195112419972e-06, "loss": 28.3443, "step": 139310 }, { "epoch": 0.28143521455091974, "grad_norm": 232.02420043945312, "learning_rate": 9.058991289496916e-06, "loss": 17.32, "step": 139320 }, { "epoch": 0.28145541518360356, "grad_norm": 0.0, "learning_rate": 9.058787446790828e-06, "loss": 16.7275, "step": 139330 }, { "epoch": 0.2814756158162874, "grad_norm": 212.8332061767578, "learning_rate": 9.058583584302702e-06, "loss": 31.0525, "step": 139340 }, { "epoch": 0.2814958164489712, "grad_norm": 228.8565673828125, "learning_rate": 9.058379702033533e-06, "loss": 13.6813, "step": 139350 }, { "epoch": 0.281516017081655, "grad_norm": 228.03826904296875, "learning_rate": 9.058175799984312e-06, "loss": 10.5623, "step": 139360 }, { "epoch": 0.2815362177143388, "grad_norm": 628.1627197265625, "learning_rate": 9.057971878156036e-06, "loss": 49.4379, "step": 139370 }, { "epoch": 0.2815564183470226, "grad_norm": 356.2946472167969, "learning_rate": 9.057767936549696e-06, "loss": 19.9161, "step": 139380 }, { "epoch": 0.28157661897970643, "grad_norm": 306.92999267578125, "learning_rate": 9.057563975166288e-06, "loss": 21.21, "step": 139390 }, { "epoch": 0.28159681961239025, "grad_norm": 526.9788818359375, "learning_rate": 9.057359994006806e-06, "loss": 20.1254, "step": 139400 }, { "epoch": 0.28161702024507407, "grad_norm": 197.34059143066406, "learning_rate": 9.057155993072241e-06, "loss": 13.9894, "step": 139410 }, { "epoch": 0.2816372208777579, "grad_norm": 378.6277160644531, "learning_rate": 9.056951972363592e-06, "loss": 12.9845, "step": 139420 }, { "epoch": 0.2816574215104417, "grad_norm": 234.16334533691406, "learning_rate": 9.056747931881851e-06, "loss": 36.9656, "step": 139430 }, { "epoch": 0.28167762214312553, "grad_norm": 166.67156982421875, "learning_rate": 9.056543871628012e-06, "loss": 15.0091, "step": 139440 }, { "epoch": 0.28169782277580935, "grad_norm": 383.6903076171875, "learning_rate": 9.056339791603069e-06, "loss": 15.8239, "step": 139450 }, { "epoch": 0.28171802340849317, "grad_norm": 83.37410736083984, "learning_rate": 9.056135691808019e-06, "loss": 24.7802, "step": 139460 }, { "epoch": 0.281738224041177, "grad_norm": 270.700439453125, "learning_rate": 9.055931572243857e-06, "loss": 27.6369, "step": 139470 }, { "epoch": 0.2817584246738608, "grad_norm": 101.83909606933594, "learning_rate": 9.055727432911574e-06, "loss": 59.2001, "step": 139480 }, { "epoch": 0.28177862530654463, "grad_norm": 113.43636322021484, "learning_rate": 9.055523273812168e-06, "loss": 29.7829, "step": 139490 }, { "epoch": 0.2817988259392284, "grad_norm": 230.65060424804688, "learning_rate": 9.055319094946633e-06, "loss": 15.899, "step": 139500 }, { "epoch": 0.2818190265719122, "grad_norm": 315.04046630859375, "learning_rate": 9.055114896315966e-06, "loss": 22.5131, "step": 139510 }, { "epoch": 0.28183922720459603, "grad_norm": 493.35211181640625, "learning_rate": 9.05491067792116e-06, "loss": 21.4957, "step": 139520 }, { "epoch": 0.28185942783727985, "grad_norm": 215.38446044921875, "learning_rate": 9.054706439763212e-06, "loss": 20.7907, "step": 139530 }, { "epoch": 0.2818796284699637, "grad_norm": 272.55914306640625, "learning_rate": 9.054502181843117e-06, "loss": 20.8158, "step": 139540 }, { "epoch": 0.2818998291026475, "grad_norm": 277.8279113769531, "learning_rate": 9.054297904161868e-06, "loss": 14.5278, "step": 139550 }, { "epoch": 0.2819200297353313, "grad_norm": 386.6019287109375, "learning_rate": 9.054093606720464e-06, "loss": 27.2546, "step": 139560 }, { "epoch": 0.28194023036801513, "grad_norm": 26.936948776245117, "learning_rate": 9.0538892895199e-06, "loss": 21.9676, "step": 139570 }, { "epoch": 0.28196043100069895, "grad_norm": 529.6173095703125, "learning_rate": 9.053684952561171e-06, "loss": 24.4639, "step": 139580 }, { "epoch": 0.2819806316333828, "grad_norm": 211.47518920898438, "learning_rate": 9.053480595845272e-06, "loss": 11.0526, "step": 139590 }, { "epoch": 0.2820008322660666, "grad_norm": 45.12897872924805, "learning_rate": 9.0532762193732e-06, "loss": 23.8549, "step": 139600 }, { "epoch": 0.2820210328987504, "grad_norm": 190.8096923828125, "learning_rate": 9.053071823145953e-06, "loss": 34.0885, "step": 139610 }, { "epoch": 0.28204123353143423, "grad_norm": 348.278076171875, "learning_rate": 9.052867407164525e-06, "loss": 42.0387, "step": 139620 }, { "epoch": 0.282061434164118, "grad_norm": 436.54290771484375, "learning_rate": 9.052662971429912e-06, "loss": 16.9768, "step": 139630 }, { "epoch": 0.2820816347968018, "grad_norm": 145.72369384765625, "learning_rate": 9.052458515943112e-06, "loss": 6.148, "step": 139640 }, { "epoch": 0.28210183542948564, "grad_norm": 387.8900146484375, "learning_rate": 9.052254040705121e-06, "loss": 23.9024, "step": 139650 }, { "epoch": 0.28212203606216946, "grad_norm": 406.9020080566406, "learning_rate": 9.052049545716934e-06, "loss": 18.8423, "step": 139660 }, { "epoch": 0.2821422366948533, "grad_norm": 206.2200927734375, "learning_rate": 9.05184503097955e-06, "loss": 36.3252, "step": 139670 }, { "epoch": 0.2821624373275371, "grad_norm": 272.9380798339844, "learning_rate": 9.051640496493965e-06, "loss": 35.6813, "step": 139680 }, { "epoch": 0.2821826379602209, "grad_norm": 189.17262268066406, "learning_rate": 9.051435942261175e-06, "loss": 13.605, "step": 139690 }, { "epoch": 0.28220283859290474, "grad_norm": 362.2331848144531, "learning_rate": 9.051231368282177e-06, "loss": 22.9537, "step": 139700 }, { "epoch": 0.28222303922558856, "grad_norm": 770.1021728515625, "learning_rate": 9.051026774557969e-06, "loss": 20.783, "step": 139710 }, { "epoch": 0.2822432398582724, "grad_norm": 508.9685363769531, "learning_rate": 9.05082216108955e-06, "loss": 18.2053, "step": 139720 }, { "epoch": 0.2822634404909562, "grad_norm": 221.64138793945312, "learning_rate": 9.050617527877911e-06, "loss": 19.7432, "step": 139730 }, { "epoch": 0.28228364112364, "grad_norm": 395.3092956542969, "learning_rate": 9.050412874924057e-06, "loss": 31.2412, "step": 139740 }, { "epoch": 0.2823038417563238, "grad_norm": 337.7354431152344, "learning_rate": 9.050208202228981e-06, "loss": 27.588, "step": 139750 }, { "epoch": 0.2823240423890076, "grad_norm": 188.76939392089844, "learning_rate": 9.05000350979368e-06, "loss": 14.7572, "step": 139760 }, { "epoch": 0.2823442430216914, "grad_norm": 351.6512756347656, "learning_rate": 9.049798797619156e-06, "loss": 28.5766, "step": 139770 }, { "epoch": 0.28236444365437524, "grad_norm": 280.2034606933594, "learning_rate": 9.049594065706401e-06, "loss": 28.2018, "step": 139780 }, { "epoch": 0.28238464428705906, "grad_norm": 377.5614013671875, "learning_rate": 9.049389314056417e-06, "loss": 30.5234, "step": 139790 }, { "epoch": 0.2824048449197429, "grad_norm": 390.8042907714844, "learning_rate": 9.0491845426702e-06, "loss": 14.6759, "step": 139800 }, { "epoch": 0.2824250455524267, "grad_norm": 153.69920349121094, "learning_rate": 9.04897975154875e-06, "loss": 20.1872, "step": 139810 }, { "epoch": 0.2824452461851105, "grad_norm": 275.3947448730469, "learning_rate": 9.048774940693062e-06, "loss": 16.9149, "step": 139820 }, { "epoch": 0.28246544681779434, "grad_norm": 380.8222961425781, "learning_rate": 9.048570110104137e-06, "loss": 17.5648, "step": 139830 }, { "epoch": 0.28248564745047816, "grad_norm": 259.47833251953125, "learning_rate": 9.048365259782973e-06, "loss": 15.2642, "step": 139840 }, { "epoch": 0.282505848083162, "grad_norm": 122.93634033203125, "learning_rate": 9.048160389730565e-06, "loss": 16.8836, "step": 139850 }, { "epoch": 0.2825260487158458, "grad_norm": 221.40176391601562, "learning_rate": 9.047955499947916e-06, "loss": 14.1658, "step": 139860 }, { "epoch": 0.2825462493485296, "grad_norm": 428.16973876953125, "learning_rate": 9.047750590436023e-06, "loss": 22.0233, "step": 139870 }, { "epoch": 0.2825664499812134, "grad_norm": 415.7236022949219, "learning_rate": 9.047545661195885e-06, "loss": 20.1651, "step": 139880 }, { "epoch": 0.2825866506138972, "grad_norm": 751.794921875, "learning_rate": 9.0473407122285e-06, "loss": 32.3509, "step": 139890 }, { "epoch": 0.282606851246581, "grad_norm": 443.04840087890625, "learning_rate": 9.047135743534866e-06, "loss": 15.5287, "step": 139900 }, { "epoch": 0.28262705187926485, "grad_norm": 266.49395751953125, "learning_rate": 9.046930755115986e-06, "loss": 19.8078, "step": 139910 }, { "epoch": 0.28264725251194867, "grad_norm": 421.3170166015625, "learning_rate": 9.046725746972855e-06, "loss": 19.7027, "step": 139920 }, { "epoch": 0.2826674531446325, "grad_norm": 292.05828857421875, "learning_rate": 9.046520719106473e-06, "loss": 27.7885, "step": 139930 }, { "epoch": 0.2826876537773163, "grad_norm": 320.7277526855469, "learning_rate": 9.04631567151784e-06, "loss": 42.928, "step": 139940 }, { "epoch": 0.2827078544100001, "grad_norm": 486.00836181640625, "learning_rate": 9.046110604207955e-06, "loss": 16.6577, "step": 139950 }, { "epoch": 0.28272805504268395, "grad_norm": 380.845947265625, "learning_rate": 9.045905517177817e-06, "loss": 25.2362, "step": 139960 }, { "epoch": 0.28274825567536777, "grad_norm": 221.766845703125, "learning_rate": 9.045700410428428e-06, "loss": 13.5463, "step": 139970 }, { "epoch": 0.2827684563080516, "grad_norm": 210.8778076171875, "learning_rate": 9.045495283960784e-06, "loss": 6.4422, "step": 139980 }, { "epoch": 0.2827886569407354, "grad_norm": 364.0777587890625, "learning_rate": 9.045290137775888e-06, "loss": 35.5961, "step": 139990 }, { "epoch": 0.2828088575734192, "grad_norm": 534.4332885742188, "learning_rate": 9.045084971874738e-06, "loss": 16.3971, "step": 140000 }, { "epoch": 0.282829058206103, "grad_norm": 214.34823608398438, "learning_rate": 9.044879786258335e-06, "loss": 23.3617, "step": 140010 }, { "epoch": 0.2828492588387868, "grad_norm": 270.2270812988281, "learning_rate": 9.044674580927678e-06, "loss": 34.5878, "step": 140020 }, { "epoch": 0.28286945947147063, "grad_norm": 238.66326904296875, "learning_rate": 9.044469355883767e-06, "loss": 12.8607, "step": 140030 }, { "epoch": 0.28288966010415445, "grad_norm": 247.9914093017578, "learning_rate": 9.044264111127603e-06, "loss": 22.307, "step": 140040 }, { "epoch": 0.28290986073683827, "grad_norm": 301.15380859375, "learning_rate": 9.044058846660187e-06, "loss": 18.4198, "step": 140050 }, { "epoch": 0.2829300613695221, "grad_norm": 134.90805053710938, "learning_rate": 9.043853562482518e-06, "loss": 20.831, "step": 140060 }, { "epoch": 0.2829502620022059, "grad_norm": 182.46165466308594, "learning_rate": 9.043648258595598e-06, "loss": 21.6405, "step": 140070 }, { "epoch": 0.28297046263488973, "grad_norm": 67.0982437133789, "learning_rate": 9.043442935000428e-06, "loss": 18.6391, "step": 140080 }, { "epoch": 0.28299066326757355, "grad_norm": 716.5822143554688, "learning_rate": 9.043237591698006e-06, "loss": 34.1329, "step": 140090 }, { "epoch": 0.28301086390025737, "grad_norm": 387.8551025390625, "learning_rate": 9.043032228689333e-06, "loss": 24.0398, "step": 140100 }, { "epoch": 0.2830310645329412, "grad_norm": 385.4352722167969, "learning_rate": 9.042826845975413e-06, "loss": 27.9254, "step": 140110 }, { "epoch": 0.283051265165625, "grad_norm": 517.2841796875, "learning_rate": 9.042621443557244e-06, "loss": 14.0123, "step": 140120 }, { "epoch": 0.28307146579830883, "grad_norm": 244.44183349609375, "learning_rate": 9.042416021435831e-06, "loss": 29.4595, "step": 140130 }, { "epoch": 0.2830916664309926, "grad_norm": 506.6087341308594, "learning_rate": 9.042210579612171e-06, "loss": 47.3773, "step": 140140 }, { "epoch": 0.2831118670636764, "grad_norm": 215.4011688232422, "learning_rate": 9.042005118087267e-06, "loss": 15.3966, "step": 140150 }, { "epoch": 0.28313206769636023, "grad_norm": 505.6189880371094, "learning_rate": 9.041799636862119e-06, "loss": 26.3211, "step": 140160 }, { "epoch": 0.28315226832904405, "grad_norm": 368.5065002441406, "learning_rate": 9.041594135937731e-06, "loss": 19.7555, "step": 140170 }, { "epoch": 0.2831724689617279, "grad_norm": 757.466064453125, "learning_rate": 9.041388615315102e-06, "loss": 23.706, "step": 140180 }, { "epoch": 0.2831926695944117, "grad_norm": 288.6783447265625, "learning_rate": 9.041183074995238e-06, "loss": 22.7755, "step": 140190 }, { "epoch": 0.2832128702270955, "grad_norm": 263.2040710449219, "learning_rate": 9.040977514979136e-06, "loss": 16.2286, "step": 140200 }, { "epoch": 0.28323307085977933, "grad_norm": 34.13862228393555, "learning_rate": 9.0407719352678e-06, "loss": 16.4908, "step": 140210 }, { "epoch": 0.28325327149246315, "grad_norm": 118.72792053222656, "learning_rate": 9.040566335862231e-06, "loss": 26.1911, "step": 140220 }, { "epoch": 0.283273472125147, "grad_norm": 463.3952331542969, "learning_rate": 9.040360716763432e-06, "loss": 18.424, "step": 140230 }, { "epoch": 0.2832936727578308, "grad_norm": 200.77052307128906, "learning_rate": 9.040155077972406e-06, "loss": 15.3646, "step": 140240 }, { "epoch": 0.2833138733905146, "grad_norm": 670.5958862304688, "learning_rate": 9.039949419490152e-06, "loss": 21.346, "step": 140250 }, { "epoch": 0.2833340740231984, "grad_norm": 552.3989868164062, "learning_rate": 9.039743741317677e-06, "loss": 16.0048, "step": 140260 }, { "epoch": 0.2833542746558822, "grad_norm": 391.1996765136719, "learning_rate": 9.03953804345598e-06, "loss": 22.8928, "step": 140270 }, { "epoch": 0.283374475288566, "grad_norm": 321.4759216308594, "learning_rate": 9.039332325906065e-06, "loss": 18.078, "step": 140280 }, { "epoch": 0.28339467592124984, "grad_norm": 177.458251953125, "learning_rate": 9.039126588668934e-06, "loss": 20.5181, "step": 140290 }, { "epoch": 0.28341487655393366, "grad_norm": 301.08917236328125, "learning_rate": 9.038920831745587e-06, "loss": 18.0359, "step": 140300 }, { "epoch": 0.2834350771866175, "grad_norm": 335.65252685546875, "learning_rate": 9.038715055137033e-06, "loss": 19.4282, "step": 140310 }, { "epoch": 0.2834552778193013, "grad_norm": 109.06648254394531, "learning_rate": 9.038509258844271e-06, "loss": 11.8063, "step": 140320 }, { "epoch": 0.2834754784519851, "grad_norm": 275.3763732910156, "learning_rate": 9.038303442868304e-06, "loss": 26.7036, "step": 140330 }, { "epoch": 0.28349567908466894, "grad_norm": 477.8904113769531, "learning_rate": 9.038097607210136e-06, "loss": 16.6864, "step": 140340 }, { "epoch": 0.28351587971735276, "grad_norm": 423.6849365234375, "learning_rate": 9.037891751870772e-06, "loss": 20.237, "step": 140350 }, { "epoch": 0.2835360803500366, "grad_norm": 677.6395263671875, "learning_rate": 9.037685876851211e-06, "loss": 27.2017, "step": 140360 }, { "epoch": 0.2835562809827204, "grad_norm": 610.3564453125, "learning_rate": 9.03747998215246e-06, "loss": 19.828, "step": 140370 }, { "epoch": 0.2835764816154042, "grad_norm": 75.918701171875, "learning_rate": 9.03727406777552e-06, "loss": 12.504, "step": 140380 }, { "epoch": 0.283596682248088, "grad_norm": 374.5704345703125, "learning_rate": 9.037068133721396e-06, "loss": 20.6084, "step": 140390 }, { "epoch": 0.2836168828807718, "grad_norm": 11.423043251037598, "learning_rate": 9.036862179991092e-06, "loss": 5.9057, "step": 140400 }, { "epoch": 0.2836370835134556, "grad_norm": 318.82440185546875, "learning_rate": 9.036656206585612e-06, "loss": 11.9414, "step": 140410 }, { "epoch": 0.28365728414613944, "grad_norm": 429.75701904296875, "learning_rate": 9.036450213505958e-06, "loss": 31.7792, "step": 140420 }, { "epoch": 0.28367748477882326, "grad_norm": 407.0364990234375, "learning_rate": 9.036244200753136e-06, "loss": 16.1606, "step": 140430 }, { "epoch": 0.2836976854115071, "grad_norm": 158.80502319335938, "learning_rate": 9.036038168328149e-06, "loss": 19.947, "step": 140440 }, { "epoch": 0.2837178860441909, "grad_norm": 171.84120178222656, "learning_rate": 9.035832116232002e-06, "loss": 24.081, "step": 140450 }, { "epoch": 0.2837380866768747, "grad_norm": 8.986970901489258, "learning_rate": 9.035626044465699e-06, "loss": 19.8636, "step": 140460 }, { "epoch": 0.28375828730955854, "grad_norm": 149.68106079101562, "learning_rate": 9.035419953030244e-06, "loss": 16.7315, "step": 140470 }, { "epoch": 0.28377848794224236, "grad_norm": 76.43167114257812, "learning_rate": 9.03521384192664e-06, "loss": 15.3786, "step": 140480 }, { "epoch": 0.2837986885749262, "grad_norm": 326.3348083496094, "learning_rate": 9.035007711155894e-06, "loss": 19.666, "step": 140490 }, { "epoch": 0.28381888920761, "grad_norm": 804.619384765625, "learning_rate": 9.03480156071901e-06, "loss": 21.2592, "step": 140500 }, { "epoch": 0.2838390898402938, "grad_norm": 215.4338836669922, "learning_rate": 9.034595390616993e-06, "loss": 16.6904, "step": 140510 }, { "epoch": 0.2838592904729776, "grad_norm": 154.030517578125, "learning_rate": 9.034389200850847e-06, "loss": 30.9999, "step": 140520 }, { "epoch": 0.2838794911056614, "grad_norm": 101.14247131347656, "learning_rate": 9.034182991421578e-06, "loss": 16.4346, "step": 140530 }, { "epoch": 0.28389969173834523, "grad_norm": 555.1714477539062, "learning_rate": 9.033976762330189e-06, "loss": 20.483, "step": 140540 }, { "epoch": 0.28391989237102905, "grad_norm": 245.01795959472656, "learning_rate": 9.033770513577688e-06, "loss": 17.8545, "step": 140550 }, { "epoch": 0.28394009300371287, "grad_norm": 27.286895751953125, "learning_rate": 9.033564245165077e-06, "loss": 16.5523, "step": 140560 }, { "epoch": 0.2839602936363967, "grad_norm": 312.7022705078125, "learning_rate": 9.033357957093366e-06, "loss": 26.827, "step": 140570 }, { "epoch": 0.2839804942690805, "grad_norm": 259.9972229003906, "learning_rate": 9.033151649363555e-06, "loss": 28.199, "step": 140580 }, { "epoch": 0.28400069490176433, "grad_norm": 147.1996307373047, "learning_rate": 9.032945321976652e-06, "loss": 13.6206, "step": 140590 }, { "epoch": 0.28402089553444815, "grad_norm": 480.2930603027344, "learning_rate": 9.032738974933663e-06, "loss": 34.9674, "step": 140600 }, { "epoch": 0.28404109616713197, "grad_norm": 89.59274291992188, "learning_rate": 9.032532608235594e-06, "loss": 27.5447, "step": 140610 }, { "epoch": 0.2840612967998158, "grad_norm": 389.9999084472656, "learning_rate": 9.03232622188345e-06, "loss": 20.7359, "step": 140620 }, { "epoch": 0.2840814974324996, "grad_norm": 314.1833190917969, "learning_rate": 9.032119815878237e-06, "loss": 26.4948, "step": 140630 }, { "epoch": 0.28410169806518343, "grad_norm": 449.2327880859375, "learning_rate": 9.03191339022096e-06, "loss": 15.6301, "step": 140640 }, { "epoch": 0.2841218986978672, "grad_norm": 299.1946105957031, "learning_rate": 9.031706944912627e-06, "loss": 20.4681, "step": 140650 }, { "epoch": 0.284142099330551, "grad_norm": 200.0046844482422, "learning_rate": 9.031500479954243e-06, "loss": 15.6886, "step": 140660 }, { "epoch": 0.28416229996323483, "grad_norm": 394.6208190917969, "learning_rate": 9.031293995346814e-06, "loss": 25.8716, "step": 140670 }, { "epoch": 0.28418250059591865, "grad_norm": 185.50416564941406, "learning_rate": 9.03108749109135e-06, "loss": 20.6327, "step": 140680 }, { "epoch": 0.2842027012286025, "grad_norm": 315.96368408203125, "learning_rate": 9.030880967188852e-06, "loss": 14.5932, "step": 140690 }, { "epoch": 0.2842229018612863, "grad_norm": 365.7945556640625, "learning_rate": 9.03067442364033e-06, "loss": 25.3769, "step": 140700 }, { "epoch": 0.2842431024939701, "grad_norm": 130.28280639648438, "learning_rate": 9.030467860446789e-06, "loss": 13.9587, "step": 140710 }, { "epoch": 0.28426330312665393, "grad_norm": 444.1241760253906, "learning_rate": 9.030261277609235e-06, "loss": 23.6818, "step": 140720 }, { "epoch": 0.28428350375933775, "grad_norm": 355.99615478515625, "learning_rate": 9.030054675128679e-06, "loss": 49.7361, "step": 140730 }, { "epoch": 0.2843037043920216, "grad_norm": 393.1194763183594, "learning_rate": 9.029848053006125e-06, "loss": 27.3265, "step": 140740 }, { "epoch": 0.2843239050247054, "grad_norm": 203.14012145996094, "learning_rate": 9.02964141124258e-06, "loss": 18.6201, "step": 140750 }, { "epoch": 0.2843441056573892, "grad_norm": 370.2939147949219, "learning_rate": 9.02943474983905e-06, "loss": 15.525, "step": 140760 }, { "epoch": 0.28436430629007303, "grad_norm": 627.5787963867188, "learning_rate": 9.029228068796546e-06, "loss": 41.0849, "step": 140770 }, { "epoch": 0.2843845069227568, "grad_norm": 364.14715576171875, "learning_rate": 9.029021368116072e-06, "loss": 17.867, "step": 140780 }, { "epoch": 0.2844047075554406, "grad_norm": 357.46270751953125, "learning_rate": 9.028814647798635e-06, "loss": 16.9793, "step": 140790 }, { "epoch": 0.28442490818812444, "grad_norm": 398.1582336425781, "learning_rate": 9.028607907845247e-06, "loss": 28.4501, "step": 140800 }, { "epoch": 0.28444510882080826, "grad_norm": 306.7930908203125, "learning_rate": 9.028401148256911e-06, "loss": 21.5827, "step": 140810 }, { "epoch": 0.2844653094534921, "grad_norm": 173.01214599609375, "learning_rate": 9.028194369034638e-06, "loss": 16.8538, "step": 140820 }, { "epoch": 0.2844855100861759, "grad_norm": 250.69383239746094, "learning_rate": 9.027987570179432e-06, "loss": 11.2385, "step": 140830 }, { "epoch": 0.2845057107188597, "grad_norm": 170.19235229492188, "learning_rate": 9.027780751692303e-06, "loss": 13.2484, "step": 140840 }, { "epoch": 0.28452591135154354, "grad_norm": 364.15545654296875, "learning_rate": 9.02757391357426e-06, "loss": 17.1323, "step": 140850 }, { "epoch": 0.28454611198422736, "grad_norm": 203.78060913085938, "learning_rate": 9.027367055826311e-06, "loss": 23.9272, "step": 140860 }, { "epoch": 0.2845663126169112, "grad_norm": 346.6031188964844, "learning_rate": 9.027160178449464e-06, "loss": 14.4492, "step": 140870 }, { "epoch": 0.284586513249595, "grad_norm": 79.835693359375, "learning_rate": 9.026953281444725e-06, "loss": 28.9439, "step": 140880 }, { "epoch": 0.2846067138822788, "grad_norm": 280.8673095703125, "learning_rate": 9.026746364813105e-06, "loss": 14.4248, "step": 140890 }, { "epoch": 0.2846269145149626, "grad_norm": 266.841552734375, "learning_rate": 9.026539428555609e-06, "loss": 22.8557, "step": 140900 }, { "epoch": 0.2846471151476464, "grad_norm": 611.882080078125, "learning_rate": 9.026332472673251e-06, "loss": 26.3559, "step": 140910 }, { "epoch": 0.2846673157803302, "grad_norm": 256.49090576171875, "learning_rate": 9.026125497167037e-06, "loss": 16.0193, "step": 140920 }, { "epoch": 0.28468751641301404, "grad_norm": 424.8988037109375, "learning_rate": 9.025918502037975e-06, "loss": 10.8423, "step": 140930 }, { "epoch": 0.28470771704569786, "grad_norm": 608.3603515625, "learning_rate": 9.025711487287074e-06, "loss": 43.9637, "step": 140940 }, { "epoch": 0.2847279176783817, "grad_norm": 156.2509002685547, "learning_rate": 9.025504452915345e-06, "loss": 15.1723, "step": 140950 }, { "epoch": 0.2847481183110655, "grad_norm": 146.2592315673828, "learning_rate": 9.025297398923794e-06, "loss": 17.5449, "step": 140960 }, { "epoch": 0.2847683189437493, "grad_norm": 385.2055358886719, "learning_rate": 9.025090325313432e-06, "loss": 25.3146, "step": 140970 }, { "epoch": 0.28478851957643314, "grad_norm": 125.30479431152344, "learning_rate": 9.024883232085268e-06, "loss": 30.4377, "step": 140980 }, { "epoch": 0.28480872020911696, "grad_norm": 367.092041015625, "learning_rate": 9.024676119240312e-06, "loss": 20.6821, "step": 140990 }, { "epoch": 0.2848289208418008, "grad_norm": 113.59981536865234, "learning_rate": 9.02446898677957e-06, "loss": 22.9161, "step": 141000 }, { "epoch": 0.2848491214744846, "grad_norm": 151.55776977539062, "learning_rate": 9.024261834704058e-06, "loss": 21.5864, "step": 141010 }, { "epoch": 0.2848693221071684, "grad_norm": 199.47511291503906, "learning_rate": 9.02405466301478e-06, "loss": 27.0504, "step": 141020 }, { "epoch": 0.2848895227398522, "grad_norm": 363.76708984375, "learning_rate": 9.023847471712748e-06, "loss": 26.7922, "step": 141030 }, { "epoch": 0.284909723372536, "grad_norm": 410.7823486328125, "learning_rate": 9.023640260798972e-06, "loss": 30.7538, "step": 141040 }, { "epoch": 0.2849299240052198, "grad_norm": 475.0013732910156, "learning_rate": 9.02343303027446e-06, "loss": 33.4878, "step": 141050 }, { "epoch": 0.28495012463790365, "grad_norm": 135.72666931152344, "learning_rate": 9.023225780140223e-06, "loss": 20.4529, "step": 141060 }, { "epoch": 0.28497032527058747, "grad_norm": 1203.716064453125, "learning_rate": 9.023018510397274e-06, "loss": 34.3725, "step": 141070 }, { "epoch": 0.2849905259032713, "grad_norm": 412.8079833984375, "learning_rate": 9.022811221046618e-06, "loss": 15.0922, "step": 141080 }, { "epoch": 0.2850107265359551, "grad_norm": 223.48251342773438, "learning_rate": 9.02260391208927e-06, "loss": 34.4538, "step": 141090 }, { "epoch": 0.2850309271686389, "grad_norm": 454.08050537109375, "learning_rate": 9.022396583526238e-06, "loss": 30.6837, "step": 141100 }, { "epoch": 0.28505112780132275, "grad_norm": 12.783651351928711, "learning_rate": 9.022189235358533e-06, "loss": 14.0184, "step": 141110 }, { "epoch": 0.28507132843400657, "grad_norm": 494.2214050292969, "learning_rate": 9.021981867587165e-06, "loss": 26.9542, "step": 141120 }, { "epoch": 0.2850915290666904, "grad_norm": 342.78515625, "learning_rate": 9.021774480213145e-06, "loss": 12.8758, "step": 141130 }, { "epoch": 0.2851117296993742, "grad_norm": 401.7846984863281, "learning_rate": 9.021567073237486e-06, "loss": 13.3258, "step": 141140 }, { "epoch": 0.285131930332058, "grad_norm": 353.2247619628906, "learning_rate": 9.021359646661194e-06, "loss": 9.4073, "step": 141150 }, { "epoch": 0.2851521309647418, "grad_norm": 308.80670166015625, "learning_rate": 9.021152200485283e-06, "loss": 17.5954, "step": 141160 }, { "epoch": 0.2851723315974256, "grad_norm": 338.4553527832031, "learning_rate": 9.020944734710767e-06, "loss": 21.9156, "step": 141170 }, { "epoch": 0.28519253223010943, "grad_norm": 45.094173431396484, "learning_rate": 9.02073724933865e-06, "loss": 15.7095, "step": 141180 }, { "epoch": 0.28521273286279325, "grad_norm": 130.52027893066406, "learning_rate": 9.02052974436995e-06, "loss": 18.9498, "step": 141190 }, { "epoch": 0.28523293349547707, "grad_norm": 371.56109619140625, "learning_rate": 9.020322219805674e-06, "loss": 25.2857, "step": 141200 }, { "epoch": 0.2852531341281609, "grad_norm": 578.0428466796875, "learning_rate": 9.020114675646835e-06, "loss": 30.4654, "step": 141210 }, { "epoch": 0.2852733347608447, "grad_norm": 405.0187683105469, "learning_rate": 9.019907111894447e-06, "loss": 29.6807, "step": 141220 }, { "epoch": 0.28529353539352853, "grad_norm": 248.7142333984375, "learning_rate": 9.019699528549518e-06, "loss": 31.5952, "step": 141230 }, { "epoch": 0.28531373602621235, "grad_norm": 241.71981811523438, "learning_rate": 9.01949192561306e-06, "loss": 17.3366, "step": 141240 }, { "epoch": 0.28533393665889617, "grad_norm": 502.1325378417969, "learning_rate": 9.019284303086086e-06, "loss": 13.8446, "step": 141250 }, { "epoch": 0.28535413729158, "grad_norm": 983.9159545898438, "learning_rate": 9.01907666096961e-06, "loss": 35.2028, "step": 141260 }, { "epoch": 0.2853743379242638, "grad_norm": 298.1748046875, "learning_rate": 9.018868999264641e-06, "loss": 24.9032, "step": 141270 }, { "epoch": 0.28539453855694763, "grad_norm": 354.8809509277344, "learning_rate": 9.018661317972191e-06, "loss": 12.2818, "step": 141280 }, { "epoch": 0.2854147391896314, "grad_norm": 407.07098388671875, "learning_rate": 9.018453617093273e-06, "loss": 45.0947, "step": 141290 }, { "epoch": 0.2854349398223152, "grad_norm": 466.182861328125, "learning_rate": 9.0182458966289e-06, "loss": 18.2482, "step": 141300 }, { "epoch": 0.28545514045499903, "grad_norm": 208.51429748535156, "learning_rate": 9.018038156580084e-06, "loss": 18.6227, "step": 141310 }, { "epoch": 0.28547534108768285, "grad_norm": 328.8429260253906, "learning_rate": 9.017830396947838e-06, "loss": 15.9781, "step": 141320 }, { "epoch": 0.2854955417203667, "grad_norm": 241.0856475830078, "learning_rate": 9.017622617733173e-06, "loss": 12.6501, "step": 141330 }, { "epoch": 0.2855157423530505, "grad_norm": 107.72103881835938, "learning_rate": 9.017414818937101e-06, "loss": 41.076, "step": 141340 }, { "epoch": 0.2855359429857343, "grad_norm": 227.35279846191406, "learning_rate": 9.017207000560639e-06, "loss": 15.0533, "step": 141350 }, { "epoch": 0.28555614361841813, "grad_norm": 1324.3818359375, "learning_rate": 9.016999162604795e-06, "loss": 24.6875, "step": 141360 }, { "epoch": 0.28557634425110195, "grad_norm": 421.4273376464844, "learning_rate": 9.016791305070587e-06, "loss": 22.3203, "step": 141370 }, { "epoch": 0.2855965448837858, "grad_norm": 175.57167053222656, "learning_rate": 9.016583427959025e-06, "loss": 14.8831, "step": 141380 }, { "epoch": 0.2856167455164696, "grad_norm": 247.7632598876953, "learning_rate": 9.01637553127112e-06, "loss": 16.6317, "step": 141390 }, { "epoch": 0.2856369461491534, "grad_norm": 266.7445373535156, "learning_rate": 9.01616761500789e-06, "loss": 19.3348, "step": 141400 }, { "epoch": 0.28565714678183723, "grad_norm": 151.6771240234375, "learning_rate": 9.015959679170346e-06, "loss": 18.4316, "step": 141410 }, { "epoch": 0.285677347414521, "grad_norm": 931.7093505859375, "learning_rate": 9.015751723759501e-06, "loss": 24.8953, "step": 141420 }, { "epoch": 0.2856975480472048, "grad_norm": 447.4075622558594, "learning_rate": 9.01554374877637e-06, "loss": 18.8726, "step": 141430 }, { "epoch": 0.28571774867988864, "grad_norm": 54.68821334838867, "learning_rate": 9.015335754221964e-06, "loss": 18.3921, "step": 141440 }, { "epoch": 0.28573794931257246, "grad_norm": 135.9447784423828, "learning_rate": 9.015127740097301e-06, "loss": 15.4152, "step": 141450 }, { "epoch": 0.2857581499452563, "grad_norm": 234.57012939453125, "learning_rate": 9.01491970640339e-06, "loss": 18.5707, "step": 141460 }, { "epoch": 0.2857783505779401, "grad_norm": 452.7605285644531, "learning_rate": 9.014711653141248e-06, "loss": 25.2169, "step": 141470 }, { "epoch": 0.2857985512106239, "grad_norm": 267.8083801269531, "learning_rate": 9.014503580311889e-06, "loss": 17.0145, "step": 141480 }, { "epoch": 0.28581875184330774, "grad_norm": 195.14837646484375, "learning_rate": 9.014295487916325e-06, "loss": 24.5082, "step": 141490 }, { "epoch": 0.28583895247599156, "grad_norm": 3.171558380126953, "learning_rate": 9.014087375955574e-06, "loss": 24.635, "step": 141500 }, { "epoch": 0.2858591531086754, "grad_norm": 508.4595947265625, "learning_rate": 9.013879244430645e-06, "loss": 28.9757, "step": 141510 }, { "epoch": 0.2858793537413592, "grad_norm": 352.23602294921875, "learning_rate": 9.013671093342557e-06, "loss": 19.9519, "step": 141520 }, { "epoch": 0.285899554374043, "grad_norm": 493.6762390136719, "learning_rate": 9.013462922692324e-06, "loss": 30.5766, "step": 141530 }, { "epoch": 0.2859197550067268, "grad_norm": 390.53070068359375, "learning_rate": 9.013254732480958e-06, "loss": 21.569, "step": 141540 }, { "epoch": 0.2859399556394106, "grad_norm": 472.6861267089844, "learning_rate": 9.013046522709477e-06, "loss": 30.9316, "step": 141550 }, { "epoch": 0.2859601562720944, "grad_norm": 258.30560302734375, "learning_rate": 9.01283829337889e-06, "loss": 62.454, "step": 141560 }, { "epoch": 0.28598035690477824, "grad_norm": 167.68939208984375, "learning_rate": 9.01263004449022e-06, "loss": 25.8255, "step": 141570 }, { "epoch": 0.28600055753746206, "grad_norm": 197.43112182617188, "learning_rate": 9.012421776044477e-06, "loss": 24.2638, "step": 141580 }, { "epoch": 0.2860207581701459, "grad_norm": 581.0361938476562, "learning_rate": 9.012213488042677e-06, "loss": 25.1485, "step": 141590 }, { "epoch": 0.2860409588028297, "grad_norm": 143.40953063964844, "learning_rate": 9.012005180485834e-06, "loss": 18.1913, "step": 141600 }, { "epoch": 0.2860611594355135, "grad_norm": 413.9088134765625, "learning_rate": 9.011796853374964e-06, "loss": 30.8475, "step": 141610 }, { "epoch": 0.28608136006819734, "grad_norm": 225.6832275390625, "learning_rate": 9.011588506711085e-06, "loss": 31.5053, "step": 141620 }, { "epoch": 0.28610156070088116, "grad_norm": 473.97686767578125, "learning_rate": 9.011380140495207e-06, "loss": 27.474, "step": 141630 }, { "epoch": 0.286121761333565, "grad_norm": 632.804443359375, "learning_rate": 9.01117175472835e-06, "loss": 21.5622, "step": 141640 }, { "epoch": 0.2861419619662488, "grad_norm": 282.90093994140625, "learning_rate": 9.010963349411529e-06, "loss": 26.161, "step": 141650 }, { "epoch": 0.2861621625989326, "grad_norm": 103.61241912841797, "learning_rate": 9.01075492454576e-06, "loss": 14.309, "step": 141660 }, { "epoch": 0.2861823632316164, "grad_norm": 234.17840576171875, "learning_rate": 9.010546480132055e-06, "loss": 19.6246, "step": 141670 }, { "epoch": 0.2862025638643002, "grad_norm": 138.0635986328125, "learning_rate": 9.010338016171434e-06, "loss": 10.974, "step": 141680 }, { "epoch": 0.28622276449698403, "grad_norm": 311.1000061035156, "learning_rate": 9.010129532664914e-06, "loss": 25.4478, "step": 141690 }, { "epoch": 0.28624296512966785, "grad_norm": 320.9464111328125, "learning_rate": 9.009921029613506e-06, "loss": 15.8842, "step": 141700 }, { "epoch": 0.28626316576235167, "grad_norm": 139.32205200195312, "learning_rate": 9.00971250701823e-06, "loss": 36.9568, "step": 141710 }, { "epoch": 0.2862833663950355, "grad_norm": 403.73907470703125, "learning_rate": 9.009503964880105e-06, "loss": 19.5641, "step": 141720 }, { "epoch": 0.2863035670277193, "grad_norm": 114.88396453857422, "learning_rate": 9.00929540320014e-06, "loss": 14.866, "step": 141730 }, { "epoch": 0.28632376766040313, "grad_norm": 222.93157958984375, "learning_rate": 9.009086821979358e-06, "loss": 14.4522, "step": 141740 }, { "epoch": 0.28634396829308695, "grad_norm": 76.21431732177734, "learning_rate": 9.00887822121877e-06, "loss": 12.0521, "step": 141750 }, { "epoch": 0.28636416892577077, "grad_norm": 209.23675537109375, "learning_rate": 9.008669600919399e-06, "loss": 14.3898, "step": 141760 }, { "epoch": 0.2863843695584546, "grad_norm": 329.356201171875, "learning_rate": 9.008460961082257e-06, "loss": 23.5066, "step": 141770 }, { "epoch": 0.2864045701911384, "grad_norm": 275.9917907714844, "learning_rate": 9.008252301708362e-06, "loss": 13.0626, "step": 141780 }, { "epoch": 0.28642477082382223, "grad_norm": 390.1838073730469, "learning_rate": 9.008043622798732e-06, "loss": 31.6261, "step": 141790 }, { "epoch": 0.286444971456506, "grad_norm": 333.90936279296875, "learning_rate": 9.007834924354384e-06, "loss": 24.2425, "step": 141800 }, { "epoch": 0.2864651720891898, "grad_norm": 523.2058715820312, "learning_rate": 9.007626206376335e-06, "loss": 21.5459, "step": 141810 }, { "epoch": 0.28648537272187363, "grad_norm": 467.26763916015625, "learning_rate": 9.0074174688656e-06, "loss": 18.9809, "step": 141820 }, { "epoch": 0.28650557335455745, "grad_norm": 100.12918090820312, "learning_rate": 9.007208711823198e-06, "loss": 27.8868, "step": 141830 }, { "epoch": 0.2865257739872413, "grad_norm": 454.9781799316406, "learning_rate": 9.006999935250149e-06, "loss": 15.2749, "step": 141840 }, { "epoch": 0.2865459746199251, "grad_norm": 378.4798889160156, "learning_rate": 9.006791139147468e-06, "loss": 24.8719, "step": 141850 }, { "epoch": 0.2865661752526089, "grad_norm": 515.1113891601562, "learning_rate": 9.006582323516172e-06, "loss": 27.1788, "step": 141860 }, { "epoch": 0.28658637588529273, "grad_norm": 247.47012329101562, "learning_rate": 9.006373488357281e-06, "loss": 30.9123, "step": 141870 }, { "epoch": 0.28660657651797655, "grad_norm": 248.7869873046875, "learning_rate": 9.00616463367181e-06, "loss": 19.2625, "step": 141880 }, { "epoch": 0.2866267771506604, "grad_norm": 0.0, "learning_rate": 9.005955759460779e-06, "loss": 19.6304, "step": 141890 }, { "epoch": 0.2866469777833442, "grad_norm": 261.75042724609375, "learning_rate": 9.005746865725206e-06, "loss": 14.2078, "step": 141900 }, { "epoch": 0.286667178416028, "grad_norm": 300.4846496582031, "learning_rate": 9.005537952466108e-06, "loss": 41.9773, "step": 141910 }, { "epoch": 0.28668737904871183, "grad_norm": 186.48345947265625, "learning_rate": 9.005329019684503e-06, "loss": 33.8765, "step": 141920 }, { "epoch": 0.2867075796813956, "grad_norm": 223.7041015625, "learning_rate": 9.005120067381413e-06, "loss": 22.8765, "step": 141930 }, { "epoch": 0.2867277803140794, "grad_norm": 333.8845520019531, "learning_rate": 9.004911095557852e-06, "loss": 26.2414, "step": 141940 }, { "epoch": 0.28674798094676324, "grad_norm": 164.15724182128906, "learning_rate": 9.00470210421484e-06, "loss": 38.9224, "step": 141950 }, { "epoch": 0.28676818157944706, "grad_norm": 5.7766313552856445, "learning_rate": 9.004493093353394e-06, "loss": 32.6894, "step": 141960 }, { "epoch": 0.2867883822121309, "grad_norm": 167.23524475097656, "learning_rate": 9.004284062974537e-06, "loss": 21.8657, "step": 141970 }, { "epoch": 0.2868085828448147, "grad_norm": 82.63095092773438, "learning_rate": 9.004075013079284e-06, "loss": 20.3065, "step": 141980 }, { "epoch": 0.2868287834774985, "grad_norm": 270.2087707519531, "learning_rate": 9.003865943668656e-06, "loss": 17.4723, "step": 141990 }, { "epoch": 0.28684898411018234, "grad_norm": 121.95477294921875, "learning_rate": 9.003656854743667e-06, "loss": 20.4794, "step": 142000 }, { "epoch": 0.28686918474286616, "grad_norm": 255.65139770507812, "learning_rate": 9.003447746305345e-06, "loss": 32.0405, "step": 142010 }, { "epoch": 0.28688938537555, "grad_norm": 222.70799255371094, "learning_rate": 9.003238618354702e-06, "loss": 15.2237, "step": 142020 }, { "epoch": 0.2869095860082338, "grad_norm": 242.5521240234375, "learning_rate": 9.003029470892759e-06, "loss": 25.9057, "step": 142030 }, { "epoch": 0.2869297866409176, "grad_norm": 54.6217155456543, "learning_rate": 9.002820303920537e-06, "loss": 20.9463, "step": 142040 }, { "epoch": 0.28694998727360144, "grad_norm": 265.1227722167969, "learning_rate": 9.002611117439054e-06, "loss": 17.3034, "step": 142050 }, { "epoch": 0.2869701879062852, "grad_norm": 163.49354553222656, "learning_rate": 9.00240191144933e-06, "loss": 19.5808, "step": 142060 }, { "epoch": 0.286990388538969, "grad_norm": 230.9964141845703, "learning_rate": 9.002192685952385e-06, "loss": 19.9088, "step": 142070 }, { "epoch": 0.28701058917165284, "grad_norm": 219.49752807617188, "learning_rate": 9.001983440949236e-06, "loss": 17.4407, "step": 142080 }, { "epoch": 0.28703078980433666, "grad_norm": 70.03234100341797, "learning_rate": 9.001774176440908e-06, "loss": 13.8145, "step": 142090 }, { "epoch": 0.2870509904370205, "grad_norm": 242.63926696777344, "learning_rate": 9.001564892428416e-06, "loss": 31.1689, "step": 142100 }, { "epoch": 0.2870711910697043, "grad_norm": 424.5156555175781, "learning_rate": 9.001355588912784e-06, "loss": 20.5046, "step": 142110 }, { "epoch": 0.2870913917023881, "grad_norm": 546.4880981445312, "learning_rate": 9.001146265895028e-06, "loss": 20.571, "step": 142120 }, { "epoch": 0.28711159233507194, "grad_norm": 389.0870056152344, "learning_rate": 9.000936923376171e-06, "loss": 11.9658, "step": 142130 }, { "epoch": 0.28713179296775576, "grad_norm": 334.2333679199219, "learning_rate": 9.000727561357234e-06, "loss": 15.439, "step": 142140 }, { "epoch": 0.2871519936004396, "grad_norm": 100.16044616699219, "learning_rate": 9.000518179839236e-06, "loss": 14.6407, "step": 142150 }, { "epoch": 0.2871721942331234, "grad_norm": 0.0, "learning_rate": 9.000308778823196e-06, "loss": 13.1375, "step": 142160 }, { "epoch": 0.2871923948658072, "grad_norm": 7.060220718383789, "learning_rate": 9.000099358310137e-06, "loss": 32.3711, "step": 142170 }, { "epoch": 0.287212595498491, "grad_norm": 152.0251007080078, "learning_rate": 8.99988991830108e-06, "loss": 17.3691, "step": 142180 }, { "epoch": 0.2872327961311748, "grad_norm": 349.78045654296875, "learning_rate": 8.999680458797042e-06, "loss": 21.2601, "step": 142190 }, { "epoch": 0.2872529967638586, "grad_norm": 372.7937927246094, "learning_rate": 8.999470979799048e-06, "loss": 23.323, "step": 142200 }, { "epoch": 0.28727319739654245, "grad_norm": 16.4593448638916, "learning_rate": 8.999261481308117e-06, "loss": 23.4685, "step": 142210 }, { "epoch": 0.28729339802922627, "grad_norm": 100.79811096191406, "learning_rate": 8.999051963325271e-06, "loss": 12.9688, "step": 142220 }, { "epoch": 0.2873135986619101, "grad_norm": 268.5015563964844, "learning_rate": 8.998842425851531e-06, "loss": 10.8283, "step": 142230 }, { "epoch": 0.2873337992945939, "grad_norm": 129.80455017089844, "learning_rate": 8.998632868887918e-06, "loss": 13.2704, "step": 142240 }, { "epoch": 0.2873539999272777, "grad_norm": 497.9131774902344, "learning_rate": 8.998423292435455e-06, "loss": 30.2268, "step": 142250 }, { "epoch": 0.28737420055996155, "grad_norm": 207.98951721191406, "learning_rate": 8.998213696495159e-06, "loss": 31.4461, "step": 142260 }, { "epoch": 0.28739440119264537, "grad_norm": 419.12628173828125, "learning_rate": 8.998004081068055e-06, "loss": 32.0398, "step": 142270 }, { "epoch": 0.2874146018253292, "grad_norm": 500.40179443359375, "learning_rate": 8.997794446155165e-06, "loss": 17.5461, "step": 142280 }, { "epoch": 0.287434802458013, "grad_norm": 705.307861328125, "learning_rate": 8.997584791757508e-06, "loss": 32.4499, "step": 142290 }, { "epoch": 0.2874550030906968, "grad_norm": 962.0341186523438, "learning_rate": 8.99737511787611e-06, "loss": 39.879, "step": 142300 }, { "epoch": 0.2874752037233806, "grad_norm": 178.57485961914062, "learning_rate": 8.997165424511988e-06, "loss": 27.8422, "step": 142310 }, { "epoch": 0.2874954043560644, "grad_norm": 43.618282318115234, "learning_rate": 8.996955711666168e-06, "loss": 19.5875, "step": 142320 }, { "epoch": 0.28751560498874823, "grad_norm": 354.0745849609375, "learning_rate": 8.996745979339671e-06, "loss": 11.8992, "step": 142330 }, { "epoch": 0.28753580562143205, "grad_norm": 279.5390319824219, "learning_rate": 8.996536227533519e-06, "loss": 16.4089, "step": 142340 }, { "epoch": 0.28755600625411587, "grad_norm": 412.4971923828125, "learning_rate": 8.996326456248732e-06, "loss": 23.0928, "step": 142350 }, { "epoch": 0.2875762068867997, "grad_norm": 221.30128479003906, "learning_rate": 8.996116665486337e-06, "loss": 21.8666, "step": 142360 }, { "epoch": 0.2875964075194835, "grad_norm": 289.2975158691406, "learning_rate": 8.995906855247354e-06, "loss": 23.3042, "step": 142370 }, { "epoch": 0.28761660815216733, "grad_norm": 1089.509033203125, "learning_rate": 8.995697025532803e-06, "loss": 49.8221, "step": 142380 }, { "epoch": 0.28763680878485115, "grad_norm": 413.6571350097656, "learning_rate": 8.995487176343711e-06, "loss": 25.8695, "step": 142390 }, { "epoch": 0.28765700941753497, "grad_norm": 338.92254638671875, "learning_rate": 8.9952773076811e-06, "loss": 15.994, "step": 142400 }, { "epoch": 0.2876772100502188, "grad_norm": 64.98502349853516, "learning_rate": 8.99506741954599e-06, "loss": 18.4254, "step": 142410 }, { "epoch": 0.2876974106829026, "grad_norm": 73.72489929199219, "learning_rate": 8.994857511939408e-06, "loss": 22.3394, "step": 142420 }, { "epoch": 0.28771761131558643, "grad_norm": 405.7519226074219, "learning_rate": 8.994647584862374e-06, "loss": 12.6863, "step": 142430 }, { "epoch": 0.2877378119482702, "grad_norm": 327.0452880859375, "learning_rate": 8.994437638315912e-06, "loss": 27.2186, "step": 142440 }, { "epoch": 0.287758012580954, "grad_norm": 354.12347412109375, "learning_rate": 8.994227672301046e-06, "loss": 12.058, "step": 142450 }, { "epoch": 0.28777821321363783, "grad_norm": 310.8761291503906, "learning_rate": 8.994017686818799e-06, "loss": 20.84, "step": 142460 }, { "epoch": 0.28779841384632165, "grad_norm": 358.1612243652344, "learning_rate": 8.993807681870192e-06, "loss": 24.8094, "step": 142470 }, { "epoch": 0.2878186144790055, "grad_norm": 221.54745483398438, "learning_rate": 8.993597657456252e-06, "loss": 20.6126, "step": 142480 }, { "epoch": 0.2878388151116893, "grad_norm": 302.901611328125, "learning_rate": 8.993387613578003e-06, "loss": 38.5822, "step": 142490 }, { "epoch": 0.2878590157443731, "grad_norm": 412.38262939453125, "learning_rate": 8.993177550236464e-06, "loss": 19.8909, "step": 142500 }, { "epoch": 0.28787921637705693, "grad_norm": 225.31005859375, "learning_rate": 8.992967467432665e-06, "loss": 24.273, "step": 142510 }, { "epoch": 0.28789941700974075, "grad_norm": 1.4786155223846436, "learning_rate": 8.992757365167625e-06, "loss": 20.3751, "step": 142520 }, { "epoch": 0.2879196176424246, "grad_norm": 217.52919006347656, "learning_rate": 8.99254724344237e-06, "loss": 25.5164, "step": 142530 }, { "epoch": 0.2879398182751084, "grad_norm": 134.29603576660156, "learning_rate": 8.992337102257925e-06, "loss": 25.9474, "step": 142540 }, { "epoch": 0.2879600189077922, "grad_norm": 460.1518859863281, "learning_rate": 8.992126941615314e-06, "loss": 41.8711, "step": 142550 }, { "epoch": 0.28798021954047603, "grad_norm": 355.94512939453125, "learning_rate": 8.991916761515557e-06, "loss": 31.2444, "step": 142560 }, { "epoch": 0.2880004201731598, "grad_norm": 279.1015625, "learning_rate": 8.991706561959684e-06, "loss": 16.2945, "step": 142570 }, { "epoch": 0.2880206208058436, "grad_norm": 701.2138671875, "learning_rate": 8.991496342948718e-06, "loss": 19.7335, "step": 142580 }, { "epoch": 0.28804082143852744, "grad_norm": 538.9707641601562, "learning_rate": 8.991286104483682e-06, "loss": 24.6549, "step": 142590 }, { "epoch": 0.28806102207121126, "grad_norm": 229.1966552734375, "learning_rate": 8.991075846565603e-06, "loss": 14.4532, "step": 142600 }, { "epoch": 0.2880812227038951, "grad_norm": 104.9123764038086, "learning_rate": 8.990865569195502e-06, "loss": 25.2677, "step": 142610 }, { "epoch": 0.2881014233365789, "grad_norm": 417.29779052734375, "learning_rate": 8.990655272374409e-06, "loss": 15.7137, "step": 142620 }, { "epoch": 0.2881216239692627, "grad_norm": 248.7758026123047, "learning_rate": 8.990444956103343e-06, "loss": 16.9927, "step": 142630 }, { "epoch": 0.28814182460194654, "grad_norm": 124.53526306152344, "learning_rate": 8.990234620383335e-06, "loss": 21.2252, "step": 142640 }, { "epoch": 0.28816202523463036, "grad_norm": 302.3263854980469, "learning_rate": 8.990024265215405e-06, "loss": 20.8398, "step": 142650 }, { "epoch": 0.2881822258673142, "grad_norm": 269.9779968261719, "learning_rate": 8.989813890600582e-06, "loss": 25.719, "step": 142660 }, { "epoch": 0.288202426499998, "grad_norm": 510.9490966796875, "learning_rate": 8.989603496539891e-06, "loss": 23.5981, "step": 142670 }, { "epoch": 0.2882226271326818, "grad_norm": 146.3780975341797, "learning_rate": 8.989393083034355e-06, "loss": 13.7446, "step": 142680 }, { "epoch": 0.28824282776536564, "grad_norm": 232.1122283935547, "learning_rate": 8.989182650085003e-06, "loss": 23.2108, "step": 142690 }, { "epoch": 0.2882630283980494, "grad_norm": 181.81246948242188, "learning_rate": 8.988972197692857e-06, "loss": 21.4984, "step": 142700 }, { "epoch": 0.2882832290307332, "grad_norm": 431.64666748046875, "learning_rate": 8.988761725858942e-06, "loss": 25.7758, "step": 142710 }, { "epoch": 0.28830342966341704, "grad_norm": 270.7570495605469, "learning_rate": 8.988551234584289e-06, "loss": 23.7964, "step": 142720 }, { "epoch": 0.28832363029610086, "grad_norm": 418.7917785644531, "learning_rate": 8.988340723869921e-06, "loss": 24.3227, "step": 142730 }, { "epoch": 0.2883438309287847, "grad_norm": 157.9159393310547, "learning_rate": 8.988130193716864e-06, "loss": 8.6044, "step": 142740 }, { "epoch": 0.2883640315614685, "grad_norm": 275.6920166015625, "learning_rate": 8.987919644126145e-06, "loss": 18.1579, "step": 142750 }, { "epoch": 0.2883842321941523, "grad_norm": 328.12127685546875, "learning_rate": 8.987709075098786e-06, "loss": 15.0049, "step": 142760 }, { "epoch": 0.28840443282683614, "grad_norm": 684.8626708984375, "learning_rate": 8.98749848663582e-06, "loss": 21.3986, "step": 142770 }, { "epoch": 0.28842463345951996, "grad_norm": 337.878662109375, "learning_rate": 8.987287878738269e-06, "loss": 18.7894, "step": 142780 }, { "epoch": 0.2884448340922038, "grad_norm": 232.59097290039062, "learning_rate": 8.987077251407159e-06, "loss": 26.8815, "step": 142790 }, { "epoch": 0.2884650347248876, "grad_norm": 173.4846954345703, "learning_rate": 8.986866604643518e-06, "loss": 13.3181, "step": 142800 }, { "epoch": 0.2884852353575714, "grad_norm": 266.179931640625, "learning_rate": 8.986655938448373e-06, "loss": 34.7581, "step": 142810 }, { "epoch": 0.2885054359902552, "grad_norm": 347.078369140625, "learning_rate": 8.986445252822752e-06, "loss": 25.1816, "step": 142820 }, { "epoch": 0.288525636622939, "grad_norm": 446.2776184082031, "learning_rate": 8.986234547767681e-06, "loss": 25.6579, "step": 142830 }, { "epoch": 0.28854583725562283, "grad_norm": 186.617431640625, "learning_rate": 8.986023823284184e-06, "loss": 11.642, "step": 142840 }, { "epoch": 0.28856603788830665, "grad_norm": 262.1022644042969, "learning_rate": 8.985813079373293e-06, "loss": 31.6414, "step": 142850 }, { "epoch": 0.28858623852099047, "grad_norm": 398.3401794433594, "learning_rate": 8.98560231603603e-06, "loss": 13.5494, "step": 142860 }, { "epoch": 0.2886064391536743, "grad_norm": 267.8316345214844, "learning_rate": 8.985391533273425e-06, "loss": 18.6413, "step": 142870 }, { "epoch": 0.2886266397863581, "grad_norm": 373.4322509765625, "learning_rate": 8.985180731086505e-06, "loss": 23.4218, "step": 142880 }, { "epoch": 0.28864684041904193, "grad_norm": 294.7499084472656, "learning_rate": 8.984969909476299e-06, "loss": 23.8243, "step": 142890 }, { "epoch": 0.28866704105172575, "grad_norm": 377.3338928222656, "learning_rate": 8.984759068443832e-06, "loss": 33.5512, "step": 142900 }, { "epoch": 0.28868724168440957, "grad_norm": 289.8852233886719, "learning_rate": 8.984548207990133e-06, "loss": 19.0581, "step": 142910 }, { "epoch": 0.2887074423170934, "grad_norm": 0.0, "learning_rate": 8.984337328116228e-06, "loss": 6.3757, "step": 142920 }, { "epoch": 0.2887276429497772, "grad_norm": 202.8689727783203, "learning_rate": 8.984126428823147e-06, "loss": 16.2592, "step": 142930 }, { "epoch": 0.28874784358246103, "grad_norm": 389.8948669433594, "learning_rate": 8.983915510111918e-06, "loss": 19.4771, "step": 142940 }, { "epoch": 0.2887680442151448, "grad_norm": 260.1515808105469, "learning_rate": 8.983704571983568e-06, "loss": 20.914, "step": 142950 }, { "epoch": 0.2887882448478286, "grad_norm": 490.32659912109375, "learning_rate": 8.983493614439123e-06, "loss": 23.438, "step": 142960 }, { "epoch": 0.28880844548051243, "grad_norm": 108.22864532470703, "learning_rate": 8.983282637479613e-06, "loss": 22.7253, "step": 142970 }, { "epoch": 0.28882864611319625, "grad_norm": 360.3310241699219, "learning_rate": 8.983071641106068e-06, "loss": 22.1484, "step": 142980 }, { "epoch": 0.2888488467458801, "grad_norm": 288.5499267578125, "learning_rate": 8.982860625319514e-06, "loss": 11.7796, "step": 142990 }, { "epoch": 0.2888690473785639, "grad_norm": 622.6229858398438, "learning_rate": 8.982649590120982e-06, "loss": 30.4311, "step": 143000 }, { "epoch": 0.2888892480112477, "grad_norm": 541.18212890625, "learning_rate": 8.982438535511498e-06, "loss": 21.2816, "step": 143010 }, { "epoch": 0.28890944864393153, "grad_norm": 438.3682861328125, "learning_rate": 8.982227461492092e-06, "loss": 26.82, "step": 143020 }, { "epoch": 0.28892964927661535, "grad_norm": 675.84326171875, "learning_rate": 8.982016368063793e-06, "loss": 30.7201, "step": 143030 }, { "epoch": 0.2889498499092992, "grad_norm": 140.3732452392578, "learning_rate": 8.981805255227627e-06, "loss": 11.1931, "step": 143040 }, { "epoch": 0.288970050541983, "grad_norm": 268.6198425292969, "learning_rate": 8.981594122984628e-06, "loss": 34.0327, "step": 143050 }, { "epoch": 0.2889902511746668, "grad_norm": 359.8173522949219, "learning_rate": 8.98138297133582e-06, "loss": 20.6166, "step": 143060 }, { "epoch": 0.28901045180735063, "grad_norm": 581.2867431640625, "learning_rate": 8.981171800282233e-06, "loss": 15.7967, "step": 143070 }, { "epoch": 0.2890306524400344, "grad_norm": 179.92727661132812, "learning_rate": 8.9809606098249e-06, "loss": 7.3309, "step": 143080 }, { "epoch": 0.2890508530727182, "grad_norm": 376.8297119140625, "learning_rate": 8.980749399964847e-06, "loss": 14.755, "step": 143090 }, { "epoch": 0.28907105370540204, "grad_norm": 471.8597717285156, "learning_rate": 8.980538170703104e-06, "loss": 21.0345, "step": 143100 }, { "epoch": 0.28909125433808586, "grad_norm": 244.8143768310547, "learning_rate": 8.9803269220407e-06, "loss": 19.3541, "step": 143110 }, { "epoch": 0.2891114549707697, "grad_norm": 177.59385681152344, "learning_rate": 8.980115653978667e-06, "loss": 24.1847, "step": 143120 }, { "epoch": 0.2891316556034535, "grad_norm": 285.9876403808594, "learning_rate": 8.979904366518034e-06, "loss": 15.056, "step": 143130 }, { "epoch": 0.2891518562361373, "grad_norm": 8.585139274597168, "learning_rate": 8.979693059659826e-06, "loss": 40.2796, "step": 143140 }, { "epoch": 0.28917205686882114, "grad_norm": 246.01589965820312, "learning_rate": 8.97948173340508e-06, "loss": 11.2973, "step": 143150 }, { "epoch": 0.28919225750150496, "grad_norm": 186.15383911132812, "learning_rate": 8.97927038775482e-06, "loss": 27.1106, "step": 143160 }, { "epoch": 0.2892124581341888, "grad_norm": 128.91351318359375, "learning_rate": 8.979059022710081e-06, "loss": 19.233, "step": 143170 }, { "epoch": 0.2892326587668726, "grad_norm": 437.2085876464844, "learning_rate": 8.97884763827189e-06, "loss": 26.1224, "step": 143180 }, { "epoch": 0.2892528593995564, "grad_norm": 324.9698486328125, "learning_rate": 8.97863623444128e-06, "loss": 29.8715, "step": 143190 }, { "epoch": 0.28927306003224024, "grad_norm": 299.83026123046875, "learning_rate": 8.978424811219277e-06, "loss": 18.3106, "step": 143200 }, { "epoch": 0.289293260664924, "grad_norm": 111.51203918457031, "learning_rate": 8.978213368606916e-06, "loss": 11.4444, "step": 143210 }, { "epoch": 0.2893134612976078, "grad_norm": 370.8824768066406, "learning_rate": 8.978001906605226e-06, "loss": 12.8017, "step": 143220 }, { "epoch": 0.28933366193029164, "grad_norm": 454.49664306640625, "learning_rate": 8.977790425215234e-06, "loss": 31.8851, "step": 143230 }, { "epoch": 0.28935386256297546, "grad_norm": 201.15708923339844, "learning_rate": 8.977578924437976e-06, "loss": 15.838, "step": 143240 }, { "epoch": 0.2893740631956593, "grad_norm": 471.61962890625, "learning_rate": 8.97736740427448e-06, "loss": 28.7326, "step": 143250 }, { "epoch": 0.2893942638283431, "grad_norm": 424.2200622558594, "learning_rate": 8.977155864725778e-06, "loss": 21.9715, "step": 143260 }, { "epoch": 0.2894144644610269, "grad_norm": 296.57061767578125, "learning_rate": 8.976944305792901e-06, "loss": 15.6878, "step": 143270 }, { "epoch": 0.28943466509371074, "grad_norm": 964.1082763671875, "learning_rate": 8.97673272747688e-06, "loss": 52.5027, "step": 143280 }, { "epoch": 0.28945486572639456, "grad_norm": 168.68328857421875, "learning_rate": 8.976521129778746e-06, "loss": 14.8812, "step": 143290 }, { "epoch": 0.2894750663590784, "grad_norm": 22.143421173095703, "learning_rate": 8.97630951269953e-06, "loss": 16.7231, "step": 143300 }, { "epoch": 0.2894952669917622, "grad_norm": 246.72390747070312, "learning_rate": 8.976097876240263e-06, "loss": 14.8646, "step": 143310 }, { "epoch": 0.289515467624446, "grad_norm": 408.16204833984375, "learning_rate": 8.975886220401978e-06, "loss": 14.8949, "step": 143320 }, { "epoch": 0.2895356682571298, "grad_norm": 295.4291076660156, "learning_rate": 8.975674545185704e-06, "loss": 24.691, "step": 143330 }, { "epoch": 0.2895558688898136, "grad_norm": 180.132568359375, "learning_rate": 8.975462850592476e-06, "loss": 16.0137, "step": 143340 }, { "epoch": 0.2895760695224974, "grad_norm": 12.672704696655273, "learning_rate": 8.975251136623326e-06, "loss": 24.448, "step": 143350 }, { "epoch": 0.28959627015518125, "grad_norm": 489.2466735839844, "learning_rate": 8.975039403279282e-06, "loss": 21.1221, "step": 143360 }, { "epoch": 0.28961647078786507, "grad_norm": 250.6770477294922, "learning_rate": 8.974827650561378e-06, "loss": 20.9651, "step": 143370 }, { "epoch": 0.2896366714205489, "grad_norm": 457.84576416015625, "learning_rate": 8.974615878470646e-06, "loss": 49.4032, "step": 143380 }, { "epoch": 0.2896568720532327, "grad_norm": 113.42047882080078, "learning_rate": 8.97440408700812e-06, "loss": 15.5533, "step": 143390 }, { "epoch": 0.2896770726859165, "grad_norm": 94.4756088256836, "learning_rate": 8.97419227617483e-06, "loss": 18.4424, "step": 143400 }, { "epoch": 0.28969727331860035, "grad_norm": 359.8805847167969, "learning_rate": 8.973980445971806e-06, "loss": 37.3983, "step": 143410 }, { "epoch": 0.28971747395128417, "grad_norm": 311.87640380859375, "learning_rate": 8.973768596400085e-06, "loss": 24.0726, "step": 143420 }, { "epoch": 0.289737674583968, "grad_norm": 43.61752700805664, "learning_rate": 8.973556727460699e-06, "loss": 31.7918, "step": 143430 }, { "epoch": 0.2897578752166518, "grad_norm": 534.201171875, "learning_rate": 8.973344839154678e-06, "loss": 27.2514, "step": 143440 }, { "epoch": 0.2897780758493356, "grad_norm": 254.10614013671875, "learning_rate": 8.973132931483057e-06, "loss": 27.7291, "step": 143450 }, { "epoch": 0.2897982764820194, "grad_norm": 354.6586608886719, "learning_rate": 8.972921004446868e-06, "loss": 19.4252, "step": 143460 }, { "epoch": 0.2898184771147032, "grad_norm": 694.1453857421875, "learning_rate": 8.972709058047145e-06, "loss": 33.2604, "step": 143470 }, { "epoch": 0.28983867774738703, "grad_norm": 200.59271240234375, "learning_rate": 8.972497092284918e-06, "loss": 25.0762, "step": 143480 }, { "epoch": 0.28985887838007085, "grad_norm": 52.49176025390625, "learning_rate": 8.972285107161222e-06, "loss": 18.5365, "step": 143490 }, { "epoch": 0.28987907901275467, "grad_norm": 139.0539093017578, "learning_rate": 8.972073102677091e-06, "loss": 21.7445, "step": 143500 }, { "epoch": 0.2898992796454385, "grad_norm": 148.5225067138672, "learning_rate": 8.971861078833558e-06, "loss": 17.076, "step": 143510 }, { "epoch": 0.2899194802781223, "grad_norm": 57.082611083984375, "learning_rate": 8.971649035631655e-06, "loss": 11.8631, "step": 143520 }, { "epoch": 0.28993968091080613, "grad_norm": 61.617713928222656, "learning_rate": 8.971436973072416e-06, "loss": 16.04, "step": 143530 }, { "epoch": 0.28995988154348995, "grad_norm": 128.83432006835938, "learning_rate": 8.971224891156876e-06, "loss": 24.4777, "step": 143540 }, { "epoch": 0.28998008217617377, "grad_norm": 440.42205810546875, "learning_rate": 8.971012789886066e-06, "loss": 23.0784, "step": 143550 }, { "epoch": 0.2900002828088576, "grad_norm": 310.2643127441406, "learning_rate": 8.970800669261022e-06, "loss": 25.4187, "step": 143560 }, { "epoch": 0.2900204834415414, "grad_norm": 615.584228515625, "learning_rate": 8.970588529282778e-06, "loss": 22.5299, "step": 143570 }, { "epoch": 0.29004068407422523, "grad_norm": 341.51605224609375, "learning_rate": 8.970376369952366e-06, "loss": 15.1671, "step": 143580 }, { "epoch": 0.290060884706909, "grad_norm": 181.3779754638672, "learning_rate": 8.97016419127082e-06, "loss": 21.0462, "step": 143590 }, { "epoch": 0.2900810853395928, "grad_norm": 334.60284423828125, "learning_rate": 8.969951993239177e-06, "loss": 19.0165, "step": 143600 }, { "epoch": 0.29010128597227663, "grad_norm": 485.8629150390625, "learning_rate": 8.96973977585847e-06, "loss": 16.4498, "step": 143610 }, { "epoch": 0.29012148660496045, "grad_norm": 625.33984375, "learning_rate": 8.969527539129732e-06, "loss": 23.5806, "step": 143620 }, { "epoch": 0.2901416872376443, "grad_norm": 237.7928466796875, "learning_rate": 8.969315283053998e-06, "loss": 23.6531, "step": 143630 }, { "epoch": 0.2901618878703281, "grad_norm": 457.45306396484375, "learning_rate": 8.969103007632302e-06, "loss": 15.2166, "step": 143640 }, { "epoch": 0.2901820885030119, "grad_norm": 167.29689025878906, "learning_rate": 8.96889071286568e-06, "loss": 11.6777, "step": 143650 }, { "epoch": 0.29020228913569573, "grad_norm": 324.2578430175781, "learning_rate": 8.968678398755165e-06, "loss": 21.8234, "step": 143660 }, { "epoch": 0.29022248976837955, "grad_norm": 228.46937561035156, "learning_rate": 8.968466065301796e-06, "loss": 12.3996, "step": 143670 }, { "epoch": 0.2902426904010634, "grad_norm": 429.25360107421875, "learning_rate": 8.968253712506602e-06, "loss": 29.3468, "step": 143680 }, { "epoch": 0.2902628910337472, "grad_norm": 357.44146728515625, "learning_rate": 8.968041340370622e-06, "loss": 22.5363, "step": 143690 }, { "epoch": 0.290283091666431, "grad_norm": 1023.4579467773438, "learning_rate": 8.96782894889489e-06, "loss": 32.1246, "step": 143700 }, { "epoch": 0.29030329229911483, "grad_norm": 139.18638610839844, "learning_rate": 8.967616538080438e-06, "loss": 14.4995, "step": 143710 }, { "epoch": 0.2903234929317986, "grad_norm": 194.27210998535156, "learning_rate": 8.967404107928309e-06, "loss": 19.7321, "step": 143720 }, { "epoch": 0.2903436935644824, "grad_norm": 695.085205078125, "learning_rate": 8.96719165843953e-06, "loss": 43.1457, "step": 143730 }, { "epoch": 0.29036389419716624, "grad_norm": 223.62448120117188, "learning_rate": 8.966979189615142e-06, "loss": 16.9288, "step": 143740 }, { "epoch": 0.29038409482985006, "grad_norm": 230.37677001953125, "learning_rate": 8.966766701456177e-06, "loss": 14.321, "step": 143750 }, { "epoch": 0.2904042954625339, "grad_norm": 158.62295532226562, "learning_rate": 8.966554193963673e-06, "loss": 31.9907, "step": 143760 }, { "epoch": 0.2904244960952177, "grad_norm": 248.57119750976562, "learning_rate": 8.966341667138663e-06, "loss": 18.0778, "step": 143770 }, { "epoch": 0.2904446967279015, "grad_norm": 149.7830810546875, "learning_rate": 8.966129120982188e-06, "loss": 17.5667, "step": 143780 }, { "epoch": 0.29046489736058534, "grad_norm": 161.22987365722656, "learning_rate": 8.965916555495278e-06, "loss": 9.0482, "step": 143790 }, { "epoch": 0.29048509799326916, "grad_norm": 238.22177124023438, "learning_rate": 8.965703970678974e-06, "loss": 31.0491, "step": 143800 }, { "epoch": 0.290505298625953, "grad_norm": 544.5739135742188, "learning_rate": 8.965491366534309e-06, "loss": 21.5389, "step": 143810 }, { "epoch": 0.2905254992586368, "grad_norm": 312.32037353515625, "learning_rate": 8.96527874306232e-06, "loss": 12.1812, "step": 143820 }, { "epoch": 0.2905456998913206, "grad_norm": 156.56301879882812, "learning_rate": 8.965066100264042e-06, "loss": 48.1253, "step": 143830 }, { "epoch": 0.29056590052400444, "grad_norm": 106.56568908691406, "learning_rate": 8.964853438140515e-06, "loss": 13.9757, "step": 143840 }, { "epoch": 0.2905861011566882, "grad_norm": 275.17919921875, "learning_rate": 8.96464075669277e-06, "loss": 17.166, "step": 143850 }, { "epoch": 0.290606301789372, "grad_norm": 1.4840850830078125, "learning_rate": 8.96442805592185e-06, "loss": 21.8532, "step": 143860 }, { "epoch": 0.29062650242205584, "grad_norm": 23.287029266357422, "learning_rate": 8.964215335828788e-06, "loss": 19.7968, "step": 143870 }, { "epoch": 0.29064670305473966, "grad_norm": 276.6784973144531, "learning_rate": 8.96400259641462e-06, "loss": 18.9605, "step": 143880 }, { "epoch": 0.2906669036874235, "grad_norm": 472.7251892089844, "learning_rate": 8.963789837680386e-06, "loss": 46.3878, "step": 143890 }, { "epoch": 0.2906871043201073, "grad_norm": 202.76002502441406, "learning_rate": 8.963577059627117e-06, "loss": 16.9341, "step": 143900 }, { "epoch": 0.2907073049527911, "grad_norm": 211.3086395263672, "learning_rate": 8.963364262255859e-06, "loss": 17.6999, "step": 143910 }, { "epoch": 0.29072750558547494, "grad_norm": 272.0254211425781, "learning_rate": 8.963151445567642e-06, "loss": 25.526, "step": 143920 }, { "epoch": 0.29074770621815876, "grad_norm": 179.0928192138672, "learning_rate": 8.962938609563506e-06, "loss": 31.4231, "step": 143930 }, { "epoch": 0.2907679068508426, "grad_norm": 591.6312866210938, "learning_rate": 8.962725754244487e-06, "loss": 33.1127, "step": 143940 }, { "epoch": 0.2907881074835264, "grad_norm": 475.302734375, "learning_rate": 8.962512879611624e-06, "loss": 22.4598, "step": 143950 }, { "epoch": 0.2908083081162102, "grad_norm": 282.9322509765625, "learning_rate": 8.962299985665955e-06, "loss": 9.0329, "step": 143960 }, { "epoch": 0.290828508748894, "grad_norm": 463.3082275390625, "learning_rate": 8.962087072408514e-06, "loss": 22.5913, "step": 143970 }, { "epoch": 0.2908487093815778, "grad_norm": 932.6509399414062, "learning_rate": 8.961874139840342e-06, "loss": 53.8176, "step": 143980 }, { "epoch": 0.29086891001426163, "grad_norm": 0.0, "learning_rate": 8.961661187962477e-06, "loss": 20.4395, "step": 143990 }, { "epoch": 0.29088911064694545, "grad_norm": 131.52659606933594, "learning_rate": 8.961448216775955e-06, "loss": 37.05, "step": 144000 }, { "epoch": 0.29090931127962927, "grad_norm": 308.5359191894531, "learning_rate": 8.961235226281815e-06, "loss": 25.4731, "step": 144010 }, { "epoch": 0.2909295119123131, "grad_norm": 100.31656646728516, "learning_rate": 8.961022216481094e-06, "loss": 17.1688, "step": 144020 }, { "epoch": 0.2909497125449969, "grad_norm": 220.0603485107422, "learning_rate": 8.960809187374833e-06, "loss": 20.5258, "step": 144030 }, { "epoch": 0.29096991317768073, "grad_norm": 352.10302734375, "learning_rate": 8.960596138964065e-06, "loss": 19.4061, "step": 144040 }, { "epoch": 0.29099011381036455, "grad_norm": 266.7361145019531, "learning_rate": 8.960383071249837e-06, "loss": 9.7825, "step": 144050 }, { "epoch": 0.29101031444304837, "grad_norm": 146.65853881835938, "learning_rate": 8.960169984233179e-06, "loss": 19.447, "step": 144060 }, { "epoch": 0.2910305150757322, "grad_norm": 574.3923950195312, "learning_rate": 8.959956877915132e-06, "loss": 15.7223, "step": 144070 }, { "epoch": 0.291050715708416, "grad_norm": 210.13031005859375, "learning_rate": 8.959743752296736e-06, "loss": 16.7297, "step": 144080 }, { "epoch": 0.29107091634109983, "grad_norm": 234.99502563476562, "learning_rate": 8.959530607379032e-06, "loss": 34.4064, "step": 144090 }, { "epoch": 0.2910911169737836, "grad_norm": 506.2070007324219, "learning_rate": 8.959317443163054e-06, "loss": 19.322, "step": 144100 }, { "epoch": 0.2911113176064674, "grad_norm": 166.36073303222656, "learning_rate": 8.959104259649842e-06, "loss": 22.9613, "step": 144110 }, { "epoch": 0.29113151823915123, "grad_norm": 650.4634399414062, "learning_rate": 8.958891056840438e-06, "loss": 48.6002, "step": 144120 }, { "epoch": 0.29115171887183505, "grad_norm": 91.06068420410156, "learning_rate": 8.958677834735879e-06, "loss": 12.8264, "step": 144130 }, { "epoch": 0.2911719195045189, "grad_norm": 232.18426513671875, "learning_rate": 8.958464593337202e-06, "loss": 12.9045, "step": 144140 }, { "epoch": 0.2911921201372027, "grad_norm": 155.57691955566406, "learning_rate": 8.95825133264545e-06, "loss": 25.4359, "step": 144150 }, { "epoch": 0.2912123207698865, "grad_norm": 224.163330078125, "learning_rate": 8.958038052661661e-06, "loss": 8.176, "step": 144160 }, { "epoch": 0.29123252140257033, "grad_norm": 479.7457580566406, "learning_rate": 8.957824753386877e-06, "loss": 27.3457, "step": 144170 }, { "epoch": 0.29125272203525415, "grad_norm": 366.9686584472656, "learning_rate": 8.957611434822133e-06, "loss": 24.8755, "step": 144180 }, { "epoch": 0.291272922667938, "grad_norm": 241.40208435058594, "learning_rate": 8.95739809696847e-06, "loss": 14.7504, "step": 144190 }, { "epoch": 0.2912931233006218, "grad_norm": 608.6564331054688, "learning_rate": 8.957184739826929e-06, "loss": 25.4068, "step": 144200 }, { "epoch": 0.2913133239333056, "grad_norm": 541.532958984375, "learning_rate": 8.95697136339855e-06, "loss": 24.1517, "step": 144210 }, { "epoch": 0.29133352456598943, "grad_norm": 153.1974334716797, "learning_rate": 8.956757967684372e-06, "loss": 15.4505, "step": 144220 }, { "epoch": 0.2913537251986732, "grad_norm": 65.97481536865234, "learning_rate": 8.956544552685437e-06, "loss": 31.7361, "step": 144230 }, { "epoch": 0.291373925831357, "grad_norm": 186.25933837890625, "learning_rate": 8.956331118402784e-06, "loss": 16.2577, "step": 144240 }, { "epoch": 0.29139412646404084, "grad_norm": 36.97776412963867, "learning_rate": 8.956117664837452e-06, "loss": 15.124, "step": 144250 }, { "epoch": 0.29141432709672466, "grad_norm": 239.16183471679688, "learning_rate": 8.955904191990481e-06, "loss": 20.0544, "step": 144260 }, { "epoch": 0.2914345277294085, "grad_norm": 942.4480590820312, "learning_rate": 8.955690699862913e-06, "loss": 23.1715, "step": 144270 }, { "epoch": 0.2914547283620923, "grad_norm": 213.1745147705078, "learning_rate": 8.955477188455791e-06, "loss": 11.0402, "step": 144280 }, { "epoch": 0.2914749289947761, "grad_norm": 286.6073913574219, "learning_rate": 8.95526365777015e-06, "loss": 29.8942, "step": 144290 }, { "epoch": 0.29149512962745994, "grad_norm": 372.10760498046875, "learning_rate": 8.955050107807035e-06, "loss": 41.4, "step": 144300 }, { "epoch": 0.29151533026014376, "grad_norm": 187.10289001464844, "learning_rate": 8.954836538567486e-06, "loss": 15.2397, "step": 144310 }, { "epoch": 0.2915355308928276, "grad_norm": 480.323486328125, "learning_rate": 8.954622950052543e-06, "loss": 32.3639, "step": 144320 }, { "epoch": 0.2915557315255114, "grad_norm": 717.9234008789062, "learning_rate": 8.954409342263246e-06, "loss": 33.0117, "step": 144330 }, { "epoch": 0.2915759321581952, "grad_norm": 301.7745056152344, "learning_rate": 8.95419571520064e-06, "loss": 22.0944, "step": 144340 }, { "epoch": 0.29159613279087904, "grad_norm": 100.29994201660156, "learning_rate": 8.95398206886576e-06, "loss": 19.4359, "step": 144350 }, { "epoch": 0.2916163334235628, "grad_norm": 151.3311004638672, "learning_rate": 8.953768403259655e-06, "loss": 11.7385, "step": 144360 }, { "epoch": 0.2916365340562466, "grad_norm": 145.65113830566406, "learning_rate": 8.95355471838336e-06, "loss": 27.7037, "step": 144370 }, { "epoch": 0.29165673468893044, "grad_norm": 327.35247802734375, "learning_rate": 8.953341014237919e-06, "loss": 36.0446, "step": 144380 }, { "epoch": 0.29167693532161426, "grad_norm": 283.6488037109375, "learning_rate": 8.953127290824374e-06, "loss": 16.4529, "step": 144390 }, { "epoch": 0.2916971359542981, "grad_norm": 471.2793884277344, "learning_rate": 8.952913548143766e-06, "loss": 14.0154, "step": 144400 }, { "epoch": 0.2917173365869819, "grad_norm": 79.45439910888672, "learning_rate": 8.952699786197137e-06, "loss": 21.8296, "step": 144410 }, { "epoch": 0.2917375372196657, "grad_norm": 196.39938354492188, "learning_rate": 8.952486004985527e-06, "loss": 35.8736, "step": 144420 }, { "epoch": 0.29175773785234954, "grad_norm": 235.43710327148438, "learning_rate": 8.95227220450998e-06, "loss": 20.4335, "step": 144430 }, { "epoch": 0.29177793848503336, "grad_norm": 53.816017150878906, "learning_rate": 8.952058384771539e-06, "loss": 12.8523, "step": 144440 }, { "epoch": 0.2917981391177172, "grad_norm": 302.18804931640625, "learning_rate": 8.951844545771244e-06, "loss": 16.3646, "step": 144450 }, { "epoch": 0.291818339750401, "grad_norm": 262.74432373046875, "learning_rate": 8.951630687510137e-06, "loss": 21.8885, "step": 144460 }, { "epoch": 0.2918385403830848, "grad_norm": 235.14071655273438, "learning_rate": 8.951416809989263e-06, "loss": 29.1923, "step": 144470 }, { "epoch": 0.29185874101576864, "grad_norm": 542.9161987304688, "learning_rate": 8.951202913209662e-06, "loss": 22.7497, "step": 144480 }, { "epoch": 0.2918789416484524, "grad_norm": 395.4957275390625, "learning_rate": 8.950988997172378e-06, "loss": 23.6673, "step": 144490 }, { "epoch": 0.2918991422811362, "grad_norm": 320.89654541015625, "learning_rate": 8.950775061878453e-06, "loss": 26.3467, "step": 144500 }, { "epoch": 0.29191934291382005, "grad_norm": 206.4673614501953, "learning_rate": 8.950561107328927e-06, "loss": 37.094, "step": 144510 }, { "epoch": 0.29193954354650387, "grad_norm": 183.28553771972656, "learning_rate": 8.950347133524849e-06, "loss": 22.4136, "step": 144520 }, { "epoch": 0.2919597441791877, "grad_norm": 59.2383918762207, "learning_rate": 8.950133140467256e-06, "loss": 16.9066, "step": 144530 }, { "epoch": 0.2919799448118715, "grad_norm": 273.9954833984375, "learning_rate": 8.949919128157194e-06, "loss": 20.3127, "step": 144540 }, { "epoch": 0.2920001454445553, "grad_norm": 606.9722290039062, "learning_rate": 8.949705096595704e-06, "loss": 25.9503, "step": 144550 }, { "epoch": 0.29202034607723915, "grad_norm": 312.2330017089844, "learning_rate": 8.94949104578383e-06, "loss": 13.1705, "step": 144560 }, { "epoch": 0.29204054670992297, "grad_norm": 331.6214294433594, "learning_rate": 8.949276975722617e-06, "loss": 20.0036, "step": 144570 }, { "epoch": 0.2920607473426068, "grad_norm": 76.7727279663086, "learning_rate": 8.949062886413106e-06, "loss": 32.9433, "step": 144580 }, { "epoch": 0.2920809479752906, "grad_norm": 463.36016845703125, "learning_rate": 8.948848777856342e-06, "loss": 14.8681, "step": 144590 }, { "epoch": 0.2921011486079744, "grad_norm": 228.2914581298828, "learning_rate": 8.94863465005337e-06, "loss": 20.8481, "step": 144600 }, { "epoch": 0.2921213492406582, "grad_norm": 419.5574035644531, "learning_rate": 8.948420503005229e-06, "loss": 33.6238, "step": 144610 }, { "epoch": 0.292141549873342, "grad_norm": 677.09130859375, "learning_rate": 8.948206336712966e-06, "loss": 32.489, "step": 144620 }, { "epoch": 0.29216175050602583, "grad_norm": 433.6905212402344, "learning_rate": 8.947992151177625e-06, "loss": 22.0625, "step": 144630 }, { "epoch": 0.29218195113870965, "grad_norm": 228.10525512695312, "learning_rate": 8.947777946400247e-06, "loss": 12.1337, "step": 144640 }, { "epoch": 0.29220215177139347, "grad_norm": 190.26409912109375, "learning_rate": 8.94756372238188e-06, "loss": 29.9115, "step": 144650 }, { "epoch": 0.2922223524040773, "grad_norm": 128.02499389648438, "learning_rate": 8.947349479123565e-06, "loss": 34.7976, "step": 144660 }, { "epoch": 0.2922425530367611, "grad_norm": 227.24810791015625, "learning_rate": 8.947135216626349e-06, "loss": 20.9426, "step": 144670 }, { "epoch": 0.29226275366944493, "grad_norm": 330.3255615234375, "learning_rate": 8.946920934891274e-06, "loss": 21.3316, "step": 144680 }, { "epoch": 0.29228295430212875, "grad_norm": 645.0468139648438, "learning_rate": 8.946706633919385e-06, "loss": 21.7997, "step": 144690 }, { "epoch": 0.29230315493481257, "grad_norm": 376.58428955078125, "learning_rate": 8.946492313711725e-06, "loss": 37.2681, "step": 144700 }, { "epoch": 0.2923233555674964, "grad_norm": 384.9559631347656, "learning_rate": 8.946277974269342e-06, "loss": 28.5574, "step": 144710 }, { "epoch": 0.2923435562001802, "grad_norm": 415.54254150390625, "learning_rate": 8.94606361559328e-06, "loss": 23.235, "step": 144720 }, { "epoch": 0.29236375683286403, "grad_norm": 383.546142578125, "learning_rate": 8.945849237684578e-06, "loss": 20.5947, "step": 144730 }, { "epoch": 0.2923839574655478, "grad_norm": 720.77880859375, "learning_rate": 8.94563484054429e-06, "loss": 23.144, "step": 144740 }, { "epoch": 0.2924041580982316, "grad_norm": 175.7727508544922, "learning_rate": 8.945420424173455e-06, "loss": 18.8282, "step": 144750 }, { "epoch": 0.29242435873091543, "grad_norm": 157.23031616210938, "learning_rate": 8.945205988573117e-06, "loss": 24.5665, "step": 144760 }, { "epoch": 0.29244455936359925, "grad_norm": 137.41220092773438, "learning_rate": 8.944991533744327e-06, "loss": 10.6625, "step": 144770 }, { "epoch": 0.2924647599962831, "grad_norm": 344.8930358886719, "learning_rate": 8.944777059688125e-06, "loss": 17.8901, "step": 144780 }, { "epoch": 0.2924849606289669, "grad_norm": 57.695186614990234, "learning_rate": 8.944562566405558e-06, "loss": 16.3927, "step": 144790 }, { "epoch": 0.2925051612616507, "grad_norm": 440.96954345703125, "learning_rate": 8.944348053897672e-06, "loss": 22.2165, "step": 144800 }, { "epoch": 0.29252536189433453, "grad_norm": 77.04108428955078, "learning_rate": 8.94413352216551e-06, "loss": 24.2919, "step": 144810 }, { "epoch": 0.29254556252701835, "grad_norm": 749.2860717773438, "learning_rate": 8.943918971210122e-06, "loss": 20.6234, "step": 144820 }, { "epoch": 0.2925657631597022, "grad_norm": 261.3152770996094, "learning_rate": 8.943704401032551e-06, "loss": 20.465, "step": 144830 }, { "epoch": 0.292585963792386, "grad_norm": 250.89649963378906, "learning_rate": 8.943489811633843e-06, "loss": 13.7114, "step": 144840 }, { "epoch": 0.2926061644250698, "grad_norm": 865.4324951171875, "learning_rate": 8.943275203015042e-06, "loss": 36.438, "step": 144850 }, { "epoch": 0.29262636505775363, "grad_norm": 318.74957275390625, "learning_rate": 8.943060575177197e-06, "loss": 18.4243, "step": 144860 }, { "epoch": 0.2926465656904374, "grad_norm": 97.39192962646484, "learning_rate": 8.942845928121356e-06, "loss": 10.0531, "step": 144870 }, { "epoch": 0.2926667663231212, "grad_norm": 473.9556884765625, "learning_rate": 8.942631261848558e-06, "loss": 38.0139, "step": 144880 }, { "epoch": 0.29268696695580504, "grad_norm": 285.3481140136719, "learning_rate": 8.942416576359855e-06, "loss": 18.7664, "step": 144890 }, { "epoch": 0.29270716758848886, "grad_norm": 306.4140625, "learning_rate": 8.942201871656292e-06, "loss": 27.9742, "step": 144900 }, { "epoch": 0.2927273682211727, "grad_norm": 397.9444885253906, "learning_rate": 8.941987147738915e-06, "loss": 22.1613, "step": 144910 }, { "epoch": 0.2927475688538565, "grad_norm": 199.15194702148438, "learning_rate": 8.94177240460877e-06, "loss": 11.6104, "step": 144920 }, { "epoch": 0.2927677694865403, "grad_norm": 116.1017837524414, "learning_rate": 8.941557642266906e-06, "loss": 18.0553, "step": 144930 }, { "epoch": 0.29278797011922414, "grad_norm": 362.1697082519531, "learning_rate": 8.941342860714368e-06, "loss": 15.5098, "step": 144940 }, { "epoch": 0.29280817075190796, "grad_norm": 396.1563720703125, "learning_rate": 8.9411280599522e-06, "loss": 22.8793, "step": 144950 }, { "epoch": 0.2928283713845918, "grad_norm": 6.841002464294434, "learning_rate": 8.940913239981454e-06, "loss": 25.0574, "step": 144960 }, { "epoch": 0.2928485720172756, "grad_norm": 491.0921936035156, "learning_rate": 8.940698400803177e-06, "loss": 23.0241, "step": 144970 }, { "epoch": 0.2928687726499594, "grad_norm": 335.8235168457031, "learning_rate": 8.94048354241841e-06, "loss": 19.9882, "step": 144980 }, { "epoch": 0.29288897328264324, "grad_norm": 216.74807739257812, "learning_rate": 8.940268664828207e-06, "loss": 16.9743, "step": 144990 }, { "epoch": 0.292909173915327, "grad_norm": 221.71810913085938, "learning_rate": 8.94005376803361e-06, "loss": 13.8185, "step": 145000 }, { "epoch": 0.2929293745480108, "grad_norm": 223.2478790283203, "learning_rate": 8.939838852035672e-06, "loss": 16.9124, "step": 145010 }, { "epoch": 0.29294957518069464, "grad_norm": 675.7261962890625, "learning_rate": 8.939623916835434e-06, "loss": 18.0242, "step": 145020 }, { "epoch": 0.29296977581337846, "grad_norm": 452.32489013671875, "learning_rate": 8.939408962433949e-06, "loss": 24.8715, "step": 145030 }, { "epoch": 0.2929899764460623, "grad_norm": 177.22080993652344, "learning_rate": 8.939193988832261e-06, "loss": 10.2425, "step": 145040 }, { "epoch": 0.2930101770787461, "grad_norm": 96.49590301513672, "learning_rate": 8.93897899603142e-06, "loss": 17.0481, "step": 145050 }, { "epoch": 0.2930303777114299, "grad_norm": 311.413330078125, "learning_rate": 8.938763984032473e-06, "loss": 12.5763, "step": 145060 }, { "epoch": 0.29305057834411374, "grad_norm": 293.9170227050781, "learning_rate": 8.938548952836469e-06, "loss": 24.2862, "step": 145070 }, { "epoch": 0.29307077897679756, "grad_norm": 150.0432586669922, "learning_rate": 8.938333902444454e-06, "loss": 14.0623, "step": 145080 }, { "epoch": 0.2930909796094814, "grad_norm": 460.5698547363281, "learning_rate": 8.938118832857476e-06, "loss": 26.9435, "step": 145090 }, { "epoch": 0.2931111802421652, "grad_norm": 376.8614196777344, "learning_rate": 8.937903744076587e-06, "loss": 16.9678, "step": 145100 }, { "epoch": 0.293131380874849, "grad_norm": 378.06207275390625, "learning_rate": 8.937688636102832e-06, "loss": 51.0835, "step": 145110 }, { "epoch": 0.29315158150753284, "grad_norm": 231.71255493164062, "learning_rate": 8.93747350893726e-06, "loss": 17.3665, "step": 145120 }, { "epoch": 0.2931717821402166, "grad_norm": 128.4167938232422, "learning_rate": 8.937258362580918e-06, "loss": 16.4369, "step": 145130 }, { "epoch": 0.29319198277290043, "grad_norm": 285.7743225097656, "learning_rate": 8.937043197034858e-06, "loss": 22.11, "step": 145140 }, { "epoch": 0.29321218340558425, "grad_norm": 1824.28271484375, "learning_rate": 8.936828012300127e-06, "loss": 31.531, "step": 145150 }, { "epoch": 0.29323238403826807, "grad_norm": 102.27003479003906, "learning_rate": 8.936612808377773e-06, "loss": 7.9564, "step": 145160 }, { "epoch": 0.2932525846709519, "grad_norm": 234.3483428955078, "learning_rate": 8.936397585268848e-06, "loss": 18.3439, "step": 145170 }, { "epoch": 0.2932727853036357, "grad_norm": 409.9053039550781, "learning_rate": 8.936182342974396e-06, "loss": 28.1547, "step": 145180 }, { "epoch": 0.29329298593631953, "grad_norm": 260.23944091796875, "learning_rate": 8.93596708149547e-06, "loss": 14.0538, "step": 145190 }, { "epoch": 0.29331318656900335, "grad_norm": 202.73358154296875, "learning_rate": 8.935751800833117e-06, "loss": 20.6131, "step": 145200 }, { "epoch": 0.29333338720168717, "grad_norm": 365.9839782714844, "learning_rate": 8.935536500988387e-06, "loss": 11.3682, "step": 145210 }, { "epoch": 0.293353587834371, "grad_norm": 220.41986083984375, "learning_rate": 8.93532118196233e-06, "loss": 19.236, "step": 145220 }, { "epoch": 0.2933737884670548, "grad_norm": 70.16381072998047, "learning_rate": 8.935105843755994e-06, "loss": 17.7374, "step": 145230 }, { "epoch": 0.29339398909973863, "grad_norm": 265.1478271484375, "learning_rate": 8.93489048637043e-06, "loss": 11.0949, "step": 145240 }, { "epoch": 0.2934141897324224, "grad_norm": 84.24224853515625, "learning_rate": 8.934675109806688e-06, "loss": 36.5164, "step": 145250 }, { "epoch": 0.2934343903651062, "grad_norm": 123.47663116455078, "learning_rate": 8.934459714065815e-06, "loss": 14.8713, "step": 145260 }, { "epoch": 0.29345459099779003, "grad_norm": 50.02994918823242, "learning_rate": 8.934244299148864e-06, "loss": 24.8759, "step": 145270 }, { "epoch": 0.29347479163047385, "grad_norm": 194.276123046875, "learning_rate": 8.934028865056883e-06, "loss": 38.2849, "step": 145280 }, { "epoch": 0.2934949922631577, "grad_norm": 525.3101806640625, "learning_rate": 8.933813411790922e-06, "loss": 27.1058, "step": 145290 }, { "epoch": 0.2935151928958415, "grad_norm": 229.4756622314453, "learning_rate": 8.933597939352031e-06, "loss": 38.0699, "step": 145300 }, { "epoch": 0.2935353935285253, "grad_norm": 12.076606750488281, "learning_rate": 8.93338244774126e-06, "loss": 14.3635, "step": 145310 }, { "epoch": 0.29355559416120913, "grad_norm": 247.93939208984375, "learning_rate": 8.933166936959664e-06, "loss": 31.9781, "step": 145320 }, { "epoch": 0.29357579479389295, "grad_norm": 301.01043701171875, "learning_rate": 8.932951407008286e-06, "loss": 15.2058, "step": 145330 }, { "epoch": 0.2935959954265768, "grad_norm": 276.119384765625, "learning_rate": 8.93273585788818e-06, "loss": 31.8299, "step": 145340 }, { "epoch": 0.2936161960592606, "grad_norm": 82.24546813964844, "learning_rate": 8.932520289600396e-06, "loss": 23.7937, "step": 145350 }, { "epoch": 0.2936363966919444, "grad_norm": 171.79689025878906, "learning_rate": 8.932304702145988e-06, "loss": 14.0823, "step": 145360 }, { "epoch": 0.29365659732462823, "grad_norm": 107.50999450683594, "learning_rate": 8.932089095526003e-06, "loss": 15.1639, "step": 145370 }, { "epoch": 0.293676797957312, "grad_norm": 174.44252014160156, "learning_rate": 8.93187346974149e-06, "loss": 23.0125, "step": 145380 }, { "epoch": 0.2936969985899958, "grad_norm": 203.64324951171875, "learning_rate": 8.931657824793505e-06, "loss": 25.2332, "step": 145390 }, { "epoch": 0.29371719922267964, "grad_norm": 196.68812561035156, "learning_rate": 8.931442160683094e-06, "loss": 9.0928, "step": 145400 }, { "epoch": 0.29373739985536346, "grad_norm": 182.79721069335938, "learning_rate": 8.931226477411314e-06, "loss": 28.8523, "step": 145410 }, { "epoch": 0.2937576004880473, "grad_norm": 319.360107421875, "learning_rate": 8.931010774979212e-06, "loss": 21.713, "step": 145420 }, { "epoch": 0.2937778011207311, "grad_norm": 207.9465789794922, "learning_rate": 8.93079505338784e-06, "loss": 15.5356, "step": 145430 }, { "epoch": 0.2937980017534149, "grad_norm": 191.83229064941406, "learning_rate": 8.93057931263825e-06, "loss": 57.282, "step": 145440 }, { "epoch": 0.29381820238609874, "grad_norm": 310.91357421875, "learning_rate": 8.930363552731491e-06, "loss": 22.6941, "step": 145450 }, { "epoch": 0.29383840301878256, "grad_norm": 320.724365234375, "learning_rate": 8.930147773668618e-06, "loss": 26.4588, "step": 145460 }, { "epoch": 0.2938586036514664, "grad_norm": 99.3324966430664, "learning_rate": 8.929931975450683e-06, "loss": 16.4978, "step": 145470 }, { "epoch": 0.2938788042841502, "grad_norm": 547.3724365234375, "learning_rate": 8.929716158078734e-06, "loss": 16.8977, "step": 145480 }, { "epoch": 0.293899004916834, "grad_norm": 669.8316040039062, "learning_rate": 8.929500321553825e-06, "loss": 38.0419, "step": 145490 }, { "epoch": 0.29391920554951784, "grad_norm": 528.9037475585938, "learning_rate": 8.92928446587701e-06, "loss": 17.4077, "step": 145500 }, { "epoch": 0.2939394061822016, "grad_norm": 751.9921875, "learning_rate": 8.929068591049338e-06, "loss": 20.4755, "step": 145510 }, { "epoch": 0.2939596068148854, "grad_norm": 314.1864929199219, "learning_rate": 8.928852697071863e-06, "loss": 25.1437, "step": 145520 }, { "epoch": 0.29397980744756924, "grad_norm": 237.7614288330078, "learning_rate": 8.928636783945635e-06, "loss": 24.3829, "step": 145530 }, { "epoch": 0.29400000808025306, "grad_norm": 296.3868408203125, "learning_rate": 8.928420851671708e-06, "loss": 27.3293, "step": 145540 }, { "epoch": 0.2940202087129369, "grad_norm": 236.98629760742188, "learning_rate": 8.928204900251136e-06, "loss": 20.6355, "step": 145550 }, { "epoch": 0.2940404093456207, "grad_norm": 98.40457153320312, "learning_rate": 8.92798892968497e-06, "loss": 17.7818, "step": 145560 }, { "epoch": 0.2940606099783045, "grad_norm": 632.592041015625, "learning_rate": 8.92777293997426e-06, "loss": 21.764, "step": 145570 }, { "epoch": 0.29408081061098834, "grad_norm": 180.94947814941406, "learning_rate": 8.92755693112006e-06, "loss": 20.8123, "step": 145580 }, { "epoch": 0.29410101124367216, "grad_norm": 147.0638885498047, "learning_rate": 8.927340903123428e-06, "loss": 19.7781, "step": 145590 }, { "epoch": 0.294121211876356, "grad_norm": 99.19258880615234, "learning_rate": 8.92712485598541e-06, "loss": 24.6359, "step": 145600 }, { "epoch": 0.2941414125090398, "grad_norm": 250.27139282226562, "learning_rate": 8.926908789707063e-06, "loss": 16.5827, "step": 145610 }, { "epoch": 0.2941616131417236, "grad_norm": 156.98507690429688, "learning_rate": 8.926692704289437e-06, "loss": 24.0789, "step": 145620 }, { "epoch": 0.29418181377440744, "grad_norm": 435.7337646484375, "learning_rate": 8.926476599733588e-06, "loss": 16.673, "step": 145630 }, { "epoch": 0.2942020144070912, "grad_norm": 233.8654327392578, "learning_rate": 8.926260476040568e-06, "loss": 28.4686, "step": 145640 }, { "epoch": 0.294222215039775, "grad_norm": 258.15924072265625, "learning_rate": 8.926044333211433e-06, "loss": 27.7675, "step": 145650 }, { "epoch": 0.29424241567245885, "grad_norm": 152.40933227539062, "learning_rate": 8.925828171247231e-06, "loss": 27.4503, "step": 145660 }, { "epoch": 0.29426261630514267, "grad_norm": 197.25534057617188, "learning_rate": 8.925611990149021e-06, "loss": 28.6801, "step": 145670 }, { "epoch": 0.2942828169378265, "grad_norm": 257.14990234375, "learning_rate": 8.925395789917852e-06, "loss": 18.2334, "step": 145680 }, { "epoch": 0.2943030175705103, "grad_norm": 172.77813720703125, "learning_rate": 8.925179570554783e-06, "loss": 11.8079, "step": 145690 }, { "epoch": 0.2943232182031941, "grad_norm": 253.6573028564453, "learning_rate": 8.924963332060863e-06, "loss": 13.3633, "step": 145700 }, { "epoch": 0.29434341883587795, "grad_norm": 438.8563537597656, "learning_rate": 8.924747074437147e-06, "loss": 25.3535, "step": 145710 }, { "epoch": 0.29436361946856177, "grad_norm": 607.6083984375, "learning_rate": 8.92453079768469e-06, "loss": 36.6018, "step": 145720 }, { "epoch": 0.2943838201012456, "grad_norm": 314.3677062988281, "learning_rate": 8.924314501804548e-06, "loss": 17.9628, "step": 145730 }, { "epoch": 0.2944040207339294, "grad_norm": 234.26023864746094, "learning_rate": 8.924098186797771e-06, "loss": 17.8081, "step": 145740 }, { "epoch": 0.2944242213666132, "grad_norm": 147.3927001953125, "learning_rate": 8.923881852665416e-06, "loss": 14.9273, "step": 145750 }, { "epoch": 0.29444442199929705, "grad_norm": 157.45016479492188, "learning_rate": 8.923665499408535e-06, "loss": 17.4005, "step": 145760 }, { "epoch": 0.2944646226319808, "grad_norm": 145.46849060058594, "learning_rate": 8.923449127028187e-06, "loss": 10.5614, "step": 145770 }, { "epoch": 0.29448482326466463, "grad_norm": 218.89520263671875, "learning_rate": 8.923232735525422e-06, "loss": 22.5842, "step": 145780 }, { "epoch": 0.29450502389734845, "grad_norm": 57.350379943847656, "learning_rate": 8.923016324901298e-06, "loss": 18.7089, "step": 145790 }, { "epoch": 0.29452522453003227, "grad_norm": 13.486300468444824, "learning_rate": 8.922799895156868e-06, "loss": 17.8151, "step": 145800 }, { "epoch": 0.2945454251627161, "grad_norm": 653.29736328125, "learning_rate": 8.922583446293186e-06, "loss": 29.0202, "step": 145810 }, { "epoch": 0.2945656257953999, "grad_norm": 366.2355041503906, "learning_rate": 8.922366978311307e-06, "loss": 15.6639, "step": 145820 }, { "epoch": 0.29458582642808373, "grad_norm": 284.11163330078125, "learning_rate": 8.92215049121229e-06, "loss": 25.4827, "step": 145830 }, { "epoch": 0.29460602706076755, "grad_norm": 127.19479370117188, "learning_rate": 8.921933984997186e-06, "loss": 23.3419, "step": 145840 }, { "epoch": 0.29462622769345137, "grad_norm": 399.57806396484375, "learning_rate": 8.921717459667052e-06, "loss": 23.2815, "step": 145850 }, { "epoch": 0.2946464283261352, "grad_norm": 428.2537841796875, "learning_rate": 8.921500915222941e-06, "loss": 19.2584, "step": 145860 }, { "epoch": 0.294666628958819, "grad_norm": 273.8040771484375, "learning_rate": 8.921284351665911e-06, "loss": 10.4234, "step": 145870 }, { "epoch": 0.29468682959150283, "grad_norm": 186.50726318359375, "learning_rate": 8.921067768997018e-06, "loss": 16.1587, "step": 145880 }, { "epoch": 0.2947070302241866, "grad_norm": 517.5870971679688, "learning_rate": 8.920851167217315e-06, "loss": 17.4725, "step": 145890 }, { "epoch": 0.2947272308568704, "grad_norm": 192.067138671875, "learning_rate": 8.920634546327857e-06, "loss": 24.0368, "step": 145900 }, { "epoch": 0.29474743148955423, "grad_norm": 179.6768341064453, "learning_rate": 8.920417906329704e-06, "loss": 24.5709, "step": 145910 }, { "epoch": 0.29476763212223805, "grad_norm": 494.04254150390625, "learning_rate": 8.92020124722391e-06, "loss": 14.6904, "step": 145920 }, { "epoch": 0.2947878327549219, "grad_norm": 334.0469055175781, "learning_rate": 8.91998456901153e-06, "loss": 24.1722, "step": 145930 }, { "epoch": 0.2948080333876057, "grad_norm": 468.5563659667969, "learning_rate": 8.91976787169362e-06, "loss": 28.3261, "step": 145940 }, { "epoch": 0.2948282340202895, "grad_norm": 116.01739501953125, "learning_rate": 8.919551155271239e-06, "loss": 24.4642, "step": 145950 }, { "epoch": 0.29484843465297333, "grad_norm": 588.1138305664062, "learning_rate": 8.91933441974544e-06, "loss": 23.4469, "step": 145960 }, { "epoch": 0.29486863528565715, "grad_norm": 128.9580535888672, "learning_rate": 8.91911766511728e-06, "loss": 18.8398, "step": 145970 }, { "epoch": 0.294888835918341, "grad_norm": 175.528564453125, "learning_rate": 8.918900891387814e-06, "loss": 10.5472, "step": 145980 }, { "epoch": 0.2949090365510248, "grad_norm": 353.08026123046875, "learning_rate": 8.918684098558102e-06, "loss": 27.3851, "step": 145990 }, { "epoch": 0.2949292371837086, "grad_norm": 269.20526123046875, "learning_rate": 8.9184672866292e-06, "loss": 15.6852, "step": 146000 }, { "epoch": 0.29494943781639243, "grad_norm": 409.851318359375, "learning_rate": 8.918250455602162e-06, "loss": 23.3665, "step": 146010 }, { "epoch": 0.2949696384490762, "grad_norm": 144.2677459716797, "learning_rate": 8.918033605478047e-06, "loss": 33.5598, "step": 146020 }, { "epoch": 0.29498983908176, "grad_norm": 167.20809936523438, "learning_rate": 8.917816736257912e-06, "loss": 21.6718, "step": 146030 }, { "epoch": 0.29501003971444384, "grad_norm": 247.3301239013672, "learning_rate": 8.917599847942813e-06, "loss": 25.9973, "step": 146040 }, { "epoch": 0.29503024034712766, "grad_norm": 40.04937744140625, "learning_rate": 8.917382940533809e-06, "loss": 18.2416, "step": 146050 }, { "epoch": 0.2950504409798115, "grad_norm": 222.5066680908203, "learning_rate": 8.917166014031953e-06, "loss": 20.3841, "step": 146060 }, { "epoch": 0.2950706416124953, "grad_norm": 147.79684448242188, "learning_rate": 8.916949068438307e-06, "loss": 17.3439, "step": 146070 }, { "epoch": 0.2950908422451791, "grad_norm": 372.39544677734375, "learning_rate": 8.916732103753924e-06, "loss": 23.8865, "step": 146080 }, { "epoch": 0.29511104287786294, "grad_norm": 0.0, "learning_rate": 8.916515119979867e-06, "loss": 23.1654, "step": 146090 }, { "epoch": 0.29513124351054676, "grad_norm": 57.18629455566406, "learning_rate": 8.916298117117188e-06, "loss": 25.9606, "step": 146100 }, { "epoch": 0.2951514441432306, "grad_norm": 81.99925231933594, "learning_rate": 8.916081095166947e-06, "loss": 17.5056, "step": 146110 }, { "epoch": 0.2951716447759144, "grad_norm": 84.53407287597656, "learning_rate": 8.915864054130203e-06, "loss": 12.5021, "step": 146120 }, { "epoch": 0.2951918454085982, "grad_norm": 388.3645935058594, "learning_rate": 8.915646994008011e-06, "loss": 34.3971, "step": 146130 }, { "epoch": 0.29521204604128204, "grad_norm": 0.0, "learning_rate": 8.915429914801433e-06, "loss": 21.4234, "step": 146140 }, { "epoch": 0.2952322466739658, "grad_norm": 446.6255798339844, "learning_rate": 8.915212816511521e-06, "loss": 25.0329, "step": 146150 }, { "epoch": 0.2952524473066496, "grad_norm": 276.4041748046875, "learning_rate": 8.91499569913934e-06, "loss": 22.9194, "step": 146160 }, { "epoch": 0.29527264793933344, "grad_norm": 296.1176452636719, "learning_rate": 8.914778562685941e-06, "loss": 9.3709, "step": 146170 }, { "epoch": 0.29529284857201726, "grad_norm": 202.46212768554688, "learning_rate": 8.91456140715239e-06, "loss": 30.3451, "step": 146180 }, { "epoch": 0.2953130492047011, "grad_norm": 472.9142150878906, "learning_rate": 8.914344232539739e-06, "loss": 32.8423, "step": 146190 }, { "epoch": 0.2953332498373849, "grad_norm": 178.7008056640625, "learning_rate": 8.91412703884905e-06, "loss": 15.6537, "step": 146200 }, { "epoch": 0.2953534504700687, "grad_norm": 609.8689575195312, "learning_rate": 8.91390982608138e-06, "loss": 22.6938, "step": 146210 }, { "epoch": 0.29537365110275254, "grad_norm": 163.6333465576172, "learning_rate": 8.91369259423779e-06, "loss": 18.1454, "step": 146220 }, { "epoch": 0.29539385173543636, "grad_norm": 133.64219665527344, "learning_rate": 8.913475343319333e-06, "loss": 20.5241, "step": 146230 }, { "epoch": 0.2954140523681202, "grad_norm": 434.4700927734375, "learning_rate": 8.913258073327075e-06, "loss": 20.415, "step": 146240 }, { "epoch": 0.295434253000804, "grad_norm": 201.703125, "learning_rate": 8.91304078426207e-06, "loss": 20.1526, "step": 146250 }, { "epoch": 0.2954544536334878, "grad_norm": 425.043701171875, "learning_rate": 8.91282347612538e-06, "loss": 17.6187, "step": 146260 }, { "epoch": 0.29547465426617164, "grad_norm": 144.12728881835938, "learning_rate": 8.912606148918063e-06, "loss": 19.8395, "step": 146270 }, { "epoch": 0.2954948548988554, "grad_norm": 472.1793518066406, "learning_rate": 8.912388802641177e-06, "loss": 25.3911, "step": 146280 }, { "epoch": 0.29551505553153923, "grad_norm": 221.9882354736328, "learning_rate": 8.912171437295785e-06, "loss": 25.2066, "step": 146290 }, { "epoch": 0.29553525616422305, "grad_norm": 149.8636016845703, "learning_rate": 8.911954052882941e-06, "loss": 10.9735, "step": 146300 }, { "epoch": 0.29555545679690687, "grad_norm": 282.87896728515625, "learning_rate": 8.91173664940371e-06, "loss": 19.5515, "step": 146310 }, { "epoch": 0.2955756574295907, "grad_norm": 149.91297912597656, "learning_rate": 8.911519226859147e-06, "loss": 20.1735, "step": 146320 }, { "epoch": 0.2955958580622745, "grad_norm": 462.7615661621094, "learning_rate": 8.911301785250315e-06, "loss": 27.0333, "step": 146330 }, { "epoch": 0.29561605869495833, "grad_norm": 241.8584747314453, "learning_rate": 8.911084324578272e-06, "loss": 30.3014, "step": 146340 }, { "epoch": 0.29563625932764215, "grad_norm": 96.51071166992188, "learning_rate": 8.910866844844077e-06, "loss": 52.7971, "step": 146350 }, { "epoch": 0.29565645996032597, "grad_norm": 286.4424743652344, "learning_rate": 8.910649346048792e-06, "loss": 24.5648, "step": 146360 }, { "epoch": 0.2956766605930098, "grad_norm": 113.85694122314453, "learning_rate": 8.910431828193478e-06, "loss": 21.9407, "step": 146370 }, { "epoch": 0.2956968612256936, "grad_norm": 190.33322143554688, "learning_rate": 8.910214291279192e-06, "loss": 10.008, "step": 146380 }, { "epoch": 0.29571706185837743, "grad_norm": 225.7638702392578, "learning_rate": 8.909996735306996e-06, "loss": 30.2762, "step": 146390 }, { "epoch": 0.2957372624910612, "grad_norm": 674.8375244140625, "learning_rate": 8.909779160277951e-06, "loss": 21.1511, "step": 146400 }, { "epoch": 0.295757463123745, "grad_norm": 248.91348266601562, "learning_rate": 8.909561566193118e-06, "loss": 20.3411, "step": 146410 }, { "epoch": 0.29577766375642883, "grad_norm": 190.9489288330078, "learning_rate": 8.909343953053553e-06, "loss": 16.4399, "step": 146420 }, { "epoch": 0.29579786438911265, "grad_norm": 723.2575073242188, "learning_rate": 8.90912632086032e-06, "loss": 25.1163, "step": 146430 }, { "epoch": 0.2958180650217965, "grad_norm": 493.9483947753906, "learning_rate": 8.90890866961448e-06, "loss": 21.0348, "step": 146440 }, { "epoch": 0.2958382656544803, "grad_norm": 161.9943389892578, "learning_rate": 8.908690999317094e-06, "loss": 37.2498, "step": 146450 }, { "epoch": 0.2958584662871641, "grad_norm": 289.649169921875, "learning_rate": 8.90847330996922e-06, "loss": 20.9464, "step": 146460 }, { "epoch": 0.29587866691984793, "grad_norm": 130.732666015625, "learning_rate": 8.908255601571924e-06, "loss": 18.9345, "step": 146470 }, { "epoch": 0.29589886755253175, "grad_norm": 629.6993408203125, "learning_rate": 8.908037874126263e-06, "loss": 40.0716, "step": 146480 }, { "epoch": 0.2959190681852156, "grad_norm": 155.0184783935547, "learning_rate": 8.9078201276333e-06, "loss": 40.7808, "step": 146490 }, { "epoch": 0.2959392688178994, "grad_norm": 251.22650146484375, "learning_rate": 8.907602362094094e-06, "loss": 16.3237, "step": 146500 }, { "epoch": 0.2959594694505832, "grad_norm": 284.5832214355469, "learning_rate": 8.90738457750971e-06, "loss": 11.7217, "step": 146510 }, { "epoch": 0.29597967008326703, "grad_norm": 447.9478454589844, "learning_rate": 8.907166773881207e-06, "loss": 35.0427, "step": 146520 }, { "epoch": 0.2959998707159508, "grad_norm": 441.7938537597656, "learning_rate": 8.906948951209647e-06, "loss": 11.9462, "step": 146530 }, { "epoch": 0.2960200713486346, "grad_norm": 374.62884521484375, "learning_rate": 8.90673110949609e-06, "loss": 26.1452, "step": 146540 }, { "epoch": 0.29604027198131844, "grad_norm": 156.2978973388672, "learning_rate": 8.9065132487416e-06, "loss": 43.2318, "step": 146550 }, { "epoch": 0.29606047261400226, "grad_norm": 427.7441101074219, "learning_rate": 8.90629536894724e-06, "loss": 18.0475, "step": 146560 }, { "epoch": 0.2960806732466861, "grad_norm": 210.4543914794922, "learning_rate": 8.906077470114068e-06, "loss": 20.7236, "step": 146570 }, { "epoch": 0.2961008738793699, "grad_norm": 290.7176513671875, "learning_rate": 8.90585955224315e-06, "loss": 24.4077, "step": 146580 }, { "epoch": 0.2961210745120537, "grad_norm": 334.6560974121094, "learning_rate": 8.905641615335545e-06, "loss": 17.9128, "step": 146590 }, { "epoch": 0.29614127514473754, "grad_norm": 836.2611694335938, "learning_rate": 8.905423659392316e-06, "loss": 22.9746, "step": 146600 }, { "epoch": 0.29616147577742136, "grad_norm": 138.90284729003906, "learning_rate": 8.905205684414527e-06, "loss": 20.6885, "step": 146610 }, { "epoch": 0.2961816764101052, "grad_norm": 288.87213134765625, "learning_rate": 8.90498769040324e-06, "loss": 16.8958, "step": 146620 }, { "epoch": 0.296201877042789, "grad_norm": 421.83380126953125, "learning_rate": 8.904769677359515e-06, "loss": 19.3998, "step": 146630 }, { "epoch": 0.2962220776754728, "grad_norm": 371.7138366699219, "learning_rate": 8.904551645284416e-06, "loss": 11.8774, "step": 146640 }, { "epoch": 0.29624227830815664, "grad_norm": 259.82952880859375, "learning_rate": 8.904333594179007e-06, "loss": 19.8294, "step": 146650 }, { "epoch": 0.2962624789408404, "grad_norm": 434.2825622558594, "learning_rate": 8.904115524044349e-06, "loss": 13.1067, "step": 146660 }, { "epoch": 0.2962826795735242, "grad_norm": 216.4358673095703, "learning_rate": 8.903897434881506e-06, "loss": 39.0183, "step": 146670 }, { "epoch": 0.29630288020620804, "grad_norm": 422.30487060546875, "learning_rate": 8.90367932669154e-06, "loss": 25.0749, "step": 146680 }, { "epoch": 0.29632308083889186, "grad_norm": 734.3863525390625, "learning_rate": 8.903461199475514e-06, "loss": 16.5241, "step": 146690 }, { "epoch": 0.2963432814715757, "grad_norm": 139.90249633789062, "learning_rate": 8.903243053234492e-06, "loss": 11.6066, "step": 146700 }, { "epoch": 0.2963634821042595, "grad_norm": 336.1477966308594, "learning_rate": 8.903024887969536e-06, "loss": 15.5044, "step": 146710 }, { "epoch": 0.2963836827369433, "grad_norm": 505.07525634765625, "learning_rate": 8.90280670368171e-06, "loss": 27.974, "step": 146720 }, { "epoch": 0.29640388336962714, "grad_norm": 190.57940673828125, "learning_rate": 8.902588500372078e-06, "loss": 23.016, "step": 146730 }, { "epoch": 0.29642408400231096, "grad_norm": 462.0219421386719, "learning_rate": 8.902370278041705e-06, "loss": 21.2933, "step": 146740 }, { "epoch": 0.2964442846349948, "grad_norm": 188.3461456298828, "learning_rate": 8.902152036691649e-06, "loss": 37.1155, "step": 146750 }, { "epoch": 0.2964644852676786, "grad_norm": 435.21636962890625, "learning_rate": 8.90193377632298e-06, "loss": 34.2967, "step": 146760 }, { "epoch": 0.2964846859003624, "grad_norm": 295.2095642089844, "learning_rate": 8.901715496936758e-06, "loss": 25.083, "step": 146770 }, { "epoch": 0.29650488653304624, "grad_norm": 349.887451171875, "learning_rate": 8.901497198534048e-06, "loss": 24.2759, "step": 146780 }, { "epoch": 0.29652508716573, "grad_norm": 164.00570678710938, "learning_rate": 8.901278881115914e-06, "loss": 14.5158, "step": 146790 }, { "epoch": 0.2965452877984138, "grad_norm": 55.539710998535156, "learning_rate": 8.90106054468342e-06, "loss": 17.9289, "step": 146800 }, { "epoch": 0.29656548843109765, "grad_norm": 54.00072479248047, "learning_rate": 8.90084218923763e-06, "loss": 31.0499, "step": 146810 }, { "epoch": 0.29658568906378147, "grad_norm": 364.5129089355469, "learning_rate": 8.900623814779605e-06, "loss": 11.873, "step": 146820 }, { "epoch": 0.2966058896964653, "grad_norm": 0.0, "learning_rate": 8.900405421310416e-06, "loss": 15.8587, "step": 146830 }, { "epoch": 0.2966260903291491, "grad_norm": 486.33551025390625, "learning_rate": 8.900187008831124e-06, "loss": 19.7687, "step": 146840 }, { "epoch": 0.2966462909618329, "grad_norm": 259.1111755371094, "learning_rate": 8.899968577342794e-06, "loss": 11.7199, "step": 146850 }, { "epoch": 0.29666649159451675, "grad_norm": 248.27052307128906, "learning_rate": 8.89975012684649e-06, "loss": 28.5556, "step": 146860 }, { "epoch": 0.29668669222720057, "grad_norm": 124.14593505859375, "learning_rate": 8.899531657343275e-06, "loss": 19.7176, "step": 146870 }, { "epoch": 0.2967068928598844, "grad_norm": 167.23760986328125, "learning_rate": 8.899313168834216e-06, "loss": 22.9181, "step": 146880 }, { "epoch": 0.2967270934925682, "grad_norm": 342.69915771484375, "learning_rate": 8.899094661320378e-06, "loss": 19.8753, "step": 146890 }, { "epoch": 0.296747294125252, "grad_norm": 308.8180847167969, "learning_rate": 8.898876134802827e-06, "loss": 25.1758, "step": 146900 }, { "epoch": 0.29676749475793585, "grad_norm": 59.29542541503906, "learning_rate": 8.898657589282625e-06, "loss": 24.5248, "step": 146910 }, { "epoch": 0.2967876953906196, "grad_norm": 412.6939392089844, "learning_rate": 8.89843902476084e-06, "loss": 21.0596, "step": 146920 }, { "epoch": 0.29680789602330343, "grad_norm": 415.1787414550781, "learning_rate": 8.898220441238534e-06, "loss": 14.0873, "step": 146930 }, { "epoch": 0.29682809665598725, "grad_norm": 96.40065002441406, "learning_rate": 8.898001838716777e-06, "loss": 16.6584, "step": 146940 }, { "epoch": 0.29684829728867107, "grad_norm": 272.50140380859375, "learning_rate": 8.897783217196629e-06, "loss": 22.2589, "step": 146950 }, { "epoch": 0.2968684979213549, "grad_norm": 326.0782470703125, "learning_rate": 8.89756457667916e-06, "loss": 25.962, "step": 146960 }, { "epoch": 0.2968886985540387, "grad_norm": 233.13250732421875, "learning_rate": 8.897345917165434e-06, "loss": 31.9124, "step": 146970 }, { "epoch": 0.29690889918672253, "grad_norm": 431.927001953125, "learning_rate": 8.897127238656517e-06, "loss": 12.6164, "step": 146980 }, { "epoch": 0.29692909981940635, "grad_norm": 322.75799560546875, "learning_rate": 8.896908541153475e-06, "loss": 12.3548, "step": 146990 }, { "epoch": 0.29694930045209017, "grad_norm": 180.18003845214844, "learning_rate": 8.896689824657371e-06, "loss": 12.9861, "step": 147000 }, { "epoch": 0.296969501084774, "grad_norm": 152.2717742919922, "learning_rate": 8.896471089169275e-06, "loss": 20.272, "step": 147010 }, { "epoch": 0.2969897017174578, "grad_norm": 1022.5787963867188, "learning_rate": 8.896252334690251e-06, "loss": 25.9173, "step": 147020 }, { "epoch": 0.29700990235014163, "grad_norm": 243.7765350341797, "learning_rate": 8.896033561221367e-06, "loss": 21.0722, "step": 147030 }, { "epoch": 0.2970301029828254, "grad_norm": 72.73299407958984, "learning_rate": 8.895814768763686e-06, "loss": 14.6961, "step": 147040 }, { "epoch": 0.2970503036155092, "grad_norm": 365.9035339355469, "learning_rate": 8.895595957318277e-06, "loss": 28.9308, "step": 147050 }, { "epoch": 0.29707050424819303, "grad_norm": 182.63734436035156, "learning_rate": 8.895377126886206e-06, "loss": 29.4286, "step": 147060 }, { "epoch": 0.29709070488087685, "grad_norm": 210.33245849609375, "learning_rate": 8.89515827746854e-06, "loss": 23.7451, "step": 147070 }, { "epoch": 0.2971109055135607, "grad_norm": 44.462493896484375, "learning_rate": 8.894939409066344e-06, "loss": 13.998, "step": 147080 }, { "epoch": 0.2971311061462445, "grad_norm": 439.18951416015625, "learning_rate": 8.894720521680687e-06, "loss": 33.8278, "step": 147090 }, { "epoch": 0.2971513067789283, "grad_norm": 285.48370361328125, "learning_rate": 8.894501615312633e-06, "loss": 11.9814, "step": 147100 }, { "epoch": 0.29717150741161213, "grad_norm": 177.125732421875, "learning_rate": 8.894282689963252e-06, "loss": 26.375, "step": 147110 }, { "epoch": 0.29719170804429595, "grad_norm": 166.273681640625, "learning_rate": 8.894063745633607e-06, "loss": 15.0171, "step": 147120 }, { "epoch": 0.2972119086769798, "grad_norm": 586.3330688476562, "learning_rate": 8.89384478232477e-06, "loss": 32.1491, "step": 147130 }, { "epoch": 0.2972321093096636, "grad_norm": 264.0667724609375, "learning_rate": 8.893625800037803e-06, "loss": 30.9064, "step": 147140 }, { "epoch": 0.2972523099423474, "grad_norm": 255.71258544921875, "learning_rate": 8.89340679877378e-06, "loss": 24.8261, "step": 147150 }, { "epoch": 0.29727251057503123, "grad_norm": 298.7345886230469, "learning_rate": 8.893187778533763e-06, "loss": 23.8954, "step": 147160 }, { "epoch": 0.297292711207715, "grad_norm": 63.75090408325195, "learning_rate": 8.892968739318819e-06, "loss": 48.2822, "step": 147170 }, { "epoch": 0.2973129118403988, "grad_norm": 159.7095947265625, "learning_rate": 8.89274968113002e-06, "loss": 13.545, "step": 147180 }, { "epoch": 0.29733311247308264, "grad_norm": 429.11309814453125, "learning_rate": 8.89253060396843e-06, "loss": 36.1138, "step": 147190 }, { "epoch": 0.29735331310576646, "grad_norm": 157.16885375976562, "learning_rate": 8.892311507835118e-06, "loss": 16.2936, "step": 147200 }, { "epoch": 0.2973735137384503, "grad_norm": 367.5013122558594, "learning_rate": 8.892092392731152e-06, "loss": 24.0316, "step": 147210 }, { "epoch": 0.2973937143711341, "grad_norm": 335.0994873046875, "learning_rate": 8.891873258657599e-06, "loss": 26.401, "step": 147220 }, { "epoch": 0.2974139150038179, "grad_norm": 199.46006774902344, "learning_rate": 8.891654105615528e-06, "loss": 19.3606, "step": 147230 }, { "epoch": 0.29743411563650174, "grad_norm": 283.1739807128906, "learning_rate": 8.891434933606009e-06, "loss": 20.4049, "step": 147240 }, { "epoch": 0.29745431626918556, "grad_norm": 240.279541015625, "learning_rate": 8.891215742630106e-06, "loss": 13.9021, "step": 147250 }, { "epoch": 0.2974745169018694, "grad_norm": 60.37054443359375, "learning_rate": 8.890996532688889e-06, "loss": 17.8797, "step": 147260 }, { "epoch": 0.2974947175345532, "grad_norm": 383.9093933105469, "learning_rate": 8.890777303783428e-06, "loss": 19.9292, "step": 147270 }, { "epoch": 0.297514918167237, "grad_norm": 267.1871643066406, "learning_rate": 8.89055805591479e-06, "loss": 19.0101, "step": 147280 }, { "epoch": 0.29753511879992084, "grad_norm": 562.5958251953125, "learning_rate": 8.890338789084043e-06, "loss": 25.1334, "step": 147290 }, { "epoch": 0.2975553194326046, "grad_norm": 681.1688232421875, "learning_rate": 8.890119503292258e-06, "loss": 26.3574, "step": 147300 }, { "epoch": 0.2975755200652884, "grad_norm": 0.0, "learning_rate": 8.889900198540502e-06, "loss": 18.4676, "step": 147310 }, { "epoch": 0.29759572069797224, "grad_norm": 226.87437438964844, "learning_rate": 8.889680874829845e-06, "loss": 27.0107, "step": 147320 }, { "epoch": 0.29761592133065606, "grad_norm": 229.96054077148438, "learning_rate": 8.889461532161353e-06, "loss": 23.7219, "step": 147330 }, { "epoch": 0.2976361219633399, "grad_norm": 569.4736328125, "learning_rate": 8.889242170536099e-06, "loss": 18.5307, "step": 147340 }, { "epoch": 0.2976563225960237, "grad_norm": 247.3912811279297, "learning_rate": 8.88902278995515e-06, "loss": 22.6322, "step": 147350 }, { "epoch": 0.2976765232287075, "grad_norm": 349.69873046875, "learning_rate": 8.888803390419576e-06, "loss": 14.9921, "step": 147360 }, { "epoch": 0.29769672386139134, "grad_norm": 183.3472137451172, "learning_rate": 8.888583971930444e-06, "loss": 35.2443, "step": 147370 }, { "epoch": 0.29771692449407516, "grad_norm": 669.5637817382812, "learning_rate": 8.888364534488828e-06, "loss": 24.3452, "step": 147380 }, { "epoch": 0.297737125126759, "grad_norm": 297.0299987792969, "learning_rate": 8.888145078095794e-06, "loss": 14.2922, "step": 147390 }, { "epoch": 0.2977573257594428, "grad_norm": 152.9611358642578, "learning_rate": 8.887925602752411e-06, "loss": 12.7989, "step": 147400 }, { "epoch": 0.2977775263921266, "grad_norm": 274.9643859863281, "learning_rate": 8.887706108459751e-06, "loss": 21.203, "step": 147410 }, { "epoch": 0.29779772702481044, "grad_norm": 345.923828125, "learning_rate": 8.887486595218884e-06, "loss": 26.0111, "step": 147420 }, { "epoch": 0.2978179276574942, "grad_norm": 357.01483154296875, "learning_rate": 8.887267063030876e-06, "loss": 17.9089, "step": 147430 }, { "epoch": 0.29783812829017803, "grad_norm": 350.2316589355469, "learning_rate": 8.887047511896803e-06, "loss": 13.9491, "step": 147440 }, { "epoch": 0.29785832892286185, "grad_norm": 176.206298828125, "learning_rate": 8.886827941817731e-06, "loss": 23.9398, "step": 147450 }, { "epoch": 0.29787852955554567, "grad_norm": 280.4626770019531, "learning_rate": 8.88660835279473e-06, "loss": 25.1759, "step": 147460 }, { "epoch": 0.2978987301882295, "grad_norm": 138.88677978515625, "learning_rate": 8.886388744828872e-06, "loss": 12.6495, "step": 147470 }, { "epoch": 0.2979189308209133, "grad_norm": 895.0816040039062, "learning_rate": 8.886169117921227e-06, "loss": 58.8878, "step": 147480 }, { "epoch": 0.29793913145359713, "grad_norm": 562.2074584960938, "learning_rate": 8.885949472072864e-06, "loss": 15.8799, "step": 147490 }, { "epoch": 0.29795933208628095, "grad_norm": 95.58809661865234, "learning_rate": 8.885729807284855e-06, "loss": 17.0064, "step": 147500 }, { "epoch": 0.29797953271896477, "grad_norm": 259.09912109375, "learning_rate": 8.88551012355827e-06, "loss": 16.2629, "step": 147510 }, { "epoch": 0.2979997333516486, "grad_norm": 183.38241577148438, "learning_rate": 8.885290420894182e-06, "loss": 15.1021, "step": 147520 }, { "epoch": 0.2980199339843324, "grad_norm": 299.47705078125, "learning_rate": 8.885070699293656e-06, "loss": 20.6615, "step": 147530 }, { "epoch": 0.29804013461701623, "grad_norm": 510.4315490722656, "learning_rate": 8.884850958757769e-06, "loss": 13.2413, "step": 147540 }, { "epoch": 0.29806033524970005, "grad_norm": 186.10684204101562, "learning_rate": 8.88463119928759e-06, "loss": 21.8419, "step": 147550 }, { "epoch": 0.2980805358823838, "grad_norm": 232.77679443359375, "learning_rate": 8.88441142088419e-06, "loss": 27.0081, "step": 147560 }, { "epoch": 0.29810073651506763, "grad_norm": 228.75999450683594, "learning_rate": 8.884191623548636e-06, "loss": 28.8878, "step": 147570 }, { "epoch": 0.29812093714775145, "grad_norm": 445.1830139160156, "learning_rate": 8.883971807282007e-06, "loss": 18.9611, "step": 147580 }, { "epoch": 0.2981411377804353, "grad_norm": 423.938232421875, "learning_rate": 8.88375197208537e-06, "loss": 19.8422, "step": 147590 }, { "epoch": 0.2981613384131191, "grad_norm": 45.83300018310547, "learning_rate": 8.883532117959797e-06, "loss": 41.1681, "step": 147600 }, { "epoch": 0.2981815390458029, "grad_norm": 79.37523651123047, "learning_rate": 8.883312244906358e-06, "loss": 9.6894, "step": 147610 }, { "epoch": 0.29820173967848673, "grad_norm": 241.63607788085938, "learning_rate": 8.883092352926126e-06, "loss": 27.4044, "step": 147620 }, { "epoch": 0.29822194031117055, "grad_norm": 490.6800231933594, "learning_rate": 8.882872442020174e-06, "loss": 31.9102, "step": 147630 }, { "epoch": 0.2982421409438544, "grad_norm": 366.9421081542969, "learning_rate": 8.882652512189574e-06, "loss": 18.1817, "step": 147640 }, { "epoch": 0.2982623415765382, "grad_norm": 145.4837646484375, "learning_rate": 8.882432563435394e-06, "loss": 12.9836, "step": 147650 }, { "epoch": 0.298282542209222, "grad_norm": 318.98309326171875, "learning_rate": 8.88221259575871e-06, "loss": 21.9433, "step": 147660 }, { "epoch": 0.29830274284190583, "grad_norm": 339.26361083984375, "learning_rate": 8.881992609160592e-06, "loss": 12.7843, "step": 147670 }, { "epoch": 0.2983229434745896, "grad_norm": 209.13043212890625, "learning_rate": 8.881772603642113e-06, "loss": 22.9972, "step": 147680 }, { "epoch": 0.2983431441072734, "grad_norm": 378.44140625, "learning_rate": 8.881552579204345e-06, "loss": 26.4282, "step": 147690 }, { "epoch": 0.29836334473995724, "grad_norm": 321.1785583496094, "learning_rate": 8.88133253584836e-06, "loss": 28.6104, "step": 147700 }, { "epoch": 0.29838354537264106, "grad_norm": 342.8838195800781, "learning_rate": 8.881112473575231e-06, "loss": 23.7786, "step": 147710 }, { "epoch": 0.2984037460053249, "grad_norm": 298.0773620605469, "learning_rate": 8.880892392386032e-06, "loss": 58.605, "step": 147720 }, { "epoch": 0.2984239466380087, "grad_norm": 98.41344451904297, "learning_rate": 8.880672292281834e-06, "loss": 18.2984, "step": 147730 }, { "epoch": 0.2984441472706925, "grad_norm": 168.34547424316406, "learning_rate": 8.880452173263708e-06, "loss": 15.3593, "step": 147740 }, { "epoch": 0.29846434790337634, "grad_norm": 317.4595947265625, "learning_rate": 8.88023203533273e-06, "loss": 17.6826, "step": 147750 }, { "epoch": 0.29848454853606016, "grad_norm": 490.7160339355469, "learning_rate": 8.880011878489972e-06, "loss": 20.1256, "step": 147760 }, { "epoch": 0.298504749168744, "grad_norm": 215.88214111328125, "learning_rate": 8.879791702736507e-06, "loss": 22.5481, "step": 147770 }, { "epoch": 0.2985249498014278, "grad_norm": 516.1148681640625, "learning_rate": 8.879571508073407e-06, "loss": 18.5506, "step": 147780 }, { "epoch": 0.2985451504341116, "grad_norm": 376.89068603515625, "learning_rate": 8.879351294501746e-06, "loss": 20.2032, "step": 147790 }, { "epoch": 0.29856535106679544, "grad_norm": 342.07757568359375, "learning_rate": 8.879131062022598e-06, "loss": 24.3858, "step": 147800 }, { "epoch": 0.2985855516994792, "grad_norm": 565.7283325195312, "learning_rate": 8.878910810637036e-06, "loss": 22.9151, "step": 147810 }, { "epoch": 0.298605752332163, "grad_norm": 212.27198791503906, "learning_rate": 8.878690540346132e-06, "loss": 10.1812, "step": 147820 }, { "epoch": 0.29862595296484684, "grad_norm": 558.6450805664062, "learning_rate": 8.878470251150959e-06, "loss": 21.6879, "step": 147830 }, { "epoch": 0.29864615359753066, "grad_norm": 594.29638671875, "learning_rate": 8.878249943052595e-06, "loss": 38.7479, "step": 147840 }, { "epoch": 0.2986663542302145, "grad_norm": 269.29266357421875, "learning_rate": 8.87802961605211e-06, "loss": 17.6626, "step": 147850 }, { "epoch": 0.2986865548628983, "grad_norm": 66.44206237792969, "learning_rate": 8.877809270150582e-06, "loss": 17.1358, "step": 147860 }, { "epoch": 0.2987067554955821, "grad_norm": 422.6797180175781, "learning_rate": 8.877588905349079e-06, "loss": 38.2939, "step": 147870 }, { "epoch": 0.29872695612826594, "grad_norm": 696.4716186523438, "learning_rate": 8.877368521648678e-06, "loss": 30.9999, "step": 147880 }, { "epoch": 0.29874715676094976, "grad_norm": 224.95590209960938, "learning_rate": 8.877148119050453e-06, "loss": 15.8423, "step": 147890 }, { "epoch": 0.2987673573936336, "grad_norm": 38.69296646118164, "learning_rate": 8.87692769755548e-06, "loss": 16.5642, "step": 147900 }, { "epoch": 0.2987875580263174, "grad_norm": 334.7036437988281, "learning_rate": 8.876707257164829e-06, "loss": 18.338, "step": 147910 }, { "epoch": 0.2988077586590012, "grad_norm": 541.1287231445312, "learning_rate": 8.87648679787958e-06, "loss": 14.565, "step": 147920 }, { "epoch": 0.29882795929168504, "grad_norm": 336.0312805175781, "learning_rate": 8.876266319700802e-06, "loss": 13.1427, "step": 147930 }, { "epoch": 0.2988481599243688, "grad_norm": 128.4329833984375, "learning_rate": 8.876045822629573e-06, "loss": 31.6972, "step": 147940 }, { "epoch": 0.2988683605570526, "grad_norm": 649.6993408203125, "learning_rate": 8.875825306666968e-06, "loss": 20.8662, "step": 147950 }, { "epoch": 0.29888856118973645, "grad_norm": 36.458946228027344, "learning_rate": 8.87560477181406e-06, "loss": 22.4225, "step": 147960 }, { "epoch": 0.29890876182242027, "grad_norm": 262.2691650390625, "learning_rate": 8.875384218071923e-06, "loss": 11.434, "step": 147970 }, { "epoch": 0.2989289624551041, "grad_norm": 64.57925415039062, "learning_rate": 8.875163645441633e-06, "loss": 20.8594, "step": 147980 }, { "epoch": 0.2989491630877879, "grad_norm": 330.3468017578125, "learning_rate": 8.874943053924267e-06, "loss": 11.8504, "step": 147990 }, { "epoch": 0.2989693637204717, "grad_norm": 169.85272216796875, "learning_rate": 8.874722443520898e-06, "loss": 26.71, "step": 148000 }, { "epoch": 0.29898956435315555, "grad_norm": 234.25892639160156, "learning_rate": 8.874501814232603e-06, "loss": 30.7071, "step": 148010 }, { "epoch": 0.29900976498583937, "grad_norm": 413.079833984375, "learning_rate": 8.874281166060454e-06, "loss": 16.303, "step": 148020 }, { "epoch": 0.2990299656185232, "grad_norm": 433.82989501953125, "learning_rate": 8.87406049900553e-06, "loss": 17.7176, "step": 148030 }, { "epoch": 0.299050166251207, "grad_norm": 220.50486755371094, "learning_rate": 8.873839813068904e-06, "loss": 13.4487, "step": 148040 }, { "epoch": 0.2990703668838908, "grad_norm": 546.8905029296875, "learning_rate": 8.873619108251654e-06, "loss": 22.4042, "step": 148050 }, { "epoch": 0.29909056751657465, "grad_norm": 3.207855224609375, "learning_rate": 8.873398384554852e-06, "loss": 13.3187, "step": 148060 }, { "epoch": 0.2991107681492584, "grad_norm": 127.79290008544922, "learning_rate": 8.873177641979578e-06, "loss": 21.1885, "step": 148070 }, { "epoch": 0.29913096878194223, "grad_norm": 99.46066284179688, "learning_rate": 8.872956880526906e-06, "loss": 20.9522, "step": 148080 }, { "epoch": 0.29915116941462605, "grad_norm": 462.34033203125, "learning_rate": 8.87273610019791e-06, "loss": 28.1564, "step": 148090 }, { "epoch": 0.29917137004730987, "grad_norm": 429.55548095703125, "learning_rate": 8.872515300993669e-06, "loss": 18.3016, "step": 148100 }, { "epoch": 0.2991915706799937, "grad_norm": 363.5998840332031, "learning_rate": 8.872294482915259e-06, "loss": 18.2276, "step": 148110 }, { "epoch": 0.2992117713126775, "grad_norm": 309.1612548828125, "learning_rate": 8.872073645963755e-06, "loss": 27.8033, "step": 148120 }, { "epoch": 0.29923197194536133, "grad_norm": 214.73265075683594, "learning_rate": 8.871852790140234e-06, "loss": 18.088, "step": 148130 }, { "epoch": 0.29925217257804515, "grad_norm": 327.44384765625, "learning_rate": 8.87163191544577e-06, "loss": 18.2499, "step": 148140 }, { "epoch": 0.29927237321072897, "grad_norm": 308.6640930175781, "learning_rate": 8.871411021881444e-06, "loss": 25.7244, "step": 148150 }, { "epoch": 0.2992925738434128, "grad_norm": 589.6687622070312, "learning_rate": 8.871190109448329e-06, "loss": 32.7542, "step": 148160 }, { "epoch": 0.2993127744760966, "grad_norm": 717.1983642578125, "learning_rate": 8.870969178147502e-06, "loss": 14.6002, "step": 148170 }, { "epoch": 0.29933297510878043, "grad_norm": 290.5276184082031, "learning_rate": 8.870748227980044e-06, "loss": 21.1297, "step": 148180 }, { "epoch": 0.29935317574146425, "grad_norm": 1675.328369140625, "learning_rate": 8.870527258947025e-06, "loss": 44.5277, "step": 148190 }, { "epoch": 0.299373376374148, "grad_norm": 587.11083984375, "learning_rate": 8.870306271049527e-06, "loss": 20.148, "step": 148200 }, { "epoch": 0.29939357700683183, "grad_norm": 389.1329650878906, "learning_rate": 8.870085264288626e-06, "loss": 31.2584, "step": 148210 }, { "epoch": 0.29941377763951565, "grad_norm": 70.14835357666016, "learning_rate": 8.869864238665398e-06, "loss": 15.6892, "step": 148220 }, { "epoch": 0.2994339782721995, "grad_norm": 1025.1485595703125, "learning_rate": 8.86964319418092e-06, "loss": 30.2566, "step": 148230 }, { "epoch": 0.2994541789048833, "grad_norm": 581.1032104492188, "learning_rate": 8.869422130836274e-06, "loss": 46.5794, "step": 148240 }, { "epoch": 0.2994743795375671, "grad_norm": 596.0458984375, "learning_rate": 8.869201048632531e-06, "loss": 39.2599, "step": 148250 }, { "epoch": 0.29949458017025093, "grad_norm": 325.9103088378906, "learning_rate": 8.868979947570773e-06, "loss": 19.9232, "step": 148260 }, { "epoch": 0.29951478080293475, "grad_norm": 258.52911376953125, "learning_rate": 8.868758827652075e-06, "loss": 15.1748, "step": 148270 }, { "epoch": 0.2995349814356186, "grad_norm": 276.8941650390625, "learning_rate": 8.868537688877516e-06, "loss": 20.8251, "step": 148280 }, { "epoch": 0.2995551820683024, "grad_norm": 339.92535400390625, "learning_rate": 8.868316531248173e-06, "loss": 20.9988, "step": 148290 }, { "epoch": 0.2995753827009862, "grad_norm": 368.423828125, "learning_rate": 8.868095354765125e-06, "loss": 17.6806, "step": 148300 }, { "epoch": 0.29959558333367003, "grad_norm": 317.762451171875, "learning_rate": 8.867874159429448e-06, "loss": 17.0899, "step": 148310 }, { "epoch": 0.2996157839663538, "grad_norm": 327.7456970214844, "learning_rate": 8.867652945242225e-06, "loss": 16.8876, "step": 148320 }, { "epoch": 0.2996359845990376, "grad_norm": 398.0007019042969, "learning_rate": 8.867431712204527e-06, "loss": 24.0711, "step": 148330 }, { "epoch": 0.29965618523172144, "grad_norm": 585.3739013671875, "learning_rate": 8.867210460317437e-06, "loss": 29.1485, "step": 148340 }, { "epoch": 0.29967638586440526, "grad_norm": 696.1988525390625, "learning_rate": 8.866989189582033e-06, "loss": 34.2216, "step": 148350 }, { "epoch": 0.2996965864970891, "grad_norm": 180.8604278564453, "learning_rate": 8.86676789999939e-06, "loss": 15.586, "step": 148360 }, { "epoch": 0.2997167871297729, "grad_norm": 133.32186889648438, "learning_rate": 8.866546591570593e-06, "loss": 8.6767, "step": 148370 }, { "epoch": 0.2997369877624567, "grad_norm": 92.20783996582031, "learning_rate": 8.866325264296716e-06, "loss": 16.2744, "step": 148380 }, { "epoch": 0.29975718839514054, "grad_norm": 262.6266784667969, "learning_rate": 8.866103918178837e-06, "loss": 15.425, "step": 148390 }, { "epoch": 0.29977738902782436, "grad_norm": 256.6765441894531, "learning_rate": 8.865882553218036e-06, "loss": 22.1526, "step": 148400 }, { "epoch": 0.2997975896605082, "grad_norm": 77.33412170410156, "learning_rate": 8.865661169415396e-06, "loss": 15.2944, "step": 148410 }, { "epoch": 0.299817790293192, "grad_norm": 179.73977661132812, "learning_rate": 8.865439766771988e-06, "loss": 12.611, "step": 148420 }, { "epoch": 0.2998379909258758, "grad_norm": 157.46200561523438, "learning_rate": 8.865218345288897e-06, "loss": 14.0017, "step": 148430 }, { "epoch": 0.29985819155855964, "grad_norm": 208.48617553710938, "learning_rate": 8.864996904967202e-06, "loss": 23.7895, "step": 148440 }, { "epoch": 0.2998783921912434, "grad_norm": 195.89364624023438, "learning_rate": 8.864775445807979e-06, "loss": 19.5676, "step": 148450 }, { "epoch": 0.2998985928239272, "grad_norm": 114.60022735595703, "learning_rate": 8.86455396781231e-06, "loss": 21.4891, "step": 148460 }, { "epoch": 0.29991879345661104, "grad_norm": 181.90753173828125, "learning_rate": 8.864332470981274e-06, "loss": 18.0884, "step": 148470 }, { "epoch": 0.29993899408929486, "grad_norm": 379.0750427246094, "learning_rate": 8.86411095531595e-06, "loss": 23.3915, "step": 148480 }, { "epoch": 0.2999591947219787, "grad_norm": 39.434322357177734, "learning_rate": 8.863889420817418e-06, "loss": 18.8518, "step": 148490 }, { "epoch": 0.2999793953546625, "grad_norm": 214.86141967773438, "learning_rate": 8.863667867486756e-06, "loss": 15.0252, "step": 148500 }, { "epoch": 0.2999995959873463, "grad_norm": 702.7595825195312, "learning_rate": 8.863446295325047e-06, "loss": 31.8621, "step": 148510 }, { "epoch": 0.30001979662003014, "grad_norm": 306.45257568359375, "learning_rate": 8.863224704333368e-06, "loss": 16.2856, "step": 148520 }, { "epoch": 0.30003999725271396, "grad_norm": 336.00836181640625, "learning_rate": 8.863003094512801e-06, "loss": 18.7955, "step": 148530 }, { "epoch": 0.3000601978853978, "grad_norm": 200.58193969726562, "learning_rate": 8.862781465864427e-06, "loss": 18.7465, "step": 148540 }, { "epoch": 0.3000803985180816, "grad_norm": 196.11317443847656, "learning_rate": 8.862559818389322e-06, "loss": 22.5644, "step": 148550 }, { "epoch": 0.3001005991507654, "grad_norm": 323.56439208984375, "learning_rate": 8.862338152088573e-06, "loss": 30.9142, "step": 148560 }, { "epoch": 0.30012079978344924, "grad_norm": 226.4674072265625, "learning_rate": 8.862116466963251e-06, "loss": 22.0332, "step": 148570 }, { "epoch": 0.300141000416133, "grad_norm": 355.0110778808594, "learning_rate": 8.861894763014444e-06, "loss": 15.9151, "step": 148580 }, { "epoch": 0.30016120104881683, "grad_norm": 181.67884826660156, "learning_rate": 8.861673040243231e-06, "loss": 22.8355, "step": 148590 }, { "epoch": 0.30018140168150065, "grad_norm": 181.09938049316406, "learning_rate": 8.861451298650692e-06, "loss": 19.4519, "step": 148600 }, { "epoch": 0.30020160231418447, "grad_norm": 513.7516479492188, "learning_rate": 8.861229538237908e-06, "loss": 22.9138, "step": 148610 }, { "epoch": 0.3002218029468683, "grad_norm": 60.56475067138672, "learning_rate": 8.861007759005959e-06, "loss": 39.4335, "step": 148620 }, { "epoch": 0.3002420035795521, "grad_norm": 266.1932067871094, "learning_rate": 8.860785960955926e-06, "loss": 32.3683, "step": 148630 }, { "epoch": 0.30026220421223593, "grad_norm": 435.8598937988281, "learning_rate": 8.860564144088891e-06, "loss": 22.5814, "step": 148640 }, { "epoch": 0.30028240484491975, "grad_norm": 177.30267333984375, "learning_rate": 8.860342308405933e-06, "loss": 44.1829, "step": 148650 }, { "epoch": 0.30030260547760357, "grad_norm": 67.58795166015625, "learning_rate": 8.860120453908138e-06, "loss": 40.7773, "step": 148660 }, { "epoch": 0.3003228061102874, "grad_norm": 217.70040893554688, "learning_rate": 8.859898580596581e-06, "loss": 14.8111, "step": 148670 }, { "epoch": 0.3003430067429712, "grad_norm": 448.2027587890625, "learning_rate": 8.859676688472349e-06, "loss": 31.2585, "step": 148680 }, { "epoch": 0.30036320737565503, "grad_norm": 24.141624450683594, "learning_rate": 8.85945477753652e-06, "loss": 23.974, "step": 148690 }, { "epoch": 0.30038340800833885, "grad_norm": 332.75189208984375, "learning_rate": 8.859232847790175e-06, "loss": 26.4986, "step": 148700 }, { "epoch": 0.3004036086410226, "grad_norm": 329.08935546875, "learning_rate": 8.859010899234399e-06, "loss": 15.5443, "step": 148710 }, { "epoch": 0.30042380927370643, "grad_norm": 516.3297729492188, "learning_rate": 8.85878893187027e-06, "loss": 20.6526, "step": 148720 }, { "epoch": 0.30044400990639025, "grad_norm": 145.7822265625, "learning_rate": 8.858566945698874e-06, "loss": 22.7873, "step": 148730 }, { "epoch": 0.3004642105390741, "grad_norm": 337.9742431640625, "learning_rate": 8.858344940721291e-06, "loss": 32.881, "step": 148740 }, { "epoch": 0.3004844111717579, "grad_norm": 250.65277099609375, "learning_rate": 8.858122916938601e-06, "loss": 25.0514, "step": 148750 }, { "epoch": 0.3005046118044417, "grad_norm": 515.59814453125, "learning_rate": 8.857900874351888e-06, "loss": 14.5908, "step": 148760 }, { "epoch": 0.30052481243712553, "grad_norm": 393.79498291015625, "learning_rate": 8.857678812962234e-06, "loss": 40.1108, "step": 148770 }, { "epoch": 0.30054501306980935, "grad_norm": 29.774646759033203, "learning_rate": 8.857456732770723e-06, "loss": 10.3275, "step": 148780 }, { "epoch": 0.3005652137024932, "grad_norm": 116.4747314453125, "learning_rate": 8.857234633778434e-06, "loss": 25.3463, "step": 148790 }, { "epoch": 0.300585414335177, "grad_norm": 473.7539978027344, "learning_rate": 8.857012515986452e-06, "loss": 20.4503, "step": 148800 }, { "epoch": 0.3006056149678608, "grad_norm": 261.7532043457031, "learning_rate": 8.856790379395858e-06, "loss": 16.4797, "step": 148810 }, { "epoch": 0.30062581560054463, "grad_norm": 688.706787109375, "learning_rate": 8.856568224007736e-06, "loss": 29.3886, "step": 148820 }, { "epoch": 0.3006460162332284, "grad_norm": 194.765380859375, "learning_rate": 8.856346049823169e-06, "loss": 19.9138, "step": 148830 }, { "epoch": 0.3006662168659122, "grad_norm": 108.89417266845703, "learning_rate": 8.856123856843236e-06, "loss": 18.6424, "step": 148840 }, { "epoch": 0.30068641749859604, "grad_norm": 90.82894897460938, "learning_rate": 8.855901645069026e-06, "loss": 31.6736, "step": 148850 }, { "epoch": 0.30070661813127986, "grad_norm": 144.2578125, "learning_rate": 8.855679414501619e-06, "loss": 17.0635, "step": 148860 }, { "epoch": 0.3007268187639637, "grad_norm": 513.58203125, "learning_rate": 8.855457165142096e-06, "loss": 14.2035, "step": 148870 }, { "epoch": 0.3007470193966475, "grad_norm": 609.9279174804688, "learning_rate": 8.855234896991544e-06, "loss": 35.6088, "step": 148880 }, { "epoch": 0.3007672200293313, "grad_norm": 46.75633239746094, "learning_rate": 8.855012610051045e-06, "loss": 18.8222, "step": 148890 }, { "epoch": 0.30078742066201514, "grad_norm": 90.35652923583984, "learning_rate": 8.854790304321682e-06, "loss": 12.0893, "step": 148900 }, { "epoch": 0.30080762129469896, "grad_norm": 297.77789306640625, "learning_rate": 8.854567979804538e-06, "loss": 14.7146, "step": 148910 }, { "epoch": 0.3008278219273828, "grad_norm": 346.3417053222656, "learning_rate": 8.854345636500698e-06, "loss": 26.3755, "step": 148920 }, { "epoch": 0.3008480225600666, "grad_norm": 188.66651916503906, "learning_rate": 8.854123274411243e-06, "loss": 18.2245, "step": 148930 }, { "epoch": 0.3008682231927504, "grad_norm": 349.3143005371094, "learning_rate": 8.85390089353726e-06, "loss": 19.7513, "step": 148940 }, { "epoch": 0.30088842382543424, "grad_norm": 448.4359130859375, "learning_rate": 8.853678493879832e-06, "loss": 39.0152, "step": 148950 }, { "epoch": 0.300908624458118, "grad_norm": 168.58511352539062, "learning_rate": 8.853456075440041e-06, "loss": 19.9283, "step": 148960 }, { "epoch": 0.3009288250908018, "grad_norm": 81.2247314453125, "learning_rate": 8.853233638218974e-06, "loss": 25.9647, "step": 148970 }, { "epoch": 0.30094902572348564, "grad_norm": 180.07611083984375, "learning_rate": 8.853011182217712e-06, "loss": 11.0252, "step": 148980 }, { "epoch": 0.30096922635616946, "grad_norm": 169.720458984375, "learning_rate": 8.852788707437343e-06, "loss": 18.0168, "step": 148990 }, { "epoch": 0.3009894269888533, "grad_norm": 43.225494384765625, "learning_rate": 8.852566213878947e-06, "loss": 18.6645, "step": 149000 }, { "epoch": 0.3010096276215371, "grad_norm": 509.3212585449219, "learning_rate": 8.852343701543611e-06, "loss": 17.5929, "step": 149010 }, { "epoch": 0.3010298282542209, "grad_norm": 682.3831176757812, "learning_rate": 8.85212117043242e-06, "loss": 33.0887, "step": 149020 }, { "epoch": 0.30105002888690474, "grad_norm": 284.67645263671875, "learning_rate": 8.851898620546456e-06, "loss": 14.784, "step": 149030 }, { "epoch": 0.30107022951958856, "grad_norm": 211.795654296875, "learning_rate": 8.851676051886805e-06, "loss": 27.9248, "step": 149040 }, { "epoch": 0.3010904301522724, "grad_norm": 240.46051025390625, "learning_rate": 8.851453464454555e-06, "loss": 53.266, "step": 149050 }, { "epoch": 0.3011106307849562, "grad_norm": 118.68473815917969, "learning_rate": 8.851230858250785e-06, "loss": 14.8748, "step": 149060 }, { "epoch": 0.30113083141764, "grad_norm": 187.1436767578125, "learning_rate": 8.851008233276586e-06, "loss": 22.8978, "step": 149070 }, { "epoch": 0.30115103205032384, "grad_norm": 360.6593017578125, "learning_rate": 8.850785589533038e-06, "loss": 23.2066, "step": 149080 }, { "epoch": 0.3011712326830076, "grad_norm": 564.84765625, "learning_rate": 8.850562927021227e-06, "loss": 24.929, "step": 149090 }, { "epoch": 0.3011914333156914, "grad_norm": 240.23902893066406, "learning_rate": 8.85034024574224e-06, "loss": 25.0695, "step": 149100 }, { "epoch": 0.30121163394837525, "grad_norm": 49.229530334472656, "learning_rate": 8.850117545697163e-06, "loss": 20.8541, "step": 149110 }, { "epoch": 0.30123183458105907, "grad_norm": 872.9815063476562, "learning_rate": 8.849894826887078e-06, "loss": 26.2246, "step": 149120 }, { "epoch": 0.3012520352137429, "grad_norm": 392.39801025390625, "learning_rate": 8.849672089313074e-06, "loss": 18.9245, "step": 149130 }, { "epoch": 0.3012722358464267, "grad_norm": 443.2808837890625, "learning_rate": 8.849449332976235e-06, "loss": 26.5645, "step": 149140 }, { "epoch": 0.3012924364791105, "grad_norm": 298.9245300292969, "learning_rate": 8.849226557877647e-06, "loss": 21.537, "step": 149150 }, { "epoch": 0.30131263711179435, "grad_norm": 478.32098388671875, "learning_rate": 8.849003764018395e-06, "loss": 19.366, "step": 149160 }, { "epoch": 0.30133283774447817, "grad_norm": 286.066162109375, "learning_rate": 8.848780951399566e-06, "loss": 22.9028, "step": 149170 }, { "epoch": 0.301353038377162, "grad_norm": 210.3717498779297, "learning_rate": 8.848558120022246e-06, "loss": 16.958, "step": 149180 }, { "epoch": 0.3013732390098458, "grad_norm": 124.34255981445312, "learning_rate": 8.84833526988752e-06, "loss": 24.1864, "step": 149190 }, { "epoch": 0.3013934396425296, "grad_norm": 403.6701354980469, "learning_rate": 8.848112400996473e-06, "loss": 24.6661, "step": 149200 }, { "epoch": 0.30141364027521345, "grad_norm": 315.61077880859375, "learning_rate": 8.847889513350195e-06, "loss": 19.3281, "step": 149210 }, { "epoch": 0.3014338409078972, "grad_norm": 519.0108032226562, "learning_rate": 8.847666606949768e-06, "loss": 32.7549, "step": 149220 }, { "epoch": 0.30145404154058103, "grad_norm": 93.99861907958984, "learning_rate": 8.847443681796283e-06, "loss": 25.5197, "step": 149230 }, { "epoch": 0.30147424217326485, "grad_norm": 165.79452514648438, "learning_rate": 8.847220737890823e-06, "loss": 15.9813, "step": 149240 }, { "epoch": 0.30149444280594867, "grad_norm": 271.00506591796875, "learning_rate": 8.846997775234476e-06, "loss": 16.061, "step": 149250 }, { "epoch": 0.3015146434386325, "grad_norm": 136.71595764160156, "learning_rate": 8.846774793828328e-06, "loss": 21.9985, "step": 149260 }, { "epoch": 0.3015348440713163, "grad_norm": 102.89586639404297, "learning_rate": 8.846551793673467e-06, "loss": 11.9977, "step": 149270 }, { "epoch": 0.30155504470400013, "grad_norm": 626.992919921875, "learning_rate": 8.846328774770978e-06, "loss": 19.0882, "step": 149280 }, { "epoch": 0.30157524533668395, "grad_norm": 268.0168151855469, "learning_rate": 8.84610573712195e-06, "loss": 27.2602, "step": 149290 }, { "epoch": 0.30159544596936777, "grad_norm": 109.54322052001953, "learning_rate": 8.84588268072747e-06, "loss": 19.3889, "step": 149300 }, { "epoch": 0.3016156466020516, "grad_norm": 57.0951042175293, "learning_rate": 8.845659605588622e-06, "loss": 10.5121, "step": 149310 }, { "epoch": 0.3016358472347354, "grad_norm": 262.10040283203125, "learning_rate": 8.845436511706497e-06, "loss": 24.4718, "step": 149320 }, { "epoch": 0.30165604786741923, "grad_norm": 1235.91015625, "learning_rate": 8.84521339908218e-06, "loss": 24.8839, "step": 149330 }, { "epoch": 0.30167624850010305, "grad_norm": 315.8169860839844, "learning_rate": 8.84499026771676e-06, "loss": 34.648, "step": 149340 }, { "epoch": 0.3016964491327868, "grad_norm": 454.62066650390625, "learning_rate": 8.844767117611324e-06, "loss": 24.4548, "step": 149350 }, { "epoch": 0.30171664976547063, "grad_norm": 301.01788330078125, "learning_rate": 8.844543948766958e-06, "loss": 15.4215, "step": 149360 }, { "epoch": 0.30173685039815445, "grad_norm": 2607.0302734375, "learning_rate": 8.844320761184753e-06, "loss": 17.549, "step": 149370 }, { "epoch": 0.3017570510308383, "grad_norm": 366.6160583496094, "learning_rate": 8.844097554865792e-06, "loss": 28.4288, "step": 149380 }, { "epoch": 0.3017772516635221, "grad_norm": 719.3068237304688, "learning_rate": 8.84387432981117e-06, "loss": 19.5414, "step": 149390 }, { "epoch": 0.3017974522962059, "grad_norm": 311.5763244628906, "learning_rate": 8.843651086021966e-06, "loss": 17.2605, "step": 149400 }, { "epoch": 0.30181765292888973, "grad_norm": 313.7657470703125, "learning_rate": 8.843427823499275e-06, "loss": 12.724, "step": 149410 }, { "epoch": 0.30183785356157355, "grad_norm": 161.529052734375, "learning_rate": 8.843204542244184e-06, "loss": 16.3962, "step": 149420 }, { "epoch": 0.3018580541942574, "grad_norm": 566.6034545898438, "learning_rate": 8.842981242257779e-06, "loss": 14.3367, "step": 149430 }, { "epoch": 0.3018782548269412, "grad_norm": 229.3406524658203, "learning_rate": 8.84275792354115e-06, "loss": 27.4325, "step": 149440 }, { "epoch": 0.301898455459625, "grad_norm": 389.46063232421875, "learning_rate": 8.842534586095383e-06, "loss": 23.4529, "step": 149450 }, { "epoch": 0.30191865609230883, "grad_norm": 222.732666015625, "learning_rate": 8.842311229921571e-06, "loss": 17.127, "step": 149460 }, { "epoch": 0.3019388567249926, "grad_norm": 239.08619689941406, "learning_rate": 8.8420878550208e-06, "loss": 21.0125, "step": 149470 }, { "epoch": 0.3019590573576764, "grad_norm": 215.65081787109375, "learning_rate": 8.841864461394158e-06, "loss": 23.8589, "step": 149480 }, { "epoch": 0.30197925799036024, "grad_norm": 43.942481994628906, "learning_rate": 8.841641049042732e-06, "loss": 33.5604, "step": 149490 }, { "epoch": 0.30199945862304406, "grad_norm": 549.9864501953125, "learning_rate": 8.841417617967618e-06, "loss": 39.1823, "step": 149500 }, { "epoch": 0.3020196592557279, "grad_norm": 176.6044464111328, "learning_rate": 8.841194168169897e-06, "loss": 15.8606, "step": 149510 }, { "epoch": 0.3020398598884117, "grad_norm": 351.5376281738281, "learning_rate": 8.840970699650665e-06, "loss": 23.5694, "step": 149520 }, { "epoch": 0.3020600605210955, "grad_norm": 168.21014404296875, "learning_rate": 8.840747212411005e-06, "loss": 19.1928, "step": 149530 }, { "epoch": 0.30208026115377934, "grad_norm": 330.1772766113281, "learning_rate": 8.84052370645201e-06, "loss": 41.1221, "step": 149540 }, { "epoch": 0.30210046178646316, "grad_norm": 380.2062072753906, "learning_rate": 8.840300181774767e-06, "loss": 19.8514, "step": 149550 }, { "epoch": 0.302120662419147, "grad_norm": 201.8834991455078, "learning_rate": 8.840076638380368e-06, "loss": 22.2479, "step": 149560 }, { "epoch": 0.3021408630518308, "grad_norm": 333.9659118652344, "learning_rate": 8.8398530762699e-06, "loss": 12.4911, "step": 149570 }, { "epoch": 0.3021610636845146, "grad_norm": 174.77012634277344, "learning_rate": 8.839629495444455e-06, "loss": 15.5716, "step": 149580 }, { "epoch": 0.30218126431719844, "grad_norm": 228.39297485351562, "learning_rate": 8.83940589590512e-06, "loss": 25.9822, "step": 149590 }, { "epoch": 0.3022014649498822, "grad_norm": 588.8809814453125, "learning_rate": 8.83918227765299e-06, "loss": 16.0205, "step": 149600 }, { "epoch": 0.302221665582566, "grad_norm": 546.7249755859375, "learning_rate": 8.838958640689146e-06, "loss": 19.8234, "step": 149610 }, { "epoch": 0.30224186621524984, "grad_norm": 142.4442596435547, "learning_rate": 8.838734985014686e-06, "loss": 9.1933, "step": 149620 }, { "epoch": 0.30226206684793366, "grad_norm": 174.6310272216797, "learning_rate": 8.838511310630697e-06, "loss": 16.2006, "step": 149630 }, { "epoch": 0.3022822674806175, "grad_norm": 269.0298156738281, "learning_rate": 8.83828761753827e-06, "loss": 11.3419, "step": 149640 }, { "epoch": 0.3023024681133013, "grad_norm": 579.0819091796875, "learning_rate": 8.838063905738495e-06, "loss": 29.8704, "step": 149650 }, { "epoch": 0.3023226687459851, "grad_norm": 272.4783630371094, "learning_rate": 8.83784017523246e-06, "loss": 18.0082, "step": 149660 }, { "epoch": 0.30234286937866894, "grad_norm": 112.41419219970703, "learning_rate": 8.837616426021259e-06, "loss": 30.8317, "step": 149670 }, { "epoch": 0.30236307001135276, "grad_norm": 310.1944274902344, "learning_rate": 8.837392658105981e-06, "loss": 15.0273, "step": 149680 }, { "epoch": 0.3023832706440366, "grad_norm": 115.42916870117188, "learning_rate": 8.837168871487715e-06, "loss": 22.3966, "step": 149690 }, { "epoch": 0.3024034712767204, "grad_norm": 263.5594482421875, "learning_rate": 8.836945066167556e-06, "loss": 20.2742, "step": 149700 }, { "epoch": 0.3024236719094042, "grad_norm": 207.4596710205078, "learning_rate": 8.83672124214659e-06, "loss": 45.9726, "step": 149710 }, { "epoch": 0.30244387254208804, "grad_norm": 629.0947265625, "learning_rate": 8.83649739942591e-06, "loss": 13.5692, "step": 149720 }, { "epoch": 0.3024640731747718, "grad_norm": 295.7816162109375, "learning_rate": 8.836273538006608e-06, "loss": 18.6016, "step": 149730 }, { "epoch": 0.30248427380745563, "grad_norm": 128.1962432861328, "learning_rate": 8.836049657889774e-06, "loss": 22.346, "step": 149740 }, { "epoch": 0.30250447444013945, "grad_norm": 415.4438781738281, "learning_rate": 8.8358257590765e-06, "loss": 15.8932, "step": 149750 }, { "epoch": 0.30252467507282327, "grad_norm": 316.4052429199219, "learning_rate": 8.835601841567874e-06, "loss": 19.9089, "step": 149760 }, { "epoch": 0.3025448757055071, "grad_norm": 560.1199340820312, "learning_rate": 8.835377905364992e-06, "loss": 12.1495, "step": 149770 }, { "epoch": 0.3025650763381909, "grad_norm": 1260.89404296875, "learning_rate": 8.835153950468943e-06, "loss": 42.023, "step": 149780 }, { "epoch": 0.30258527697087473, "grad_norm": 244.52769470214844, "learning_rate": 8.834929976880818e-06, "loss": 22.3389, "step": 149790 }, { "epoch": 0.30260547760355855, "grad_norm": 395.49908447265625, "learning_rate": 8.834705984601708e-06, "loss": 15.8753, "step": 149800 }, { "epoch": 0.30262567823624237, "grad_norm": 401.11737060546875, "learning_rate": 8.834481973632709e-06, "loss": 26.6827, "step": 149810 }, { "epoch": 0.3026458788689262, "grad_norm": 575.2360229492188, "learning_rate": 8.83425794397491e-06, "loss": 18.1424, "step": 149820 }, { "epoch": 0.30266607950161, "grad_norm": 100.06526184082031, "learning_rate": 8.8340338956294e-06, "loss": 20.4223, "step": 149830 }, { "epoch": 0.30268628013429383, "grad_norm": 426.4596862792969, "learning_rate": 8.833809828597275e-06, "loss": 12.0891, "step": 149840 }, { "epoch": 0.30270648076697765, "grad_norm": 261.0762023925781, "learning_rate": 8.833585742879627e-06, "loss": 19.8122, "step": 149850 }, { "epoch": 0.3027266813996614, "grad_norm": 180.52444458007812, "learning_rate": 8.833361638477546e-06, "loss": 24.3726, "step": 149860 }, { "epoch": 0.30274688203234523, "grad_norm": 346.71209716796875, "learning_rate": 8.833137515392125e-06, "loss": 50.2721, "step": 149870 }, { "epoch": 0.30276708266502905, "grad_norm": 773.8203735351562, "learning_rate": 8.832913373624458e-06, "loss": 26.9615, "step": 149880 }, { "epoch": 0.3027872832977129, "grad_norm": 430.3808898925781, "learning_rate": 8.832689213175636e-06, "loss": 23.6458, "step": 149890 }, { "epoch": 0.3028074839303967, "grad_norm": 230.22708129882812, "learning_rate": 8.83246503404675e-06, "loss": 14.837, "step": 149900 }, { "epoch": 0.3028276845630805, "grad_norm": 370.8480224609375, "learning_rate": 8.832240836238894e-06, "loss": 24.8092, "step": 149910 }, { "epoch": 0.30284788519576433, "grad_norm": 183.66943359375, "learning_rate": 8.832016619753164e-06, "loss": 29.1511, "step": 149920 }, { "epoch": 0.30286808582844815, "grad_norm": 149.44178771972656, "learning_rate": 8.831792384590646e-06, "loss": 17.5413, "step": 149930 }, { "epoch": 0.302888286461132, "grad_norm": 158.71385192871094, "learning_rate": 8.831568130752439e-06, "loss": 21.1063, "step": 149940 }, { "epoch": 0.3029084870938158, "grad_norm": 162.0598907470703, "learning_rate": 8.831343858239634e-06, "loss": 9.443, "step": 149950 }, { "epoch": 0.3029286877264996, "grad_norm": 300.6808166503906, "learning_rate": 8.831119567053323e-06, "loss": 19.1811, "step": 149960 }, { "epoch": 0.30294888835918343, "grad_norm": 588.0015258789062, "learning_rate": 8.8308952571946e-06, "loss": 28.6252, "step": 149970 }, { "epoch": 0.30296908899186725, "grad_norm": 257.81170654296875, "learning_rate": 8.830670928664558e-06, "loss": 19.7015, "step": 149980 }, { "epoch": 0.302989289624551, "grad_norm": 183.21026611328125, "learning_rate": 8.83044658146429e-06, "loss": 18.0776, "step": 149990 }, { "epoch": 0.30300949025723484, "grad_norm": 343.0096435546875, "learning_rate": 8.83022221559489e-06, "loss": 16.5193, "step": 150000 }, { "epoch": 0.30302969088991866, "grad_norm": 427.2884826660156, "learning_rate": 8.829997831057454e-06, "loss": 34.213, "step": 150010 }, { "epoch": 0.3030498915226025, "grad_norm": 949.879150390625, "learning_rate": 8.82977342785307e-06, "loss": 29.5077, "step": 150020 }, { "epoch": 0.3030700921552863, "grad_norm": 156.7051239013672, "learning_rate": 8.829549005982836e-06, "loss": 23.2333, "step": 150030 }, { "epoch": 0.3030902927879701, "grad_norm": 0.0, "learning_rate": 8.829324565447844e-06, "loss": 10.9345, "step": 150040 }, { "epoch": 0.30311049342065394, "grad_norm": 252.72640991210938, "learning_rate": 8.829100106249189e-06, "loss": 21.2051, "step": 150050 }, { "epoch": 0.30313069405333776, "grad_norm": 491.263671875, "learning_rate": 8.828875628387964e-06, "loss": 18.8556, "step": 150060 }, { "epoch": 0.3031508946860216, "grad_norm": 545.97119140625, "learning_rate": 8.828651131865264e-06, "loss": 11.3543, "step": 150070 }, { "epoch": 0.3031710953187054, "grad_norm": 339.7392883300781, "learning_rate": 8.828426616682184e-06, "loss": 24.9122, "step": 150080 }, { "epoch": 0.3031912959513892, "grad_norm": 449.7311706542969, "learning_rate": 8.828202082839816e-06, "loss": 26.5837, "step": 150090 }, { "epoch": 0.30321149658407304, "grad_norm": 333.8340759277344, "learning_rate": 8.827977530339254e-06, "loss": 16.1228, "step": 150100 }, { "epoch": 0.3032316972167568, "grad_norm": 236.1326446533203, "learning_rate": 8.827752959181594e-06, "loss": 33.9399, "step": 150110 }, { "epoch": 0.3032518978494406, "grad_norm": 29.79181671142578, "learning_rate": 8.827528369367932e-06, "loss": 19.8086, "step": 150120 }, { "epoch": 0.30327209848212444, "grad_norm": 231.19970703125, "learning_rate": 8.82730376089936e-06, "loss": 35.4353, "step": 150130 }, { "epoch": 0.30329229911480826, "grad_norm": 483.08441162109375, "learning_rate": 8.827079133776975e-06, "loss": 40.9494, "step": 150140 }, { "epoch": 0.3033124997474921, "grad_norm": 271.4341125488281, "learning_rate": 8.826854488001869e-06, "loss": 20.7409, "step": 150150 }, { "epoch": 0.3033327003801759, "grad_norm": 130.07701110839844, "learning_rate": 8.826629823575138e-06, "loss": 16.1849, "step": 150160 }, { "epoch": 0.3033529010128597, "grad_norm": 229.5166473388672, "learning_rate": 8.826405140497878e-06, "loss": 20.0643, "step": 150170 }, { "epoch": 0.30337310164554354, "grad_norm": 437.6599426269531, "learning_rate": 8.826180438771184e-06, "loss": 10.3247, "step": 150180 }, { "epoch": 0.30339330227822736, "grad_norm": 384.4480895996094, "learning_rate": 8.82595571839615e-06, "loss": 18.5568, "step": 150190 }, { "epoch": 0.3034135029109112, "grad_norm": 224.5307159423828, "learning_rate": 8.825730979373873e-06, "loss": 17.4975, "step": 150200 }, { "epoch": 0.303433703543595, "grad_norm": 358.0045166015625, "learning_rate": 8.825506221705445e-06, "loss": 29.8943, "step": 150210 }, { "epoch": 0.3034539041762788, "grad_norm": 231.1409912109375, "learning_rate": 8.825281445391965e-06, "loss": 27.0507, "step": 150220 }, { "epoch": 0.30347410480896264, "grad_norm": 201.58119201660156, "learning_rate": 8.825056650434528e-06, "loss": 33.327, "step": 150230 }, { "epoch": 0.3034943054416464, "grad_norm": 277.3409118652344, "learning_rate": 8.824831836834227e-06, "loss": 28.443, "step": 150240 }, { "epoch": 0.3035145060743302, "grad_norm": 604.5170288085938, "learning_rate": 8.824607004592161e-06, "loss": 39.9601, "step": 150250 }, { "epoch": 0.30353470670701405, "grad_norm": 393.2569580078125, "learning_rate": 8.824382153709423e-06, "loss": 9.601, "step": 150260 }, { "epoch": 0.30355490733969787, "grad_norm": 436.1141662597656, "learning_rate": 8.824157284187111e-06, "loss": 12.8409, "step": 150270 }, { "epoch": 0.3035751079723817, "grad_norm": 229.76748657226562, "learning_rate": 8.82393239602632e-06, "loss": 16.3983, "step": 150280 }, { "epoch": 0.3035953086050655, "grad_norm": 236.46310424804688, "learning_rate": 8.823707489228145e-06, "loss": 26.433, "step": 150290 }, { "epoch": 0.3036155092377493, "grad_norm": 271.25970458984375, "learning_rate": 8.823482563793687e-06, "loss": 20.2384, "step": 150300 }, { "epoch": 0.30363570987043315, "grad_norm": 348.17724609375, "learning_rate": 8.823257619724036e-06, "loss": 16.1915, "step": 150310 }, { "epoch": 0.30365591050311697, "grad_norm": 409.085205078125, "learning_rate": 8.82303265702029e-06, "loss": 11.8221, "step": 150320 }, { "epoch": 0.3036761111358008, "grad_norm": 208.2102813720703, "learning_rate": 8.82280767568355e-06, "loss": 14.8738, "step": 150330 }, { "epoch": 0.3036963117684846, "grad_norm": 322.634765625, "learning_rate": 8.822582675714906e-06, "loss": 28.5634, "step": 150340 }, { "epoch": 0.3037165124011684, "grad_norm": 444.4468994140625, "learning_rate": 8.82235765711546e-06, "loss": 31.7822, "step": 150350 }, { "epoch": 0.30373671303385225, "grad_norm": 276.8089904785156, "learning_rate": 8.822132619886303e-06, "loss": 14.0243, "step": 150360 }, { "epoch": 0.303756913666536, "grad_norm": 160.05136108398438, "learning_rate": 8.821907564028538e-06, "loss": 15.7162, "step": 150370 }, { "epoch": 0.30377711429921983, "grad_norm": 341.22698974609375, "learning_rate": 8.821682489543259e-06, "loss": 40.8517, "step": 150380 }, { "epoch": 0.30379731493190365, "grad_norm": 1298.8233642578125, "learning_rate": 8.821457396431563e-06, "loss": 38.0791, "step": 150390 }, { "epoch": 0.30381751556458747, "grad_norm": 326.71197509765625, "learning_rate": 8.821232284694545e-06, "loss": 12.4267, "step": 150400 }, { "epoch": 0.3038377161972713, "grad_norm": 133.72940063476562, "learning_rate": 8.821007154333308e-06, "loss": 12.2558, "step": 150410 }, { "epoch": 0.3038579168299551, "grad_norm": 115.3331298828125, "learning_rate": 8.820782005348943e-06, "loss": 33.7463, "step": 150420 }, { "epoch": 0.30387811746263893, "grad_norm": 418.9744873046875, "learning_rate": 8.82055683774255e-06, "loss": 17.2669, "step": 150430 }, { "epoch": 0.30389831809532275, "grad_norm": 282.3912048339844, "learning_rate": 8.820331651515226e-06, "loss": 20.8254, "step": 150440 }, { "epoch": 0.30391851872800657, "grad_norm": 320.9784240722656, "learning_rate": 8.820106446668071e-06, "loss": 11.1142, "step": 150450 }, { "epoch": 0.3039387193606904, "grad_norm": 179.88291931152344, "learning_rate": 8.819881223202179e-06, "loss": 22.7958, "step": 150460 }, { "epoch": 0.3039589199933742, "grad_norm": 117.36296844482422, "learning_rate": 8.819655981118649e-06, "loss": 26.0957, "step": 150470 }, { "epoch": 0.30397912062605803, "grad_norm": 240.52609252929688, "learning_rate": 8.819430720418579e-06, "loss": 26.5956, "step": 150480 }, { "epoch": 0.30399932125874185, "grad_norm": 3.9979071617126465, "learning_rate": 8.819205441103067e-06, "loss": 7.5535, "step": 150490 }, { "epoch": 0.3040195218914256, "grad_norm": 518.8795166015625, "learning_rate": 8.818980143173212e-06, "loss": 18.7704, "step": 150500 }, { "epoch": 0.30403972252410943, "grad_norm": 289.16851806640625, "learning_rate": 8.818754826630109e-06, "loss": 16.7201, "step": 150510 }, { "epoch": 0.30405992315679325, "grad_norm": 356.1964111328125, "learning_rate": 8.81852949147486e-06, "loss": 25.1481, "step": 150520 }, { "epoch": 0.3040801237894771, "grad_norm": 367.7644958496094, "learning_rate": 8.81830413770856e-06, "loss": 16.5194, "step": 150530 }, { "epoch": 0.3041003244221609, "grad_norm": 238.68505859375, "learning_rate": 8.818078765332309e-06, "loss": 18.7244, "step": 150540 }, { "epoch": 0.3041205250548447, "grad_norm": 149.02244567871094, "learning_rate": 8.817853374347208e-06, "loss": 12.5125, "step": 150550 }, { "epoch": 0.30414072568752853, "grad_norm": 268.00592041015625, "learning_rate": 8.81762796475435e-06, "loss": 20.7386, "step": 150560 }, { "epoch": 0.30416092632021235, "grad_norm": 553.5607299804688, "learning_rate": 8.817402536554835e-06, "loss": 25.3672, "step": 150570 }, { "epoch": 0.3041811269528962, "grad_norm": 221.80259704589844, "learning_rate": 8.817177089749766e-06, "loss": 17.6804, "step": 150580 }, { "epoch": 0.30420132758558, "grad_norm": 37.90202331542969, "learning_rate": 8.816951624340238e-06, "loss": 20.1902, "step": 150590 }, { "epoch": 0.3042215282182638, "grad_norm": 199.37571716308594, "learning_rate": 8.81672614032735e-06, "loss": 8.5211, "step": 150600 }, { "epoch": 0.30424172885094763, "grad_norm": 366.9526062011719, "learning_rate": 8.816500637712201e-06, "loss": 24.0771, "step": 150610 }, { "epoch": 0.30426192948363145, "grad_norm": 430.632080078125, "learning_rate": 8.816275116495891e-06, "loss": 20.5643, "step": 150620 }, { "epoch": 0.3042821301163152, "grad_norm": 311.45361328125, "learning_rate": 8.816049576679521e-06, "loss": 16.6748, "step": 150630 }, { "epoch": 0.30430233074899904, "grad_norm": 468.55303955078125, "learning_rate": 8.815824018264187e-06, "loss": 21.0465, "step": 150640 }, { "epoch": 0.30432253138168286, "grad_norm": 245.30552673339844, "learning_rate": 8.815598441250987e-06, "loss": 15.5359, "step": 150650 }, { "epoch": 0.3043427320143667, "grad_norm": 499.44134521484375, "learning_rate": 8.815372845641027e-06, "loss": 22.9817, "step": 150660 }, { "epoch": 0.3043629326470505, "grad_norm": 267.3191223144531, "learning_rate": 8.815147231435402e-06, "loss": 18.3047, "step": 150670 }, { "epoch": 0.3043831332797343, "grad_norm": 314.10638427734375, "learning_rate": 8.81492159863521e-06, "loss": 26.0052, "step": 150680 }, { "epoch": 0.30440333391241814, "grad_norm": 275.7088623046875, "learning_rate": 8.814695947241555e-06, "loss": 20.6939, "step": 150690 }, { "epoch": 0.30442353454510196, "grad_norm": 1033.9215087890625, "learning_rate": 8.814470277255532e-06, "loss": 30.5142, "step": 150700 }, { "epoch": 0.3044437351777858, "grad_norm": 208.34619140625, "learning_rate": 8.814244588678245e-06, "loss": 21.474, "step": 150710 }, { "epoch": 0.3044639358104696, "grad_norm": 329.2478332519531, "learning_rate": 8.814018881510793e-06, "loss": 25.7805, "step": 150720 }, { "epoch": 0.3044841364431534, "grad_norm": 101.50041961669922, "learning_rate": 8.813793155754273e-06, "loss": 24.8876, "step": 150730 }, { "epoch": 0.30450433707583724, "grad_norm": 212.9734649658203, "learning_rate": 8.81356741140979e-06, "loss": 30.4796, "step": 150740 }, { "epoch": 0.304524537708521, "grad_norm": 355.3797302246094, "learning_rate": 8.813341648478443e-06, "loss": 18.6154, "step": 150750 }, { "epoch": 0.3045447383412048, "grad_norm": 423.56414794921875, "learning_rate": 8.81311586696133e-06, "loss": 34.1467, "step": 150760 }, { "epoch": 0.30456493897388864, "grad_norm": 394.8096923828125, "learning_rate": 8.812890066859552e-06, "loss": 31.5995, "step": 150770 }, { "epoch": 0.30458513960657246, "grad_norm": 96.9171142578125, "learning_rate": 8.81266424817421e-06, "loss": 16.4718, "step": 150780 }, { "epoch": 0.3046053402392563, "grad_norm": 153.63937377929688, "learning_rate": 8.812438410906407e-06, "loss": 16.757, "step": 150790 }, { "epoch": 0.3046255408719401, "grad_norm": 183.81781005859375, "learning_rate": 8.81221255505724e-06, "loss": 28.179, "step": 150800 }, { "epoch": 0.3046457415046239, "grad_norm": 216.14907836914062, "learning_rate": 8.811986680627812e-06, "loss": 15.5354, "step": 150810 }, { "epoch": 0.30466594213730774, "grad_norm": 208.236083984375, "learning_rate": 8.811760787619224e-06, "loss": 29.6939, "step": 150820 }, { "epoch": 0.30468614276999156, "grad_norm": 105.2694091796875, "learning_rate": 8.811534876032575e-06, "loss": 22.5369, "step": 150830 }, { "epoch": 0.3047063434026754, "grad_norm": 881.2667236328125, "learning_rate": 8.811308945868966e-06, "loss": 14.6175, "step": 150840 }, { "epoch": 0.3047265440353592, "grad_norm": 172.46746826171875, "learning_rate": 8.811082997129501e-06, "loss": 17.2617, "step": 150850 }, { "epoch": 0.304746744668043, "grad_norm": 202.1938018798828, "learning_rate": 8.81085702981528e-06, "loss": 17.3231, "step": 150860 }, { "epoch": 0.30476694530072684, "grad_norm": 634.205810546875, "learning_rate": 8.810631043927405e-06, "loss": 22.1901, "step": 150870 }, { "epoch": 0.3047871459334106, "grad_norm": 250.703857421875, "learning_rate": 8.810405039466973e-06, "loss": 27.0126, "step": 150880 }, { "epoch": 0.30480734656609443, "grad_norm": 310.2431945800781, "learning_rate": 8.810179016435092e-06, "loss": 18.4128, "step": 150890 }, { "epoch": 0.30482754719877825, "grad_norm": 442.4149169921875, "learning_rate": 8.80995297483286e-06, "loss": 30.0083, "step": 150900 }, { "epoch": 0.30484774783146207, "grad_norm": 371.34844970703125, "learning_rate": 8.80972691466138e-06, "loss": 21.6954, "step": 150910 }, { "epoch": 0.3048679484641459, "grad_norm": 305.8639221191406, "learning_rate": 8.809500835921751e-06, "loss": 32.2652, "step": 150920 }, { "epoch": 0.3048881490968297, "grad_norm": 164.29379272460938, "learning_rate": 8.809274738615079e-06, "loss": 10.7187, "step": 150930 }, { "epoch": 0.30490834972951353, "grad_norm": 368.4850769042969, "learning_rate": 8.809048622742463e-06, "loss": 9.2364, "step": 150940 }, { "epoch": 0.30492855036219735, "grad_norm": 170.6142120361328, "learning_rate": 8.808822488305005e-06, "loss": 17.0626, "step": 150950 }, { "epoch": 0.30494875099488117, "grad_norm": 244.25697326660156, "learning_rate": 8.80859633530381e-06, "loss": 21.6748, "step": 150960 }, { "epoch": 0.304968951627565, "grad_norm": 765.2318115234375, "learning_rate": 8.808370163739978e-06, "loss": 27.8353, "step": 150970 }, { "epoch": 0.3049891522602488, "grad_norm": 47.06742858886719, "learning_rate": 8.808143973614612e-06, "loss": 9.8086, "step": 150980 }, { "epoch": 0.30500935289293263, "grad_norm": 203.1630859375, "learning_rate": 8.807917764928813e-06, "loss": 19.5413, "step": 150990 }, { "epoch": 0.30502955352561645, "grad_norm": 654.4868774414062, "learning_rate": 8.807691537683685e-06, "loss": 47.4073, "step": 151000 }, { "epoch": 0.3050497541583002, "grad_norm": 413.98651123046875, "learning_rate": 8.807465291880331e-06, "loss": 46.9099, "step": 151010 }, { "epoch": 0.30506995479098403, "grad_norm": 230.1887969970703, "learning_rate": 8.807239027519852e-06, "loss": 11.7474, "step": 151020 }, { "epoch": 0.30509015542366785, "grad_norm": 262.0575866699219, "learning_rate": 8.807012744603352e-06, "loss": 11.701, "step": 151030 }, { "epoch": 0.3051103560563517, "grad_norm": 250.36607360839844, "learning_rate": 8.806786443131932e-06, "loss": 23.7511, "step": 151040 }, { "epoch": 0.3051305566890355, "grad_norm": 325.5229797363281, "learning_rate": 8.8065601231067e-06, "loss": 14.8488, "step": 151050 }, { "epoch": 0.3051507573217193, "grad_norm": 287.345703125, "learning_rate": 8.806333784528754e-06, "loss": 25.0761, "step": 151060 }, { "epoch": 0.30517095795440313, "grad_norm": 823.4463500976562, "learning_rate": 8.806107427399198e-06, "loss": 19.4375, "step": 151070 }, { "epoch": 0.30519115858708695, "grad_norm": 308.3523254394531, "learning_rate": 8.805881051719137e-06, "loss": 23.9936, "step": 151080 }, { "epoch": 0.3052113592197708, "grad_norm": 807.494140625, "learning_rate": 8.805654657489673e-06, "loss": 20.8709, "step": 151090 }, { "epoch": 0.3052315598524546, "grad_norm": 204.667724609375, "learning_rate": 8.80542824471191e-06, "loss": 32.495, "step": 151100 }, { "epoch": 0.3052517604851384, "grad_norm": 307.1479187011719, "learning_rate": 8.80520181338695e-06, "loss": 14.6147, "step": 151110 }, { "epoch": 0.30527196111782223, "grad_norm": 237.7410888671875, "learning_rate": 8.8049753635159e-06, "loss": 36.7334, "step": 151120 }, { "epoch": 0.30529216175050605, "grad_norm": 339.5580749511719, "learning_rate": 8.80474889509986e-06, "loss": 15.5689, "step": 151130 }, { "epoch": 0.3053123623831898, "grad_norm": 343.6465148925781, "learning_rate": 8.804522408139936e-06, "loss": 23.329, "step": 151140 }, { "epoch": 0.30533256301587364, "grad_norm": 0.0, "learning_rate": 8.804295902637233e-06, "loss": 11.5342, "step": 151150 }, { "epoch": 0.30535276364855746, "grad_norm": 63.11260223388672, "learning_rate": 8.80406937859285e-06, "loss": 11.1499, "step": 151160 }, { "epoch": 0.3053729642812413, "grad_norm": 175.1032257080078, "learning_rate": 8.803842836007895e-06, "loss": 15.0782, "step": 151170 }, { "epoch": 0.3053931649139251, "grad_norm": 170.4766082763672, "learning_rate": 8.803616274883473e-06, "loss": 22.9681, "step": 151180 }, { "epoch": 0.3054133655466089, "grad_norm": 33.5782585144043, "learning_rate": 8.803389695220685e-06, "loss": 16.6963, "step": 151190 }, { "epoch": 0.30543356617929274, "grad_norm": 200.3013916015625, "learning_rate": 8.803163097020637e-06, "loss": 18.1865, "step": 151200 }, { "epoch": 0.30545376681197656, "grad_norm": 300.7507019042969, "learning_rate": 8.802936480284434e-06, "loss": 16.8214, "step": 151210 }, { "epoch": 0.3054739674446604, "grad_norm": 183.9778594970703, "learning_rate": 8.80270984501318e-06, "loss": 31.377, "step": 151220 }, { "epoch": 0.3054941680773442, "grad_norm": 122.45866394042969, "learning_rate": 8.802483191207978e-06, "loss": 20.7065, "step": 151230 }, { "epoch": 0.305514368710028, "grad_norm": 167.9942169189453, "learning_rate": 8.802256518869935e-06, "loss": 16.4885, "step": 151240 }, { "epoch": 0.30553456934271184, "grad_norm": 240.41830444335938, "learning_rate": 8.802029828000157e-06, "loss": 14.6281, "step": 151250 }, { "epoch": 0.30555476997539566, "grad_norm": 617.3421630859375, "learning_rate": 8.801803118599743e-06, "loss": 13.2036, "step": 151260 }, { "epoch": 0.3055749706080794, "grad_norm": 166.7339630126953, "learning_rate": 8.801576390669803e-06, "loss": 27.5233, "step": 151270 }, { "epoch": 0.30559517124076324, "grad_norm": 233.42431640625, "learning_rate": 8.801349644211442e-06, "loss": 30.8644, "step": 151280 }, { "epoch": 0.30561537187344706, "grad_norm": 557.9232788085938, "learning_rate": 8.801122879225762e-06, "loss": 33.965, "step": 151290 }, { "epoch": 0.3056355725061309, "grad_norm": 9.913869857788086, "learning_rate": 8.80089609571387e-06, "loss": 16.365, "step": 151300 }, { "epoch": 0.3056557731388147, "grad_norm": 399.513916015625, "learning_rate": 8.800669293676876e-06, "loss": 16.1054, "step": 151310 }, { "epoch": 0.3056759737714985, "grad_norm": 192.15602111816406, "learning_rate": 8.800442473115877e-06, "loss": 23.533, "step": 151320 }, { "epoch": 0.30569617440418234, "grad_norm": 306.813720703125, "learning_rate": 8.800215634031983e-06, "loss": 20.5008, "step": 151330 }, { "epoch": 0.30571637503686616, "grad_norm": 262.65057373046875, "learning_rate": 8.799988776426298e-06, "loss": 17.1613, "step": 151340 }, { "epoch": 0.30573657566955, "grad_norm": 41.15827560424805, "learning_rate": 8.799761900299929e-06, "loss": 24.2822, "step": 151350 }, { "epoch": 0.3057567763022338, "grad_norm": 232.21270751953125, "learning_rate": 8.799535005653982e-06, "loss": 15.0334, "step": 151360 }, { "epoch": 0.3057769769349176, "grad_norm": 36.683876037597656, "learning_rate": 8.799308092489561e-06, "loss": 25.8162, "step": 151370 }, { "epoch": 0.30579717756760144, "grad_norm": 375.1251525878906, "learning_rate": 8.799081160807773e-06, "loss": 18.9224, "step": 151380 }, { "epoch": 0.3058173782002852, "grad_norm": 284.247802734375, "learning_rate": 8.798854210609727e-06, "loss": 17.1721, "step": 151390 }, { "epoch": 0.305837578832969, "grad_norm": 300.2645568847656, "learning_rate": 8.798627241896524e-06, "loss": 15.5286, "step": 151400 }, { "epoch": 0.30585777946565285, "grad_norm": 39.15469741821289, "learning_rate": 8.798400254669272e-06, "loss": 16.3649, "step": 151410 }, { "epoch": 0.30587798009833667, "grad_norm": 433.34149169921875, "learning_rate": 8.798173248929081e-06, "loss": 13.7906, "step": 151420 }, { "epoch": 0.3058981807310205, "grad_norm": 1.8944743871688843, "learning_rate": 8.797946224677051e-06, "loss": 28.4366, "step": 151430 }, { "epoch": 0.3059183813637043, "grad_norm": 291.5290222167969, "learning_rate": 8.797719181914292e-06, "loss": 10.9267, "step": 151440 }, { "epoch": 0.3059385819963881, "grad_norm": 275.73883056640625, "learning_rate": 8.797492120641913e-06, "loss": 21.2744, "step": 151450 }, { "epoch": 0.30595878262907195, "grad_norm": 310.1722412109375, "learning_rate": 8.797265040861016e-06, "loss": 29.8213, "step": 151460 }, { "epoch": 0.30597898326175577, "grad_norm": 408.6690979003906, "learning_rate": 8.79703794257271e-06, "loss": 19.4656, "step": 151470 }, { "epoch": 0.3059991838944396, "grad_norm": 54.71696472167969, "learning_rate": 8.796810825778101e-06, "loss": 15.9454, "step": 151480 }, { "epoch": 0.3060193845271234, "grad_norm": 208.18435668945312, "learning_rate": 8.796583690478297e-06, "loss": 25.3024, "step": 151490 }, { "epoch": 0.3060395851598072, "grad_norm": 339.33978271484375, "learning_rate": 8.796356536674404e-06, "loss": 11.9622, "step": 151500 }, { "epoch": 0.30605978579249105, "grad_norm": 238.1308135986328, "learning_rate": 8.796129364367532e-06, "loss": 23.0177, "step": 151510 }, { "epoch": 0.3060799864251748, "grad_norm": 312.35003662109375, "learning_rate": 8.795902173558784e-06, "loss": 21.7703, "step": 151520 }, { "epoch": 0.30610018705785863, "grad_norm": 355.4923400878906, "learning_rate": 8.79567496424927e-06, "loss": 23.6565, "step": 151530 }, { "epoch": 0.30612038769054245, "grad_norm": 677.6407470703125, "learning_rate": 8.795447736440095e-06, "loss": 21.9421, "step": 151540 }, { "epoch": 0.30614058832322627, "grad_norm": 92.0779037475586, "learning_rate": 8.795220490132369e-06, "loss": 15.3177, "step": 151550 }, { "epoch": 0.3061607889559101, "grad_norm": 240.48744201660156, "learning_rate": 8.794993225327199e-06, "loss": 26.926, "step": 151560 }, { "epoch": 0.3061809895885939, "grad_norm": 35.53240203857422, "learning_rate": 8.794765942025692e-06, "loss": 13.1976, "step": 151570 }, { "epoch": 0.30620119022127773, "grad_norm": 391.5346984863281, "learning_rate": 8.794538640228956e-06, "loss": 38.1744, "step": 151580 }, { "epoch": 0.30622139085396155, "grad_norm": 158.18333435058594, "learning_rate": 8.794311319938098e-06, "loss": 29.328, "step": 151590 }, { "epoch": 0.30624159148664537, "grad_norm": 405.65576171875, "learning_rate": 8.794083981154229e-06, "loss": 38.4015, "step": 151600 }, { "epoch": 0.3062617921193292, "grad_norm": 383.728515625, "learning_rate": 8.793856623878453e-06, "loss": 20.4609, "step": 151610 }, { "epoch": 0.306281992752013, "grad_norm": 35.41801834106445, "learning_rate": 8.79362924811188e-06, "loss": 28.5072, "step": 151620 }, { "epoch": 0.30630219338469683, "grad_norm": 176.29986572265625, "learning_rate": 8.793401853855619e-06, "loss": 23.2449, "step": 151630 }, { "epoch": 0.30632239401738065, "grad_norm": 103.71546936035156, "learning_rate": 8.793174441110777e-06, "loss": 21.6839, "step": 151640 }, { "epoch": 0.3063425946500644, "grad_norm": 59.80694580078125, "learning_rate": 8.792947009878463e-06, "loss": 19.3455, "step": 151650 }, { "epoch": 0.30636279528274823, "grad_norm": 677.0868530273438, "learning_rate": 8.792719560159786e-06, "loss": 41.7977, "step": 151660 }, { "epoch": 0.30638299591543205, "grad_norm": 110.109619140625, "learning_rate": 8.792492091955852e-06, "loss": 14.8896, "step": 151670 }, { "epoch": 0.3064031965481159, "grad_norm": 118.74826049804688, "learning_rate": 8.792264605267772e-06, "loss": 16.0239, "step": 151680 }, { "epoch": 0.3064233971807997, "grad_norm": 305.57781982421875, "learning_rate": 8.792037100096656e-06, "loss": 22.8894, "step": 151690 }, { "epoch": 0.3064435978134835, "grad_norm": 527.2737426757812, "learning_rate": 8.791809576443611e-06, "loss": 22.2091, "step": 151700 }, { "epoch": 0.30646379844616733, "grad_norm": 321.19403076171875, "learning_rate": 8.791582034309745e-06, "loss": 34.7569, "step": 151710 }, { "epoch": 0.30648399907885115, "grad_norm": 586.2211303710938, "learning_rate": 8.791354473696167e-06, "loss": 25.4944, "step": 151720 }, { "epoch": 0.306504199711535, "grad_norm": 117.63409423828125, "learning_rate": 8.791126894603987e-06, "loss": 16.4035, "step": 151730 }, { "epoch": 0.3065244003442188, "grad_norm": 415.64776611328125, "learning_rate": 8.790899297034317e-06, "loss": 18.4343, "step": 151740 }, { "epoch": 0.3065446009769026, "grad_norm": 312.8681945800781, "learning_rate": 8.790671680988261e-06, "loss": 10.7629, "step": 151750 }, { "epoch": 0.30656480160958643, "grad_norm": 147.3041534423828, "learning_rate": 8.790444046466933e-06, "loss": 22.9202, "step": 151760 }, { "epoch": 0.30658500224227025, "grad_norm": 59.39794921875, "learning_rate": 8.79021639347144e-06, "loss": 21.8277, "step": 151770 }, { "epoch": 0.306605202874954, "grad_norm": 83.88175964355469, "learning_rate": 8.789988722002891e-06, "loss": 11.964, "step": 151780 }, { "epoch": 0.30662540350763784, "grad_norm": 118.38362121582031, "learning_rate": 8.789761032062398e-06, "loss": 13.1768, "step": 151790 }, { "epoch": 0.30664560414032166, "grad_norm": 419.4764099121094, "learning_rate": 8.789533323651067e-06, "loss": 18.1992, "step": 151800 }, { "epoch": 0.3066658047730055, "grad_norm": 436.6068115234375, "learning_rate": 8.789305596770013e-06, "loss": 10.0896, "step": 151810 }, { "epoch": 0.3066860054056893, "grad_norm": 447.7608947753906, "learning_rate": 8.789077851420341e-06, "loss": 14.8556, "step": 151820 }, { "epoch": 0.3067062060383731, "grad_norm": 411.368896484375, "learning_rate": 8.788850087603164e-06, "loss": 23.79, "step": 151830 }, { "epoch": 0.30672640667105694, "grad_norm": 308.0526428222656, "learning_rate": 8.788622305319591e-06, "loss": 28.25, "step": 151840 }, { "epoch": 0.30674660730374076, "grad_norm": 207.58384704589844, "learning_rate": 8.788394504570732e-06, "loss": 22.0883, "step": 151850 }, { "epoch": 0.3067668079364246, "grad_norm": 651.9456787109375, "learning_rate": 8.7881666853577e-06, "loss": 17.3917, "step": 151860 }, { "epoch": 0.3067870085691084, "grad_norm": 771.2710571289062, "learning_rate": 8.7879388476816e-06, "loss": 20.5876, "step": 151870 }, { "epoch": 0.3068072092017922, "grad_norm": 168.67835998535156, "learning_rate": 8.787710991543547e-06, "loss": 28.9988, "step": 151880 }, { "epoch": 0.30682740983447604, "grad_norm": 503.09991455078125, "learning_rate": 8.78748311694465e-06, "loss": 19.4895, "step": 151890 }, { "epoch": 0.3068476104671598, "grad_norm": 409.2926940917969, "learning_rate": 8.78725522388602e-06, "loss": 37.5754, "step": 151900 }, { "epoch": 0.3068678110998436, "grad_norm": 429.3181457519531, "learning_rate": 8.787027312368766e-06, "loss": 28.7351, "step": 151910 }, { "epoch": 0.30688801173252744, "grad_norm": 383.4990539550781, "learning_rate": 8.786799382394e-06, "loss": 15.9943, "step": 151920 }, { "epoch": 0.30690821236521126, "grad_norm": 469.45941162109375, "learning_rate": 8.786571433962837e-06, "loss": 18.2484, "step": 151930 }, { "epoch": 0.3069284129978951, "grad_norm": 211.03579711914062, "learning_rate": 8.78634346707638e-06, "loss": 12.7816, "step": 151940 }, { "epoch": 0.3069486136305789, "grad_norm": 394.8388366699219, "learning_rate": 8.786115481735745e-06, "loss": 16.037, "step": 151950 }, { "epoch": 0.3069688142632627, "grad_norm": 289.69964599609375, "learning_rate": 8.785887477942041e-06, "loss": 28.7042, "step": 151960 }, { "epoch": 0.30698901489594654, "grad_norm": 420.3507995605469, "learning_rate": 8.785659455696384e-06, "loss": 29.8144, "step": 151970 }, { "epoch": 0.30700921552863036, "grad_norm": 312.2274169921875, "learning_rate": 8.78543141499988e-06, "loss": 20.9277, "step": 151980 }, { "epoch": 0.3070294161613142, "grad_norm": 386.7939147949219, "learning_rate": 8.785203355853642e-06, "loss": 24.5216, "step": 151990 }, { "epoch": 0.307049616793998, "grad_norm": 422.6808776855469, "learning_rate": 8.784975278258783e-06, "loss": 28.8852, "step": 152000 }, { "epoch": 0.3070698174266818, "grad_norm": 369.6784973144531, "learning_rate": 8.784747182216414e-06, "loss": 22.1603, "step": 152010 }, { "epoch": 0.30709001805936564, "grad_norm": 367.23956298828125, "learning_rate": 8.784519067727644e-06, "loss": 20.1401, "step": 152020 }, { "epoch": 0.3071102186920494, "grad_norm": 168.3351593017578, "learning_rate": 8.78429093479359e-06, "loss": 16.4034, "step": 152030 }, { "epoch": 0.30713041932473323, "grad_norm": 363.4021301269531, "learning_rate": 8.78406278341536e-06, "loss": 14.1207, "step": 152040 }, { "epoch": 0.30715061995741705, "grad_norm": 385.3751525878906, "learning_rate": 8.783834613594064e-06, "loss": 16.3462, "step": 152050 }, { "epoch": 0.30717082059010087, "grad_norm": 162.46978759765625, "learning_rate": 8.78360642533082e-06, "loss": 19.1407, "step": 152060 }, { "epoch": 0.3071910212227847, "grad_norm": 208.8948211669922, "learning_rate": 8.783378218626737e-06, "loss": 24.4614, "step": 152070 }, { "epoch": 0.3072112218554685, "grad_norm": 280.7435607910156, "learning_rate": 8.783149993482928e-06, "loss": 19.8538, "step": 152080 }, { "epoch": 0.30723142248815233, "grad_norm": 392.04754638671875, "learning_rate": 8.782921749900502e-06, "loss": 17.7428, "step": 152090 }, { "epoch": 0.30725162312083615, "grad_norm": 339.7778625488281, "learning_rate": 8.782693487880575e-06, "loss": 29.301, "step": 152100 }, { "epoch": 0.30727182375351997, "grad_norm": 791.1005249023438, "learning_rate": 8.782465207424261e-06, "loss": 27.8215, "step": 152110 }, { "epoch": 0.3072920243862038, "grad_norm": 0.0, "learning_rate": 8.78223690853267e-06, "loss": 13.747, "step": 152120 }, { "epoch": 0.3073122250188876, "grad_norm": 216.2332305908203, "learning_rate": 8.782008591206914e-06, "loss": 20.8374, "step": 152130 }, { "epoch": 0.30733242565157143, "grad_norm": 354.9555969238281, "learning_rate": 8.781780255448106e-06, "loss": 27.812, "step": 152140 }, { "epoch": 0.30735262628425525, "grad_norm": 488.1226501464844, "learning_rate": 8.78155190125736e-06, "loss": 20.7969, "step": 152150 }, { "epoch": 0.307372826916939, "grad_norm": 198.00152587890625, "learning_rate": 8.78132352863579e-06, "loss": 24.7723, "step": 152160 }, { "epoch": 0.30739302754962283, "grad_norm": 216.2751007080078, "learning_rate": 8.781095137584506e-06, "loss": 12.7961, "step": 152170 }, { "epoch": 0.30741322818230665, "grad_norm": 221.05091857910156, "learning_rate": 8.780866728104625e-06, "loss": 14.1506, "step": 152180 }, { "epoch": 0.3074334288149905, "grad_norm": 38.39664840698242, "learning_rate": 8.780638300197258e-06, "loss": 11.0513, "step": 152190 }, { "epoch": 0.3074536294476743, "grad_norm": 801.5200805664062, "learning_rate": 8.780409853863517e-06, "loss": 27.4631, "step": 152200 }, { "epoch": 0.3074738300803581, "grad_norm": 258.609130859375, "learning_rate": 8.780181389104516e-06, "loss": 22.7652, "step": 152210 }, { "epoch": 0.30749403071304193, "grad_norm": 297.8200378417969, "learning_rate": 8.779952905921372e-06, "loss": 23.0695, "step": 152220 }, { "epoch": 0.30751423134572575, "grad_norm": 342.3063049316406, "learning_rate": 8.779724404315195e-06, "loss": 17.9497, "step": 152230 }, { "epoch": 0.3075344319784096, "grad_norm": 418.3288269042969, "learning_rate": 8.779495884287099e-06, "loss": 32.8341, "step": 152240 }, { "epoch": 0.3075546326110934, "grad_norm": 160.9750213623047, "learning_rate": 8.779267345838198e-06, "loss": 32.5812, "step": 152250 }, { "epoch": 0.3075748332437772, "grad_norm": 447.358642578125, "learning_rate": 8.779038788969607e-06, "loss": 32.4696, "step": 152260 }, { "epoch": 0.30759503387646103, "grad_norm": 286.9361572265625, "learning_rate": 8.77881021368244e-06, "loss": 14.9789, "step": 152270 }, { "epoch": 0.30761523450914485, "grad_norm": 205.81369018554688, "learning_rate": 8.778581619977811e-06, "loss": 24.5351, "step": 152280 }, { "epoch": 0.3076354351418286, "grad_norm": 293.9339904785156, "learning_rate": 8.778353007856832e-06, "loss": 19.3899, "step": 152290 }, { "epoch": 0.30765563577451244, "grad_norm": 286.5095520019531, "learning_rate": 8.778124377320619e-06, "loss": 20.046, "step": 152300 }, { "epoch": 0.30767583640719626, "grad_norm": 577.9072265625, "learning_rate": 8.777895728370285e-06, "loss": 39.8192, "step": 152310 }, { "epoch": 0.3076960370398801, "grad_norm": 674.98583984375, "learning_rate": 8.777667061006947e-06, "loss": 20.1398, "step": 152320 }, { "epoch": 0.3077162376725639, "grad_norm": 368.5007629394531, "learning_rate": 8.777438375231717e-06, "loss": 21.3666, "step": 152330 }, { "epoch": 0.3077364383052477, "grad_norm": 318.4439697265625, "learning_rate": 8.77720967104571e-06, "loss": 23.3322, "step": 152340 }, { "epoch": 0.30775663893793154, "grad_norm": 326.7030944824219, "learning_rate": 8.776980948450043e-06, "loss": 11.4066, "step": 152350 }, { "epoch": 0.30777683957061536, "grad_norm": 208.1195526123047, "learning_rate": 8.776752207445829e-06, "loss": 31.8959, "step": 152360 }, { "epoch": 0.3077970402032992, "grad_norm": 225.5050048828125, "learning_rate": 8.776523448034182e-06, "loss": 18.945, "step": 152370 }, { "epoch": 0.307817240835983, "grad_norm": 293.3037414550781, "learning_rate": 8.776294670216217e-06, "loss": 15.0634, "step": 152380 }, { "epoch": 0.3078374414686668, "grad_norm": 481.739990234375, "learning_rate": 8.776065873993049e-06, "loss": 15.3312, "step": 152390 }, { "epoch": 0.30785764210135064, "grad_norm": 319.92132568359375, "learning_rate": 8.775837059365796e-06, "loss": 27.8646, "step": 152400 }, { "epoch": 0.30787784273403446, "grad_norm": 325.300537109375, "learning_rate": 8.77560822633557e-06, "loss": 34.6775, "step": 152410 }, { "epoch": 0.3078980433667182, "grad_norm": 213.47952270507812, "learning_rate": 8.775379374903487e-06, "loss": 27.1667, "step": 152420 }, { "epoch": 0.30791824399940204, "grad_norm": 2.3813018798828125, "learning_rate": 8.775150505070664e-06, "loss": 15.499, "step": 152430 }, { "epoch": 0.30793844463208586, "grad_norm": 485.1654968261719, "learning_rate": 8.774921616838217e-06, "loss": 21.6517, "step": 152440 }, { "epoch": 0.3079586452647697, "grad_norm": 576.2132568359375, "learning_rate": 8.774692710207257e-06, "loss": 25.9486, "step": 152450 }, { "epoch": 0.3079788458974535, "grad_norm": 356.6376953125, "learning_rate": 8.774463785178904e-06, "loss": 27.8735, "step": 152460 }, { "epoch": 0.3079990465301373, "grad_norm": 225.2332000732422, "learning_rate": 8.774234841754271e-06, "loss": 15.3441, "step": 152470 }, { "epoch": 0.30801924716282114, "grad_norm": 162.66778564453125, "learning_rate": 8.774005879934475e-06, "loss": 30.3546, "step": 152480 }, { "epoch": 0.30803944779550496, "grad_norm": 310.86053466796875, "learning_rate": 8.773776899720634e-06, "loss": 14.2002, "step": 152490 }, { "epoch": 0.3080596484281888, "grad_norm": 339.61962890625, "learning_rate": 8.773547901113862e-06, "loss": 18.2939, "step": 152500 }, { "epoch": 0.3080798490608726, "grad_norm": 243.95875549316406, "learning_rate": 8.773318884115273e-06, "loss": 31.136, "step": 152510 }, { "epoch": 0.3081000496935564, "grad_norm": 334.0319519042969, "learning_rate": 8.773089848725986e-06, "loss": 15.7793, "step": 152520 }, { "epoch": 0.30812025032624024, "grad_norm": 176.71231079101562, "learning_rate": 8.772860794947119e-06, "loss": 39.6682, "step": 152530 }, { "epoch": 0.308140450958924, "grad_norm": 309.6560974121094, "learning_rate": 8.772631722779783e-06, "loss": 18.7121, "step": 152540 }, { "epoch": 0.3081606515916078, "grad_norm": 392.6878356933594, "learning_rate": 8.772402632225098e-06, "loss": 17.167, "step": 152550 }, { "epoch": 0.30818085222429165, "grad_norm": 403.9038391113281, "learning_rate": 8.772173523284182e-06, "loss": 21.0196, "step": 152560 }, { "epoch": 0.30820105285697547, "grad_norm": 266.9403076171875, "learning_rate": 8.77194439595815e-06, "loss": 14.2769, "step": 152570 }, { "epoch": 0.3082212534896593, "grad_norm": 209.0220489501953, "learning_rate": 8.771715250248116e-06, "loss": 18.6591, "step": 152580 }, { "epoch": 0.3082414541223431, "grad_norm": 21.29311752319336, "learning_rate": 8.771486086155201e-06, "loss": 20.0096, "step": 152590 }, { "epoch": 0.3082616547550269, "grad_norm": 380.7731018066406, "learning_rate": 8.77125690368052e-06, "loss": 28.0111, "step": 152600 }, { "epoch": 0.30828185538771075, "grad_norm": 411.1728210449219, "learning_rate": 8.77102770282519e-06, "loss": 32.4315, "step": 152610 }, { "epoch": 0.30830205602039457, "grad_norm": 434.34954833984375, "learning_rate": 8.770798483590327e-06, "loss": 23.0557, "step": 152620 }, { "epoch": 0.3083222566530784, "grad_norm": 204.51303100585938, "learning_rate": 8.770569245977052e-06, "loss": 30.3811, "step": 152630 }, { "epoch": 0.3083424572857622, "grad_norm": 724.8303833007812, "learning_rate": 8.770339989986479e-06, "loss": 17.5082, "step": 152640 }, { "epoch": 0.308362657918446, "grad_norm": 293.98712158203125, "learning_rate": 8.770110715619726e-06, "loss": 8.5194, "step": 152650 }, { "epoch": 0.30838285855112985, "grad_norm": 344.2889099121094, "learning_rate": 8.769881422877911e-06, "loss": 19.4383, "step": 152660 }, { "epoch": 0.3084030591838136, "grad_norm": 57.262420654296875, "learning_rate": 8.76965211176215e-06, "loss": 17.6182, "step": 152670 }, { "epoch": 0.30842325981649743, "grad_norm": 363.5317077636719, "learning_rate": 8.769422782273563e-06, "loss": 31.6844, "step": 152680 }, { "epoch": 0.30844346044918125, "grad_norm": 199.94053649902344, "learning_rate": 8.769193434413266e-06, "loss": 19.7434, "step": 152690 }, { "epoch": 0.30846366108186507, "grad_norm": 233.206787109375, "learning_rate": 8.768964068182378e-06, "loss": 12.0366, "step": 152700 }, { "epoch": 0.3084838617145489, "grad_norm": 70.4413070678711, "learning_rate": 8.768734683582017e-06, "loss": 16.606, "step": 152710 }, { "epoch": 0.3085040623472327, "grad_norm": 114.97087097167969, "learning_rate": 8.768505280613297e-06, "loss": 17.1253, "step": 152720 }, { "epoch": 0.30852426297991653, "grad_norm": 157.65969848632812, "learning_rate": 8.768275859277342e-06, "loss": 32.5369, "step": 152730 }, { "epoch": 0.30854446361260035, "grad_norm": 393.3841552734375, "learning_rate": 8.768046419575267e-06, "loss": 20.5304, "step": 152740 }, { "epoch": 0.30856466424528417, "grad_norm": 54.57630920410156, "learning_rate": 8.767816961508191e-06, "loss": 14.45, "step": 152750 }, { "epoch": 0.308584864877968, "grad_norm": 133.46749877929688, "learning_rate": 8.76758748507723e-06, "loss": 29.3036, "step": 152760 }, { "epoch": 0.3086050655106518, "grad_norm": 302.5450744628906, "learning_rate": 8.767357990283507e-06, "loss": 19.3906, "step": 152770 }, { "epoch": 0.30862526614333563, "grad_norm": 400.9719543457031, "learning_rate": 8.767128477128138e-06, "loss": 20.4249, "step": 152780 }, { "epoch": 0.30864546677601945, "grad_norm": 318.1088562011719, "learning_rate": 8.766898945612241e-06, "loss": 35.2038, "step": 152790 }, { "epoch": 0.3086656674087032, "grad_norm": 528.9554443359375, "learning_rate": 8.766669395736936e-06, "loss": 24.1118, "step": 152800 }, { "epoch": 0.30868586804138703, "grad_norm": 119.2809066772461, "learning_rate": 8.766439827503339e-06, "loss": 15.4705, "step": 152810 }, { "epoch": 0.30870606867407085, "grad_norm": 156.37696838378906, "learning_rate": 8.766210240912574e-06, "loss": 16.2501, "step": 152820 }, { "epoch": 0.3087262693067547, "grad_norm": 341.1950988769531, "learning_rate": 8.765980635965755e-06, "loss": 26.7463, "step": 152830 }, { "epoch": 0.3087464699394385, "grad_norm": 325.62567138671875, "learning_rate": 8.765751012664004e-06, "loss": 29.0927, "step": 152840 }, { "epoch": 0.3087666705721223, "grad_norm": 154.7163848876953, "learning_rate": 8.765521371008439e-06, "loss": 21.256, "step": 152850 }, { "epoch": 0.30878687120480613, "grad_norm": 462.84747314453125, "learning_rate": 8.76529171100018e-06, "loss": 17.1042, "step": 152860 }, { "epoch": 0.30880707183748995, "grad_norm": 583.1702880859375, "learning_rate": 8.765062032640346e-06, "loss": 25.0051, "step": 152870 }, { "epoch": 0.3088272724701738, "grad_norm": 293.7120361328125, "learning_rate": 8.764832335930055e-06, "loss": 18.8988, "step": 152880 }, { "epoch": 0.3088474731028576, "grad_norm": 368.9406433105469, "learning_rate": 8.764602620870429e-06, "loss": 14.6028, "step": 152890 }, { "epoch": 0.3088676737355414, "grad_norm": 348.947998046875, "learning_rate": 8.764372887462587e-06, "loss": 16.6768, "step": 152900 }, { "epoch": 0.30888787436822523, "grad_norm": 205.06150817871094, "learning_rate": 8.764143135707647e-06, "loss": 20.2982, "step": 152910 }, { "epoch": 0.30890807500090905, "grad_norm": 528.6824951171875, "learning_rate": 8.76391336560673e-06, "loss": 23.911, "step": 152920 }, { "epoch": 0.3089282756335928, "grad_norm": 485.2646484375, "learning_rate": 8.763683577160955e-06, "loss": 26.0786, "step": 152930 }, { "epoch": 0.30894847626627664, "grad_norm": 224.72027587890625, "learning_rate": 8.763453770371444e-06, "loss": 23.1357, "step": 152940 }, { "epoch": 0.30896867689896046, "grad_norm": 7.388305187225342, "learning_rate": 8.763223945239317e-06, "loss": 23.0271, "step": 152950 }, { "epoch": 0.3089888775316443, "grad_norm": 398.7001953125, "learning_rate": 8.76299410176569e-06, "loss": 14.4131, "step": 152960 }, { "epoch": 0.3090090781643281, "grad_norm": 80.96422576904297, "learning_rate": 8.762764239951688e-06, "loss": 15.3701, "step": 152970 }, { "epoch": 0.3090292787970119, "grad_norm": 29.725940704345703, "learning_rate": 8.76253435979843e-06, "loss": 14.3205, "step": 152980 }, { "epoch": 0.30904947942969574, "grad_norm": 382.2878112792969, "learning_rate": 8.762304461307033e-06, "loss": 26.3182, "step": 152990 }, { "epoch": 0.30906968006237956, "grad_norm": 184.84988403320312, "learning_rate": 8.762074544478622e-06, "loss": 11.6815, "step": 153000 }, { "epoch": 0.3090898806950634, "grad_norm": 221.9973907470703, "learning_rate": 8.761844609314316e-06, "loss": 11.6994, "step": 153010 }, { "epoch": 0.3091100813277472, "grad_norm": 180.58184814453125, "learning_rate": 8.761614655815237e-06, "loss": 24.283, "step": 153020 }, { "epoch": 0.309130281960431, "grad_norm": 365.34320068359375, "learning_rate": 8.761384683982503e-06, "loss": 28.2789, "step": 153030 }, { "epoch": 0.30915048259311484, "grad_norm": 238.53904724121094, "learning_rate": 8.761154693817236e-06, "loss": 22.9361, "step": 153040 }, { "epoch": 0.30917068322579866, "grad_norm": 121.74559783935547, "learning_rate": 8.760924685320558e-06, "loss": 18.3064, "step": 153050 }, { "epoch": 0.3091908838584824, "grad_norm": 207.3932342529297, "learning_rate": 8.760694658493589e-06, "loss": 30.4753, "step": 153060 }, { "epoch": 0.30921108449116624, "grad_norm": 175.97230529785156, "learning_rate": 8.76046461333745e-06, "loss": 20.8351, "step": 153070 }, { "epoch": 0.30923128512385006, "grad_norm": 298.8153076171875, "learning_rate": 8.760234549853263e-06, "loss": 27.5595, "step": 153080 }, { "epoch": 0.3092514857565339, "grad_norm": 334.2640380859375, "learning_rate": 8.760004468042148e-06, "loss": 35.9618, "step": 153090 }, { "epoch": 0.3092716863892177, "grad_norm": 309.28204345703125, "learning_rate": 8.759774367905228e-06, "loss": 16.396, "step": 153100 }, { "epoch": 0.3092918870219015, "grad_norm": 346.6181945800781, "learning_rate": 8.759544249443624e-06, "loss": 19.5012, "step": 153110 }, { "epoch": 0.30931208765458534, "grad_norm": 356.9286193847656, "learning_rate": 8.759314112658458e-06, "loss": 26.9489, "step": 153120 }, { "epoch": 0.30933228828726916, "grad_norm": 354.5332336425781, "learning_rate": 8.759083957550849e-06, "loss": 19.853, "step": 153130 }, { "epoch": 0.309352488919953, "grad_norm": 250.43478393554688, "learning_rate": 8.758853784121921e-06, "loss": 22.4541, "step": 153140 }, { "epoch": 0.3093726895526368, "grad_norm": 587.0438232421875, "learning_rate": 8.758623592372797e-06, "loss": 25.3026, "step": 153150 }, { "epoch": 0.3093928901853206, "grad_norm": 369.0464782714844, "learning_rate": 8.758393382304597e-06, "loss": 21.7319, "step": 153160 }, { "epoch": 0.30941309081800444, "grad_norm": 345.3206481933594, "learning_rate": 8.758163153918442e-06, "loss": 16.9024, "step": 153170 }, { "epoch": 0.3094332914506882, "grad_norm": 91.53553009033203, "learning_rate": 8.757932907215457e-06, "loss": 17.9179, "step": 153180 }, { "epoch": 0.30945349208337203, "grad_norm": 441.80816650390625, "learning_rate": 8.757702642196763e-06, "loss": 26.9275, "step": 153190 }, { "epoch": 0.30947369271605585, "grad_norm": 329.83612060546875, "learning_rate": 8.757472358863481e-06, "loss": 28.0505, "step": 153200 }, { "epoch": 0.30949389334873967, "grad_norm": 532.6705932617188, "learning_rate": 8.757242057216735e-06, "loss": 24.1426, "step": 153210 }, { "epoch": 0.3095140939814235, "grad_norm": 221.9176788330078, "learning_rate": 8.757011737257646e-06, "loss": 21.911, "step": 153220 }, { "epoch": 0.3095342946141073, "grad_norm": 220.52194213867188, "learning_rate": 8.75678139898734e-06, "loss": 14.336, "step": 153230 }, { "epoch": 0.30955449524679113, "grad_norm": 586.3423461914062, "learning_rate": 8.756551042406936e-06, "loss": 38.1692, "step": 153240 }, { "epoch": 0.30957469587947495, "grad_norm": 0.0, "learning_rate": 8.756320667517557e-06, "loss": 27.741, "step": 153250 }, { "epoch": 0.30959489651215877, "grad_norm": 492.3929138183594, "learning_rate": 8.756090274320326e-06, "loss": 22.1193, "step": 153260 }, { "epoch": 0.3096150971448426, "grad_norm": 685.36279296875, "learning_rate": 8.755859862816368e-06, "loss": 39.1818, "step": 153270 }, { "epoch": 0.3096352977775264, "grad_norm": 130.56214904785156, "learning_rate": 8.755629433006804e-06, "loss": 19.1548, "step": 153280 }, { "epoch": 0.30965549841021023, "grad_norm": 125.44181060791016, "learning_rate": 8.755398984892757e-06, "loss": 32.142, "step": 153290 }, { "epoch": 0.30967569904289405, "grad_norm": 505.6233215332031, "learning_rate": 8.755168518475351e-06, "loss": 25.1883, "step": 153300 }, { "epoch": 0.3096958996755778, "grad_norm": 167.7787628173828, "learning_rate": 8.754938033755712e-06, "loss": 15.4698, "step": 153310 }, { "epoch": 0.30971610030826163, "grad_norm": 207.58937072753906, "learning_rate": 8.754707530734958e-06, "loss": 42.5961, "step": 153320 }, { "epoch": 0.30973630094094545, "grad_norm": 645.6146240234375, "learning_rate": 8.754477009414215e-06, "loss": 13.6148, "step": 153330 }, { "epoch": 0.3097565015736293, "grad_norm": 468.57330322265625, "learning_rate": 8.754246469794606e-06, "loss": 16.7055, "step": 153340 }, { "epoch": 0.3097767022063131, "grad_norm": 454.4181213378906, "learning_rate": 8.754015911877255e-06, "loss": 28.6678, "step": 153350 }, { "epoch": 0.3097969028389969, "grad_norm": 158.0446014404297, "learning_rate": 8.753785335663287e-06, "loss": 12.5163, "step": 153360 }, { "epoch": 0.30981710347168073, "grad_norm": 286.8199462890625, "learning_rate": 8.753554741153822e-06, "loss": 15.2816, "step": 153370 }, { "epoch": 0.30983730410436455, "grad_norm": 650.8275756835938, "learning_rate": 8.75332412834999e-06, "loss": 19.0044, "step": 153380 }, { "epoch": 0.3098575047370484, "grad_norm": 671.2449340820312, "learning_rate": 8.75309349725291e-06, "loss": 26.7508, "step": 153390 }, { "epoch": 0.3098777053697322, "grad_norm": 95.67963409423828, "learning_rate": 8.752862847863707e-06, "loss": 25.4041, "step": 153400 }, { "epoch": 0.309897906002416, "grad_norm": 297.50994873046875, "learning_rate": 8.752632180183504e-06, "loss": 28.3583, "step": 153410 }, { "epoch": 0.30991810663509983, "grad_norm": 137.37498474121094, "learning_rate": 8.75240149421343e-06, "loss": 18.9761, "step": 153420 }, { "epoch": 0.30993830726778365, "grad_norm": 242.51333618164062, "learning_rate": 8.752170789954604e-06, "loss": 19.4179, "step": 153430 }, { "epoch": 0.3099585079004674, "grad_norm": 0.0, "learning_rate": 8.751940067408155e-06, "loss": 24.273, "step": 153440 }, { "epoch": 0.30997870853315124, "grad_norm": 376.41888427734375, "learning_rate": 8.751709326575204e-06, "loss": 14.3608, "step": 153450 }, { "epoch": 0.30999890916583506, "grad_norm": 170.5850067138672, "learning_rate": 8.751478567456874e-06, "loss": 10.9869, "step": 153460 }, { "epoch": 0.3100191097985189, "grad_norm": 474.08380126953125, "learning_rate": 8.751247790054297e-06, "loss": 12.5811, "step": 153470 }, { "epoch": 0.3100393104312027, "grad_norm": 340.08856201171875, "learning_rate": 8.75101699436859e-06, "loss": 27.325, "step": 153480 }, { "epoch": 0.3100595110638865, "grad_norm": 249.54652404785156, "learning_rate": 8.750786180400883e-06, "loss": 17.7454, "step": 153490 }, { "epoch": 0.31007971169657034, "grad_norm": 232.5574493408203, "learning_rate": 8.750555348152299e-06, "loss": 18.9804, "step": 153500 }, { "epoch": 0.31009991232925416, "grad_norm": 282.6781005859375, "learning_rate": 8.750324497623963e-06, "loss": 23.3461, "step": 153510 }, { "epoch": 0.310120112961938, "grad_norm": 649.1866455078125, "learning_rate": 8.750093628817e-06, "loss": 33.7548, "step": 153520 }, { "epoch": 0.3101403135946218, "grad_norm": 258.2040100097656, "learning_rate": 8.749862741732534e-06, "loss": 24.5099, "step": 153530 }, { "epoch": 0.3101605142273056, "grad_norm": 0.0, "learning_rate": 8.749631836371692e-06, "loss": 7.5649, "step": 153540 }, { "epoch": 0.31018071485998944, "grad_norm": 254.9884796142578, "learning_rate": 8.749400912735602e-06, "loss": 20.1118, "step": 153550 }, { "epoch": 0.31020091549267326, "grad_norm": 994.185302734375, "learning_rate": 8.749169970825384e-06, "loss": 29.07, "step": 153560 }, { "epoch": 0.310221116125357, "grad_norm": 357.3439025878906, "learning_rate": 8.748939010642168e-06, "loss": 24.6214, "step": 153570 }, { "epoch": 0.31024131675804084, "grad_norm": 264.33123779296875, "learning_rate": 8.748708032187076e-06, "loss": 24.4567, "step": 153580 }, { "epoch": 0.31026151739072466, "grad_norm": 204.4443359375, "learning_rate": 8.748477035461237e-06, "loss": 12.9156, "step": 153590 }, { "epoch": 0.3102817180234085, "grad_norm": 382.69171142578125, "learning_rate": 8.748246020465776e-06, "loss": 29.396, "step": 153600 }, { "epoch": 0.3103019186560923, "grad_norm": 251.02085876464844, "learning_rate": 8.748014987201818e-06, "loss": 16.1203, "step": 153610 }, { "epoch": 0.3103221192887761, "grad_norm": 117.63137817382812, "learning_rate": 8.74778393567049e-06, "loss": 12.5781, "step": 153620 }, { "epoch": 0.31034231992145994, "grad_norm": 183.88037109375, "learning_rate": 8.747552865872918e-06, "loss": 22.0869, "step": 153630 }, { "epoch": 0.31036252055414376, "grad_norm": 363.4797668457031, "learning_rate": 8.747321777810226e-06, "loss": 28.7835, "step": 153640 }, { "epoch": 0.3103827211868276, "grad_norm": 337.51953125, "learning_rate": 8.747090671483542e-06, "loss": 12.6057, "step": 153650 }, { "epoch": 0.3104029218195114, "grad_norm": 155.5360870361328, "learning_rate": 8.746859546893995e-06, "loss": 26.0484, "step": 153660 }, { "epoch": 0.3104231224521952, "grad_norm": 195.1410369873047, "learning_rate": 8.746628404042707e-06, "loss": 15.0568, "step": 153670 }, { "epoch": 0.31044332308487904, "grad_norm": 420.71527099609375, "learning_rate": 8.74639724293081e-06, "loss": 25.884, "step": 153680 }, { "epoch": 0.31046352371756286, "grad_norm": 248.77444458007812, "learning_rate": 8.746166063559423e-06, "loss": 16.8359, "step": 153690 }, { "epoch": 0.3104837243502466, "grad_norm": 63.93393325805664, "learning_rate": 8.745934865929676e-06, "loss": 16.0421, "step": 153700 }, { "epoch": 0.31050392498293045, "grad_norm": 124.17976379394531, "learning_rate": 8.745703650042701e-06, "loss": 31.9748, "step": 153710 }, { "epoch": 0.31052412561561427, "grad_norm": 150.4432830810547, "learning_rate": 8.74547241589962e-06, "loss": 30.4803, "step": 153720 }, { "epoch": 0.3105443262482981, "grad_norm": 267.81805419921875, "learning_rate": 8.74524116350156e-06, "loss": 17.5342, "step": 153730 }, { "epoch": 0.3105645268809819, "grad_norm": 261.1622314453125, "learning_rate": 8.745009892849647e-06, "loss": 14.3127, "step": 153740 }, { "epoch": 0.3105847275136657, "grad_norm": 429.62628173828125, "learning_rate": 8.744778603945013e-06, "loss": 40.5072, "step": 153750 }, { "epoch": 0.31060492814634955, "grad_norm": 5.253328800201416, "learning_rate": 8.744547296788779e-06, "loss": 15.539, "step": 153760 }, { "epoch": 0.31062512877903337, "grad_norm": 272.1152038574219, "learning_rate": 8.744315971382078e-06, "loss": 18.5062, "step": 153770 }, { "epoch": 0.3106453294117172, "grad_norm": 278.6641845703125, "learning_rate": 8.744084627726034e-06, "loss": 13.3243, "step": 153780 }, { "epoch": 0.310665530044401, "grad_norm": 351.3999328613281, "learning_rate": 8.743853265821776e-06, "loss": 14.5284, "step": 153790 }, { "epoch": 0.3106857306770848, "grad_norm": 351.087890625, "learning_rate": 8.743621885670431e-06, "loss": 17.1077, "step": 153800 }, { "epoch": 0.31070593130976865, "grad_norm": 365.17236328125, "learning_rate": 8.743390487273127e-06, "loss": 15.4726, "step": 153810 }, { "epoch": 0.3107261319424524, "grad_norm": 194.7184295654297, "learning_rate": 8.743159070630993e-06, "loss": 30.5381, "step": 153820 }, { "epoch": 0.31074633257513623, "grad_norm": 17.268455505371094, "learning_rate": 8.742927635745155e-06, "loss": 20.1983, "step": 153830 }, { "epoch": 0.31076653320782005, "grad_norm": 164.44923400878906, "learning_rate": 8.742696182616742e-06, "loss": 17.9478, "step": 153840 }, { "epoch": 0.31078673384050387, "grad_norm": 377.5628967285156, "learning_rate": 8.74246471124688e-06, "loss": 19.8009, "step": 153850 }, { "epoch": 0.3108069344731877, "grad_norm": 246.18031311035156, "learning_rate": 8.7422332216367e-06, "loss": 13.3118, "step": 153860 }, { "epoch": 0.3108271351058715, "grad_norm": 464.3234558105469, "learning_rate": 8.742001713787329e-06, "loss": 10.1519, "step": 153870 }, { "epoch": 0.31084733573855533, "grad_norm": 375.6519470214844, "learning_rate": 8.741770187699897e-06, "loss": 14.0036, "step": 153880 }, { "epoch": 0.31086753637123915, "grad_norm": 320.7792663574219, "learning_rate": 8.741538643375528e-06, "loss": 25.0348, "step": 153890 }, { "epoch": 0.31088773700392297, "grad_norm": 370.8011169433594, "learning_rate": 8.741307080815357e-06, "loss": 36.6458, "step": 153900 }, { "epoch": 0.3109079376366068, "grad_norm": 542.7550659179688, "learning_rate": 8.741075500020506e-06, "loss": 17.1109, "step": 153910 }, { "epoch": 0.3109281382692906, "grad_norm": 337.708984375, "learning_rate": 8.74084390099211e-06, "loss": 18.2526, "step": 153920 }, { "epoch": 0.31094833890197443, "grad_norm": 234.66014099121094, "learning_rate": 8.74061228373129e-06, "loss": 16.8481, "step": 153930 }, { "epoch": 0.31096853953465825, "grad_norm": 238.7696533203125, "learning_rate": 8.740380648239182e-06, "loss": 19.5267, "step": 153940 }, { "epoch": 0.310988740167342, "grad_norm": 295.8719177246094, "learning_rate": 8.740148994516912e-06, "loss": 17.0798, "step": 153950 }, { "epoch": 0.31100894080002583, "grad_norm": 449.1596984863281, "learning_rate": 8.73991732256561e-06, "loss": 20.6155, "step": 153960 }, { "epoch": 0.31102914143270965, "grad_norm": 325.3324890136719, "learning_rate": 8.739685632386405e-06, "loss": 15.0823, "step": 153970 }, { "epoch": 0.3110493420653935, "grad_norm": 476.2724609375, "learning_rate": 8.739453923980425e-06, "loss": 20.8968, "step": 153980 }, { "epoch": 0.3110695426980773, "grad_norm": 323.1799621582031, "learning_rate": 8.7392221973488e-06, "loss": 26.257, "step": 153990 }, { "epoch": 0.3110897433307611, "grad_norm": 761.0156860351562, "learning_rate": 8.73899045249266e-06, "loss": 44.1989, "step": 154000 }, { "epoch": 0.31110994396344493, "grad_norm": 384.6611633300781, "learning_rate": 8.738758689413133e-06, "loss": 21.3911, "step": 154010 }, { "epoch": 0.31113014459612875, "grad_norm": 304.7519226074219, "learning_rate": 8.738526908111352e-06, "loss": 29.833, "step": 154020 }, { "epoch": 0.3111503452288126, "grad_norm": 216.63046264648438, "learning_rate": 8.738295108588442e-06, "loss": 23.4506, "step": 154030 }, { "epoch": 0.3111705458614964, "grad_norm": 268.87841796875, "learning_rate": 8.738063290845536e-06, "loss": 19.3952, "step": 154040 }, { "epoch": 0.3111907464941802, "grad_norm": 404.29833984375, "learning_rate": 8.737831454883762e-06, "loss": 31.52, "step": 154050 }, { "epoch": 0.31121094712686403, "grad_norm": 572.03173828125, "learning_rate": 8.737599600704251e-06, "loss": 37.3598, "step": 154060 }, { "epoch": 0.31123114775954785, "grad_norm": 236.5586395263672, "learning_rate": 8.737367728308134e-06, "loss": 15.3038, "step": 154070 }, { "epoch": 0.3112513483922316, "grad_norm": 41.43634033203125, "learning_rate": 8.737135837696539e-06, "loss": 12.7145, "step": 154080 }, { "epoch": 0.31127154902491544, "grad_norm": 245.38836669921875, "learning_rate": 8.736903928870597e-06, "loss": 27.8264, "step": 154090 }, { "epoch": 0.31129174965759926, "grad_norm": 27.073627471923828, "learning_rate": 8.736672001831438e-06, "loss": 20.1667, "step": 154100 }, { "epoch": 0.3113119502902831, "grad_norm": 335.1429138183594, "learning_rate": 8.736440056580196e-06, "loss": 23.9061, "step": 154110 }, { "epoch": 0.3113321509229669, "grad_norm": 444.9556884765625, "learning_rate": 8.736208093117994e-06, "loss": 29.3622, "step": 154120 }, { "epoch": 0.3113523515556507, "grad_norm": 291.8482360839844, "learning_rate": 8.73597611144597e-06, "loss": 19.682, "step": 154130 }, { "epoch": 0.31137255218833454, "grad_norm": 346.0944519042969, "learning_rate": 8.73574411156525e-06, "loss": 20.0939, "step": 154140 }, { "epoch": 0.31139275282101836, "grad_norm": 96.34417724609375, "learning_rate": 8.735512093476968e-06, "loss": 30.5226, "step": 154150 }, { "epoch": 0.3114129534537022, "grad_norm": 96.61886596679688, "learning_rate": 8.735280057182252e-06, "loss": 13.3859, "step": 154160 }, { "epoch": 0.311433154086386, "grad_norm": 188.7327117919922, "learning_rate": 8.735048002682233e-06, "loss": 26.6889, "step": 154170 }, { "epoch": 0.3114533547190698, "grad_norm": 255.5815887451172, "learning_rate": 8.734815929978045e-06, "loss": 21.4268, "step": 154180 }, { "epoch": 0.31147355535175364, "grad_norm": 256.7290954589844, "learning_rate": 8.734583839070817e-06, "loss": 20.0694, "step": 154190 }, { "epoch": 0.31149375598443746, "grad_norm": 227.3784942626953, "learning_rate": 8.73435172996168e-06, "loss": 29.3152, "step": 154200 }, { "epoch": 0.3115139566171212, "grad_norm": 229.05125427246094, "learning_rate": 8.734119602651762e-06, "loss": 17.5802, "step": 154210 }, { "epoch": 0.31153415724980504, "grad_norm": 192.02804565429688, "learning_rate": 8.733887457142202e-06, "loss": 19.1386, "step": 154220 }, { "epoch": 0.31155435788248886, "grad_norm": 318.62078857421875, "learning_rate": 8.733655293434127e-06, "loss": 23.8168, "step": 154230 }, { "epoch": 0.3115745585151727, "grad_norm": 146.4119110107422, "learning_rate": 8.733423111528667e-06, "loss": 18.5625, "step": 154240 }, { "epoch": 0.3115947591478565, "grad_norm": 309.7334899902344, "learning_rate": 8.733190911426957e-06, "loss": 11.5415, "step": 154250 }, { "epoch": 0.3116149597805403, "grad_norm": 273.25018310546875, "learning_rate": 8.732958693130128e-06, "loss": 25.968, "step": 154260 }, { "epoch": 0.31163516041322414, "grad_norm": 414.7947998046875, "learning_rate": 8.73272645663931e-06, "loss": 20.1489, "step": 154270 }, { "epoch": 0.31165536104590796, "grad_norm": 353.5997009277344, "learning_rate": 8.732494201955636e-06, "loss": 16.1327, "step": 154280 }, { "epoch": 0.3116755616785918, "grad_norm": 292.2055969238281, "learning_rate": 8.732261929080239e-06, "loss": 7.7783, "step": 154290 }, { "epoch": 0.3116957623112756, "grad_norm": 263.2476501464844, "learning_rate": 8.732029638014249e-06, "loss": 15.6315, "step": 154300 }, { "epoch": 0.3117159629439594, "grad_norm": 399.04925537109375, "learning_rate": 8.7317973287588e-06, "loss": 28.5985, "step": 154310 }, { "epoch": 0.31173616357664324, "grad_norm": 229.9312286376953, "learning_rate": 8.73156500131502e-06, "loss": 32.7302, "step": 154320 }, { "epoch": 0.31175636420932706, "grad_norm": 288.3134765625, "learning_rate": 8.73133265568405e-06, "loss": 17.8314, "step": 154330 }, { "epoch": 0.31177656484201083, "grad_norm": 264.8688049316406, "learning_rate": 8.731100291867013e-06, "loss": 17.02, "step": 154340 }, { "epoch": 0.31179676547469465, "grad_norm": 137.61392211914062, "learning_rate": 8.730867909865048e-06, "loss": 29.0316, "step": 154350 }, { "epoch": 0.31181696610737847, "grad_norm": 141.66607666015625, "learning_rate": 8.730635509679286e-06, "loss": 34.5312, "step": 154360 }, { "epoch": 0.3118371667400623, "grad_norm": 140.95957946777344, "learning_rate": 8.730403091310857e-06, "loss": 16.8989, "step": 154370 }, { "epoch": 0.3118573673727461, "grad_norm": 661.583740234375, "learning_rate": 8.730170654760896e-06, "loss": 25.2838, "step": 154380 }, { "epoch": 0.31187756800542993, "grad_norm": 187.98696899414062, "learning_rate": 8.729938200030537e-06, "loss": 20.4851, "step": 154390 }, { "epoch": 0.31189776863811375, "grad_norm": 221.3997344970703, "learning_rate": 8.729705727120911e-06, "loss": 20.6201, "step": 154400 }, { "epoch": 0.31191796927079757, "grad_norm": 327.7557067871094, "learning_rate": 8.729473236033152e-06, "loss": 21.6218, "step": 154410 }, { "epoch": 0.3119381699034814, "grad_norm": 116.75366973876953, "learning_rate": 8.729240726768393e-06, "loss": 11.4815, "step": 154420 }, { "epoch": 0.3119583705361652, "grad_norm": 393.2626647949219, "learning_rate": 8.729008199327767e-06, "loss": 29.596, "step": 154430 }, { "epoch": 0.31197857116884903, "grad_norm": 151.46853637695312, "learning_rate": 8.728775653712405e-06, "loss": 20.5638, "step": 154440 }, { "epoch": 0.31199877180153285, "grad_norm": 186.26490783691406, "learning_rate": 8.728543089923444e-06, "loss": 11.6747, "step": 154450 }, { "epoch": 0.3120189724342166, "grad_norm": 524.7268676757812, "learning_rate": 8.728310507962016e-06, "loss": 21.1313, "step": 154460 }, { "epoch": 0.31203917306690043, "grad_norm": 683.7332153320312, "learning_rate": 8.728077907829256e-06, "loss": 39.9097, "step": 154470 }, { "epoch": 0.31205937369958425, "grad_norm": 92.20255279541016, "learning_rate": 8.727845289526296e-06, "loss": 9.4522, "step": 154480 }, { "epoch": 0.3120795743322681, "grad_norm": 71.641845703125, "learning_rate": 8.72761265305427e-06, "loss": 17.7239, "step": 154490 }, { "epoch": 0.3120997749649519, "grad_norm": 223.80905151367188, "learning_rate": 8.727379998414311e-06, "loss": 23.2601, "step": 154500 }, { "epoch": 0.3121199755976357, "grad_norm": 376.1450500488281, "learning_rate": 8.727147325607556e-06, "loss": 24.7525, "step": 154510 }, { "epoch": 0.31214017623031953, "grad_norm": 443.2114562988281, "learning_rate": 8.726914634635136e-06, "loss": 52.0261, "step": 154520 }, { "epoch": 0.31216037686300335, "grad_norm": 135.185302734375, "learning_rate": 8.726681925498187e-06, "loss": 19.4171, "step": 154530 }, { "epoch": 0.3121805774956872, "grad_norm": 504.91973876953125, "learning_rate": 8.72644919819784e-06, "loss": 20.0781, "step": 154540 }, { "epoch": 0.312200778128371, "grad_norm": 64.86072540283203, "learning_rate": 8.726216452735233e-06, "loss": 13.2381, "step": 154550 }, { "epoch": 0.3122209787610548, "grad_norm": 251.6763916015625, "learning_rate": 8.725983689111499e-06, "loss": 13.452, "step": 154560 }, { "epoch": 0.31224117939373863, "grad_norm": 345.4408264160156, "learning_rate": 8.725750907327772e-06, "loss": 20.0159, "step": 154570 }, { "epoch": 0.31226138002642245, "grad_norm": 470.5979309082031, "learning_rate": 8.725518107385188e-06, "loss": 24.2557, "step": 154580 }, { "epoch": 0.3122815806591062, "grad_norm": 227.6385955810547, "learning_rate": 8.725285289284879e-06, "loss": 29.8967, "step": 154590 }, { "epoch": 0.31230178129179004, "grad_norm": 157.2914581298828, "learning_rate": 8.725052453027982e-06, "loss": 13.6944, "step": 154600 }, { "epoch": 0.31232198192447386, "grad_norm": 156.19088745117188, "learning_rate": 8.72481959861563e-06, "loss": 22.6279, "step": 154610 }, { "epoch": 0.3123421825571577, "grad_norm": 286.2450866699219, "learning_rate": 8.72458672604896e-06, "loss": 22.757, "step": 154620 }, { "epoch": 0.3123623831898415, "grad_norm": 651.7180786132812, "learning_rate": 8.724353835329107e-06, "loss": 30.0882, "step": 154630 }, { "epoch": 0.3123825838225253, "grad_norm": 240.5815887451172, "learning_rate": 8.724120926457205e-06, "loss": 19.8257, "step": 154640 }, { "epoch": 0.31240278445520914, "grad_norm": 384.8279113769531, "learning_rate": 8.723887999434389e-06, "loss": 29.8088, "step": 154650 }, { "epoch": 0.31242298508789296, "grad_norm": 241.5310821533203, "learning_rate": 8.723655054261792e-06, "loss": 21.4962, "step": 154660 }, { "epoch": 0.3124431857205768, "grad_norm": 162.2728729248047, "learning_rate": 8.723422090940556e-06, "loss": 22.5891, "step": 154670 }, { "epoch": 0.3124633863532606, "grad_norm": 2855.247802734375, "learning_rate": 8.72318910947181e-06, "loss": 22.1236, "step": 154680 }, { "epoch": 0.3124835869859444, "grad_norm": 601.62890625, "learning_rate": 8.722956109856693e-06, "loss": 26.5364, "step": 154690 }, { "epoch": 0.31250378761862824, "grad_norm": 304.3387451171875, "learning_rate": 8.722723092096337e-06, "loss": 28.0836, "step": 154700 }, { "epoch": 0.31252398825131206, "grad_norm": 178.64630126953125, "learning_rate": 8.722490056191884e-06, "loss": 15.994, "step": 154710 }, { "epoch": 0.3125441888839958, "grad_norm": 209.43283081054688, "learning_rate": 8.722257002144462e-06, "loss": 16.9014, "step": 154720 }, { "epoch": 0.31256438951667964, "grad_norm": 312.5208740234375, "learning_rate": 8.722023929955213e-06, "loss": 23.7401, "step": 154730 }, { "epoch": 0.31258459014936346, "grad_norm": 320.650634765625, "learning_rate": 8.72179083962527e-06, "loss": 19.766, "step": 154740 }, { "epoch": 0.3126047907820473, "grad_norm": 188.35301208496094, "learning_rate": 8.72155773115577e-06, "loss": 14.4807, "step": 154750 }, { "epoch": 0.3126249914147311, "grad_norm": 326.0574645996094, "learning_rate": 8.721324604547851e-06, "loss": 33.1109, "step": 154760 }, { "epoch": 0.3126451920474149, "grad_norm": 187.89846801757812, "learning_rate": 8.721091459802646e-06, "loss": 27.7554, "step": 154770 }, { "epoch": 0.31266539268009874, "grad_norm": 936.06689453125, "learning_rate": 8.72085829692129e-06, "loss": 20.111, "step": 154780 }, { "epoch": 0.31268559331278256, "grad_norm": 80.451904296875, "learning_rate": 8.720625115904927e-06, "loss": 27.283, "step": 154790 }, { "epoch": 0.3127057939454664, "grad_norm": 119.96383666992188, "learning_rate": 8.720391916754683e-06, "loss": 23.7468, "step": 154800 }, { "epoch": 0.3127259945781502, "grad_norm": 646.8290405273438, "learning_rate": 8.720158699471704e-06, "loss": 35.0676, "step": 154810 }, { "epoch": 0.312746195210834, "grad_norm": 239.94444274902344, "learning_rate": 8.71992546405712e-06, "loss": 24.8069, "step": 154820 }, { "epoch": 0.31276639584351784, "grad_norm": 488.15643310546875, "learning_rate": 8.719692210512072e-06, "loss": 17.6687, "step": 154830 }, { "epoch": 0.31278659647620166, "grad_norm": 157.03851318359375, "learning_rate": 8.719458938837695e-06, "loss": 20.9636, "step": 154840 }, { "epoch": 0.3128067971088854, "grad_norm": 144.076171875, "learning_rate": 8.719225649035126e-06, "loss": 15.9744, "step": 154850 }, { "epoch": 0.31282699774156925, "grad_norm": 304.18353271484375, "learning_rate": 8.718992341105503e-06, "loss": 21.9069, "step": 154860 }, { "epoch": 0.31284719837425307, "grad_norm": 163.7047119140625, "learning_rate": 8.718759015049963e-06, "loss": 17.6321, "step": 154870 }, { "epoch": 0.3128673990069369, "grad_norm": 240.96543884277344, "learning_rate": 8.71852567086964e-06, "loss": 19.9829, "step": 154880 }, { "epoch": 0.3128875996396207, "grad_norm": 506.4364013671875, "learning_rate": 8.718292308565675e-06, "loss": 22.6542, "step": 154890 }, { "epoch": 0.3129078002723045, "grad_norm": 32.02078628540039, "learning_rate": 8.718058928139205e-06, "loss": 29.8132, "step": 154900 }, { "epoch": 0.31292800090498835, "grad_norm": 378.37353515625, "learning_rate": 8.717825529591367e-06, "loss": 21.4892, "step": 154910 }, { "epoch": 0.31294820153767217, "grad_norm": 160.11459350585938, "learning_rate": 8.717592112923296e-06, "loss": 18.7667, "step": 154920 }, { "epoch": 0.312968402170356, "grad_norm": 468.9417724609375, "learning_rate": 8.717358678136133e-06, "loss": 33.2246, "step": 154930 }, { "epoch": 0.3129886028030398, "grad_norm": 281.8332824707031, "learning_rate": 8.717125225231018e-06, "loss": 18.2816, "step": 154940 }, { "epoch": 0.3130088034357236, "grad_norm": 302.9359130859375, "learning_rate": 8.716891754209081e-06, "loss": 35.3219, "step": 154950 }, { "epoch": 0.31302900406840745, "grad_norm": 350.93719482421875, "learning_rate": 8.716658265071467e-06, "loss": 51.2295, "step": 154960 }, { "epoch": 0.3130492047010912, "grad_norm": 275.20880126953125, "learning_rate": 8.71642475781931e-06, "loss": 13.5733, "step": 154970 }, { "epoch": 0.31306940533377503, "grad_norm": 208.75143432617188, "learning_rate": 8.71619123245375e-06, "loss": 11.5839, "step": 154980 }, { "epoch": 0.31308960596645885, "grad_norm": 596.0903930664062, "learning_rate": 8.715957688975925e-06, "loss": 26.0751, "step": 154990 }, { "epoch": 0.31310980659914267, "grad_norm": 1028.61669921875, "learning_rate": 8.715724127386971e-06, "loss": 33.3783, "step": 155000 }, { "epoch": 0.3131300072318265, "grad_norm": 355.68963623046875, "learning_rate": 8.71549054768803e-06, "loss": 37.0535, "step": 155010 }, { "epoch": 0.3131502078645103, "grad_norm": 320.8461608886719, "learning_rate": 8.715256949880239e-06, "loss": 22.3606, "step": 155020 }, { "epoch": 0.31317040849719413, "grad_norm": 210.7073974609375, "learning_rate": 8.715023333964737e-06, "loss": 19.7579, "step": 155030 }, { "epoch": 0.31319060912987795, "grad_norm": 232.6704559326172, "learning_rate": 8.714789699942659e-06, "loss": 16.7873, "step": 155040 }, { "epoch": 0.31321080976256177, "grad_norm": 203.4931640625, "learning_rate": 8.714556047815148e-06, "loss": 19.216, "step": 155050 }, { "epoch": 0.3132310103952456, "grad_norm": 59.3969612121582, "learning_rate": 8.714322377583341e-06, "loss": 10.1145, "step": 155060 }, { "epoch": 0.3132512110279294, "grad_norm": 332.44586181640625, "learning_rate": 8.714088689248379e-06, "loss": 23.6119, "step": 155070 }, { "epoch": 0.31327141166061323, "grad_norm": 229.68531799316406, "learning_rate": 8.713854982811398e-06, "loss": 21.7468, "step": 155080 }, { "epoch": 0.31329161229329705, "grad_norm": 288.06634521484375, "learning_rate": 8.713621258273539e-06, "loss": 37.331, "step": 155090 }, { "epoch": 0.3133118129259808, "grad_norm": 315.9927062988281, "learning_rate": 8.713387515635938e-06, "loss": 11.6108, "step": 155100 }, { "epoch": 0.31333201355866463, "grad_norm": 16.27593231201172, "learning_rate": 8.713153754899738e-06, "loss": 15.187, "step": 155110 }, { "epoch": 0.31335221419134845, "grad_norm": 288.355712890625, "learning_rate": 8.712919976066078e-06, "loss": 21.6534, "step": 155120 }, { "epoch": 0.3133724148240323, "grad_norm": 190.1195831298828, "learning_rate": 8.712686179136097e-06, "loss": 15.2023, "step": 155130 }, { "epoch": 0.3133926154567161, "grad_norm": 287.1048278808594, "learning_rate": 8.712452364110931e-06, "loss": 16.4714, "step": 155140 }, { "epoch": 0.3134128160893999, "grad_norm": 473.3482971191406, "learning_rate": 8.712218530991723e-06, "loss": 24.5891, "step": 155150 }, { "epoch": 0.31343301672208373, "grad_norm": 251.05020141601562, "learning_rate": 8.711984679779612e-06, "loss": 38.3659, "step": 155160 }, { "epoch": 0.31345321735476755, "grad_norm": 441.2667541503906, "learning_rate": 8.71175081047574e-06, "loss": 16.8322, "step": 155170 }, { "epoch": 0.3134734179874514, "grad_norm": 565.145751953125, "learning_rate": 8.711516923081244e-06, "loss": 18.26, "step": 155180 }, { "epoch": 0.3134936186201352, "grad_norm": 32.02457046508789, "learning_rate": 8.711283017597265e-06, "loss": 15.6823, "step": 155190 }, { "epoch": 0.313513819252819, "grad_norm": 423.2878723144531, "learning_rate": 8.711049094024942e-06, "loss": 38.0471, "step": 155200 }, { "epoch": 0.31353401988550283, "grad_norm": 202.4793701171875, "learning_rate": 8.710815152365416e-06, "loss": 22.4958, "step": 155210 }, { "epoch": 0.31355422051818665, "grad_norm": 294.6838684082031, "learning_rate": 8.710581192619824e-06, "loss": 16.3909, "step": 155220 }, { "epoch": 0.3135744211508704, "grad_norm": 143.47508239746094, "learning_rate": 8.710347214789313e-06, "loss": 20.146, "step": 155230 }, { "epoch": 0.31359462178355424, "grad_norm": 280.291748046875, "learning_rate": 8.710113218875018e-06, "loss": 14.5336, "step": 155240 }, { "epoch": 0.31361482241623806, "grad_norm": 321.8594055175781, "learning_rate": 8.709879204878082e-06, "loss": 25.504, "step": 155250 }, { "epoch": 0.3136350230489219, "grad_norm": 224.975830078125, "learning_rate": 8.709645172799646e-06, "loss": 20.1451, "step": 155260 }, { "epoch": 0.3136552236816057, "grad_norm": 616.2012939453125, "learning_rate": 8.709411122640847e-06, "loss": 23.5561, "step": 155270 }, { "epoch": 0.3136754243142895, "grad_norm": 396.50714111328125, "learning_rate": 8.709177054402829e-06, "loss": 12.3615, "step": 155280 }, { "epoch": 0.31369562494697334, "grad_norm": 338.65667724609375, "learning_rate": 8.708942968086733e-06, "loss": 14.8314, "step": 155290 }, { "epoch": 0.31371582557965716, "grad_norm": 825.1728515625, "learning_rate": 8.708708863693696e-06, "loss": 23.9582, "step": 155300 }, { "epoch": 0.313736026212341, "grad_norm": 204.37982177734375, "learning_rate": 8.708474741224863e-06, "loss": 27.5471, "step": 155310 }, { "epoch": 0.3137562268450248, "grad_norm": 0.0, "learning_rate": 8.708240600681375e-06, "loss": 16.1364, "step": 155320 }, { "epoch": 0.3137764274777086, "grad_norm": 232.5314178466797, "learning_rate": 8.708006442064373e-06, "loss": 13.1361, "step": 155330 }, { "epoch": 0.31379662811039244, "grad_norm": 338.7648010253906, "learning_rate": 8.707772265374994e-06, "loss": 20.4867, "step": 155340 }, { "epoch": 0.31381682874307626, "grad_norm": 398.72540283203125, "learning_rate": 8.707538070614385e-06, "loss": 34.1075, "step": 155350 }, { "epoch": 0.31383702937576, "grad_norm": 99.78507995605469, "learning_rate": 8.707303857783685e-06, "loss": 31.2221, "step": 155360 }, { "epoch": 0.31385723000844384, "grad_norm": 463.2357177734375, "learning_rate": 8.707069626884034e-06, "loss": 24.9596, "step": 155370 }, { "epoch": 0.31387743064112766, "grad_norm": 351.2920837402344, "learning_rate": 8.706835377916579e-06, "loss": 16.4753, "step": 155380 }, { "epoch": 0.3138976312738115, "grad_norm": 61.62226104736328, "learning_rate": 8.706601110882456e-06, "loss": 12.2813, "step": 155390 }, { "epoch": 0.3139178319064953, "grad_norm": 426.31317138671875, "learning_rate": 8.706366825782805e-06, "loss": 19.4925, "step": 155400 }, { "epoch": 0.3139380325391791, "grad_norm": 469.349853515625, "learning_rate": 8.706132522618777e-06, "loss": 26.5761, "step": 155410 }, { "epoch": 0.31395823317186294, "grad_norm": 112.34686279296875, "learning_rate": 8.705898201391504e-06, "loss": 23.326, "step": 155420 }, { "epoch": 0.31397843380454676, "grad_norm": 363.5381774902344, "learning_rate": 8.705663862102137e-06, "loss": 28.2904, "step": 155430 }, { "epoch": 0.3139986344372306, "grad_norm": 278.5861511230469, "learning_rate": 8.705429504751813e-06, "loss": 31.5843, "step": 155440 }, { "epoch": 0.3140188350699144, "grad_norm": 144.3763427734375, "learning_rate": 8.705195129341672e-06, "loss": 18.9811, "step": 155450 }, { "epoch": 0.3140390357025982, "grad_norm": 342.8795471191406, "learning_rate": 8.704960735872862e-06, "loss": 27.9212, "step": 155460 }, { "epoch": 0.31405923633528204, "grad_norm": 445.9842224121094, "learning_rate": 8.704726324346521e-06, "loss": 31.3798, "step": 155470 }, { "epoch": 0.31407943696796586, "grad_norm": 260.4394836425781, "learning_rate": 8.704491894763794e-06, "loss": 17.1406, "step": 155480 }, { "epoch": 0.31409963760064963, "grad_norm": 246.53053283691406, "learning_rate": 8.704257447125823e-06, "loss": 19.334, "step": 155490 }, { "epoch": 0.31411983823333345, "grad_norm": 376.68902587890625, "learning_rate": 8.70402298143375e-06, "loss": 17.2797, "step": 155500 }, { "epoch": 0.31414003886601727, "grad_norm": 413.4793701171875, "learning_rate": 8.70378849768872e-06, "loss": 19.2107, "step": 155510 }, { "epoch": 0.3141602394987011, "grad_norm": 277.1871643066406, "learning_rate": 8.703553995891873e-06, "loss": 19.1673, "step": 155520 }, { "epoch": 0.3141804401313849, "grad_norm": 242.46566772460938, "learning_rate": 8.703319476044352e-06, "loss": 22.468, "step": 155530 }, { "epoch": 0.31420064076406873, "grad_norm": 340.93084716796875, "learning_rate": 8.703084938147302e-06, "loss": 24.9824, "step": 155540 }, { "epoch": 0.31422084139675255, "grad_norm": 238.921630859375, "learning_rate": 8.702850382201863e-06, "loss": 18.1263, "step": 155550 }, { "epoch": 0.31424104202943637, "grad_norm": 222.2796630859375, "learning_rate": 8.702615808209185e-06, "loss": 12.2482, "step": 155560 }, { "epoch": 0.3142612426621202, "grad_norm": 450.24993896484375, "learning_rate": 8.702381216170404e-06, "loss": 24.6157, "step": 155570 }, { "epoch": 0.314281443294804, "grad_norm": 516.1026000976562, "learning_rate": 8.702146606086665e-06, "loss": 32.8766, "step": 155580 }, { "epoch": 0.31430164392748783, "grad_norm": 335.5802917480469, "learning_rate": 8.701911977959113e-06, "loss": 20.9245, "step": 155590 }, { "epoch": 0.31432184456017165, "grad_norm": 89.57960510253906, "learning_rate": 8.701677331788891e-06, "loss": 20.639, "step": 155600 }, { "epoch": 0.3143420451928554, "grad_norm": 205.2728271484375, "learning_rate": 8.701442667577143e-06, "loss": 10.2573, "step": 155610 }, { "epoch": 0.31436224582553923, "grad_norm": 448.92388916015625, "learning_rate": 8.701207985325013e-06, "loss": 19.1849, "step": 155620 }, { "epoch": 0.31438244645822305, "grad_norm": 317.2098083496094, "learning_rate": 8.700973285033642e-06, "loss": 25.4818, "step": 155630 }, { "epoch": 0.3144026470909069, "grad_norm": 268.8910827636719, "learning_rate": 8.700738566704178e-06, "loss": 14.4267, "step": 155640 }, { "epoch": 0.3144228477235907, "grad_norm": 318.28021240234375, "learning_rate": 8.700503830337763e-06, "loss": 20.5692, "step": 155650 }, { "epoch": 0.3144430483562745, "grad_norm": 181.130859375, "learning_rate": 8.700269075935542e-06, "loss": 19.4072, "step": 155660 }, { "epoch": 0.31446324898895833, "grad_norm": 262.43609619140625, "learning_rate": 8.700034303498657e-06, "loss": 11.9716, "step": 155670 }, { "epoch": 0.31448344962164215, "grad_norm": 314.77362060546875, "learning_rate": 8.699799513028252e-06, "loss": 14.8286, "step": 155680 }, { "epoch": 0.314503650254326, "grad_norm": 300.4656677246094, "learning_rate": 8.699564704525477e-06, "loss": 17.6986, "step": 155690 }, { "epoch": 0.3145238508870098, "grad_norm": 222.7803955078125, "learning_rate": 8.699329877991469e-06, "loss": 33.8253, "step": 155700 }, { "epoch": 0.3145440515196936, "grad_norm": 89.81096649169922, "learning_rate": 8.699095033427377e-06, "loss": 15.1095, "step": 155710 }, { "epoch": 0.31456425215237743, "grad_norm": 185.2431640625, "learning_rate": 8.698860170834343e-06, "loss": 33.2029, "step": 155720 }, { "epoch": 0.31458445278506125, "grad_norm": 408.5797424316406, "learning_rate": 8.698625290213515e-06, "loss": 15.9979, "step": 155730 }, { "epoch": 0.314604653417745, "grad_norm": 399.3043518066406, "learning_rate": 8.698390391566036e-06, "loss": 18.773, "step": 155740 }, { "epoch": 0.31462485405042884, "grad_norm": 251.0016632080078, "learning_rate": 8.69815547489305e-06, "loss": 14.9639, "step": 155750 }, { "epoch": 0.31464505468311266, "grad_norm": 423.2959289550781, "learning_rate": 8.697920540195702e-06, "loss": 24.0375, "step": 155760 }, { "epoch": 0.3146652553157965, "grad_norm": 216.2311553955078, "learning_rate": 8.697685587475139e-06, "loss": 25.7205, "step": 155770 }, { "epoch": 0.3146854559484803, "grad_norm": 12.564062118530273, "learning_rate": 8.697450616732503e-06, "loss": 26.0747, "step": 155780 }, { "epoch": 0.3147056565811641, "grad_norm": 310.0550842285156, "learning_rate": 8.697215627968944e-06, "loss": 26.1844, "step": 155790 }, { "epoch": 0.31472585721384794, "grad_norm": 362.9472961425781, "learning_rate": 8.696980621185602e-06, "loss": 20.4762, "step": 155800 }, { "epoch": 0.31474605784653176, "grad_norm": 222.69241333007812, "learning_rate": 8.696745596383627e-06, "loss": 14.1392, "step": 155810 }, { "epoch": 0.3147662584792156, "grad_norm": 168.59030151367188, "learning_rate": 8.696510553564162e-06, "loss": 21.8355, "step": 155820 }, { "epoch": 0.3147864591118994, "grad_norm": 307.4064025878906, "learning_rate": 8.696275492728352e-06, "loss": 28.7196, "step": 155830 }, { "epoch": 0.3148066597445832, "grad_norm": 358.9534606933594, "learning_rate": 8.696040413877344e-06, "loss": 27.7391, "step": 155840 }, { "epoch": 0.31482686037726704, "grad_norm": 223.19859313964844, "learning_rate": 8.695805317012283e-06, "loss": 22.7006, "step": 155850 }, { "epoch": 0.31484706100995086, "grad_norm": 321.0693664550781, "learning_rate": 8.695570202134314e-06, "loss": 21.807, "step": 155860 }, { "epoch": 0.3148672616426346, "grad_norm": 339.5673522949219, "learning_rate": 8.695335069244586e-06, "loss": 17.5193, "step": 155870 }, { "epoch": 0.31488746227531844, "grad_norm": 324.5011291503906, "learning_rate": 8.695099918344243e-06, "loss": 32.4654, "step": 155880 }, { "epoch": 0.31490766290800226, "grad_norm": 238.60765075683594, "learning_rate": 8.69486474943443e-06, "loss": 18.2466, "step": 155890 }, { "epoch": 0.3149278635406861, "grad_norm": 412.79559326171875, "learning_rate": 8.694629562516295e-06, "loss": 24.5511, "step": 155900 }, { "epoch": 0.3149480641733699, "grad_norm": 402.1057434082031, "learning_rate": 8.694394357590982e-06, "loss": 13.6489, "step": 155910 }, { "epoch": 0.3149682648060537, "grad_norm": 191.8535614013672, "learning_rate": 8.694159134659641e-06, "loss": 22.7485, "step": 155920 }, { "epoch": 0.31498846543873754, "grad_norm": 199.75921630859375, "learning_rate": 8.693923893723415e-06, "loss": 18.8825, "step": 155930 }, { "epoch": 0.31500866607142136, "grad_norm": 519.1969604492188, "learning_rate": 8.693688634783453e-06, "loss": 12.548, "step": 155940 }, { "epoch": 0.3150288667041052, "grad_norm": 263.66156005859375, "learning_rate": 8.6934533578409e-06, "loss": 10.4727, "step": 155950 }, { "epoch": 0.315049067336789, "grad_norm": 321.00848388671875, "learning_rate": 8.693218062896905e-06, "loss": 10.2939, "step": 155960 }, { "epoch": 0.3150692679694728, "grad_norm": 352.0982666015625, "learning_rate": 8.692982749952613e-06, "loss": 24.62, "step": 155970 }, { "epoch": 0.31508946860215664, "grad_norm": 278.9892883300781, "learning_rate": 8.692747419009168e-06, "loss": 19.6306, "step": 155980 }, { "epoch": 0.31510966923484046, "grad_norm": 339.9013977050781, "learning_rate": 8.692512070067722e-06, "loss": 32.1049, "step": 155990 }, { "epoch": 0.3151298698675242, "grad_norm": 739.4376831054688, "learning_rate": 8.692276703129421e-06, "loss": 16.2925, "step": 156000 }, { "epoch": 0.31515007050020805, "grad_norm": 482.3772888183594, "learning_rate": 8.692041318195409e-06, "loss": 14.6028, "step": 156010 }, { "epoch": 0.31517027113289187, "grad_norm": 126.10417938232422, "learning_rate": 8.691805915266836e-06, "loss": 18.4097, "step": 156020 }, { "epoch": 0.3151904717655757, "grad_norm": 445.10400390625, "learning_rate": 8.691570494344848e-06, "loss": 19.9767, "step": 156030 }, { "epoch": 0.3152106723982595, "grad_norm": 373.9829406738281, "learning_rate": 8.691335055430595e-06, "loss": 19.0924, "step": 156040 }, { "epoch": 0.3152308730309433, "grad_norm": 491.060302734375, "learning_rate": 8.691099598525222e-06, "loss": 19.6029, "step": 156050 }, { "epoch": 0.31525107366362715, "grad_norm": 308.44403076171875, "learning_rate": 8.690864123629876e-06, "loss": 32.8643, "step": 156060 }, { "epoch": 0.31527127429631097, "grad_norm": 346.0982666015625, "learning_rate": 8.690628630745708e-06, "loss": 16.5971, "step": 156070 }, { "epoch": 0.3152914749289948, "grad_norm": 405.4490661621094, "learning_rate": 8.690393119873863e-06, "loss": 16.2308, "step": 156080 }, { "epoch": 0.3153116755616786, "grad_norm": 193.64842224121094, "learning_rate": 8.690157591015488e-06, "loss": 30.8859, "step": 156090 }, { "epoch": 0.3153318761943624, "grad_norm": 336.9530029296875, "learning_rate": 8.689922044171735e-06, "loss": 13.6459, "step": 156100 }, { "epoch": 0.31535207682704625, "grad_norm": 134.61126708984375, "learning_rate": 8.689686479343747e-06, "loss": 16.5464, "step": 156110 }, { "epoch": 0.31537227745973007, "grad_norm": 413.19903564453125, "learning_rate": 8.689450896532675e-06, "loss": 12.662, "step": 156120 }, { "epoch": 0.31539247809241383, "grad_norm": 197.7588348388672, "learning_rate": 8.689215295739669e-06, "loss": 12.922, "step": 156130 }, { "epoch": 0.31541267872509765, "grad_norm": 162.37625122070312, "learning_rate": 8.688979676965872e-06, "loss": 13.9398, "step": 156140 }, { "epoch": 0.31543287935778147, "grad_norm": 395.22113037109375, "learning_rate": 8.688744040212438e-06, "loss": 12.6098, "step": 156150 }, { "epoch": 0.3154530799904653, "grad_norm": 145.20359802246094, "learning_rate": 8.688508385480513e-06, "loss": 18.4165, "step": 156160 }, { "epoch": 0.3154732806231491, "grad_norm": 63.33405685424805, "learning_rate": 8.688272712771243e-06, "loss": 12.583, "step": 156170 }, { "epoch": 0.31549348125583293, "grad_norm": 435.2193908691406, "learning_rate": 8.688037022085783e-06, "loss": 18.6411, "step": 156180 }, { "epoch": 0.31551368188851675, "grad_norm": 303.4767150878906, "learning_rate": 8.687801313425275e-06, "loss": 24.468, "step": 156190 }, { "epoch": 0.31553388252120057, "grad_norm": 164.4569854736328, "learning_rate": 8.68756558679087e-06, "loss": 12.2792, "step": 156200 }, { "epoch": 0.3155540831538844, "grad_norm": 397.9287414550781, "learning_rate": 8.68732984218372e-06, "loss": 19.6776, "step": 156210 }, { "epoch": 0.3155742837865682, "grad_norm": 570.5171508789062, "learning_rate": 8.68709407960497e-06, "loss": 31.2626, "step": 156220 }, { "epoch": 0.31559448441925203, "grad_norm": 143.38290405273438, "learning_rate": 8.68685829905577e-06, "loss": 21.4421, "step": 156230 }, { "epoch": 0.31561468505193585, "grad_norm": 131.50717163085938, "learning_rate": 8.686622500537272e-06, "loss": 14.0374, "step": 156240 }, { "epoch": 0.3156348856846196, "grad_norm": 201.74334716796875, "learning_rate": 8.68638668405062e-06, "loss": 13.9118, "step": 156250 }, { "epoch": 0.31565508631730343, "grad_norm": 59.210166931152344, "learning_rate": 8.68615084959697e-06, "loss": 17.171, "step": 156260 }, { "epoch": 0.31567528694998725, "grad_norm": 649.3320922851562, "learning_rate": 8.685914997177465e-06, "loss": 27.8755, "step": 156270 }, { "epoch": 0.3156954875826711, "grad_norm": 255.3026580810547, "learning_rate": 8.685679126793258e-06, "loss": 23.7287, "step": 156280 }, { "epoch": 0.3157156882153549, "grad_norm": 211.4212188720703, "learning_rate": 8.6854432384455e-06, "loss": 19.3804, "step": 156290 }, { "epoch": 0.3157358888480387, "grad_norm": 292.63458251953125, "learning_rate": 8.685207332135337e-06, "loss": 16.3499, "step": 156300 }, { "epoch": 0.31575608948072253, "grad_norm": 236.4305419921875, "learning_rate": 8.68497140786392e-06, "loss": 15.7173, "step": 156310 }, { "epoch": 0.31577629011340635, "grad_norm": 307.2117004394531, "learning_rate": 8.6847354656324e-06, "loss": 22.1556, "step": 156320 }, { "epoch": 0.3157964907460902, "grad_norm": 205.03067016601562, "learning_rate": 8.684499505441926e-06, "loss": 22.5673, "step": 156330 }, { "epoch": 0.315816691378774, "grad_norm": 155.00140380859375, "learning_rate": 8.684263527293649e-06, "loss": 27.2229, "step": 156340 }, { "epoch": 0.3158368920114578, "grad_norm": 113.0438003540039, "learning_rate": 8.684027531188717e-06, "loss": 29.6661, "step": 156350 }, { "epoch": 0.31585709264414163, "grad_norm": 404.43194580078125, "learning_rate": 8.683791517128282e-06, "loss": 17.5002, "step": 156360 }, { "epoch": 0.31587729327682545, "grad_norm": 401.067626953125, "learning_rate": 8.683555485113493e-06, "loss": 30.0975, "step": 156370 }, { "epoch": 0.3158974939095092, "grad_norm": 467.6741638183594, "learning_rate": 8.683319435145503e-06, "loss": 28.4054, "step": 156380 }, { "epoch": 0.31591769454219304, "grad_norm": 305.7153625488281, "learning_rate": 8.683083367225461e-06, "loss": 25.787, "step": 156390 }, { "epoch": 0.31593789517487686, "grad_norm": 284.81689453125, "learning_rate": 8.682847281354517e-06, "loss": 29.4344, "step": 156400 }, { "epoch": 0.3159580958075607, "grad_norm": 278.07330322265625, "learning_rate": 8.682611177533822e-06, "loss": 9.4029, "step": 156410 }, { "epoch": 0.3159782964402445, "grad_norm": 388.7257080078125, "learning_rate": 8.682375055764528e-06, "loss": 23.594, "step": 156420 }, { "epoch": 0.3159984970729283, "grad_norm": 254.3741912841797, "learning_rate": 8.682138916047782e-06, "loss": 52.6698, "step": 156430 }, { "epoch": 0.31601869770561214, "grad_norm": 87.3544692993164, "learning_rate": 8.681902758384738e-06, "loss": 17.5018, "step": 156440 }, { "epoch": 0.31603889833829596, "grad_norm": 312.1885070800781, "learning_rate": 8.681666582776547e-06, "loss": 27.0761, "step": 156450 }, { "epoch": 0.3160590989709798, "grad_norm": 346.2375793457031, "learning_rate": 8.68143038922436e-06, "loss": 13.2552, "step": 156460 }, { "epoch": 0.3160792996036636, "grad_norm": 488.3246765136719, "learning_rate": 8.681194177729328e-06, "loss": 22.9379, "step": 156470 }, { "epoch": 0.3160995002363474, "grad_norm": 774.063720703125, "learning_rate": 8.680957948292602e-06, "loss": 16.7564, "step": 156480 }, { "epoch": 0.31611970086903124, "grad_norm": 304.7137145996094, "learning_rate": 8.680721700915333e-06, "loss": 17.2847, "step": 156490 }, { "epoch": 0.31613990150171506, "grad_norm": 205.25759887695312, "learning_rate": 8.680485435598674e-06, "loss": 14.1518, "step": 156500 }, { "epoch": 0.3161601021343988, "grad_norm": 211.16964721679688, "learning_rate": 8.680249152343772e-06, "loss": 12.0488, "step": 156510 }, { "epoch": 0.31618030276708264, "grad_norm": 400.6031494140625, "learning_rate": 8.680012851151785e-06, "loss": 20.196, "step": 156520 }, { "epoch": 0.31620050339976646, "grad_norm": 279.0886535644531, "learning_rate": 8.679776532023861e-06, "loss": 20.8917, "step": 156530 }, { "epoch": 0.3162207040324503, "grad_norm": 415.4428405761719, "learning_rate": 8.679540194961153e-06, "loss": 19.1178, "step": 156540 }, { "epoch": 0.3162409046651341, "grad_norm": 211.7430419921875, "learning_rate": 8.679303839964811e-06, "loss": 18.9562, "step": 156550 }, { "epoch": 0.3162611052978179, "grad_norm": 492.7645263671875, "learning_rate": 8.679067467035989e-06, "loss": 16.6197, "step": 156560 }, { "epoch": 0.31628130593050174, "grad_norm": 680.2404174804688, "learning_rate": 8.678831076175838e-06, "loss": 21.3511, "step": 156570 }, { "epoch": 0.31630150656318556, "grad_norm": 325.4295349121094, "learning_rate": 8.678594667385511e-06, "loss": 16.834, "step": 156580 }, { "epoch": 0.3163217071958694, "grad_norm": 268.95867919921875, "learning_rate": 8.67835824066616e-06, "loss": 19.1201, "step": 156590 }, { "epoch": 0.3163419078285532, "grad_norm": 229.71380615234375, "learning_rate": 8.678121796018938e-06, "loss": 36.3229, "step": 156600 }, { "epoch": 0.316362108461237, "grad_norm": 338.6586608886719, "learning_rate": 8.677885333444995e-06, "loss": 20.7994, "step": 156610 }, { "epoch": 0.31638230909392084, "grad_norm": 142.8424530029297, "learning_rate": 8.677648852945486e-06, "loss": 11.9092, "step": 156620 }, { "epoch": 0.31640250972660466, "grad_norm": 364.1866760253906, "learning_rate": 8.677412354521561e-06, "loss": 23.6297, "step": 156630 }, { "epoch": 0.31642271035928843, "grad_norm": 152.9621124267578, "learning_rate": 8.677175838174374e-06, "loss": 14.041, "step": 156640 }, { "epoch": 0.31644291099197225, "grad_norm": 118.7545166015625, "learning_rate": 8.67693930390508e-06, "loss": 25.8608, "step": 156650 }, { "epoch": 0.31646311162465607, "grad_norm": 312.5607604980469, "learning_rate": 8.676702751714829e-06, "loss": 17.6321, "step": 156660 }, { "epoch": 0.3164833122573399, "grad_norm": 333.78253173828125, "learning_rate": 8.676466181604775e-06, "loss": 32.9721, "step": 156670 }, { "epoch": 0.3165035128900237, "grad_norm": 602.8256225585938, "learning_rate": 8.67622959357607e-06, "loss": 30.4781, "step": 156680 }, { "epoch": 0.31652371352270753, "grad_norm": 208.40000915527344, "learning_rate": 8.675992987629869e-06, "loss": 14.8123, "step": 156690 }, { "epoch": 0.31654391415539135, "grad_norm": 182.97532653808594, "learning_rate": 8.675756363767322e-06, "loss": 13.8649, "step": 156700 }, { "epoch": 0.31656411478807517, "grad_norm": 92.03968811035156, "learning_rate": 8.675519721989585e-06, "loss": 28.5223, "step": 156710 }, { "epoch": 0.316584315420759, "grad_norm": 97.28550720214844, "learning_rate": 8.675283062297811e-06, "loss": 18.8546, "step": 156720 }, { "epoch": 0.3166045160534428, "grad_norm": 242.2445831298828, "learning_rate": 8.675046384693154e-06, "loss": 30.9306, "step": 156730 }, { "epoch": 0.31662471668612663, "grad_norm": 266.22760009765625, "learning_rate": 8.674809689176765e-06, "loss": 32.8484, "step": 156740 }, { "epoch": 0.31664491731881045, "grad_norm": 292.8721008300781, "learning_rate": 8.6745729757498e-06, "loss": 17.2175, "step": 156750 }, { "epoch": 0.31666511795149427, "grad_norm": 245.42384338378906, "learning_rate": 8.674336244413413e-06, "loss": 22.3325, "step": 156760 }, { "epoch": 0.31668531858417803, "grad_norm": 113.02253723144531, "learning_rate": 8.674099495168755e-06, "loss": 22.6812, "step": 156770 }, { "epoch": 0.31670551921686185, "grad_norm": 175.66009521484375, "learning_rate": 8.673862728016983e-06, "loss": 14.0001, "step": 156780 }, { "epoch": 0.3167257198495457, "grad_norm": 343.4086608886719, "learning_rate": 8.67362594295925e-06, "loss": 25.173, "step": 156790 }, { "epoch": 0.3167459204822295, "grad_norm": 399.105712890625, "learning_rate": 8.673389139996708e-06, "loss": 25.0461, "step": 156800 }, { "epoch": 0.3167661211149133, "grad_norm": 388.44171142578125, "learning_rate": 8.673152319130514e-06, "loss": 23.9946, "step": 156810 }, { "epoch": 0.31678632174759713, "grad_norm": 397.1879577636719, "learning_rate": 8.672915480361821e-06, "loss": 21.6412, "step": 156820 }, { "epoch": 0.31680652238028095, "grad_norm": 234.55780029296875, "learning_rate": 8.672678623691783e-06, "loss": 9.2852, "step": 156830 }, { "epoch": 0.3168267230129648, "grad_norm": 506.4057922363281, "learning_rate": 8.672441749121555e-06, "loss": 38.7299, "step": 156840 }, { "epoch": 0.3168469236456486, "grad_norm": 56.24407958984375, "learning_rate": 8.672204856652291e-06, "loss": 15.5773, "step": 156850 }, { "epoch": 0.3168671242783324, "grad_norm": 121.5376205444336, "learning_rate": 8.671967946285147e-06, "loss": 20.6228, "step": 156860 }, { "epoch": 0.31688732491101623, "grad_norm": 396.46624755859375, "learning_rate": 8.671731018021275e-06, "loss": 21.43, "step": 156870 }, { "epoch": 0.31690752554370005, "grad_norm": 70.89419555664062, "learning_rate": 8.671494071861832e-06, "loss": 20.0598, "step": 156880 }, { "epoch": 0.3169277261763838, "grad_norm": 358.5874328613281, "learning_rate": 8.671257107807974e-06, "loss": 20.8878, "step": 156890 }, { "epoch": 0.31694792680906764, "grad_norm": 169.51722717285156, "learning_rate": 8.671020125860851e-06, "loss": 34.877, "step": 156900 }, { "epoch": 0.31696812744175146, "grad_norm": 384.74200439453125, "learning_rate": 8.670783126021623e-06, "loss": 23.9613, "step": 156910 }, { "epoch": 0.3169883280744353, "grad_norm": 200.65139770507812, "learning_rate": 8.670546108291443e-06, "loss": 28.062, "step": 156920 }, { "epoch": 0.3170085287071191, "grad_norm": 178.1051025390625, "learning_rate": 8.670309072671468e-06, "loss": 29.7965, "step": 156930 }, { "epoch": 0.3170287293398029, "grad_norm": 316.2807922363281, "learning_rate": 8.67007201916285e-06, "loss": 18.7846, "step": 156940 }, { "epoch": 0.31704892997248674, "grad_norm": 282.1052551269531, "learning_rate": 8.669834947766746e-06, "loss": 19.5795, "step": 156950 }, { "epoch": 0.31706913060517056, "grad_norm": 433.21826171875, "learning_rate": 8.66959785848431e-06, "loss": 22.5472, "step": 156960 }, { "epoch": 0.3170893312378544, "grad_norm": 582.766357421875, "learning_rate": 8.669360751316702e-06, "loss": 28.1698, "step": 156970 }, { "epoch": 0.3171095318705382, "grad_norm": 184.25918579101562, "learning_rate": 8.669123626265074e-06, "loss": 19.6861, "step": 156980 }, { "epoch": 0.317129732503222, "grad_norm": 242.72813415527344, "learning_rate": 8.668886483330584e-06, "loss": 11.5836, "step": 156990 }, { "epoch": 0.31714993313590584, "grad_norm": 296.27294921875, "learning_rate": 8.668649322514382e-06, "loss": 25.8178, "step": 157000 }, { "epoch": 0.31717013376858966, "grad_norm": 132.55113220214844, "learning_rate": 8.66841214381763e-06, "loss": 31.6374, "step": 157010 }, { "epoch": 0.3171903344012734, "grad_norm": 208.5125274658203, "learning_rate": 8.668174947241485e-06, "loss": 34.6287, "step": 157020 }, { "epoch": 0.31721053503395724, "grad_norm": 207.32711791992188, "learning_rate": 8.667937732787097e-06, "loss": 19.2819, "step": 157030 }, { "epoch": 0.31723073566664106, "grad_norm": 326.9341125488281, "learning_rate": 8.667700500455627e-06, "loss": 17.2714, "step": 157040 }, { "epoch": 0.3172509362993249, "grad_norm": 174.44798278808594, "learning_rate": 8.667463250248229e-06, "loss": 18.2705, "step": 157050 }, { "epoch": 0.3172711369320087, "grad_norm": 144.37278747558594, "learning_rate": 8.667225982166058e-06, "loss": 28.2375, "step": 157060 }, { "epoch": 0.3172913375646925, "grad_norm": 53.45744705200195, "learning_rate": 8.666988696210275e-06, "loss": 14.4742, "step": 157070 }, { "epoch": 0.31731153819737634, "grad_norm": 217.32388305664062, "learning_rate": 8.666751392382033e-06, "loss": 16.2218, "step": 157080 }, { "epoch": 0.31733173883006016, "grad_norm": 112.66278839111328, "learning_rate": 8.66651407068249e-06, "loss": 22.9643, "step": 157090 }, { "epoch": 0.317351939462744, "grad_norm": 892.0944213867188, "learning_rate": 8.666276731112802e-06, "loss": 16.3623, "step": 157100 }, { "epoch": 0.3173721400954278, "grad_norm": 325.8453674316406, "learning_rate": 8.666039373674124e-06, "loss": 11.2255, "step": 157110 }, { "epoch": 0.3173923407281116, "grad_norm": 954.470703125, "learning_rate": 8.665801998367616e-06, "loss": 25.6437, "step": 157120 }, { "epoch": 0.31741254136079544, "grad_norm": 50.64088439941406, "learning_rate": 8.665564605194435e-06, "loss": 17.109, "step": 157130 }, { "epoch": 0.31743274199347926, "grad_norm": 194.3233642578125, "learning_rate": 8.665327194155736e-06, "loss": 14.2049, "step": 157140 }, { "epoch": 0.317452942626163, "grad_norm": 444.7592468261719, "learning_rate": 8.665089765252674e-06, "loss": 15.3081, "step": 157150 }, { "epoch": 0.31747314325884685, "grad_norm": 24.8874568939209, "learning_rate": 8.664852318486412e-06, "loss": 23.0455, "step": 157160 }, { "epoch": 0.31749334389153067, "grad_norm": 276.9592590332031, "learning_rate": 8.664614853858105e-06, "loss": 18.2121, "step": 157170 }, { "epoch": 0.3175135445242145, "grad_norm": 369.1905517578125, "learning_rate": 8.664377371368907e-06, "loss": 20.3723, "step": 157180 }, { "epoch": 0.3175337451568983, "grad_norm": 161.03717041015625, "learning_rate": 8.664139871019979e-06, "loss": 14.8763, "step": 157190 }, { "epoch": 0.3175539457895821, "grad_norm": 377.0033264160156, "learning_rate": 8.66390235281248e-06, "loss": 15.1942, "step": 157200 }, { "epoch": 0.31757414642226595, "grad_norm": 286.59527587890625, "learning_rate": 8.663664816747562e-06, "loss": 15.8516, "step": 157210 }, { "epoch": 0.31759434705494977, "grad_norm": 151.3009796142578, "learning_rate": 8.663427262826386e-06, "loss": 10.4331, "step": 157220 }, { "epoch": 0.3176145476876336, "grad_norm": 416.9915466308594, "learning_rate": 8.663189691050114e-06, "loss": 20.5504, "step": 157230 }, { "epoch": 0.3176347483203174, "grad_norm": 385.4063415527344, "learning_rate": 8.662952101419895e-06, "loss": 21.7554, "step": 157240 }, { "epoch": 0.3176549489530012, "grad_norm": 199.88270568847656, "learning_rate": 8.662714493936895e-06, "loss": 23.6702, "step": 157250 }, { "epoch": 0.31767514958568505, "grad_norm": 329.5572509765625, "learning_rate": 8.662476868602268e-06, "loss": 21.0732, "step": 157260 }, { "epoch": 0.31769535021836887, "grad_norm": 48.77279281616211, "learning_rate": 8.662239225417171e-06, "loss": 18.7632, "step": 157270 }, { "epoch": 0.31771555085105263, "grad_norm": 340.42291259765625, "learning_rate": 8.662001564382768e-06, "loss": 16.4172, "step": 157280 }, { "epoch": 0.31773575148373645, "grad_norm": 2181.41796875, "learning_rate": 8.66176388550021e-06, "loss": 30.6799, "step": 157290 }, { "epoch": 0.31775595211642027, "grad_norm": 213.77102661132812, "learning_rate": 8.66152618877066e-06, "loss": 26.282, "step": 157300 }, { "epoch": 0.3177761527491041, "grad_norm": 206.5682830810547, "learning_rate": 8.661288474195275e-06, "loss": 18.1259, "step": 157310 }, { "epoch": 0.3177963533817879, "grad_norm": 426.9442443847656, "learning_rate": 8.661050741775215e-06, "loss": 12.8249, "step": 157320 }, { "epoch": 0.31781655401447173, "grad_norm": 1035.5775146484375, "learning_rate": 8.660812991511636e-06, "loss": 21.9123, "step": 157330 }, { "epoch": 0.31783675464715555, "grad_norm": 293.8466796875, "learning_rate": 8.6605752234057e-06, "loss": 23.9909, "step": 157340 }, { "epoch": 0.31785695527983937, "grad_norm": 539.1363525390625, "learning_rate": 8.660337437458565e-06, "loss": 41.9561, "step": 157350 }, { "epoch": 0.3178771559125232, "grad_norm": 564.7734985351562, "learning_rate": 8.660099633671388e-06, "loss": 15.5495, "step": 157360 }, { "epoch": 0.317897356545207, "grad_norm": 1131.22265625, "learning_rate": 8.65986181204533e-06, "loss": 36.241, "step": 157370 }, { "epoch": 0.31791755717789083, "grad_norm": 824.5579833984375, "learning_rate": 8.659623972581548e-06, "loss": 41.7588, "step": 157380 }, { "epoch": 0.31793775781057465, "grad_norm": 165.24830627441406, "learning_rate": 8.659386115281205e-06, "loss": 12.4163, "step": 157390 }, { "epoch": 0.31795795844325847, "grad_norm": 540.2538452148438, "learning_rate": 8.659148240145456e-06, "loss": 23.2508, "step": 157400 }, { "epoch": 0.31797815907594223, "grad_norm": 202.78204345703125, "learning_rate": 8.658910347175463e-06, "loss": 14.0819, "step": 157410 }, { "epoch": 0.31799835970862605, "grad_norm": 345.87176513671875, "learning_rate": 8.658672436372385e-06, "loss": 19.8284, "step": 157420 }, { "epoch": 0.3180185603413099, "grad_norm": 665.663818359375, "learning_rate": 8.658434507737381e-06, "loss": 31.0578, "step": 157430 }, { "epoch": 0.3180387609739937, "grad_norm": 147.1372528076172, "learning_rate": 8.65819656127161e-06, "loss": 18.1667, "step": 157440 }, { "epoch": 0.3180589616066775, "grad_norm": 80.78816986083984, "learning_rate": 8.657958596976235e-06, "loss": 41.9783, "step": 157450 }, { "epoch": 0.31807916223936133, "grad_norm": 295.4004821777344, "learning_rate": 8.657720614852412e-06, "loss": 26.4023, "step": 157460 }, { "epoch": 0.31809936287204515, "grad_norm": 550.0714721679688, "learning_rate": 8.657482614901302e-06, "loss": 17.7067, "step": 157470 }, { "epoch": 0.318119563504729, "grad_norm": 289.6199645996094, "learning_rate": 8.657244597124066e-06, "loss": 19.0076, "step": 157480 }, { "epoch": 0.3181397641374128, "grad_norm": 152.23611450195312, "learning_rate": 8.657006561521863e-06, "loss": 27.0882, "step": 157490 }, { "epoch": 0.3181599647700966, "grad_norm": 292.5614013671875, "learning_rate": 8.656768508095853e-06, "loss": 29.0849, "step": 157500 }, { "epoch": 0.31818016540278043, "grad_norm": 261.89666748046875, "learning_rate": 8.656530436847196e-06, "loss": 14.7051, "step": 157510 }, { "epoch": 0.31820036603546425, "grad_norm": 0.0, "learning_rate": 8.656292347777056e-06, "loss": 27.4942, "step": 157520 }, { "epoch": 0.318220566668148, "grad_norm": 276.1974792480469, "learning_rate": 8.65605424088659e-06, "loss": 17.3948, "step": 157530 }, { "epoch": 0.31824076730083184, "grad_norm": 204.1532745361328, "learning_rate": 8.655816116176959e-06, "loss": 27.2264, "step": 157540 }, { "epoch": 0.31826096793351566, "grad_norm": 255.27984619140625, "learning_rate": 8.655577973649322e-06, "loss": 24.2624, "step": 157550 }, { "epoch": 0.3182811685661995, "grad_norm": 503.3350830078125, "learning_rate": 8.655339813304842e-06, "loss": 29.3907, "step": 157560 }, { "epoch": 0.3183013691988833, "grad_norm": 325.04443359375, "learning_rate": 8.655101635144678e-06, "loss": 34.5385, "step": 157570 }, { "epoch": 0.3183215698315671, "grad_norm": 133.46531677246094, "learning_rate": 8.654863439169994e-06, "loss": 24.7045, "step": 157580 }, { "epoch": 0.31834177046425094, "grad_norm": 376.1509704589844, "learning_rate": 8.654625225381947e-06, "loss": 20.6516, "step": 157590 }, { "epoch": 0.31836197109693476, "grad_norm": 234.71554565429688, "learning_rate": 8.654386993781703e-06, "loss": 24.9831, "step": 157600 }, { "epoch": 0.3183821717296186, "grad_norm": 285.09442138671875, "learning_rate": 8.654148744370416e-06, "loss": 5.5279, "step": 157610 }, { "epoch": 0.3184023723623024, "grad_norm": 293.8329772949219, "learning_rate": 8.653910477149254e-06, "loss": 26.0452, "step": 157620 }, { "epoch": 0.3184225729949862, "grad_norm": 173.8465576171875, "learning_rate": 8.653672192119376e-06, "loss": 24.5821, "step": 157630 }, { "epoch": 0.31844277362767004, "grad_norm": 261.7745056152344, "learning_rate": 8.65343388928194e-06, "loss": 7.1364, "step": 157640 }, { "epoch": 0.31846297426035386, "grad_norm": 360.0426025390625, "learning_rate": 8.653195568638114e-06, "loss": 15.0632, "step": 157650 }, { "epoch": 0.3184831748930376, "grad_norm": 521.11572265625, "learning_rate": 8.652957230189051e-06, "loss": 17.0265, "step": 157660 }, { "epoch": 0.31850337552572144, "grad_norm": 187.3146209716797, "learning_rate": 8.652718873935922e-06, "loss": 15.9365, "step": 157670 }, { "epoch": 0.31852357615840526, "grad_norm": 946.49951171875, "learning_rate": 8.652480499879881e-06, "loss": 39.0081, "step": 157680 }, { "epoch": 0.3185437767910891, "grad_norm": 12.066466331481934, "learning_rate": 8.652242108022095e-06, "loss": 15.4507, "step": 157690 }, { "epoch": 0.3185639774237729, "grad_norm": 509.3262634277344, "learning_rate": 8.652003698363724e-06, "loss": 21.245, "step": 157700 }, { "epoch": 0.3185841780564567, "grad_norm": 451.8820495605469, "learning_rate": 8.65176527090593e-06, "loss": 38.1163, "step": 157710 }, { "epoch": 0.31860437868914054, "grad_norm": 40.83683776855469, "learning_rate": 8.651526825649874e-06, "loss": 16.2768, "step": 157720 }, { "epoch": 0.31862457932182436, "grad_norm": 202.827880859375, "learning_rate": 8.651288362596719e-06, "loss": 26.5783, "step": 157730 }, { "epoch": 0.3186447799545082, "grad_norm": 260.3866271972656, "learning_rate": 8.651049881747628e-06, "loss": 16.7693, "step": 157740 }, { "epoch": 0.318664980587192, "grad_norm": 225.61328125, "learning_rate": 8.650811383103763e-06, "loss": 18.6514, "step": 157750 }, { "epoch": 0.3186851812198758, "grad_norm": 117.831787109375, "learning_rate": 8.650572866666285e-06, "loss": 29.2886, "step": 157760 }, { "epoch": 0.31870538185255964, "grad_norm": 150.03355407714844, "learning_rate": 8.650334332436358e-06, "loss": 27.8969, "step": 157770 }, { "epoch": 0.31872558248524346, "grad_norm": 520.7167358398438, "learning_rate": 8.650095780415144e-06, "loss": 15.4845, "step": 157780 }, { "epoch": 0.31874578311792723, "grad_norm": 868.1956787109375, "learning_rate": 8.649857210603808e-06, "loss": 26.0302, "step": 157790 }, { "epoch": 0.31876598375061105, "grad_norm": 219.3620147705078, "learning_rate": 8.649618623003509e-06, "loss": 16.4495, "step": 157800 }, { "epoch": 0.31878618438329487, "grad_norm": 335.5268249511719, "learning_rate": 8.649380017615411e-06, "loss": 19.2912, "step": 157810 }, { "epoch": 0.3188063850159787, "grad_norm": 479.656005859375, "learning_rate": 8.649141394440677e-06, "loss": 24.1357, "step": 157820 }, { "epoch": 0.3188265856486625, "grad_norm": 169.12635803222656, "learning_rate": 8.648902753480472e-06, "loss": 23.6838, "step": 157830 }, { "epoch": 0.31884678628134633, "grad_norm": 438.6565856933594, "learning_rate": 8.648664094735957e-06, "loss": 16.261, "step": 157840 }, { "epoch": 0.31886698691403015, "grad_norm": 207.45492553710938, "learning_rate": 8.648425418208294e-06, "loss": 20.1898, "step": 157850 }, { "epoch": 0.31888718754671397, "grad_norm": 361.18475341796875, "learning_rate": 8.648186723898651e-06, "loss": 13.0632, "step": 157860 }, { "epoch": 0.3189073881793978, "grad_norm": 383.822998046875, "learning_rate": 8.647948011808187e-06, "loss": 25.0589, "step": 157870 }, { "epoch": 0.3189275888120816, "grad_norm": 281.13037109375, "learning_rate": 8.647709281938066e-06, "loss": 19.2069, "step": 157880 }, { "epoch": 0.31894778944476543, "grad_norm": 374.6888732910156, "learning_rate": 8.647470534289453e-06, "loss": 17.1849, "step": 157890 }, { "epoch": 0.31896799007744925, "grad_norm": 291.4905090332031, "learning_rate": 8.647231768863513e-06, "loss": 23.3442, "step": 157900 }, { "epoch": 0.31898819071013307, "grad_norm": 451.8069763183594, "learning_rate": 8.646992985661404e-06, "loss": 18.1466, "step": 157910 }, { "epoch": 0.31900839134281683, "grad_norm": 342.88726806640625, "learning_rate": 8.646754184684297e-06, "loss": 20.9162, "step": 157920 }, { "epoch": 0.31902859197550065, "grad_norm": 325.7464599609375, "learning_rate": 8.64651536593335e-06, "loss": 16.2594, "step": 157930 }, { "epoch": 0.3190487926081845, "grad_norm": 239.6884765625, "learning_rate": 8.646276529409729e-06, "loss": 18.5916, "step": 157940 }, { "epoch": 0.3190689932408683, "grad_norm": 303.8397216796875, "learning_rate": 8.6460376751146e-06, "loss": 29.5627, "step": 157950 }, { "epoch": 0.3190891938735521, "grad_norm": 614.7428588867188, "learning_rate": 8.645798803049126e-06, "loss": 28.573, "step": 157960 }, { "epoch": 0.31910939450623593, "grad_norm": 668.76611328125, "learning_rate": 8.64555991321447e-06, "loss": 19.716, "step": 157970 }, { "epoch": 0.31912959513891975, "grad_norm": 718.8262329101562, "learning_rate": 8.645321005611797e-06, "loss": 33.1614, "step": 157980 }, { "epoch": 0.3191497957716036, "grad_norm": 87.52703094482422, "learning_rate": 8.64508208024227e-06, "loss": 23.1281, "step": 157990 }, { "epoch": 0.3191699964042874, "grad_norm": 194.08853149414062, "learning_rate": 8.644843137107058e-06, "loss": 16.8125, "step": 158000 }, { "epoch": 0.3191901970369712, "grad_norm": 0.0, "learning_rate": 8.644604176207322e-06, "loss": 9.8071, "step": 158010 }, { "epoch": 0.31921039766965503, "grad_norm": 117.94172668457031, "learning_rate": 8.644365197544227e-06, "loss": 12.5855, "step": 158020 }, { "epoch": 0.31923059830233885, "grad_norm": 294.36578369140625, "learning_rate": 8.644126201118936e-06, "loss": 21.3951, "step": 158030 }, { "epoch": 0.3192507989350226, "grad_norm": 203.08973693847656, "learning_rate": 8.643887186932617e-06, "loss": 26.1142, "step": 158040 }, { "epoch": 0.31927099956770644, "grad_norm": 426.8924865722656, "learning_rate": 8.643648154986436e-06, "loss": 21.7628, "step": 158050 }, { "epoch": 0.31929120020039026, "grad_norm": 493.535888671875, "learning_rate": 8.643409105281554e-06, "loss": 13.2781, "step": 158060 }, { "epoch": 0.3193114008330741, "grad_norm": 335.2322998046875, "learning_rate": 8.643170037819137e-06, "loss": 38.9369, "step": 158070 }, { "epoch": 0.3193316014657579, "grad_norm": 448.56890869140625, "learning_rate": 8.642930952600353e-06, "loss": 24.4811, "step": 158080 }, { "epoch": 0.3193518020984417, "grad_norm": 110.81492614746094, "learning_rate": 8.642691849626364e-06, "loss": 15.0749, "step": 158090 }, { "epoch": 0.31937200273112554, "grad_norm": 314.9667663574219, "learning_rate": 8.642452728898339e-06, "loss": 31.4837, "step": 158100 }, { "epoch": 0.31939220336380936, "grad_norm": 47.22786331176758, "learning_rate": 8.642213590417439e-06, "loss": 12.9027, "step": 158110 }, { "epoch": 0.3194124039964932, "grad_norm": 174.1620635986328, "learning_rate": 8.641974434184832e-06, "loss": 11.8375, "step": 158120 }, { "epoch": 0.319432604629177, "grad_norm": 290.3607177734375, "learning_rate": 8.641735260201682e-06, "loss": 19.1374, "step": 158130 }, { "epoch": 0.3194528052618608, "grad_norm": 182.34085083007812, "learning_rate": 8.641496068469159e-06, "loss": 18.0552, "step": 158140 }, { "epoch": 0.31947300589454464, "grad_norm": 159.5022430419922, "learning_rate": 8.641256858988424e-06, "loss": 23.5846, "step": 158150 }, { "epoch": 0.31949320652722846, "grad_norm": 364.2108459472656, "learning_rate": 8.641017631760646e-06, "loss": 26.4399, "step": 158160 }, { "epoch": 0.3195134071599122, "grad_norm": 595.6912231445312, "learning_rate": 8.640778386786987e-06, "loss": 18.2873, "step": 158170 }, { "epoch": 0.31953360779259604, "grad_norm": 1.2532217502593994, "learning_rate": 8.640539124068617e-06, "loss": 15.4558, "step": 158180 }, { "epoch": 0.31955380842527986, "grad_norm": 196.04002380371094, "learning_rate": 8.640299843606702e-06, "loss": 20.8093, "step": 158190 }, { "epoch": 0.3195740090579637, "grad_norm": 137.03167724609375, "learning_rate": 8.640060545402407e-06, "loss": 15.5358, "step": 158200 }, { "epoch": 0.3195942096906475, "grad_norm": 238.58175659179688, "learning_rate": 8.639821229456898e-06, "loss": 28.9403, "step": 158210 }, { "epoch": 0.3196144103233313, "grad_norm": 220.64573669433594, "learning_rate": 8.63958189577134e-06, "loss": 13.1523, "step": 158220 }, { "epoch": 0.31963461095601514, "grad_norm": 207.9300537109375, "learning_rate": 8.639342544346903e-06, "loss": 11.1044, "step": 158230 }, { "epoch": 0.31965481158869896, "grad_norm": 159.47645568847656, "learning_rate": 8.639103175184753e-06, "loss": 8.617, "step": 158240 }, { "epoch": 0.3196750122213828, "grad_norm": 174.39962768554688, "learning_rate": 8.638863788286054e-06, "loss": 16.1776, "step": 158250 }, { "epoch": 0.3196952128540666, "grad_norm": 220.05026245117188, "learning_rate": 8.638624383651974e-06, "loss": 38.4655, "step": 158260 }, { "epoch": 0.3197154134867504, "grad_norm": 364.3473815917969, "learning_rate": 8.638384961283678e-06, "loss": 14.4721, "step": 158270 }, { "epoch": 0.31973561411943424, "grad_norm": 197.2839813232422, "learning_rate": 8.63814552118234e-06, "loss": 26.1182, "step": 158280 }, { "epoch": 0.31975581475211806, "grad_norm": 81.93717193603516, "learning_rate": 8.637906063349119e-06, "loss": 18.9249, "step": 158290 }, { "epoch": 0.3197760153848018, "grad_norm": 510.987548828125, "learning_rate": 8.637666587785185e-06, "loss": 25.3069, "step": 158300 }, { "epoch": 0.31979621601748565, "grad_norm": 619.7308349609375, "learning_rate": 8.637427094491706e-06, "loss": 24.5602, "step": 158310 }, { "epoch": 0.31981641665016947, "grad_norm": 435.1920471191406, "learning_rate": 8.637187583469849e-06, "loss": 25.6403, "step": 158320 }, { "epoch": 0.3198366172828533, "grad_norm": 1706.8756103515625, "learning_rate": 8.63694805472078e-06, "loss": 37.9997, "step": 158330 }, { "epoch": 0.3198568179155371, "grad_norm": 487.28167724609375, "learning_rate": 8.636708508245666e-06, "loss": 35.2028, "step": 158340 }, { "epoch": 0.3198770185482209, "grad_norm": 241.9866943359375, "learning_rate": 8.636468944045677e-06, "loss": 12.7475, "step": 158350 }, { "epoch": 0.31989721918090475, "grad_norm": 198.61451721191406, "learning_rate": 8.636229362121979e-06, "loss": 14.5104, "step": 158360 }, { "epoch": 0.31991741981358857, "grad_norm": 190.08897399902344, "learning_rate": 8.635989762475742e-06, "loss": 21.546, "step": 158370 }, { "epoch": 0.3199376204462724, "grad_norm": 225.36416625976562, "learning_rate": 8.63575014510813e-06, "loss": 16.0979, "step": 158380 }, { "epoch": 0.3199578210789562, "grad_norm": 304.4082946777344, "learning_rate": 8.635510510020313e-06, "loss": 25.81, "step": 158390 }, { "epoch": 0.31997802171164, "grad_norm": 171.87826538085938, "learning_rate": 8.63527085721346e-06, "loss": 31.788, "step": 158400 }, { "epoch": 0.31999822234432385, "grad_norm": 426.34906005859375, "learning_rate": 8.635031186688736e-06, "loss": 18.2974, "step": 158410 }, { "epoch": 0.32001842297700767, "grad_norm": 333.05267333984375, "learning_rate": 8.634791498447313e-06, "loss": 22.246, "step": 158420 }, { "epoch": 0.32003862360969143, "grad_norm": 368.6766052246094, "learning_rate": 8.634551792490356e-06, "loss": 30.6117, "step": 158430 }, { "epoch": 0.32005882424237525, "grad_norm": 970.1619262695312, "learning_rate": 8.634312068819032e-06, "loss": 13.6369, "step": 158440 }, { "epoch": 0.32007902487505907, "grad_norm": 227.0609588623047, "learning_rate": 8.634072327434515e-06, "loss": 21.4717, "step": 158450 }, { "epoch": 0.3200992255077429, "grad_norm": 292.22186279296875, "learning_rate": 8.63383256833797e-06, "loss": 26.7114, "step": 158460 }, { "epoch": 0.3201194261404267, "grad_norm": 450.0807189941406, "learning_rate": 8.633592791530564e-06, "loss": 16.3091, "step": 158470 }, { "epoch": 0.32013962677311053, "grad_norm": 83.41497039794922, "learning_rate": 8.63335299701347e-06, "loss": 26.4741, "step": 158480 }, { "epoch": 0.32015982740579435, "grad_norm": 5.924254894256592, "learning_rate": 8.633113184787852e-06, "loss": 15.6215, "step": 158490 }, { "epoch": 0.32018002803847817, "grad_norm": 201.7967071533203, "learning_rate": 8.632873354854881e-06, "loss": 20.9059, "step": 158500 }, { "epoch": 0.320200228671162, "grad_norm": 381.15283203125, "learning_rate": 8.632633507215726e-06, "loss": 18.3584, "step": 158510 }, { "epoch": 0.3202204293038458, "grad_norm": 313.1841125488281, "learning_rate": 8.632393641871555e-06, "loss": 29.6363, "step": 158520 }, { "epoch": 0.32024062993652963, "grad_norm": 137.67630004882812, "learning_rate": 8.63215375882354e-06, "loss": 21.1555, "step": 158530 }, { "epoch": 0.32026083056921345, "grad_norm": 88.62931060791016, "learning_rate": 8.631913858072846e-06, "loss": 32.0659, "step": 158540 }, { "epoch": 0.32028103120189727, "grad_norm": 718.0419311523438, "learning_rate": 8.631673939620647e-06, "loss": 25.2679, "step": 158550 }, { "epoch": 0.32030123183458103, "grad_norm": 424.9658203125, "learning_rate": 8.631434003468108e-06, "loss": 14.0193, "step": 158560 }, { "epoch": 0.32032143246726485, "grad_norm": 50.81481170654297, "learning_rate": 8.6311940496164e-06, "loss": 16.6759, "step": 158570 }, { "epoch": 0.3203416330999487, "grad_norm": 321.7388916015625, "learning_rate": 8.630954078066693e-06, "loss": 20.7951, "step": 158580 }, { "epoch": 0.3203618337326325, "grad_norm": 210.68614196777344, "learning_rate": 8.630714088820158e-06, "loss": 19.9492, "step": 158590 }, { "epoch": 0.3203820343653163, "grad_norm": 746.830322265625, "learning_rate": 8.630474081877959e-06, "loss": 41.5992, "step": 158600 }, { "epoch": 0.32040223499800013, "grad_norm": 314.31414794921875, "learning_rate": 8.630234057241274e-06, "loss": 44.2396, "step": 158610 }, { "epoch": 0.32042243563068395, "grad_norm": 266.2599182128906, "learning_rate": 8.629994014911265e-06, "loss": 29.1186, "step": 158620 }, { "epoch": 0.3204426362633678, "grad_norm": 249.96231079101562, "learning_rate": 8.629753954889108e-06, "loss": 10.8021, "step": 158630 }, { "epoch": 0.3204628368960516, "grad_norm": 3444.28955078125, "learning_rate": 8.629513877175968e-06, "loss": 29.8635, "step": 158640 }, { "epoch": 0.3204830375287354, "grad_norm": 97.21244812011719, "learning_rate": 8.62927378177302e-06, "loss": 15.2215, "step": 158650 }, { "epoch": 0.32050323816141923, "grad_norm": 256.813720703125, "learning_rate": 8.629033668681431e-06, "loss": 16.4721, "step": 158660 }, { "epoch": 0.32052343879410305, "grad_norm": 294.29083251953125, "learning_rate": 8.62879353790237e-06, "loss": 29.8241, "step": 158670 }, { "epoch": 0.3205436394267868, "grad_norm": 296.8296203613281, "learning_rate": 8.628553389437011e-06, "loss": 18.4062, "step": 158680 }, { "epoch": 0.32056384005947064, "grad_norm": 455.5325927734375, "learning_rate": 8.628313223286524e-06, "loss": 34.3874, "step": 158690 }, { "epoch": 0.32058404069215446, "grad_norm": 429.10894775390625, "learning_rate": 8.628073039452076e-06, "loss": 15.236, "step": 158700 }, { "epoch": 0.3206042413248383, "grad_norm": 178.36354064941406, "learning_rate": 8.627832837934843e-06, "loss": 15.9435, "step": 158710 }, { "epoch": 0.3206244419575221, "grad_norm": 249.77291870117188, "learning_rate": 8.627592618735989e-06, "loss": 23.4224, "step": 158720 }, { "epoch": 0.3206446425902059, "grad_norm": 302.16485595703125, "learning_rate": 8.627352381856691e-06, "loss": 18.054, "step": 158730 }, { "epoch": 0.32066484322288974, "grad_norm": 234.931640625, "learning_rate": 8.627112127298117e-06, "loss": 16.5442, "step": 158740 }, { "epoch": 0.32068504385557356, "grad_norm": 308.732177734375, "learning_rate": 8.626871855061438e-06, "loss": 21.4199, "step": 158750 }, { "epoch": 0.3207052444882574, "grad_norm": 372.8027648925781, "learning_rate": 8.626631565147827e-06, "loss": 20.0598, "step": 158760 }, { "epoch": 0.3207254451209412, "grad_norm": 321.06634521484375, "learning_rate": 8.626391257558453e-06, "loss": 22.5688, "step": 158770 }, { "epoch": 0.320745645753625, "grad_norm": 372.77117919921875, "learning_rate": 8.626150932294486e-06, "loss": 23.9163, "step": 158780 }, { "epoch": 0.32076584638630884, "grad_norm": 193.1827850341797, "learning_rate": 8.625910589357102e-06, "loss": 26.3137, "step": 158790 }, { "epoch": 0.32078604701899266, "grad_norm": 165.8345489501953, "learning_rate": 8.625670228747467e-06, "loss": 36.6702, "step": 158800 }, { "epoch": 0.3208062476516764, "grad_norm": 513.0969848632812, "learning_rate": 8.625429850466756e-06, "loss": 12.227, "step": 158810 }, { "epoch": 0.32082644828436024, "grad_norm": 311.0313720703125, "learning_rate": 8.625189454516141e-06, "loss": 16.6483, "step": 158820 }, { "epoch": 0.32084664891704406, "grad_norm": 278.8121032714844, "learning_rate": 8.62494904089679e-06, "loss": 25.1153, "step": 158830 }, { "epoch": 0.3208668495497279, "grad_norm": 567.5172729492188, "learning_rate": 8.624708609609879e-06, "loss": 36.6956, "step": 158840 }, { "epoch": 0.3208870501824117, "grad_norm": 178.05665588378906, "learning_rate": 8.624468160656576e-06, "loss": 22.8591, "step": 158850 }, { "epoch": 0.3209072508150955, "grad_norm": 268.1910400390625, "learning_rate": 8.624227694038057e-06, "loss": 19.0245, "step": 158860 }, { "epoch": 0.32092745144777934, "grad_norm": 373.21551513671875, "learning_rate": 8.623987209755489e-06, "loss": 21.9865, "step": 158870 }, { "epoch": 0.32094765208046316, "grad_norm": 235.04295349121094, "learning_rate": 8.62374670781005e-06, "loss": 26.6747, "step": 158880 }, { "epoch": 0.320967852713147, "grad_norm": 335.1015625, "learning_rate": 8.623506188202906e-06, "loss": 33.1884, "step": 158890 }, { "epoch": 0.3209880533458308, "grad_norm": 444.9839172363281, "learning_rate": 8.623265650935233e-06, "loss": 24.2929, "step": 158900 }, { "epoch": 0.3210082539785146, "grad_norm": 430.68365478515625, "learning_rate": 8.623025096008203e-06, "loss": 27.5212, "step": 158910 }, { "epoch": 0.32102845461119844, "grad_norm": 70.64128112792969, "learning_rate": 8.62278452342299e-06, "loss": 21.4839, "step": 158920 }, { "epoch": 0.32104865524388226, "grad_norm": 116.0479965209961, "learning_rate": 8.622543933180762e-06, "loss": 14.7263, "step": 158930 }, { "epoch": 0.32106885587656603, "grad_norm": 160.12571716308594, "learning_rate": 8.622303325282697e-06, "loss": 9.7799, "step": 158940 }, { "epoch": 0.32108905650924985, "grad_norm": 410.2300720214844, "learning_rate": 8.622062699729963e-06, "loss": 35.2413, "step": 158950 }, { "epoch": 0.32110925714193367, "grad_norm": 176.7652587890625, "learning_rate": 8.621822056523735e-06, "loss": 11.4847, "step": 158960 }, { "epoch": 0.3211294577746175, "grad_norm": 158.4217071533203, "learning_rate": 8.621581395665185e-06, "loss": 20.2297, "step": 158970 }, { "epoch": 0.3211496584073013, "grad_norm": 302.764892578125, "learning_rate": 8.621340717155487e-06, "loss": 18.7892, "step": 158980 }, { "epoch": 0.32116985903998513, "grad_norm": 219.1226043701172, "learning_rate": 8.621100020995813e-06, "loss": 14.5753, "step": 158990 }, { "epoch": 0.32119005967266895, "grad_norm": 143.5157470703125, "learning_rate": 8.620859307187339e-06, "loss": 25.7427, "step": 159000 }, { "epoch": 0.32121026030535277, "grad_norm": 211.8491973876953, "learning_rate": 8.620618575731233e-06, "loss": 19.2056, "step": 159010 }, { "epoch": 0.3212304609380366, "grad_norm": 185.5460662841797, "learning_rate": 8.620377826628672e-06, "loss": 23.6014, "step": 159020 }, { "epoch": 0.3212506615707204, "grad_norm": 218.11863708496094, "learning_rate": 8.62013705988083e-06, "loss": 19.0881, "step": 159030 }, { "epoch": 0.32127086220340423, "grad_norm": 139.14585876464844, "learning_rate": 8.619896275488876e-06, "loss": 25.5816, "step": 159040 }, { "epoch": 0.32129106283608805, "grad_norm": 352.18609619140625, "learning_rate": 8.61965547345399e-06, "loss": 30.009, "step": 159050 }, { "epoch": 0.32131126346877187, "grad_norm": 410.231689453125, "learning_rate": 8.619414653777341e-06, "loss": 18.5952, "step": 159060 }, { "epoch": 0.32133146410145563, "grad_norm": 37.688045501708984, "learning_rate": 8.619173816460104e-06, "loss": 26.7469, "step": 159070 }, { "epoch": 0.32135166473413945, "grad_norm": 196.94996643066406, "learning_rate": 8.618932961503452e-06, "loss": 11.0749, "step": 159080 }, { "epoch": 0.3213718653668233, "grad_norm": 66.86140441894531, "learning_rate": 8.618692088908562e-06, "loss": 16.3947, "step": 159090 }, { "epoch": 0.3213920659995071, "grad_norm": 335.9710388183594, "learning_rate": 8.618451198676602e-06, "loss": 25.9346, "step": 159100 }, { "epoch": 0.3214122666321909, "grad_norm": 306.1143798828125, "learning_rate": 8.618210290808753e-06, "loss": 10.322, "step": 159110 }, { "epoch": 0.32143246726487473, "grad_norm": 558.8087158203125, "learning_rate": 8.617969365306184e-06, "loss": 19.7621, "step": 159120 }, { "epoch": 0.32145266789755855, "grad_norm": 409.5292663574219, "learning_rate": 8.617728422170071e-06, "loss": 17.3509, "step": 159130 }, { "epoch": 0.3214728685302424, "grad_norm": 148.34161376953125, "learning_rate": 8.61748746140159e-06, "loss": 17.6911, "step": 159140 }, { "epoch": 0.3214930691629262, "grad_norm": 421.675537109375, "learning_rate": 8.617246483001914e-06, "loss": 18.8503, "step": 159150 }, { "epoch": 0.32151326979561, "grad_norm": 926.4773559570312, "learning_rate": 8.617005486972214e-06, "loss": 22.2602, "step": 159160 }, { "epoch": 0.32153347042829383, "grad_norm": 51.88847351074219, "learning_rate": 8.616764473313671e-06, "loss": 19.5263, "step": 159170 }, { "epoch": 0.32155367106097765, "grad_norm": 233.7967987060547, "learning_rate": 8.616523442027456e-06, "loss": 15.5352, "step": 159180 }, { "epoch": 0.3215738716936615, "grad_norm": 290.58184814453125, "learning_rate": 8.616282393114745e-06, "loss": 25.4573, "step": 159190 }, { "epoch": 0.32159407232634524, "grad_norm": 339.9847412109375, "learning_rate": 8.616041326576711e-06, "loss": 28.5967, "step": 159200 }, { "epoch": 0.32161427295902906, "grad_norm": 555.289794921875, "learning_rate": 8.61580024241453e-06, "loss": 14.2544, "step": 159210 }, { "epoch": 0.3216344735917129, "grad_norm": 274.30377197265625, "learning_rate": 8.615559140629377e-06, "loss": 21.7641, "step": 159220 }, { "epoch": 0.3216546742243967, "grad_norm": 5.469722270965576, "learning_rate": 8.61531802122243e-06, "loss": 8.0972, "step": 159230 }, { "epoch": 0.3216748748570805, "grad_norm": 190.05252075195312, "learning_rate": 8.615076884194859e-06, "loss": 13.4386, "step": 159240 }, { "epoch": 0.32169507548976434, "grad_norm": 251.7804718017578, "learning_rate": 8.614835729547841e-06, "loss": 23.1014, "step": 159250 }, { "epoch": 0.32171527612244816, "grad_norm": 387.4628601074219, "learning_rate": 8.614594557282553e-06, "loss": 18.8374, "step": 159260 }, { "epoch": 0.321735476755132, "grad_norm": 318.3892822265625, "learning_rate": 8.614353367400171e-06, "loss": 25.2327, "step": 159270 }, { "epoch": 0.3217556773878158, "grad_norm": 156.87762451171875, "learning_rate": 8.614112159901869e-06, "loss": 13.3475, "step": 159280 }, { "epoch": 0.3217758780204996, "grad_norm": 52.2751350402832, "learning_rate": 8.61387093478882e-06, "loss": 18.369, "step": 159290 }, { "epoch": 0.32179607865318344, "grad_norm": 394.3717041015625, "learning_rate": 8.613629692062204e-06, "loss": 29.8795, "step": 159300 }, { "epoch": 0.32181627928586726, "grad_norm": 427.05621337890625, "learning_rate": 8.613388431723195e-06, "loss": 21.4821, "step": 159310 }, { "epoch": 0.321836479918551, "grad_norm": 431.665283203125, "learning_rate": 8.61314715377297e-06, "loss": 15.0569, "step": 159320 }, { "epoch": 0.32185668055123484, "grad_norm": 141.75120544433594, "learning_rate": 8.612905858212702e-06, "loss": 19.0222, "step": 159330 }, { "epoch": 0.32187688118391866, "grad_norm": 62.07404327392578, "learning_rate": 8.612664545043572e-06, "loss": 8.974, "step": 159340 }, { "epoch": 0.3218970818166025, "grad_norm": 354.9383239746094, "learning_rate": 8.612423214266749e-06, "loss": 13.4768, "step": 159350 }, { "epoch": 0.3219172824492863, "grad_norm": 79.01036071777344, "learning_rate": 8.612181865883416e-06, "loss": 20.161, "step": 159360 }, { "epoch": 0.3219374830819701, "grad_norm": 306.1672058105469, "learning_rate": 8.611940499894746e-06, "loss": 15.4411, "step": 159370 }, { "epoch": 0.32195768371465394, "grad_norm": 377.9465637207031, "learning_rate": 8.611699116301916e-06, "loss": 27.4871, "step": 159380 }, { "epoch": 0.32197788434733776, "grad_norm": 365.8425598144531, "learning_rate": 8.611457715106103e-06, "loss": 18.5127, "step": 159390 }, { "epoch": 0.3219980849800216, "grad_norm": 271.089599609375, "learning_rate": 8.611216296308485e-06, "loss": 19.9102, "step": 159400 }, { "epoch": 0.3220182856127054, "grad_norm": 507.5986022949219, "learning_rate": 8.610974859910235e-06, "loss": 24.1164, "step": 159410 }, { "epoch": 0.3220384862453892, "grad_norm": 442.35906982421875, "learning_rate": 8.610733405912531e-06, "loss": 17.4695, "step": 159420 }, { "epoch": 0.32205868687807304, "grad_norm": 175.22415161132812, "learning_rate": 8.61049193431655e-06, "loss": 9.4757, "step": 159430 }, { "epoch": 0.32207888751075686, "grad_norm": 203.4575958251953, "learning_rate": 8.610250445123472e-06, "loss": 28.446, "step": 159440 }, { "epoch": 0.3220990881434406, "grad_norm": 230.4443359375, "learning_rate": 8.610008938334467e-06, "loss": 19.5618, "step": 159450 }, { "epoch": 0.32211928877612445, "grad_norm": 198.4458770751953, "learning_rate": 8.609767413950719e-06, "loss": 11.6903, "step": 159460 }, { "epoch": 0.32213948940880827, "grad_norm": 723.504638671875, "learning_rate": 8.609525871973402e-06, "loss": 30.0522, "step": 159470 }, { "epoch": 0.3221596900414921, "grad_norm": 513.935791015625, "learning_rate": 8.609284312403695e-06, "loss": 16.0147, "step": 159480 }, { "epoch": 0.3221798906741759, "grad_norm": 21.741662979125977, "learning_rate": 8.60904273524277e-06, "loss": 9.7485, "step": 159490 }, { "epoch": 0.3222000913068597, "grad_norm": 388.29248046875, "learning_rate": 8.608801140491811e-06, "loss": 19.5505, "step": 159500 }, { "epoch": 0.32222029193954355, "grad_norm": 276.1550598144531, "learning_rate": 8.608559528151994e-06, "loss": 31.1724, "step": 159510 }, { "epoch": 0.32224049257222737, "grad_norm": 482.7808532714844, "learning_rate": 8.608317898224495e-06, "loss": 22.3926, "step": 159520 }, { "epoch": 0.3222606932049112, "grad_norm": 716.8136596679688, "learning_rate": 8.608076250710491e-06, "loss": 25.1325, "step": 159530 }, { "epoch": 0.322280893837595, "grad_norm": 640.281494140625, "learning_rate": 8.607834585611162e-06, "loss": 19.0061, "step": 159540 }, { "epoch": 0.3223010944702788, "grad_norm": 304.6932373046875, "learning_rate": 8.607592902927684e-06, "loss": 20.7927, "step": 159550 }, { "epoch": 0.32232129510296265, "grad_norm": 24.293148040771484, "learning_rate": 8.607351202661236e-06, "loss": 13.7175, "step": 159560 }, { "epoch": 0.32234149573564647, "grad_norm": 155.75730895996094, "learning_rate": 8.607109484812996e-06, "loss": 15.8845, "step": 159570 }, { "epoch": 0.32236169636833023, "grad_norm": 130.6527099609375, "learning_rate": 8.606867749384142e-06, "loss": 20.7173, "step": 159580 }, { "epoch": 0.32238189700101405, "grad_norm": 478.2652282714844, "learning_rate": 8.60662599637585e-06, "loss": 36.5556, "step": 159590 }, { "epoch": 0.32240209763369787, "grad_norm": 245.3540802001953, "learning_rate": 8.606384225789304e-06, "loss": 12.9873, "step": 159600 }, { "epoch": 0.3224222982663817, "grad_norm": 162.5829315185547, "learning_rate": 8.606142437625676e-06, "loss": 14.6303, "step": 159610 }, { "epoch": 0.3224424988990655, "grad_norm": 227.86598205566406, "learning_rate": 8.605900631886148e-06, "loss": 17.124, "step": 159620 }, { "epoch": 0.32246269953174933, "grad_norm": 735.3265380859375, "learning_rate": 8.605658808571898e-06, "loss": 14.5158, "step": 159630 }, { "epoch": 0.32248290016443315, "grad_norm": 336.7503356933594, "learning_rate": 8.605416967684105e-06, "loss": 15.3012, "step": 159640 }, { "epoch": 0.32250310079711697, "grad_norm": 282.5049743652344, "learning_rate": 8.605175109223945e-06, "loss": 16.0195, "step": 159650 }, { "epoch": 0.3225233014298008, "grad_norm": 42.62806701660156, "learning_rate": 8.604933233192598e-06, "loss": 11.0932, "step": 159660 }, { "epoch": 0.3225435020624846, "grad_norm": 232.2516326904297, "learning_rate": 8.604691339591248e-06, "loss": 11.9198, "step": 159670 }, { "epoch": 0.32256370269516843, "grad_norm": 357.5400695800781, "learning_rate": 8.604449428421065e-06, "loss": 26.4038, "step": 159680 }, { "epoch": 0.32258390332785225, "grad_norm": 146.70455932617188, "learning_rate": 8.604207499683235e-06, "loss": 12.047, "step": 159690 }, { "epoch": 0.32260410396053607, "grad_norm": 232.76576232910156, "learning_rate": 8.603965553378934e-06, "loss": 16.7936, "step": 159700 }, { "epoch": 0.32262430459321984, "grad_norm": 467.8788757324219, "learning_rate": 8.603723589509342e-06, "loss": 21.9832, "step": 159710 }, { "epoch": 0.32264450522590365, "grad_norm": 367.3899841308594, "learning_rate": 8.603481608075638e-06, "loss": 26.2558, "step": 159720 }, { "epoch": 0.3226647058585875, "grad_norm": 419.1034851074219, "learning_rate": 8.603239609079005e-06, "loss": 21.8795, "step": 159730 }, { "epoch": 0.3226849064912713, "grad_norm": 554.20166015625, "learning_rate": 8.602997592520615e-06, "loss": 14.8932, "step": 159740 }, { "epoch": 0.3227051071239551, "grad_norm": 2587.07275390625, "learning_rate": 8.602755558401653e-06, "loss": 28.3916, "step": 159750 }, { "epoch": 0.32272530775663893, "grad_norm": 917.493408203125, "learning_rate": 8.602513506723298e-06, "loss": 39.8777, "step": 159760 }, { "epoch": 0.32274550838932275, "grad_norm": 387.0860900878906, "learning_rate": 8.602271437486728e-06, "loss": 29.7952, "step": 159770 }, { "epoch": 0.3227657090220066, "grad_norm": 133.15994262695312, "learning_rate": 8.602029350693124e-06, "loss": 28.0721, "step": 159780 }, { "epoch": 0.3227859096546904, "grad_norm": 304.4433288574219, "learning_rate": 8.601787246343667e-06, "loss": 10.5962, "step": 159790 }, { "epoch": 0.3228061102873742, "grad_norm": 344.75439453125, "learning_rate": 8.601545124439535e-06, "loss": 17.8007, "step": 159800 }, { "epoch": 0.32282631092005803, "grad_norm": 0.0, "learning_rate": 8.60130298498191e-06, "loss": 15.8685, "step": 159810 }, { "epoch": 0.32284651155274185, "grad_norm": 148.33917236328125, "learning_rate": 8.60106082797197e-06, "loss": 13.4473, "step": 159820 }, { "epoch": 0.3228667121854257, "grad_norm": 131.86767578125, "learning_rate": 8.600818653410895e-06, "loss": 23.4629, "step": 159830 }, { "epoch": 0.32288691281810944, "grad_norm": 217.0811767578125, "learning_rate": 8.600576461299869e-06, "loss": 35.9559, "step": 159840 }, { "epoch": 0.32290711345079326, "grad_norm": 408.2188720703125, "learning_rate": 8.60033425164007e-06, "loss": 13.1624, "step": 159850 }, { "epoch": 0.3229273140834771, "grad_norm": 270.2550048828125, "learning_rate": 8.600092024432676e-06, "loss": 16.2545, "step": 159860 }, { "epoch": 0.3229475147161609, "grad_norm": 471.7857971191406, "learning_rate": 8.599849779678872e-06, "loss": 28.8012, "step": 159870 }, { "epoch": 0.3229677153488447, "grad_norm": 363.5111083984375, "learning_rate": 8.599607517379837e-06, "loss": 23.0254, "step": 159880 }, { "epoch": 0.32298791598152854, "grad_norm": 187.9815216064453, "learning_rate": 8.599365237536751e-06, "loss": 20.6017, "step": 159890 }, { "epoch": 0.32300811661421236, "grad_norm": 142.3489227294922, "learning_rate": 8.599122940150795e-06, "loss": 25.998, "step": 159900 }, { "epoch": 0.3230283172468962, "grad_norm": 133.5078887939453, "learning_rate": 8.598880625223152e-06, "loss": 17.3504, "step": 159910 }, { "epoch": 0.32304851787958, "grad_norm": 322.2449951171875, "learning_rate": 8.598638292755e-06, "loss": 21.3396, "step": 159920 }, { "epoch": 0.3230687185122638, "grad_norm": 281.89691162109375, "learning_rate": 8.59839594274752e-06, "loss": 24.3771, "step": 159930 }, { "epoch": 0.32308891914494764, "grad_norm": 456.2262268066406, "learning_rate": 8.598153575201897e-06, "loss": 24.9151, "step": 159940 }, { "epoch": 0.32310911977763146, "grad_norm": 254.5800323486328, "learning_rate": 8.597911190119308e-06, "loss": 27.08, "step": 159950 }, { "epoch": 0.3231293204103152, "grad_norm": 0.0, "learning_rate": 8.597668787500937e-06, "loss": 7.122, "step": 159960 }, { "epoch": 0.32314952104299904, "grad_norm": 427.7899169921875, "learning_rate": 8.597426367347965e-06, "loss": 33.3629, "step": 159970 }, { "epoch": 0.32316972167568286, "grad_norm": 309.64178466796875, "learning_rate": 8.597183929661573e-06, "loss": 11.7937, "step": 159980 }, { "epoch": 0.3231899223083667, "grad_norm": 145.270263671875, "learning_rate": 8.596941474442943e-06, "loss": 13.4268, "step": 159990 }, { "epoch": 0.3232101229410505, "grad_norm": 229.5210418701172, "learning_rate": 8.596699001693257e-06, "loss": 14.5943, "step": 160000 }, { "epoch": 0.3232303235737343, "grad_norm": 528.5821533203125, "learning_rate": 8.596456511413695e-06, "loss": 33.1959, "step": 160010 }, { "epoch": 0.32325052420641814, "grad_norm": 247.9377899169922, "learning_rate": 8.59621400360544e-06, "loss": 23.1673, "step": 160020 }, { "epoch": 0.32327072483910196, "grad_norm": 553.3692016601562, "learning_rate": 8.595971478269675e-06, "loss": 15.6548, "step": 160030 }, { "epoch": 0.3232909254717858, "grad_norm": 359.00408935546875, "learning_rate": 8.59572893540758e-06, "loss": 12.4956, "step": 160040 }, { "epoch": 0.3233111261044696, "grad_norm": 444.3692626953125, "learning_rate": 8.59548637502034e-06, "loss": 17.1067, "step": 160050 }, { "epoch": 0.3233313267371534, "grad_norm": 172.60482788085938, "learning_rate": 8.595243797109137e-06, "loss": 27.0166, "step": 160060 }, { "epoch": 0.32335152736983724, "grad_norm": 165.98023986816406, "learning_rate": 8.595001201675149e-06, "loss": 24.4848, "step": 160070 }, { "epoch": 0.32337172800252106, "grad_norm": 275.40582275390625, "learning_rate": 8.594758588719562e-06, "loss": 20.352, "step": 160080 }, { "epoch": 0.32339192863520483, "grad_norm": 391.1814270019531, "learning_rate": 8.594515958243557e-06, "loss": 27.4306, "step": 160090 }, { "epoch": 0.32341212926788865, "grad_norm": 466.42523193359375, "learning_rate": 8.594273310248317e-06, "loss": 17.7794, "step": 160100 }, { "epoch": 0.32343232990057247, "grad_norm": 225.48728942871094, "learning_rate": 8.594030644735025e-06, "loss": 16.8261, "step": 160110 }, { "epoch": 0.3234525305332563, "grad_norm": 330.7485656738281, "learning_rate": 8.593787961704864e-06, "loss": 15.7742, "step": 160120 }, { "epoch": 0.3234727311659401, "grad_norm": 454.29754638671875, "learning_rate": 8.593545261159017e-06, "loss": 29.992, "step": 160130 }, { "epoch": 0.32349293179862393, "grad_norm": 329.708740234375, "learning_rate": 8.593302543098666e-06, "loss": 18.5196, "step": 160140 }, { "epoch": 0.32351313243130775, "grad_norm": 684.826416015625, "learning_rate": 8.593059807524993e-06, "loss": 29.1436, "step": 160150 }, { "epoch": 0.32353333306399157, "grad_norm": 277.2701416015625, "learning_rate": 8.592817054439184e-06, "loss": 13.4445, "step": 160160 }, { "epoch": 0.3235535336966754, "grad_norm": 163.81622314453125, "learning_rate": 8.592574283842418e-06, "loss": 10.0287, "step": 160170 }, { "epoch": 0.3235737343293592, "grad_norm": 314.90875244140625, "learning_rate": 8.592331495735884e-06, "loss": 11.634, "step": 160180 }, { "epoch": 0.32359393496204303, "grad_norm": 229.58425903320312, "learning_rate": 8.592088690120759e-06, "loss": 17.403, "step": 160190 }, { "epoch": 0.32361413559472685, "grad_norm": 267.06658935546875, "learning_rate": 8.591845866998231e-06, "loss": 23.8863, "step": 160200 }, { "epoch": 0.32363433622741067, "grad_norm": 1246.707763671875, "learning_rate": 8.591603026369481e-06, "loss": 28.2416, "step": 160210 }, { "epoch": 0.32365453686009443, "grad_norm": 182.14608764648438, "learning_rate": 8.591360168235694e-06, "loss": 8.2271, "step": 160220 }, { "epoch": 0.32367473749277825, "grad_norm": 178.26727294921875, "learning_rate": 8.591117292598053e-06, "loss": 25.0418, "step": 160230 }, { "epoch": 0.3236949381254621, "grad_norm": 264.09320068359375, "learning_rate": 8.590874399457743e-06, "loss": 34.8977, "step": 160240 }, { "epoch": 0.3237151387581459, "grad_norm": 9.428799629211426, "learning_rate": 8.590631488815945e-06, "loss": 25.3432, "step": 160250 }, { "epoch": 0.3237353393908297, "grad_norm": 222.79591369628906, "learning_rate": 8.590388560673846e-06, "loss": 23.0049, "step": 160260 }, { "epoch": 0.32375554002351353, "grad_norm": 681.2034912109375, "learning_rate": 8.590145615032626e-06, "loss": 16.8512, "step": 160270 }, { "epoch": 0.32377574065619735, "grad_norm": 826.7547607421875, "learning_rate": 8.589902651893474e-06, "loss": 40.3379, "step": 160280 }, { "epoch": 0.3237959412888812, "grad_norm": 579.8724975585938, "learning_rate": 8.589659671257573e-06, "loss": 18.3186, "step": 160290 }, { "epoch": 0.323816141921565, "grad_norm": 186.5430450439453, "learning_rate": 8.589416673126104e-06, "loss": 17.3347, "step": 160300 }, { "epoch": 0.3238363425542488, "grad_norm": 343.86126708984375, "learning_rate": 8.589173657500254e-06, "loss": 19.3778, "step": 160310 }, { "epoch": 0.32385654318693263, "grad_norm": 179.45281982421875, "learning_rate": 8.588930624381207e-06, "loss": 10.4361, "step": 160320 }, { "epoch": 0.32387674381961645, "grad_norm": 635.8007202148438, "learning_rate": 8.588687573770146e-06, "loss": 24.3066, "step": 160330 }, { "epoch": 0.3238969444523003, "grad_norm": 29.071779251098633, "learning_rate": 8.588444505668259e-06, "loss": 22.667, "step": 160340 }, { "epoch": 0.32391714508498404, "grad_norm": 185.80630493164062, "learning_rate": 8.588201420076727e-06, "loss": 19.5003, "step": 160350 }, { "epoch": 0.32393734571766786, "grad_norm": 676.4252319335938, "learning_rate": 8.587958316996739e-06, "loss": 21.0258, "step": 160360 }, { "epoch": 0.3239575463503517, "grad_norm": 155.59559631347656, "learning_rate": 8.587715196429477e-06, "loss": 21.1043, "step": 160370 }, { "epoch": 0.3239777469830355, "grad_norm": 19.197193145751953, "learning_rate": 8.587472058376122e-06, "loss": 29.9791, "step": 160380 }, { "epoch": 0.3239979476157193, "grad_norm": 556.343017578125, "learning_rate": 8.587228902837868e-06, "loss": 29.5033, "step": 160390 }, { "epoch": 0.32401814824840314, "grad_norm": 334.4491882324219, "learning_rate": 8.586985729815895e-06, "loss": 25.6602, "step": 160400 }, { "epoch": 0.32403834888108696, "grad_norm": 270.7784729003906, "learning_rate": 8.586742539311385e-06, "loss": 13.814, "step": 160410 }, { "epoch": 0.3240585495137708, "grad_norm": 218.21041870117188, "learning_rate": 8.58649933132553e-06, "loss": 19.2715, "step": 160420 }, { "epoch": 0.3240787501464546, "grad_norm": 1351.86865234375, "learning_rate": 8.586256105859512e-06, "loss": 30.5869, "step": 160430 }, { "epoch": 0.3240989507791384, "grad_norm": 302.4422302246094, "learning_rate": 8.586012862914517e-06, "loss": 24.7844, "step": 160440 }, { "epoch": 0.32411915141182224, "grad_norm": 471.20770263671875, "learning_rate": 8.585769602491729e-06, "loss": 21.5888, "step": 160450 }, { "epoch": 0.32413935204450606, "grad_norm": 312.970947265625, "learning_rate": 8.585526324592335e-06, "loss": 21.1036, "step": 160460 }, { "epoch": 0.3241595526771899, "grad_norm": 347.3215026855469, "learning_rate": 8.585283029217521e-06, "loss": 22.3124, "step": 160470 }, { "epoch": 0.32417975330987364, "grad_norm": 145.96595764160156, "learning_rate": 8.585039716368473e-06, "loss": 14.874, "step": 160480 }, { "epoch": 0.32419995394255746, "grad_norm": 175.42677307128906, "learning_rate": 8.584796386046374e-06, "loss": 16.0856, "step": 160490 }, { "epoch": 0.3242201545752413, "grad_norm": 345.2078552246094, "learning_rate": 8.584553038252415e-06, "loss": 21.6175, "step": 160500 }, { "epoch": 0.3242403552079251, "grad_norm": 288.8846435546875, "learning_rate": 8.584309672987778e-06, "loss": 19.5666, "step": 160510 }, { "epoch": 0.3242605558406089, "grad_norm": 504.0223693847656, "learning_rate": 8.584066290253649e-06, "loss": 23.4037, "step": 160520 }, { "epoch": 0.32428075647329274, "grad_norm": 469.3775634765625, "learning_rate": 8.583822890051217e-06, "loss": 27.1463, "step": 160530 }, { "epoch": 0.32430095710597656, "grad_norm": 60.67906951904297, "learning_rate": 8.583579472381668e-06, "loss": 12.8854, "step": 160540 }, { "epoch": 0.3243211577386604, "grad_norm": 641.5224609375, "learning_rate": 8.583336037246187e-06, "loss": 42.9731, "step": 160550 }, { "epoch": 0.3243413583713442, "grad_norm": 281.75421142578125, "learning_rate": 8.58309258464596e-06, "loss": 33.2418, "step": 160560 }, { "epoch": 0.324361559004028, "grad_norm": 459.7868957519531, "learning_rate": 8.582849114582173e-06, "loss": 13.7972, "step": 160570 }, { "epoch": 0.32438175963671184, "grad_norm": 64.96794891357422, "learning_rate": 8.582605627056016e-06, "loss": 18.0004, "step": 160580 }, { "epoch": 0.32440196026939566, "grad_norm": 252.6522674560547, "learning_rate": 8.582362122068673e-06, "loss": 12.4507, "step": 160590 }, { "epoch": 0.3244221609020794, "grad_norm": 606.77587890625, "learning_rate": 8.58211859962133e-06, "loss": 24.8825, "step": 160600 }, { "epoch": 0.32444236153476325, "grad_norm": 145.46923828125, "learning_rate": 8.581875059715177e-06, "loss": 30.3179, "step": 160610 }, { "epoch": 0.32446256216744707, "grad_norm": 140.54229736328125, "learning_rate": 8.5816315023514e-06, "loss": 19.3367, "step": 160620 }, { "epoch": 0.3244827628001309, "grad_norm": 321.04681396484375, "learning_rate": 8.581387927531184e-06, "loss": 12.4165, "step": 160630 }, { "epoch": 0.3245029634328147, "grad_norm": 117.43833923339844, "learning_rate": 8.581144335255717e-06, "loss": 12.9364, "step": 160640 }, { "epoch": 0.3245231640654985, "grad_norm": 1063.7276611328125, "learning_rate": 8.580900725526189e-06, "loss": 21.3312, "step": 160650 }, { "epoch": 0.32454336469818235, "grad_norm": 257.79071044921875, "learning_rate": 8.580657098343786e-06, "loss": 15.9298, "step": 160660 }, { "epoch": 0.32456356533086617, "grad_norm": 248.06492614746094, "learning_rate": 8.58041345370969e-06, "loss": 18.4282, "step": 160670 }, { "epoch": 0.32458376596355, "grad_norm": 252.79800415039062, "learning_rate": 8.580169791625097e-06, "loss": 17.299, "step": 160680 }, { "epoch": 0.3246039665962338, "grad_norm": 586.7491455078125, "learning_rate": 8.57992611209119e-06, "loss": 34.0981, "step": 160690 }, { "epoch": 0.3246241672289176, "grad_norm": 163.87538146972656, "learning_rate": 8.579682415109156e-06, "loss": 29.5291, "step": 160700 }, { "epoch": 0.32464436786160145, "grad_norm": 314.1620788574219, "learning_rate": 8.579438700680184e-06, "loss": 21.9702, "step": 160710 }, { "epoch": 0.32466456849428527, "grad_norm": 313.78704833984375, "learning_rate": 8.579194968805464e-06, "loss": 23.282, "step": 160720 }, { "epoch": 0.32468476912696903, "grad_norm": 115.17179870605469, "learning_rate": 8.57895121948618e-06, "loss": 16.7608, "step": 160730 }, { "epoch": 0.32470496975965285, "grad_norm": 142.38975524902344, "learning_rate": 8.578707452723524e-06, "loss": 12.6143, "step": 160740 }, { "epoch": 0.32472517039233667, "grad_norm": 754.7799072265625, "learning_rate": 8.57846366851868e-06, "loss": 23.3749, "step": 160750 }, { "epoch": 0.3247453710250205, "grad_norm": 288.4206237792969, "learning_rate": 8.57821986687284e-06, "loss": 10.4402, "step": 160760 }, { "epoch": 0.3247655716577043, "grad_norm": 1.1205369234085083, "learning_rate": 8.577976047787187e-06, "loss": 15.0203, "step": 160770 }, { "epoch": 0.32478577229038813, "grad_norm": 190.72705078125, "learning_rate": 8.577732211262914e-06, "loss": 20.8905, "step": 160780 }, { "epoch": 0.32480597292307195, "grad_norm": 390.17388916015625, "learning_rate": 8.577488357301209e-06, "loss": 16.59, "step": 160790 }, { "epoch": 0.32482617355575577, "grad_norm": 143.6304473876953, "learning_rate": 8.57724448590326e-06, "loss": 23.1108, "step": 160800 }, { "epoch": 0.3248463741884396, "grad_norm": 0.0, "learning_rate": 8.577000597070256e-06, "loss": 12.7179, "step": 160810 }, { "epoch": 0.3248665748211234, "grad_norm": 175.57736206054688, "learning_rate": 8.576756690803382e-06, "loss": 11.678, "step": 160820 }, { "epoch": 0.32488677545380723, "grad_norm": 335.06170654296875, "learning_rate": 8.576512767103831e-06, "loss": 12.0152, "step": 160830 }, { "epoch": 0.32490697608649105, "grad_norm": 270.6121520996094, "learning_rate": 8.576268825972791e-06, "loss": 37.1402, "step": 160840 }, { "epoch": 0.32492717671917487, "grad_norm": 324.3387756347656, "learning_rate": 8.576024867411452e-06, "loss": 31.4032, "step": 160850 }, { "epoch": 0.32494737735185864, "grad_norm": 328.5863342285156, "learning_rate": 8.575780891420998e-06, "loss": 26.5302, "step": 160860 }, { "epoch": 0.32496757798454246, "grad_norm": 17.533044815063477, "learning_rate": 8.575536898002623e-06, "loss": 7.6747, "step": 160870 }, { "epoch": 0.3249877786172263, "grad_norm": 43.81504440307617, "learning_rate": 8.575292887157515e-06, "loss": 18.6822, "step": 160880 }, { "epoch": 0.3250079792499101, "grad_norm": 414.38336181640625, "learning_rate": 8.575048858886865e-06, "loss": 19.1681, "step": 160890 }, { "epoch": 0.3250281798825939, "grad_norm": 205.98416137695312, "learning_rate": 8.574804813191859e-06, "loss": 14.0824, "step": 160900 }, { "epoch": 0.32504838051527774, "grad_norm": 405.8812561035156, "learning_rate": 8.574560750073687e-06, "loss": 14.6721, "step": 160910 }, { "epoch": 0.32506858114796156, "grad_norm": 308.0984802246094, "learning_rate": 8.57431666953354e-06, "loss": 22.1127, "step": 160920 }, { "epoch": 0.3250887817806454, "grad_norm": 378.99468994140625, "learning_rate": 8.574072571572606e-06, "loss": 14.9109, "step": 160930 }, { "epoch": 0.3251089824133292, "grad_norm": 423.50006103515625, "learning_rate": 8.57382845619208e-06, "loss": 18.1392, "step": 160940 }, { "epoch": 0.325129183046013, "grad_norm": 436.1254577636719, "learning_rate": 8.573584323393142e-06, "loss": 22.2936, "step": 160950 }, { "epoch": 0.32514938367869683, "grad_norm": 324.3702697753906, "learning_rate": 8.57334017317699e-06, "loss": 21.9063, "step": 160960 }, { "epoch": 0.32516958431138065, "grad_norm": 359.3026123046875, "learning_rate": 8.573096005544812e-06, "loss": 31.7196, "step": 160970 }, { "epoch": 0.3251897849440645, "grad_norm": 288.1664733886719, "learning_rate": 8.572851820497797e-06, "loss": 17.8718, "step": 160980 }, { "epoch": 0.32520998557674824, "grad_norm": 504.6815185546875, "learning_rate": 8.572607618037137e-06, "loss": 20.1381, "step": 160990 }, { "epoch": 0.32523018620943206, "grad_norm": 402.6356201171875, "learning_rate": 8.572363398164017e-06, "loss": 27.5268, "step": 161000 }, { "epoch": 0.3252503868421159, "grad_norm": 460.24468994140625, "learning_rate": 8.572119160879633e-06, "loss": 22.3075, "step": 161010 }, { "epoch": 0.3252705874747997, "grad_norm": 397.0452880859375, "learning_rate": 8.571874906185175e-06, "loss": 28.9501, "step": 161020 }, { "epoch": 0.3252907881074835, "grad_norm": 89.34671783447266, "learning_rate": 8.57163063408183e-06, "loss": 17.4963, "step": 161030 }, { "epoch": 0.32531098874016734, "grad_norm": 406.5155029296875, "learning_rate": 8.571386344570791e-06, "loss": 38.5164, "step": 161040 }, { "epoch": 0.32533118937285116, "grad_norm": 254.5028533935547, "learning_rate": 8.571142037653249e-06, "loss": 25.2298, "step": 161050 }, { "epoch": 0.325351390005535, "grad_norm": 448.8887634277344, "learning_rate": 8.570897713330392e-06, "loss": 23.9027, "step": 161060 }, { "epoch": 0.3253715906382188, "grad_norm": 207.20237731933594, "learning_rate": 8.570653371603414e-06, "loss": 19.7495, "step": 161070 }, { "epoch": 0.3253917912709026, "grad_norm": 225.35865783691406, "learning_rate": 8.570409012473503e-06, "loss": 19.8522, "step": 161080 }, { "epoch": 0.32541199190358644, "grad_norm": 33.08717727661133, "learning_rate": 8.570164635941853e-06, "loss": 25.909, "step": 161090 }, { "epoch": 0.32543219253627026, "grad_norm": 459.81842041015625, "learning_rate": 8.569920242009655e-06, "loss": 25.9371, "step": 161100 }, { "epoch": 0.325452393168954, "grad_norm": 283.97119140625, "learning_rate": 8.569675830678097e-06, "loss": 15.6303, "step": 161110 }, { "epoch": 0.32547259380163784, "grad_norm": 440.4578552246094, "learning_rate": 8.569431401948371e-06, "loss": 28.3527, "step": 161120 }, { "epoch": 0.32549279443432166, "grad_norm": 36.686946868896484, "learning_rate": 8.56918695582167e-06, "loss": 22.1303, "step": 161130 }, { "epoch": 0.3255129950670055, "grad_norm": 207.19908142089844, "learning_rate": 8.568942492299186e-06, "loss": 19.0015, "step": 161140 }, { "epoch": 0.3255331956996893, "grad_norm": 568.3507080078125, "learning_rate": 8.568698011382108e-06, "loss": 19.3392, "step": 161150 }, { "epoch": 0.3255533963323731, "grad_norm": 336.6758117675781, "learning_rate": 8.568453513071628e-06, "loss": 24.0954, "step": 161160 }, { "epoch": 0.32557359696505694, "grad_norm": 251.8780059814453, "learning_rate": 8.568208997368938e-06, "loss": 18.7081, "step": 161170 }, { "epoch": 0.32559379759774076, "grad_norm": 327.2490539550781, "learning_rate": 8.567964464275233e-06, "loss": 13.5288, "step": 161180 }, { "epoch": 0.3256139982304246, "grad_norm": 174.08016967773438, "learning_rate": 8.5677199137917e-06, "loss": 26.3825, "step": 161190 }, { "epoch": 0.3256341988631084, "grad_norm": 263.1756591796875, "learning_rate": 8.567475345919532e-06, "loss": 27.0217, "step": 161200 }, { "epoch": 0.3256543994957922, "grad_norm": 195.12847900390625, "learning_rate": 8.567230760659924e-06, "loss": 14.0718, "step": 161210 }, { "epoch": 0.32567460012847604, "grad_norm": 150.4462890625, "learning_rate": 8.566986158014065e-06, "loss": 16.7205, "step": 161220 }, { "epoch": 0.32569480076115986, "grad_norm": 265.1540222167969, "learning_rate": 8.566741537983147e-06, "loss": 17.5833, "step": 161230 }, { "epoch": 0.32571500139384363, "grad_norm": 583.5469970703125, "learning_rate": 8.566496900568364e-06, "loss": 19.0612, "step": 161240 }, { "epoch": 0.32573520202652745, "grad_norm": 299.9870910644531, "learning_rate": 8.56625224577091e-06, "loss": 19.8703, "step": 161250 }, { "epoch": 0.32575540265921127, "grad_norm": 520.8071899414062, "learning_rate": 8.566007573591972e-06, "loss": 14.7954, "step": 161260 }, { "epoch": 0.3257756032918951, "grad_norm": 237.85879516601562, "learning_rate": 8.565762884032747e-06, "loss": 19.9579, "step": 161270 }, { "epoch": 0.3257958039245789, "grad_norm": 271.0999755859375, "learning_rate": 8.565518177094425e-06, "loss": 22.4733, "step": 161280 }, { "epoch": 0.32581600455726273, "grad_norm": 353.4420166015625, "learning_rate": 8.5652734527782e-06, "loss": 16.9628, "step": 161290 }, { "epoch": 0.32583620518994655, "grad_norm": 336.1426696777344, "learning_rate": 8.565028711085266e-06, "loss": 27.0701, "step": 161300 }, { "epoch": 0.32585640582263037, "grad_norm": 742.9075317382812, "learning_rate": 8.564783952016813e-06, "loss": 19.2863, "step": 161310 }, { "epoch": 0.3258766064553142, "grad_norm": 41.691917419433594, "learning_rate": 8.564539175574035e-06, "loss": 15.3361, "step": 161320 }, { "epoch": 0.325896807087998, "grad_norm": 380.1393127441406, "learning_rate": 8.564294381758128e-06, "loss": 11.8427, "step": 161330 }, { "epoch": 0.32591700772068183, "grad_norm": 113.38142395019531, "learning_rate": 8.56404957057028e-06, "loss": 24.2607, "step": 161340 }, { "epoch": 0.32593720835336565, "grad_norm": 169.4033966064453, "learning_rate": 8.563804742011689e-06, "loss": 20.1016, "step": 161350 }, { "epoch": 0.32595740898604947, "grad_norm": 371.5568542480469, "learning_rate": 8.563559896083544e-06, "loss": 23.8444, "step": 161360 }, { "epoch": 0.32597760961873323, "grad_norm": 724.4224243164062, "learning_rate": 8.56331503278704e-06, "loss": 24.6272, "step": 161370 }, { "epoch": 0.32599781025141705, "grad_norm": 278.4543151855469, "learning_rate": 8.563070152123372e-06, "loss": 10.2137, "step": 161380 }, { "epoch": 0.3260180108841009, "grad_norm": 0.0, "learning_rate": 8.562825254093732e-06, "loss": 11.7542, "step": 161390 }, { "epoch": 0.3260382115167847, "grad_norm": 479.7064514160156, "learning_rate": 8.562580338699313e-06, "loss": 28.3307, "step": 161400 }, { "epoch": 0.3260584121494685, "grad_norm": 324.5362854003906, "learning_rate": 8.56233540594131e-06, "loss": 16.3939, "step": 161410 }, { "epoch": 0.32607861278215233, "grad_norm": 249.52044677734375, "learning_rate": 8.562090455820918e-06, "loss": 36.6503, "step": 161420 }, { "epoch": 0.32609881341483615, "grad_norm": 0.0, "learning_rate": 8.561845488339327e-06, "loss": 15.8241, "step": 161430 }, { "epoch": 0.32611901404752, "grad_norm": 43.22331619262695, "learning_rate": 8.561600503497734e-06, "loss": 31.8492, "step": 161440 }, { "epoch": 0.3261392146802038, "grad_norm": 568.6189575195312, "learning_rate": 8.56135550129733e-06, "loss": 31.3497, "step": 161450 }, { "epoch": 0.3261594153128876, "grad_norm": 30.54588508605957, "learning_rate": 8.561110481739314e-06, "loss": 15.6232, "step": 161460 }, { "epoch": 0.32617961594557143, "grad_norm": 7.8938307762146, "learning_rate": 8.560865444824875e-06, "loss": 15.7416, "step": 161470 }, { "epoch": 0.32619981657825525, "grad_norm": 430.7480163574219, "learning_rate": 8.560620390555212e-06, "loss": 17.8675, "step": 161480 }, { "epoch": 0.3262200172109391, "grad_norm": 332.28253173828125, "learning_rate": 8.560375318931517e-06, "loss": 12.5896, "step": 161490 }, { "epoch": 0.32624021784362284, "grad_norm": 90.5221176147461, "learning_rate": 8.560130229954985e-06, "loss": 24.6832, "step": 161500 }, { "epoch": 0.32626041847630666, "grad_norm": 658.7239379882812, "learning_rate": 8.559885123626806e-06, "loss": 21.9032, "step": 161510 }, { "epoch": 0.3262806191089905, "grad_norm": 355.9521789550781, "learning_rate": 8.559639999948181e-06, "loss": 15.9106, "step": 161520 }, { "epoch": 0.3263008197416743, "grad_norm": 287.7037048339844, "learning_rate": 8.559394858920304e-06, "loss": 20.5597, "step": 161530 }, { "epoch": 0.3263210203743581, "grad_norm": 271.0421447753906, "learning_rate": 8.559149700544367e-06, "loss": 25.5398, "step": 161540 }, { "epoch": 0.32634122100704194, "grad_norm": 299.478515625, "learning_rate": 8.558904524821565e-06, "loss": 7.6193, "step": 161550 }, { "epoch": 0.32636142163972576, "grad_norm": 371.0263671875, "learning_rate": 8.558659331753096e-06, "loss": 24.8287, "step": 161560 }, { "epoch": 0.3263816222724096, "grad_norm": 108.2397689819336, "learning_rate": 8.558414121340152e-06, "loss": 22.5924, "step": 161570 }, { "epoch": 0.3264018229050934, "grad_norm": 349.9432678222656, "learning_rate": 8.55816889358393e-06, "loss": 10.4757, "step": 161580 }, { "epoch": 0.3264220235377772, "grad_norm": 0.0, "learning_rate": 8.557923648485622e-06, "loss": 13.6579, "step": 161590 }, { "epoch": 0.32644222417046104, "grad_norm": 697.65234375, "learning_rate": 8.557678386046429e-06, "loss": 30.5358, "step": 161600 }, { "epoch": 0.32646242480314486, "grad_norm": 108.10091400146484, "learning_rate": 8.55743310626754e-06, "loss": 11.4335, "step": 161610 }, { "epoch": 0.3264826254358287, "grad_norm": 276.4860534667969, "learning_rate": 8.557187809150154e-06, "loss": 18.952, "step": 161620 }, { "epoch": 0.32650282606851244, "grad_norm": 595.3681640625, "learning_rate": 8.556942494695467e-06, "loss": 18.1993, "step": 161630 }, { "epoch": 0.32652302670119626, "grad_norm": 286.603515625, "learning_rate": 8.556697162904674e-06, "loss": 33.3384, "step": 161640 }, { "epoch": 0.3265432273338801, "grad_norm": 432.5000305175781, "learning_rate": 8.55645181377897e-06, "loss": 16.1291, "step": 161650 }, { "epoch": 0.3265634279665639, "grad_norm": 71.44153594970703, "learning_rate": 8.55620644731955e-06, "loss": 25.382, "step": 161660 }, { "epoch": 0.3265836285992477, "grad_norm": 222.82911682128906, "learning_rate": 8.555961063527612e-06, "loss": 21.467, "step": 161670 }, { "epoch": 0.32660382923193154, "grad_norm": 93.01781463623047, "learning_rate": 8.555715662404352e-06, "loss": 9.6252, "step": 161680 }, { "epoch": 0.32662402986461536, "grad_norm": 191.33892822265625, "learning_rate": 8.555470243950963e-06, "loss": 15.2433, "step": 161690 }, { "epoch": 0.3266442304972992, "grad_norm": 372.4892883300781, "learning_rate": 8.555224808168644e-06, "loss": 21.3093, "step": 161700 }, { "epoch": 0.326664431129983, "grad_norm": 222.12583923339844, "learning_rate": 8.554979355058593e-06, "loss": 18.1167, "step": 161710 }, { "epoch": 0.3266846317626668, "grad_norm": 262.0791015625, "learning_rate": 8.554733884622003e-06, "loss": 22.1763, "step": 161720 }, { "epoch": 0.32670483239535064, "grad_norm": 461.62054443359375, "learning_rate": 8.554488396860069e-06, "loss": 28.49, "step": 161730 }, { "epoch": 0.32672503302803446, "grad_norm": 228.8190460205078, "learning_rate": 8.55424289177399e-06, "loss": 18.3679, "step": 161740 }, { "epoch": 0.3267452336607182, "grad_norm": 99.61195373535156, "learning_rate": 8.553997369364964e-06, "loss": 17.5033, "step": 161750 }, { "epoch": 0.32676543429340205, "grad_norm": 10.042817115783691, "learning_rate": 8.553751829634184e-06, "loss": 12.041, "step": 161760 }, { "epoch": 0.32678563492608587, "grad_norm": 37.838897705078125, "learning_rate": 8.55350627258285e-06, "loss": 14.4586, "step": 161770 }, { "epoch": 0.3268058355587697, "grad_norm": 198.80201721191406, "learning_rate": 8.553260698212156e-06, "loss": 13.2688, "step": 161780 }, { "epoch": 0.3268260361914535, "grad_norm": 517.840576171875, "learning_rate": 8.5530151065233e-06, "loss": 28.6678, "step": 161790 }, { "epoch": 0.3268462368241373, "grad_norm": 0.0, "learning_rate": 8.55276949751748e-06, "loss": 13.7913, "step": 161800 }, { "epoch": 0.32686643745682115, "grad_norm": 304.2605895996094, "learning_rate": 8.552523871195895e-06, "loss": 27.0233, "step": 161810 }, { "epoch": 0.32688663808950497, "grad_norm": 328.7656555175781, "learning_rate": 8.552278227559736e-06, "loss": 33.0603, "step": 161820 }, { "epoch": 0.3269068387221888, "grad_norm": 51.357818603515625, "learning_rate": 8.552032566610206e-06, "loss": 13.427, "step": 161830 }, { "epoch": 0.3269270393548726, "grad_norm": 189.44451904296875, "learning_rate": 8.551786888348499e-06, "loss": 20.5904, "step": 161840 }, { "epoch": 0.3269472399875564, "grad_norm": 321.4538879394531, "learning_rate": 8.551541192775813e-06, "loss": 42.3215, "step": 161850 }, { "epoch": 0.32696744062024025, "grad_norm": 130.42837524414062, "learning_rate": 8.551295479893347e-06, "loss": 16.6457, "step": 161860 }, { "epoch": 0.32698764125292407, "grad_norm": 278.4328918457031, "learning_rate": 8.551049749702298e-06, "loss": 23.9856, "step": 161870 }, { "epoch": 0.32700784188560783, "grad_norm": 128.2458953857422, "learning_rate": 8.550804002203862e-06, "loss": 14.4225, "step": 161880 }, { "epoch": 0.32702804251829165, "grad_norm": 221.93756103515625, "learning_rate": 8.550558237399238e-06, "loss": 15.2876, "step": 161890 }, { "epoch": 0.32704824315097547, "grad_norm": 360.99481201171875, "learning_rate": 8.550312455289624e-06, "loss": 19.8531, "step": 161900 }, { "epoch": 0.3270684437836593, "grad_norm": 0.0, "learning_rate": 8.550066655876219e-06, "loss": 20.1579, "step": 161910 }, { "epoch": 0.3270886444163431, "grad_norm": 431.2405700683594, "learning_rate": 8.549820839160217e-06, "loss": 19.6738, "step": 161920 }, { "epoch": 0.32710884504902693, "grad_norm": 246.01959228515625, "learning_rate": 8.54957500514282e-06, "loss": 24.579, "step": 161930 }, { "epoch": 0.32712904568171075, "grad_norm": 343.4013366699219, "learning_rate": 8.549329153825226e-06, "loss": 14.3194, "step": 161940 }, { "epoch": 0.32714924631439457, "grad_norm": 645.0366821289062, "learning_rate": 8.549083285208632e-06, "loss": 16.1469, "step": 161950 }, { "epoch": 0.3271694469470784, "grad_norm": 372.9342956542969, "learning_rate": 8.548837399294235e-06, "loss": 45.0335, "step": 161960 }, { "epoch": 0.3271896475797622, "grad_norm": 262.1321105957031, "learning_rate": 8.548591496083236e-06, "loss": 25.2331, "step": 161970 }, { "epoch": 0.32720984821244603, "grad_norm": 296.33575439453125, "learning_rate": 8.548345575576832e-06, "loss": 30.8484, "step": 161980 }, { "epoch": 0.32723004884512985, "grad_norm": 363.92987060546875, "learning_rate": 8.548099637776222e-06, "loss": 17.0713, "step": 161990 }, { "epoch": 0.32725024947781367, "grad_norm": 255.95973205566406, "learning_rate": 8.547853682682605e-06, "loss": 12.1382, "step": 162000 }, { "epoch": 0.32727045011049744, "grad_norm": 128.99453735351562, "learning_rate": 8.54760771029718e-06, "loss": 15.3357, "step": 162010 }, { "epoch": 0.32729065074318126, "grad_norm": 614.3482055664062, "learning_rate": 8.547361720621144e-06, "loss": 22.7171, "step": 162020 }, { "epoch": 0.3273108513758651, "grad_norm": 562.34423828125, "learning_rate": 8.547115713655698e-06, "loss": 17.9622, "step": 162030 }, { "epoch": 0.3273310520085489, "grad_norm": 132.04837036132812, "learning_rate": 8.546869689402042e-06, "loss": 17.7351, "step": 162040 }, { "epoch": 0.3273512526412327, "grad_norm": 235.9044952392578, "learning_rate": 8.54662364786137e-06, "loss": 18.0572, "step": 162050 }, { "epoch": 0.32737145327391654, "grad_norm": 39.30150604248047, "learning_rate": 8.546377589034886e-06, "loss": 24.21, "step": 162060 }, { "epoch": 0.32739165390660036, "grad_norm": 537.3536376953125, "learning_rate": 8.546131512923787e-06, "loss": 29.3692, "step": 162070 }, { "epoch": 0.3274118545392842, "grad_norm": 257.9901123046875, "learning_rate": 8.545885419529276e-06, "loss": 20.2619, "step": 162080 }, { "epoch": 0.327432055171968, "grad_norm": 225.55946350097656, "learning_rate": 8.545639308852546e-06, "loss": 9.1633, "step": 162090 }, { "epoch": 0.3274522558046518, "grad_norm": 279.44677734375, "learning_rate": 8.545393180894801e-06, "loss": 23.7411, "step": 162100 }, { "epoch": 0.32747245643733564, "grad_norm": 356.2279357910156, "learning_rate": 8.54514703565724e-06, "loss": 18.6675, "step": 162110 }, { "epoch": 0.32749265707001946, "grad_norm": 299.8529052734375, "learning_rate": 8.544900873141063e-06, "loss": 10.7733, "step": 162120 }, { "epoch": 0.3275128577027033, "grad_norm": 135.27017211914062, "learning_rate": 8.54465469334747e-06, "loss": 24.2648, "step": 162130 }, { "epoch": 0.32753305833538704, "grad_norm": 575.68408203125, "learning_rate": 8.544408496277657e-06, "loss": 17.6983, "step": 162140 }, { "epoch": 0.32755325896807086, "grad_norm": 719.7817993164062, "learning_rate": 8.544162281932829e-06, "loss": 34.9995, "step": 162150 }, { "epoch": 0.3275734596007547, "grad_norm": 197.396484375, "learning_rate": 8.543916050314182e-06, "loss": 21.258, "step": 162160 }, { "epoch": 0.3275936602334385, "grad_norm": 406.9288024902344, "learning_rate": 8.54366980142292e-06, "loss": 10.7136, "step": 162170 }, { "epoch": 0.3276138608661223, "grad_norm": 149.58261108398438, "learning_rate": 8.54342353526024e-06, "loss": 16.2869, "step": 162180 }, { "epoch": 0.32763406149880614, "grad_norm": 248.72100830078125, "learning_rate": 8.543177251827344e-06, "loss": 20.5343, "step": 162190 }, { "epoch": 0.32765426213148996, "grad_norm": 169.83155822753906, "learning_rate": 8.542930951125432e-06, "loss": 17.1896, "step": 162200 }, { "epoch": 0.3276744627641738, "grad_norm": 302.6604309082031, "learning_rate": 8.542684633155703e-06, "loss": 12.9328, "step": 162210 }, { "epoch": 0.3276946633968576, "grad_norm": 237.6878204345703, "learning_rate": 8.54243829791936e-06, "loss": 29.2501, "step": 162220 }, { "epoch": 0.3277148640295414, "grad_norm": 267.9850158691406, "learning_rate": 8.5421919454176e-06, "loss": 16.6813, "step": 162230 }, { "epoch": 0.32773506466222524, "grad_norm": 366.10986328125, "learning_rate": 8.54194557565163e-06, "loss": 10.7623, "step": 162240 }, { "epoch": 0.32775526529490906, "grad_norm": 201.7869415283203, "learning_rate": 8.541699188622645e-06, "loss": 15.9546, "step": 162250 }, { "epoch": 0.3277754659275929, "grad_norm": 429.77667236328125, "learning_rate": 8.541452784331848e-06, "loss": 26.8716, "step": 162260 }, { "epoch": 0.32779566656027664, "grad_norm": 178.979736328125, "learning_rate": 8.541206362780439e-06, "loss": 38.2107, "step": 162270 }, { "epoch": 0.32781586719296046, "grad_norm": 283.47998046875, "learning_rate": 8.54095992396962e-06, "loss": 15.8778, "step": 162280 }, { "epoch": 0.3278360678256443, "grad_norm": 290.61767578125, "learning_rate": 8.540713467900592e-06, "loss": 17.922, "step": 162290 }, { "epoch": 0.3278562684583281, "grad_norm": 208.37530517578125, "learning_rate": 8.540466994574556e-06, "loss": 27.6846, "step": 162300 }, { "epoch": 0.3278764690910119, "grad_norm": 309.73333740234375, "learning_rate": 8.540220503992713e-06, "loss": 29.8107, "step": 162310 }, { "epoch": 0.32789666972369574, "grad_norm": 323.0174560546875, "learning_rate": 8.539973996156265e-06, "loss": 13.112, "step": 162320 }, { "epoch": 0.32791687035637956, "grad_norm": 288.80047607421875, "learning_rate": 8.539727471066412e-06, "loss": 24.7797, "step": 162330 }, { "epoch": 0.3279370709890634, "grad_norm": 323.5565185546875, "learning_rate": 8.539480928724358e-06, "loss": 20.252, "step": 162340 }, { "epoch": 0.3279572716217472, "grad_norm": 284.6083984375, "learning_rate": 8.539234369131301e-06, "loss": 21.5427, "step": 162350 }, { "epoch": 0.327977472254431, "grad_norm": 254.5157928466797, "learning_rate": 8.538987792288447e-06, "loss": 16.6197, "step": 162360 }, { "epoch": 0.32799767288711484, "grad_norm": 297.11029052734375, "learning_rate": 8.538741198196996e-06, "loss": 17.9096, "step": 162370 }, { "epoch": 0.32801787351979866, "grad_norm": 595.4795532226562, "learning_rate": 8.53849458685815e-06, "loss": 19.0857, "step": 162380 }, { "epoch": 0.32803807415248243, "grad_norm": 284.99176025390625, "learning_rate": 8.53824795827311e-06, "loss": 20.5621, "step": 162390 }, { "epoch": 0.32805827478516625, "grad_norm": 209.95526123046875, "learning_rate": 8.538001312443078e-06, "loss": 20.7624, "step": 162400 }, { "epoch": 0.32807847541785007, "grad_norm": 237.90518188476562, "learning_rate": 8.537754649369256e-06, "loss": 20.2255, "step": 162410 }, { "epoch": 0.3280986760505339, "grad_norm": 205.1381378173828, "learning_rate": 8.537507969052848e-06, "loss": 17.6126, "step": 162420 }, { "epoch": 0.3281188766832177, "grad_norm": 184.6565704345703, "learning_rate": 8.537261271495055e-06, "loss": 11.9788, "step": 162430 }, { "epoch": 0.32813907731590153, "grad_norm": 172.0233917236328, "learning_rate": 8.537014556697078e-06, "loss": 18.5022, "step": 162440 }, { "epoch": 0.32815927794858535, "grad_norm": 241.6584014892578, "learning_rate": 8.536767824660124e-06, "loss": 23.0887, "step": 162450 }, { "epoch": 0.32817947858126917, "grad_norm": 84.54242706298828, "learning_rate": 8.536521075385391e-06, "loss": 13.1944, "step": 162460 }, { "epoch": 0.328199679213953, "grad_norm": 395.49810791015625, "learning_rate": 8.536274308874083e-06, "loss": 18.0104, "step": 162470 }, { "epoch": 0.3282198798466368, "grad_norm": 494.9972839355469, "learning_rate": 8.536027525127405e-06, "loss": 26.2375, "step": 162480 }, { "epoch": 0.32824008047932063, "grad_norm": 362.4843444824219, "learning_rate": 8.535780724146553e-06, "loss": 17.0178, "step": 162490 }, { "epoch": 0.32826028111200445, "grad_norm": 724.7839965820312, "learning_rate": 8.535533905932739e-06, "loss": 42.8276, "step": 162500 }, { "epoch": 0.32828048174468827, "grad_norm": 245.06858825683594, "learning_rate": 8.53528707048716e-06, "loss": 13.3939, "step": 162510 }, { "epoch": 0.32830068237737203, "grad_norm": 139.88148498535156, "learning_rate": 8.535040217811019e-06, "loss": 14.5396, "step": 162520 }, { "epoch": 0.32832088301005585, "grad_norm": 130.60879516601562, "learning_rate": 8.534793347905523e-06, "loss": 16.2585, "step": 162530 }, { "epoch": 0.3283410836427397, "grad_norm": 487.502685546875, "learning_rate": 8.534546460771873e-06, "loss": 28.2207, "step": 162540 }, { "epoch": 0.3283612842754235, "grad_norm": 190.25743103027344, "learning_rate": 8.534299556411272e-06, "loss": 18.1848, "step": 162550 }, { "epoch": 0.3283814849081073, "grad_norm": 265.1337585449219, "learning_rate": 8.534052634824923e-06, "loss": 19.2674, "step": 162560 }, { "epoch": 0.32840168554079113, "grad_norm": 270.58966064453125, "learning_rate": 8.53380569601403e-06, "loss": 11.5489, "step": 162570 }, { "epoch": 0.32842188617347495, "grad_norm": 26.32523536682129, "learning_rate": 8.533558739979796e-06, "loss": 17.8911, "step": 162580 }, { "epoch": 0.3284420868061588, "grad_norm": 381.2300720214844, "learning_rate": 8.533311766723428e-06, "loss": 37.8082, "step": 162590 }, { "epoch": 0.3284622874388426, "grad_norm": 336.3320617675781, "learning_rate": 8.533064776246126e-06, "loss": 14.9841, "step": 162600 }, { "epoch": 0.3284824880715264, "grad_norm": 370.32025146484375, "learning_rate": 8.532817768549092e-06, "loss": 10.7591, "step": 162610 }, { "epoch": 0.32850268870421023, "grad_norm": 315.4004211425781, "learning_rate": 8.532570743633535e-06, "loss": 23.1309, "step": 162620 }, { "epoch": 0.32852288933689405, "grad_norm": 159.81503295898438, "learning_rate": 8.532323701500657e-06, "loss": 12.8759, "step": 162630 }, { "epoch": 0.3285430899695779, "grad_norm": 308.9885559082031, "learning_rate": 8.532076642151661e-06, "loss": 23.3977, "step": 162640 }, { "epoch": 0.32856329060226164, "grad_norm": 234.4618377685547, "learning_rate": 8.531829565587751e-06, "loss": 19.5421, "step": 162650 }, { "epoch": 0.32858349123494546, "grad_norm": 233.9818878173828, "learning_rate": 8.531582471810134e-06, "loss": 21.8814, "step": 162660 }, { "epoch": 0.3286036918676293, "grad_norm": 234.55410766601562, "learning_rate": 8.53133536082001e-06, "loss": 21.0321, "step": 162670 }, { "epoch": 0.3286238925003131, "grad_norm": 508.860595703125, "learning_rate": 8.531088232618587e-06, "loss": 27.4477, "step": 162680 }, { "epoch": 0.3286440931329969, "grad_norm": 114.37043762207031, "learning_rate": 8.530841087207068e-06, "loss": 9.9296, "step": 162690 }, { "epoch": 0.32866429376568074, "grad_norm": 128.8371124267578, "learning_rate": 8.530593924586659e-06, "loss": 25.8596, "step": 162700 }, { "epoch": 0.32868449439836456, "grad_norm": 295.5189514160156, "learning_rate": 8.530346744758562e-06, "loss": 25.7543, "step": 162710 }, { "epoch": 0.3287046950310484, "grad_norm": 322.8118896484375, "learning_rate": 8.530099547723983e-06, "loss": 27.8189, "step": 162720 }, { "epoch": 0.3287248956637322, "grad_norm": 591.704345703125, "learning_rate": 8.529852333484129e-06, "loss": 19.5014, "step": 162730 }, { "epoch": 0.328745096296416, "grad_norm": 90.76709747314453, "learning_rate": 8.5296051020402e-06, "loss": 18.3724, "step": 162740 }, { "epoch": 0.32876529692909984, "grad_norm": 172.02536010742188, "learning_rate": 8.529357853393406e-06, "loss": 18.998, "step": 162750 }, { "epoch": 0.32878549756178366, "grad_norm": 168.46913146972656, "learning_rate": 8.52911058754495e-06, "loss": 18.8584, "step": 162760 }, { "epoch": 0.3288056981944675, "grad_norm": 253.21585083007812, "learning_rate": 8.528863304496035e-06, "loss": 22.6802, "step": 162770 }, { "epoch": 0.32882589882715124, "grad_norm": 587.8738403320312, "learning_rate": 8.528616004247869e-06, "loss": 19.1564, "step": 162780 }, { "epoch": 0.32884609945983506, "grad_norm": 439.267333984375, "learning_rate": 8.528368686801656e-06, "loss": 22.8479, "step": 162790 }, { "epoch": 0.3288663000925189, "grad_norm": 381.1836853027344, "learning_rate": 8.528121352158604e-06, "loss": 10.7987, "step": 162800 }, { "epoch": 0.3288865007252027, "grad_norm": 109.8764419555664, "learning_rate": 8.527874000319915e-06, "loss": 13.6689, "step": 162810 }, { "epoch": 0.3289067013578865, "grad_norm": 393.32476806640625, "learning_rate": 8.527626631286797e-06, "loss": 29.3036, "step": 162820 }, { "epoch": 0.32892690199057034, "grad_norm": 128.92327880859375, "learning_rate": 8.527379245060453e-06, "loss": 18.59, "step": 162830 }, { "epoch": 0.32894710262325416, "grad_norm": 109.87393188476562, "learning_rate": 8.527131841642092e-06, "loss": 21.4, "step": 162840 }, { "epoch": 0.328967303255938, "grad_norm": 120.42993927001953, "learning_rate": 8.526884421032916e-06, "loss": 27.9378, "step": 162850 }, { "epoch": 0.3289875038886218, "grad_norm": 248.73248291015625, "learning_rate": 8.526636983234135e-06, "loss": 13.761, "step": 162860 }, { "epoch": 0.3290077045213056, "grad_norm": 308.0274963378906, "learning_rate": 8.526389528246955e-06, "loss": 16.6465, "step": 162870 }, { "epoch": 0.32902790515398944, "grad_norm": 369.4353942871094, "learning_rate": 8.526142056072578e-06, "loss": 26.5751, "step": 162880 }, { "epoch": 0.32904810578667326, "grad_norm": 171.50547790527344, "learning_rate": 8.525894566712212e-06, "loss": 21.6892, "step": 162890 }, { "epoch": 0.3290683064193571, "grad_norm": 249.1346893310547, "learning_rate": 8.525647060167063e-06, "loss": 17.6325, "step": 162900 }, { "epoch": 0.32908850705204085, "grad_norm": 218.3851318359375, "learning_rate": 8.52539953643834e-06, "loss": 29.6356, "step": 162910 }, { "epoch": 0.32910870768472467, "grad_norm": 541.3108520507812, "learning_rate": 8.525151995527244e-06, "loss": 20.8004, "step": 162920 }, { "epoch": 0.3291289083174085, "grad_norm": 627.0709838867188, "learning_rate": 8.524904437434986e-06, "loss": 30.3482, "step": 162930 }, { "epoch": 0.3291491089500923, "grad_norm": 193.11598205566406, "learning_rate": 8.524656862162773e-06, "loss": 24.5486, "step": 162940 }, { "epoch": 0.3291693095827761, "grad_norm": 516.9819946289062, "learning_rate": 8.524409269711808e-06, "loss": 11.5021, "step": 162950 }, { "epoch": 0.32918951021545995, "grad_norm": 160.8563995361328, "learning_rate": 8.524161660083301e-06, "loss": 16.027, "step": 162960 }, { "epoch": 0.32920971084814377, "grad_norm": 445.36553955078125, "learning_rate": 8.523914033278456e-06, "loss": 22.7647, "step": 162970 }, { "epoch": 0.3292299114808276, "grad_norm": 105.55641174316406, "learning_rate": 8.523666389298484e-06, "loss": 25.9528, "step": 162980 }, { "epoch": 0.3292501121135114, "grad_norm": 543.8961181640625, "learning_rate": 8.523418728144585e-06, "loss": 24.9125, "step": 162990 }, { "epoch": 0.3292703127461952, "grad_norm": 666.1368408203125, "learning_rate": 8.523171049817974e-06, "loss": 22.4246, "step": 163000 }, { "epoch": 0.32929051337887905, "grad_norm": 273.36566162109375, "learning_rate": 8.522923354319854e-06, "loss": 23.1039, "step": 163010 }, { "epoch": 0.32931071401156287, "grad_norm": 221.27430725097656, "learning_rate": 8.522675641651432e-06, "loss": 12.9034, "step": 163020 }, { "epoch": 0.32933091464424663, "grad_norm": 383.09368896484375, "learning_rate": 8.522427911813917e-06, "loss": 20.5533, "step": 163030 }, { "epoch": 0.32935111527693045, "grad_norm": 481.99700927734375, "learning_rate": 8.522180164808515e-06, "loss": 32.2666, "step": 163040 }, { "epoch": 0.32937131590961427, "grad_norm": 391.9695739746094, "learning_rate": 8.521932400636435e-06, "loss": 26.1974, "step": 163050 }, { "epoch": 0.3293915165422981, "grad_norm": 144.45933532714844, "learning_rate": 8.521684619298883e-06, "loss": 17.0659, "step": 163060 }, { "epoch": 0.3294117171749819, "grad_norm": 127.09485626220703, "learning_rate": 8.521436820797067e-06, "loss": 12.948, "step": 163070 }, { "epoch": 0.32943191780766573, "grad_norm": 468.7964172363281, "learning_rate": 8.521189005132195e-06, "loss": 13.4712, "step": 163080 }, { "epoch": 0.32945211844034955, "grad_norm": 538.3344116210938, "learning_rate": 8.520941172305477e-06, "loss": 18.2079, "step": 163090 }, { "epoch": 0.32947231907303337, "grad_norm": 258.8137512207031, "learning_rate": 8.520693322318116e-06, "loss": 29.0806, "step": 163100 }, { "epoch": 0.3294925197057172, "grad_norm": 272.6590270996094, "learning_rate": 8.520445455171325e-06, "loss": 23.0965, "step": 163110 }, { "epoch": 0.329512720338401, "grad_norm": 351.4837646484375, "learning_rate": 8.520197570866307e-06, "loss": 19.3405, "step": 163120 }, { "epoch": 0.32953292097108483, "grad_norm": 17.714553833007812, "learning_rate": 8.519949669404275e-06, "loss": 22.6676, "step": 163130 }, { "epoch": 0.32955312160376865, "grad_norm": 524.312255859375, "learning_rate": 8.519701750786435e-06, "loss": 24.5169, "step": 163140 }, { "epoch": 0.32957332223645247, "grad_norm": 198.48895263671875, "learning_rate": 8.519453815013996e-06, "loss": 18.927, "step": 163150 }, { "epoch": 0.32959352286913624, "grad_norm": 511.1898498535156, "learning_rate": 8.519205862088165e-06, "loss": 23.304, "step": 163160 }, { "epoch": 0.32961372350182006, "grad_norm": 397.8203430175781, "learning_rate": 8.518957892010151e-06, "loss": 18.6811, "step": 163170 }, { "epoch": 0.3296339241345039, "grad_norm": 154.5198516845703, "learning_rate": 8.518709904781163e-06, "loss": 24.0681, "step": 163180 }, { "epoch": 0.3296541247671877, "grad_norm": 344.25689697265625, "learning_rate": 8.518461900402411e-06, "loss": 18.0803, "step": 163190 }, { "epoch": 0.3296743253998715, "grad_norm": 1121.5653076171875, "learning_rate": 8.518213878875103e-06, "loss": 22.5937, "step": 163200 }, { "epoch": 0.32969452603255534, "grad_norm": 61.36903762817383, "learning_rate": 8.517965840200445e-06, "loss": 23.5668, "step": 163210 }, { "epoch": 0.32971472666523916, "grad_norm": 9.07269287109375, "learning_rate": 8.51771778437965e-06, "loss": 18.0288, "step": 163220 }, { "epoch": 0.329734927297923, "grad_norm": 164.331787109375, "learning_rate": 8.517469711413924e-06, "loss": 16.908, "step": 163230 }, { "epoch": 0.3297551279306068, "grad_norm": 168.9466552734375, "learning_rate": 8.517221621304479e-06, "loss": 12.8018, "step": 163240 }, { "epoch": 0.3297753285632906, "grad_norm": 16.79106330871582, "learning_rate": 8.51697351405252e-06, "loss": 18.7127, "step": 163250 }, { "epoch": 0.32979552919597444, "grad_norm": 302.5372009277344, "learning_rate": 8.51672538965926e-06, "loss": 22.5445, "step": 163260 }, { "epoch": 0.32981572982865826, "grad_norm": 265.10723876953125, "learning_rate": 8.516477248125907e-06, "loss": 20.4552, "step": 163270 }, { "epoch": 0.3298359304613421, "grad_norm": 361.0265197753906, "learning_rate": 8.51622908945367e-06, "loss": 18.6643, "step": 163280 }, { "epoch": 0.32985613109402584, "grad_norm": 535.2169189453125, "learning_rate": 8.515980913643759e-06, "loss": 19.7956, "step": 163290 }, { "epoch": 0.32987633172670966, "grad_norm": 266.98797607421875, "learning_rate": 8.515732720697383e-06, "loss": 22.3715, "step": 163300 }, { "epoch": 0.3298965323593935, "grad_norm": 345.9737854003906, "learning_rate": 8.515484510615753e-06, "loss": 19.3567, "step": 163310 }, { "epoch": 0.3299167329920773, "grad_norm": 27.846946716308594, "learning_rate": 8.515236283400078e-06, "loss": 18.8569, "step": 163320 }, { "epoch": 0.3299369336247611, "grad_norm": 334.7138366699219, "learning_rate": 8.514988039051567e-06, "loss": 17.4182, "step": 163330 }, { "epoch": 0.32995713425744494, "grad_norm": 503.9947814941406, "learning_rate": 8.514739777571431e-06, "loss": 21.98, "step": 163340 }, { "epoch": 0.32997733489012876, "grad_norm": 311.85455322265625, "learning_rate": 8.51449149896088e-06, "loss": 17.1605, "step": 163350 }, { "epoch": 0.3299975355228126, "grad_norm": 17.554868698120117, "learning_rate": 8.514243203221124e-06, "loss": 31.5025, "step": 163360 }, { "epoch": 0.3300177361554964, "grad_norm": 465.7385559082031, "learning_rate": 8.51399489035337e-06, "loss": 30.3311, "step": 163370 }, { "epoch": 0.3300379367881802, "grad_norm": 278.14764404296875, "learning_rate": 8.513746560358833e-06, "loss": 23.9552, "step": 163380 }, { "epoch": 0.33005813742086404, "grad_norm": 468.36016845703125, "learning_rate": 8.513498213238722e-06, "loss": 24.9737, "step": 163390 }, { "epoch": 0.33007833805354786, "grad_norm": 740.0568237304688, "learning_rate": 8.513249848994248e-06, "loss": 30.2124, "step": 163400 }, { "epoch": 0.3300985386862317, "grad_norm": 471.21319580078125, "learning_rate": 8.513001467626618e-06, "loss": 32.5397, "step": 163410 }, { "epoch": 0.33011873931891544, "grad_norm": 435.9621276855469, "learning_rate": 8.512753069137046e-06, "loss": 30.9883, "step": 163420 }, { "epoch": 0.33013893995159926, "grad_norm": 69.7506103515625, "learning_rate": 8.51250465352674e-06, "loss": 12.1343, "step": 163430 }, { "epoch": 0.3301591405842831, "grad_norm": 256.0396728515625, "learning_rate": 8.512256220796915e-06, "loss": 32.2829, "step": 163440 }, { "epoch": 0.3301793412169669, "grad_norm": 309.1819152832031, "learning_rate": 8.512007770948775e-06, "loss": 18.0459, "step": 163450 }, { "epoch": 0.3301995418496507, "grad_norm": 32.120338439941406, "learning_rate": 8.51175930398354e-06, "loss": 21.7181, "step": 163460 }, { "epoch": 0.33021974248233454, "grad_norm": 534.4570922851562, "learning_rate": 8.511510819902413e-06, "loss": 37.4644, "step": 163470 }, { "epoch": 0.33023994311501836, "grad_norm": 267.18792724609375, "learning_rate": 8.51126231870661e-06, "loss": 16.4287, "step": 163480 }, { "epoch": 0.3302601437477022, "grad_norm": 763.7919921875, "learning_rate": 8.511013800397338e-06, "loss": 19.3396, "step": 163490 }, { "epoch": 0.330280344380386, "grad_norm": 384.11444091796875, "learning_rate": 8.510765264975813e-06, "loss": 28.4592, "step": 163500 }, { "epoch": 0.3303005450130698, "grad_norm": 411.3933410644531, "learning_rate": 8.510516712443244e-06, "loss": 14.1125, "step": 163510 }, { "epoch": 0.33032074564575364, "grad_norm": 394.11578369140625, "learning_rate": 8.51026814280084e-06, "loss": 30.2813, "step": 163520 }, { "epoch": 0.33034094627843746, "grad_norm": 550.73486328125, "learning_rate": 8.510019556049815e-06, "loss": 26.2647, "step": 163530 }, { "epoch": 0.3303611469111213, "grad_norm": 469.9820251464844, "learning_rate": 8.509770952191384e-06, "loss": 23.0699, "step": 163540 }, { "epoch": 0.33038134754380505, "grad_norm": 386.8929138183594, "learning_rate": 8.509522331226751e-06, "loss": 44.4193, "step": 163550 }, { "epoch": 0.33040154817648887, "grad_norm": 198.1128692626953, "learning_rate": 8.509273693157133e-06, "loss": 19.1042, "step": 163560 }, { "epoch": 0.3304217488091727, "grad_norm": 330.52301025390625, "learning_rate": 8.509025037983742e-06, "loss": 26.5918, "step": 163570 }, { "epoch": 0.3304419494418565, "grad_norm": 229.4310302734375, "learning_rate": 8.508776365707788e-06, "loss": 22.9945, "step": 163580 }, { "epoch": 0.33046215007454033, "grad_norm": 665.7144165039062, "learning_rate": 8.508527676330483e-06, "loss": 34.8871, "step": 163590 }, { "epoch": 0.33048235070722415, "grad_norm": 697.5511474609375, "learning_rate": 8.508278969853037e-06, "loss": 18.0467, "step": 163600 }, { "epoch": 0.33050255133990797, "grad_norm": 112.84270477294922, "learning_rate": 8.508030246276668e-06, "loss": 20.6646, "step": 163610 }, { "epoch": 0.3305227519725918, "grad_norm": 7.473842620849609, "learning_rate": 8.507781505602585e-06, "loss": 22.9061, "step": 163620 }, { "epoch": 0.3305429526052756, "grad_norm": 263.9842529296875, "learning_rate": 8.507532747832e-06, "loss": 25.4512, "step": 163630 }, { "epoch": 0.33056315323795943, "grad_norm": 134.56578063964844, "learning_rate": 8.507283972966126e-06, "loss": 19.6938, "step": 163640 }, { "epoch": 0.33058335387064325, "grad_norm": 265.8783264160156, "learning_rate": 8.507035181006175e-06, "loss": 20.2171, "step": 163650 }, { "epoch": 0.33060355450332707, "grad_norm": 318.6019287109375, "learning_rate": 8.50678637195336e-06, "loss": 29.0459, "step": 163660 }, { "epoch": 0.33062375513601083, "grad_norm": 567.8115844726562, "learning_rate": 8.506537545808894e-06, "loss": 27.686, "step": 163670 }, { "epoch": 0.33064395576869465, "grad_norm": 960.7307739257812, "learning_rate": 8.506288702573988e-06, "loss": 32.1641, "step": 163680 }, { "epoch": 0.3306641564013785, "grad_norm": 370.08697509765625, "learning_rate": 8.506039842249855e-06, "loss": 19.8074, "step": 163690 }, { "epoch": 0.3306843570340623, "grad_norm": 635.2039794921875, "learning_rate": 8.505790964837712e-06, "loss": 21.4368, "step": 163700 }, { "epoch": 0.3307045576667461, "grad_norm": 435.7977294921875, "learning_rate": 8.505542070338768e-06, "loss": 26.2657, "step": 163710 }, { "epoch": 0.33072475829942993, "grad_norm": 652.779052734375, "learning_rate": 8.505293158754238e-06, "loss": 23.1095, "step": 163720 }, { "epoch": 0.33074495893211375, "grad_norm": 759.5297241210938, "learning_rate": 8.505044230085332e-06, "loss": 20.4033, "step": 163730 }, { "epoch": 0.3307651595647976, "grad_norm": 193.44479370117188, "learning_rate": 8.504795284333267e-06, "loss": 26.8769, "step": 163740 }, { "epoch": 0.3307853601974814, "grad_norm": 196.4053497314453, "learning_rate": 8.504546321499255e-06, "loss": 33.8965, "step": 163750 }, { "epoch": 0.3308055608301652, "grad_norm": 379.7756652832031, "learning_rate": 8.504297341584509e-06, "loss": 22.633, "step": 163760 }, { "epoch": 0.33082576146284903, "grad_norm": 242.9032745361328, "learning_rate": 8.504048344590243e-06, "loss": 13.0765, "step": 163770 }, { "epoch": 0.33084596209553285, "grad_norm": 323.24859619140625, "learning_rate": 8.50379933051767e-06, "loss": 7.0845, "step": 163780 }, { "epoch": 0.3308661627282167, "grad_norm": 41.68326950073242, "learning_rate": 8.503550299368004e-06, "loss": 11.8097, "step": 163790 }, { "epoch": 0.33088636336090044, "grad_norm": 185.17051696777344, "learning_rate": 8.50330125114246e-06, "loss": 19.8503, "step": 163800 }, { "epoch": 0.33090656399358426, "grad_norm": 539.4900512695312, "learning_rate": 8.50305218584225e-06, "loss": 39.2578, "step": 163810 }, { "epoch": 0.3309267646262681, "grad_norm": 96.00993347167969, "learning_rate": 8.502803103468587e-06, "loss": 14.012, "step": 163820 }, { "epoch": 0.3309469652589519, "grad_norm": 1375.129638671875, "learning_rate": 8.502554004022688e-06, "loss": 27.1154, "step": 163830 }, { "epoch": 0.3309671658916357, "grad_norm": 211.8894500732422, "learning_rate": 8.502304887505765e-06, "loss": 14.2917, "step": 163840 }, { "epoch": 0.33098736652431954, "grad_norm": 304.5118408203125, "learning_rate": 8.502055753919033e-06, "loss": 21.6183, "step": 163850 }, { "epoch": 0.33100756715700336, "grad_norm": 332.1041259765625, "learning_rate": 8.501806603263706e-06, "loss": 21.3169, "step": 163860 }, { "epoch": 0.3310277677896872, "grad_norm": 311.13653564453125, "learning_rate": 8.501557435540996e-06, "loss": 22.1664, "step": 163870 }, { "epoch": 0.331047968422371, "grad_norm": 543.50439453125, "learning_rate": 8.501308250752123e-06, "loss": 35.246, "step": 163880 }, { "epoch": 0.3310681690550548, "grad_norm": 58.02324676513672, "learning_rate": 8.501059048898297e-06, "loss": 17.468, "step": 163890 }, { "epoch": 0.33108836968773864, "grad_norm": 400.9122009277344, "learning_rate": 8.500809829980734e-06, "loss": 15.7858, "step": 163900 }, { "epoch": 0.33110857032042246, "grad_norm": 18.358407974243164, "learning_rate": 8.50056059400065e-06, "loss": 21.9923, "step": 163910 }, { "epoch": 0.3311287709531063, "grad_norm": 161.1336669921875, "learning_rate": 8.500311340959256e-06, "loss": 21.8543, "step": 163920 }, { "epoch": 0.33114897158579004, "grad_norm": 156.92117309570312, "learning_rate": 8.500062070857772e-06, "loss": 20.1275, "step": 163930 }, { "epoch": 0.33116917221847386, "grad_norm": 254.82638549804688, "learning_rate": 8.499812783697406e-06, "loss": 20.5893, "step": 163940 }, { "epoch": 0.3311893728511577, "grad_norm": 66.61095428466797, "learning_rate": 8.499563479479378e-06, "loss": 28.8519, "step": 163950 }, { "epoch": 0.3312095734838415, "grad_norm": 555.0891723632812, "learning_rate": 8.499314158204904e-06, "loss": 28.9911, "step": 163960 }, { "epoch": 0.3312297741165253, "grad_norm": 420.680908203125, "learning_rate": 8.499064819875195e-06, "loss": 18.3402, "step": 163970 }, { "epoch": 0.33124997474920914, "grad_norm": 378.317138671875, "learning_rate": 8.49881546449147e-06, "loss": 15.315, "step": 163980 }, { "epoch": 0.33127017538189296, "grad_norm": 185.15240478515625, "learning_rate": 8.498566092054943e-06, "loss": 16.8852, "step": 163990 }, { "epoch": 0.3312903760145768, "grad_norm": 448.84771728515625, "learning_rate": 8.498316702566828e-06, "loss": 30.0461, "step": 164000 }, { "epoch": 0.3313105766472606, "grad_norm": 429.41668701171875, "learning_rate": 8.498067296028343e-06, "loss": 30.0207, "step": 164010 }, { "epoch": 0.3313307772799444, "grad_norm": 386.4164733886719, "learning_rate": 8.497817872440702e-06, "loss": 19.3193, "step": 164020 }, { "epoch": 0.33135097791262824, "grad_norm": 215.05335998535156, "learning_rate": 8.497568431805118e-06, "loss": 6.4467, "step": 164030 }, { "epoch": 0.33137117854531206, "grad_norm": 104.97160339355469, "learning_rate": 8.497318974122813e-06, "loss": 12.526, "step": 164040 }, { "epoch": 0.3313913791779959, "grad_norm": 402.8816833496094, "learning_rate": 8.497069499394998e-06, "loss": 20.3663, "step": 164050 }, { "epoch": 0.33141157981067965, "grad_norm": 292.6847229003906, "learning_rate": 8.496820007622891e-06, "loss": 14.7327, "step": 164060 }, { "epoch": 0.33143178044336347, "grad_norm": 409.5272521972656, "learning_rate": 8.496570498807708e-06, "loss": 25.272, "step": 164070 }, { "epoch": 0.3314519810760473, "grad_norm": 347.12237548828125, "learning_rate": 8.496320972950663e-06, "loss": 18.7066, "step": 164080 }, { "epoch": 0.3314721817087311, "grad_norm": 243.21839904785156, "learning_rate": 8.496071430052975e-06, "loss": 19.2063, "step": 164090 }, { "epoch": 0.3314923823414149, "grad_norm": 503.8268737792969, "learning_rate": 8.495821870115857e-06, "loss": 28.707, "step": 164100 }, { "epoch": 0.33151258297409875, "grad_norm": 262.190185546875, "learning_rate": 8.49557229314053e-06, "loss": 17.8881, "step": 164110 }, { "epoch": 0.33153278360678257, "grad_norm": 335.1723937988281, "learning_rate": 8.495322699128206e-06, "loss": 19.813, "step": 164120 }, { "epoch": 0.3315529842394664, "grad_norm": 442.90362548828125, "learning_rate": 8.495073088080102e-06, "loss": 19.071, "step": 164130 }, { "epoch": 0.3315731848721502, "grad_norm": 112.83855438232422, "learning_rate": 8.494823459997437e-06, "loss": 24.5015, "step": 164140 }, { "epoch": 0.331593385504834, "grad_norm": 207.1382293701172, "learning_rate": 8.494573814881426e-06, "loss": 20.1367, "step": 164150 }, { "epoch": 0.33161358613751785, "grad_norm": 247.27532958984375, "learning_rate": 8.494324152733286e-06, "loss": 22.7772, "step": 164160 }, { "epoch": 0.33163378677020167, "grad_norm": 183.56056213378906, "learning_rate": 8.494074473554235e-06, "loss": 19.8806, "step": 164170 }, { "epoch": 0.33165398740288543, "grad_norm": 71.13424682617188, "learning_rate": 8.493824777345487e-06, "loss": 19.8803, "step": 164180 }, { "epoch": 0.33167418803556925, "grad_norm": 236.53436279296875, "learning_rate": 8.493575064108262e-06, "loss": 23.0459, "step": 164190 }, { "epoch": 0.33169438866825307, "grad_norm": 96.85140991210938, "learning_rate": 8.493325333843776e-06, "loss": 15.5451, "step": 164200 }, { "epoch": 0.3317145893009369, "grad_norm": 55.936946868896484, "learning_rate": 8.493075586553245e-06, "loss": 19.7795, "step": 164210 }, { "epoch": 0.3317347899336207, "grad_norm": 182.2644500732422, "learning_rate": 8.492825822237888e-06, "loss": 48.5824, "step": 164220 }, { "epoch": 0.33175499056630453, "grad_norm": 363.97900390625, "learning_rate": 8.492576040898921e-06, "loss": 16.1024, "step": 164230 }, { "epoch": 0.33177519119898835, "grad_norm": 271.0367126464844, "learning_rate": 8.492326242537564e-06, "loss": 17.0652, "step": 164240 }, { "epoch": 0.33179539183167217, "grad_norm": 488.8482666015625, "learning_rate": 8.492076427155031e-06, "loss": 29.3363, "step": 164250 }, { "epoch": 0.331815592464356, "grad_norm": 115.44332885742188, "learning_rate": 8.49182659475254e-06, "loss": 11.4004, "step": 164260 }, { "epoch": 0.3318357930970398, "grad_norm": 434.4683532714844, "learning_rate": 8.491576745331312e-06, "loss": 20.9399, "step": 164270 }, { "epoch": 0.33185599372972363, "grad_norm": 297.5650634765625, "learning_rate": 8.49132687889256e-06, "loss": 16.5945, "step": 164280 }, { "epoch": 0.33187619436240745, "grad_norm": 36.454490661621094, "learning_rate": 8.491076995437504e-06, "loss": 29.1681, "step": 164290 }, { "epoch": 0.33189639499509127, "grad_norm": 77.96312713623047, "learning_rate": 8.490827094967364e-06, "loss": 19.5484, "step": 164300 }, { "epoch": 0.33191659562777504, "grad_norm": 302.0710144042969, "learning_rate": 8.490577177483357e-06, "loss": 18.635, "step": 164310 }, { "epoch": 0.33193679626045886, "grad_norm": 228.28005981445312, "learning_rate": 8.490327242986698e-06, "loss": 18.3311, "step": 164320 }, { "epoch": 0.3319569968931427, "grad_norm": 671.5521240234375, "learning_rate": 8.490077291478607e-06, "loss": 18.843, "step": 164330 }, { "epoch": 0.3319771975258265, "grad_norm": 125.64741516113281, "learning_rate": 8.489827322960305e-06, "loss": 12.1025, "step": 164340 }, { "epoch": 0.3319973981585103, "grad_norm": 354.17926025390625, "learning_rate": 8.489577337433006e-06, "loss": 17.5653, "step": 164350 }, { "epoch": 0.33201759879119414, "grad_norm": 205.52879333496094, "learning_rate": 8.48932733489793e-06, "loss": 24.2273, "step": 164360 }, { "epoch": 0.33203779942387796, "grad_norm": 298.9077453613281, "learning_rate": 8.489077315356297e-06, "loss": 18.3268, "step": 164370 }, { "epoch": 0.3320580000565618, "grad_norm": 396.4359130859375, "learning_rate": 8.488827278809324e-06, "loss": 26.7132, "step": 164380 }, { "epoch": 0.3320782006892456, "grad_norm": 447.0672302246094, "learning_rate": 8.48857722525823e-06, "loss": 27.0116, "step": 164390 }, { "epoch": 0.3320984013219294, "grad_norm": 240.1743621826172, "learning_rate": 8.488327154704232e-06, "loss": 19.5004, "step": 164400 }, { "epoch": 0.33211860195461324, "grad_norm": 230.26129150390625, "learning_rate": 8.488077067148554e-06, "loss": 9.9799, "step": 164410 }, { "epoch": 0.33213880258729706, "grad_norm": 473.1795654296875, "learning_rate": 8.487826962592409e-06, "loss": 14.1135, "step": 164420 }, { "epoch": 0.3321590032199809, "grad_norm": 590.1678466796875, "learning_rate": 8.487576841037019e-06, "loss": 17.5955, "step": 164430 }, { "epoch": 0.33217920385266464, "grad_norm": 29.640411376953125, "learning_rate": 8.487326702483602e-06, "loss": 45.1409, "step": 164440 }, { "epoch": 0.33219940448534846, "grad_norm": 303.3145446777344, "learning_rate": 8.487076546933378e-06, "loss": 17.1015, "step": 164450 }, { "epoch": 0.3322196051180323, "grad_norm": 232.22474670410156, "learning_rate": 8.486826374387568e-06, "loss": 21.0075, "step": 164460 }, { "epoch": 0.3322398057507161, "grad_norm": 592.8365478515625, "learning_rate": 8.486576184847386e-06, "loss": 29.1786, "step": 164470 }, { "epoch": 0.3322600063833999, "grad_norm": 220.8876495361328, "learning_rate": 8.486325978314054e-06, "loss": 18.9879, "step": 164480 }, { "epoch": 0.33228020701608374, "grad_norm": 401.6668395996094, "learning_rate": 8.486075754788794e-06, "loss": 16.8733, "step": 164490 }, { "epoch": 0.33230040764876756, "grad_norm": 432.0315246582031, "learning_rate": 8.485825514272824e-06, "loss": 21.7808, "step": 164500 }, { "epoch": 0.3323206082814514, "grad_norm": 188.13819885253906, "learning_rate": 8.485575256767362e-06, "loss": 28.9028, "step": 164510 }, { "epoch": 0.3323408089141352, "grad_norm": 423.246826171875, "learning_rate": 8.48532498227363e-06, "loss": 12.0402, "step": 164520 }, { "epoch": 0.332361009546819, "grad_norm": 192.30929565429688, "learning_rate": 8.485074690792845e-06, "loss": 12.9734, "step": 164530 }, { "epoch": 0.33238121017950284, "grad_norm": 2101.193115234375, "learning_rate": 8.484824382326232e-06, "loss": 29.6358, "step": 164540 }, { "epoch": 0.33240141081218666, "grad_norm": 276.6634216308594, "learning_rate": 8.484574056875004e-06, "loss": 25.1073, "step": 164550 }, { "epoch": 0.3324216114448705, "grad_norm": 207.587646484375, "learning_rate": 8.484323714440386e-06, "loss": 38.223, "step": 164560 }, { "epoch": 0.33244181207755424, "grad_norm": 0.0, "learning_rate": 8.484073355023597e-06, "loss": 17.8015, "step": 164570 }, { "epoch": 0.33246201271023806, "grad_norm": 477.0229797363281, "learning_rate": 8.483822978625855e-06, "loss": 17.1811, "step": 164580 }, { "epoch": 0.3324822133429219, "grad_norm": 221.0781707763672, "learning_rate": 8.483572585248385e-06, "loss": 11.2947, "step": 164590 }, { "epoch": 0.3325024139756057, "grad_norm": 176.70713806152344, "learning_rate": 8.483322174892404e-06, "loss": 16.0261, "step": 164600 }, { "epoch": 0.3325226146082895, "grad_norm": 160.35836791992188, "learning_rate": 8.483071747559133e-06, "loss": 15.0984, "step": 164610 }, { "epoch": 0.33254281524097334, "grad_norm": 138.48947143554688, "learning_rate": 8.482821303249793e-06, "loss": 21.2403, "step": 164620 }, { "epoch": 0.33256301587365716, "grad_norm": 191.57606506347656, "learning_rate": 8.482570841965605e-06, "loss": 18.7266, "step": 164630 }, { "epoch": 0.332583216506341, "grad_norm": 277.4239196777344, "learning_rate": 8.482320363707787e-06, "loss": 19.7712, "step": 164640 }, { "epoch": 0.3326034171390248, "grad_norm": 575.947998046875, "learning_rate": 8.482069868477565e-06, "loss": 13.5014, "step": 164650 }, { "epoch": 0.3326236177717086, "grad_norm": 471.56884765625, "learning_rate": 8.481819356276155e-06, "loss": 31.1326, "step": 164660 }, { "epoch": 0.33264381840439244, "grad_norm": 361.4928283691406, "learning_rate": 8.481568827104779e-06, "loss": 19.5751, "step": 164670 }, { "epoch": 0.33266401903707626, "grad_norm": 358.65966796875, "learning_rate": 8.481318280964661e-06, "loss": 20.9525, "step": 164680 }, { "epoch": 0.3326842196697601, "grad_norm": 382.1414489746094, "learning_rate": 8.481067717857017e-06, "loss": 17.0451, "step": 164690 }, { "epoch": 0.33270442030244385, "grad_norm": 343.1446533203125, "learning_rate": 8.480817137783073e-06, "loss": 18.0691, "step": 164700 }, { "epoch": 0.33272462093512767, "grad_norm": 393.5867614746094, "learning_rate": 8.480566540744048e-06, "loss": 16.9203, "step": 164710 }, { "epoch": 0.3327448215678115, "grad_norm": 0.0, "learning_rate": 8.480315926741165e-06, "loss": 16.0772, "step": 164720 }, { "epoch": 0.3327650222004953, "grad_norm": 446.9092712402344, "learning_rate": 8.480065295775643e-06, "loss": 26.208, "step": 164730 }, { "epoch": 0.33278522283317913, "grad_norm": 407.0735778808594, "learning_rate": 8.479814647848706e-06, "loss": 16.5414, "step": 164740 }, { "epoch": 0.33280542346586295, "grad_norm": 60.83491897583008, "learning_rate": 8.479563982961572e-06, "loss": 34.6631, "step": 164750 }, { "epoch": 0.33282562409854677, "grad_norm": 152.74429321289062, "learning_rate": 8.479313301115467e-06, "loss": 11.7936, "step": 164760 }, { "epoch": 0.3328458247312306, "grad_norm": 283.35870361328125, "learning_rate": 8.479062602311611e-06, "loss": 15.0176, "step": 164770 }, { "epoch": 0.3328660253639144, "grad_norm": 15.57100772857666, "learning_rate": 8.478811886551226e-06, "loss": 31.1581, "step": 164780 }, { "epoch": 0.33288622599659823, "grad_norm": 421.0953674316406, "learning_rate": 8.478561153835532e-06, "loss": 20.1161, "step": 164790 }, { "epoch": 0.33290642662928205, "grad_norm": 438.13812255859375, "learning_rate": 8.478310404165756e-06, "loss": 32.2982, "step": 164800 }, { "epoch": 0.33292662726196587, "grad_norm": 276.8498840332031, "learning_rate": 8.478059637543114e-06, "loss": 40.2731, "step": 164810 }, { "epoch": 0.33294682789464963, "grad_norm": 354.6539001464844, "learning_rate": 8.477808853968831e-06, "loss": 15.1793, "step": 164820 }, { "epoch": 0.33296702852733345, "grad_norm": 262.10302734375, "learning_rate": 8.477558053444133e-06, "loss": 17.3008, "step": 164830 }, { "epoch": 0.3329872291600173, "grad_norm": 269.54681396484375, "learning_rate": 8.477307235970235e-06, "loss": 22.9637, "step": 164840 }, { "epoch": 0.3330074297927011, "grad_norm": 193.9597625732422, "learning_rate": 8.477056401548364e-06, "loss": 38.8478, "step": 164850 }, { "epoch": 0.3330276304253849, "grad_norm": 448.99908447265625, "learning_rate": 8.476805550179743e-06, "loss": 14.4452, "step": 164860 }, { "epoch": 0.33304783105806873, "grad_norm": 302.98779296875, "learning_rate": 8.476554681865594e-06, "loss": 27.0873, "step": 164870 }, { "epoch": 0.33306803169075255, "grad_norm": 532.409423828125, "learning_rate": 8.476303796607138e-06, "loss": 18.0725, "step": 164880 }, { "epoch": 0.3330882323234364, "grad_norm": 330.1044006347656, "learning_rate": 8.4760528944056e-06, "loss": 19.1653, "step": 164890 }, { "epoch": 0.3331084329561202, "grad_norm": 364.41717529296875, "learning_rate": 8.4758019752622e-06, "loss": 19.6933, "step": 164900 }, { "epoch": 0.333128633588804, "grad_norm": 375.3778381347656, "learning_rate": 8.475551039178164e-06, "loss": 19.298, "step": 164910 }, { "epoch": 0.33314883422148783, "grad_norm": 453.5091552734375, "learning_rate": 8.475300086154714e-06, "loss": 40.0244, "step": 164920 }, { "epoch": 0.33316903485417165, "grad_norm": 611.7855834960938, "learning_rate": 8.475049116193071e-06, "loss": 38.7827, "step": 164930 }, { "epoch": 0.3331892354868555, "grad_norm": 187.4418182373047, "learning_rate": 8.474798129294462e-06, "loss": 28.8502, "step": 164940 }, { "epoch": 0.33320943611953924, "grad_norm": 15.367188453674316, "learning_rate": 8.474547125460108e-06, "loss": 18.81, "step": 164950 }, { "epoch": 0.33322963675222306, "grad_norm": 269.1940002441406, "learning_rate": 8.474296104691231e-06, "loss": 41.1294, "step": 164960 }, { "epoch": 0.3332498373849069, "grad_norm": 20.744922637939453, "learning_rate": 8.474045066989058e-06, "loss": 22.2355, "step": 164970 }, { "epoch": 0.3332700380175907, "grad_norm": 438.31146240234375, "learning_rate": 8.47379401235481e-06, "loss": 34.9045, "step": 164980 }, { "epoch": 0.3332902386502745, "grad_norm": 297.2908020019531, "learning_rate": 8.473542940789712e-06, "loss": 19.4086, "step": 164990 }, { "epoch": 0.33331043928295834, "grad_norm": 0.0, "learning_rate": 8.473291852294986e-06, "loss": 14.0345, "step": 165000 }, { "epoch": 0.33333063991564216, "grad_norm": 103.38040161132812, "learning_rate": 8.47304074687186e-06, "loss": 17.1335, "step": 165010 }, { "epoch": 0.333350840548326, "grad_norm": 379.67901611328125, "learning_rate": 8.472789624521552e-06, "loss": 33.9341, "step": 165020 }, { "epoch": 0.3333710411810098, "grad_norm": 131.71112060546875, "learning_rate": 8.472538485245287e-06, "loss": 35.7226, "step": 165030 }, { "epoch": 0.3333912418136936, "grad_norm": 343.09130859375, "learning_rate": 8.472287329044292e-06, "loss": 14.8467, "step": 165040 }, { "epoch": 0.33341144244637744, "grad_norm": 167.4866485595703, "learning_rate": 8.47203615591979e-06, "loss": 19.3034, "step": 165050 }, { "epoch": 0.33343164307906126, "grad_norm": 242.78285217285156, "learning_rate": 8.471784965873005e-06, "loss": 17.2631, "step": 165060 }, { "epoch": 0.3334518437117451, "grad_norm": 358.673828125, "learning_rate": 8.471533758905161e-06, "loss": 13.4315, "step": 165070 }, { "epoch": 0.33347204434442884, "grad_norm": 453.779052734375, "learning_rate": 8.471282535017482e-06, "loss": 17.7916, "step": 165080 }, { "epoch": 0.33349224497711266, "grad_norm": 141.49560546875, "learning_rate": 8.471031294211194e-06, "loss": 32.9072, "step": 165090 }, { "epoch": 0.3335124456097965, "grad_norm": 557.4320068359375, "learning_rate": 8.47078003648752e-06, "loss": 29.8332, "step": 165100 }, { "epoch": 0.3335326462424803, "grad_norm": 190.56622314453125, "learning_rate": 8.470528761847684e-06, "loss": 16.3664, "step": 165110 }, { "epoch": 0.3335528468751641, "grad_norm": 300.89068603515625, "learning_rate": 8.470277470292914e-06, "loss": 12.0188, "step": 165120 }, { "epoch": 0.33357304750784794, "grad_norm": 343.52008056640625, "learning_rate": 8.47002616182443e-06, "loss": 18.1538, "step": 165130 }, { "epoch": 0.33359324814053176, "grad_norm": 191.4951629638672, "learning_rate": 8.46977483644346e-06, "loss": 21.9909, "step": 165140 }, { "epoch": 0.3336134487732156, "grad_norm": 202.2924346923828, "learning_rate": 8.469523494151229e-06, "loss": 17.7999, "step": 165150 }, { "epoch": 0.3336336494058994, "grad_norm": 253.2954559326172, "learning_rate": 8.469272134948963e-06, "loss": 13.1704, "step": 165160 }, { "epoch": 0.3336538500385832, "grad_norm": 299.3653869628906, "learning_rate": 8.469020758837882e-06, "loss": 23.0179, "step": 165170 }, { "epoch": 0.33367405067126704, "grad_norm": 635.116455078125, "learning_rate": 8.468769365819216e-06, "loss": 36.0626, "step": 165180 }, { "epoch": 0.33369425130395086, "grad_norm": 489.9075927734375, "learning_rate": 8.46851795589419e-06, "loss": 14.4532, "step": 165190 }, { "epoch": 0.3337144519366347, "grad_norm": 0.0, "learning_rate": 8.468266529064025e-06, "loss": 26.4412, "step": 165200 }, { "epoch": 0.33373465256931845, "grad_norm": 186.87294006347656, "learning_rate": 8.468015085329952e-06, "loss": 12.7722, "step": 165210 }, { "epoch": 0.33375485320200227, "grad_norm": 176.99923706054688, "learning_rate": 8.467763624693195e-06, "loss": 13.9462, "step": 165220 }, { "epoch": 0.3337750538346861, "grad_norm": 519.7848510742188, "learning_rate": 8.467512147154977e-06, "loss": 23.1987, "step": 165230 }, { "epoch": 0.3337952544673699, "grad_norm": 108.17408752441406, "learning_rate": 8.467260652716525e-06, "loss": 12.6775, "step": 165240 }, { "epoch": 0.3338154551000537, "grad_norm": 197.01153564453125, "learning_rate": 8.467009141379065e-06, "loss": 23.7391, "step": 165250 }, { "epoch": 0.33383565573273755, "grad_norm": 152.7328338623047, "learning_rate": 8.466757613143824e-06, "loss": 24.062, "step": 165260 }, { "epoch": 0.33385585636542137, "grad_norm": 463.88458251953125, "learning_rate": 8.466506068012025e-06, "loss": 17.5344, "step": 165270 }, { "epoch": 0.3338760569981052, "grad_norm": 383.3978576660156, "learning_rate": 8.466254505984899e-06, "loss": 24.9736, "step": 165280 }, { "epoch": 0.333896257630789, "grad_norm": 226.2635498046875, "learning_rate": 8.466002927063668e-06, "loss": 20.6964, "step": 165290 }, { "epoch": 0.3339164582634728, "grad_norm": 248.37791442871094, "learning_rate": 8.465751331249558e-06, "loss": 24.0305, "step": 165300 }, { "epoch": 0.33393665889615665, "grad_norm": 745.9874267578125, "learning_rate": 8.465499718543797e-06, "loss": 38.0462, "step": 165310 }, { "epoch": 0.33395685952884047, "grad_norm": 101.66973876953125, "learning_rate": 8.46524808894761e-06, "loss": 13.2641, "step": 165320 }, { "epoch": 0.3339770601615243, "grad_norm": 201.4263916015625, "learning_rate": 8.464996442462226e-06, "loss": 14.4327, "step": 165330 }, { "epoch": 0.33399726079420805, "grad_norm": 314.6021728515625, "learning_rate": 8.464744779088868e-06, "loss": 36.3068, "step": 165340 }, { "epoch": 0.33401746142689187, "grad_norm": 226.07916259765625, "learning_rate": 8.464493098828763e-06, "loss": 29.2476, "step": 165350 }, { "epoch": 0.3340376620595757, "grad_norm": 229.82643127441406, "learning_rate": 8.464241401683142e-06, "loss": 14.2612, "step": 165360 }, { "epoch": 0.3340578626922595, "grad_norm": 240.40884399414062, "learning_rate": 8.463989687653226e-06, "loss": 16.7803, "step": 165370 }, { "epoch": 0.33407806332494333, "grad_norm": 295.5080261230469, "learning_rate": 8.463737956740246e-06, "loss": 19.2123, "step": 165380 }, { "epoch": 0.33409826395762715, "grad_norm": 54.33226013183594, "learning_rate": 8.463486208945426e-06, "loss": 18.5875, "step": 165390 }, { "epoch": 0.33411846459031097, "grad_norm": 279.91375732421875, "learning_rate": 8.463234444269994e-06, "loss": 9.1388, "step": 165400 }, { "epoch": 0.3341386652229948, "grad_norm": 283.6732482910156, "learning_rate": 8.462982662715179e-06, "loss": 18.7247, "step": 165410 }, { "epoch": 0.3341588658556786, "grad_norm": 357.1438903808594, "learning_rate": 8.462730864282206e-06, "loss": 14.9224, "step": 165420 }, { "epoch": 0.33417906648836243, "grad_norm": 172.4838104248047, "learning_rate": 8.462479048972302e-06, "loss": 26.5511, "step": 165430 }, { "epoch": 0.33419926712104625, "grad_norm": 341.36895751953125, "learning_rate": 8.462227216786696e-06, "loss": 20.471, "step": 165440 }, { "epoch": 0.33421946775373007, "grad_norm": 309.9099426269531, "learning_rate": 8.461975367726614e-06, "loss": 14.9628, "step": 165450 }, { "epoch": 0.33423966838641384, "grad_norm": 108.2904281616211, "learning_rate": 8.461723501793284e-06, "loss": 35.4108, "step": 165460 }, { "epoch": 0.33425986901909766, "grad_norm": 163.7235565185547, "learning_rate": 8.461471618987933e-06, "loss": 19.2381, "step": 165470 }, { "epoch": 0.3342800696517815, "grad_norm": 559.8934326171875, "learning_rate": 8.46121971931179e-06, "loss": 25.4875, "step": 165480 }, { "epoch": 0.3343002702844653, "grad_norm": 262.3384704589844, "learning_rate": 8.460967802766081e-06, "loss": 14.4675, "step": 165490 }, { "epoch": 0.3343204709171491, "grad_norm": 246.4487762451172, "learning_rate": 8.460715869352035e-06, "loss": 20.1974, "step": 165500 }, { "epoch": 0.33434067154983294, "grad_norm": 3.2396419048309326, "learning_rate": 8.460463919070879e-06, "loss": 20.0366, "step": 165510 }, { "epoch": 0.33436087218251676, "grad_norm": 212.33595275878906, "learning_rate": 8.460211951923842e-06, "loss": 28.9692, "step": 165520 }, { "epoch": 0.3343810728152006, "grad_norm": 345.25604248046875, "learning_rate": 8.459959967912152e-06, "loss": 23.6583, "step": 165530 }, { "epoch": 0.3344012734478844, "grad_norm": 402.528564453125, "learning_rate": 8.459707967037037e-06, "loss": 15.7511, "step": 165540 }, { "epoch": 0.3344214740805682, "grad_norm": 100.94129943847656, "learning_rate": 8.459455949299725e-06, "loss": 15.724, "step": 165550 }, { "epoch": 0.33444167471325204, "grad_norm": 444.7031555175781, "learning_rate": 8.459203914701444e-06, "loss": 18.0335, "step": 165560 }, { "epoch": 0.33446187534593586, "grad_norm": 411.656982421875, "learning_rate": 8.458951863243424e-06, "loss": 24.5155, "step": 165570 }, { "epoch": 0.3344820759786197, "grad_norm": 415.06964111328125, "learning_rate": 8.45869979492689e-06, "loss": 18.8456, "step": 165580 }, { "epoch": 0.33450227661130344, "grad_norm": 487.200439453125, "learning_rate": 8.458447709753073e-06, "loss": 28.5476, "step": 165590 }, { "epoch": 0.33452247724398726, "grad_norm": 181.44686889648438, "learning_rate": 8.458195607723201e-06, "loss": 19.0071, "step": 165600 }, { "epoch": 0.3345426778766711, "grad_norm": 228.4161376953125, "learning_rate": 8.457943488838504e-06, "loss": 19.4916, "step": 165610 }, { "epoch": 0.3345628785093549, "grad_norm": 284.47052001953125, "learning_rate": 8.45769135310021e-06, "loss": 16.367, "step": 165620 }, { "epoch": 0.3345830791420387, "grad_norm": 581.7800903320312, "learning_rate": 8.457439200509548e-06, "loss": 25.5598, "step": 165630 }, { "epoch": 0.33460327977472254, "grad_norm": 131.15550231933594, "learning_rate": 8.457187031067746e-06, "loss": 14.6573, "step": 165640 }, { "epoch": 0.33462348040740636, "grad_norm": 175.67344665527344, "learning_rate": 8.456934844776033e-06, "loss": 13.5539, "step": 165650 }, { "epoch": 0.3346436810400902, "grad_norm": 556.6846923828125, "learning_rate": 8.456682641635639e-06, "loss": 26.6437, "step": 165660 }, { "epoch": 0.334663881672774, "grad_norm": 317.033935546875, "learning_rate": 8.456430421647795e-06, "loss": 35.1442, "step": 165670 }, { "epoch": 0.3346840823054578, "grad_norm": 60.878231048583984, "learning_rate": 8.456178184813726e-06, "loss": 12.6404, "step": 165680 }, { "epoch": 0.33470428293814164, "grad_norm": 410.7195129394531, "learning_rate": 8.455925931134665e-06, "loss": 13.8875, "step": 165690 }, { "epoch": 0.33472448357082546, "grad_norm": 481.0753173828125, "learning_rate": 8.45567366061184e-06, "loss": 26.9701, "step": 165700 }, { "epoch": 0.3347446842035093, "grad_norm": 206.26333618164062, "learning_rate": 8.455421373246479e-06, "loss": 15.8732, "step": 165710 }, { "epoch": 0.33476488483619304, "grad_norm": 229.13079833984375, "learning_rate": 8.455169069039814e-06, "loss": 15.0254, "step": 165720 }, { "epoch": 0.33478508546887686, "grad_norm": 284.2247009277344, "learning_rate": 8.454916747993076e-06, "loss": 20.4221, "step": 165730 }, { "epoch": 0.3348052861015607, "grad_norm": 223.0905303955078, "learning_rate": 8.454664410107492e-06, "loss": 29.1308, "step": 165740 }, { "epoch": 0.3348254867342445, "grad_norm": 213.49505615234375, "learning_rate": 8.454412055384292e-06, "loss": 17.961, "step": 165750 }, { "epoch": 0.3348456873669283, "grad_norm": 0.0, "learning_rate": 8.454159683824707e-06, "loss": 17.8647, "step": 165760 }, { "epoch": 0.33486588799961214, "grad_norm": 223.05628967285156, "learning_rate": 8.453907295429969e-06, "loss": 16.4995, "step": 165770 }, { "epoch": 0.33488608863229596, "grad_norm": 149.83413696289062, "learning_rate": 8.453654890201301e-06, "loss": 15.701, "step": 165780 }, { "epoch": 0.3349062892649798, "grad_norm": 1.7519863843917847, "learning_rate": 8.453402468139941e-06, "loss": 10.6211, "step": 165790 }, { "epoch": 0.3349264898976636, "grad_norm": 443.1351013183594, "learning_rate": 8.453150029247115e-06, "loss": 24.0223, "step": 165800 }, { "epoch": 0.3349466905303474, "grad_norm": 394.0238037109375, "learning_rate": 8.452897573524055e-06, "loss": 18.6045, "step": 165810 }, { "epoch": 0.33496689116303124, "grad_norm": 257.3160400390625, "learning_rate": 8.452645100971991e-06, "loss": 24.7095, "step": 165820 }, { "epoch": 0.33498709179571506, "grad_norm": 322.9184875488281, "learning_rate": 8.452392611592154e-06, "loss": 15.7185, "step": 165830 }, { "epoch": 0.3350072924283989, "grad_norm": 221.2910614013672, "learning_rate": 8.452140105385774e-06, "loss": 18.2683, "step": 165840 }, { "epoch": 0.33502749306108265, "grad_norm": 159.94857788085938, "learning_rate": 8.451887582354081e-06, "loss": 23.5513, "step": 165850 }, { "epoch": 0.33504769369376647, "grad_norm": 127.22516632080078, "learning_rate": 8.451635042498307e-06, "loss": 17.6577, "step": 165860 }, { "epoch": 0.3350678943264503, "grad_norm": 141.89625549316406, "learning_rate": 8.451382485819683e-06, "loss": 18.9085, "step": 165870 }, { "epoch": 0.3350880949591341, "grad_norm": 45.665584564208984, "learning_rate": 8.451129912319439e-06, "loss": 21.5081, "step": 165880 }, { "epoch": 0.33510829559181793, "grad_norm": 387.708251953125, "learning_rate": 8.450877321998805e-06, "loss": 19.3353, "step": 165890 }, { "epoch": 0.33512849622450175, "grad_norm": 83.49002838134766, "learning_rate": 8.450624714859016e-06, "loss": 25.4605, "step": 165900 }, { "epoch": 0.33514869685718557, "grad_norm": 387.1105041503906, "learning_rate": 8.4503720909013e-06, "loss": 22.8108, "step": 165910 }, { "epoch": 0.3351688974898694, "grad_norm": 472.15875244140625, "learning_rate": 8.450119450126889e-06, "loss": 30.2303, "step": 165920 }, { "epoch": 0.3351890981225532, "grad_norm": 320.4130554199219, "learning_rate": 8.449866792537013e-06, "loss": 13.5246, "step": 165930 }, { "epoch": 0.33520929875523703, "grad_norm": 389.18701171875, "learning_rate": 8.449614118132905e-06, "loss": 28.9741, "step": 165940 }, { "epoch": 0.33522949938792085, "grad_norm": 233.06016540527344, "learning_rate": 8.449361426915797e-06, "loss": 17.6799, "step": 165950 }, { "epoch": 0.33524970002060467, "grad_norm": 389.0527038574219, "learning_rate": 8.449108718886919e-06, "loss": 16.2378, "step": 165960 }, { "epoch": 0.3352699006532885, "grad_norm": 324.9711608886719, "learning_rate": 8.448855994047502e-06, "loss": 40.4996, "step": 165970 }, { "epoch": 0.33529010128597225, "grad_norm": 420.0579833984375, "learning_rate": 8.448603252398782e-06, "loss": 27.2188, "step": 165980 }, { "epoch": 0.3353103019186561, "grad_norm": 546.1483764648438, "learning_rate": 8.448350493941986e-06, "loss": 18.1594, "step": 165990 }, { "epoch": 0.3353305025513399, "grad_norm": 269.83038330078125, "learning_rate": 8.44809771867835e-06, "loss": 25.5206, "step": 166000 }, { "epoch": 0.3353507031840237, "grad_norm": 648.2899169921875, "learning_rate": 8.447844926609103e-06, "loss": 27.8666, "step": 166010 }, { "epoch": 0.33537090381670753, "grad_norm": 250.45077514648438, "learning_rate": 8.447592117735477e-06, "loss": 15.8671, "step": 166020 }, { "epoch": 0.33539110444939135, "grad_norm": 344.5387268066406, "learning_rate": 8.447339292058706e-06, "loss": 35.5279, "step": 166030 }, { "epoch": 0.3354113050820752, "grad_norm": 205.2060089111328, "learning_rate": 8.44708644958002e-06, "loss": 18.0284, "step": 166040 }, { "epoch": 0.335431505714759, "grad_norm": 210.69725036621094, "learning_rate": 8.446833590300656e-06, "loss": 23.1892, "step": 166050 }, { "epoch": 0.3354517063474428, "grad_norm": 325.77581787109375, "learning_rate": 8.44658071422184e-06, "loss": 24.7465, "step": 166060 }, { "epoch": 0.33547190698012663, "grad_norm": 585.8490600585938, "learning_rate": 8.44632782134481e-06, "loss": 16.9564, "step": 166070 }, { "epoch": 0.33549210761281045, "grad_norm": 411.3858947753906, "learning_rate": 8.446074911670795e-06, "loss": 20.5741, "step": 166080 }, { "epoch": 0.3355123082454943, "grad_norm": 173.85438537597656, "learning_rate": 8.445821985201028e-06, "loss": 12.7721, "step": 166090 }, { "epoch": 0.33553250887817804, "grad_norm": 422.77960205078125, "learning_rate": 8.445569041936743e-06, "loss": 19.034, "step": 166100 }, { "epoch": 0.33555270951086186, "grad_norm": 401.0907287597656, "learning_rate": 8.445316081879174e-06, "loss": 26.3893, "step": 166110 }, { "epoch": 0.3355729101435457, "grad_norm": 528.4168090820312, "learning_rate": 8.44506310502955e-06, "loss": 16.8306, "step": 166120 }, { "epoch": 0.3355931107762295, "grad_norm": 329.6810302734375, "learning_rate": 8.444810111389108e-06, "loss": 39.8437, "step": 166130 }, { "epoch": 0.3356133114089133, "grad_norm": 67.90018463134766, "learning_rate": 8.44455710095908e-06, "loss": 21.5852, "step": 166140 }, { "epoch": 0.33563351204159714, "grad_norm": 493.2801513671875, "learning_rate": 8.444304073740695e-06, "loss": 14.0973, "step": 166150 }, { "epoch": 0.33565371267428096, "grad_norm": 213.0481414794922, "learning_rate": 8.444051029735192e-06, "loss": 21.0395, "step": 166160 }, { "epoch": 0.3356739133069648, "grad_norm": 257.0966491699219, "learning_rate": 8.443797968943801e-06, "loss": 31.5013, "step": 166170 }, { "epoch": 0.3356941139396486, "grad_norm": 293.1725158691406, "learning_rate": 8.443544891367758e-06, "loss": 16.234, "step": 166180 }, { "epoch": 0.3357143145723324, "grad_norm": 227.91802978515625, "learning_rate": 8.443291797008294e-06, "loss": 19.4813, "step": 166190 }, { "epoch": 0.33573451520501624, "grad_norm": 279.1755676269531, "learning_rate": 8.443038685866643e-06, "loss": 19.5734, "step": 166200 }, { "epoch": 0.33575471583770006, "grad_norm": 43.863033294677734, "learning_rate": 8.44278555794404e-06, "loss": 16.7582, "step": 166210 }, { "epoch": 0.3357749164703839, "grad_norm": 259.2994384765625, "learning_rate": 8.442532413241717e-06, "loss": 22.0986, "step": 166220 }, { "epoch": 0.33579511710306764, "grad_norm": 35.16197204589844, "learning_rate": 8.442279251760907e-06, "loss": 17.9287, "step": 166230 }, { "epoch": 0.33581531773575146, "grad_norm": 192.08837890625, "learning_rate": 8.442026073502849e-06, "loss": 17.8625, "step": 166240 }, { "epoch": 0.3358355183684353, "grad_norm": 237.1589813232422, "learning_rate": 8.44177287846877e-06, "loss": 15.0342, "step": 166250 }, { "epoch": 0.3358557190011191, "grad_norm": 257.18914794921875, "learning_rate": 8.44151966665991e-06, "loss": 25.9516, "step": 166260 }, { "epoch": 0.3358759196338029, "grad_norm": 15.625290870666504, "learning_rate": 8.4412664380775e-06, "loss": 33.2257, "step": 166270 }, { "epoch": 0.33589612026648674, "grad_norm": 506.3506164550781, "learning_rate": 8.441013192722774e-06, "loss": 19.7184, "step": 166280 }, { "epoch": 0.33591632089917056, "grad_norm": 157.50607299804688, "learning_rate": 8.440759930596967e-06, "loss": 23.795, "step": 166290 }, { "epoch": 0.3359365215318544, "grad_norm": 318.0564270019531, "learning_rate": 8.440506651701315e-06, "loss": 21.3431, "step": 166300 }, { "epoch": 0.3359567221645382, "grad_norm": 417.45452880859375, "learning_rate": 8.440253356037048e-06, "loss": 19.1423, "step": 166310 }, { "epoch": 0.335976922797222, "grad_norm": 116.83910369873047, "learning_rate": 8.440000043605406e-06, "loss": 23.5181, "step": 166320 }, { "epoch": 0.33599712342990584, "grad_norm": 310.4891662597656, "learning_rate": 8.439746714407619e-06, "loss": 19.2884, "step": 166330 }, { "epoch": 0.33601732406258966, "grad_norm": 261.62969970703125, "learning_rate": 8.439493368444924e-06, "loss": 12.0368, "step": 166340 }, { "epoch": 0.3360375246952735, "grad_norm": 282.0821838378906, "learning_rate": 8.439240005718556e-06, "loss": 19.0758, "step": 166350 }, { "epoch": 0.33605772532795725, "grad_norm": 622.9390258789062, "learning_rate": 8.43898662622975e-06, "loss": 48.7711, "step": 166360 }, { "epoch": 0.33607792596064107, "grad_norm": 288.77960205078125, "learning_rate": 8.438733229979741e-06, "loss": 17.2534, "step": 166370 }, { "epoch": 0.3360981265933249, "grad_norm": 147.1881561279297, "learning_rate": 8.438479816969762e-06, "loss": 25.3555, "step": 166380 }, { "epoch": 0.3361183272260087, "grad_norm": 302.0043029785156, "learning_rate": 8.438226387201048e-06, "loss": 14.2379, "step": 166390 }, { "epoch": 0.3361385278586925, "grad_norm": 414.52362060546875, "learning_rate": 8.437972940674838e-06, "loss": 31.3733, "step": 166400 }, { "epoch": 0.33615872849137635, "grad_norm": 288.99609375, "learning_rate": 8.437719477392363e-06, "loss": 21.0391, "step": 166410 }, { "epoch": 0.33617892912406017, "grad_norm": 156.36573791503906, "learning_rate": 8.43746599735486e-06, "loss": 15.0973, "step": 166420 }, { "epoch": 0.336199129756744, "grad_norm": 159.67572021484375, "learning_rate": 8.437212500563567e-06, "loss": 13.4629, "step": 166430 }, { "epoch": 0.3362193303894278, "grad_norm": 78.00969696044922, "learning_rate": 8.436958987019717e-06, "loss": 17.2692, "step": 166440 }, { "epoch": 0.3362395310221116, "grad_norm": 158.85488891601562, "learning_rate": 8.436705456724545e-06, "loss": 16.8983, "step": 166450 }, { "epoch": 0.33625973165479545, "grad_norm": 181.82241821289062, "learning_rate": 8.436451909679286e-06, "loss": 26.7088, "step": 166460 }, { "epoch": 0.33627993228747927, "grad_norm": 331.4200439453125, "learning_rate": 8.436198345885177e-06, "loss": 13.14, "step": 166470 }, { "epoch": 0.3363001329201631, "grad_norm": 395.46917724609375, "learning_rate": 8.435944765343457e-06, "loss": 24.0179, "step": 166480 }, { "epoch": 0.33632033355284685, "grad_norm": 339.5784912109375, "learning_rate": 8.435691168055358e-06, "loss": 23.0872, "step": 166490 }, { "epoch": 0.33634053418553067, "grad_norm": 411.1085510253906, "learning_rate": 8.435437554022116e-06, "loss": 24.4623, "step": 166500 }, { "epoch": 0.3363607348182145, "grad_norm": 162.2394561767578, "learning_rate": 8.435183923244969e-06, "loss": 37.8846, "step": 166510 }, { "epoch": 0.3363809354508983, "grad_norm": 213.24314880371094, "learning_rate": 8.43493027572515e-06, "loss": 16.6498, "step": 166520 }, { "epoch": 0.33640113608358213, "grad_norm": 25.697965621948242, "learning_rate": 8.4346766114639e-06, "loss": 17.2814, "step": 166530 }, { "epoch": 0.33642133671626595, "grad_norm": 294.2319030761719, "learning_rate": 8.434422930462452e-06, "loss": 21.8714, "step": 166540 }, { "epoch": 0.33644153734894977, "grad_norm": 312.87744140625, "learning_rate": 8.434169232722043e-06, "loss": 23.0436, "step": 166550 }, { "epoch": 0.3364617379816336, "grad_norm": 540.1511840820312, "learning_rate": 8.433915518243909e-06, "loss": 17.9349, "step": 166560 }, { "epoch": 0.3364819386143174, "grad_norm": 473.4886779785156, "learning_rate": 8.433661787029288e-06, "loss": 20.4394, "step": 166570 }, { "epoch": 0.33650213924700123, "grad_norm": 278.2188415527344, "learning_rate": 8.433408039079415e-06, "loss": 13.6782, "step": 166580 }, { "epoch": 0.33652233987968505, "grad_norm": 189.22166442871094, "learning_rate": 8.433154274395529e-06, "loss": 25.3276, "step": 166590 }, { "epoch": 0.33654254051236887, "grad_norm": 1638.364013671875, "learning_rate": 8.432900492978864e-06, "loss": 21.4181, "step": 166600 }, { "epoch": 0.3365627411450527, "grad_norm": 183.7588653564453, "learning_rate": 8.43264669483066e-06, "loss": 24.6843, "step": 166610 }, { "epoch": 0.33658294177773646, "grad_norm": 204.11143493652344, "learning_rate": 8.432392879952151e-06, "loss": 13.7718, "step": 166620 }, { "epoch": 0.3366031424104203, "grad_norm": 298.32305908203125, "learning_rate": 8.432139048344577e-06, "loss": 17.3623, "step": 166630 }, { "epoch": 0.3366233430431041, "grad_norm": 14.019364356994629, "learning_rate": 8.431885200009172e-06, "loss": 21.7789, "step": 166640 }, { "epoch": 0.3366435436757879, "grad_norm": 286.3145446777344, "learning_rate": 8.431631334947175e-06, "loss": 23.6567, "step": 166650 }, { "epoch": 0.33666374430847174, "grad_norm": 343.9097595214844, "learning_rate": 8.431377453159822e-06, "loss": 18.9925, "step": 166660 }, { "epoch": 0.33668394494115556, "grad_norm": 354.0306091308594, "learning_rate": 8.431123554648354e-06, "loss": 24.1642, "step": 166670 }, { "epoch": 0.3367041455738394, "grad_norm": 249.57069396972656, "learning_rate": 8.430869639414004e-06, "loss": 23.611, "step": 166680 }, { "epoch": 0.3367243462065232, "grad_norm": 288.5650939941406, "learning_rate": 8.430615707458012e-06, "loss": 21.5773, "step": 166690 }, { "epoch": 0.336744546839207, "grad_norm": 156.8946990966797, "learning_rate": 8.430361758781616e-06, "loss": 25.6285, "step": 166700 }, { "epoch": 0.33676474747189084, "grad_norm": 237.84674072265625, "learning_rate": 8.430107793386053e-06, "loss": 19.1563, "step": 166710 }, { "epoch": 0.33678494810457466, "grad_norm": 238.70587158203125, "learning_rate": 8.42985381127256e-06, "loss": 22.4946, "step": 166720 }, { "epoch": 0.3368051487372585, "grad_norm": 539.9031982421875, "learning_rate": 8.429599812442373e-06, "loss": 26.2322, "step": 166730 }, { "epoch": 0.33682534936994224, "grad_norm": 848.5516967773438, "learning_rate": 8.429345796896736e-06, "loss": 29.0824, "step": 166740 }, { "epoch": 0.33684555000262606, "grad_norm": 194.22613525390625, "learning_rate": 8.429091764636883e-06, "loss": 20.8943, "step": 166750 }, { "epoch": 0.3368657506353099, "grad_norm": 462.03863525390625, "learning_rate": 8.42883771566405e-06, "loss": 27.9473, "step": 166760 }, { "epoch": 0.3368859512679937, "grad_norm": 790.0964965820312, "learning_rate": 8.42858364997948e-06, "loss": 22.8154, "step": 166770 }, { "epoch": 0.3369061519006775, "grad_norm": 339.3218078613281, "learning_rate": 8.428329567584411e-06, "loss": 24.6638, "step": 166780 }, { "epoch": 0.33692635253336134, "grad_norm": 85.5932846069336, "learning_rate": 8.428075468480076e-06, "loss": 15.3829, "step": 166790 }, { "epoch": 0.33694655316604516, "grad_norm": 348.49188232421875, "learning_rate": 8.427821352667719e-06, "loss": 16.3435, "step": 166800 }, { "epoch": 0.336966753798729, "grad_norm": 989.2748413085938, "learning_rate": 8.427567220148574e-06, "loss": 27.4467, "step": 166810 }, { "epoch": 0.3369869544314128, "grad_norm": 221.39697265625, "learning_rate": 8.427313070923885e-06, "loss": 27.1926, "step": 166820 }, { "epoch": 0.3370071550640966, "grad_norm": 240.052978515625, "learning_rate": 8.427058904994888e-06, "loss": 11.6302, "step": 166830 }, { "epoch": 0.33702735569678044, "grad_norm": 175.66722106933594, "learning_rate": 8.426804722362818e-06, "loss": 17.038, "step": 166840 }, { "epoch": 0.33704755632946426, "grad_norm": 57.94038391113281, "learning_rate": 8.42655052302892e-06, "loss": 7.5643, "step": 166850 }, { "epoch": 0.3370677569621481, "grad_norm": 155.8800048828125, "learning_rate": 8.42629630699443e-06, "loss": 18.7839, "step": 166860 }, { "epoch": 0.33708795759483184, "grad_norm": 374.15240478515625, "learning_rate": 8.426042074260588e-06, "loss": 19.3022, "step": 166870 }, { "epoch": 0.33710815822751566, "grad_norm": 294.3373718261719, "learning_rate": 8.425787824828632e-06, "loss": 13.5517, "step": 166880 }, { "epoch": 0.3371283588601995, "grad_norm": 300.43402099609375, "learning_rate": 8.425533558699801e-06, "loss": 16.8367, "step": 166890 }, { "epoch": 0.3371485594928833, "grad_norm": 137.6414337158203, "learning_rate": 8.425279275875336e-06, "loss": 14.6415, "step": 166900 }, { "epoch": 0.3371687601255671, "grad_norm": 195.23008728027344, "learning_rate": 8.425024976356474e-06, "loss": 26.1408, "step": 166910 }, { "epoch": 0.33718896075825094, "grad_norm": 335.1715087890625, "learning_rate": 8.424770660144457e-06, "loss": 30.9946, "step": 166920 }, { "epoch": 0.33720916139093476, "grad_norm": 181.4379425048828, "learning_rate": 8.424516327240521e-06, "loss": 12.9568, "step": 166930 }, { "epoch": 0.3372293620236186, "grad_norm": 232.23831176757812, "learning_rate": 8.424261977645909e-06, "loss": 29.3769, "step": 166940 }, { "epoch": 0.3372495626563024, "grad_norm": 301.9388122558594, "learning_rate": 8.424007611361861e-06, "loss": 16.0306, "step": 166950 }, { "epoch": 0.3372697632889862, "grad_norm": 392.7266540527344, "learning_rate": 8.423753228389612e-06, "loss": 20.5251, "step": 166960 }, { "epoch": 0.33728996392167004, "grad_norm": 1150.1451416015625, "learning_rate": 8.423498828730408e-06, "loss": 22.4782, "step": 166970 }, { "epoch": 0.33731016455435386, "grad_norm": 303.8962707519531, "learning_rate": 8.423244412385485e-06, "loss": 19.9488, "step": 166980 }, { "epoch": 0.3373303651870377, "grad_norm": 1010.6693115234375, "learning_rate": 8.422989979356084e-06, "loss": 17.7061, "step": 166990 }, { "epoch": 0.33735056581972145, "grad_norm": 0.0, "learning_rate": 8.422735529643445e-06, "loss": 10.8276, "step": 167000 }, { "epoch": 0.33737076645240527, "grad_norm": 662.1213989257812, "learning_rate": 8.422481063248806e-06, "loss": 16.2874, "step": 167010 }, { "epoch": 0.3373909670850891, "grad_norm": 257.31707763671875, "learning_rate": 8.422226580173411e-06, "loss": 15.3582, "step": 167020 }, { "epoch": 0.3374111677177729, "grad_norm": 199.30313110351562, "learning_rate": 8.4219720804185e-06, "loss": 25.3827, "step": 167030 }, { "epoch": 0.33743136835045673, "grad_norm": 558.8154907226562, "learning_rate": 8.421717563985312e-06, "loss": 20.7109, "step": 167040 }, { "epoch": 0.33745156898314055, "grad_norm": 452.7998352050781, "learning_rate": 8.421463030875086e-06, "loss": 35.3621, "step": 167050 }, { "epoch": 0.33747176961582437, "grad_norm": 86.62930297851562, "learning_rate": 8.421208481089064e-06, "loss": 18.9954, "step": 167060 }, { "epoch": 0.3374919702485082, "grad_norm": 116.16189575195312, "learning_rate": 8.42095391462849e-06, "loss": 17.6366, "step": 167070 }, { "epoch": 0.337512170881192, "grad_norm": 259.1904602050781, "learning_rate": 8.420699331494597e-06, "loss": 26.4605, "step": 167080 }, { "epoch": 0.33753237151387583, "grad_norm": 394.9403076171875, "learning_rate": 8.420444731688632e-06, "loss": 25.2763, "step": 167090 }, { "epoch": 0.33755257214655965, "grad_norm": 475.5412902832031, "learning_rate": 8.420190115211835e-06, "loss": 23.0216, "step": 167100 }, { "epoch": 0.33757277277924347, "grad_norm": 252.7615509033203, "learning_rate": 8.419935482065447e-06, "loss": 21.8136, "step": 167110 }, { "epoch": 0.3375929734119273, "grad_norm": 569.9881591796875, "learning_rate": 8.419680832250706e-06, "loss": 26.3108, "step": 167120 }, { "epoch": 0.33761317404461105, "grad_norm": 436.2150573730469, "learning_rate": 8.419426165768856e-06, "loss": 15.7643, "step": 167130 }, { "epoch": 0.3376333746772949, "grad_norm": 669.4376220703125, "learning_rate": 8.41917148262114e-06, "loss": 24.7469, "step": 167140 }, { "epoch": 0.3376535753099787, "grad_norm": 444.4395751953125, "learning_rate": 8.418916782808795e-06, "loss": 22.2999, "step": 167150 }, { "epoch": 0.3376737759426625, "grad_norm": 445.9439392089844, "learning_rate": 8.418662066333063e-06, "loss": 34.5783, "step": 167160 }, { "epoch": 0.33769397657534633, "grad_norm": 688.2111206054688, "learning_rate": 8.418407333195189e-06, "loss": 21.1082, "step": 167170 }, { "epoch": 0.33771417720803015, "grad_norm": 303.0702819824219, "learning_rate": 8.418152583396411e-06, "loss": 18.5962, "step": 167180 }, { "epoch": 0.337734377840714, "grad_norm": 76.3598403930664, "learning_rate": 8.417897816937973e-06, "loss": 24.9821, "step": 167190 }, { "epoch": 0.3377545784733978, "grad_norm": 286.64752197265625, "learning_rate": 8.417643033821114e-06, "loss": 15.1922, "step": 167200 }, { "epoch": 0.3377747791060816, "grad_norm": 181.26275634765625, "learning_rate": 8.417388234047078e-06, "loss": 10.9649, "step": 167210 }, { "epoch": 0.33779497973876543, "grad_norm": 205.09523010253906, "learning_rate": 8.417133417617107e-06, "loss": 25.4292, "step": 167220 }, { "epoch": 0.33781518037144925, "grad_norm": 92.80377960205078, "learning_rate": 8.416878584532442e-06, "loss": 16.355, "step": 167230 }, { "epoch": 0.3378353810041331, "grad_norm": 161.00035095214844, "learning_rate": 8.416623734794324e-06, "loss": 19.2826, "step": 167240 }, { "epoch": 0.33785558163681684, "grad_norm": 203.28897094726562, "learning_rate": 8.416368868403997e-06, "loss": 21.7179, "step": 167250 }, { "epoch": 0.33787578226950066, "grad_norm": 119.3681869506836, "learning_rate": 8.416113985362702e-06, "loss": 17.5, "step": 167260 }, { "epoch": 0.3378959829021845, "grad_norm": 989.9488525390625, "learning_rate": 8.415859085671683e-06, "loss": 41.7437, "step": 167270 }, { "epoch": 0.3379161835348683, "grad_norm": 644.9483032226562, "learning_rate": 8.41560416933218e-06, "loss": 21.5907, "step": 167280 }, { "epoch": 0.3379363841675521, "grad_norm": 293.8641052246094, "learning_rate": 8.415349236345436e-06, "loss": 18.6149, "step": 167290 }, { "epoch": 0.33795658480023594, "grad_norm": 261.0167236328125, "learning_rate": 8.415094286712694e-06, "loss": 11.8284, "step": 167300 }, { "epoch": 0.33797678543291976, "grad_norm": 272.1670837402344, "learning_rate": 8.4148393204352e-06, "loss": 13.4346, "step": 167310 }, { "epoch": 0.3379969860656036, "grad_norm": 127.36851501464844, "learning_rate": 8.41458433751419e-06, "loss": 13.4446, "step": 167320 }, { "epoch": 0.3380171866982874, "grad_norm": 586.43359375, "learning_rate": 8.41432933795091e-06, "loss": 24.7959, "step": 167330 }, { "epoch": 0.3380373873309712, "grad_norm": 202.94569396972656, "learning_rate": 8.414074321746605e-06, "loss": 14.9589, "step": 167340 }, { "epoch": 0.33805758796365504, "grad_norm": 962.9777221679688, "learning_rate": 8.413819288902514e-06, "loss": 35.9771, "step": 167350 }, { "epoch": 0.33807778859633886, "grad_norm": 245.2925262451172, "learning_rate": 8.413564239419883e-06, "loss": 23.0854, "step": 167360 }, { "epoch": 0.3380979892290227, "grad_norm": 341.6145935058594, "learning_rate": 8.413309173299954e-06, "loss": 18.0544, "step": 167370 }, { "epoch": 0.33811818986170644, "grad_norm": 278.02679443359375, "learning_rate": 8.41305409054397e-06, "loss": 18.8768, "step": 167380 }, { "epoch": 0.33813839049439026, "grad_norm": 114.70399475097656, "learning_rate": 8.412798991153172e-06, "loss": 19.7442, "step": 167390 }, { "epoch": 0.3381585911270741, "grad_norm": 0.0, "learning_rate": 8.412543875128809e-06, "loss": 13.0311, "step": 167400 }, { "epoch": 0.3381787917597579, "grad_norm": 83.4466781616211, "learning_rate": 8.412288742472118e-06, "loss": 24.1941, "step": 167410 }, { "epoch": 0.3381989923924417, "grad_norm": 407.4266357421875, "learning_rate": 8.412033593184348e-06, "loss": 19.4168, "step": 167420 }, { "epoch": 0.33821919302512554, "grad_norm": 375.1177673339844, "learning_rate": 8.411778427266739e-06, "loss": 21.786, "step": 167430 }, { "epoch": 0.33823939365780936, "grad_norm": 217.47442626953125, "learning_rate": 8.411523244720536e-06, "loss": 22.472, "step": 167440 }, { "epoch": 0.3382595942904932, "grad_norm": 216.95164489746094, "learning_rate": 8.411268045546984e-06, "loss": 16.4898, "step": 167450 }, { "epoch": 0.338279794923177, "grad_norm": 40.941219329833984, "learning_rate": 8.411012829747323e-06, "loss": 12.4852, "step": 167460 }, { "epoch": 0.3382999955558608, "grad_norm": 446.7447814941406, "learning_rate": 8.4107575973228e-06, "loss": 22.3419, "step": 167470 }, { "epoch": 0.33832019618854464, "grad_norm": 183.97900390625, "learning_rate": 8.410502348274658e-06, "loss": 21.4401, "step": 167480 }, { "epoch": 0.33834039682122846, "grad_norm": 250.5062713623047, "learning_rate": 8.410247082604142e-06, "loss": 25.8679, "step": 167490 }, { "epoch": 0.3383605974539123, "grad_norm": 203.4428253173828, "learning_rate": 8.409991800312493e-06, "loss": 44.9402, "step": 167500 }, { "epoch": 0.33838079808659605, "grad_norm": 173.74423217773438, "learning_rate": 8.40973650140096e-06, "loss": 45.5835, "step": 167510 }, { "epoch": 0.33840099871927987, "grad_norm": 188.60806274414062, "learning_rate": 8.409481185870783e-06, "loss": 16.3146, "step": 167520 }, { "epoch": 0.3384211993519637, "grad_norm": 266.76837158203125, "learning_rate": 8.409225853723209e-06, "loss": 26.1344, "step": 167530 }, { "epoch": 0.3384413999846475, "grad_norm": 189.410888671875, "learning_rate": 8.40897050495948e-06, "loss": 25.3642, "step": 167540 }, { "epoch": 0.3384616006173313, "grad_norm": 2.9060609340667725, "learning_rate": 8.408715139580846e-06, "loss": 16.0166, "step": 167550 }, { "epoch": 0.33848180125001515, "grad_norm": 249.8786163330078, "learning_rate": 8.408459757588547e-06, "loss": 14.7086, "step": 167560 }, { "epoch": 0.33850200188269897, "grad_norm": 155.39523315429688, "learning_rate": 8.408204358983826e-06, "loss": 17.575, "step": 167570 }, { "epoch": 0.3385222025153828, "grad_norm": 149.0742645263672, "learning_rate": 8.407948943767933e-06, "loss": 13.5457, "step": 167580 }, { "epoch": 0.3385424031480666, "grad_norm": 509.65863037109375, "learning_rate": 8.407693511942107e-06, "loss": 16.9274, "step": 167590 }, { "epoch": 0.3385626037807504, "grad_norm": 200.90341186523438, "learning_rate": 8.4074380635076e-06, "loss": 23.5431, "step": 167600 }, { "epoch": 0.33858280441343425, "grad_norm": 198.7735595703125, "learning_rate": 8.40718259846565e-06, "loss": 25.5215, "step": 167610 }, { "epoch": 0.33860300504611807, "grad_norm": 210.95567321777344, "learning_rate": 8.406927116817507e-06, "loss": 29.475, "step": 167620 }, { "epoch": 0.3386232056788019, "grad_norm": 220.9145965576172, "learning_rate": 8.406671618564413e-06, "loss": 20.7121, "step": 167630 }, { "epoch": 0.33864340631148565, "grad_norm": 168.1911163330078, "learning_rate": 8.406416103707616e-06, "loss": 16.8258, "step": 167640 }, { "epoch": 0.33866360694416947, "grad_norm": 12.36449146270752, "learning_rate": 8.406160572248361e-06, "loss": 17.3338, "step": 167650 }, { "epoch": 0.3386838075768533, "grad_norm": 460.3017272949219, "learning_rate": 8.40590502418789e-06, "loss": 24.3144, "step": 167660 }, { "epoch": 0.3387040082095371, "grad_norm": 135.8423614501953, "learning_rate": 8.405649459527453e-06, "loss": 26.3692, "step": 167670 }, { "epoch": 0.33872420884222093, "grad_norm": 95.2094497680664, "learning_rate": 8.405393878268292e-06, "loss": 13.6755, "step": 167680 }, { "epoch": 0.33874440947490475, "grad_norm": 2.9493582248687744, "learning_rate": 8.405138280411656e-06, "loss": 18.8623, "step": 167690 }, { "epoch": 0.33876461010758857, "grad_norm": 381.0074157714844, "learning_rate": 8.404882665958788e-06, "loss": 19.7762, "step": 167700 }, { "epoch": 0.3387848107402724, "grad_norm": 332.1507263183594, "learning_rate": 8.404627034910934e-06, "loss": 17.5741, "step": 167710 }, { "epoch": 0.3388050113729562, "grad_norm": 337.66815185546875, "learning_rate": 8.404371387269341e-06, "loss": 22.7464, "step": 167720 }, { "epoch": 0.33882521200564003, "grad_norm": 205.86607360839844, "learning_rate": 8.404115723035256e-06, "loss": 19.0772, "step": 167730 }, { "epoch": 0.33884541263832385, "grad_norm": 340.8726501464844, "learning_rate": 8.403860042209923e-06, "loss": 23.3921, "step": 167740 }, { "epoch": 0.33886561327100767, "grad_norm": 351.49029541015625, "learning_rate": 8.40360434479459e-06, "loss": 36.7987, "step": 167750 }, { "epoch": 0.3388858139036915, "grad_norm": 494.986328125, "learning_rate": 8.4033486307905e-06, "loss": 23.2231, "step": 167760 }, { "epoch": 0.33890601453637526, "grad_norm": 54.52848434448242, "learning_rate": 8.403092900198904e-06, "loss": 13.3392, "step": 167770 }, { "epoch": 0.3389262151690591, "grad_norm": 748.3292846679688, "learning_rate": 8.402837153021047e-06, "loss": 18.5247, "step": 167780 }, { "epoch": 0.3389464158017429, "grad_norm": 467.7299499511719, "learning_rate": 8.402581389258171e-06, "loss": 20.0509, "step": 167790 }, { "epoch": 0.3389666164344267, "grad_norm": 361.2658386230469, "learning_rate": 8.402325608911527e-06, "loss": 21.6213, "step": 167800 }, { "epoch": 0.33898681706711054, "grad_norm": 559.6964721679688, "learning_rate": 8.40206981198236e-06, "loss": 29.9356, "step": 167810 }, { "epoch": 0.33900701769979436, "grad_norm": 340.0006103515625, "learning_rate": 8.40181399847192e-06, "loss": 35.5701, "step": 167820 }, { "epoch": 0.3390272183324782, "grad_norm": 464.8050231933594, "learning_rate": 8.40155816838145e-06, "loss": 29.1242, "step": 167830 }, { "epoch": 0.339047418965162, "grad_norm": 281.2287292480469, "learning_rate": 8.401302321712198e-06, "loss": 21.1873, "step": 167840 }, { "epoch": 0.3390676195978458, "grad_norm": 330.19134521484375, "learning_rate": 8.40104645846541e-06, "loss": 8.2288, "step": 167850 }, { "epoch": 0.33908782023052964, "grad_norm": 96.80235290527344, "learning_rate": 8.400790578642333e-06, "loss": 10.917, "step": 167860 }, { "epoch": 0.33910802086321346, "grad_norm": 475.932861328125, "learning_rate": 8.400534682244217e-06, "loss": 28.4181, "step": 167870 }, { "epoch": 0.3391282214958973, "grad_norm": 260.79803466796875, "learning_rate": 8.400278769272307e-06, "loss": 35.8639, "step": 167880 }, { "epoch": 0.33914842212858104, "grad_norm": 179.6329803466797, "learning_rate": 8.400022839727853e-06, "loss": 13.6933, "step": 167890 }, { "epoch": 0.33916862276126486, "grad_norm": 370.51409912109375, "learning_rate": 8.399766893612096e-06, "loss": 10.3107, "step": 167900 }, { "epoch": 0.3391888233939487, "grad_norm": 242.9939422607422, "learning_rate": 8.399510930926291e-06, "loss": 25.848, "step": 167910 }, { "epoch": 0.3392090240266325, "grad_norm": 444.2420959472656, "learning_rate": 8.399254951671681e-06, "loss": 21.3357, "step": 167920 }, { "epoch": 0.3392292246593163, "grad_norm": 280.6379699707031, "learning_rate": 8.398998955849513e-06, "loss": 44.8297, "step": 167930 }, { "epoch": 0.33924942529200014, "grad_norm": 269.6487121582031, "learning_rate": 8.398742943461038e-06, "loss": 24.5027, "step": 167940 }, { "epoch": 0.33926962592468396, "grad_norm": 354.87030029296875, "learning_rate": 8.398486914507501e-06, "loss": 22.0271, "step": 167950 }, { "epoch": 0.3392898265573678, "grad_norm": 439.1939392089844, "learning_rate": 8.398230868990151e-06, "loss": 24.2498, "step": 167960 }, { "epoch": 0.3393100271900516, "grad_norm": 346.8290100097656, "learning_rate": 8.397974806910237e-06, "loss": 25.3946, "step": 167970 }, { "epoch": 0.3393302278227354, "grad_norm": 382.88421630859375, "learning_rate": 8.397718728269006e-06, "loss": 16.6659, "step": 167980 }, { "epoch": 0.33935042845541924, "grad_norm": 80.05567932128906, "learning_rate": 8.397462633067705e-06, "loss": 23.841, "step": 167990 }, { "epoch": 0.33937062908810306, "grad_norm": 93.02030944824219, "learning_rate": 8.397206521307584e-06, "loss": 21.2372, "step": 168000 }, { "epoch": 0.3393908297207869, "grad_norm": 240.18272399902344, "learning_rate": 8.396950392989888e-06, "loss": 13.8669, "step": 168010 }, { "epoch": 0.33941103035347064, "grad_norm": 9.403772354125977, "learning_rate": 8.396694248115871e-06, "loss": 18.6763, "step": 168020 }, { "epoch": 0.33943123098615446, "grad_norm": 360.84429931640625, "learning_rate": 8.396438086686779e-06, "loss": 40.4603, "step": 168030 }, { "epoch": 0.3394514316188383, "grad_norm": 171.47817993164062, "learning_rate": 8.396181908703855e-06, "loss": 14.3472, "step": 168040 }, { "epoch": 0.3394716322515221, "grad_norm": 164.5063018798828, "learning_rate": 8.395925714168356e-06, "loss": 47.6634, "step": 168050 }, { "epoch": 0.3394918328842059, "grad_norm": 305.9817810058594, "learning_rate": 8.395669503081524e-06, "loss": 13.7316, "step": 168060 }, { "epoch": 0.33951203351688974, "grad_norm": 163.3946533203125, "learning_rate": 8.395413275444614e-06, "loss": 15.4748, "step": 168070 }, { "epoch": 0.33953223414957356, "grad_norm": 337.1431579589844, "learning_rate": 8.39515703125887e-06, "loss": 24.1183, "step": 168080 }, { "epoch": 0.3395524347822574, "grad_norm": 80.43614196777344, "learning_rate": 8.394900770525543e-06, "loss": 35.6688, "step": 168090 }, { "epoch": 0.3395726354149412, "grad_norm": 451.1327209472656, "learning_rate": 8.394644493245882e-06, "loss": 24.4774, "step": 168100 }, { "epoch": 0.339592836047625, "grad_norm": 88.14883422851562, "learning_rate": 8.394388199421133e-06, "loss": 10.1905, "step": 168110 }, { "epoch": 0.33961303668030884, "grad_norm": 422.1810607910156, "learning_rate": 8.39413188905255e-06, "loss": 28.3427, "step": 168120 }, { "epoch": 0.33963323731299266, "grad_norm": 3729.56591796875, "learning_rate": 8.393875562141379e-06, "loss": 15.2102, "step": 168130 }, { "epoch": 0.3396534379456765, "grad_norm": 163.22901916503906, "learning_rate": 8.39361921868887e-06, "loss": 20.1551, "step": 168140 }, { "epoch": 0.33967363857836025, "grad_norm": 220.199462890625, "learning_rate": 8.393362858696272e-06, "loss": 9.3764, "step": 168150 }, { "epoch": 0.33969383921104407, "grad_norm": 558.9656372070312, "learning_rate": 8.393106482164836e-06, "loss": 27.5657, "step": 168160 }, { "epoch": 0.3397140398437279, "grad_norm": 430.8228759765625, "learning_rate": 8.39285008909581e-06, "loss": 24.3831, "step": 168170 }, { "epoch": 0.3397342404764117, "grad_norm": 268.3948669433594, "learning_rate": 8.392593679490444e-06, "loss": 18.2102, "step": 168180 }, { "epoch": 0.33975444110909553, "grad_norm": 279.17913818359375, "learning_rate": 8.392337253349988e-06, "loss": 10.8727, "step": 168190 }, { "epoch": 0.33977464174177935, "grad_norm": 287.8023986816406, "learning_rate": 8.392080810675692e-06, "loss": 25.6407, "step": 168200 }, { "epoch": 0.33979484237446317, "grad_norm": 263.4696960449219, "learning_rate": 8.391824351468805e-06, "loss": 13.9887, "step": 168210 }, { "epoch": 0.339815043007147, "grad_norm": 295.0177307128906, "learning_rate": 8.391567875730577e-06, "loss": 19.5135, "step": 168220 }, { "epoch": 0.3398352436398308, "grad_norm": 462.9874572753906, "learning_rate": 8.39131138346226e-06, "loss": 10.4573, "step": 168230 }, { "epoch": 0.33985544427251463, "grad_norm": 229.9269256591797, "learning_rate": 8.391054874665103e-06, "loss": 21.3351, "step": 168240 }, { "epoch": 0.33987564490519845, "grad_norm": 228.70245361328125, "learning_rate": 8.390798349340354e-06, "loss": 14.2831, "step": 168250 }, { "epoch": 0.33989584553788227, "grad_norm": 432.54656982421875, "learning_rate": 8.390541807489266e-06, "loss": 23.5711, "step": 168260 }, { "epoch": 0.3399160461705661, "grad_norm": 402.1029968261719, "learning_rate": 8.390285249113088e-06, "loss": 16.6938, "step": 168270 }, { "epoch": 0.33993624680324985, "grad_norm": 124.82726287841797, "learning_rate": 8.390028674213072e-06, "loss": 17.5074, "step": 168280 }, { "epoch": 0.3399564474359337, "grad_norm": 135.45008850097656, "learning_rate": 8.389772082790466e-06, "loss": 12.5173, "step": 168290 }, { "epoch": 0.3399766480686175, "grad_norm": 263.7895202636719, "learning_rate": 8.389515474846522e-06, "loss": 23.7203, "step": 168300 }, { "epoch": 0.3399968487013013, "grad_norm": 94.67830657958984, "learning_rate": 8.389258850382491e-06, "loss": 17.2535, "step": 168310 }, { "epoch": 0.34001704933398513, "grad_norm": 299.1087341308594, "learning_rate": 8.389002209399625e-06, "loss": 17.4132, "step": 168320 }, { "epoch": 0.34003724996666895, "grad_norm": 340.1627197265625, "learning_rate": 8.388745551899172e-06, "loss": 15.9586, "step": 168330 }, { "epoch": 0.3400574505993528, "grad_norm": 332.5255126953125, "learning_rate": 8.388488877882383e-06, "loss": 20.5329, "step": 168340 }, { "epoch": 0.3400776512320366, "grad_norm": 176.05899047851562, "learning_rate": 8.388232187350513e-06, "loss": 21.0718, "step": 168350 }, { "epoch": 0.3400978518647204, "grad_norm": 98.6200942993164, "learning_rate": 8.387975480304808e-06, "loss": 20.129, "step": 168360 }, { "epoch": 0.34011805249740423, "grad_norm": 142.75546264648438, "learning_rate": 8.387718756746522e-06, "loss": 23.3539, "step": 168370 }, { "epoch": 0.34013825313008805, "grad_norm": 550.5253295898438, "learning_rate": 8.387462016676906e-06, "loss": 28.9288, "step": 168380 }, { "epoch": 0.3401584537627719, "grad_norm": 135.90292358398438, "learning_rate": 8.387205260097211e-06, "loss": 27.2627, "step": 168390 }, { "epoch": 0.3401786543954557, "grad_norm": 255.28897094726562, "learning_rate": 8.386948487008687e-06, "loss": 20.2, "step": 168400 }, { "epoch": 0.34019885502813946, "grad_norm": 224.75408935546875, "learning_rate": 8.386691697412588e-06, "loss": 21.6188, "step": 168410 }, { "epoch": 0.3402190556608233, "grad_norm": 150.703857421875, "learning_rate": 8.386434891310164e-06, "loss": 21.6727, "step": 168420 }, { "epoch": 0.3402392562935071, "grad_norm": 211.6686248779297, "learning_rate": 8.386178068702669e-06, "loss": 26.4694, "step": 168430 }, { "epoch": 0.3402594569261909, "grad_norm": 409.5935363769531, "learning_rate": 8.385921229591351e-06, "loss": 15.6839, "step": 168440 }, { "epoch": 0.34027965755887474, "grad_norm": 263.2363586425781, "learning_rate": 8.385664373977462e-06, "loss": 19.655, "step": 168450 }, { "epoch": 0.34029985819155856, "grad_norm": 522.9468994140625, "learning_rate": 8.385407501862258e-06, "loss": 23.0846, "step": 168460 }, { "epoch": 0.3403200588242424, "grad_norm": 384.7742919921875, "learning_rate": 8.385150613246989e-06, "loss": 10.4839, "step": 168470 }, { "epoch": 0.3403402594569262, "grad_norm": 392.870849609375, "learning_rate": 8.384893708132904e-06, "loss": 17.1306, "step": 168480 }, { "epoch": 0.34036046008961, "grad_norm": 187.56085205078125, "learning_rate": 8.384636786521259e-06, "loss": 18.8336, "step": 168490 }, { "epoch": 0.34038066072229384, "grad_norm": 181.89923095703125, "learning_rate": 8.384379848413304e-06, "loss": 18.4035, "step": 168500 }, { "epoch": 0.34040086135497766, "grad_norm": 247.6270294189453, "learning_rate": 8.384122893810294e-06, "loss": 14.3956, "step": 168510 }, { "epoch": 0.3404210619876615, "grad_norm": 115.60026550292969, "learning_rate": 8.383865922713478e-06, "loss": 23.9792, "step": 168520 }, { "epoch": 0.34044126262034524, "grad_norm": 457.1192626953125, "learning_rate": 8.383608935124109e-06, "loss": 20.3085, "step": 168530 }, { "epoch": 0.34046146325302906, "grad_norm": 427.1487731933594, "learning_rate": 8.383351931043441e-06, "loss": 18.4235, "step": 168540 }, { "epoch": 0.3404816638857129, "grad_norm": 17.42327308654785, "learning_rate": 8.383094910472728e-06, "loss": 15.2253, "step": 168550 }, { "epoch": 0.3405018645183967, "grad_norm": 850.0340576171875, "learning_rate": 8.38283787341322e-06, "loss": 23.2354, "step": 168560 }, { "epoch": 0.3405220651510805, "grad_norm": 203.01551818847656, "learning_rate": 8.382580819866168e-06, "loss": 12.6319, "step": 168570 }, { "epoch": 0.34054226578376434, "grad_norm": 278.4968566894531, "learning_rate": 8.38232374983283e-06, "loss": 32.7982, "step": 168580 }, { "epoch": 0.34056246641644816, "grad_norm": 413.9723815917969, "learning_rate": 8.382066663314455e-06, "loss": 42.2386, "step": 168590 }, { "epoch": 0.340582667049132, "grad_norm": 659.3339233398438, "learning_rate": 8.381809560312298e-06, "loss": 28.9671, "step": 168600 }, { "epoch": 0.3406028676818158, "grad_norm": 296.89959716796875, "learning_rate": 8.381552440827611e-06, "loss": 35.074, "step": 168610 }, { "epoch": 0.3406230683144996, "grad_norm": 122.00875854492188, "learning_rate": 8.381295304861647e-06, "loss": 19.108, "step": 168620 }, { "epoch": 0.34064326894718344, "grad_norm": 422.0478210449219, "learning_rate": 8.381038152415661e-06, "loss": 27.4, "step": 168630 }, { "epoch": 0.34066346957986726, "grad_norm": 339.75933837890625, "learning_rate": 8.380780983490903e-06, "loss": 26.2376, "step": 168640 }, { "epoch": 0.3406836702125511, "grad_norm": 281.5389404296875, "learning_rate": 8.38052379808863e-06, "loss": 28.0848, "step": 168650 }, { "epoch": 0.34070387084523485, "grad_norm": 225.03677368164062, "learning_rate": 8.380266596210095e-06, "loss": 29.7882, "step": 168660 }, { "epoch": 0.34072407147791867, "grad_norm": 377.7826843261719, "learning_rate": 8.380009377856548e-06, "loss": 22.647, "step": 168670 }, { "epoch": 0.3407442721106025, "grad_norm": 444.70904541015625, "learning_rate": 8.379752143029248e-06, "loss": 20.3587, "step": 168680 }, { "epoch": 0.3407644727432863, "grad_norm": 6.234682083129883, "learning_rate": 8.379494891729445e-06, "loss": 26.1196, "step": 168690 }, { "epoch": 0.3407846733759701, "grad_norm": 480.9463195800781, "learning_rate": 8.379237623958393e-06, "loss": 19.2158, "step": 168700 }, { "epoch": 0.34080487400865395, "grad_norm": 191.8983917236328, "learning_rate": 8.378980339717348e-06, "loss": 19.402, "step": 168710 }, { "epoch": 0.34082507464133777, "grad_norm": 436.5052490234375, "learning_rate": 8.37872303900756e-06, "loss": 22.5633, "step": 168720 }, { "epoch": 0.3408452752740216, "grad_norm": 259.7563781738281, "learning_rate": 8.378465721830289e-06, "loss": 15.2304, "step": 168730 }, { "epoch": 0.3408654759067054, "grad_norm": 150.2193145751953, "learning_rate": 8.378208388186784e-06, "loss": 49.8093, "step": 168740 }, { "epoch": 0.3408856765393892, "grad_norm": 246.40576171875, "learning_rate": 8.377951038078303e-06, "loss": 32.3586, "step": 168750 }, { "epoch": 0.34090587717207305, "grad_norm": 19.414447784423828, "learning_rate": 8.377693671506094e-06, "loss": 14.4279, "step": 168760 }, { "epoch": 0.34092607780475687, "grad_norm": 387.6641845703125, "learning_rate": 8.37743628847142e-06, "loss": 21.6378, "step": 168770 }, { "epoch": 0.3409462784374407, "grad_norm": 300.167236328125, "learning_rate": 8.37717888897553e-06, "loss": 19.0005, "step": 168780 }, { "epoch": 0.34096647907012445, "grad_norm": 257.9981994628906, "learning_rate": 8.37692147301968e-06, "loss": 16.0206, "step": 168790 }, { "epoch": 0.34098667970280827, "grad_norm": 539.7286987304688, "learning_rate": 8.376664040605122e-06, "loss": 22.5637, "step": 168800 }, { "epoch": 0.3410068803354921, "grad_norm": 163.2705841064453, "learning_rate": 8.376406591733115e-06, "loss": 21.3365, "step": 168810 }, { "epoch": 0.3410270809681759, "grad_norm": 283.9355163574219, "learning_rate": 8.37614912640491e-06, "loss": 33.6396, "step": 168820 }, { "epoch": 0.34104728160085973, "grad_norm": 270.3693542480469, "learning_rate": 8.375891644621767e-06, "loss": 10.0936, "step": 168830 }, { "epoch": 0.34106748223354355, "grad_norm": 243.17611694335938, "learning_rate": 8.375634146384937e-06, "loss": 15.9033, "step": 168840 }, { "epoch": 0.34108768286622737, "grad_norm": 12.596551895141602, "learning_rate": 8.375376631695673e-06, "loss": 17.5263, "step": 168850 }, { "epoch": 0.3411078834989112, "grad_norm": 272.57867431640625, "learning_rate": 8.375119100555234e-06, "loss": 17.4263, "step": 168860 }, { "epoch": 0.341128084131595, "grad_norm": 277.19403076171875, "learning_rate": 8.374861552964875e-06, "loss": 17.268, "step": 168870 }, { "epoch": 0.34114828476427883, "grad_norm": 221.00869750976562, "learning_rate": 8.374603988925848e-06, "loss": 16.0168, "step": 168880 }, { "epoch": 0.34116848539696265, "grad_norm": 355.1002197265625, "learning_rate": 8.37434640843941e-06, "loss": 14.0906, "step": 168890 }, { "epoch": 0.34118868602964647, "grad_norm": 152.13941955566406, "learning_rate": 8.374088811506819e-06, "loss": 22.3104, "step": 168900 }, { "epoch": 0.3412088866623303, "grad_norm": 527.1307373046875, "learning_rate": 8.373831198129327e-06, "loss": 14.2289, "step": 168910 }, { "epoch": 0.34122908729501406, "grad_norm": 311.8716735839844, "learning_rate": 8.373573568308193e-06, "loss": 24.0969, "step": 168920 }, { "epoch": 0.3412492879276979, "grad_norm": 1245.1796875, "learning_rate": 8.37331592204467e-06, "loss": 30.0823, "step": 168930 }, { "epoch": 0.3412694885603817, "grad_norm": 29.51732635498047, "learning_rate": 8.373058259340012e-06, "loss": 25.4236, "step": 168940 }, { "epoch": 0.3412896891930655, "grad_norm": 408.1950378417969, "learning_rate": 8.372800580195479e-06, "loss": 22.6719, "step": 168950 }, { "epoch": 0.34130988982574934, "grad_norm": 267.07550048828125, "learning_rate": 8.372542884612324e-06, "loss": 16.8631, "step": 168960 }, { "epoch": 0.34133009045843316, "grad_norm": 217.213134765625, "learning_rate": 8.372285172591806e-06, "loss": 21.4707, "step": 168970 }, { "epoch": 0.341350291091117, "grad_norm": 292.0655517578125, "learning_rate": 8.372027444135176e-06, "loss": 21.7918, "step": 168980 }, { "epoch": 0.3413704917238008, "grad_norm": 141.8952178955078, "learning_rate": 8.371769699243694e-06, "loss": 16.2613, "step": 168990 }, { "epoch": 0.3413906923564846, "grad_norm": 770.5528564453125, "learning_rate": 8.371511937918616e-06, "loss": 24.6704, "step": 169000 }, { "epoch": 0.34141089298916844, "grad_norm": 3.9070448875427246, "learning_rate": 8.3712541601612e-06, "loss": 9.0761, "step": 169010 }, { "epoch": 0.34143109362185226, "grad_norm": 167.31065368652344, "learning_rate": 8.370996365972698e-06, "loss": 27.1114, "step": 169020 }, { "epoch": 0.3414512942545361, "grad_norm": 117.19823455810547, "learning_rate": 8.37073855535437e-06, "loss": 15.0953, "step": 169030 }, { "epoch": 0.3414714948872199, "grad_norm": 270.1206359863281, "learning_rate": 8.370480728307469e-06, "loss": 20.7538, "step": 169040 }, { "epoch": 0.34149169551990366, "grad_norm": 185.29270935058594, "learning_rate": 8.370222884833255e-06, "loss": 14.2262, "step": 169050 }, { "epoch": 0.3415118961525875, "grad_norm": 519.6625366210938, "learning_rate": 8.369965024932983e-06, "loss": 33.7473, "step": 169060 }, { "epoch": 0.3415320967852713, "grad_norm": 25.527976989746094, "learning_rate": 8.36970714860791e-06, "loss": 20.7354, "step": 169070 }, { "epoch": 0.3415522974179551, "grad_norm": 41.70753479003906, "learning_rate": 8.369449255859294e-06, "loss": 21.8049, "step": 169080 }, { "epoch": 0.34157249805063894, "grad_norm": 405.1187438964844, "learning_rate": 8.36919134668839e-06, "loss": 10.3421, "step": 169090 }, { "epoch": 0.34159269868332276, "grad_norm": 85.36827850341797, "learning_rate": 8.368933421096454e-06, "loss": 26.1439, "step": 169100 }, { "epoch": 0.3416128993160066, "grad_norm": 428.1422424316406, "learning_rate": 8.368675479084749e-06, "loss": 34.6881, "step": 169110 }, { "epoch": 0.3416330999486904, "grad_norm": 89.0877914428711, "learning_rate": 8.368417520654526e-06, "loss": 13.6173, "step": 169120 }, { "epoch": 0.3416533005813742, "grad_norm": 37.801177978515625, "learning_rate": 8.368159545807047e-06, "loss": 20.6865, "step": 169130 }, { "epoch": 0.34167350121405804, "grad_norm": 178.8148193359375, "learning_rate": 8.367901554543563e-06, "loss": 12.8515, "step": 169140 }, { "epoch": 0.34169370184674186, "grad_norm": 0.0, "learning_rate": 8.367643546865339e-06, "loss": 15.5312, "step": 169150 }, { "epoch": 0.3417139024794257, "grad_norm": 92.47428894042969, "learning_rate": 8.367385522773625e-06, "loss": 14.2372, "step": 169160 }, { "epoch": 0.34173410311210944, "grad_norm": 403.06915283203125, "learning_rate": 8.367127482269686e-06, "loss": 20.9616, "step": 169170 }, { "epoch": 0.34175430374479326, "grad_norm": 212.30902099609375, "learning_rate": 8.366869425354774e-06, "loss": 11.7798, "step": 169180 }, { "epoch": 0.3417745043774771, "grad_norm": 295.9895324707031, "learning_rate": 8.36661135203015e-06, "loss": 7.2622, "step": 169190 }, { "epoch": 0.3417947050101609, "grad_norm": 883.7073364257812, "learning_rate": 8.366353262297069e-06, "loss": 30.2356, "step": 169200 }, { "epoch": 0.3418149056428447, "grad_norm": 328.57293701171875, "learning_rate": 8.366095156156793e-06, "loss": 21.3927, "step": 169210 }, { "epoch": 0.34183510627552854, "grad_norm": 296.1581726074219, "learning_rate": 8.365837033610576e-06, "loss": 25.4719, "step": 169220 }, { "epoch": 0.34185530690821236, "grad_norm": 421.3023376464844, "learning_rate": 8.365578894659677e-06, "loss": 14.2675, "step": 169230 }, { "epoch": 0.3418755075408962, "grad_norm": 342.0794372558594, "learning_rate": 8.365320739305355e-06, "loss": 17.7246, "step": 169240 }, { "epoch": 0.34189570817358, "grad_norm": 735.29345703125, "learning_rate": 8.365062567548868e-06, "loss": 27.3815, "step": 169250 }, { "epoch": 0.3419159088062638, "grad_norm": 98.63773345947266, "learning_rate": 8.364804379391474e-06, "loss": 33.3713, "step": 169260 }, { "epoch": 0.34193610943894764, "grad_norm": 467.251953125, "learning_rate": 8.364546174834431e-06, "loss": 36.8312, "step": 169270 }, { "epoch": 0.34195631007163146, "grad_norm": 319.4925537109375, "learning_rate": 8.364287953879e-06, "loss": 16.6527, "step": 169280 }, { "epoch": 0.3419765107043153, "grad_norm": 249.7276611328125, "learning_rate": 8.364029716526437e-06, "loss": 14.2264, "step": 169290 }, { "epoch": 0.34199671133699905, "grad_norm": 97.889404296875, "learning_rate": 8.363771462778e-06, "loss": 27.1963, "step": 169300 }, { "epoch": 0.34201691196968287, "grad_norm": 266.24871826171875, "learning_rate": 8.36351319263495e-06, "loss": 22.7834, "step": 169310 }, { "epoch": 0.3420371126023667, "grad_norm": 310.7219543457031, "learning_rate": 8.363254906098543e-06, "loss": 23.0058, "step": 169320 }, { "epoch": 0.3420573132350505, "grad_norm": 438.8309631347656, "learning_rate": 8.36299660317004e-06, "loss": 20.1636, "step": 169330 }, { "epoch": 0.34207751386773433, "grad_norm": 249.38755798339844, "learning_rate": 8.3627382838507e-06, "loss": 27.6402, "step": 169340 }, { "epoch": 0.34209771450041815, "grad_norm": 244.68150329589844, "learning_rate": 8.36247994814178e-06, "loss": 14.0505, "step": 169350 }, { "epoch": 0.34211791513310197, "grad_norm": 430.007568359375, "learning_rate": 8.362221596044542e-06, "loss": 24.5229, "step": 169360 }, { "epoch": 0.3421381157657858, "grad_norm": 208.11233520507812, "learning_rate": 8.361963227560244e-06, "loss": 21.3982, "step": 169370 }, { "epoch": 0.3421583163984696, "grad_norm": 47.68157958984375, "learning_rate": 8.361704842690144e-06, "loss": 10.9469, "step": 169380 }, { "epoch": 0.34217851703115343, "grad_norm": 498.3441467285156, "learning_rate": 8.361446441435503e-06, "loss": 15.7028, "step": 169390 }, { "epoch": 0.34219871766383725, "grad_norm": 220.7538604736328, "learning_rate": 8.361188023797581e-06, "loss": 19.8481, "step": 169400 }, { "epoch": 0.34221891829652107, "grad_norm": 506.9656066894531, "learning_rate": 8.360929589777634e-06, "loss": 27.1071, "step": 169410 }, { "epoch": 0.3422391189292049, "grad_norm": 391.9134216308594, "learning_rate": 8.360671139376925e-06, "loss": 11.5357, "step": 169420 }, { "epoch": 0.34225931956188865, "grad_norm": 267.8738098144531, "learning_rate": 8.360412672596712e-06, "loss": 21.0483, "step": 169430 }, { "epoch": 0.3422795201945725, "grad_norm": 509.51751708984375, "learning_rate": 8.360154189438257e-06, "loss": 19.0344, "step": 169440 }, { "epoch": 0.3422997208272563, "grad_norm": 104.22761535644531, "learning_rate": 8.359895689902815e-06, "loss": 19.0946, "step": 169450 }, { "epoch": 0.3423199214599401, "grad_norm": 331.7555236816406, "learning_rate": 8.35963717399165e-06, "loss": 15.3532, "step": 169460 }, { "epoch": 0.34234012209262393, "grad_norm": 599.7227172851562, "learning_rate": 8.359378641706021e-06, "loss": 24.6748, "step": 169470 }, { "epoch": 0.34236032272530775, "grad_norm": 320.41650390625, "learning_rate": 8.35912009304719e-06, "loss": 18.3289, "step": 169480 }, { "epoch": 0.3423805233579916, "grad_norm": 175.5325927734375, "learning_rate": 8.358861528016413e-06, "loss": 34.3688, "step": 169490 }, { "epoch": 0.3424007239906754, "grad_norm": 59.19881820678711, "learning_rate": 8.358602946614952e-06, "loss": 22.3889, "step": 169500 }, { "epoch": 0.3424209246233592, "grad_norm": 354.02642822265625, "learning_rate": 8.358344348844068e-06, "loss": 16.4501, "step": 169510 }, { "epoch": 0.34244112525604303, "grad_norm": 79.87654113769531, "learning_rate": 8.358085734705021e-06, "loss": 20.0661, "step": 169520 }, { "epoch": 0.34246132588872685, "grad_norm": 165.42213439941406, "learning_rate": 8.357827104199073e-06, "loss": 18.9506, "step": 169530 }, { "epoch": 0.3424815265214107, "grad_norm": 536.7589111328125, "learning_rate": 8.357568457327481e-06, "loss": 27.1201, "step": 169540 }, { "epoch": 0.3425017271540945, "grad_norm": 115.28950500488281, "learning_rate": 8.357309794091508e-06, "loss": 21.0225, "step": 169550 }, { "epoch": 0.34252192778677826, "grad_norm": 159.8693389892578, "learning_rate": 8.357051114492414e-06, "loss": 11.0231, "step": 169560 }, { "epoch": 0.3425421284194621, "grad_norm": 102.88177490234375, "learning_rate": 8.35679241853146e-06, "loss": 14.6971, "step": 169570 }, { "epoch": 0.3425623290521459, "grad_norm": 238.68865966796875, "learning_rate": 8.356533706209907e-06, "loss": 18.8612, "step": 169580 }, { "epoch": 0.3425825296848297, "grad_norm": 396.66021728515625, "learning_rate": 8.356274977529015e-06, "loss": 22.8925, "step": 169590 }, { "epoch": 0.34260273031751354, "grad_norm": 329.3504638671875, "learning_rate": 8.356016232490047e-06, "loss": 23.9745, "step": 169600 }, { "epoch": 0.34262293095019736, "grad_norm": 253.61224365234375, "learning_rate": 8.355757471094263e-06, "loss": 13.1238, "step": 169610 }, { "epoch": 0.3426431315828812, "grad_norm": 311.0011291503906, "learning_rate": 8.355498693342925e-06, "loss": 25.3043, "step": 169620 }, { "epoch": 0.342663332215565, "grad_norm": 436.347412109375, "learning_rate": 8.355239899237291e-06, "loss": 22.702, "step": 169630 }, { "epoch": 0.3426835328482488, "grad_norm": 0.0, "learning_rate": 8.354981088778626e-06, "loss": 13.5347, "step": 169640 }, { "epoch": 0.34270373348093264, "grad_norm": 595.8646850585938, "learning_rate": 8.35472226196819e-06, "loss": 27.1766, "step": 169650 }, { "epoch": 0.34272393411361646, "grad_norm": 296.7896728515625, "learning_rate": 8.354463418807245e-06, "loss": 18.5105, "step": 169660 }, { "epoch": 0.3427441347463003, "grad_norm": 286.8760070800781, "learning_rate": 8.35420455929705e-06, "loss": 14.6495, "step": 169670 }, { "epoch": 0.3427643353789841, "grad_norm": 185.09471130371094, "learning_rate": 8.35394568343887e-06, "loss": 23.1781, "step": 169680 }, { "epoch": 0.34278453601166786, "grad_norm": 243.15036010742188, "learning_rate": 8.353686791233969e-06, "loss": 16.2704, "step": 169690 }, { "epoch": 0.3428047366443517, "grad_norm": 296.3936767578125, "learning_rate": 8.353427882683601e-06, "loss": 19.4886, "step": 169700 }, { "epoch": 0.3428249372770355, "grad_norm": 333.1054382324219, "learning_rate": 8.353168957789033e-06, "loss": 24.2192, "step": 169710 }, { "epoch": 0.3428451379097193, "grad_norm": 282.8890075683594, "learning_rate": 8.352910016551527e-06, "loss": 27.9426, "step": 169720 }, { "epoch": 0.34286533854240314, "grad_norm": 261.2474365234375, "learning_rate": 8.352651058972344e-06, "loss": 25.3008, "step": 169730 }, { "epoch": 0.34288553917508696, "grad_norm": 557.745361328125, "learning_rate": 8.352392085052748e-06, "loss": 21.3805, "step": 169740 }, { "epoch": 0.3429057398077708, "grad_norm": 159.56304931640625, "learning_rate": 8.352133094793996e-06, "loss": 29.9489, "step": 169750 }, { "epoch": 0.3429259404404546, "grad_norm": 357.9878845214844, "learning_rate": 8.351874088197356e-06, "loss": 14.6987, "step": 169760 }, { "epoch": 0.3429461410731384, "grad_norm": 86.84164428710938, "learning_rate": 8.351615065264088e-06, "loss": 28.3171, "step": 169770 }, { "epoch": 0.34296634170582224, "grad_norm": 327.9209899902344, "learning_rate": 8.351356025995454e-06, "loss": 14.6716, "step": 169780 }, { "epoch": 0.34298654233850606, "grad_norm": 275.06488037109375, "learning_rate": 8.351096970392718e-06, "loss": 20.2559, "step": 169790 }, { "epoch": 0.3430067429711899, "grad_norm": 224.1498260498047, "learning_rate": 8.350837898457142e-06, "loss": 32.9631, "step": 169800 }, { "epoch": 0.34302694360387365, "grad_norm": 41.76155471801758, "learning_rate": 8.350578810189988e-06, "loss": 18.879, "step": 169810 }, { "epoch": 0.34304714423655747, "grad_norm": 353.63726806640625, "learning_rate": 8.35031970559252e-06, "loss": 13.7696, "step": 169820 }, { "epoch": 0.3430673448692413, "grad_norm": 503.97113037109375, "learning_rate": 8.350060584666e-06, "loss": 25.7976, "step": 169830 }, { "epoch": 0.3430875455019251, "grad_norm": 541.6664428710938, "learning_rate": 8.34980144741169e-06, "loss": 26.6851, "step": 169840 }, { "epoch": 0.3431077461346089, "grad_norm": 306.15130615234375, "learning_rate": 8.349542293830854e-06, "loss": 19.7743, "step": 169850 }, { "epoch": 0.34312794676729275, "grad_norm": 211.7631378173828, "learning_rate": 8.349283123924756e-06, "loss": 21.7019, "step": 169860 }, { "epoch": 0.34314814739997657, "grad_norm": 124.1648178100586, "learning_rate": 8.349023937694658e-06, "loss": 20.1651, "step": 169870 }, { "epoch": 0.3431683480326604, "grad_norm": 214.49005126953125, "learning_rate": 8.348764735141823e-06, "loss": 21.77, "step": 169880 }, { "epoch": 0.3431885486653442, "grad_norm": 24.01561737060547, "learning_rate": 8.348505516267515e-06, "loss": 16.0287, "step": 169890 }, { "epoch": 0.343208749298028, "grad_norm": 486.786376953125, "learning_rate": 8.348246281072998e-06, "loss": 34.6961, "step": 169900 }, { "epoch": 0.34322894993071185, "grad_norm": 4293.8505859375, "learning_rate": 8.347987029559534e-06, "loss": 35.4654, "step": 169910 }, { "epoch": 0.34324915056339567, "grad_norm": 443.6123352050781, "learning_rate": 8.347727761728388e-06, "loss": 16.4116, "step": 169920 }, { "epoch": 0.3432693511960795, "grad_norm": 3.0912039279937744, "learning_rate": 8.347468477580822e-06, "loss": 13.6917, "step": 169930 }, { "epoch": 0.34328955182876325, "grad_norm": 9.394501686096191, "learning_rate": 8.347209177118101e-06, "loss": 16.9474, "step": 169940 }, { "epoch": 0.34330975246144707, "grad_norm": 317.0843811035156, "learning_rate": 8.346949860341489e-06, "loss": 23.8358, "step": 169950 }, { "epoch": 0.3433299530941309, "grad_norm": 247.54989624023438, "learning_rate": 8.34669052725225e-06, "loss": 14.0914, "step": 169960 }, { "epoch": 0.3433501537268147, "grad_norm": 387.74932861328125, "learning_rate": 8.346431177851645e-06, "loss": 15.2304, "step": 169970 }, { "epoch": 0.34337035435949853, "grad_norm": 206.6967010498047, "learning_rate": 8.346171812140942e-06, "loss": 12.2167, "step": 169980 }, { "epoch": 0.34339055499218235, "grad_norm": 483.91259765625, "learning_rate": 8.345912430121403e-06, "loss": 36.2824, "step": 169990 }, { "epoch": 0.34341075562486617, "grad_norm": 33.730892181396484, "learning_rate": 8.345653031794292e-06, "loss": 15.6399, "step": 170000 }, { "epoch": 0.34343095625755, "grad_norm": 120.37821197509766, "learning_rate": 8.345393617160876e-06, "loss": 14.5866, "step": 170010 }, { "epoch": 0.3434511568902338, "grad_norm": 217.32943725585938, "learning_rate": 8.345134186222415e-06, "loss": 25.982, "step": 170020 }, { "epoch": 0.34347135752291763, "grad_norm": 193.81129455566406, "learning_rate": 8.344874738980175e-06, "loss": 19.8073, "step": 170030 }, { "epoch": 0.34349155815560145, "grad_norm": 107.99917602539062, "learning_rate": 8.344615275435423e-06, "loss": 27.1555, "step": 170040 }, { "epoch": 0.34351175878828527, "grad_norm": 629.4872436523438, "learning_rate": 8.34435579558942e-06, "loss": 26.0408, "step": 170050 }, { "epoch": 0.3435319594209691, "grad_norm": 489.61932373046875, "learning_rate": 8.344096299443434e-06, "loss": 20.0658, "step": 170060 }, { "epoch": 0.34355216005365286, "grad_norm": 140.80982971191406, "learning_rate": 8.34383678699873e-06, "loss": 29.2317, "step": 170070 }, { "epoch": 0.3435723606863367, "grad_norm": 210.8293914794922, "learning_rate": 8.343577258256567e-06, "loss": 21.4634, "step": 170080 }, { "epoch": 0.3435925613190205, "grad_norm": 394.1241760253906, "learning_rate": 8.343317713218218e-06, "loss": 18.3594, "step": 170090 }, { "epoch": 0.3436127619517043, "grad_norm": 144.25955200195312, "learning_rate": 8.343058151884942e-06, "loss": 26.9541, "step": 170100 }, { "epoch": 0.34363296258438814, "grad_norm": 756.796630859375, "learning_rate": 8.342798574258005e-06, "loss": 27.386, "step": 170110 }, { "epoch": 0.34365316321707196, "grad_norm": 233.61439514160156, "learning_rate": 8.342538980338675e-06, "loss": 24.4122, "step": 170120 }, { "epoch": 0.3436733638497558, "grad_norm": 247.0546112060547, "learning_rate": 8.342279370128215e-06, "loss": 14.1597, "step": 170130 }, { "epoch": 0.3436935644824396, "grad_norm": 654.6117553710938, "learning_rate": 8.34201974362789e-06, "loss": 29.5, "step": 170140 }, { "epoch": 0.3437137651151234, "grad_norm": 337.2203063964844, "learning_rate": 8.341760100838967e-06, "loss": 13.2311, "step": 170150 }, { "epoch": 0.34373396574780724, "grad_norm": 193.05918884277344, "learning_rate": 8.341500441762708e-06, "loss": 32.7463, "step": 170160 }, { "epoch": 0.34375416638049106, "grad_norm": 190.84873962402344, "learning_rate": 8.341240766400385e-06, "loss": 17.1037, "step": 170170 }, { "epoch": 0.3437743670131749, "grad_norm": 474.5269470214844, "learning_rate": 8.340981074753258e-06, "loss": 25.4709, "step": 170180 }, { "epoch": 0.3437945676458587, "grad_norm": 417.40484619140625, "learning_rate": 8.340721366822594e-06, "loss": 15.1923, "step": 170190 }, { "epoch": 0.34381476827854246, "grad_norm": 603.7338256835938, "learning_rate": 8.34046164260966e-06, "loss": 21.2341, "step": 170200 }, { "epoch": 0.3438349689112263, "grad_norm": 154.1627197265625, "learning_rate": 8.34020190211572e-06, "loss": 26.7847, "step": 170210 }, { "epoch": 0.3438551695439101, "grad_norm": 382.9526062011719, "learning_rate": 8.33994214534204e-06, "loss": 19.903, "step": 170220 }, { "epoch": 0.3438753701765939, "grad_norm": 369.5533142089844, "learning_rate": 8.33968237228989e-06, "loss": 17.3134, "step": 170230 }, { "epoch": 0.34389557080927774, "grad_norm": 183.25357055664062, "learning_rate": 8.339422582960533e-06, "loss": 15.1859, "step": 170240 }, { "epoch": 0.34391577144196156, "grad_norm": 0.0, "learning_rate": 8.339162777355233e-06, "loss": 13.1109, "step": 170250 }, { "epoch": 0.3439359720746454, "grad_norm": 333.4830017089844, "learning_rate": 8.338902955475261e-06, "loss": 19.2664, "step": 170260 }, { "epoch": 0.3439561727073292, "grad_norm": 141.41085815429688, "learning_rate": 8.33864311732188e-06, "loss": 20.7401, "step": 170270 }, { "epoch": 0.343976373340013, "grad_norm": 161.00270080566406, "learning_rate": 8.338383262896357e-06, "loss": 36.2157, "step": 170280 }, { "epoch": 0.34399657397269684, "grad_norm": 360.1719055175781, "learning_rate": 8.33812339219996e-06, "loss": 10.1907, "step": 170290 }, { "epoch": 0.34401677460538066, "grad_norm": 275.62322998046875, "learning_rate": 8.337863505233954e-06, "loss": 15.581, "step": 170300 }, { "epoch": 0.3440369752380645, "grad_norm": 189.0816650390625, "learning_rate": 8.337603601999605e-06, "loss": 19.1533, "step": 170310 }, { "epoch": 0.34405717587074824, "grad_norm": 353.074462890625, "learning_rate": 8.337343682498181e-06, "loss": 18.6338, "step": 170320 }, { "epoch": 0.34407737650343206, "grad_norm": 346.27716064453125, "learning_rate": 8.33708374673095e-06, "loss": 17.9278, "step": 170330 }, { "epoch": 0.3440975771361159, "grad_norm": 184.47967529296875, "learning_rate": 8.336823794699177e-06, "loss": 38.9364, "step": 170340 }, { "epoch": 0.3441177777687997, "grad_norm": 336.7402038574219, "learning_rate": 8.336563826404129e-06, "loss": 16.1312, "step": 170350 }, { "epoch": 0.3441379784014835, "grad_norm": 195.4165802001953, "learning_rate": 8.336303841847073e-06, "loss": 21.4333, "step": 170360 }, { "epoch": 0.34415817903416734, "grad_norm": 428.377197265625, "learning_rate": 8.336043841029278e-06, "loss": 20.5674, "step": 170370 }, { "epoch": 0.34417837966685116, "grad_norm": 538.0037841796875, "learning_rate": 8.33578382395201e-06, "loss": 37.3499, "step": 170380 }, { "epoch": 0.344198580299535, "grad_norm": 406.0247802734375, "learning_rate": 8.335523790616536e-06, "loss": 20.8684, "step": 170390 }, { "epoch": 0.3442187809322188, "grad_norm": 218.61279296875, "learning_rate": 8.335263741024123e-06, "loss": 18.1752, "step": 170400 }, { "epoch": 0.3442389815649026, "grad_norm": 824.3475341796875, "learning_rate": 8.33500367517604e-06, "loss": 32.7394, "step": 170410 }, { "epoch": 0.34425918219758644, "grad_norm": 298.86810302734375, "learning_rate": 8.334743593073553e-06, "loss": 33.5439, "step": 170420 }, { "epoch": 0.34427938283027026, "grad_norm": 328.7309265136719, "learning_rate": 8.33448349471793e-06, "loss": 10.3777, "step": 170430 }, { "epoch": 0.3442995834629541, "grad_norm": 461.0801086425781, "learning_rate": 8.334223380110438e-06, "loss": 28.5949, "step": 170440 }, { "epoch": 0.34431978409563785, "grad_norm": 398.4685974121094, "learning_rate": 8.333963249252347e-06, "loss": 27.9119, "step": 170450 }, { "epoch": 0.34433998472832167, "grad_norm": 209.1918487548828, "learning_rate": 8.333703102144924e-06, "loss": 16.7623, "step": 170460 }, { "epoch": 0.3443601853610055, "grad_norm": 558.6058959960938, "learning_rate": 8.333442938789435e-06, "loss": 19.3745, "step": 170470 }, { "epoch": 0.3443803859936893, "grad_norm": 900.6947631835938, "learning_rate": 8.333182759187151e-06, "loss": 25.0725, "step": 170480 }, { "epoch": 0.34440058662637313, "grad_norm": 223.29193115234375, "learning_rate": 8.332922563339336e-06, "loss": 20.1986, "step": 170490 }, { "epoch": 0.34442078725905695, "grad_norm": 731.4050903320312, "learning_rate": 8.332662351247262e-06, "loss": 22.8768, "step": 170500 }, { "epoch": 0.34444098789174077, "grad_norm": 223.88818359375, "learning_rate": 8.332402122912198e-06, "loss": 19.6513, "step": 170510 }, { "epoch": 0.3444611885244246, "grad_norm": 100.77892303466797, "learning_rate": 8.332141878335407e-06, "loss": 36.5339, "step": 170520 }, { "epoch": 0.3444813891571084, "grad_norm": 129.2215576171875, "learning_rate": 8.331881617518163e-06, "loss": 12.5593, "step": 170530 }, { "epoch": 0.34450158978979223, "grad_norm": 578.3355712890625, "learning_rate": 8.331621340461731e-06, "loss": 23.7184, "step": 170540 }, { "epoch": 0.34452179042247605, "grad_norm": 897.3865356445312, "learning_rate": 8.33136104716738e-06, "loss": 32.7095, "step": 170550 }, { "epoch": 0.34454199105515987, "grad_norm": 272.4613952636719, "learning_rate": 8.331100737636381e-06, "loss": 23.1787, "step": 170560 }, { "epoch": 0.3445621916878437, "grad_norm": 148.07223510742188, "learning_rate": 8.330840411869999e-06, "loss": 20.7231, "step": 170570 }, { "epoch": 0.34458239232052745, "grad_norm": 226.2969970703125, "learning_rate": 8.330580069869506e-06, "loss": 11.8119, "step": 170580 }, { "epoch": 0.3446025929532113, "grad_norm": 325.1975402832031, "learning_rate": 8.33031971163617e-06, "loss": 22.0969, "step": 170590 }, { "epoch": 0.3446227935858951, "grad_norm": 297.3385925292969, "learning_rate": 8.33005933717126e-06, "loss": 16.9888, "step": 170600 }, { "epoch": 0.3446429942185789, "grad_norm": 472.20001220703125, "learning_rate": 8.329798946476042e-06, "loss": 23.4437, "step": 170610 }, { "epoch": 0.34466319485126273, "grad_norm": 224.74546813964844, "learning_rate": 8.329538539551791e-06, "loss": 37.5062, "step": 170620 }, { "epoch": 0.34468339548394655, "grad_norm": 1182.8414306640625, "learning_rate": 8.32927811639977e-06, "loss": 26.8754, "step": 170630 }, { "epoch": 0.3447035961166304, "grad_norm": 479.99859619140625, "learning_rate": 8.329017677021254e-06, "loss": 10.2862, "step": 170640 }, { "epoch": 0.3447237967493142, "grad_norm": 67.94975280761719, "learning_rate": 8.328757221417507e-06, "loss": 13.4659, "step": 170650 }, { "epoch": 0.344743997381998, "grad_norm": 1382.2430419921875, "learning_rate": 8.328496749589803e-06, "loss": 46.9842, "step": 170660 }, { "epoch": 0.34476419801468183, "grad_norm": 233.00741577148438, "learning_rate": 8.328236261539411e-06, "loss": 17.4405, "step": 170670 }, { "epoch": 0.34478439864736565, "grad_norm": 202.4651336669922, "learning_rate": 8.327975757267596e-06, "loss": 17.6215, "step": 170680 }, { "epoch": 0.3448045992800495, "grad_norm": 145.4674072265625, "learning_rate": 8.327715236775634e-06, "loss": 12.1717, "step": 170690 }, { "epoch": 0.3448247999127333, "grad_norm": 134.54193115234375, "learning_rate": 8.327454700064788e-06, "loss": 12.8107, "step": 170700 }, { "epoch": 0.34484500054541706, "grad_norm": 157.5186767578125, "learning_rate": 8.327194147136332e-06, "loss": 31.7175, "step": 170710 }, { "epoch": 0.3448652011781009, "grad_norm": 1263.5888671875, "learning_rate": 8.326933577991536e-06, "loss": 29.6592, "step": 170720 }, { "epoch": 0.3448854018107847, "grad_norm": 215.4190673828125, "learning_rate": 8.326672992631671e-06, "loss": 29.3626, "step": 170730 }, { "epoch": 0.3449056024434685, "grad_norm": 56.84693145751953, "learning_rate": 8.326412391058003e-06, "loss": 14.918, "step": 170740 }, { "epoch": 0.34492580307615234, "grad_norm": 133.9408416748047, "learning_rate": 8.326151773271805e-06, "loss": 18.6103, "step": 170750 }, { "epoch": 0.34494600370883616, "grad_norm": 546.8868408203125, "learning_rate": 8.325891139274348e-06, "loss": 23.2209, "step": 170760 }, { "epoch": 0.34496620434152, "grad_norm": 706.3624267578125, "learning_rate": 8.325630489066899e-06, "loss": 29.0988, "step": 170770 }, { "epoch": 0.3449864049742038, "grad_norm": 288.14337158203125, "learning_rate": 8.325369822650731e-06, "loss": 18.4262, "step": 170780 }, { "epoch": 0.3450066056068876, "grad_norm": 172.55477905273438, "learning_rate": 8.325109140027115e-06, "loss": 21.7003, "step": 170790 }, { "epoch": 0.34502680623957144, "grad_norm": 152.78038024902344, "learning_rate": 8.324848441197317e-06, "loss": 22.9129, "step": 170800 }, { "epoch": 0.34504700687225526, "grad_norm": 55.44389724731445, "learning_rate": 8.324587726162614e-06, "loss": 16.3789, "step": 170810 }, { "epoch": 0.3450672075049391, "grad_norm": 186.7355194091797, "learning_rate": 8.324326994924272e-06, "loss": 18.3605, "step": 170820 }, { "epoch": 0.3450874081376229, "grad_norm": 538.0653076171875, "learning_rate": 8.324066247483565e-06, "loss": 15.0916, "step": 170830 }, { "epoch": 0.34510760877030666, "grad_norm": 68.13291931152344, "learning_rate": 8.323805483841762e-06, "loss": 30.8149, "step": 170840 }, { "epoch": 0.3451278094029905, "grad_norm": 228.2765655517578, "learning_rate": 8.323544704000134e-06, "loss": 19.3277, "step": 170850 }, { "epoch": 0.3451480100356743, "grad_norm": 383.4209289550781, "learning_rate": 8.323283907959952e-06, "loss": 10.5868, "step": 170860 }, { "epoch": 0.3451682106683581, "grad_norm": 200.56739807128906, "learning_rate": 8.323023095722486e-06, "loss": 13.1865, "step": 170870 }, { "epoch": 0.34518841130104194, "grad_norm": 488.5154113769531, "learning_rate": 8.32276226728901e-06, "loss": 26.1843, "step": 170880 }, { "epoch": 0.34520861193372576, "grad_norm": 364.5115661621094, "learning_rate": 8.322501422660794e-06, "loss": 20.6822, "step": 170890 }, { "epoch": 0.3452288125664096, "grad_norm": 1810.1368408203125, "learning_rate": 8.32224056183911e-06, "loss": 27.0223, "step": 170900 }, { "epoch": 0.3452490131990934, "grad_norm": 288.8748779296875, "learning_rate": 8.321979684825225e-06, "loss": 20.684, "step": 170910 }, { "epoch": 0.3452692138317772, "grad_norm": 348.7375793457031, "learning_rate": 8.321718791620417e-06, "loss": 13.6374, "step": 170920 }, { "epoch": 0.34528941446446104, "grad_norm": 323.93280029296875, "learning_rate": 8.321457882225952e-06, "loss": 8.739, "step": 170930 }, { "epoch": 0.34530961509714486, "grad_norm": 101.95902252197266, "learning_rate": 8.321196956643107e-06, "loss": 28.3534, "step": 170940 }, { "epoch": 0.3453298157298287, "grad_norm": 304.73138427734375, "learning_rate": 8.320936014873148e-06, "loss": 30.7475, "step": 170950 }, { "epoch": 0.34535001636251245, "grad_norm": 149.52565002441406, "learning_rate": 8.320675056917353e-06, "loss": 19.3072, "step": 170960 }, { "epoch": 0.34537021699519627, "grad_norm": 380.40093994140625, "learning_rate": 8.320414082776987e-06, "loss": 25.8855, "step": 170970 }, { "epoch": 0.3453904176278801, "grad_norm": 333.75457763671875, "learning_rate": 8.320153092453326e-06, "loss": 20.8207, "step": 170980 }, { "epoch": 0.3454106182605639, "grad_norm": 400.460205078125, "learning_rate": 8.319892085947643e-06, "loss": 17.9592, "step": 170990 }, { "epoch": 0.3454308188932477, "grad_norm": 294.1415100097656, "learning_rate": 8.319631063261209e-06, "loss": 25.5241, "step": 171000 }, { "epoch": 0.34545101952593155, "grad_norm": 503.7144775390625, "learning_rate": 8.319370024395294e-06, "loss": 19.167, "step": 171010 }, { "epoch": 0.34547122015861537, "grad_norm": 394.3111572265625, "learning_rate": 8.319108969351173e-06, "loss": 27.5096, "step": 171020 }, { "epoch": 0.3454914207912992, "grad_norm": 300.6436462402344, "learning_rate": 8.318847898130118e-06, "loss": 32.1391, "step": 171030 }, { "epoch": 0.345511621423983, "grad_norm": 281.4556579589844, "learning_rate": 8.318586810733401e-06, "loss": 15.5624, "step": 171040 }, { "epoch": 0.3455318220566668, "grad_norm": 235.9391632080078, "learning_rate": 8.318325707162293e-06, "loss": 10.1266, "step": 171050 }, { "epoch": 0.34555202268935065, "grad_norm": 96.39503479003906, "learning_rate": 8.318064587418068e-06, "loss": 24.4942, "step": 171060 }, { "epoch": 0.34557222332203447, "grad_norm": 144.9270477294922, "learning_rate": 8.317803451502e-06, "loss": 12.7796, "step": 171070 }, { "epoch": 0.3455924239547183, "grad_norm": 403.452880859375, "learning_rate": 8.31754229941536e-06, "loss": 28.23, "step": 171080 }, { "epoch": 0.34561262458740205, "grad_norm": 278.4861755371094, "learning_rate": 8.31728113115942e-06, "loss": 21.4252, "step": 171090 }, { "epoch": 0.34563282522008587, "grad_norm": 438.6116943359375, "learning_rate": 8.317019946735456e-06, "loss": 34.2879, "step": 171100 }, { "epoch": 0.3456530258527697, "grad_norm": 50.306766510009766, "learning_rate": 8.316758746144738e-06, "loss": 18.3035, "step": 171110 }, { "epoch": 0.3456732264854535, "grad_norm": 1020.4341430664062, "learning_rate": 8.31649752938854e-06, "loss": 31.2892, "step": 171120 }, { "epoch": 0.34569342711813733, "grad_norm": 217.30584716796875, "learning_rate": 8.316236296468135e-06, "loss": 24.1333, "step": 171130 }, { "epoch": 0.34571362775082115, "grad_norm": 337.0846252441406, "learning_rate": 8.315975047384798e-06, "loss": 16.1209, "step": 171140 }, { "epoch": 0.34573382838350497, "grad_norm": 550.1453857421875, "learning_rate": 8.315713782139801e-06, "loss": 16.9352, "step": 171150 }, { "epoch": 0.3457540290161888, "grad_norm": 332.5557556152344, "learning_rate": 8.315452500734415e-06, "loss": 23.1349, "step": 171160 }, { "epoch": 0.3457742296488726, "grad_norm": 190.5364532470703, "learning_rate": 8.315191203169917e-06, "loss": 24.9159, "step": 171170 }, { "epoch": 0.34579443028155643, "grad_norm": 385.6728820800781, "learning_rate": 8.314929889447578e-06, "loss": 18.1103, "step": 171180 }, { "epoch": 0.34581463091424025, "grad_norm": 704.8052978515625, "learning_rate": 8.314668559568674e-06, "loss": 14.2078, "step": 171190 }, { "epoch": 0.34583483154692407, "grad_norm": 430.57012939453125, "learning_rate": 8.314407213534477e-06, "loss": 33.2633, "step": 171200 }, { "epoch": 0.3458550321796079, "grad_norm": 31.710285186767578, "learning_rate": 8.31414585134626e-06, "loss": 22.9794, "step": 171210 }, { "epoch": 0.34587523281229166, "grad_norm": 379.823974609375, "learning_rate": 8.3138844730053e-06, "loss": 12.5186, "step": 171220 }, { "epoch": 0.3458954334449755, "grad_norm": 248.68043518066406, "learning_rate": 8.313623078512869e-06, "loss": 20.5562, "step": 171230 }, { "epoch": 0.3459156340776593, "grad_norm": 584.3414306640625, "learning_rate": 8.313361667870238e-06, "loss": 26.6529, "step": 171240 }, { "epoch": 0.3459358347103431, "grad_norm": 287.7368469238281, "learning_rate": 8.313100241078689e-06, "loss": 32.7192, "step": 171250 }, { "epoch": 0.34595603534302694, "grad_norm": 587.224365234375, "learning_rate": 8.312838798139488e-06, "loss": 31.9437, "step": 171260 }, { "epoch": 0.34597623597571076, "grad_norm": 289.5538024902344, "learning_rate": 8.312577339053914e-06, "loss": 16.768, "step": 171270 }, { "epoch": 0.3459964366083946, "grad_norm": 166.86204528808594, "learning_rate": 8.312315863823239e-06, "loss": 11.8873, "step": 171280 }, { "epoch": 0.3460166372410784, "grad_norm": 254.0652313232422, "learning_rate": 8.312054372448737e-06, "loss": 13.0492, "step": 171290 }, { "epoch": 0.3460368378737622, "grad_norm": 251.2845001220703, "learning_rate": 8.311792864931686e-06, "loss": 33.0301, "step": 171300 }, { "epoch": 0.34605703850644604, "grad_norm": 760.4174194335938, "learning_rate": 8.311531341273355e-06, "loss": 11.0291, "step": 171310 }, { "epoch": 0.34607723913912986, "grad_norm": 345.7659912109375, "learning_rate": 8.311269801475026e-06, "loss": 23.382, "step": 171320 }, { "epoch": 0.3460974397718137, "grad_norm": 368.26983642578125, "learning_rate": 8.311008245537967e-06, "loss": 18.7608, "step": 171330 }, { "epoch": 0.3461176404044975, "grad_norm": 170.34210205078125, "learning_rate": 8.310746673463456e-06, "loss": 16.7789, "step": 171340 }, { "epoch": 0.34613784103718126, "grad_norm": 1628.5052490234375, "learning_rate": 8.310485085252767e-06, "loss": 18.3712, "step": 171350 }, { "epoch": 0.3461580416698651, "grad_norm": 526.6033325195312, "learning_rate": 8.310223480907176e-06, "loss": 21.4235, "step": 171360 }, { "epoch": 0.3461782423025489, "grad_norm": 22.046300888061523, "learning_rate": 8.309961860427957e-06, "loss": 18.9913, "step": 171370 }, { "epoch": 0.3461984429352327, "grad_norm": 270.53314208984375, "learning_rate": 8.309700223816385e-06, "loss": 20.447, "step": 171380 }, { "epoch": 0.34621864356791654, "grad_norm": 324.48443603515625, "learning_rate": 8.309438571073734e-06, "loss": 31.985, "step": 171390 }, { "epoch": 0.34623884420060036, "grad_norm": 328.2781677246094, "learning_rate": 8.309176902201283e-06, "loss": 21.0664, "step": 171400 }, { "epoch": 0.3462590448332842, "grad_norm": 13.621750831604004, "learning_rate": 8.308915217200305e-06, "loss": 11.3892, "step": 171410 }, { "epoch": 0.346279245465968, "grad_norm": 211.65231323242188, "learning_rate": 8.308653516072074e-06, "loss": 14.412, "step": 171420 }, { "epoch": 0.3462994460986518, "grad_norm": 34.386322021484375, "learning_rate": 8.30839179881787e-06, "loss": 29.1981, "step": 171430 }, { "epoch": 0.34631964673133564, "grad_norm": 405.6966552734375, "learning_rate": 8.308130065438963e-06, "loss": 18.6333, "step": 171440 }, { "epoch": 0.34633984736401946, "grad_norm": 179.08432006835938, "learning_rate": 8.307868315936631e-06, "loss": 17.549, "step": 171450 }, { "epoch": 0.3463600479967033, "grad_norm": 298.62353515625, "learning_rate": 8.307606550312152e-06, "loss": 21.5799, "step": 171460 }, { "epoch": 0.3463802486293871, "grad_norm": 692.1006469726562, "learning_rate": 8.307344768566798e-06, "loss": 29.6304, "step": 171470 }, { "epoch": 0.34640044926207086, "grad_norm": 791.55224609375, "learning_rate": 8.30708297070185e-06, "loss": 34.4238, "step": 171480 }, { "epoch": 0.3464206498947547, "grad_norm": 225.66407775878906, "learning_rate": 8.306821156718577e-06, "loss": 15.597, "step": 171490 }, { "epoch": 0.3464408505274385, "grad_norm": 376.0147399902344, "learning_rate": 8.30655932661826e-06, "loss": 21.3888, "step": 171500 }, { "epoch": 0.3464610511601223, "grad_norm": 154.04544067382812, "learning_rate": 8.306297480402175e-06, "loss": 15.9232, "step": 171510 }, { "epoch": 0.34648125179280614, "grad_norm": 338.4514465332031, "learning_rate": 8.306035618071595e-06, "loss": 24.2222, "step": 171520 }, { "epoch": 0.34650145242548996, "grad_norm": 121.32572174072266, "learning_rate": 8.305773739627801e-06, "loss": 10.3388, "step": 171530 }, { "epoch": 0.3465216530581738, "grad_norm": 204.64381408691406, "learning_rate": 8.305511845072065e-06, "loss": 15.114, "step": 171540 }, { "epoch": 0.3465418536908576, "grad_norm": 56.73567581176758, "learning_rate": 8.305249934405663e-06, "loss": 19.4069, "step": 171550 }, { "epoch": 0.3465620543235414, "grad_norm": 133.40628051757812, "learning_rate": 8.304988007629878e-06, "loss": 15.3766, "step": 171560 }, { "epoch": 0.34658225495622524, "grad_norm": 675.3148803710938, "learning_rate": 8.30472606474598e-06, "loss": 44.3666, "step": 171570 }, { "epoch": 0.34660245558890906, "grad_norm": 647.8989868164062, "learning_rate": 8.304464105755248e-06, "loss": 34.4295, "step": 171580 }, { "epoch": 0.3466226562215929, "grad_norm": 238.79830932617188, "learning_rate": 8.304202130658959e-06, "loss": 24.1601, "step": 171590 }, { "epoch": 0.34664285685427665, "grad_norm": 529.0037231445312, "learning_rate": 8.303940139458389e-06, "loss": 16.5184, "step": 171600 }, { "epoch": 0.34666305748696047, "grad_norm": 1090.60107421875, "learning_rate": 8.303678132154817e-06, "loss": 43.0215, "step": 171610 }, { "epoch": 0.3466832581196443, "grad_norm": 48.80373764038086, "learning_rate": 8.303416108749516e-06, "loss": 13.3212, "step": 171620 }, { "epoch": 0.3467034587523281, "grad_norm": 313.8470458984375, "learning_rate": 8.303154069243769e-06, "loss": 25.8321, "step": 171630 }, { "epoch": 0.34672365938501193, "grad_norm": 269.7434387207031, "learning_rate": 8.302892013638846e-06, "loss": 18.3791, "step": 171640 }, { "epoch": 0.34674386001769575, "grad_norm": 760.076171875, "learning_rate": 8.302629941936032e-06, "loss": 17.7399, "step": 171650 }, { "epoch": 0.34676406065037957, "grad_norm": 256.5214538574219, "learning_rate": 8.302367854136598e-06, "loss": 19.016, "step": 171660 }, { "epoch": 0.3467842612830634, "grad_norm": 262.8376159667969, "learning_rate": 8.302105750241822e-06, "loss": 17.6686, "step": 171670 }, { "epoch": 0.3468044619157472, "grad_norm": 862.4227294921875, "learning_rate": 8.301843630252986e-06, "loss": 35.5272, "step": 171680 }, { "epoch": 0.34682466254843103, "grad_norm": 286.9437561035156, "learning_rate": 8.301581494171363e-06, "loss": 23.2039, "step": 171690 }, { "epoch": 0.34684486318111485, "grad_norm": 354.2810363769531, "learning_rate": 8.301319341998231e-06, "loss": 13.2275, "step": 171700 }, { "epoch": 0.34686506381379867, "grad_norm": 279.93743896484375, "learning_rate": 8.301057173734872e-06, "loss": 35.6388, "step": 171710 }, { "epoch": 0.3468852644464825, "grad_norm": 253.51821899414062, "learning_rate": 8.300794989382559e-06, "loss": 13.9146, "step": 171720 }, { "epoch": 0.34690546507916625, "grad_norm": 229.6984100341797, "learning_rate": 8.300532788942571e-06, "loss": 30.1753, "step": 171730 }, { "epoch": 0.3469256657118501, "grad_norm": 558.8506469726562, "learning_rate": 8.300270572416187e-06, "loss": 23.2753, "step": 171740 }, { "epoch": 0.3469458663445339, "grad_norm": 11.759117126464844, "learning_rate": 8.300008339804686e-06, "loss": 10.3466, "step": 171750 }, { "epoch": 0.3469660669772177, "grad_norm": 4.150234699249268, "learning_rate": 8.299746091109343e-06, "loss": 18.7439, "step": 171760 }, { "epoch": 0.34698626760990153, "grad_norm": 137.3746795654297, "learning_rate": 8.299483826331436e-06, "loss": 10.2812, "step": 171770 }, { "epoch": 0.34700646824258535, "grad_norm": 231.15675354003906, "learning_rate": 8.299221545472248e-06, "loss": 22.792, "step": 171780 }, { "epoch": 0.3470266688752692, "grad_norm": 581.825927734375, "learning_rate": 8.298959248533054e-06, "loss": 23.6777, "step": 171790 }, { "epoch": 0.347046869507953, "grad_norm": 512.1074829101562, "learning_rate": 8.298696935515132e-06, "loss": 36.8633, "step": 171800 }, { "epoch": 0.3470670701406368, "grad_norm": 146.6841278076172, "learning_rate": 8.29843460641976e-06, "loss": 34.9039, "step": 171810 }, { "epoch": 0.34708727077332063, "grad_norm": 253.1345672607422, "learning_rate": 8.29817226124822e-06, "loss": 16.4324, "step": 171820 }, { "epoch": 0.34710747140600445, "grad_norm": 183.64852905273438, "learning_rate": 8.297909900001787e-06, "loss": 16.7182, "step": 171830 }, { "epoch": 0.3471276720386883, "grad_norm": 451.6169128417969, "learning_rate": 8.297647522681741e-06, "loss": 31.6289, "step": 171840 }, { "epoch": 0.3471478726713721, "grad_norm": 416.873046875, "learning_rate": 8.297385129289361e-06, "loss": 10.0191, "step": 171850 }, { "epoch": 0.34716807330405586, "grad_norm": 28.152875900268555, "learning_rate": 8.297122719825928e-06, "loss": 21.4273, "step": 171860 }, { "epoch": 0.3471882739367397, "grad_norm": 398.7801818847656, "learning_rate": 8.296860294292716e-06, "loss": 18.7796, "step": 171870 }, { "epoch": 0.3472084745694235, "grad_norm": 533.4308471679688, "learning_rate": 8.296597852691008e-06, "loss": 25.6568, "step": 171880 }, { "epoch": 0.3472286752021073, "grad_norm": 134.2676239013672, "learning_rate": 8.296335395022083e-06, "loss": 20.53, "step": 171890 }, { "epoch": 0.34724887583479114, "grad_norm": 253.13568115234375, "learning_rate": 8.296072921287217e-06, "loss": 16.1016, "step": 171900 }, { "epoch": 0.34726907646747496, "grad_norm": 516.5763549804688, "learning_rate": 8.295810431487692e-06, "loss": 46.778, "step": 171910 }, { "epoch": 0.3472892771001588, "grad_norm": 213.3980712890625, "learning_rate": 8.295547925624786e-06, "loss": 21.5042, "step": 171920 }, { "epoch": 0.3473094777328426, "grad_norm": 312.1641845703125, "learning_rate": 8.295285403699783e-06, "loss": 20.217, "step": 171930 }, { "epoch": 0.3473296783655264, "grad_norm": 276.3150329589844, "learning_rate": 8.295022865713955e-06, "loss": 18.3805, "step": 171940 }, { "epoch": 0.34734987899821024, "grad_norm": 350.9801940917969, "learning_rate": 8.294760311668586e-06, "loss": 20.1115, "step": 171950 }, { "epoch": 0.34737007963089406, "grad_norm": 605.9822998046875, "learning_rate": 8.294497741564953e-06, "loss": 15.1653, "step": 171960 }, { "epoch": 0.3473902802635779, "grad_norm": 59.08359146118164, "learning_rate": 8.29423515540434e-06, "loss": 20.0098, "step": 171970 }, { "epoch": 0.3474104808962617, "grad_norm": 572.9135131835938, "learning_rate": 8.293972553188023e-06, "loss": 29.9143, "step": 171980 }, { "epoch": 0.34743068152894546, "grad_norm": 204.97848510742188, "learning_rate": 8.293709934917284e-06, "loss": 17.8247, "step": 171990 }, { "epoch": 0.3474508821616293, "grad_norm": 144.74002075195312, "learning_rate": 8.293447300593402e-06, "loss": 17.5793, "step": 172000 }, { "epoch": 0.3474710827943131, "grad_norm": 275.9584045410156, "learning_rate": 8.293184650217657e-06, "loss": 16.1705, "step": 172010 }, { "epoch": 0.3474912834269969, "grad_norm": 363.0588073730469, "learning_rate": 8.292921983791332e-06, "loss": 23.0349, "step": 172020 }, { "epoch": 0.34751148405968074, "grad_norm": 229.21067810058594, "learning_rate": 8.292659301315702e-06, "loss": 20.384, "step": 172030 }, { "epoch": 0.34753168469236456, "grad_norm": 285.2445373535156, "learning_rate": 8.29239660279205e-06, "loss": 11.975, "step": 172040 }, { "epoch": 0.3475518853250484, "grad_norm": 0.0, "learning_rate": 8.292133888221659e-06, "loss": 15.9117, "step": 172050 }, { "epoch": 0.3475720859577322, "grad_norm": 267.7245788574219, "learning_rate": 8.291871157605803e-06, "loss": 22.411, "step": 172060 }, { "epoch": 0.347592286590416, "grad_norm": 216.8616180419922, "learning_rate": 8.291608410945768e-06, "loss": 28.3138, "step": 172070 }, { "epoch": 0.34761248722309984, "grad_norm": 163.97698974609375, "learning_rate": 8.291345648242832e-06, "loss": 23.2232, "step": 172080 }, { "epoch": 0.34763268785578366, "grad_norm": 425.204833984375, "learning_rate": 8.291082869498277e-06, "loss": 21.3149, "step": 172090 }, { "epoch": 0.3476528884884675, "grad_norm": 653.385498046875, "learning_rate": 8.290820074713383e-06, "loss": 15.9267, "step": 172100 }, { "epoch": 0.3476730891211513, "grad_norm": 63.07657241821289, "learning_rate": 8.290557263889432e-06, "loss": 4.1051, "step": 172110 }, { "epoch": 0.34769328975383507, "grad_norm": 124.6961669921875, "learning_rate": 8.290294437027704e-06, "loss": 20.0723, "step": 172120 }, { "epoch": 0.3477134903865189, "grad_norm": 257.12127685546875, "learning_rate": 8.29003159412948e-06, "loss": 11.6086, "step": 172130 }, { "epoch": 0.3477336910192027, "grad_norm": 146.11514282226562, "learning_rate": 8.28976873519604e-06, "loss": 9.9045, "step": 172140 }, { "epoch": 0.3477538916518865, "grad_norm": 302.8657531738281, "learning_rate": 8.289505860228666e-06, "loss": 43.5293, "step": 172150 }, { "epoch": 0.34777409228457035, "grad_norm": 633.71875, "learning_rate": 8.28924296922864e-06, "loss": 18.7803, "step": 172160 }, { "epoch": 0.34779429291725417, "grad_norm": 593.2508544921875, "learning_rate": 8.288980062197243e-06, "loss": 24.7174, "step": 172170 }, { "epoch": 0.347814493549938, "grad_norm": 114.97169494628906, "learning_rate": 8.288717139135755e-06, "loss": 7.4869, "step": 172180 }, { "epoch": 0.3478346941826218, "grad_norm": 295.49859619140625, "learning_rate": 8.28845420004546e-06, "loss": 21.2783, "step": 172190 }, { "epoch": 0.3478548948153056, "grad_norm": 355.2464294433594, "learning_rate": 8.288191244927637e-06, "loss": 13.5316, "step": 172200 }, { "epoch": 0.34787509544798945, "grad_norm": 433.9135437011719, "learning_rate": 8.28792827378357e-06, "loss": 13.2927, "step": 172210 }, { "epoch": 0.34789529608067327, "grad_norm": 917.7648315429688, "learning_rate": 8.287665286614538e-06, "loss": 20.9219, "step": 172220 }, { "epoch": 0.3479154967133571, "grad_norm": 642.6190795898438, "learning_rate": 8.287402283421825e-06, "loss": 19.8222, "step": 172230 }, { "epoch": 0.34793569734604085, "grad_norm": 166.6343994140625, "learning_rate": 8.287139264206712e-06, "loss": 29.2182, "step": 172240 }, { "epoch": 0.34795589797872467, "grad_norm": 176.2686004638672, "learning_rate": 8.28687622897048e-06, "loss": 20.5901, "step": 172250 }, { "epoch": 0.3479760986114085, "grad_norm": 333.2506408691406, "learning_rate": 8.286613177714412e-06, "loss": 22.2212, "step": 172260 }, { "epoch": 0.3479962992440923, "grad_norm": 19.843992233276367, "learning_rate": 8.28635011043979e-06, "loss": 15.6599, "step": 172270 }, { "epoch": 0.34801649987677613, "grad_norm": 180.3463134765625, "learning_rate": 8.286087027147899e-06, "loss": 22.8229, "step": 172280 }, { "epoch": 0.34803670050945995, "grad_norm": 388.6662292480469, "learning_rate": 8.285823927840015e-06, "loss": 14.9701, "step": 172290 }, { "epoch": 0.34805690114214377, "grad_norm": 107.67650604248047, "learning_rate": 8.285560812517423e-06, "loss": 39.8903, "step": 172300 }, { "epoch": 0.3480771017748276, "grad_norm": 385.4369812011719, "learning_rate": 8.285297681181408e-06, "loss": 18.5939, "step": 172310 }, { "epoch": 0.3480973024075114, "grad_norm": 282.22515869140625, "learning_rate": 8.285034533833251e-06, "loss": 26.8097, "step": 172320 }, { "epoch": 0.34811750304019523, "grad_norm": 346.34185791015625, "learning_rate": 8.284771370474233e-06, "loss": 25.159, "step": 172330 }, { "epoch": 0.34813770367287905, "grad_norm": 7.281435012817383, "learning_rate": 8.284508191105638e-06, "loss": 19.1846, "step": 172340 }, { "epoch": 0.34815790430556287, "grad_norm": 254.45118713378906, "learning_rate": 8.284244995728749e-06, "loss": 23.8161, "step": 172350 }, { "epoch": 0.3481781049382467, "grad_norm": 481.2984313964844, "learning_rate": 8.283981784344847e-06, "loss": 26.3877, "step": 172360 }, { "epoch": 0.34819830557093046, "grad_norm": 247.38787841796875, "learning_rate": 8.283718556955216e-06, "loss": 20.0692, "step": 172370 }, { "epoch": 0.3482185062036143, "grad_norm": 244.7825469970703, "learning_rate": 8.283455313561141e-06, "loss": 36.8592, "step": 172380 }, { "epoch": 0.3482387068362981, "grad_norm": 564.6099243164062, "learning_rate": 8.283192054163902e-06, "loss": 20.645, "step": 172390 }, { "epoch": 0.3482589074689819, "grad_norm": 219.22683715820312, "learning_rate": 8.282928778764783e-06, "loss": 18.6215, "step": 172400 }, { "epoch": 0.34827910810166574, "grad_norm": 580.5264282226562, "learning_rate": 8.282665487365067e-06, "loss": 10.4525, "step": 172410 }, { "epoch": 0.34829930873434956, "grad_norm": 332.92877197265625, "learning_rate": 8.282402179966039e-06, "loss": 25.4324, "step": 172420 }, { "epoch": 0.3483195093670334, "grad_norm": 119.51548767089844, "learning_rate": 8.282138856568978e-06, "loss": 24.5579, "step": 172430 }, { "epoch": 0.3483397099997172, "grad_norm": 179.80712890625, "learning_rate": 8.281875517175173e-06, "loss": 14.182, "step": 172440 }, { "epoch": 0.348359910632401, "grad_norm": 0.0, "learning_rate": 8.281612161785903e-06, "loss": 14.5443, "step": 172450 }, { "epoch": 0.34838011126508484, "grad_norm": 786.3966674804688, "learning_rate": 8.281348790402455e-06, "loss": 34.3865, "step": 172460 }, { "epoch": 0.34840031189776866, "grad_norm": 819.1031494140625, "learning_rate": 8.28108540302611e-06, "loss": 27.3132, "step": 172470 }, { "epoch": 0.3484205125304525, "grad_norm": 1402.1392822265625, "learning_rate": 8.280821999658153e-06, "loss": 46.019, "step": 172480 }, { "epoch": 0.3484407131631363, "grad_norm": 98.4646224975586, "learning_rate": 8.280558580299868e-06, "loss": 25.2802, "step": 172490 }, { "epoch": 0.34846091379582006, "grad_norm": 343.40557861328125, "learning_rate": 8.280295144952537e-06, "loss": 28.518, "step": 172500 }, { "epoch": 0.3484811144285039, "grad_norm": 263.0292053222656, "learning_rate": 8.280031693617446e-06, "loss": 28.9754, "step": 172510 }, { "epoch": 0.3485013150611877, "grad_norm": 323.91119384765625, "learning_rate": 8.27976822629588e-06, "loss": 11.0985, "step": 172520 }, { "epoch": 0.3485215156938715, "grad_norm": 132.8201446533203, "learning_rate": 8.279504742989117e-06, "loss": 25.2072, "step": 172530 }, { "epoch": 0.34854171632655534, "grad_norm": 44.449310302734375, "learning_rate": 8.27924124369845e-06, "loss": 14.8191, "step": 172540 }, { "epoch": 0.34856191695923916, "grad_norm": 400.93115234375, "learning_rate": 8.278977728425157e-06, "loss": 28.6694, "step": 172550 }, { "epoch": 0.348582117591923, "grad_norm": 451.40460205078125, "learning_rate": 8.278714197170526e-06, "loss": 14.3261, "step": 172560 }, { "epoch": 0.3486023182246068, "grad_norm": 228.31069946289062, "learning_rate": 8.278450649935838e-06, "loss": 32.3324, "step": 172570 }, { "epoch": 0.3486225188572906, "grad_norm": 164.6622314453125, "learning_rate": 8.278187086722378e-06, "loss": 17.9522, "step": 172580 }, { "epoch": 0.34864271948997444, "grad_norm": 254.9337615966797, "learning_rate": 8.277923507531434e-06, "loss": 18.5715, "step": 172590 }, { "epoch": 0.34866292012265826, "grad_norm": 323.5970458984375, "learning_rate": 8.277659912364288e-06, "loss": 19.9423, "step": 172600 }, { "epoch": 0.3486831207553421, "grad_norm": 431.49163818359375, "learning_rate": 8.277396301222223e-06, "loss": 27.3396, "step": 172610 }, { "epoch": 0.3487033213880259, "grad_norm": 230.6212158203125, "learning_rate": 8.277132674106528e-06, "loss": 16.0703, "step": 172620 }, { "epoch": 0.34872352202070966, "grad_norm": 183.30612182617188, "learning_rate": 8.276869031018486e-06, "loss": 26.3593, "step": 172630 }, { "epoch": 0.3487437226533935, "grad_norm": 280.1132507324219, "learning_rate": 8.27660537195938e-06, "loss": 23.927, "step": 172640 }, { "epoch": 0.3487639232860773, "grad_norm": 368.6590576171875, "learning_rate": 8.276341696930499e-06, "loss": 14.7973, "step": 172650 }, { "epoch": 0.3487841239187611, "grad_norm": 203.162109375, "learning_rate": 8.276078005933125e-06, "loss": 22.8872, "step": 172660 }, { "epoch": 0.34880432455144494, "grad_norm": 361.19769287109375, "learning_rate": 8.275814298968544e-06, "loss": 13.9683, "step": 172670 }, { "epoch": 0.34882452518412876, "grad_norm": 243.895263671875, "learning_rate": 8.275550576038043e-06, "loss": 18.4989, "step": 172680 }, { "epoch": 0.3488447258168126, "grad_norm": 735.0826416015625, "learning_rate": 8.275286837142903e-06, "loss": 21.4545, "step": 172690 }, { "epoch": 0.3488649264494964, "grad_norm": 218.07638549804688, "learning_rate": 8.275023082284413e-06, "loss": 11.6073, "step": 172700 }, { "epoch": 0.3488851270821802, "grad_norm": 505.3890075683594, "learning_rate": 8.27475931146386e-06, "loss": 13.7006, "step": 172710 }, { "epoch": 0.34890532771486404, "grad_norm": 990.354736328125, "learning_rate": 8.274495524682524e-06, "loss": 27.0042, "step": 172720 }, { "epoch": 0.34892552834754786, "grad_norm": 286.0338134765625, "learning_rate": 8.274231721941696e-06, "loss": 25.4112, "step": 172730 }, { "epoch": 0.3489457289802317, "grad_norm": 241.5786590576172, "learning_rate": 8.273967903242659e-06, "loss": 11.1763, "step": 172740 }, { "epoch": 0.3489659296129155, "grad_norm": 457.9576721191406, "learning_rate": 8.273704068586698e-06, "loss": 21.6906, "step": 172750 }, { "epoch": 0.34898613024559927, "grad_norm": 379.2498474121094, "learning_rate": 8.273440217975103e-06, "loss": 18.4735, "step": 172760 }, { "epoch": 0.3490063308782831, "grad_norm": 247.22305297851562, "learning_rate": 8.273176351409157e-06, "loss": 25.7021, "step": 172770 }, { "epoch": 0.3490265315109669, "grad_norm": 112.18523406982422, "learning_rate": 8.272912468890146e-06, "loss": 17.5528, "step": 172780 }, { "epoch": 0.34904673214365073, "grad_norm": 398.4867248535156, "learning_rate": 8.272648570419357e-06, "loss": 28.2656, "step": 172790 }, { "epoch": 0.34906693277633455, "grad_norm": 177.0189666748047, "learning_rate": 8.272384655998075e-06, "loss": 19.7483, "step": 172800 }, { "epoch": 0.34908713340901837, "grad_norm": 352.15618896484375, "learning_rate": 8.272120725627588e-06, "loss": 27.4482, "step": 172810 }, { "epoch": 0.3491073340417022, "grad_norm": 625.2156372070312, "learning_rate": 8.27185677930918e-06, "loss": 25.3204, "step": 172820 }, { "epoch": 0.349127534674386, "grad_norm": 254.6190185546875, "learning_rate": 8.27159281704414e-06, "loss": 15.4011, "step": 172830 }, { "epoch": 0.34914773530706983, "grad_norm": 530.0823364257812, "learning_rate": 8.271328838833753e-06, "loss": 17.8979, "step": 172840 }, { "epoch": 0.34916793593975365, "grad_norm": 503.38897705078125, "learning_rate": 8.271064844679306e-06, "loss": 27.0733, "step": 172850 }, { "epoch": 0.34918813657243747, "grad_norm": 105.29623413085938, "learning_rate": 8.270800834582087e-06, "loss": 22.6751, "step": 172860 }, { "epoch": 0.3492083372051213, "grad_norm": 471.2449645996094, "learning_rate": 8.270536808543379e-06, "loss": 21.9737, "step": 172870 }, { "epoch": 0.34922853783780505, "grad_norm": 330.411376953125, "learning_rate": 8.270272766564473e-06, "loss": 28.9658, "step": 172880 }, { "epoch": 0.3492487384704889, "grad_norm": 774.26318359375, "learning_rate": 8.270008708646653e-06, "loss": 26.6438, "step": 172890 }, { "epoch": 0.3492689391031727, "grad_norm": 208.5485382080078, "learning_rate": 8.269744634791207e-06, "loss": 21.6636, "step": 172900 }, { "epoch": 0.3492891397358565, "grad_norm": 193.9556121826172, "learning_rate": 8.269480544999424e-06, "loss": 8.7336, "step": 172910 }, { "epoch": 0.34930934036854033, "grad_norm": 305.507080078125, "learning_rate": 8.26921643927259e-06, "loss": 26.8887, "step": 172920 }, { "epoch": 0.34932954100122415, "grad_norm": 288.28936767578125, "learning_rate": 8.268952317611989e-06, "loss": 19.6174, "step": 172930 }, { "epoch": 0.349349741633908, "grad_norm": 311.7254333496094, "learning_rate": 8.268688180018911e-06, "loss": 18.067, "step": 172940 }, { "epoch": 0.3493699422665918, "grad_norm": 365.0914611816406, "learning_rate": 8.268424026494646e-06, "loss": 16.0054, "step": 172950 }, { "epoch": 0.3493901428992756, "grad_norm": 470.6539611816406, "learning_rate": 8.268159857040475e-06, "loss": 20.0986, "step": 172960 }, { "epoch": 0.34941034353195943, "grad_norm": 747.6365356445312, "learning_rate": 8.267895671657692e-06, "loss": 24.2701, "step": 172970 }, { "epoch": 0.34943054416464325, "grad_norm": 203.64743041992188, "learning_rate": 8.26763147034758e-06, "loss": 29.9021, "step": 172980 }, { "epoch": 0.3494507447973271, "grad_norm": 140.28167724609375, "learning_rate": 8.26736725311143e-06, "loss": 17.3487, "step": 172990 }, { "epoch": 0.3494709454300109, "grad_norm": 280.7421875, "learning_rate": 8.267103019950529e-06, "loss": 9.4905, "step": 173000 }, { "epoch": 0.34949114606269466, "grad_norm": 62.15070343017578, "learning_rate": 8.266838770866162e-06, "loss": 15.2702, "step": 173010 }, { "epoch": 0.3495113466953785, "grad_norm": 154.53981018066406, "learning_rate": 8.26657450585962e-06, "loss": 16.3519, "step": 173020 }, { "epoch": 0.3495315473280623, "grad_norm": 795.0919799804688, "learning_rate": 8.266310224932191e-06, "loss": 30.8369, "step": 173030 }, { "epoch": 0.3495517479607461, "grad_norm": 13.746565818786621, "learning_rate": 8.26604592808516e-06, "loss": 83.0099, "step": 173040 }, { "epoch": 0.34957194859342994, "grad_norm": 396.38909912109375, "learning_rate": 8.26578161531982e-06, "loss": 27.1602, "step": 173050 }, { "epoch": 0.34959214922611376, "grad_norm": 448.2518615722656, "learning_rate": 8.265517286637453e-06, "loss": 21.784, "step": 173060 }, { "epoch": 0.3496123498587976, "grad_norm": 456.9084167480469, "learning_rate": 8.265252942039352e-06, "loss": 20.9633, "step": 173070 }, { "epoch": 0.3496325504914814, "grad_norm": 247.86790466308594, "learning_rate": 8.264988581526806e-06, "loss": 16.0689, "step": 173080 }, { "epoch": 0.3496527511241652, "grad_norm": 156.30860900878906, "learning_rate": 8.2647242051011e-06, "loss": 17.2633, "step": 173090 }, { "epoch": 0.34967295175684904, "grad_norm": 566.1484375, "learning_rate": 8.264459812763525e-06, "loss": 25.2013, "step": 173100 }, { "epoch": 0.34969315238953286, "grad_norm": 651.9746704101562, "learning_rate": 8.264195404515369e-06, "loss": 29.0919, "step": 173110 }, { "epoch": 0.3497133530222167, "grad_norm": 353.6759033203125, "learning_rate": 8.26393098035792e-06, "loss": 21.7618, "step": 173120 }, { "epoch": 0.3497335536549005, "grad_norm": 340.6568908691406, "learning_rate": 8.263666540292468e-06, "loss": 31.4526, "step": 173130 }, { "epoch": 0.34975375428758426, "grad_norm": 133.70962524414062, "learning_rate": 8.263402084320299e-06, "loss": 13.4754, "step": 173140 }, { "epoch": 0.3497739549202681, "grad_norm": 364.1566162109375, "learning_rate": 8.263137612442705e-06, "loss": 28.5798, "step": 173150 }, { "epoch": 0.3497941555529519, "grad_norm": 354.4394836425781, "learning_rate": 8.262873124660976e-06, "loss": 26.6927, "step": 173160 }, { "epoch": 0.3498143561856357, "grad_norm": 158.62098693847656, "learning_rate": 8.262608620976396e-06, "loss": 25.2727, "step": 173170 }, { "epoch": 0.34983455681831954, "grad_norm": 0.0, "learning_rate": 8.262344101390261e-06, "loss": 31.09, "step": 173180 }, { "epoch": 0.34985475745100336, "grad_norm": 114.46195983886719, "learning_rate": 8.262079565903853e-06, "loss": 28.5366, "step": 173190 }, { "epoch": 0.3498749580836872, "grad_norm": 532.5999145507812, "learning_rate": 8.261815014518465e-06, "loss": 18.1616, "step": 173200 }, { "epoch": 0.349895158716371, "grad_norm": 607.7233276367188, "learning_rate": 8.261550447235389e-06, "loss": 17.3359, "step": 173210 }, { "epoch": 0.3499153593490548, "grad_norm": 292.6091613769531, "learning_rate": 8.26128586405591e-06, "loss": 15.3743, "step": 173220 }, { "epoch": 0.34993555998173864, "grad_norm": 0.0, "learning_rate": 8.26102126498132e-06, "loss": 24.4837, "step": 173230 }, { "epoch": 0.34995576061442246, "grad_norm": 187.9465789794922, "learning_rate": 8.260756650012906e-06, "loss": 14.5279, "step": 173240 }, { "epoch": 0.3499759612471063, "grad_norm": 35.00516891479492, "learning_rate": 8.260492019151962e-06, "loss": 15.8181, "step": 173250 }, { "epoch": 0.3499961618797901, "grad_norm": 271.4961853027344, "learning_rate": 8.260227372399773e-06, "loss": 10.9975, "step": 173260 }, { "epoch": 0.35001636251247387, "grad_norm": 368.2056884765625, "learning_rate": 8.259962709757634e-06, "loss": 33.3139, "step": 173270 }, { "epoch": 0.3500365631451577, "grad_norm": 263.11785888671875, "learning_rate": 8.259698031226831e-06, "loss": 18.1834, "step": 173280 }, { "epoch": 0.3500567637778415, "grad_norm": 352.2793273925781, "learning_rate": 8.259433336808653e-06, "loss": 25.6054, "step": 173290 }, { "epoch": 0.3500769644105253, "grad_norm": 458.30010986328125, "learning_rate": 8.259168626504395e-06, "loss": 17.1906, "step": 173300 }, { "epoch": 0.35009716504320915, "grad_norm": 518.7818603515625, "learning_rate": 8.258903900315343e-06, "loss": 20.0604, "step": 173310 }, { "epoch": 0.35011736567589297, "grad_norm": 200.27467346191406, "learning_rate": 8.25863915824279e-06, "loss": 26.5523, "step": 173320 }, { "epoch": 0.3501375663085768, "grad_norm": 75.82208251953125, "learning_rate": 8.258374400288022e-06, "loss": 28.5823, "step": 173330 }, { "epoch": 0.3501577669412606, "grad_norm": 410.916259765625, "learning_rate": 8.258109626452335e-06, "loss": 20.5551, "step": 173340 }, { "epoch": 0.3501779675739444, "grad_norm": 84.4941635131836, "learning_rate": 8.257844836737017e-06, "loss": 12.2446, "step": 173350 }, { "epoch": 0.35019816820662825, "grad_norm": 147.6146697998047, "learning_rate": 8.257580031143357e-06, "loss": 16.6855, "step": 173360 }, { "epoch": 0.35021836883931207, "grad_norm": 204.3711700439453, "learning_rate": 8.257315209672648e-06, "loss": 19.6007, "step": 173370 }, { "epoch": 0.3502385694719959, "grad_norm": 354.1034851074219, "learning_rate": 8.257050372326179e-06, "loss": 25.5779, "step": 173380 }, { "epoch": 0.35025877010467965, "grad_norm": 212.71946716308594, "learning_rate": 8.256785519105242e-06, "loss": 25.467, "step": 173390 }, { "epoch": 0.35027897073736347, "grad_norm": 316.14605712890625, "learning_rate": 8.256520650011126e-06, "loss": 23.701, "step": 173400 }, { "epoch": 0.3502991713700473, "grad_norm": 665.3952026367188, "learning_rate": 8.256255765045124e-06, "loss": 33.3742, "step": 173410 }, { "epoch": 0.3503193720027311, "grad_norm": 280.0313415527344, "learning_rate": 8.255990864208529e-06, "loss": 35.0144, "step": 173420 }, { "epoch": 0.35033957263541493, "grad_norm": 364.93157958984375, "learning_rate": 8.255725947502627e-06, "loss": 29.1791, "step": 173430 }, { "epoch": 0.35035977326809875, "grad_norm": 277.6878967285156, "learning_rate": 8.255461014928713e-06, "loss": 30.7059, "step": 173440 }, { "epoch": 0.35037997390078257, "grad_norm": 497.4844970703125, "learning_rate": 8.255196066488074e-06, "loss": 43.9205, "step": 173450 }, { "epoch": 0.3504001745334664, "grad_norm": 313.2330322265625, "learning_rate": 8.254931102182007e-06, "loss": 33.247, "step": 173460 }, { "epoch": 0.3504203751661502, "grad_norm": 115.24762725830078, "learning_rate": 8.254666122011799e-06, "loss": 16.0769, "step": 173470 }, { "epoch": 0.35044057579883403, "grad_norm": 85.1580810546875, "learning_rate": 8.254401125978744e-06, "loss": 16.9964, "step": 173480 }, { "epoch": 0.35046077643151785, "grad_norm": 205.93287658691406, "learning_rate": 8.254136114084133e-06, "loss": 11.1739, "step": 173490 }, { "epoch": 0.35048097706420167, "grad_norm": 129.01499938964844, "learning_rate": 8.253871086329255e-06, "loss": 11.3391, "step": 173500 }, { "epoch": 0.3505011776968855, "grad_norm": 294.9815673828125, "learning_rate": 8.253606042715405e-06, "loss": 36.1178, "step": 173510 }, { "epoch": 0.35052137832956926, "grad_norm": 293.00482177734375, "learning_rate": 8.253340983243876e-06, "loss": 22.1145, "step": 173520 }, { "epoch": 0.3505415789622531, "grad_norm": 312.2038879394531, "learning_rate": 8.253075907915955e-06, "loss": 19.3004, "step": 173530 }, { "epoch": 0.3505617795949369, "grad_norm": 65.24540710449219, "learning_rate": 8.252810816732936e-06, "loss": 18.5178, "step": 173540 }, { "epoch": 0.3505819802276207, "grad_norm": 502.0503845214844, "learning_rate": 8.252545709696115e-06, "loss": 30.9567, "step": 173550 }, { "epoch": 0.35060218086030454, "grad_norm": 395.8454284667969, "learning_rate": 8.252280586806778e-06, "loss": 22.5952, "step": 173560 }, { "epoch": 0.35062238149298836, "grad_norm": 372.55950927734375, "learning_rate": 8.25201544806622e-06, "loss": 29.1731, "step": 173570 }, { "epoch": 0.3506425821256722, "grad_norm": 478.653076171875, "learning_rate": 8.251750293475735e-06, "loss": 36.4763, "step": 173580 }, { "epoch": 0.350662782758356, "grad_norm": 499.67462158203125, "learning_rate": 8.25148512303661e-06, "loss": 14.7104, "step": 173590 }, { "epoch": 0.3506829833910398, "grad_norm": 345.45751953125, "learning_rate": 8.251219936750145e-06, "loss": 15.0508, "step": 173600 }, { "epoch": 0.35070318402372364, "grad_norm": 28.967201232910156, "learning_rate": 8.250954734617627e-06, "loss": 19.9446, "step": 173610 }, { "epoch": 0.35072338465640746, "grad_norm": 449.6793212890625, "learning_rate": 8.250689516640349e-06, "loss": 11.0932, "step": 173620 }, { "epoch": 0.3507435852890913, "grad_norm": 629.494384765625, "learning_rate": 8.250424282819604e-06, "loss": 17.8922, "step": 173630 }, { "epoch": 0.3507637859217751, "grad_norm": 476.1920471191406, "learning_rate": 8.250159033156687e-06, "loss": 26.8721, "step": 173640 }, { "epoch": 0.35078398655445886, "grad_norm": 24.92816162109375, "learning_rate": 8.249893767652888e-06, "loss": 14.7205, "step": 173650 }, { "epoch": 0.3508041871871427, "grad_norm": 168.84359741210938, "learning_rate": 8.2496284863095e-06, "loss": 22.3083, "step": 173660 }, { "epoch": 0.3508243878198265, "grad_norm": 169.42941284179688, "learning_rate": 8.249363189127818e-06, "loss": 22.0579, "step": 173670 }, { "epoch": 0.3508445884525103, "grad_norm": 460.4420471191406, "learning_rate": 8.249097876109136e-06, "loss": 13.9122, "step": 173680 }, { "epoch": 0.35086478908519414, "grad_norm": 1094.1063232421875, "learning_rate": 8.248832547254742e-06, "loss": 26.5416, "step": 173690 }, { "epoch": 0.35088498971787796, "grad_norm": 378.02655029296875, "learning_rate": 8.248567202565934e-06, "loss": 18.1857, "step": 173700 }, { "epoch": 0.3509051903505618, "grad_norm": 285.1150207519531, "learning_rate": 8.248301842044003e-06, "loss": 22.5822, "step": 173710 }, { "epoch": 0.3509253909832456, "grad_norm": 228.94610595703125, "learning_rate": 8.24803646569024e-06, "loss": 25.0157, "step": 173720 }, { "epoch": 0.3509455916159294, "grad_norm": 418.0767517089844, "learning_rate": 8.247771073505946e-06, "loss": 29.071, "step": 173730 }, { "epoch": 0.35096579224861324, "grad_norm": 0.0, "learning_rate": 8.247505665492406e-06, "loss": 15.9256, "step": 173740 }, { "epoch": 0.35098599288129706, "grad_norm": 427.0414733886719, "learning_rate": 8.247240241650918e-06, "loss": 23.2211, "step": 173750 }, { "epoch": 0.3510061935139809, "grad_norm": 203.89007568359375, "learning_rate": 8.246974801982776e-06, "loss": 18.5033, "step": 173760 }, { "epoch": 0.3510263941466647, "grad_norm": 21.541364669799805, "learning_rate": 8.246709346489272e-06, "loss": 26.7519, "step": 173770 }, { "epoch": 0.35104659477934846, "grad_norm": 356.7249450683594, "learning_rate": 8.2464438751717e-06, "loss": 27.1319, "step": 173780 }, { "epoch": 0.3510667954120323, "grad_norm": 229.13775634765625, "learning_rate": 8.246178388031355e-06, "loss": 22.4318, "step": 173790 }, { "epoch": 0.3510869960447161, "grad_norm": 90.83468627929688, "learning_rate": 8.24591288506953e-06, "loss": 11.0315, "step": 173800 }, { "epoch": 0.3511071966773999, "grad_norm": 377.0789489746094, "learning_rate": 8.24564736628752e-06, "loss": 10.4452, "step": 173810 }, { "epoch": 0.35112739731008374, "grad_norm": 287.17877197265625, "learning_rate": 8.245381831686618e-06, "loss": 17.3845, "step": 173820 }, { "epoch": 0.35114759794276756, "grad_norm": 420.8179931640625, "learning_rate": 8.245116281268119e-06, "loss": 18.7658, "step": 173830 }, { "epoch": 0.3511677985754514, "grad_norm": 541.0289306640625, "learning_rate": 8.244850715033316e-06, "loss": 21.8132, "step": 173840 }, { "epoch": 0.3511879992081352, "grad_norm": 337.2922058105469, "learning_rate": 8.244585132983505e-06, "loss": 17.3764, "step": 173850 }, { "epoch": 0.351208199840819, "grad_norm": 165.65652465820312, "learning_rate": 8.244319535119978e-06, "loss": 20.6336, "step": 173860 }, { "epoch": 0.35122840047350284, "grad_norm": 309.7743835449219, "learning_rate": 8.244053921444034e-06, "loss": 20.4565, "step": 173870 }, { "epoch": 0.35124860110618666, "grad_norm": 261.0788879394531, "learning_rate": 8.24378829195696e-06, "loss": 13.4763, "step": 173880 }, { "epoch": 0.3512688017388705, "grad_norm": 148.37811279296875, "learning_rate": 8.243522646660058e-06, "loss": 24.8837, "step": 173890 }, { "epoch": 0.3512890023715543, "grad_norm": 416.64910888671875, "learning_rate": 8.243256985554622e-06, "loss": 21.6944, "step": 173900 }, { "epoch": 0.35130920300423807, "grad_norm": 183.1710968017578, "learning_rate": 8.24299130864194e-06, "loss": 36.5111, "step": 173910 }, { "epoch": 0.3513294036369219, "grad_norm": 356.537841796875, "learning_rate": 8.242725615923316e-06, "loss": 19.8763, "step": 173920 }, { "epoch": 0.3513496042696057, "grad_norm": 273.228515625, "learning_rate": 8.24245990740004e-06, "loss": 18.5587, "step": 173930 }, { "epoch": 0.35136980490228953, "grad_norm": 142.47279357910156, "learning_rate": 8.242194183073406e-06, "loss": 29.2401, "step": 173940 }, { "epoch": 0.35139000553497335, "grad_norm": 373.9376220703125, "learning_rate": 8.24192844294471e-06, "loss": 29.5076, "step": 173950 }, { "epoch": 0.35141020616765717, "grad_norm": 572.1922607421875, "learning_rate": 8.241662687015251e-06, "loss": 20.0522, "step": 173960 }, { "epoch": 0.351430406800341, "grad_norm": 183.8920135498047, "learning_rate": 8.24139691528632e-06, "loss": 17.893, "step": 173970 }, { "epoch": 0.3514506074330248, "grad_norm": 171.80726623535156, "learning_rate": 8.241131127759214e-06, "loss": 8.4419, "step": 173980 }, { "epoch": 0.35147080806570863, "grad_norm": 315.47296142578125, "learning_rate": 8.240865324435225e-06, "loss": 19.0958, "step": 173990 }, { "epoch": 0.35149100869839245, "grad_norm": 106.73255157470703, "learning_rate": 8.240599505315656e-06, "loss": 12.7868, "step": 174000 }, { "epoch": 0.35151120933107627, "grad_norm": 263.27276611328125, "learning_rate": 8.240333670401795e-06, "loss": 23.6233, "step": 174010 }, { "epoch": 0.3515314099637601, "grad_norm": 410.70855712890625, "learning_rate": 8.240067819694941e-06, "loss": 21.5445, "step": 174020 }, { "epoch": 0.35155161059644385, "grad_norm": 54.874942779541016, "learning_rate": 8.23980195319639e-06, "loss": 21.1357, "step": 174030 }, { "epoch": 0.3515718112291277, "grad_norm": 205.47137451171875, "learning_rate": 8.239536070907437e-06, "loss": 21.508, "step": 174040 }, { "epoch": 0.3515920118618115, "grad_norm": 252.79075622558594, "learning_rate": 8.239270172829379e-06, "loss": 13.6452, "step": 174050 }, { "epoch": 0.3516122124944953, "grad_norm": 515.2888793945312, "learning_rate": 8.23900425896351e-06, "loss": 14.9529, "step": 174060 }, { "epoch": 0.35163241312717913, "grad_norm": 226.2638397216797, "learning_rate": 8.238738329311126e-06, "loss": 19.0934, "step": 174070 }, { "epoch": 0.35165261375986295, "grad_norm": 142.75184631347656, "learning_rate": 8.238472383873528e-06, "loss": 16.1639, "step": 174080 }, { "epoch": 0.3516728143925468, "grad_norm": 659.6214599609375, "learning_rate": 8.238206422652007e-06, "loss": 16.7309, "step": 174090 }, { "epoch": 0.3516930150252306, "grad_norm": 402.1994934082031, "learning_rate": 8.237940445647858e-06, "loss": 24.8928, "step": 174100 }, { "epoch": 0.3517132156579144, "grad_norm": 302.8079833984375, "learning_rate": 8.23767445286238e-06, "loss": 23.3337, "step": 174110 }, { "epoch": 0.35173341629059823, "grad_norm": 323.9101257324219, "learning_rate": 8.237408444296872e-06, "loss": 21.0039, "step": 174120 }, { "epoch": 0.35175361692328205, "grad_norm": 721.6761474609375, "learning_rate": 8.237142419952628e-06, "loss": 35.1041, "step": 174130 }, { "epoch": 0.3517738175559659, "grad_norm": 198.2758026123047, "learning_rate": 8.236876379830943e-06, "loss": 27.8073, "step": 174140 }, { "epoch": 0.3517940181886497, "grad_norm": 277.5790100097656, "learning_rate": 8.236610323933115e-06, "loss": 12.6317, "step": 174150 }, { "epoch": 0.35181421882133346, "grad_norm": 651.7263793945312, "learning_rate": 8.236344252260442e-06, "loss": 24.7132, "step": 174160 }, { "epoch": 0.3518344194540173, "grad_norm": 112.15562438964844, "learning_rate": 8.236078164814218e-06, "loss": 24.0048, "step": 174170 }, { "epoch": 0.3518546200867011, "grad_norm": 137.72091674804688, "learning_rate": 8.235812061595742e-06, "loss": 22.982, "step": 174180 }, { "epoch": 0.3518748207193849, "grad_norm": 416.67938232421875, "learning_rate": 8.235545942606311e-06, "loss": 21.3787, "step": 174190 }, { "epoch": 0.35189502135206874, "grad_norm": 379.84375, "learning_rate": 8.235279807847223e-06, "loss": 18.8714, "step": 174200 }, { "epoch": 0.35191522198475256, "grad_norm": 168.63465881347656, "learning_rate": 8.235013657319772e-06, "loss": 37.499, "step": 174210 }, { "epoch": 0.3519354226174364, "grad_norm": 543.0819091796875, "learning_rate": 8.234747491025257e-06, "loss": 20.708, "step": 174220 }, { "epoch": 0.3519556232501202, "grad_norm": 199.30059814453125, "learning_rate": 8.234481308964975e-06, "loss": 19.8817, "step": 174230 }, { "epoch": 0.351975823882804, "grad_norm": 742.211669921875, "learning_rate": 8.234215111140222e-06, "loss": 35.86, "step": 174240 }, { "epoch": 0.35199602451548784, "grad_norm": 249.38819885253906, "learning_rate": 8.2339488975523e-06, "loss": 13.1015, "step": 174250 }, { "epoch": 0.35201622514817166, "grad_norm": 268.9845886230469, "learning_rate": 8.2336826682025e-06, "loss": 18.8086, "step": 174260 }, { "epoch": 0.3520364257808555, "grad_norm": 32.653297424316406, "learning_rate": 8.233416423092124e-06, "loss": 17.695, "step": 174270 }, { "epoch": 0.3520566264135393, "grad_norm": 196.7831268310547, "learning_rate": 8.23315016222247e-06, "loss": 27.7109, "step": 174280 }, { "epoch": 0.35207682704622306, "grad_norm": 114.65450286865234, "learning_rate": 8.232883885594831e-06, "loss": 15.3871, "step": 174290 }, { "epoch": 0.3520970276789069, "grad_norm": 89.75282287597656, "learning_rate": 8.232617593210512e-06, "loss": 12.5192, "step": 174300 }, { "epoch": 0.3521172283115907, "grad_norm": 99.19739532470703, "learning_rate": 8.232351285070804e-06, "loss": 14.9296, "step": 174310 }, { "epoch": 0.3521374289442745, "grad_norm": 668.9234619140625, "learning_rate": 8.23208496117701e-06, "loss": 35.1908, "step": 174320 }, { "epoch": 0.35215762957695834, "grad_norm": 85.74739074707031, "learning_rate": 8.231818621530424e-06, "loss": 32.8532, "step": 174330 }, { "epoch": 0.35217783020964216, "grad_norm": 365.96734619140625, "learning_rate": 8.231552266132346e-06, "loss": 47.5309, "step": 174340 }, { "epoch": 0.352198030842326, "grad_norm": 176.36427307128906, "learning_rate": 8.231285894984076e-06, "loss": 27.0716, "step": 174350 }, { "epoch": 0.3522182314750098, "grad_norm": 217.97398376464844, "learning_rate": 8.231019508086908e-06, "loss": 17.2415, "step": 174360 }, { "epoch": 0.3522384321076936, "grad_norm": 28.688718795776367, "learning_rate": 8.230753105442145e-06, "loss": 24.7315, "step": 174370 }, { "epoch": 0.35225863274037744, "grad_norm": 188.6749267578125, "learning_rate": 8.230486687051082e-06, "loss": 24.6121, "step": 174380 }, { "epoch": 0.35227883337306126, "grad_norm": 497.5626220703125, "learning_rate": 8.23022025291502e-06, "loss": 20.8604, "step": 174390 }, { "epoch": 0.3522990340057451, "grad_norm": 333.66998291015625, "learning_rate": 8.229953803035256e-06, "loss": 22.6922, "step": 174400 }, { "epoch": 0.3523192346384289, "grad_norm": 290.4405517578125, "learning_rate": 8.229687337413087e-06, "loss": 38.2276, "step": 174410 }, { "epoch": 0.35233943527111267, "grad_norm": 239.15504455566406, "learning_rate": 8.229420856049814e-06, "loss": 23.7161, "step": 174420 }, { "epoch": 0.3523596359037965, "grad_norm": 255.3310089111328, "learning_rate": 8.229154358946739e-06, "loss": 27.811, "step": 174430 }, { "epoch": 0.3523798365364803, "grad_norm": 406.867431640625, "learning_rate": 8.228887846105154e-06, "loss": 16.8247, "step": 174440 }, { "epoch": 0.3524000371691641, "grad_norm": 263.8954772949219, "learning_rate": 8.228621317526362e-06, "loss": 19.0497, "step": 174450 }, { "epoch": 0.35242023780184795, "grad_norm": 346.6134033203125, "learning_rate": 8.22835477321166e-06, "loss": 19.1966, "step": 174460 }, { "epoch": 0.35244043843453177, "grad_norm": 473.8358154296875, "learning_rate": 8.22808821316235e-06, "loss": 26.3218, "step": 174470 }, { "epoch": 0.3524606390672156, "grad_norm": 326.0342102050781, "learning_rate": 8.22782163737973e-06, "loss": 14.8154, "step": 174480 }, { "epoch": 0.3524808396998994, "grad_norm": 461.9324951171875, "learning_rate": 8.227555045865097e-06, "loss": 26.4447, "step": 174490 }, { "epoch": 0.3525010403325832, "grad_norm": 233.15130615234375, "learning_rate": 8.227288438619754e-06, "loss": 17.8253, "step": 174500 }, { "epoch": 0.35252124096526705, "grad_norm": 228.75013732910156, "learning_rate": 8.227021815644998e-06, "loss": 17.5633, "step": 174510 }, { "epoch": 0.35254144159795087, "grad_norm": 418.84356689453125, "learning_rate": 8.226755176942127e-06, "loss": 18.3394, "step": 174520 }, { "epoch": 0.3525616422306347, "grad_norm": 259.8781433105469, "learning_rate": 8.226488522512445e-06, "loss": 17.6516, "step": 174530 }, { "epoch": 0.3525818428633185, "grad_norm": 243.96273803710938, "learning_rate": 8.22622185235725e-06, "loss": 17.9322, "step": 174540 }, { "epoch": 0.35260204349600227, "grad_norm": 257.5859069824219, "learning_rate": 8.22595516647784e-06, "loss": 17.4949, "step": 174550 }, { "epoch": 0.3526222441286861, "grad_norm": 204.86953735351562, "learning_rate": 8.225688464875514e-06, "loss": 12.1208, "step": 174560 }, { "epoch": 0.3526424447613699, "grad_norm": 311.3590087890625, "learning_rate": 8.225421747551575e-06, "loss": 13.1372, "step": 174570 }, { "epoch": 0.35266264539405373, "grad_norm": 362.9967346191406, "learning_rate": 8.225155014507322e-06, "loss": 39.0035, "step": 174580 }, { "epoch": 0.35268284602673755, "grad_norm": 537.2332153320312, "learning_rate": 8.224888265744055e-06, "loss": 17.5193, "step": 174590 }, { "epoch": 0.35270304665942137, "grad_norm": 214.26461791992188, "learning_rate": 8.224621501263073e-06, "loss": 13.992, "step": 174600 }, { "epoch": 0.3527232472921052, "grad_norm": 364.31964111328125, "learning_rate": 8.224354721065676e-06, "loss": 31.4486, "step": 174610 }, { "epoch": 0.352743447924789, "grad_norm": 251.72650146484375, "learning_rate": 8.224087925153167e-06, "loss": 14.3412, "step": 174620 }, { "epoch": 0.35276364855747283, "grad_norm": 504.68255615234375, "learning_rate": 8.223821113526843e-06, "loss": 31.9438, "step": 174630 }, { "epoch": 0.35278384919015665, "grad_norm": 136.0260772705078, "learning_rate": 8.223554286188007e-06, "loss": 15.349, "step": 174640 }, { "epoch": 0.35280404982284047, "grad_norm": 588.8232421875, "learning_rate": 8.223287443137957e-06, "loss": 19.0485, "step": 174650 }, { "epoch": 0.3528242504555243, "grad_norm": 604.52587890625, "learning_rate": 8.223020584377997e-06, "loss": 22.0748, "step": 174660 }, { "epoch": 0.35284445108820806, "grad_norm": 476.4415283203125, "learning_rate": 8.222753709909423e-06, "loss": 22.4559, "step": 174670 }, { "epoch": 0.3528646517208919, "grad_norm": 541.7493286132812, "learning_rate": 8.22248681973354e-06, "loss": 34.7495, "step": 174680 }, { "epoch": 0.3528848523535757, "grad_norm": 213.7854461669922, "learning_rate": 8.222219913851647e-06, "loss": 14.4349, "step": 174690 }, { "epoch": 0.3529050529862595, "grad_norm": 723.773681640625, "learning_rate": 8.221952992265046e-06, "loss": 15.0004, "step": 174700 }, { "epoch": 0.35292525361894334, "grad_norm": 336.739501953125, "learning_rate": 8.221686054975035e-06, "loss": 26.9251, "step": 174710 }, { "epoch": 0.35294545425162716, "grad_norm": 144.41162109375, "learning_rate": 8.221419101982917e-06, "loss": 44.6784, "step": 174720 }, { "epoch": 0.352965654884311, "grad_norm": 0.0, "learning_rate": 8.221152133289993e-06, "loss": 21.9902, "step": 174730 }, { "epoch": 0.3529858555169948, "grad_norm": 518.8461303710938, "learning_rate": 8.220885148897566e-06, "loss": 17.9668, "step": 174740 }, { "epoch": 0.3530060561496786, "grad_norm": 106.73846435546875, "learning_rate": 8.220618148806934e-06, "loss": 17.3433, "step": 174750 }, { "epoch": 0.35302625678236244, "grad_norm": 98.9426498413086, "learning_rate": 8.2203511330194e-06, "loss": 13.1909, "step": 174760 }, { "epoch": 0.35304645741504626, "grad_norm": 218.96884155273438, "learning_rate": 8.220084101536264e-06, "loss": 26.3707, "step": 174770 }, { "epoch": 0.3530666580477301, "grad_norm": 333.53387451171875, "learning_rate": 8.21981705435883e-06, "loss": 16.5159, "step": 174780 }, { "epoch": 0.3530868586804139, "grad_norm": 444.73431396484375, "learning_rate": 8.219549991488398e-06, "loss": 20.9137, "step": 174790 }, { "epoch": 0.35310705931309766, "grad_norm": 478.2015380859375, "learning_rate": 8.21928291292627e-06, "loss": 23.1764, "step": 174800 }, { "epoch": 0.3531272599457815, "grad_norm": 356.33258056640625, "learning_rate": 8.219015818673747e-06, "loss": 38.6808, "step": 174810 }, { "epoch": 0.3531474605784653, "grad_norm": 219.99203491210938, "learning_rate": 8.218748708732131e-06, "loss": 10.3895, "step": 174820 }, { "epoch": 0.3531676612111491, "grad_norm": 176.7386932373047, "learning_rate": 8.218481583102725e-06, "loss": 22.943, "step": 174830 }, { "epoch": 0.35318786184383294, "grad_norm": 250.08804321289062, "learning_rate": 8.218214441786829e-06, "loss": 20.9442, "step": 174840 }, { "epoch": 0.35320806247651676, "grad_norm": 285.0740051269531, "learning_rate": 8.217947284785748e-06, "loss": 27.0863, "step": 174850 }, { "epoch": 0.3532282631092006, "grad_norm": 207.52349853515625, "learning_rate": 8.217680112100782e-06, "loss": 12.4482, "step": 174860 }, { "epoch": 0.3532484637418844, "grad_norm": 445.2847900390625, "learning_rate": 8.217412923733232e-06, "loss": 24.72, "step": 174870 }, { "epoch": 0.3532686643745682, "grad_norm": 41.659271240234375, "learning_rate": 8.217145719684403e-06, "loss": 31.4946, "step": 174880 }, { "epoch": 0.35328886500725204, "grad_norm": 108.79118347167969, "learning_rate": 8.216878499955594e-06, "loss": 11.8081, "step": 174890 }, { "epoch": 0.35330906563993586, "grad_norm": 251.42115783691406, "learning_rate": 8.21661126454811e-06, "loss": 11.5741, "step": 174900 }, { "epoch": 0.3533292662726197, "grad_norm": 322.25018310546875, "learning_rate": 8.216344013463255e-06, "loss": 15.4385, "step": 174910 }, { "epoch": 0.3533494669053035, "grad_norm": 183.340087890625, "learning_rate": 8.216076746702327e-06, "loss": 20.2645, "step": 174920 }, { "epoch": 0.35336966753798726, "grad_norm": 206.0087432861328, "learning_rate": 8.215809464266632e-06, "loss": 11.5247, "step": 174930 }, { "epoch": 0.3533898681706711, "grad_norm": 321.6869201660156, "learning_rate": 8.21554216615747e-06, "loss": 22.5507, "step": 174940 }, { "epoch": 0.3534100688033549, "grad_norm": 302.82305908203125, "learning_rate": 8.215274852376148e-06, "loss": 18.331, "step": 174950 }, { "epoch": 0.3534302694360387, "grad_norm": 170.0658721923828, "learning_rate": 8.215007522923965e-06, "loss": 8.9385, "step": 174960 }, { "epoch": 0.35345047006872254, "grad_norm": 438.54864501953125, "learning_rate": 8.214740177802225e-06, "loss": 31.1996, "step": 174970 }, { "epoch": 0.35347067070140636, "grad_norm": 146.8841552734375, "learning_rate": 8.214472817012232e-06, "loss": 24.322, "step": 174980 }, { "epoch": 0.3534908713340902, "grad_norm": 323.52142333984375, "learning_rate": 8.214205440555289e-06, "loss": 9.9754, "step": 174990 }, { "epoch": 0.353511071966774, "grad_norm": 151.72030639648438, "learning_rate": 8.213938048432697e-06, "loss": 10.8631, "step": 175000 }, { "epoch": 0.3535312725994578, "grad_norm": 425.7978515625, "learning_rate": 8.213670640645762e-06, "loss": 32.4664, "step": 175010 }, { "epoch": 0.35355147323214164, "grad_norm": 480.8081359863281, "learning_rate": 8.213403217195785e-06, "loss": 20.7231, "step": 175020 }, { "epoch": 0.35357167386482546, "grad_norm": 305.27197265625, "learning_rate": 8.21313577808407e-06, "loss": 25.3755, "step": 175030 }, { "epoch": 0.3535918744975093, "grad_norm": 230.11886596679688, "learning_rate": 8.212868323311923e-06, "loss": 9.5406, "step": 175040 }, { "epoch": 0.3536120751301931, "grad_norm": 179.1160430908203, "learning_rate": 8.212600852880644e-06, "loss": 28.9431, "step": 175050 }, { "epoch": 0.35363227576287687, "grad_norm": 136.70730590820312, "learning_rate": 8.212333366791539e-06, "loss": 25.482, "step": 175060 }, { "epoch": 0.3536524763955607, "grad_norm": 705.8853759765625, "learning_rate": 8.21206586504591e-06, "loss": 20.4179, "step": 175070 }, { "epoch": 0.3536726770282445, "grad_norm": 547.7363891601562, "learning_rate": 8.211798347645062e-06, "loss": 15.234, "step": 175080 }, { "epoch": 0.35369287766092833, "grad_norm": 309.3493347167969, "learning_rate": 8.211530814590298e-06, "loss": 23.4557, "step": 175090 }, { "epoch": 0.35371307829361215, "grad_norm": 417.8152160644531, "learning_rate": 8.211263265882923e-06, "loss": 15.0498, "step": 175100 }, { "epoch": 0.35373327892629597, "grad_norm": 142.96307373046875, "learning_rate": 8.21099570152424e-06, "loss": 32.4781, "step": 175110 }, { "epoch": 0.3537534795589798, "grad_norm": 252.72061157226562, "learning_rate": 8.210728121515552e-06, "loss": 21.5485, "step": 175120 }, { "epoch": 0.3537736801916636, "grad_norm": 164.5246124267578, "learning_rate": 8.210460525858167e-06, "loss": 25.8506, "step": 175130 }, { "epoch": 0.35379388082434743, "grad_norm": 117.74649810791016, "learning_rate": 8.210192914553385e-06, "loss": 20.4239, "step": 175140 }, { "epoch": 0.35381408145703125, "grad_norm": 304.66455078125, "learning_rate": 8.209925287602513e-06, "loss": 20.0685, "step": 175150 }, { "epoch": 0.35383428208971507, "grad_norm": 233.05548095703125, "learning_rate": 8.209657645006854e-06, "loss": 11.8249, "step": 175160 }, { "epoch": 0.3538544827223989, "grad_norm": 277.2091979980469, "learning_rate": 8.209389986767713e-06, "loss": 14.7205, "step": 175170 }, { "epoch": 0.3538746833550827, "grad_norm": 368.87591552734375, "learning_rate": 8.209122312886394e-06, "loss": 13.8992, "step": 175180 }, { "epoch": 0.3538948839877665, "grad_norm": 523.1936645507812, "learning_rate": 8.208854623364202e-06, "loss": 24.3276, "step": 175190 }, { "epoch": 0.3539150846204503, "grad_norm": 26.511260986328125, "learning_rate": 8.208586918202444e-06, "loss": 19.4903, "step": 175200 }, { "epoch": 0.3539352852531341, "grad_norm": 0.0, "learning_rate": 8.208319197402418e-06, "loss": 9.2607, "step": 175210 }, { "epoch": 0.35395548588581793, "grad_norm": 200.88131713867188, "learning_rate": 8.208051460965438e-06, "loss": 21.8787, "step": 175220 }, { "epoch": 0.35397568651850175, "grad_norm": 319.2743835449219, "learning_rate": 8.207783708892802e-06, "loss": 12.332, "step": 175230 }, { "epoch": 0.3539958871511856, "grad_norm": 173.7791290283203, "learning_rate": 8.207515941185818e-06, "loss": 25.0948, "step": 175240 }, { "epoch": 0.3540160877838694, "grad_norm": 483.6893310546875, "learning_rate": 8.20724815784579e-06, "loss": 17.6312, "step": 175250 }, { "epoch": 0.3540362884165532, "grad_norm": 100.1073226928711, "learning_rate": 8.206980358874024e-06, "loss": 20.1125, "step": 175260 }, { "epoch": 0.35405648904923703, "grad_norm": 190.61241149902344, "learning_rate": 8.206712544271825e-06, "loss": 22.5687, "step": 175270 }, { "epoch": 0.35407668968192085, "grad_norm": 297.8707580566406, "learning_rate": 8.206444714040496e-06, "loss": 19.7534, "step": 175280 }, { "epoch": 0.3540968903146047, "grad_norm": 79.12356567382812, "learning_rate": 8.206176868181346e-06, "loss": 10.8004, "step": 175290 }, { "epoch": 0.3541170909472885, "grad_norm": 141.0797119140625, "learning_rate": 8.205909006695679e-06, "loss": 17.8415, "step": 175300 }, { "epoch": 0.35413729157997226, "grad_norm": 211.64031982421875, "learning_rate": 8.205641129584798e-06, "loss": 15.1691, "step": 175310 }, { "epoch": 0.3541574922126561, "grad_norm": 538.218505859375, "learning_rate": 8.205373236850013e-06, "loss": 18.5033, "step": 175320 }, { "epoch": 0.3541776928453399, "grad_norm": 28.33978271484375, "learning_rate": 8.205105328492627e-06, "loss": 20.7679, "step": 175330 }, { "epoch": 0.3541978934780237, "grad_norm": 273.95489501953125, "learning_rate": 8.204837404513946e-06, "loss": 18.7928, "step": 175340 }, { "epoch": 0.35421809411070754, "grad_norm": 0.0, "learning_rate": 8.204569464915278e-06, "loss": 12.0007, "step": 175350 }, { "epoch": 0.35423829474339136, "grad_norm": 421.96270751953125, "learning_rate": 8.204301509697925e-06, "loss": 14.6929, "step": 175360 }, { "epoch": 0.3542584953760752, "grad_norm": 476.93817138671875, "learning_rate": 8.204033538863196e-06, "loss": 26.5668, "step": 175370 }, { "epoch": 0.354278696008759, "grad_norm": 321.7572937011719, "learning_rate": 8.203765552412396e-06, "loss": 25.1483, "step": 175380 }, { "epoch": 0.3542988966414428, "grad_norm": 336.88201904296875, "learning_rate": 8.203497550346832e-06, "loss": 47.5871, "step": 175390 }, { "epoch": 0.35431909727412664, "grad_norm": 500.8783264160156, "learning_rate": 8.203229532667808e-06, "loss": 20.0528, "step": 175400 }, { "epoch": 0.35433929790681046, "grad_norm": 501.5291748046875, "learning_rate": 8.202961499376633e-06, "loss": 28.1791, "step": 175410 }, { "epoch": 0.3543594985394943, "grad_norm": 172.3847198486328, "learning_rate": 8.202693450474611e-06, "loss": 15.962, "step": 175420 }, { "epoch": 0.3543796991721781, "grad_norm": 530.6918334960938, "learning_rate": 8.20242538596305e-06, "loss": 20.9476, "step": 175430 }, { "epoch": 0.35439989980486186, "grad_norm": 209.90892028808594, "learning_rate": 8.202157305843256e-06, "loss": 15.4572, "step": 175440 }, { "epoch": 0.3544201004375457, "grad_norm": 0.0, "learning_rate": 8.201889210116536e-06, "loss": 10.8894, "step": 175450 }, { "epoch": 0.3544403010702295, "grad_norm": 208.8284149169922, "learning_rate": 8.201621098784198e-06, "loss": 27.1503, "step": 175460 }, { "epoch": 0.3544605017029133, "grad_norm": 139.80990600585938, "learning_rate": 8.201352971847544e-06, "loss": 20.8345, "step": 175470 }, { "epoch": 0.35448070233559714, "grad_norm": 384.1830139160156, "learning_rate": 8.201084829307886e-06, "loss": 15.9517, "step": 175480 }, { "epoch": 0.35450090296828096, "grad_norm": 13.795295715332031, "learning_rate": 8.200816671166529e-06, "loss": 19.8572, "step": 175490 }, { "epoch": 0.3545211036009648, "grad_norm": 40.53992462158203, "learning_rate": 8.200548497424779e-06, "loss": 35.1675, "step": 175500 }, { "epoch": 0.3545413042336486, "grad_norm": 234.2230224609375, "learning_rate": 8.200280308083944e-06, "loss": 16.1526, "step": 175510 }, { "epoch": 0.3545615048663324, "grad_norm": 322.57843017578125, "learning_rate": 8.200012103145329e-06, "loss": 14.1687, "step": 175520 }, { "epoch": 0.35458170549901624, "grad_norm": 234.22349548339844, "learning_rate": 8.199743882610245e-06, "loss": 19.0625, "step": 175530 }, { "epoch": 0.35460190613170006, "grad_norm": 91.15047454833984, "learning_rate": 8.199475646479997e-06, "loss": 17.5612, "step": 175540 }, { "epoch": 0.3546221067643839, "grad_norm": 250.09579467773438, "learning_rate": 8.199207394755892e-06, "loss": 28.6515, "step": 175550 }, { "epoch": 0.3546423073970677, "grad_norm": 235.8031463623047, "learning_rate": 8.19893912743924e-06, "loss": 11.9678, "step": 175560 }, { "epoch": 0.35466250802975147, "grad_norm": 170.77743530273438, "learning_rate": 8.198670844531345e-06, "loss": 9.5903, "step": 175570 }, { "epoch": 0.3546827086624353, "grad_norm": 186.59239196777344, "learning_rate": 8.198402546033518e-06, "loss": 30.1099, "step": 175580 }, { "epoch": 0.3547029092951191, "grad_norm": 291.4851379394531, "learning_rate": 8.198134231947064e-06, "loss": 14.6909, "step": 175590 }, { "epoch": 0.3547231099278029, "grad_norm": 156.68643188476562, "learning_rate": 8.197865902273291e-06, "loss": 30.9946, "step": 175600 }, { "epoch": 0.35474331056048675, "grad_norm": 272.32666015625, "learning_rate": 8.197597557013507e-06, "loss": 21.5301, "step": 175610 }, { "epoch": 0.35476351119317057, "grad_norm": 179.51123046875, "learning_rate": 8.197329196169022e-06, "loss": 30.3105, "step": 175620 }, { "epoch": 0.3547837118258544, "grad_norm": 114.15765380859375, "learning_rate": 8.197060819741141e-06, "loss": 19.0839, "step": 175630 }, { "epoch": 0.3548039124585382, "grad_norm": 250.39151000976562, "learning_rate": 8.196792427731175e-06, "loss": 20.8312, "step": 175640 }, { "epoch": 0.354824113091222, "grad_norm": 555.1744384765625, "learning_rate": 8.196524020140428e-06, "loss": 12.6154, "step": 175650 }, { "epoch": 0.35484431372390585, "grad_norm": 76.54805755615234, "learning_rate": 8.196255596970214e-06, "loss": 24.1315, "step": 175660 }, { "epoch": 0.35486451435658967, "grad_norm": 169.8928680419922, "learning_rate": 8.195987158221835e-06, "loss": 20.6391, "step": 175670 }, { "epoch": 0.3548847149892735, "grad_norm": 369.7685852050781, "learning_rate": 8.195718703896603e-06, "loss": 28.2212, "step": 175680 }, { "epoch": 0.3549049156219573, "grad_norm": 87.45378112792969, "learning_rate": 8.195450233995826e-06, "loss": 15.1272, "step": 175690 }, { "epoch": 0.35492511625464107, "grad_norm": 377.84149169921875, "learning_rate": 8.19518174852081e-06, "loss": 30.2202, "step": 175700 }, { "epoch": 0.3549453168873249, "grad_norm": 176.96658325195312, "learning_rate": 8.194913247472868e-06, "loss": 12.0734, "step": 175710 }, { "epoch": 0.3549655175200087, "grad_norm": 513.4501342773438, "learning_rate": 8.194644730853307e-06, "loss": 22.8988, "step": 175720 }, { "epoch": 0.35498571815269253, "grad_norm": 376.0449523925781, "learning_rate": 8.194376198663434e-06, "loss": 30.6006, "step": 175730 }, { "epoch": 0.35500591878537635, "grad_norm": 344.9508056640625, "learning_rate": 8.194107650904556e-06, "loss": 20.524, "step": 175740 }, { "epoch": 0.35502611941806017, "grad_norm": 318.5543518066406, "learning_rate": 8.19383908757799e-06, "loss": 15.1572, "step": 175750 }, { "epoch": 0.355046320050744, "grad_norm": 418.2071533203125, "learning_rate": 8.193570508685035e-06, "loss": 22.2993, "step": 175760 }, { "epoch": 0.3550665206834278, "grad_norm": 186.5985870361328, "learning_rate": 8.193301914227008e-06, "loss": 13.8489, "step": 175770 }, { "epoch": 0.35508672131611163, "grad_norm": 168.11489868164062, "learning_rate": 8.193033304205213e-06, "loss": 11.1392, "step": 175780 }, { "epoch": 0.35510692194879545, "grad_norm": 423.18414306640625, "learning_rate": 8.192764678620961e-06, "loss": 25.4606, "step": 175790 }, { "epoch": 0.35512712258147927, "grad_norm": 371.9605407714844, "learning_rate": 8.192496037475562e-06, "loss": 21.4399, "step": 175800 }, { "epoch": 0.3551473232141631, "grad_norm": 260.40673828125, "learning_rate": 8.192227380770326e-06, "loss": 30.2611, "step": 175810 }, { "epoch": 0.3551675238468469, "grad_norm": 231.99053955078125, "learning_rate": 8.191958708506557e-06, "loss": 22.1698, "step": 175820 }, { "epoch": 0.3551877244795307, "grad_norm": 273.18145751953125, "learning_rate": 8.19169002068557e-06, "loss": 14.495, "step": 175830 }, { "epoch": 0.3552079251122145, "grad_norm": 222.3088836669922, "learning_rate": 8.191421317308674e-06, "loss": 7.9144, "step": 175840 }, { "epoch": 0.3552281257448983, "grad_norm": 329.75067138671875, "learning_rate": 8.191152598377179e-06, "loss": 23.5507, "step": 175850 }, { "epoch": 0.35524832637758214, "grad_norm": 128.2028350830078, "learning_rate": 8.19088386389239e-06, "loss": 21.9357, "step": 175860 }, { "epoch": 0.35526852701026596, "grad_norm": 0.0, "learning_rate": 8.19061511385562e-06, "loss": 13.032, "step": 175870 }, { "epoch": 0.3552887276429498, "grad_norm": 449.61065673828125, "learning_rate": 8.190346348268182e-06, "loss": 22.1488, "step": 175880 }, { "epoch": 0.3553089282756336, "grad_norm": 329.2639465332031, "learning_rate": 8.190077567131381e-06, "loss": 34.8015, "step": 175890 }, { "epoch": 0.3553291289083174, "grad_norm": 335.06671142578125, "learning_rate": 8.189808770446528e-06, "loss": 31.1617, "step": 175900 }, { "epoch": 0.35534932954100124, "grad_norm": 245.0072021484375, "learning_rate": 8.189539958214934e-06, "loss": 36.5704, "step": 175910 }, { "epoch": 0.35536953017368506, "grad_norm": 927.8746948242188, "learning_rate": 8.18927113043791e-06, "loss": 22.1905, "step": 175920 }, { "epoch": 0.3553897308063689, "grad_norm": 162.74734497070312, "learning_rate": 8.189002287116765e-06, "loss": 23.1583, "step": 175930 }, { "epoch": 0.3554099314390527, "grad_norm": 199.93133544921875, "learning_rate": 8.188733428252811e-06, "loss": 16.9274, "step": 175940 }, { "epoch": 0.35543013207173646, "grad_norm": 226.02700805664062, "learning_rate": 8.188464553847356e-06, "loss": 18.173, "step": 175950 }, { "epoch": 0.3554503327044203, "grad_norm": 137.37139892578125, "learning_rate": 8.18819566390171e-06, "loss": 13.6537, "step": 175960 }, { "epoch": 0.3554705333371041, "grad_norm": 553.63916015625, "learning_rate": 8.187926758417188e-06, "loss": 24.5758, "step": 175970 }, { "epoch": 0.3554907339697879, "grad_norm": 225.13082885742188, "learning_rate": 8.187657837395095e-06, "loss": 13.649, "step": 175980 }, { "epoch": 0.35551093460247174, "grad_norm": 323.3514404296875, "learning_rate": 8.187388900836745e-06, "loss": 22.7982, "step": 175990 }, { "epoch": 0.35553113523515556, "grad_norm": 315.59124755859375, "learning_rate": 8.18711994874345e-06, "loss": 13.6599, "step": 176000 }, { "epoch": 0.3555513358678394, "grad_norm": 88.8068618774414, "learning_rate": 8.186850981116516e-06, "loss": 14.2833, "step": 176010 }, { "epoch": 0.3555715365005232, "grad_norm": 227.50718688964844, "learning_rate": 8.18658199795726e-06, "loss": 13.7803, "step": 176020 }, { "epoch": 0.355591737133207, "grad_norm": 315.2069396972656, "learning_rate": 8.186312999266987e-06, "loss": 19.0751, "step": 176030 }, { "epoch": 0.35561193776589084, "grad_norm": 73.15437316894531, "learning_rate": 8.186043985047012e-06, "loss": 16.045, "step": 176040 }, { "epoch": 0.35563213839857466, "grad_norm": 233.13204956054688, "learning_rate": 8.185774955298645e-06, "loss": 22.5059, "step": 176050 }, { "epoch": 0.3556523390312585, "grad_norm": 887.4598388671875, "learning_rate": 8.185505910023196e-06, "loss": 15.9155, "step": 176060 }, { "epoch": 0.3556725396639423, "grad_norm": 376.2738952636719, "learning_rate": 8.18523684922198e-06, "loss": 18.6787, "step": 176070 }, { "epoch": 0.35569274029662606, "grad_norm": 923.0476684570312, "learning_rate": 8.184967772896304e-06, "loss": 22.9332, "step": 176080 }, { "epoch": 0.3557129409293099, "grad_norm": 354.8746337890625, "learning_rate": 8.184698681047482e-06, "loss": 15.8167, "step": 176090 }, { "epoch": 0.3557331415619937, "grad_norm": 372.5491027832031, "learning_rate": 8.184429573676825e-06, "loss": 21.0081, "step": 176100 }, { "epoch": 0.3557533421946775, "grad_norm": 125.72209930419922, "learning_rate": 8.184160450785645e-06, "loss": 27.6008, "step": 176110 }, { "epoch": 0.35577354282736134, "grad_norm": 286.2005615234375, "learning_rate": 8.183891312375251e-06, "loss": 18.7734, "step": 176120 }, { "epoch": 0.35579374346004516, "grad_norm": 143.1041717529297, "learning_rate": 8.18362215844696e-06, "loss": 22.5819, "step": 176130 }, { "epoch": 0.355813944092729, "grad_norm": 224.83644104003906, "learning_rate": 8.183352989002079e-06, "loss": 17.0005, "step": 176140 }, { "epoch": 0.3558341447254128, "grad_norm": 214.79086303710938, "learning_rate": 8.183083804041922e-06, "loss": 18.9911, "step": 176150 }, { "epoch": 0.3558543453580966, "grad_norm": 546.8751831054688, "learning_rate": 8.1828146035678e-06, "loss": 32.1151, "step": 176160 }, { "epoch": 0.35587454599078044, "grad_norm": 419.1285705566406, "learning_rate": 8.182545387581026e-06, "loss": 18.8873, "step": 176170 }, { "epoch": 0.35589474662346426, "grad_norm": 228.05996704101562, "learning_rate": 8.182276156082911e-06, "loss": 26.743, "step": 176180 }, { "epoch": 0.3559149472561481, "grad_norm": 637.60986328125, "learning_rate": 8.182006909074769e-06, "loss": 39.3111, "step": 176190 }, { "epoch": 0.3559351478888319, "grad_norm": 110.31604766845703, "learning_rate": 8.181737646557912e-06, "loss": 15.2449, "step": 176200 }, { "epoch": 0.35595534852151567, "grad_norm": 266.8143310546875, "learning_rate": 8.181468368533651e-06, "loss": 26.8028, "step": 176210 }, { "epoch": 0.3559755491541995, "grad_norm": 393.4587097167969, "learning_rate": 8.181199075003298e-06, "loss": 19.8652, "step": 176220 }, { "epoch": 0.3559957497868833, "grad_norm": 148.6682891845703, "learning_rate": 8.180929765968168e-06, "loss": 25.5401, "step": 176230 }, { "epoch": 0.35601595041956713, "grad_norm": 383.6908874511719, "learning_rate": 8.18066044142957e-06, "loss": 18.3912, "step": 176240 }, { "epoch": 0.35603615105225095, "grad_norm": 481.74847412109375, "learning_rate": 8.18039110138882e-06, "loss": 46.5433, "step": 176250 }, { "epoch": 0.35605635168493477, "grad_norm": 473.62237548828125, "learning_rate": 8.18012174584723e-06, "loss": 19.9649, "step": 176260 }, { "epoch": 0.3560765523176186, "grad_norm": 337.64031982421875, "learning_rate": 8.179852374806112e-06, "loss": 18.7701, "step": 176270 }, { "epoch": 0.3560967529503024, "grad_norm": 154.29649353027344, "learning_rate": 8.179582988266778e-06, "loss": 12.4532, "step": 176280 }, { "epoch": 0.35611695358298623, "grad_norm": 510.5845031738281, "learning_rate": 8.179313586230544e-06, "loss": 26.9538, "step": 176290 }, { "epoch": 0.35613715421567005, "grad_norm": 506.058837890625, "learning_rate": 8.179044168698722e-06, "loss": 41.5752, "step": 176300 }, { "epoch": 0.35615735484835387, "grad_norm": 2.1274006366729736, "learning_rate": 8.178774735672622e-06, "loss": 34.8584, "step": 176310 }, { "epoch": 0.3561775554810377, "grad_norm": 324.5058288574219, "learning_rate": 8.17850528715356e-06, "loss": 30.2117, "step": 176320 }, { "epoch": 0.3561977561137215, "grad_norm": 675.0393676757812, "learning_rate": 8.178235823142849e-06, "loss": 21.6079, "step": 176330 }, { "epoch": 0.3562179567464053, "grad_norm": 363.8622741699219, "learning_rate": 8.177966343641803e-06, "loss": 20.7453, "step": 176340 }, { "epoch": 0.3562381573790891, "grad_norm": 150.9447479248047, "learning_rate": 8.177696848651733e-06, "loss": 17.2247, "step": 176350 }, { "epoch": 0.3562583580117729, "grad_norm": 367.2740478515625, "learning_rate": 8.177427338173955e-06, "loss": 15.8243, "step": 176360 }, { "epoch": 0.35627855864445673, "grad_norm": 112.62686920166016, "learning_rate": 8.17715781220978e-06, "loss": 9.8631, "step": 176370 }, { "epoch": 0.35629875927714055, "grad_norm": 422.5419616699219, "learning_rate": 8.176888270760524e-06, "loss": 38.1294, "step": 176380 }, { "epoch": 0.3563189599098244, "grad_norm": 110.13074493408203, "learning_rate": 8.1766187138275e-06, "loss": 14.1147, "step": 176390 }, { "epoch": 0.3563391605425082, "grad_norm": 382.3007507324219, "learning_rate": 8.176349141412022e-06, "loss": 27.1379, "step": 176400 }, { "epoch": 0.356359361175192, "grad_norm": 617.1687622070312, "learning_rate": 8.176079553515403e-06, "loss": 21.4513, "step": 176410 }, { "epoch": 0.35637956180787583, "grad_norm": 383.61358642578125, "learning_rate": 8.175809950138958e-06, "loss": 18.3975, "step": 176420 }, { "epoch": 0.35639976244055965, "grad_norm": 883.5342407226562, "learning_rate": 8.175540331284e-06, "loss": 40.8268, "step": 176430 }, { "epoch": 0.3564199630732435, "grad_norm": 1106.4774169921875, "learning_rate": 8.175270696951846e-06, "loss": 24.8918, "step": 176440 }, { "epoch": 0.3564401637059273, "grad_norm": 262.6790771484375, "learning_rate": 8.175001047143804e-06, "loss": 15.9025, "step": 176450 }, { "epoch": 0.35646036433861106, "grad_norm": 217.76075744628906, "learning_rate": 8.174731381861194e-06, "loss": 26.5666, "step": 176460 }, { "epoch": 0.3564805649712949, "grad_norm": 0.0, "learning_rate": 8.174461701105328e-06, "loss": 43.5036, "step": 176470 }, { "epoch": 0.3565007656039787, "grad_norm": 164.65237426757812, "learning_rate": 8.17419200487752e-06, "loss": 18.7218, "step": 176480 }, { "epoch": 0.3565209662366625, "grad_norm": 110.66068267822266, "learning_rate": 8.173922293179086e-06, "loss": 14.1646, "step": 176490 }, { "epoch": 0.35654116686934634, "grad_norm": 307.12591552734375, "learning_rate": 8.173652566011339e-06, "loss": 25.6079, "step": 176500 }, { "epoch": 0.35656136750203016, "grad_norm": 323.90472412109375, "learning_rate": 8.173382823375594e-06, "loss": 22.5367, "step": 176510 }, { "epoch": 0.356581568134714, "grad_norm": 297.72802734375, "learning_rate": 8.173113065273167e-06, "loss": 10.2356, "step": 176520 }, { "epoch": 0.3566017687673978, "grad_norm": 150.96859741210938, "learning_rate": 8.17284329170537e-06, "loss": 19.0485, "step": 176530 }, { "epoch": 0.3566219694000816, "grad_norm": 296.5851135253906, "learning_rate": 8.172573502673522e-06, "loss": 12.0142, "step": 176540 }, { "epoch": 0.35664217003276544, "grad_norm": 115.306640625, "learning_rate": 8.172303698178934e-06, "loss": 12.6151, "step": 176550 }, { "epoch": 0.35666237066544926, "grad_norm": 278.1151123046875, "learning_rate": 8.172033878222924e-06, "loss": 16.9826, "step": 176560 }, { "epoch": 0.3566825712981331, "grad_norm": 417.2292785644531, "learning_rate": 8.171764042806804e-06, "loss": 17.308, "step": 176570 }, { "epoch": 0.3567027719308169, "grad_norm": 39.85641860961914, "learning_rate": 8.171494191931892e-06, "loss": 20.2227, "step": 176580 }, { "epoch": 0.35672297256350066, "grad_norm": 264.5662841796875, "learning_rate": 8.171224325599502e-06, "loss": 20.716, "step": 176590 }, { "epoch": 0.3567431731961845, "grad_norm": 360.0034484863281, "learning_rate": 8.170954443810947e-06, "loss": 21.1833, "step": 176600 }, { "epoch": 0.3567633738288683, "grad_norm": 375.4242248535156, "learning_rate": 8.170684546567546e-06, "loss": 42.1606, "step": 176610 }, { "epoch": 0.3567835744615521, "grad_norm": 209.0796356201172, "learning_rate": 8.170414633870617e-06, "loss": 35.0889, "step": 176620 }, { "epoch": 0.35680377509423594, "grad_norm": 228.99008178710938, "learning_rate": 8.170144705721465e-06, "loss": 29.2379, "step": 176630 }, { "epoch": 0.35682397572691976, "grad_norm": 312.22149658203125, "learning_rate": 8.169874762121416e-06, "loss": 15.5635, "step": 176640 }, { "epoch": 0.3568441763596036, "grad_norm": 166.72775268554688, "learning_rate": 8.169604803071783e-06, "loss": 14.9203, "step": 176650 }, { "epoch": 0.3568643769922874, "grad_norm": 404.24676513671875, "learning_rate": 8.169334828573878e-06, "loss": 14.8266, "step": 176660 }, { "epoch": 0.3568845776249712, "grad_norm": 306.0473327636719, "learning_rate": 8.16906483862902e-06, "loss": 15.9096, "step": 176670 }, { "epoch": 0.35690477825765504, "grad_norm": 600.3517456054688, "learning_rate": 8.168794833238523e-06, "loss": 33.9163, "step": 176680 }, { "epoch": 0.35692497889033886, "grad_norm": 186.2722625732422, "learning_rate": 8.168524812403707e-06, "loss": 20.8257, "step": 176690 }, { "epoch": 0.3569451795230227, "grad_norm": 165.3099365234375, "learning_rate": 8.168254776125883e-06, "loss": 27.5813, "step": 176700 }, { "epoch": 0.3569653801557065, "grad_norm": 295.9667663574219, "learning_rate": 8.167984724406371e-06, "loss": 12.7908, "step": 176710 }, { "epoch": 0.35698558078839027, "grad_norm": 159.37147521972656, "learning_rate": 8.167714657246486e-06, "loss": 19.8727, "step": 176720 }, { "epoch": 0.3570057814210741, "grad_norm": 25.18267250061035, "learning_rate": 8.167444574647542e-06, "loss": 16.8759, "step": 176730 }, { "epoch": 0.3570259820537579, "grad_norm": 363.8515625, "learning_rate": 8.16717447661086e-06, "loss": 21.4275, "step": 176740 }, { "epoch": 0.3570461826864417, "grad_norm": 180.76992797851562, "learning_rate": 8.166904363137752e-06, "loss": 21.5547, "step": 176750 }, { "epoch": 0.35706638331912555, "grad_norm": 439.1484375, "learning_rate": 8.166634234229535e-06, "loss": 22.1359, "step": 176760 }, { "epoch": 0.35708658395180937, "grad_norm": 343.101318359375, "learning_rate": 8.166364089887528e-06, "loss": 24.1444, "step": 176770 }, { "epoch": 0.3571067845844932, "grad_norm": 353.5688171386719, "learning_rate": 8.166093930113048e-06, "loss": 17.2133, "step": 176780 }, { "epoch": 0.357126985217177, "grad_norm": 181.92340087890625, "learning_rate": 8.16582375490741e-06, "loss": 21.315, "step": 176790 }, { "epoch": 0.3571471858498608, "grad_norm": 219.6976776123047, "learning_rate": 8.165553564271928e-06, "loss": 18.929, "step": 176800 }, { "epoch": 0.35716738648254465, "grad_norm": 216.3524169921875, "learning_rate": 8.165283358207924e-06, "loss": 22.4783, "step": 176810 }, { "epoch": 0.35718758711522847, "grad_norm": 150.197265625, "learning_rate": 8.165013136716714e-06, "loss": 16.4053, "step": 176820 }, { "epoch": 0.3572077877479123, "grad_norm": 702.2295532226562, "learning_rate": 8.164742899799612e-06, "loss": 24.8309, "step": 176830 }, { "epoch": 0.3572279883805961, "grad_norm": 404.8049011230469, "learning_rate": 8.164472647457937e-06, "loss": 10.358, "step": 176840 }, { "epoch": 0.35724818901327987, "grad_norm": 348.59307861328125, "learning_rate": 8.164202379693008e-06, "loss": 18.9069, "step": 176850 }, { "epoch": 0.3572683896459637, "grad_norm": 156.34878540039062, "learning_rate": 8.163932096506137e-06, "loss": 22.5644, "step": 176860 }, { "epoch": 0.3572885902786475, "grad_norm": 490.8922119140625, "learning_rate": 8.163661797898647e-06, "loss": 14.5632, "step": 176870 }, { "epoch": 0.35730879091133133, "grad_norm": 0.0, "learning_rate": 8.163391483871853e-06, "loss": 11.738, "step": 176880 }, { "epoch": 0.35732899154401515, "grad_norm": 262.0473327636719, "learning_rate": 8.163121154427073e-06, "loss": 16.9874, "step": 176890 }, { "epoch": 0.35734919217669897, "grad_norm": 127.74826049804688, "learning_rate": 8.162850809565623e-06, "loss": 20.8088, "step": 176900 }, { "epoch": 0.3573693928093828, "grad_norm": 323.52935791015625, "learning_rate": 8.162580449288822e-06, "loss": 19.569, "step": 176910 }, { "epoch": 0.3573895934420666, "grad_norm": 341.8924255371094, "learning_rate": 8.162310073597987e-06, "loss": 37.4541, "step": 176920 }, { "epoch": 0.35740979407475043, "grad_norm": 286.8242492675781, "learning_rate": 8.162039682494438e-06, "loss": 15.837, "step": 176930 }, { "epoch": 0.35742999470743425, "grad_norm": 202.81439208984375, "learning_rate": 8.16176927597949e-06, "loss": 20.7446, "step": 176940 }, { "epoch": 0.35745019534011807, "grad_norm": 388.888916015625, "learning_rate": 8.161498854054462e-06, "loss": 11.3683, "step": 176950 }, { "epoch": 0.3574703959728019, "grad_norm": 157.1527099609375, "learning_rate": 8.161228416720673e-06, "loss": 12.9921, "step": 176960 }, { "epoch": 0.3574905966054857, "grad_norm": 660.94970703125, "learning_rate": 8.160957963979438e-06, "loss": 21.556, "step": 176970 }, { "epoch": 0.3575107972381695, "grad_norm": 632.6655883789062, "learning_rate": 8.160687495832078e-06, "loss": 25.912, "step": 176980 }, { "epoch": 0.3575309978708533, "grad_norm": 272.7314453125, "learning_rate": 8.160417012279911e-06, "loss": 19.5816, "step": 176990 }, { "epoch": 0.3575511985035371, "grad_norm": 558.574462890625, "learning_rate": 8.160146513324256e-06, "loss": 17.094, "step": 177000 }, { "epoch": 0.35757139913622094, "grad_norm": 221.9307098388672, "learning_rate": 8.159875998966427e-06, "loss": 15.569, "step": 177010 }, { "epoch": 0.35759159976890476, "grad_norm": 190.47988891601562, "learning_rate": 8.15960546920775e-06, "loss": 19.9028, "step": 177020 }, { "epoch": 0.3576118004015886, "grad_norm": 541.5205688476562, "learning_rate": 8.159334924049536e-06, "loss": 21.5741, "step": 177030 }, { "epoch": 0.3576320010342724, "grad_norm": 125.38908386230469, "learning_rate": 8.159064363493106e-06, "loss": 22.344, "step": 177040 }, { "epoch": 0.3576522016669562, "grad_norm": 229.14401245117188, "learning_rate": 8.158793787539782e-06, "loss": 31.6825, "step": 177050 }, { "epoch": 0.35767240229964004, "grad_norm": 469.8580322265625, "learning_rate": 8.158523196190879e-06, "loss": 22.6719, "step": 177060 }, { "epoch": 0.35769260293232386, "grad_norm": 233.1326446533203, "learning_rate": 8.158252589447717e-06, "loss": 31.0353, "step": 177070 }, { "epoch": 0.3577128035650077, "grad_norm": 932.0514526367188, "learning_rate": 8.157981967311614e-06, "loss": 20.7697, "step": 177080 }, { "epoch": 0.3577330041976915, "grad_norm": 254.15057373046875, "learning_rate": 8.15771132978389e-06, "loss": 22.6798, "step": 177090 }, { "epoch": 0.35775320483037526, "grad_norm": 180.07411193847656, "learning_rate": 8.157440676865866e-06, "loss": 12.6404, "step": 177100 }, { "epoch": 0.3577734054630591, "grad_norm": 522.2327270507812, "learning_rate": 8.15717000855886e-06, "loss": 21.2127, "step": 177110 }, { "epoch": 0.3577936060957429, "grad_norm": 319.7325134277344, "learning_rate": 8.156899324864187e-06, "loss": 19.7357, "step": 177120 }, { "epoch": 0.3578138067284267, "grad_norm": 89.53311157226562, "learning_rate": 8.15662862578317e-06, "loss": 31.3576, "step": 177130 }, { "epoch": 0.35783400736111054, "grad_norm": 141.28158569335938, "learning_rate": 8.15635791131713e-06, "loss": 14.3895, "step": 177140 }, { "epoch": 0.35785420799379436, "grad_norm": 369.7592468261719, "learning_rate": 8.156087181467382e-06, "loss": 23.2178, "step": 177150 }, { "epoch": 0.3578744086264782, "grad_norm": 139.03793334960938, "learning_rate": 8.15581643623525e-06, "loss": 26.8406, "step": 177160 }, { "epoch": 0.357894609259162, "grad_norm": 167.70462036132812, "learning_rate": 8.155545675622049e-06, "loss": 19.3414, "step": 177170 }, { "epoch": 0.3579148098918458, "grad_norm": 239.5955810546875, "learning_rate": 8.155274899629104e-06, "loss": 11.719, "step": 177180 }, { "epoch": 0.35793501052452964, "grad_norm": 116.39954376220703, "learning_rate": 8.155004108257731e-06, "loss": 11.4954, "step": 177190 }, { "epoch": 0.35795521115721346, "grad_norm": 690.7243041992188, "learning_rate": 8.154733301509249e-06, "loss": 28.0211, "step": 177200 }, { "epoch": 0.3579754117898973, "grad_norm": 107.19574737548828, "learning_rate": 8.154462479384982e-06, "loss": 24.3833, "step": 177210 }, { "epoch": 0.3579956124225811, "grad_norm": 0.0, "learning_rate": 8.154191641886244e-06, "loss": 18.5619, "step": 177220 }, { "epoch": 0.35801581305526486, "grad_norm": 404.8437194824219, "learning_rate": 8.15392078901436e-06, "loss": 20.3156, "step": 177230 }, { "epoch": 0.3580360136879487, "grad_norm": 204.3350067138672, "learning_rate": 8.15364992077065e-06, "loss": 6.7456, "step": 177240 }, { "epoch": 0.3580562143206325, "grad_norm": 151.2241973876953, "learning_rate": 8.153379037156433e-06, "loss": 11.97, "step": 177250 }, { "epoch": 0.3580764149533163, "grad_norm": 175.3900909423828, "learning_rate": 8.153108138173027e-06, "loss": 19.2949, "step": 177260 }, { "epoch": 0.35809661558600014, "grad_norm": 415.1009216308594, "learning_rate": 8.152837223821755e-06, "loss": 18.273, "step": 177270 }, { "epoch": 0.35811681621868396, "grad_norm": 570.0031127929688, "learning_rate": 8.152566294103936e-06, "loss": 13.1457, "step": 177280 }, { "epoch": 0.3581370168513678, "grad_norm": 296.63922119140625, "learning_rate": 8.152295349020893e-06, "loss": 17.44, "step": 177290 }, { "epoch": 0.3581572174840516, "grad_norm": 273.9325256347656, "learning_rate": 8.152024388573945e-06, "loss": 22.9373, "step": 177300 }, { "epoch": 0.3581774181167354, "grad_norm": 8.390029907226562, "learning_rate": 8.15175341276441e-06, "loss": 27.4427, "step": 177310 }, { "epoch": 0.35819761874941924, "grad_norm": 270.2481689453125, "learning_rate": 8.151482421593613e-06, "loss": 29.5323, "step": 177320 }, { "epoch": 0.35821781938210306, "grad_norm": 44.932926177978516, "learning_rate": 8.151211415062872e-06, "loss": 25.6954, "step": 177330 }, { "epoch": 0.3582380200147869, "grad_norm": 217.44796752929688, "learning_rate": 8.15094039317351e-06, "loss": 27.2521, "step": 177340 }, { "epoch": 0.3582582206474707, "grad_norm": 322.80712890625, "learning_rate": 8.150669355926848e-06, "loss": 17.4832, "step": 177350 }, { "epoch": 0.35827842128015447, "grad_norm": 39.42262649536133, "learning_rate": 8.150398303324201e-06, "loss": 13.042, "step": 177360 }, { "epoch": 0.3582986219128383, "grad_norm": 87.11055755615234, "learning_rate": 8.150127235366897e-06, "loss": 14.2974, "step": 177370 }, { "epoch": 0.3583188225455221, "grad_norm": 76.55110168457031, "learning_rate": 8.149856152056257e-06, "loss": 26.908, "step": 177380 }, { "epoch": 0.35833902317820593, "grad_norm": 223.3442840576172, "learning_rate": 8.149585053393599e-06, "loss": 16.5849, "step": 177390 }, { "epoch": 0.35835922381088975, "grad_norm": 119.38973999023438, "learning_rate": 8.149313939380244e-06, "loss": 21.5998, "step": 177400 }, { "epoch": 0.35837942444357357, "grad_norm": 107.10466003417969, "learning_rate": 8.149042810017515e-06, "loss": 13.9512, "step": 177410 }, { "epoch": 0.3583996250762574, "grad_norm": 120.8878402709961, "learning_rate": 8.148771665306736e-06, "loss": 19.8157, "step": 177420 }, { "epoch": 0.3584198257089412, "grad_norm": 391.2931823730469, "learning_rate": 8.148500505249224e-06, "loss": 18.7689, "step": 177430 }, { "epoch": 0.35844002634162503, "grad_norm": 275.0185546875, "learning_rate": 8.1482293298463e-06, "loss": 24.1996, "step": 177440 }, { "epoch": 0.35846022697430885, "grad_norm": 422.6229248046875, "learning_rate": 8.147958139099292e-06, "loss": 29.3922, "step": 177450 }, { "epoch": 0.35848042760699267, "grad_norm": 154.3513641357422, "learning_rate": 8.147686933009515e-06, "loss": 17.5235, "step": 177460 }, { "epoch": 0.3585006282396765, "grad_norm": 1579.990234375, "learning_rate": 8.147415711578295e-06, "loss": 46.6266, "step": 177470 }, { "epoch": 0.3585208288723603, "grad_norm": 272.43731689453125, "learning_rate": 8.147144474806954e-06, "loss": 15.571, "step": 177480 }, { "epoch": 0.3585410295050441, "grad_norm": 389.5348205566406, "learning_rate": 8.14687322269681e-06, "loss": 21.6047, "step": 177490 }, { "epoch": 0.3585612301377279, "grad_norm": 249.24114990234375, "learning_rate": 8.146601955249187e-06, "loss": 12.6231, "step": 177500 }, { "epoch": 0.3585814307704117, "grad_norm": 500.5956115722656, "learning_rate": 8.14633067246541e-06, "loss": 25.8884, "step": 177510 }, { "epoch": 0.35860163140309553, "grad_norm": 402.0751953125, "learning_rate": 8.146059374346798e-06, "loss": 17.9025, "step": 177520 }, { "epoch": 0.35862183203577935, "grad_norm": 394.17034912109375, "learning_rate": 8.145788060894675e-06, "loss": 15.8957, "step": 177530 }, { "epoch": 0.3586420326684632, "grad_norm": 243.1011962890625, "learning_rate": 8.145516732110362e-06, "loss": 18.7353, "step": 177540 }, { "epoch": 0.358662233301147, "grad_norm": 185.15869140625, "learning_rate": 8.14524538799518e-06, "loss": 16.6319, "step": 177550 }, { "epoch": 0.3586824339338308, "grad_norm": 538.5538940429688, "learning_rate": 8.144974028550456e-06, "loss": 39.6256, "step": 177560 }, { "epoch": 0.35870263456651463, "grad_norm": 522.7257080078125, "learning_rate": 8.14470265377751e-06, "loss": 26.1518, "step": 177570 }, { "epoch": 0.35872283519919845, "grad_norm": 101.4979476928711, "learning_rate": 8.144431263677663e-06, "loss": 11.1302, "step": 177580 }, { "epoch": 0.3587430358318823, "grad_norm": 295.199951171875, "learning_rate": 8.144159858252241e-06, "loss": 12.3895, "step": 177590 }, { "epoch": 0.3587632364645661, "grad_norm": 405.6873474121094, "learning_rate": 8.143888437502565e-06, "loss": 28.0685, "step": 177600 }, { "epoch": 0.3587834370972499, "grad_norm": 347.684326171875, "learning_rate": 8.143617001429957e-06, "loss": 26.4165, "step": 177610 }, { "epoch": 0.3588036377299337, "grad_norm": 225.56626892089844, "learning_rate": 8.143345550035742e-06, "loss": 17.0122, "step": 177620 }, { "epoch": 0.3588238383626175, "grad_norm": 236.11630249023438, "learning_rate": 8.14307408332124e-06, "loss": 21.9522, "step": 177630 }, { "epoch": 0.3588440389953013, "grad_norm": 3678.4501953125, "learning_rate": 8.14280260128778e-06, "loss": 46.8544, "step": 177640 }, { "epoch": 0.35886423962798514, "grad_norm": 225.96859741210938, "learning_rate": 8.142531103936677e-06, "loss": 19.1811, "step": 177650 }, { "epoch": 0.35888444026066896, "grad_norm": 425.1288146972656, "learning_rate": 8.142259591269261e-06, "loss": 16.0799, "step": 177660 }, { "epoch": 0.3589046408933528, "grad_norm": 172.07049560546875, "learning_rate": 8.141988063286853e-06, "loss": 13.2955, "step": 177670 }, { "epoch": 0.3589248415260366, "grad_norm": 105.05752563476562, "learning_rate": 8.141716519990776e-06, "loss": 29.5382, "step": 177680 }, { "epoch": 0.3589450421587204, "grad_norm": 912.9052734375, "learning_rate": 8.141444961382353e-06, "loss": 32.276, "step": 177690 }, { "epoch": 0.35896524279140424, "grad_norm": 395.7779541015625, "learning_rate": 8.141173387462908e-06, "loss": 30.4266, "step": 177700 }, { "epoch": 0.35898544342408806, "grad_norm": 319.66900634765625, "learning_rate": 8.140901798233766e-06, "loss": 21.1216, "step": 177710 }, { "epoch": 0.3590056440567719, "grad_norm": 334.337890625, "learning_rate": 8.14063019369625e-06, "loss": 31.9455, "step": 177720 }, { "epoch": 0.3590258446894557, "grad_norm": 62.061004638671875, "learning_rate": 8.140358573851682e-06, "loss": 28.3737, "step": 177730 }, { "epoch": 0.35904604532213946, "grad_norm": 159.56768798828125, "learning_rate": 8.140086938701387e-06, "loss": 12.9285, "step": 177740 }, { "epoch": 0.3590662459548233, "grad_norm": 118.68318176269531, "learning_rate": 8.139815288246692e-06, "loss": 17.3272, "step": 177750 }, { "epoch": 0.3590864465875071, "grad_norm": 471.5908203125, "learning_rate": 8.139543622488914e-06, "loss": 33.0401, "step": 177760 }, { "epoch": 0.3591066472201909, "grad_norm": 547.989990234375, "learning_rate": 8.139271941429383e-06, "loss": 13.9508, "step": 177770 }, { "epoch": 0.35912684785287474, "grad_norm": 41.12980651855469, "learning_rate": 8.139000245069421e-06, "loss": 16.849, "step": 177780 }, { "epoch": 0.35914704848555856, "grad_norm": 335.8427734375, "learning_rate": 8.138728533410354e-06, "loss": 23.3907, "step": 177790 }, { "epoch": 0.3591672491182424, "grad_norm": 288.4302673339844, "learning_rate": 8.138456806453503e-06, "loss": 12.2277, "step": 177800 }, { "epoch": 0.3591874497509262, "grad_norm": 370.1208801269531, "learning_rate": 8.138185064200195e-06, "loss": 28.0126, "step": 177810 }, { "epoch": 0.35920765038361, "grad_norm": 624.81884765625, "learning_rate": 8.137913306651754e-06, "loss": 29.3944, "step": 177820 }, { "epoch": 0.35922785101629384, "grad_norm": 307.536376953125, "learning_rate": 8.137641533809503e-06, "loss": 20.5508, "step": 177830 }, { "epoch": 0.35924805164897766, "grad_norm": 412.4454345703125, "learning_rate": 8.137369745674769e-06, "loss": 16.5365, "step": 177840 }, { "epoch": 0.3592682522816615, "grad_norm": 319.1728820800781, "learning_rate": 8.137097942248875e-06, "loss": 27.5594, "step": 177850 }, { "epoch": 0.3592884529143453, "grad_norm": 41.05485534667969, "learning_rate": 8.136826123533144e-06, "loss": 9.4377, "step": 177860 }, { "epoch": 0.35930865354702907, "grad_norm": 80.19956970214844, "learning_rate": 8.136554289528906e-06, "loss": 18.4676, "step": 177870 }, { "epoch": 0.3593288541797129, "grad_norm": 618.17626953125, "learning_rate": 8.136282440237481e-06, "loss": 25.0548, "step": 177880 }, { "epoch": 0.3593490548123967, "grad_norm": 267.80023193359375, "learning_rate": 8.136010575660197e-06, "loss": 15.7565, "step": 177890 }, { "epoch": 0.3593692554450805, "grad_norm": 81.16761016845703, "learning_rate": 8.135738695798377e-06, "loss": 19.8086, "step": 177900 }, { "epoch": 0.35938945607776435, "grad_norm": 81.21318817138672, "learning_rate": 8.135466800653347e-06, "loss": 8.4235, "step": 177910 }, { "epoch": 0.35940965671044817, "grad_norm": 447.4490051269531, "learning_rate": 8.135194890226432e-06, "loss": 21.812, "step": 177920 }, { "epoch": 0.359429857343132, "grad_norm": 496.26336669921875, "learning_rate": 8.134922964518957e-06, "loss": 28.2013, "step": 177930 }, { "epoch": 0.3594500579758158, "grad_norm": 165.397705078125, "learning_rate": 8.134651023532249e-06, "loss": 24.8207, "step": 177940 }, { "epoch": 0.3594702586084996, "grad_norm": 153.73236083984375, "learning_rate": 8.13437906726763e-06, "loss": 20.5578, "step": 177950 }, { "epoch": 0.35949045924118345, "grad_norm": 514.3815307617188, "learning_rate": 8.13410709572643e-06, "loss": 41.7923, "step": 177960 }, { "epoch": 0.35951065987386727, "grad_norm": 235.31784057617188, "learning_rate": 8.13383510890997e-06, "loss": 10.1691, "step": 177970 }, { "epoch": 0.3595308605065511, "grad_norm": 375.8195495605469, "learning_rate": 8.133563106819579e-06, "loss": 20.8949, "step": 177980 }, { "epoch": 0.3595510611392349, "grad_norm": 368.558837890625, "learning_rate": 8.13329108945658e-06, "loss": 15.6579, "step": 177990 }, { "epoch": 0.35957126177191867, "grad_norm": 418.9250183105469, "learning_rate": 8.133019056822303e-06, "loss": 21.4967, "step": 178000 }, { "epoch": 0.3595914624046025, "grad_norm": 272.7425231933594, "learning_rate": 8.132747008918069e-06, "loss": 30.6576, "step": 178010 }, { "epoch": 0.3596116630372863, "grad_norm": 124.95350646972656, "learning_rate": 8.132474945745207e-06, "loss": 23.6846, "step": 178020 }, { "epoch": 0.35963186366997013, "grad_norm": 12.834783554077148, "learning_rate": 8.132202867305043e-06, "loss": 12.2226, "step": 178030 }, { "epoch": 0.35965206430265395, "grad_norm": 438.0843200683594, "learning_rate": 8.1319307735989e-06, "loss": 28.5289, "step": 178040 }, { "epoch": 0.35967226493533777, "grad_norm": 708.1385498046875, "learning_rate": 8.131658664628108e-06, "loss": 20.9409, "step": 178050 }, { "epoch": 0.3596924655680216, "grad_norm": 289.4418640136719, "learning_rate": 8.131386540393991e-06, "loss": 20.7532, "step": 178060 }, { "epoch": 0.3597126662007054, "grad_norm": 1.7920266389846802, "learning_rate": 8.131114400897874e-06, "loss": 15.1668, "step": 178070 }, { "epoch": 0.35973286683338923, "grad_norm": 369.3517150878906, "learning_rate": 8.130842246141086e-06, "loss": 23.9419, "step": 178080 }, { "epoch": 0.35975306746607305, "grad_norm": 221.20957946777344, "learning_rate": 8.130570076124954e-06, "loss": 18.8577, "step": 178090 }, { "epoch": 0.35977326809875687, "grad_norm": 154.10816955566406, "learning_rate": 8.130297890850803e-06, "loss": 21.0588, "step": 178100 }, { "epoch": 0.3597934687314407, "grad_norm": 134.4233856201172, "learning_rate": 8.130025690319958e-06, "loss": 34.7971, "step": 178110 }, { "epoch": 0.3598136693641245, "grad_norm": 179.65940856933594, "learning_rate": 8.129753474533749e-06, "loss": 19.7977, "step": 178120 }, { "epoch": 0.3598338699968083, "grad_norm": 64.34607696533203, "learning_rate": 8.1294812434935e-06, "loss": 20.0377, "step": 178130 }, { "epoch": 0.3598540706294921, "grad_norm": 154.5516357421875, "learning_rate": 8.129208997200539e-06, "loss": 16.8429, "step": 178140 }, { "epoch": 0.3598742712621759, "grad_norm": 405.3206787109375, "learning_rate": 8.128936735656195e-06, "loss": 19.1083, "step": 178150 }, { "epoch": 0.35989447189485974, "grad_norm": 239.75535583496094, "learning_rate": 8.12866445886179e-06, "loss": 16.9172, "step": 178160 }, { "epoch": 0.35991467252754356, "grad_norm": 261.7799377441406, "learning_rate": 8.128392166818655e-06, "loss": 14.2787, "step": 178170 }, { "epoch": 0.3599348731602274, "grad_norm": 262.73846435546875, "learning_rate": 8.128119859528116e-06, "loss": 7.0851, "step": 178180 }, { "epoch": 0.3599550737929112, "grad_norm": 172.008544921875, "learning_rate": 8.127847536991501e-06, "loss": 27.7313, "step": 178190 }, { "epoch": 0.359975274425595, "grad_norm": 404.06207275390625, "learning_rate": 8.127575199210136e-06, "loss": 25.9626, "step": 178200 }, { "epoch": 0.35999547505827884, "grad_norm": 332.434814453125, "learning_rate": 8.127302846185348e-06, "loss": 17.3455, "step": 178210 }, { "epoch": 0.36001567569096266, "grad_norm": 251.35398864746094, "learning_rate": 8.127030477918466e-06, "loss": 26.1134, "step": 178220 }, { "epoch": 0.3600358763236465, "grad_norm": 194.0442657470703, "learning_rate": 8.126758094410816e-06, "loss": 18.9469, "step": 178230 }, { "epoch": 0.3600560769563303, "grad_norm": 171.9292449951172, "learning_rate": 8.126485695663728e-06, "loss": 16.4982, "step": 178240 }, { "epoch": 0.3600762775890141, "grad_norm": 477.8019714355469, "learning_rate": 8.126213281678527e-06, "loss": 32.4806, "step": 178250 }, { "epoch": 0.3600964782216979, "grad_norm": 236.08116149902344, "learning_rate": 8.12594085245654e-06, "loss": 19.2366, "step": 178260 }, { "epoch": 0.3601166788543817, "grad_norm": 307.8812561035156, "learning_rate": 8.1256684079991e-06, "loss": 13.6259, "step": 178270 }, { "epoch": 0.3601368794870655, "grad_norm": 292.23101806640625, "learning_rate": 8.125395948307528e-06, "loss": 13.6197, "step": 178280 }, { "epoch": 0.36015708011974934, "grad_norm": 286.25543212890625, "learning_rate": 8.125123473383156e-06, "loss": 17.982, "step": 178290 }, { "epoch": 0.36017728075243316, "grad_norm": 279.8094482421875, "learning_rate": 8.124850983227313e-06, "loss": 30.5059, "step": 178300 }, { "epoch": 0.360197481385117, "grad_norm": 249.96014404296875, "learning_rate": 8.124578477841323e-06, "loss": 15.1226, "step": 178310 }, { "epoch": 0.3602176820178008, "grad_norm": 173.1991729736328, "learning_rate": 8.124305957226518e-06, "loss": 16.8638, "step": 178320 }, { "epoch": 0.3602378826504846, "grad_norm": 311.6595153808594, "learning_rate": 8.124033421384223e-06, "loss": 12.6041, "step": 178330 }, { "epoch": 0.36025808328316844, "grad_norm": 301.28729248046875, "learning_rate": 8.12376087031577e-06, "loss": 27.2433, "step": 178340 }, { "epoch": 0.36027828391585226, "grad_norm": 126.6536865234375, "learning_rate": 8.123488304022485e-06, "loss": 6.031, "step": 178350 }, { "epoch": 0.3602984845485361, "grad_norm": 190.41201782226562, "learning_rate": 8.123215722505695e-06, "loss": 32.0682, "step": 178360 }, { "epoch": 0.3603186851812199, "grad_norm": 168.0041961669922, "learning_rate": 8.122943125766731e-06, "loss": 26.1911, "step": 178370 }, { "epoch": 0.36033888581390366, "grad_norm": 70.51453399658203, "learning_rate": 8.122670513806924e-06, "loss": 31.5093, "step": 178380 }, { "epoch": 0.3603590864465875, "grad_norm": 304.33245849609375, "learning_rate": 8.122397886627596e-06, "loss": 28.6152, "step": 178390 }, { "epoch": 0.3603792870792713, "grad_norm": 233.43251037597656, "learning_rate": 8.12212524423008e-06, "loss": 12.1088, "step": 178400 }, { "epoch": 0.3603994877119551, "grad_norm": 710.3818969726562, "learning_rate": 8.121852586615705e-06, "loss": 26.966, "step": 178410 }, { "epoch": 0.36041968834463894, "grad_norm": 107.73423767089844, "learning_rate": 8.121579913785799e-06, "loss": 23.78, "step": 178420 }, { "epoch": 0.36043988897732276, "grad_norm": 186.5164031982422, "learning_rate": 8.12130722574169e-06, "loss": 11.9608, "step": 178430 }, { "epoch": 0.3604600896100066, "grad_norm": 0.7160611152648926, "learning_rate": 8.12103452248471e-06, "loss": 18.2533, "step": 178440 }, { "epoch": 0.3604802902426904, "grad_norm": 47.80165100097656, "learning_rate": 8.120761804016186e-06, "loss": 9.6821, "step": 178450 }, { "epoch": 0.3605004908753742, "grad_norm": 161.3666229248047, "learning_rate": 8.120489070337448e-06, "loss": 27.2322, "step": 178460 }, { "epoch": 0.36052069150805804, "grad_norm": 372.595947265625, "learning_rate": 8.120216321449823e-06, "loss": 15.2317, "step": 178470 }, { "epoch": 0.36054089214074186, "grad_norm": 255.07553100585938, "learning_rate": 8.119943557354641e-06, "loss": 12.8705, "step": 178480 }, { "epoch": 0.3605610927734257, "grad_norm": 165.1239013671875, "learning_rate": 8.119670778053236e-06, "loss": 23.8745, "step": 178490 }, { "epoch": 0.3605812934061095, "grad_norm": 273.04998779296875, "learning_rate": 8.119397983546932e-06, "loss": 11.4876, "step": 178500 }, { "epoch": 0.36060149403879327, "grad_norm": 263.959228515625, "learning_rate": 8.119125173837061e-06, "loss": 14.5232, "step": 178510 }, { "epoch": 0.3606216946714771, "grad_norm": 170.6798553466797, "learning_rate": 8.118852348924951e-06, "loss": 24.4867, "step": 178520 }, { "epoch": 0.3606418953041609, "grad_norm": 151.57278442382812, "learning_rate": 8.118579508811934e-06, "loss": 31.5987, "step": 178530 }, { "epoch": 0.36066209593684473, "grad_norm": 144.7301025390625, "learning_rate": 8.118306653499339e-06, "loss": 31.7075, "step": 178540 }, { "epoch": 0.36068229656952855, "grad_norm": 371.1591491699219, "learning_rate": 8.118033782988496e-06, "loss": 20.1079, "step": 178550 }, { "epoch": 0.36070249720221237, "grad_norm": 379.10894775390625, "learning_rate": 8.117760897280733e-06, "loss": 40.2178, "step": 178560 }, { "epoch": 0.3607226978348962, "grad_norm": 0.0, "learning_rate": 8.117487996377383e-06, "loss": 33.1215, "step": 178570 }, { "epoch": 0.36074289846758, "grad_norm": 368.7367248535156, "learning_rate": 8.117215080279774e-06, "loss": 24.4064, "step": 178580 }, { "epoch": 0.36076309910026383, "grad_norm": 315.78936767578125, "learning_rate": 8.116942148989238e-06, "loss": 28.3929, "step": 178590 }, { "epoch": 0.36078329973294765, "grad_norm": 280.503173828125, "learning_rate": 8.116669202507102e-06, "loss": 36.6946, "step": 178600 }, { "epoch": 0.36080350036563147, "grad_norm": 1003.7651977539062, "learning_rate": 8.116396240834699e-06, "loss": 21.8953, "step": 178610 }, { "epoch": 0.3608237009983153, "grad_norm": 34.07648468017578, "learning_rate": 8.116123263973359e-06, "loss": 21.8889, "step": 178620 }, { "epoch": 0.3608439016309991, "grad_norm": 215.9710235595703, "learning_rate": 8.11585027192441e-06, "loss": 17.5469, "step": 178630 }, { "epoch": 0.3608641022636829, "grad_norm": 275.6328125, "learning_rate": 8.115577264689188e-06, "loss": 18.9846, "step": 178640 }, { "epoch": 0.3608843028963667, "grad_norm": 157.8005828857422, "learning_rate": 8.115304242269017e-06, "loss": 27.4687, "step": 178650 }, { "epoch": 0.3609045035290505, "grad_norm": 150.2907257080078, "learning_rate": 8.115031204665233e-06, "loss": 12.099, "step": 178660 }, { "epoch": 0.36092470416173433, "grad_norm": 434.804443359375, "learning_rate": 8.114758151879164e-06, "loss": 18.1176, "step": 178670 }, { "epoch": 0.36094490479441815, "grad_norm": 458.3905334472656, "learning_rate": 8.114485083912143e-06, "loss": 15.0536, "step": 178680 }, { "epoch": 0.360965105427102, "grad_norm": 591.88720703125, "learning_rate": 8.1142120007655e-06, "loss": 26.6359, "step": 178690 }, { "epoch": 0.3609853060597858, "grad_norm": 374.6562805175781, "learning_rate": 8.113938902440563e-06, "loss": 29.0822, "step": 178700 }, { "epoch": 0.3610055066924696, "grad_norm": 508.7466735839844, "learning_rate": 8.113665788938667e-06, "loss": 14.764, "step": 178710 }, { "epoch": 0.36102570732515343, "grad_norm": 224.59512329101562, "learning_rate": 8.113392660261141e-06, "loss": 17.9259, "step": 178720 }, { "epoch": 0.36104590795783725, "grad_norm": 302.5039367675781, "learning_rate": 8.113119516409315e-06, "loss": 17.0396, "step": 178730 }, { "epoch": 0.3610661085905211, "grad_norm": 177.86036682128906, "learning_rate": 8.112846357384526e-06, "loss": 9.0924, "step": 178740 }, { "epoch": 0.3610863092232049, "grad_norm": 268.8063049316406, "learning_rate": 8.112573183188099e-06, "loss": 27.7914, "step": 178750 }, { "epoch": 0.3611065098558887, "grad_norm": 307.8240051269531, "learning_rate": 8.112299993821366e-06, "loss": 21.5118, "step": 178760 }, { "epoch": 0.3611267104885725, "grad_norm": 296.3933410644531, "learning_rate": 8.112026789285664e-06, "loss": 15.8793, "step": 178770 }, { "epoch": 0.3611469111212563, "grad_norm": 406.0406799316406, "learning_rate": 8.111753569582319e-06, "loss": 26.0626, "step": 178780 }, { "epoch": 0.3611671117539401, "grad_norm": 438.4875183105469, "learning_rate": 8.111480334712664e-06, "loss": 18.9998, "step": 178790 }, { "epoch": 0.36118731238662394, "grad_norm": 216.7554473876953, "learning_rate": 8.111207084678033e-06, "loss": 16.2508, "step": 178800 }, { "epoch": 0.36120751301930776, "grad_norm": 120.41849517822266, "learning_rate": 8.110933819479754e-06, "loss": 13.2047, "step": 178810 }, { "epoch": 0.3612277136519916, "grad_norm": 163.44383239746094, "learning_rate": 8.110660539119163e-06, "loss": 5.2167, "step": 178820 }, { "epoch": 0.3612479142846754, "grad_norm": 418.9299621582031, "learning_rate": 8.110387243597588e-06, "loss": 23.6331, "step": 178830 }, { "epoch": 0.3612681149173592, "grad_norm": 291.6322326660156, "learning_rate": 8.110113932916363e-06, "loss": 18.2262, "step": 178840 }, { "epoch": 0.36128831555004304, "grad_norm": 100.39775848388672, "learning_rate": 8.10984060707682e-06, "loss": 9.8939, "step": 178850 }, { "epoch": 0.36130851618272686, "grad_norm": 201.0769805908203, "learning_rate": 8.109567266080292e-06, "loss": 32.9718, "step": 178860 }, { "epoch": 0.3613287168154107, "grad_norm": 332.34979248046875, "learning_rate": 8.109293909928111e-06, "loss": 24.4754, "step": 178870 }, { "epoch": 0.3613489174480945, "grad_norm": 141.46893310546875, "learning_rate": 8.109020538621607e-06, "loss": 30.8438, "step": 178880 }, { "epoch": 0.3613691180807783, "grad_norm": 367.4607238769531, "learning_rate": 8.108747152162113e-06, "loss": 11.1331, "step": 178890 }, { "epoch": 0.3613893187134621, "grad_norm": 193.5831298828125, "learning_rate": 8.108473750550965e-06, "loss": 24.8769, "step": 178900 }, { "epoch": 0.3614095193461459, "grad_norm": 511.9454650878906, "learning_rate": 8.10820033378949e-06, "loss": 24.8657, "step": 178910 }, { "epoch": 0.3614297199788297, "grad_norm": 739.9171142578125, "learning_rate": 8.107926901879027e-06, "loss": 25.2875, "step": 178920 }, { "epoch": 0.36144992061151354, "grad_norm": 173.3311309814453, "learning_rate": 8.107653454820902e-06, "loss": 9.0723, "step": 178930 }, { "epoch": 0.36147012124419736, "grad_norm": 313.6308288574219, "learning_rate": 8.107379992616453e-06, "loss": 36.7586, "step": 178940 }, { "epoch": 0.3614903218768812, "grad_norm": 294.92852783203125, "learning_rate": 8.10710651526701e-06, "loss": 13.1715, "step": 178950 }, { "epoch": 0.361510522509565, "grad_norm": 265.9114685058594, "learning_rate": 8.106833022773908e-06, "loss": 17.5056, "step": 178960 }, { "epoch": 0.3615307231422488, "grad_norm": 240.9871063232422, "learning_rate": 8.106559515138477e-06, "loss": 13.4077, "step": 178970 }, { "epoch": 0.36155092377493264, "grad_norm": 378.0356750488281, "learning_rate": 8.106285992362052e-06, "loss": 15.2255, "step": 178980 }, { "epoch": 0.36157112440761646, "grad_norm": 286.4148864746094, "learning_rate": 8.106012454445966e-06, "loss": 28.6605, "step": 178990 }, { "epoch": 0.3615913250403003, "grad_norm": 336.7850646972656, "learning_rate": 8.105738901391553e-06, "loss": 25.3096, "step": 179000 }, { "epoch": 0.3616115256729841, "grad_norm": 301.5975646972656, "learning_rate": 8.105465333200144e-06, "loss": 21.3966, "step": 179010 }, { "epoch": 0.36163172630566787, "grad_norm": 692.1522216796875, "learning_rate": 8.105191749873075e-06, "loss": 28.2927, "step": 179020 }, { "epoch": 0.3616519269383517, "grad_norm": 53.17116928100586, "learning_rate": 8.104918151411676e-06, "loss": 13.9719, "step": 179030 }, { "epoch": 0.3616721275710355, "grad_norm": 599.1129150390625, "learning_rate": 8.104644537817284e-06, "loss": 41.971, "step": 179040 }, { "epoch": 0.3616923282037193, "grad_norm": 254.13145446777344, "learning_rate": 8.10437090909123e-06, "loss": 28.6401, "step": 179050 }, { "epoch": 0.36171252883640315, "grad_norm": 205.14694213867188, "learning_rate": 8.10409726523485e-06, "loss": 15.237, "step": 179060 }, { "epoch": 0.36173272946908697, "grad_norm": 340.8719482421875, "learning_rate": 8.103823606249476e-06, "loss": 18.8731, "step": 179070 }, { "epoch": 0.3617529301017708, "grad_norm": 309.194091796875, "learning_rate": 8.103549932136442e-06, "loss": 12.9702, "step": 179080 }, { "epoch": 0.3617731307344546, "grad_norm": 275.0919494628906, "learning_rate": 8.10327624289708e-06, "loss": 19.3744, "step": 179090 }, { "epoch": 0.3617933313671384, "grad_norm": 80.95147705078125, "learning_rate": 8.103002538532729e-06, "loss": 15.834, "step": 179100 }, { "epoch": 0.36181353199982225, "grad_norm": 251.77972412109375, "learning_rate": 8.102728819044718e-06, "loss": 26.2745, "step": 179110 }, { "epoch": 0.36183373263250607, "grad_norm": 643.5350341796875, "learning_rate": 8.102455084434385e-06, "loss": 38.1389, "step": 179120 }, { "epoch": 0.3618539332651899, "grad_norm": 379.8313293457031, "learning_rate": 8.102181334703061e-06, "loss": 11.8509, "step": 179130 }, { "epoch": 0.3618741338978737, "grad_norm": 83.8715591430664, "learning_rate": 8.101907569852081e-06, "loss": 8.7854, "step": 179140 }, { "epoch": 0.36189433453055747, "grad_norm": 102.44625091552734, "learning_rate": 8.101633789882781e-06, "loss": 17.9957, "step": 179150 }, { "epoch": 0.3619145351632413, "grad_norm": 417.5524597167969, "learning_rate": 8.101359994796494e-06, "loss": 20.7315, "step": 179160 }, { "epoch": 0.3619347357959251, "grad_norm": 226.74754333496094, "learning_rate": 8.101086184594554e-06, "loss": 15.6651, "step": 179170 }, { "epoch": 0.36195493642860893, "grad_norm": 15.949031829833984, "learning_rate": 8.100812359278294e-06, "loss": 10.568, "step": 179180 }, { "epoch": 0.36197513706129275, "grad_norm": 254.84605407714844, "learning_rate": 8.100538518849053e-06, "loss": 13.5963, "step": 179190 }, { "epoch": 0.36199533769397657, "grad_norm": 228.72442626953125, "learning_rate": 8.100264663308165e-06, "loss": 13.5986, "step": 179200 }, { "epoch": 0.3620155383266604, "grad_norm": 318.988525390625, "learning_rate": 8.09999079265696e-06, "loss": 16.5834, "step": 179210 }, { "epoch": 0.3620357389593442, "grad_norm": 210.2606964111328, "learning_rate": 8.099716906896776e-06, "loss": 40.0313, "step": 179220 }, { "epoch": 0.36205593959202803, "grad_norm": 214.91162109375, "learning_rate": 8.099443006028948e-06, "loss": 23.4077, "step": 179230 }, { "epoch": 0.36207614022471185, "grad_norm": 585.205810546875, "learning_rate": 8.099169090054812e-06, "loss": 21.4609, "step": 179240 }, { "epoch": 0.36209634085739567, "grad_norm": 478.70654296875, "learning_rate": 8.098895158975702e-06, "loss": 22.41, "step": 179250 }, { "epoch": 0.3621165414900795, "grad_norm": 457.42181396484375, "learning_rate": 8.098621212792952e-06, "loss": 20.5292, "step": 179260 }, { "epoch": 0.3621367421227633, "grad_norm": 8153.09814453125, "learning_rate": 8.098347251507896e-06, "loss": 55.5916, "step": 179270 }, { "epoch": 0.3621569427554471, "grad_norm": 198.93299865722656, "learning_rate": 8.098073275121876e-06, "loss": 17.4809, "step": 179280 }, { "epoch": 0.3621771433881309, "grad_norm": 238.85089111328125, "learning_rate": 8.097799283636219e-06, "loss": 20.1875, "step": 179290 }, { "epoch": 0.3621973440208147, "grad_norm": 377.8941955566406, "learning_rate": 8.097525277052265e-06, "loss": 15.8006, "step": 179300 }, { "epoch": 0.36221754465349854, "grad_norm": 475.4827880859375, "learning_rate": 8.097251255371347e-06, "loss": 23.9995, "step": 179310 }, { "epoch": 0.36223774528618236, "grad_norm": 211.14712524414062, "learning_rate": 8.096977218594803e-06, "loss": 20.1365, "step": 179320 }, { "epoch": 0.3622579459188662, "grad_norm": 139.49240112304688, "learning_rate": 8.09670316672397e-06, "loss": 25.3475, "step": 179330 }, { "epoch": 0.36227814655155, "grad_norm": 328.6842346191406, "learning_rate": 8.096429099760176e-06, "loss": 15.6569, "step": 179340 }, { "epoch": 0.3622983471842338, "grad_norm": 250.64974975585938, "learning_rate": 8.096155017704768e-06, "loss": 13.4918, "step": 179350 }, { "epoch": 0.36231854781691764, "grad_norm": 568.8255615234375, "learning_rate": 8.095880920559073e-06, "loss": 25.8204, "step": 179360 }, { "epoch": 0.36233874844960146, "grad_norm": 590.7182006835938, "learning_rate": 8.09560680832443e-06, "loss": 29.403, "step": 179370 }, { "epoch": 0.3623589490822853, "grad_norm": 440.53619384765625, "learning_rate": 8.095332681002175e-06, "loss": 32.4021, "step": 179380 }, { "epoch": 0.3623791497149691, "grad_norm": 104.9167251586914, "learning_rate": 8.095058538593645e-06, "loss": 13.8946, "step": 179390 }, { "epoch": 0.3623993503476529, "grad_norm": 487.217041015625, "learning_rate": 8.094784381100174e-06, "loss": 18.0514, "step": 179400 }, { "epoch": 0.3624195509803367, "grad_norm": 336.1904296875, "learning_rate": 8.094510208523099e-06, "loss": 32.6895, "step": 179410 }, { "epoch": 0.3624397516130205, "grad_norm": 348.8056335449219, "learning_rate": 8.094236020863758e-06, "loss": 27.8041, "step": 179420 }, { "epoch": 0.3624599522457043, "grad_norm": 779.1187744140625, "learning_rate": 8.093961818123483e-06, "loss": 24.2736, "step": 179430 }, { "epoch": 0.36248015287838814, "grad_norm": 270.6576843261719, "learning_rate": 8.093687600303616e-06, "loss": 12.3511, "step": 179440 }, { "epoch": 0.36250035351107196, "grad_norm": 310.16229248046875, "learning_rate": 8.09341336740549e-06, "loss": 13.923, "step": 179450 }, { "epoch": 0.3625205541437558, "grad_norm": 248.34266662597656, "learning_rate": 8.093139119430442e-06, "loss": 20.8611, "step": 179460 }, { "epoch": 0.3625407547764396, "grad_norm": 119.07149505615234, "learning_rate": 8.09286485637981e-06, "loss": 20.6115, "step": 179470 }, { "epoch": 0.3625609554091234, "grad_norm": 399.3204040527344, "learning_rate": 8.092590578254931e-06, "loss": 27.2081, "step": 179480 }, { "epoch": 0.36258115604180724, "grad_norm": 600.7573852539062, "learning_rate": 8.092316285057138e-06, "loss": 21.749, "step": 179490 }, { "epoch": 0.36260135667449106, "grad_norm": 459.50347900390625, "learning_rate": 8.092041976787772e-06, "loss": 20.2971, "step": 179500 }, { "epoch": 0.3626215573071749, "grad_norm": 401.5998229980469, "learning_rate": 8.091767653448169e-06, "loss": 20.64, "step": 179510 }, { "epoch": 0.3626417579398587, "grad_norm": 655.1898803710938, "learning_rate": 8.091493315039662e-06, "loss": 32.7677, "step": 179520 }, { "epoch": 0.36266195857254246, "grad_norm": 258.7969970703125, "learning_rate": 8.091218961563593e-06, "loss": 13.6186, "step": 179530 }, { "epoch": 0.3626821592052263, "grad_norm": 395.3252258300781, "learning_rate": 8.0909445930213e-06, "loss": 24.8892, "step": 179540 }, { "epoch": 0.3627023598379101, "grad_norm": 1308.294677734375, "learning_rate": 8.090670209414117e-06, "loss": 19.8494, "step": 179550 }, { "epoch": 0.3627225604705939, "grad_norm": 316.4848327636719, "learning_rate": 8.090395810743382e-06, "loss": 20.3628, "step": 179560 }, { "epoch": 0.36274276110327774, "grad_norm": 252.91949462890625, "learning_rate": 8.090121397010432e-06, "loss": 16.3285, "step": 179570 }, { "epoch": 0.36276296173596156, "grad_norm": 148.73690795898438, "learning_rate": 8.089846968216605e-06, "loss": 21.2634, "step": 179580 }, { "epoch": 0.3627831623686454, "grad_norm": 133.06092834472656, "learning_rate": 8.08957252436324e-06, "loss": 10.9878, "step": 179590 }, { "epoch": 0.3628033630013292, "grad_norm": 0.0, "learning_rate": 8.089298065451673e-06, "loss": 16.2394, "step": 179600 }, { "epoch": 0.362823563634013, "grad_norm": 45.15746307373047, "learning_rate": 8.08902359148324e-06, "loss": 24.4485, "step": 179610 }, { "epoch": 0.36284376426669684, "grad_norm": 297.2760314941406, "learning_rate": 8.088749102459284e-06, "loss": 26.2992, "step": 179620 }, { "epoch": 0.36286396489938066, "grad_norm": 109.16390228271484, "learning_rate": 8.088474598381134e-06, "loss": 15.4871, "step": 179630 }, { "epoch": 0.3628841655320645, "grad_norm": 278.6034240722656, "learning_rate": 8.088200079250139e-06, "loss": 19.4618, "step": 179640 }, { "epoch": 0.3629043661647483, "grad_norm": 268.61273193359375, "learning_rate": 8.08792554506763e-06, "loss": 14.559, "step": 179650 }, { "epoch": 0.36292456679743207, "grad_norm": 363.05902099609375, "learning_rate": 8.087650995834946e-06, "loss": 11.5801, "step": 179660 }, { "epoch": 0.3629447674301159, "grad_norm": 113.53397369384766, "learning_rate": 8.087376431553425e-06, "loss": 23.9318, "step": 179670 }, { "epoch": 0.3629649680627997, "grad_norm": 682.62255859375, "learning_rate": 8.087101852224406e-06, "loss": 17.3047, "step": 179680 }, { "epoch": 0.36298516869548353, "grad_norm": 0.0, "learning_rate": 8.086827257849225e-06, "loss": 14.565, "step": 179690 }, { "epoch": 0.36300536932816735, "grad_norm": 259.8128967285156, "learning_rate": 8.086552648429225e-06, "loss": 13.3646, "step": 179700 }, { "epoch": 0.36302556996085117, "grad_norm": 242.26133728027344, "learning_rate": 8.08627802396574e-06, "loss": 22.2276, "step": 179710 }, { "epoch": 0.363045770593535, "grad_norm": 382.8158874511719, "learning_rate": 8.086003384460113e-06, "loss": 23.9029, "step": 179720 }, { "epoch": 0.3630659712262188, "grad_norm": 200.7357635498047, "learning_rate": 8.085728729913677e-06, "loss": 14.8566, "step": 179730 }, { "epoch": 0.36308617185890263, "grad_norm": 473.85858154296875, "learning_rate": 8.085454060327775e-06, "loss": 20.0617, "step": 179740 }, { "epoch": 0.36310637249158645, "grad_norm": 316.0264587402344, "learning_rate": 8.085179375703745e-06, "loss": 32.3603, "step": 179750 }, { "epoch": 0.36312657312427027, "grad_norm": 427.6793212890625, "learning_rate": 8.084904676042923e-06, "loss": 20.8783, "step": 179760 }, { "epoch": 0.3631467737569541, "grad_norm": 450.01904296875, "learning_rate": 8.08462996134665e-06, "loss": 14.128, "step": 179770 }, { "epoch": 0.3631669743896379, "grad_norm": 311.484619140625, "learning_rate": 8.084355231616266e-06, "loss": 11.7866, "step": 179780 }, { "epoch": 0.3631871750223217, "grad_norm": 155.796875, "learning_rate": 8.084080486853106e-06, "loss": 25.2527, "step": 179790 }, { "epoch": 0.3632073756550055, "grad_norm": 71.85171508789062, "learning_rate": 8.083805727058514e-06, "loss": 42.2734, "step": 179800 }, { "epoch": 0.3632275762876893, "grad_norm": 518.867919921875, "learning_rate": 8.083530952233826e-06, "loss": 30.1017, "step": 179810 }, { "epoch": 0.36324777692037313, "grad_norm": 450.5592041015625, "learning_rate": 8.083256162380382e-06, "loss": 14.7259, "step": 179820 }, { "epoch": 0.36326797755305695, "grad_norm": 249.392822265625, "learning_rate": 8.082981357499522e-06, "loss": 15.3012, "step": 179830 }, { "epoch": 0.3632881781857408, "grad_norm": 48.18101119995117, "learning_rate": 8.082706537592585e-06, "loss": 32.7325, "step": 179840 }, { "epoch": 0.3633083788184246, "grad_norm": 333.1465148925781, "learning_rate": 8.08243170266091e-06, "loss": 18.8248, "step": 179850 }, { "epoch": 0.3633285794511084, "grad_norm": 203.4325408935547, "learning_rate": 8.082156852705837e-06, "loss": 11.5344, "step": 179860 }, { "epoch": 0.36334878008379223, "grad_norm": 190.8186798095703, "learning_rate": 8.081881987728704e-06, "loss": 10.2881, "step": 179870 }, { "epoch": 0.36336898071647605, "grad_norm": 154.9267578125, "learning_rate": 8.081607107730853e-06, "loss": 22.7857, "step": 179880 }, { "epoch": 0.3633891813491599, "grad_norm": 409.93231201171875, "learning_rate": 8.081332212713625e-06, "loss": 30.7053, "step": 179890 }, { "epoch": 0.3634093819818437, "grad_norm": 481.9869384765625, "learning_rate": 8.081057302678352e-06, "loss": 19.8066, "step": 179900 }, { "epoch": 0.3634295826145275, "grad_norm": 375.48876953125, "learning_rate": 8.080782377626383e-06, "loss": 16.9392, "step": 179910 }, { "epoch": 0.3634497832472113, "grad_norm": 90.09342193603516, "learning_rate": 8.080507437559056e-06, "loss": 20.7799, "step": 179920 }, { "epoch": 0.3634699838798951, "grad_norm": 581.5564575195312, "learning_rate": 8.080232482477705e-06, "loss": 27.3953, "step": 179930 }, { "epoch": 0.3634901845125789, "grad_norm": 136.51170349121094, "learning_rate": 8.079957512383679e-06, "loss": 18.1522, "step": 179940 }, { "epoch": 0.36351038514526274, "grad_norm": 470.9993896484375, "learning_rate": 8.07968252727831e-06, "loss": 23.1332, "step": 179950 }, { "epoch": 0.36353058577794656, "grad_norm": 195.46852111816406, "learning_rate": 8.079407527162944e-06, "loss": 31.0221, "step": 179960 }, { "epoch": 0.3635507864106304, "grad_norm": 290.1023864746094, "learning_rate": 8.079132512038917e-06, "loss": 17.3054, "step": 179970 }, { "epoch": 0.3635709870433142, "grad_norm": 275.0101623535156, "learning_rate": 8.078857481907573e-06, "loss": 12.4122, "step": 179980 }, { "epoch": 0.363591187675998, "grad_norm": 176.9078826904297, "learning_rate": 8.078582436770252e-06, "loss": 22.6297, "step": 179990 }, { "epoch": 0.36361138830868184, "grad_norm": 1177.5875244140625, "learning_rate": 8.078307376628292e-06, "loss": 26.6561, "step": 180000 }, { "epoch": 0.36363158894136566, "grad_norm": 202.5240936279297, "learning_rate": 8.078032301483036e-06, "loss": 23.2747, "step": 180010 }, { "epoch": 0.3636517895740495, "grad_norm": 247.7216796875, "learning_rate": 8.077757211335823e-06, "loss": 32.7405, "step": 180020 }, { "epoch": 0.3636719902067333, "grad_norm": 697.3446655273438, "learning_rate": 8.077482106187997e-06, "loss": 27.2772, "step": 180030 }, { "epoch": 0.3636921908394171, "grad_norm": 535.360595703125, "learning_rate": 8.077206986040894e-06, "loss": 31.7054, "step": 180040 }, { "epoch": 0.3637123914721009, "grad_norm": 483.3515930175781, "learning_rate": 8.076931850895858e-06, "loss": 14.1553, "step": 180050 }, { "epoch": 0.3637325921047847, "grad_norm": 167.90663146972656, "learning_rate": 8.07665670075423e-06, "loss": 18.0003, "step": 180060 }, { "epoch": 0.3637527927374685, "grad_norm": 239.6351318359375, "learning_rate": 8.07638153561735e-06, "loss": 22.6884, "step": 180070 }, { "epoch": 0.36377299337015234, "grad_norm": 63.19953536987305, "learning_rate": 8.076106355486559e-06, "loss": 20.5155, "step": 180080 }, { "epoch": 0.36379319400283616, "grad_norm": 358.9842224121094, "learning_rate": 8.0758311603632e-06, "loss": 17.3132, "step": 180090 }, { "epoch": 0.36381339463552, "grad_norm": 610.8993530273438, "learning_rate": 8.075555950248613e-06, "loss": 24.5383, "step": 180100 }, { "epoch": 0.3638335952682038, "grad_norm": 333.34814453125, "learning_rate": 8.075280725144136e-06, "loss": 25.2989, "step": 180110 }, { "epoch": 0.3638537959008876, "grad_norm": 342.98101806640625, "learning_rate": 8.075005485051117e-06, "loss": 34.5685, "step": 180120 }, { "epoch": 0.36387399653357144, "grad_norm": 73.77037048339844, "learning_rate": 8.074730229970895e-06, "loss": 20.427, "step": 180130 }, { "epoch": 0.36389419716625526, "grad_norm": 265.3182678222656, "learning_rate": 8.074454959904807e-06, "loss": 19.9213, "step": 180140 }, { "epoch": 0.3639143977989391, "grad_norm": 10.31361198425293, "learning_rate": 8.0741796748542e-06, "loss": 11.6826, "step": 180150 }, { "epoch": 0.3639345984316229, "grad_norm": 308.7628479003906, "learning_rate": 8.073904374820416e-06, "loss": 28.5231, "step": 180160 }, { "epoch": 0.36395479906430667, "grad_norm": 403.0682678222656, "learning_rate": 8.073629059804795e-06, "loss": 25.0095, "step": 180170 }, { "epoch": 0.3639749996969905, "grad_norm": 41.81344223022461, "learning_rate": 8.073353729808676e-06, "loss": 12.7195, "step": 180180 }, { "epoch": 0.3639952003296743, "grad_norm": 407.87384033203125, "learning_rate": 8.073078384833406e-06, "loss": 22.4835, "step": 180190 }, { "epoch": 0.3640154009623581, "grad_norm": 372.976318359375, "learning_rate": 8.072803024880322e-06, "loss": 15.3018, "step": 180200 }, { "epoch": 0.36403560159504195, "grad_norm": 296.61492919921875, "learning_rate": 8.072527649950772e-06, "loss": 16.108, "step": 180210 }, { "epoch": 0.36405580222772577, "grad_norm": 150.5760040283203, "learning_rate": 8.072252260046094e-06, "loss": 26.4811, "step": 180220 }, { "epoch": 0.3640760028604096, "grad_norm": 171.39198303222656, "learning_rate": 8.071976855167629e-06, "loss": 24.163, "step": 180230 }, { "epoch": 0.3640962034930934, "grad_norm": 75.21175384521484, "learning_rate": 8.071701435316724e-06, "loss": 38.5557, "step": 180240 }, { "epoch": 0.3641164041257772, "grad_norm": 92.39642333984375, "learning_rate": 8.071426000494716e-06, "loss": 19.9819, "step": 180250 }, { "epoch": 0.36413660475846105, "grad_norm": 263.0532531738281, "learning_rate": 8.071150550702953e-06, "loss": 34.3627, "step": 180260 }, { "epoch": 0.36415680539114487, "grad_norm": 316.1783752441406, "learning_rate": 8.070875085942772e-06, "loss": 17.8088, "step": 180270 }, { "epoch": 0.3641770060238287, "grad_norm": 53.37342834472656, "learning_rate": 8.070599606215522e-06, "loss": 13.4152, "step": 180280 }, { "epoch": 0.3641972066565125, "grad_norm": 147.43515014648438, "learning_rate": 8.070324111522538e-06, "loss": 24.2794, "step": 180290 }, { "epoch": 0.36421740728919627, "grad_norm": 372.1289367675781, "learning_rate": 8.07004860186517e-06, "loss": 19.9747, "step": 180300 }, { "epoch": 0.3642376079218801, "grad_norm": 399.1383361816406, "learning_rate": 8.069773077244756e-06, "loss": 18.232, "step": 180310 }, { "epoch": 0.3642578085545639, "grad_norm": 137.80567932128906, "learning_rate": 8.069497537662638e-06, "loss": 18.4426, "step": 180320 }, { "epoch": 0.36427800918724773, "grad_norm": 115.06241607666016, "learning_rate": 8.069221983120164e-06, "loss": 18.5016, "step": 180330 }, { "epoch": 0.36429820981993155, "grad_norm": 198.1746826171875, "learning_rate": 8.068946413618674e-06, "loss": 16.683, "step": 180340 }, { "epoch": 0.36431841045261537, "grad_norm": 204.9738006591797, "learning_rate": 8.068670829159511e-06, "loss": 16.9939, "step": 180350 }, { "epoch": 0.3643386110852992, "grad_norm": 234.5814971923828, "learning_rate": 8.06839522974402e-06, "loss": 12.5066, "step": 180360 }, { "epoch": 0.364358811717983, "grad_norm": 227.6265411376953, "learning_rate": 8.068119615373541e-06, "loss": 16.8407, "step": 180370 }, { "epoch": 0.36437901235066683, "grad_norm": 374.2120361328125, "learning_rate": 8.06784398604942e-06, "loss": 18.596, "step": 180380 }, { "epoch": 0.36439921298335065, "grad_norm": 206.06785583496094, "learning_rate": 8.067568341773e-06, "loss": 22.1805, "step": 180390 }, { "epoch": 0.36441941361603447, "grad_norm": 386.33636474609375, "learning_rate": 8.067292682545622e-06, "loss": 16.0675, "step": 180400 }, { "epoch": 0.3644396142487183, "grad_norm": 116.877685546875, "learning_rate": 8.067017008368632e-06, "loss": 9.1947, "step": 180410 }, { "epoch": 0.3644598148814021, "grad_norm": 469.15264892578125, "learning_rate": 8.066741319243374e-06, "loss": 17.3426, "step": 180420 }, { "epoch": 0.3644800155140859, "grad_norm": 278.024658203125, "learning_rate": 8.06646561517119e-06, "loss": 25.4617, "step": 180430 }, { "epoch": 0.3645002161467697, "grad_norm": 218.20603942871094, "learning_rate": 8.066189896153425e-06, "loss": 25.7228, "step": 180440 }, { "epoch": 0.3645204167794535, "grad_norm": 232.0675048828125, "learning_rate": 8.065914162191424e-06, "loss": 13.9541, "step": 180450 }, { "epoch": 0.36454061741213734, "grad_norm": 98.47352600097656, "learning_rate": 8.065638413286526e-06, "loss": 12.7971, "step": 180460 }, { "epoch": 0.36456081804482116, "grad_norm": 177.99237060546875, "learning_rate": 8.065362649440081e-06, "loss": 19.7376, "step": 180470 }, { "epoch": 0.364581018677505, "grad_norm": 50.33099365234375, "learning_rate": 8.065086870653428e-06, "loss": 17.8458, "step": 180480 }, { "epoch": 0.3646012193101888, "grad_norm": 170.24151611328125, "learning_rate": 8.064811076927915e-06, "loss": 35.6869, "step": 180490 }, { "epoch": 0.3646214199428726, "grad_norm": 394.4134826660156, "learning_rate": 8.064535268264883e-06, "loss": 21.9756, "step": 180500 }, { "epoch": 0.36464162057555644, "grad_norm": 441.3662414550781, "learning_rate": 8.064259444665678e-06, "loss": 14.1412, "step": 180510 }, { "epoch": 0.36466182120824026, "grad_norm": 175.33045959472656, "learning_rate": 8.063983606131645e-06, "loss": 25.9932, "step": 180520 }, { "epoch": 0.3646820218409241, "grad_norm": 277.73175048828125, "learning_rate": 8.063707752664127e-06, "loss": 15.8203, "step": 180530 }, { "epoch": 0.3647022224736079, "grad_norm": 425.87347412109375, "learning_rate": 8.06343188426447e-06, "loss": 31.1066, "step": 180540 }, { "epoch": 0.3647224231062917, "grad_norm": 168.72991943359375, "learning_rate": 8.063156000934017e-06, "loss": 33.5646, "step": 180550 }, { "epoch": 0.3647426237389755, "grad_norm": 400.6711730957031, "learning_rate": 8.062880102674112e-06, "loss": 27.162, "step": 180560 }, { "epoch": 0.3647628243716593, "grad_norm": 336.55059814453125, "learning_rate": 8.062604189486102e-06, "loss": 35.0813, "step": 180570 }, { "epoch": 0.3647830250043431, "grad_norm": 268.1875305175781, "learning_rate": 8.06232826137133e-06, "loss": 14.8456, "step": 180580 }, { "epoch": 0.36480322563702694, "grad_norm": 183.9904327392578, "learning_rate": 8.062052318331142e-06, "loss": 16.6811, "step": 180590 }, { "epoch": 0.36482342626971076, "grad_norm": 738.9470825195312, "learning_rate": 8.061776360366883e-06, "loss": 27.2586, "step": 180600 }, { "epoch": 0.3648436269023946, "grad_norm": 66.82939910888672, "learning_rate": 8.061500387479896e-06, "loss": 24.8332, "step": 180610 }, { "epoch": 0.3648638275350784, "grad_norm": 327.152587890625, "learning_rate": 8.06122439967153e-06, "loss": 8.9114, "step": 180620 }, { "epoch": 0.3648840281677622, "grad_norm": 217.15469360351562, "learning_rate": 8.060948396943125e-06, "loss": 21.3885, "step": 180630 }, { "epoch": 0.36490422880044604, "grad_norm": 465.2210388183594, "learning_rate": 8.060672379296028e-06, "loss": 30.2924, "step": 180640 }, { "epoch": 0.36492442943312986, "grad_norm": 444.6104431152344, "learning_rate": 8.060396346731587e-06, "loss": 21.5168, "step": 180650 }, { "epoch": 0.3649446300658137, "grad_norm": 706.1241455078125, "learning_rate": 8.060120299251145e-06, "loss": 41.7756, "step": 180660 }, { "epoch": 0.3649648306984975, "grad_norm": 262.6829528808594, "learning_rate": 8.059844236856047e-06, "loss": 22.9152, "step": 180670 }, { "epoch": 0.3649850313311813, "grad_norm": 351.7448425292969, "learning_rate": 8.05956815954764e-06, "loss": 30.323, "step": 180680 }, { "epoch": 0.3650052319638651, "grad_norm": 314.5424499511719, "learning_rate": 8.059292067327268e-06, "loss": 21.0463, "step": 180690 }, { "epoch": 0.3650254325965489, "grad_norm": 140.60572814941406, "learning_rate": 8.05901596019628e-06, "loss": 22.0193, "step": 180700 }, { "epoch": 0.3650456332292327, "grad_norm": 424.1038818359375, "learning_rate": 8.058739838156018e-06, "loss": 27.0004, "step": 180710 }, { "epoch": 0.36506583386191654, "grad_norm": 135.9637451171875, "learning_rate": 8.058463701207828e-06, "loss": 27.4563, "step": 180720 }, { "epoch": 0.36508603449460036, "grad_norm": 104.66922760009766, "learning_rate": 8.058187549353058e-06, "loss": 11.95, "step": 180730 }, { "epoch": 0.3651062351272842, "grad_norm": 329.4078674316406, "learning_rate": 8.057911382593052e-06, "loss": 12.527, "step": 180740 }, { "epoch": 0.365126435759968, "grad_norm": 178.28892517089844, "learning_rate": 8.057635200929157e-06, "loss": 18.3897, "step": 180750 }, { "epoch": 0.3651466363926518, "grad_norm": 65.33213806152344, "learning_rate": 8.057359004362719e-06, "loss": 18.7537, "step": 180760 }, { "epoch": 0.36516683702533564, "grad_norm": 190.8683624267578, "learning_rate": 8.057082792895084e-06, "loss": 14.075, "step": 180770 }, { "epoch": 0.36518703765801946, "grad_norm": 639.2693481445312, "learning_rate": 8.056806566527597e-06, "loss": 42.3231, "step": 180780 }, { "epoch": 0.3652072382907033, "grad_norm": 133.1455841064453, "learning_rate": 8.056530325261607e-06, "loss": 21.615, "step": 180790 }, { "epoch": 0.3652274389233871, "grad_norm": 1318.4779052734375, "learning_rate": 8.05625406909846e-06, "loss": 20.6613, "step": 180800 }, { "epoch": 0.36524763955607087, "grad_norm": 199.06468200683594, "learning_rate": 8.055977798039499e-06, "loss": 15.6217, "step": 180810 }, { "epoch": 0.3652678401887547, "grad_norm": 285.6034851074219, "learning_rate": 8.055701512086073e-06, "loss": 20.0044, "step": 180820 }, { "epoch": 0.3652880408214385, "grad_norm": 259.1186218261719, "learning_rate": 8.05542521123953e-06, "loss": 32.6816, "step": 180830 }, { "epoch": 0.36530824145412233, "grad_norm": 118.98157501220703, "learning_rate": 8.055148895501214e-06, "loss": 23.6858, "step": 180840 }, { "epoch": 0.36532844208680615, "grad_norm": 256.0003967285156, "learning_rate": 8.054872564872474e-06, "loss": 19.0068, "step": 180850 }, { "epoch": 0.36534864271948997, "grad_norm": 369.1144714355469, "learning_rate": 8.054596219354655e-06, "loss": 13.2962, "step": 180860 }, { "epoch": 0.3653688433521738, "grad_norm": 428.40936279296875, "learning_rate": 8.054319858949104e-06, "loss": 13.3953, "step": 180870 }, { "epoch": 0.3653890439848576, "grad_norm": 302.6059875488281, "learning_rate": 8.054043483657169e-06, "loss": 20.7315, "step": 180880 }, { "epoch": 0.36540924461754143, "grad_norm": 220.98788452148438, "learning_rate": 8.053767093480196e-06, "loss": 19.6729, "step": 180890 }, { "epoch": 0.36542944525022525, "grad_norm": 92.81193542480469, "learning_rate": 8.053490688419532e-06, "loss": 27.0483, "step": 180900 }, { "epoch": 0.36544964588290907, "grad_norm": 277.7098083496094, "learning_rate": 8.053214268476526e-06, "loss": 33.8312, "step": 180910 }, { "epoch": 0.3654698465155929, "grad_norm": 268.6290588378906, "learning_rate": 8.052937833652524e-06, "loss": 11.055, "step": 180920 }, { "epoch": 0.3654900471482767, "grad_norm": 56.17030334472656, "learning_rate": 8.052661383948873e-06, "loss": 26.7725, "step": 180930 }, { "epoch": 0.3655102477809605, "grad_norm": 205.01646423339844, "learning_rate": 8.05238491936692e-06, "loss": 20.3201, "step": 180940 }, { "epoch": 0.3655304484136443, "grad_norm": 299.5510559082031, "learning_rate": 8.052108439908014e-06, "loss": 33.1172, "step": 180950 }, { "epoch": 0.3655506490463281, "grad_norm": 272.5309753417969, "learning_rate": 8.0518319455735e-06, "loss": 15.8891, "step": 180960 }, { "epoch": 0.36557084967901193, "grad_norm": 322.0819091796875, "learning_rate": 8.051555436364728e-06, "loss": 24.6583, "step": 180970 }, { "epoch": 0.36559105031169575, "grad_norm": 156.18853759765625, "learning_rate": 8.051278912283046e-06, "loss": 17.5627, "step": 180980 }, { "epoch": 0.3656112509443796, "grad_norm": 505.347900390625, "learning_rate": 8.0510023733298e-06, "loss": 19.7401, "step": 180990 }, { "epoch": 0.3656314515770634, "grad_norm": 0.0, "learning_rate": 8.05072581950634e-06, "loss": 13.3696, "step": 181000 }, { "epoch": 0.3656516522097472, "grad_norm": 360.494873046875, "learning_rate": 8.05044925081401e-06, "loss": 29.8044, "step": 181010 }, { "epoch": 0.36567185284243103, "grad_norm": 199.1470947265625, "learning_rate": 8.050172667254161e-06, "loss": 17.9389, "step": 181020 }, { "epoch": 0.36569205347511485, "grad_norm": 480.0881042480469, "learning_rate": 8.04989606882814e-06, "loss": 12.2036, "step": 181030 }, { "epoch": 0.3657122541077987, "grad_norm": 26.534151077270508, "learning_rate": 8.049619455537296e-06, "loss": 16.3132, "step": 181040 }, { "epoch": 0.3657324547404825, "grad_norm": 239.21482849121094, "learning_rate": 8.049342827382978e-06, "loss": 15.8321, "step": 181050 }, { "epoch": 0.3657526553731663, "grad_norm": 176.50999450683594, "learning_rate": 8.049066184366532e-06, "loss": 37.6934, "step": 181060 }, { "epoch": 0.3657728560058501, "grad_norm": 449.13555908203125, "learning_rate": 8.048789526489305e-06, "loss": 21.8536, "step": 181070 }, { "epoch": 0.3657930566385339, "grad_norm": 224.65634155273438, "learning_rate": 8.04851285375265e-06, "loss": 10.3631, "step": 181080 }, { "epoch": 0.3658132572712177, "grad_norm": 1266.0233154296875, "learning_rate": 8.048236166157912e-06, "loss": 21.8589, "step": 181090 }, { "epoch": 0.36583345790390154, "grad_norm": 0.0, "learning_rate": 8.047959463706441e-06, "loss": 23.0395, "step": 181100 }, { "epoch": 0.36585365853658536, "grad_norm": 301.3067626953125, "learning_rate": 8.047682746399585e-06, "loss": 18.3553, "step": 181110 }, { "epoch": 0.3658738591692692, "grad_norm": 437.3012390136719, "learning_rate": 8.047406014238695e-06, "loss": 18.5423, "step": 181120 }, { "epoch": 0.365894059801953, "grad_norm": 190.35203552246094, "learning_rate": 8.047129267225116e-06, "loss": 19.1583, "step": 181130 }, { "epoch": 0.3659142604346368, "grad_norm": 98.4179916381836, "learning_rate": 8.046852505360196e-06, "loss": 20.7795, "step": 181140 }, { "epoch": 0.36593446106732064, "grad_norm": 0.0, "learning_rate": 8.04657572864529e-06, "loss": 18.3469, "step": 181150 }, { "epoch": 0.36595466170000446, "grad_norm": 384.51202392578125, "learning_rate": 8.046298937081742e-06, "loss": 11.4029, "step": 181160 }, { "epoch": 0.3659748623326883, "grad_norm": 198.22610473632812, "learning_rate": 8.046022130670903e-06, "loss": 18.2004, "step": 181170 }, { "epoch": 0.3659950629653721, "grad_norm": 497.5443115234375, "learning_rate": 8.045745309414122e-06, "loss": 25.3782, "step": 181180 }, { "epoch": 0.3660152635980559, "grad_norm": 30.041006088256836, "learning_rate": 8.045468473312748e-06, "loss": 10.5228, "step": 181190 }, { "epoch": 0.3660354642307397, "grad_norm": 55.62123489379883, "learning_rate": 8.045191622368128e-06, "loss": 14.5442, "step": 181200 }, { "epoch": 0.3660556648634235, "grad_norm": 598.5046997070312, "learning_rate": 8.044914756581614e-06, "loss": 31.4657, "step": 181210 }, { "epoch": 0.3660758654961073, "grad_norm": 371.818115234375, "learning_rate": 8.044637875954556e-06, "loss": 14.3213, "step": 181220 }, { "epoch": 0.36609606612879114, "grad_norm": 311.99774169921875, "learning_rate": 8.044360980488302e-06, "loss": 16.2018, "step": 181230 }, { "epoch": 0.36611626676147496, "grad_norm": 398.43927001953125, "learning_rate": 8.044084070184202e-06, "loss": 22.7983, "step": 181240 }, { "epoch": 0.3661364673941588, "grad_norm": 226.06613159179688, "learning_rate": 8.043807145043604e-06, "loss": 15.5066, "step": 181250 }, { "epoch": 0.3661566680268426, "grad_norm": 212.11447143554688, "learning_rate": 8.04353020506786e-06, "loss": 25.6169, "step": 181260 }, { "epoch": 0.3661768686595264, "grad_norm": 1308.9892578125, "learning_rate": 8.04325325025832e-06, "loss": 36.0956, "step": 181270 }, { "epoch": 0.36619706929221024, "grad_norm": 212.93930053710938, "learning_rate": 8.04297628061633e-06, "loss": 17.8809, "step": 181280 }, { "epoch": 0.36621726992489406, "grad_norm": 308.5074462890625, "learning_rate": 8.042699296143244e-06, "loss": 29.8462, "step": 181290 }, { "epoch": 0.3662374705575779, "grad_norm": 193.408203125, "learning_rate": 8.04242229684041e-06, "loss": 28.4466, "step": 181300 }, { "epoch": 0.3662576711902617, "grad_norm": 315.14520263671875, "learning_rate": 8.042145282709181e-06, "loss": 11.1157, "step": 181310 }, { "epoch": 0.3662778718229455, "grad_norm": 249.40370178222656, "learning_rate": 8.041868253750904e-06, "loss": 15.0005, "step": 181320 }, { "epoch": 0.3662980724556293, "grad_norm": 232.8643341064453, "learning_rate": 8.04159120996693e-06, "loss": 11.4303, "step": 181330 }, { "epoch": 0.3663182730883131, "grad_norm": 151.84938049316406, "learning_rate": 8.04131415135861e-06, "loss": 20.5955, "step": 181340 }, { "epoch": 0.3663384737209969, "grad_norm": 469.2001037597656, "learning_rate": 8.041037077927291e-06, "loss": 21.9896, "step": 181350 }, { "epoch": 0.36635867435368075, "grad_norm": 194.90103149414062, "learning_rate": 8.040759989674328e-06, "loss": 26.9918, "step": 181360 }, { "epoch": 0.36637887498636457, "grad_norm": 270.28662109375, "learning_rate": 8.04048288660107e-06, "loss": 28.6557, "step": 181370 }, { "epoch": 0.3663990756190484, "grad_norm": 294.26055908203125, "learning_rate": 8.040205768708866e-06, "loss": 15.6379, "step": 181380 }, { "epoch": 0.3664192762517322, "grad_norm": 452.2415771484375, "learning_rate": 8.03992863599907e-06, "loss": 14.6372, "step": 181390 }, { "epoch": 0.366439476884416, "grad_norm": 379.9623718261719, "learning_rate": 8.039651488473028e-06, "loss": 23.0659, "step": 181400 }, { "epoch": 0.36645967751709985, "grad_norm": 127.28617095947266, "learning_rate": 8.039374326132095e-06, "loss": 18.4194, "step": 181410 }, { "epoch": 0.36647987814978367, "grad_norm": 294.6429748535156, "learning_rate": 8.03909714897762e-06, "loss": 45.0225, "step": 181420 }, { "epoch": 0.3665000787824675, "grad_norm": 165.31117248535156, "learning_rate": 8.038819957010953e-06, "loss": 26.8929, "step": 181430 }, { "epoch": 0.3665202794151513, "grad_norm": 107.16597747802734, "learning_rate": 8.038542750233445e-06, "loss": 21.4697, "step": 181440 }, { "epoch": 0.36654048004783507, "grad_norm": 426.7367858886719, "learning_rate": 8.03826552864645e-06, "loss": 25.5019, "step": 181450 }, { "epoch": 0.3665606806805189, "grad_norm": 37.53121566772461, "learning_rate": 8.037988292251317e-06, "loss": 26.9047, "step": 181460 }, { "epoch": 0.3665808813132027, "grad_norm": 244.20053100585938, "learning_rate": 8.037711041049398e-06, "loss": 16.1819, "step": 181470 }, { "epoch": 0.36660108194588653, "grad_norm": 132.1400604248047, "learning_rate": 8.037433775042044e-06, "loss": 12.7387, "step": 181480 }, { "epoch": 0.36662128257857035, "grad_norm": 271.5543518066406, "learning_rate": 8.037156494230605e-06, "loss": 23.1184, "step": 181490 }, { "epoch": 0.36664148321125417, "grad_norm": 220.2806396484375, "learning_rate": 8.036879198616434e-06, "loss": 32.535, "step": 181500 }, { "epoch": 0.366661683843938, "grad_norm": 367.9124755859375, "learning_rate": 8.036601888200883e-06, "loss": 26.1523, "step": 181510 }, { "epoch": 0.3666818844766218, "grad_norm": 1217.584228515625, "learning_rate": 8.036324562985302e-06, "loss": 44.983, "step": 181520 }, { "epoch": 0.36670208510930563, "grad_norm": 280.4303894042969, "learning_rate": 8.036047222971043e-06, "loss": 28.8048, "step": 181530 }, { "epoch": 0.36672228574198945, "grad_norm": 259.6361999511719, "learning_rate": 8.035769868159457e-06, "loss": 12.2801, "step": 181540 }, { "epoch": 0.36674248637467327, "grad_norm": 218.78076171875, "learning_rate": 8.0354924985519e-06, "loss": 20.3203, "step": 181550 }, { "epoch": 0.3667626870073571, "grad_norm": 433.3406982421875, "learning_rate": 8.035215114149719e-06, "loss": 13.3701, "step": 181560 }, { "epoch": 0.3667828876400409, "grad_norm": 291.24066162109375, "learning_rate": 8.034937714954267e-06, "loss": 14.2289, "step": 181570 }, { "epoch": 0.3668030882727247, "grad_norm": 1930.4178466796875, "learning_rate": 8.034660300966898e-06, "loss": 23.6328, "step": 181580 }, { "epoch": 0.3668232889054085, "grad_norm": 342.5364685058594, "learning_rate": 8.034382872188961e-06, "loss": 21.7842, "step": 181590 }, { "epoch": 0.3668434895380923, "grad_norm": 158.06600952148438, "learning_rate": 8.034105428621812e-06, "loss": 11.9732, "step": 181600 }, { "epoch": 0.36686369017077614, "grad_norm": 337.66998291015625, "learning_rate": 8.0338279702668e-06, "loss": 17.6576, "step": 181610 }, { "epoch": 0.36688389080345996, "grad_norm": 307.86993408203125, "learning_rate": 8.033550497125277e-06, "loss": 14.5653, "step": 181620 }, { "epoch": 0.3669040914361438, "grad_norm": 206.07965087890625, "learning_rate": 8.0332730091986e-06, "loss": 18.8678, "step": 181630 }, { "epoch": 0.3669242920688276, "grad_norm": 258.3424377441406, "learning_rate": 8.032995506488117e-06, "loss": 18.9284, "step": 181640 }, { "epoch": 0.3669444927015114, "grad_norm": 325.928466796875, "learning_rate": 8.03271798899518e-06, "loss": 13.664, "step": 181650 }, { "epoch": 0.36696469333419524, "grad_norm": 564.956298828125, "learning_rate": 8.032440456721145e-06, "loss": 20.7381, "step": 181660 }, { "epoch": 0.36698489396687906, "grad_norm": 729.1417236328125, "learning_rate": 8.032162909667363e-06, "loss": 26.855, "step": 181670 }, { "epoch": 0.3670050945995629, "grad_norm": 445.1619873046875, "learning_rate": 8.031885347835187e-06, "loss": 19.5495, "step": 181680 }, { "epoch": 0.3670252952322467, "grad_norm": 163.62884521484375, "learning_rate": 8.031607771225969e-06, "loss": 20.3562, "step": 181690 }, { "epoch": 0.3670454958649305, "grad_norm": 498.330078125, "learning_rate": 8.031330179841062e-06, "loss": 23.177, "step": 181700 }, { "epoch": 0.3670656964976143, "grad_norm": 360.23919677734375, "learning_rate": 8.031052573681819e-06, "loss": 17.8511, "step": 181710 }, { "epoch": 0.3670858971302981, "grad_norm": 298.9189147949219, "learning_rate": 8.030774952749596e-06, "loss": 22.6676, "step": 181720 }, { "epoch": 0.3671060977629819, "grad_norm": 431.6170654296875, "learning_rate": 8.03049731704574e-06, "loss": 26.1707, "step": 181730 }, { "epoch": 0.36712629839566574, "grad_norm": 226.2337188720703, "learning_rate": 8.03021966657161e-06, "loss": 19.518, "step": 181740 }, { "epoch": 0.36714649902834956, "grad_norm": 21.196304321289062, "learning_rate": 8.029942001328555e-06, "loss": 27.2435, "step": 181750 }, { "epoch": 0.3671666996610334, "grad_norm": 157.5868377685547, "learning_rate": 8.029664321317932e-06, "loss": 12.0834, "step": 181760 }, { "epoch": 0.3671869002937172, "grad_norm": 444.8120422363281, "learning_rate": 8.029386626541092e-06, "loss": 14.9626, "step": 181770 }, { "epoch": 0.367207100926401, "grad_norm": 185.23583984375, "learning_rate": 8.02910891699939e-06, "loss": 18.7983, "step": 181780 }, { "epoch": 0.36722730155908484, "grad_norm": 328.7201232910156, "learning_rate": 8.028831192694176e-06, "loss": 5.5186, "step": 181790 }, { "epoch": 0.36724750219176866, "grad_norm": 409.76446533203125, "learning_rate": 8.028553453626809e-06, "loss": 11.8721, "step": 181800 }, { "epoch": 0.3672677028244525, "grad_norm": 0.0, "learning_rate": 8.028275699798638e-06, "loss": 26.1959, "step": 181810 }, { "epoch": 0.3672879034571363, "grad_norm": 473.5862121582031, "learning_rate": 8.027997931211017e-06, "loss": 22.0905, "step": 181820 }, { "epoch": 0.3673081040898201, "grad_norm": 85.01433563232422, "learning_rate": 8.027720147865304e-06, "loss": 12.6209, "step": 181830 }, { "epoch": 0.3673283047225039, "grad_norm": 388.554931640625, "learning_rate": 8.02744234976285e-06, "loss": 17.8679, "step": 181840 }, { "epoch": 0.3673485053551877, "grad_norm": 9.149590492248535, "learning_rate": 8.027164536905008e-06, "loss": 20.1847, "step": 181850 }, { "epoch": 0.3673687059878715, "grad_norm": 548.8598022460938, "learning_rate": 8.026886709293133e-06, "loss": 30.9179, "step": 181860 }, { "epoch": 0.36738890662055534, "grad_norm": 230.1243896484375, "learning_rate": 8.02660886692858e-06, "loss": 12.6422, "step": 181870 }, { "epoch": 0.36740910725323916, "grad_norm": 998.7691650390625, "learning_rate": 8.026331009812703e-06, "loss": 26.1502, "step": 181880 }, { "epoch": 0.367429307885923, "grad_norm": 0.0, "learning_rate": 8.026053137946855e-06, "loss": 22.601, "step": 181890 }, { "epoch": 0.3674495085186068, "grad_norm": 128.647216796875, "learning_rate": 8.02577525133239e-06, "loss": 25.0463, "step": 181900 }, { "epoch": 0.3674697091512906, "grad_norm": 464.27783203125, "learning_rate": 8.025497349970666e-06, "loss": 15.8772, "step": 181910 }, { "epoch": 0.36748990978397444, "grad_norm": 111.91634368896484, "learning_rate": 8.025219433863035e-06, "loss": 13.6979, "step": 181920 }, { "epoch": 0.36751011041665826, "grad_norm": 138.0834197998047, "learning_rate": 8.024941503010848e-06, "loss": 15.7702, "step": 181930 }, { "epoch": 0.3675303110493421, "grad_norm": 343.57586669921875, "learning_rate": 8.024663557415466e-06, "loss": 22.6088, "step": 181940 }, { "epoch": 0.3675505116820259, "grad_norm": 265.4393005371094, "learning_rate": 8.024385597078239e-06, "loss": 19.0071, "step": 181950 }, { "epoch": 0.3675707123147097, "grad_norm": 344.9518127441406, "learning_rate": 8.024107622000524e-06, "loss": 13.3685, "step": 181960 }, { "epoch": 0.3675909129473935, "grad_norm": 154.90765380859375, "learning_rate": 8.023829632183676e-06, "loss": 31.2523, "step": 181970 }, { "epoch": 0.3676111135800773, "grad_norm": 297.6966247558594, "learning_rate": 8.023551627629047e-06, "loss": 23.2807, "step": 181980 }, { "epoch": 0.36763131421276113, "grad_norm": 357.4823303222656, "learning_rate": 8.023273608337997e-06, "loss": 17.782, "step": 181990 }, { "epoch": 0.36765151484544495, "grad_norm": 16.68259620666504, "learning_rate": 8.022995574311876e-06, "loss": 11.8938, "step": 182000 }, { "epoch": 0.36767171547812877, "grad_norm": 381.8111572265625, "learning_rate": 8.022717525552041e-06, "loss": 21.6284, "step": 182010 }, { "epoch": 0.3676919161108126, "grad_norm": 656.380859375, "learning_rate": 8.022439462059849e-06, "loss": 26.4288, "step": 182020 }, { "epoch": 0.3677121167434964, "grad_norm": 235.6137237548828, "learning_rate": 8.022161383836652e-06, "loss": 10.7848, "step": 182030 }, { "epoch": 0.36773231737618023, "grad_norm": 372.8729553222656, "learning_rate": 8.021883290883808e-06, "loss": 23.4374, "step": 182040 }, { "epoch": 0.36775251800886405, "grad_norm": 283.1479797363281, "learning_rate": 8.021605183202669e-06, "loss": 29.1301, "step": 182050 }, { "epoch": 0.36777271864154787, "grad_norm": 308.2892150878906, "learning_rate": 8.021327060794597e-06, "loss": 12.4614, "step": 182060 }, { "epoch": 0.3677929192742317, "grad_norm": 293.7015686035156, "learning_rate": 8.02104892366094e-06, "loss": 33.1437, "step": 182070 }, { "epoch": 0.3678131199069155, "grad_norm": 90.2254409790039, "learning_rate": 8.02077077180306e-06, "loss": 29.6256, "step": 182080 }, { "epoch": 0.3678333205395993, "grad_norm": 115.06327056884766, "learning_rate": 8.020492605222307e-06, "loss": 23.6868, "step": 182090 }, { "epoch": 0.3678535211722831, "grad_norm": 110.12911987304688, "learning_rate": 8.020214423920039e-06, "loss": 40.4602, "step": 182100 }, { "epoch": 0.3678737218049669, "grad_norm": 183.18182373046875, "learning_rate": 8.019936227897614e-06, "loss": 28.3744, "step": 182110 }, { "epoch": 0.36789392243765073, "grad_norm": 234.80514526367188, "learning_rate": 8.019658017156384e-06, "loss": 26.6193, "step": 182120 }, { "epoch": 0.36791412307033455, "grad_norm": 323.7833251953125, "learning_rate": 8.01937979169771e-06, "loss": 23.1661, "step": 182130 }, { "epoch": 0.3679343237030184, "grad_norm": 111.76513671875, "learning_rate": 8.019101551522942e-06, "loss": 13.7392, "step": 182140 }, { "epoch": 0.3679545243357022, "grad_norm": 440.7177429199219, "learning_rate": 8.018823296633442e-06, "loss": 18.0887, "step": 182150 }, { "epoch": 0.367974724968386, "grad_norm": 273.23114013671875, "learning_rate": 8.018545027030564e-06, "loss": 17.4257, "step": 182160 }, { "epoch": 0.36799492560106983, "grad_norm": 375.09716796875, "learning_rate": 8.01826674271566e-06, "loss": 14.8377, "step": 182170 }, { "epoch": 0.36801512623375365, "grad_norm": 218.8509063720703, "learning_rate": 8.017988443690092e-06, "loss": 31.6913, "step": 182180 }, { "epoch": 0.3680353268664375, "grad_norm": 373.8122863769531, "learning_rate": 8.017710129955215e-06, "loss": 12.1373, "step": 182190 }, { "epoch": 0.3680555274991213, "grad_norm": 409.1675109863281, "learning_rate": 8.017431801512384e-06, "loss": 15.2659, "step": 182200 }, { "epoch": 0.3680757281318051, "grad_norm": 19.8114070892334, "learning_rate": 8.017153458362957e-06, "loss": 22.4653, "step": 182210 }, { "epoch": 0.3680959287644889, "grad_norm": 185.40997314453125, "learning_rate": 8.016875100508289e-06, "loss": 21.2064, "step": 182220 }, { "epoch": 0.3681161293971727, "grad_norm": 546.8681640625, "learning_rate": 8.016596727949737e-06, "loss": 42.8949, "step": 182230 }, { "epoch": 0.3681363300298565, "grad_norm": 49.502506256103516, "learning_rate": 8.01631834068866e-06, "loss": 40.2124, "step": 182240 }, { "epoch": 0.36815653066254034, "grad_norm": 827.6050415039062, "learning_rate": 8.016039938726413e-06, "loss": 13.8108, "step": 182250 }, { "epoch": 0.36817673129522416, "grad_norm": 310.0916748046875, "learning_rate": 8.015761522064353e-06, "loss": 26.8008, "step": 182260 }, { "epoch": 0.368196931927908, "grad_norm": 128.86485290527344, "learning_rate": 8.015483090703837e-06, "loss": 18.1005, "step": 182270 }, { "epoch": 0.3682171325605918, "grad_norm": 327.9010925292969, "learning_rate": 8.015204644646222e-06, "loss": 25.96, "step": 182280 }, { "epoch": 0.3682373331932756, "grad_norm": 194.9375457763672, "learning_rate": 8.014926183892867e-06, "loss": 5.7575, "step": 182290 }, { "epoch": 0.36825753382595944, "grad_norm": 233.583984375, "learning_rate": 8.014647708445124e-06, "loss": 25.1355, "step": 182300 }, { "epoch": 0.36827773445864326, "grad_norm": 356.98583984375, "learning_rate": 8.014369218304356e-06, "loss": 14.3048, "step": 182310 }, { "epoch": 0.3682979350913271, "grad_norm": 188.11618041992188, "learning_rate": 8.014090713471917e-06, "loss": 20.1552, "step": 182320 }, { "epoch": 0.3683181357240109, "grad_norm": 589.866943359375, "learning_rate": 8.013812193949166e-06, "loss": 27.9934, "step": 182330 }, { "epoch": 0.3683383363566947, "grad_norm": 141.0340576171875, "learning_rate": 8.01353365973746e-06, "loss": 24.0214, "step": 182340 }, { "epoch": 0.3683585369893785, "grad_norm": 167.4452667236328, "learning_rate": 8.013255110838156e-06, "loss": 31.5745, "step": 182350 }, { "epoch": 0.3683787376220623, "grad_norm": 162.32888793945312, "learning_rate": 8.012976547252614e-06, "loss": 23.7849, "step": 182360 }, { "epoch": 0.3683989382547461, "grad_norm": 282.12200927734375, "learning_rate": 8.012697968982187e-06, "loss": 14.7699, "step": 182370 }, { "epoch": 0.36841913888742994, "grad_norm": 216.16758728027344, "learning_rate": 8.012419376028237e-06, "loss": 17.8767, "step": 182380 }, { "epoch": 0.36843933952011376, "grad_norm": 198.56849670410156, "learning_rate": 8.01214076839212e-06, "loss": 26.734, "step": 182390 }, { "epoch": 0.3684595401527976, "grad_norm": 252.5966033935547, "learning_rate": 8.011862146075194e-06, "loss": 12.641, "step": 182400 }, { "epoch": 0.3684797407854814, "grad_norm": 321.9358215332031, "learning_rate": 8.011583509078817e-06, "loss": 18.2178, "step": 182410 }, { "epoch": 0.3684999414181652, "grad_norm": 270.317138671875, "learning_rate": 8.011304857404347e-06, "loss": 11.0165, "step": 182420 }, { "epoch": 0.36852014205084904, "grad_norm": 243.00306701660156, "learning_rate": 8.011026191053144e-06, "loss": 13.5829, "step": 182430 }, { "epoch": 0.36854034268353286, "grad_norm": 542.3758544921875, "learning_rate": 8.010747510026564e-06, "loss": 26.8064, "step": 182440 }, { "epoch": 0.3685605433162167, "grad_norm": 300.9794006347656, "learning_rate": 8.010468814325964e-06, "loss": 30.0583, "step": 182450 }, { "epoch": 0.3685807439489005, "grad_norm": 283.50048828125, "learning_rate": 8.010190103952706e-06, "loss": 28.1068, "step": 182460 }, { "epoch": 0.3686009445815843, "grad_norm": 273.48175048828125, "learning_rate": 8.009911378908147e-06, "loss": 21.4129, "step": 182470 }, { "epoch": 0.3686211452142681, "grad_norm": 270.3739013671875, "learning_rate": 8.009632639193643e-06, "loss": 22.8882, "step": 182480 }, { "epoch": 0.3686413458469519, "grad_norm": 54.407737731933594, "learning_rate": 8.009353884810555e-06, "loss": 33.8992, "step": 182490 }, { "epoch": 0.3686615464796357, "grad_norm": 281.2411804199219, "learning_rate": 8.009075115760243e-06, "loss": 20.3095, "step": 182500 }, { "epoch": 0.36868174711231955, "grad_norm": 224.91305541992188, "learning_rate": 8.008796332044062e-06, "loss": 28.5677, "step": 182510 }, { "epoch": 0.36870194774500337, "grad_norm": 110.41670227050781, "learning_rate": 8.008517533663372e-06, "loss": 18.562, "step": 182520 }, { "epoch": 0.3687221483776872, "grad_norm": 254.826416015625, "learning_rate": 8.008238720619534e-06, "loss": 14.6574, "step": 182530 }, { "epoch": 0.368742349010371, "grad_norm": 499.0784606933594, "learning_rate": 8.007959892913906e-06, "loss": 33.8873, "step": 182540 }, { "epoch": 0.3687625496430548, "grad_norm": 324.5614929199219, "learning_rate": 8.007681050547844e-06, "loss": 22.7784, "step": 182550 }, { "epoch": 0.36878275027573865, "grad_norm": 360.324462890625, "learning_rate": 8.007402193522711e-06, "loss": 20.8226, "step": 182560 }, { "epoch": 0.36880295090842247, "grad_norm": 230.46957397460938, "learning_rate": 8.007123321839865e-06, "loss": 19.8952, "step": 182570 }, { "epoch": 0.3688231515411063, "grad_norm": 311.17950439453125, "learning_rate": 8.006844435500663e-06, "loss": 24.645, "step": 182580 }, { "epoch": 0.3688433521737901, "grad_norm": 142.267822265625, "learning_rate": 8.006565534506465e-06, "loss": 15.6812, "step": 182590 }, { "epoch": 0.36886355280647387, "grad_norm": 163.6775665283203, "learning_rate": 8.006286618858634e-06, "loss": 15.9326, "step": 182600 }, { "epoch": 0.3688837534391577, "grad_norm": 155.4547576904297, "learning_rate": 8.006007688558526e-06, "loss": 19.693, "step": 182610 }, { "epoch": 0.3689039540718415, "grad_norm": 293.778564453125, "learning_rate": 8.005728743607499e-06, "loss": 25.8194, "step": 182620 }, { "epoch": 0.36892415470452533, "grad_norm": 353.3366394042969, "learning_rate": 8.005449784006917e-06, "loss": 20.6483, "step": 182630 }, { "epoch": 0.36894435533720915, "grad_norm": 308.8511962890625, "learning_rate": 8.005170809758136e-06, "loss": 14.3152, "step": 182640 }, { "epoch": 0.36896455596989297, "grad_norm": 298.14990234375, "learning_rate": 8.004891820862516e-06, "loss": 17.6346, "step": 182650 }, { "epoch": 0.3689847566025768, "grad_norm": 287.8395080566406, "learning_rate": 8.004612817321419e-06, "loss": 30.4644, "step": 182660 }, { "epoch": 0.3690049572352606, "grad_norm": 54.61326217651367, "learning_rate": 8.004333799136206e-06, "loss": 12.4792, "step": 182670 }, { "epoch": 0.36902515786794443, "grad_norm": 31.497806549072266, "learning_rate": 8.004054766308232e-06, "loss": 7.93, "step": 182680 }, { "epoch": 0.36904535850062825, "grad_norm": 618.0618286132812, "learning_rate": 8.003775718838859e-06, "loss": 30.8945, "step": 182690 }, { "epoch": 0.36906555913331207, "grad_norm": 313.2087097167969, "learning_rate": 8.003496656729448e-06, "loss": 23.7953, "step": 182700 }, { "epoch": 0.3690857597659959, "grad_norm": 92.88905334472656, "learning_rate": 8.003217579981358e-06, "loss": 12.4654, "step": 182710 }, { "epoch": 0.3691059603986797, "grad_norm": 167.1089324951172, "learning_rate": 8.002938488595951e-06, "loss": 31.8334, "step": 182720 }, { "epoch": 0.3691261610313635, "grad_norm": 116.03402709960938, "learning_rate": 8.002659382574584e-06, "loss": 13.6286, "step": 182730 }, { "epoch": 0.3691463616640473, "grad_norm": 144.52392578125, "learning_rate": 8.00238026191862e-06, "loss": 12.5434, "step": 182740 }, { "epoch": 0.3691665622967311, "grad_norm": 773.6723022460938, "learning_rate": 8.002101126629422e-06, "loss": 32.9493, "step": 182750 }, { "epoch": 0.36918676292941494, "grad_norm": 287.17919921875, "learning_rate": 8.001821976708344e-06, "loss": 15.8093, "step": 182760 }, { "epoch": 0.36920696356209876, "grad_norm": 295.60308837890625, "learning_rate": 8.001542812156751e-06, "loss": 18.0034, "step": 182770 }, { "epoch": 0.3692271641947826, "grad_norm": 343.4122314453125, "learning_rate": 8.001263632976001e-06, "loss": 48.4456, "step": 182780 }, { "epoch": 0.3692473648274664, "grad_norm": 688.2784423828125, "learning_rate": 8.000984439167457e-06, "loss": 25.0792, "step": 182790 }, { "epoch": 0.3692675654601502, "grad_norm": 125.90655517578125, "learning_rate": 8.000705230732478e-06, "loss": 17.4325, "step": 182800 }, { "epoch": 0.36928776609283404, "grad_norm": 459.7305603027344, "learning_rate": 8.000426007672426e-06, "loss": 13.4328, "step": 182810 }, { "epoch": 0.36930796672551786, "grad_norm": 1051.904296875, "learning_rate": 8.000146769988662e-06, "loss": 29.7899, "step": 182820 }, { "epoch": 0.3693281673582017, "grad_norm": 247.69480895996094, "learning_rate": 7.999867517682547e-06, "loss": 13.5358, "step": 182830 }, { "epoch": 0.3693483679908855, "grad_norm": 247.73817443847656, "learning_rate": 7.999588250755442e-06, "loss": 8.2417, "step": 182840 }, { "epoch": 0.3693685686235693, "grad_norm": 419.2734069824219, "learning_rate": 7.999308969208705e-06, "loss": 24.8946, "step": 182850 }, { "epoch": 0.3693887692562531, "grad_norm": 551.649169921875, "learning_rate": 7.999029673043703e-06, "loss": 13.6886, "step": 182860 }, { "epoch": 0.3694089698889369, "grad_norm": 191.8048858642578, "learning_rate": 7.99875036226179e-06, "loss": 18.8534, "step": 182870 }, { "epoch": 0.3694291705216207, "grad_norm": 69.09309387207031, "learning_rate": 7.998471036864336e-06, "loss": 12.7038, "step": 182880 }, { "epoch": 0.36944937115430454, "grad_norm": 274.2507019042969, "learning_rate": 7.998191696852696e-06, "loss": 16.1307, "step": 182890 }, { "epoch": 0.36946957178698836, "grad_norm": 302.17755126953125, "learning_rate": 7.997912342228232e-06, "loss": 43.6176, "step": 182900 }, { "epoch": 0.3694897724196722, "grad_norm": 430.791748046875, "learning_rate": 7.997632972992308e-06, "loss": 20.4433, "step": 182910 }, { "epoch": 0.369509973052356, "grad_norm": 204.36061096191406, "learning_rate": 7.997353589146284e-06, "loss": 18.573, "step": 182920 }, { "epoch": 0.3695301736850398, "grad_norm": 151.94058227539062, "learning_rate": 7.997074190691523e-06, "loss": 20.2455, "step": 182930 }, { "epoch": 0.36955037431772364, "grad_norm": 265.6204833984375, "learning_rate": 7.996794777629386e-06, "loss": 18.1534, "step": 182940 }, { "epoch": 0.36957057495040746, "grad_norm": 336.7605895996094, "learning_rate": 7.996515349961233e-06, "loss": 25.7767, "step": 182950 }, { "epoch": 0.3695907755830913, "grad_norm": 277.5458068847656, "learning_rate": 7.99623590768843e-06, "loss": 26.6926, "step": 182960 }, { "epoch": 0.3696109762157751, "grad_norm": 181.89364624023438, "learning_rate": 7.995956450812335e-06, "loss": 18.7098, "step": 182970 }, { "epoch": 0.3696311768484589, "grad_norm": 185.62586975097656, "learning_rate": 7.995676979334313e-06, "loss": 16.6785, "step": 182980 }, { "epoch": 0.3696513774811427, "grad_norm": 268.1764831542969, "learning_rate": 7.995397493255723e-06, "loss": 18.5679, "step": 182990 }, { "epoch": 0.3696715781138265, "grad_norm": 271.0916748046875, "learning_rate": 7.99511799257793e-06, "loss": 19.6207, "step": 183000 }, { "epoch": 0.3696917787465103, "grad_norm": 634.3836059570312, "learning_rate": 7.994838477302294e-06, "loss": 33.3889, "step": 183010 }, { "epoch": 0.36971197937919414, "grad_norm": 0.0, "learning_rate": 7.99455894743018e-06, "loss": 17.5061, "step": 183020 }, { "epoch": 0.36973218001187796, "grad_norm": 396.0495910644531, "learning_rate": 7.994279402962948e-06, "loss": 17.2442, "step": 183030 }, { "epoch": 0.3697523806445618, "grad_norm": 282.7486267089844, "learning_rate": 7.993999843901963e-06, "loss": 17.3378, "step": 183040 }, { "epoch": 0.3697725812772456, "grad_norm": 223.8278350830078, "learning_rate": 7.993720270248583e-06, "loss": 26.3356, "step": 183050 }, { "epoch": 0.3697927819099294, "grad_norm": 446.46783447265625, "learning_rate": 7.993440682004176e-06, "loss": 19.6404, "step": 183060 }, { "epoch": 0.36981298254261324, "grad_norm": 238.75689697265625, "learning_rate": 7.993161079170101e-06, "loss": 12.4138, "step": 183070 }, { "epoch": 0.36983318317529706, "grad_norm": 573.0714111328125, "learning_rate": 7.992881461747721e-06, "loss": 25.6773, "step": 183080 }, { "epoch": 0.3698533838079809, "grad_norm": 72.60753631591797, "learning_rate": 7.992601829738403e-06, "loss": 28.3327, "step": 183090 }, { "epoch": 0.3698735844406647, "grad_norm": 143.8813018798828, "learning_rate": 7.992322183143504e-06, "loss": 13.909, "step": 183100 }, { "epoch": 0.3698937850733485, "grad_norm": 23.858642578125, "learning_rate": 7.99204252196439e-06, "loss": 11.597, "step": 183110 }, { "epoch": 0.3699139857060323, "grad_norm": 322.3403015136719, "learning_rate": 7.991762846202423e-06, "loss": 20.6959, "step": 183120 }, { "epoch": 0.3699341863387161, "grad_norm": 253.86001586914062, "learning_rate": 7.991483155858968e-06, "loss": 18.0367, "step": 183130 }, { "epoch": 0.36995438697139993, "grad_norm": 676.9581298828125, "learning_rate": 7.991203450935385e-06, "loss": 29.5363, "step": 183140 }, { "epoch": 0.36997458760408375, "grad_norm": 375.1921081542969, "learning_rate": 7.990923731433043e-06, "loss": 18.2155, "step": 183150 }, { "epoch": 0.36999478823676757, "grad_norm": 298.8613586425781, "learning_rate": 7.990643997353296e-06, "loss": 28.1997, "step": 183160 }, { "epoch": 0.3700149888694514, "grad_norm": 201.21412658691406, "learning_rate": 7.990364248697517e-06, "loss": 16.3981, "step": 183170 }, { "epoch": 0.3700351895021352, "grad_norm": 510.563720703125, "learning_rate": 7.990084485467065e-06, "loss": 15.5078, "step": 183180 }, { "epoch": 0.37005539013481903, "grad_norm": 128.11463928222656, "learning_rate": 7.989804707663302e-06, "loss": 11.116, "step": 183190 }, { "epoch": 0.37007559076750285, "grad_norm": 567.2412719726562, "learning_rate": 7.989524915287595e-06, "loss": 26.81, "step": 183200 }, { "epoch": 0.37009579140018667, "grad_norm": 51.5490608215332, "learning_rate": 7.989245108341305e-06, "loss": 10.9638, "step": 183210 }, { "epoch": 0.3701159920328705, "grad_norm": 1504.31689453125, "learning_rate": 7.988965286825798e-06, "loss": 19.7384, "step": 183220 }, { "epoch": 0.3701361926655543, "grad_norm": 88.89399719238281, "learning_rate": 7.988685450742438e-06, "loss": 19.697, "step": 183230 }, { "epoch": 0.3701563932982381, "grad_norm": 182.2744598388672, "learning_rate": 7.988405600092585e-06, "loss": 18.5467, "step": 183240 }, { "epoch": 0.3701765939309219, "grad_norm": 100.19139099121094, "learning_rate": 7.988125734877607e-06, "loss": 18.1134, "step": 183250 }, { "epoch": 0.3701967945636057, "grad_norm": 313.427734375, "learning_rate": 7.987845855098864e-06, "loss": 18.4095, "step": 183260 }, { "epoch": 0.37021699519628953, "grad_norm": 162.49339294433594, "learning_rate": 7.987565960757726e-06, "loss": 27.7343, "step": 183270 }, { "epoch": 0.37023719582897335, "grad_norm": 304.8258361816406, "learning_rate": 7.987286051855552e-06, "loss": 16.3103, "step": 183280 }, { "epoch": 0.3702573964616572, "grad_norm": 205.7953643798828, "learning_rate": 7.98700612839371e-06, "loss": 11.7378, "step": 183290 }, { "epoch": 0.370277597094341, "grad_norm": 359.7651672363281, "learning_rate": 7.986726190373562e-06, "loss": 12.9544, "step": 183300 }, { "epoch": 0.3702977977270248, "grad_norm": 339.48089599609375, "learning_rate": 7.986446237796471e-06, "loss": 17.5074, "step": 183310 }, { "epoch": 0.37031799835970863, "grad_norm": 127.08641815185547, "learning_rate": 7.986166270663805e-06, "loss": 25.6502, "step": 183320 }, { "epoch": 0.37033819899239245, "grad_norm": 273.1402587890625, "learning_rate": 7.985886288976926e-06, "loss": 17.7708, "step": 183330 }, { "epoch": 0.3703583996250763, "grad_norm": 98.80635070800781, "learning_rate": 7.985606292737199e-06, "loss": 15.99, "step": 183340 }, { "epoch": 0.3703786002577601, "grad_norm": 5.535131454467773, "learning_rate": 7.985326281945988e-06, "loss": 15.9619, "step": 183350 }, { "epoch": 0.3703988008904439, "grad_norm": 206.50917053222656, "learning_rate": 7.98504625660466e-06, "loss": 17.1141, "step": 183360 }, { "epoch": 0.3704190015231277, "grad_norm": 393.49700927734375, "learning_rate": 7.98476621671458e-06, "loss": 14.8547, "step": 183370 }, { "epoch": 0.3704392021558115, "grad_norm": 571.000244140625, "learning_rate": 7.98448616227711e-06, "loss": 23.9105, "step": 183380 }, { "epoch": 0.3704594027884953, "grad_norm": 317.0956115722656, "learning_rate": 7.984206093293617e-06, "loss": 14.3499, "step": 183390 }, { "epoch": 0.37047960342117914, "grad_norm": 234.43984985351562, "learning_rate": 7.983926009765464e-06, "loss": 26.5742, "step": 183400 }, { "epoch": 0.37049980405386296, "grad_norm": 302.6190490722656, "learning_rate": 7.983645911694018e-06, "loss": 26.7659, "step": 183410 }, { "epoch": 0.3705200046865468, "grad_norm": 179.2404327392578, "learning_rate": 7.983365799080645e-06, "loss": 17.2147, "step": 183420 }, { "epoch": 0.3705402053192306, "grad_norm": 169.42054748535156, "learning_rate": 7.983085671926707e-06, "loss": 18.235, "step": 183430 }, { "epoch": 0.3705604059519144, "grad_norm": 147.49693298339844, "learning_rate": 7.982805530233573e-06, "loss": 27.4966, "step": 183440 }, { "epoch": 0.37058060658459824, "grad_norm": 106.38565063476562, "learning_rate": 7.982525374002607e-06, "loss": 16.7028, "step": 183450 }, { "epoch": 0.37060080721728206, "grad_norm": 41.47096252441406, "learning_rate": 7.982245203235172e-06, "loss": 20.8564, "step": 183460 }, { "epoch": 0.3706210078499659, "grad_norm": 548.6837158203125, "learning_rate": 7.981965017932638e-06, "loss": 18.4262, "step": 183470 }, { "epoch": 0.3706412084826497, "grad_norm": 121.4365005493164, "learning_rate": 7.981684818096367e-06, "loss": 25.0246, "step": 183480 }, { "epoch": 0.3706614091153335, "grad_norm": 300.11346435546875, "learning_rate": 7.981404603727726e-06, "loss": 24.432, "step": 183490 }, { "epoch": 0.3706816097480173, "grad_norm": 255.00531005859375, "learning_rate": 7.981124374828079e-06, "loss": 26.7517, "step": 183500 }, { "epoch": 0.3707018103807011, "grad_norm": 491.8828125, "learning_rate": 7.980844131398795e-06, "loss": 21.5452, "step": 183510 }, { "epoch": 0.3707220110133849, "grad_norm": 330.1701354980469, "learning_rate": 7.980563873441239e-06, "loss": 20.5814, "step": 183520 }, { "epoch": 0.37074221164606874, "grad_norm": 62.36580276489258, "learning_rate": 7.980283600956775e-06, "loss": 20.2907, "step": 183530 }, { "epoch": 0.37076241227875256, "grad_norm": 413.7655334472656, "learning_rate": 7.98000331394677e-06, "loss": 15.4138, "step": 183540 }, { "epoch": 0.3707826129114364, "grad_norm": 255.1747283935547, "learning_rate": 7.97972301241259e-06, "loss": 20.0934, "step": 183550 }, { "epoch": 0.3708028135441202, "grad_norm": 369.7781066894531, "learning_rate": 7.979442696355601e-06, "loss": 14.3371, "step": 183560 }, { "epoch": 0.370823014176804, "grad_norm": 428.614013671875, "learning_rate": 7.979162365777173e-06, "loss": 23.8498, "step": 183570 }, { "epoch": 0.37084321480948784, "grad_norm": 55.73485565185547, "learning_rate": 7.978882020678666e-06, "loss": 14.8278, "step": 183580 }, { "epoch": 0.37086341544217166, "grad_norm": 453.10809326171875, "learning_rate": 7.978601661061449e-06, "loss": 25.8035, "step": 183590 }, { "epoch": 0.3708836160748555, "grad_norm": 202.48785400390625, "learning_rate": 7.978321286926892e-06, "loss": 15.4495, "step": 183600 }, { "epoch": 0.3709038167075393, "grad_norm": 323.86334228515625, "learning_rate": 7.978040898276353e-06, "loss": 23.6162, "step": 183610 }, { "epoch": 0.3709240173402231, "grad_norm": 438.67620849609375, "learning_rate": 7.977760495111209e-06, "loss": 27.0456, "step": 183620 }, { "epoch": 0.3709442179729069, "grad_norm": 234.5774383544922, "learning_rate": 7.97748007743282e-06, "loss": 10.0673, "step": 183630 }, { "epoch": 0.3709644186055907, "grad_norm": 239.1904754638672, "learning_rate": 7.977199645242553e-06, "loss": 22.2049, "step": 183640 }, { "epoch": 0.3709846192382745, "grad_norm": 556.6365356445312, "learning_rate": 7.976919198541775e-06, "loss": 32.4702, "step": 183650 }, { "epoch": 0.37100481987095835, "grad_norm": 223.2879180908203, "learning_rate": 7.976638737331855e-06, "loss": 26.2789, "step": 183660 }, { "epoch": 0.37102502050364217, "grad_norm": 147.6748046875, "learning_rate": 7.97635826161416e-06, "loss": 14.5178, "step": 183670 }, { "epoch": 0.371045221136326, "grad_norm": 236.68357849121094, "learning_rate": 7.976077771390056e-06, "loss": 19.9844, "step": 183680 }, { "epoch": 0.3710654217690098, "grad_norm": 331.7314147949219, "learning_rate": 7.975797266660908e-06, "loss": 12.8052, "step": 183690 }, { "epoch": 0.3710856224016936, "grad_norm": 359.43426513671875, "learning_rate": 7.975516747428087e-06, "loss": 18.3442, "step": 183700 }, { "epoch": 0.37110582303437745, "grad_norm": 1844.8714599609375, "learning_rate": 7.975236213692956e-06, "loss": 16.0081, "step": 183710 }, { "epoch": 0.37112602366706127, "grad_norm": 475.0063781738281, "learning_rate": 7.974955665456887e-06, "loss": 16.2759, "step": 183720 }, { "epoch": 0.3711462242997451, "grad_norm": 486.0075378417969, "learning_rate": 7.974675102721244e-06, "loss": 25.068, "step": 183730 }, { "epoch": 0.3711664249324289, "grad_norm": 213.2720947265625, "learning_rate": 7.974394525487395e-06, "loss": 9.7411, "step": 183740 }, { "epoch": 0.3711866255651127, "grad_norm": 548.6483764648438, "learning_rate": 7.974113933756708e-06, "loss": 14.8954, "step": 183750 }, { "epoch": 0.3712068261977965, "grad_norm": 692.1478881835938, "learning_rate": 7.97383332753055e-06, "loss": 25.2171, "step": 183760 }, { "epoch": 0.3712270268304803, "grad_norm": 207.54747009277344, "learning_rate": 7.973552706810288e-06, "loss": 17.4244, "step": 183770 }, { "epoch": 0.37124722746316413, "grad_norm": 284.7762145996094, "learning_rate": 7.973272071597293e-06, "loss": 25.0729, "step": 183780 }, { "epoch": 0.37126742809584795, "grad_norm": 260.96905517578125, "learning_rate": 7.97299142189293e-06, "loss": 10.6164, "step": 183790 }, { "epoch": 0.37128762872853177, "grad_norm": 369.0173645019531, "learning_rate": 7.972710757698567e-06, "loss": 13.8966, "step": 183800 }, { "epoch": 0.3713078293612156, "grad_norm": 909.8313598632812, "learning_rate": 7.972430079015572e-06, "loss": 30.709, "step": 183810 }, { "epoch": 0.3713280299938994, "grad_norm": 64.78792572021484, "learning_rate": 7.972149385845314e-06, "loss": 16.7815, "step": 183820 }, { "epoch": 0.37134823062658323, "grad_norm": 370.19921875, "learning_rate": 7.97186867818916e-06, "loss": 11.4323, "step": 183830 }, { "epoch": 0.37136843125926705, "grad_norm": 507.5756530761719, "learning_rate": 7.971587956048479e-06, "loss": 19.0565, "step": 183840 }, { "epoch": 0.37138863189195087, "grad_norm": 364.47943115234375, "learning_rate": 7.971307219424637e-06, "loss": 16.0245, "step": 183850 }, { "epoch": 0.3714088325246347, "grad_norm": 378.9363708496094, "learning_rate": 7.971026468319006e-06, "loss": 24.7666, "step": 183860 }, { "epoch": 0.3714290331573185, "grad_norm": 730.85107421875, "learning_rate": 7.970745702732951e-06, "loss": 26.4011, "step": 183870 }, { "epoch": 0.3714492337900023, "grad_norm": 250.45175170898438, "learning_rate": 7.970464922667842e-06, "loss": 13.6054, "step": 183880 }, { "epoch": 0.3714694344226861, "grad_norm": 43.31391525268555, "learning_rate": 7.97018412812505e-06, "loss": 24.718, "step": 183890 }, { "epoch": 0.3714896350553699, "grad_norm": 366.313232421875, "learning_rate": 7.969903319105935e-06, "loss": 20.1178, "step": 183900 }, { "epoch": 0.37150983568805374, "grad_norm": 292.8300476074219, "learning_rate": 7.969622495611877e-06, "loss": 16.4202, "step": 183910 }, { "epoch": 0.37153003632073756, "grad_norm": 255.02626037597656, "learning_rate": 7.969341657644236e-06, "loss": 16.4985, "step": 183920 }, { "epoch": 0.3715502369534214, "grad_norm": 219.24993896484375, "learning_rate": 7.969060805204385e-06, "loss": 31.6158, "step": 183930 }, { "epoch": 0.3715704375861052, "grad_norm": 264.1980895996094, "learning_rate": 7.968779938293691e-06, "loss": 19.1418, "step": 183940 }, { "epoch": 0.371590638218789, "grad_norm": 291.5620422363281, "learning_rate": 7.968499056913525e-06, "loss": 13.4511, "step": 183950 }, { "epoch": 0.37161083885147284, "grad_norm": 200.1851348876953, "learning_rate": 7.968218161065253e-06, "loss": 39.5058, "step": 183960 }, { "epoch": 0.37163103948415666, "grad_norm": 38.206851959228516, "learning_rate": 7.967937250750248e-06, "loss": 11.4028, "step": 183970 }, { "epoch": 0.3716512401168405, "grad_norm": 103.1449966430664, "learning_rate": 7.967656325969875e-06, "loss": 14.8532, "step": 183980 }, { "epoch": 0.3716714407495243, "grad_norm": 384.0670471191406, "learning_rate": 7.967375386725505e-06, "loss": 25.3988, "step": 183990 }, { "epoch": 0.3716916413822081, "grad_norm": 121.80668640136719, "learning_rate": 7.967094433018508e-06, "loss": 14.2874, "step": 184000 }, { "epoch": 0.3717118420148919, "grad_norm": 422.5815124511719, "learning_rate": 7.966813464850252e-06, "loss": 21.3741, "step": 184010 }, { "epoch": 0.3717320426475757, "grad_norm": 162.81365966796875, "learning_rate": 7.966532482222106e-06, "loss": 16.8188, "step": 184020 }, { "epoch": 0.3717522432802595, "grad_norm": 271.2087097167969, "learning_rate": 7.966251485135443e-06, "loss": 16.3678, "step": 184030 }, { "epoch": 0.37177244391294334, "grad_norm": 248.1273956298828, "learning_rate": 7.96597047359163e-06, "loss": 13.3516, "step": 184040 }, { "epoch": 0.37179264454562716, "grad_norm": 165.0633544921875, "learning_rate": 7.965689447592034e-06, "loss": 20.2467, "step": 184050 }, { "epoch": 0.371812845178311, "grad_norm": 116.68669891357422, "learning_rate": 7.96540840713803e-06, "loss": 15.6072, "step": 184060 }, { "epoch": 0.3718330458109948, "grad_norm": 153.53713989257812, "learning_rate": 7.965127352230984e-06, "loss": 19.9521, "step": 184070 }, { "epoch": 0.3718532464436786, "grad_norm": 184.6919403076172, "learning_rate": 7.964846282872265e-06, "loss": 20.5157, "step": 184080 }, { "epoch": 0.37187344707636244, "grad_norm": 416.5748291015625, "learning_rate": 7.964565199063247e-06, "loss": 22.5533, "step": 184090 }, { "epoch": 0.37189364770904626, "grad_norm": 302.62396240234375, "learning_rate": 7.964284100805297e-06, "loss": 17.1546, "step": 184100 }, { "epoch": 0.3719138483417301, "grad_norm": 355.34454345703125, "learning_rate": 7.964002988099785e-06, "loss": 31.3367, "step": 184110 }, { "epoch": 0.3719340489744139, "grad_norm": 430.9053649902344, "learning_rate": 7.963721860948085e-06, "loss": 19.0881, "step": 184120 }, { "epoch": 0.3719542496070977, "grad_norm": 276.60308837890625, "learning_rate": 7.96344071935156e-06, "loss": 25.797, "step": 184130 }, { "epoch": 0.3719744502397815, "grad_norm": 198.67739868164062, "learning_rate": 7.963159563311587e-06, "loss": 25.2939, "step": 184140 }, { "epoch": 0.3719946508724653, "grad_norm": 257.9796142578125, "learning_rate": 7.962878392829533e-06, "loss": 24.5104, "step": 184150 }, { "epoch": 0.3720148515051491, "grad_norm": 659.9815063476562, "learning_rate": 7.96259720790677e-06, "loss": 29.7285, "step": 184160 }, { "epoch": 0.37203505213783294, "grad_norm": 228.37290954589844, "learning_rate": 7.962316008544666e-06, "loss": 13.8403, "step": 184170 }, { "epoch": 0.37205525277051676, "grad_norm": 2.4217166900634766, "learning_rate": 7.962034794744594e-06, "loss": 24.8965, "step": 184180 }, { "epoch": 0.3720754534032006, "grad_norm": 215.76852416992188, "learning_rate": 7.961753566507924e-06, "loss": 13.5333, "step": 184190 }, { "epoch": 0.3720956540358844, "grad_norm": 251.4265594482422, "learning_rate": 7.961472323836025e-06, "loss": 22.3213, "step": 184200 }, { "epoch": 0.3721158546685682, "grad_norm": 743.0811157226562, "learning_rate": 7.961191066730272e-06, "loss": 22.7665, "step": 184210 }, { "epoch": 0.37213605530125204, "grad_norm": 199.9608612060547, "learning_rate": 7.960909795192029e-06, "loss": 14.893, "step": 184220 }, { "epoch": 0.37215625593393586, "grad_norm": 97.30582427978516, "learning_rate": 7.960628509222674e-06, "loss": 12.9763, "step": 184230 }, { "epoch": 0.3721764565666197, "grad_norm": 185.48228454589844, "learning_rate": 7.960347208823572e-06, "loss": 26.1722, "step": 184240 }, { "epoch": 0.3721966571993035, "grad_norm": 427.7911071777344, "learning_rate": 7.960065893996099e-06, "loss": 11.8422, "step": 184250 }, { "epoch": 0.3722168578319873, "grad_norm": 236.9356689453125, "learning_rate": 7.959784564741622e-06, "loss": 19.8525, "step": 184260 }, { "epoch": 0.3722370584646711, "grad_norm": 254.95457458496094, "learning_rate": 7.959503221061515e-06, "loss": 24.67, "step": 184270 }, { "epoch": 0.3722572590973549, "grad_norm": 260.4693298339844, "learning_rate": 7.959221862957149e-06, "loss": 27.1759, "step": 184280 }, { "epoch": 0.37227745973003873, "grad_norm": 292.5946350097656, "learning_rate": 7.958940490429893e-06, "loss": 36.6283, "step": 184290 }, { "epoch": 0.37229766036272255, "grad_norm": 76.93849182128906, "learning_rate": 7.95865910348112e-06, "loss": 18.4378, "step": 184300 }, { "epoch": 0.37231786099540637, "grad_norm": 540.5844116210938, "learning_rate": 7.958377702112204e-06, "loss": 27.7749, "step": 184310 }, { "epoch": 0.3723380616280902, "grad_norm": 469.05902099609375, "learning_rate": 7.95809628632451e-06, "loss": 14.354, "step": 184320 }, { "epoch": 0.372358262260774, "grad_norm": 259.8488464355469, "learning_rate": 7.957814856119416e-06, "loss": 15.3164, "step": 184330 }, { "epoch": 0.37237846289345783, "grad_norm": 262.31158447265625, "learning_rate": 7.95753341149829e-06, "loss": 18.7401, "step": 184340 }, { "epoch": 0.37239866352614165, "grad_norm": 195.33380126953125, "learning_rate": 7.957251952462506e-06, "loss": 20.534, "step": 184350 }, { "epoch": 0.37241886415882547, "grad_norm": 395.56939697265625, "learning_rate": 7.956970479013433e-06, "loss": 17.5991, "step": 184360 }, { "epoch": 0.3724390647915093, "grad_norm": 467.8310546875, "learning_rate": 7.956688991152446e-06, "loss": 23.1862, "step": 184370 }, { "epoch": 0.3724592654241931, "grad_norm": 241.0390625, "learning_rate": 7.956407488880915e-06, "loss": 22.9269, "step": 184380 }, { "epoch": 0.37247946605687693, "grad_norm": 155.72215270996094, "learning_rate": 7.956125972200212e-06, "loss": 24.9957, "step": 184390 }, { "epoch": 0.3724996666895607, "grad_norm": 164.59249877929688, "learning_rate": 7.95584444111171e-06, "loss": 13.1441, "step": 184400 }, { "epoch": 0.3725198673222445, "grad_norm": 200.87725830078125, "learning_rate": 7.955562895616782e-06, "loss": 19.1187, "step": 184410 }, { "epoch": 0.37254006795492833, "grad_norm": 519.3486938476562, "learning_rate": 7.955281335716797e-06, "loss": 20.4239, "step": 184420 }, { "epoch": 0.37256026858761215, "grad_norm": 271.83648681640625, "learning_rate": 7.954999761413129e-06, "loss": 39.727, "step": 184430 }, { "epoch": 0.372580469220296, "grad_norm": 362.5187072753906, "learning_rate": 7.954718172707153e-06, "loss": 15.9214, "step": 184440 }, { "epoch": 0.3726006698529798, "grad_norm": 888.1478271484375, "learning_rate": 7.954436569600238e-06, "loss": 18.7764, "step": 184450 }, { "epoch": 0.3726208704856636, "grad_norm": 740.99560546875, "learning_rate": 7.954154952093754e-06, "loss": 25.378, "step": 184460 }, { "epoch": 0.37264107111834743, "grad_norm": 157.15164184570312, "learning_rate": 7.95387332018908e-06, "loss": 14.2277, "step": 184470 }, { "epoch": 0.37266127175103125, "grad_norm": 137.23184204101562, "learning_rate": 7.953591673887586e-06, "loss": 12.185, "step": 184480 }, { "epoch": 0.3726814723837151, "grad_norm": 351.32891845703125, "learning_rate": 7.953310013190645e-06, "loss": 19.4576, "step": 184490 }, { "epoch": 0.3727016730163989, "grad_norm": 1181.9573974609375, "learning_rate": 7.953028338099628e-06, "loss": 30.7105, "step": 184500 }, { "epoch": 0.3727218736490827, "grad_norm": 138.59971618652344, "learning_rate": 7.952746648615908e-06, "loss": 18.514, "step": 184510 }, { "epoch": 0.3727420742817665, "grad_norm": 416.4306945800781, "learning_rate": 7.952464944740861e-06, "loss": 18.8617, "step": 184520 }, { "epoch": 0.3727622749144503, "grad_norm": 64.54448699951172, "learning_rate": 7.952183226475858e-06, "loss": 15.8276, "step": 184530 }, { "epoch": 0.3727824755471341, "grad_norm": 87.07196807861328, "learning_rate": 7.95190149382227e-06, "loss": 24.905, "step": 184540 }, { "epoch": 0.37280267617981794, "grad_norm": 86.88279724121094, "learning_rate": 7.951619746781474e-06, "loss": 30.1484, "step": 184550 }, { "epoch": 0.37282287681250176, "grad_norm": 399.5159606933594, "learning_rate": 7.95133798535484e-06, "loss": 22.1146, "step": 184560 }, { "epoch": 0.3728430774451856, "grad_norm": 326.522705078125, "learning_rate": 7.951056209543744e-06, "loss": 29.5776, "step": 184570 }, { "epoch": 0.3728632780778694, "grad_norm": 346.9861145019531, "learning_rate": 7.950774419349557e-06, "loss": 22.8106, "step": 184580 }, { "epoch": 0.3728834787105532, "grad_norm": 152.81338500976562, "learning_rate": 7.950492614773653e-06, "loss": 24.0536, "step": 184590 }, { "epoch": 0.37290367934323704, "grad_norm": 0.0, "learning_rate": 7.950210795817406e-06, "loss": 17.0138, "step": 184600 }, { "epoch": 0.37292387997592086, "grad_norm": 660.4743041992188, "learning_rate": 7.949928962482191e-06, "loss": 27.2651, "step": 184610 }, { "epoch": 0.3729440806086047, "grad_norm": 391.15521240234375, "learning_rate": 7.94964711476938e-06, "loss": 18.858, "step": 184620 }, { "epoch": 0.3729642812412885, "grad_norm": 227.0800018310547, "learning_rate": 7.949365252680343e-06, "loss": 29.5279, "step": 184630 }, { "epoch": 0.3729844818739723, "grad_norm": 312.3140869140625, "learning_rate": 7.94908337621646e-06, "loss": 24.9015, "step": 184640 }, { "epoch": 0.3730046825066561, "grad_norm": 268.701171875, "learning_rate": 7.948801485379103e-06, "loss": 26.3336, "step": 184650 }, { "epoch": 0.3730248831393399, "grad_norm": 243.5205535888672, "learning_rate": 7.948519580169644e-06, "loss": 28.1202, "step": 184660 }, { "epoch": 0.3730450837720237, "grad_norm": 379.9081115722656, "learning_rate": 7.94823766058946e-06, "loss": 31.7025, "step": 184670 }, { "epoch": 0.37306528440470754, "grad_norm": 384.6133117675781, "learning_rate": 7.947955726639922e-06, "loss": 20.4581, "step": 184680 }, { "epoch": 0.37308548503739136, "grad_norm": 264.58477783203125, "learning_rate": 7.947673778322405e-06, "loss": 17.5783, "step": 184690 }, { "epoch": 0.3731056856700752, "grad_norm": 129.25689697265625, "learning_rate": 7.947391815638284e-06, "loss": 13.0007, "step": 184700 }, { "epoch": 0.373125886302759, "grad_norm": 170.17800903320312, "learning_rate": 7.947109838588932e-06, "loss": 24.5116, "step": 184710 }, { "epoch": 0.3731460869354428, "grad_norm": 587.4125366210938, "learning_rate": 7.946827847175724e-06, "loss": 15.4224, "step": 184720 }, { "epoch": 0.37316628756812664, "grad_norm": 337.5641784667969, "learning_rate": 7.946545841400035e-06, "loss": 24.3632, "step": 184730 }, { "epoch": 0.37318648820081046, "grad_norm": 539.6276245117188, "learning_rate": 7.94626382126324e-06, "loss": 19.887, "step": 184740 }, { "epoch": 0.3732066888334943, "grad_norm": 363.0155029296875, "learning_rate": 7.945981786766712e-06, "loss": 38.0237, "step": 184750 }, { "epoch": 0.3732268894661781, "grad_norm": 52.28741455078125, "learning_rate": 7.945699737911825e-06, "loss": 18.4077, "step": 184760 }, { "epoch": 0.3732470900988619, "grad_norm": 347.2657470703125, "learning_rate": 7.945417674699954e-06, "loss": 28.6896, "step": 184770 }, { "epoch": 0.3732672907315457, "grad_norm": 332.4136047363281, "learning_rate": 7.945135597132477e-06, "loss": 22.0173, "step": 184780 }, { "epoch": 0.3732874913642295, "grad_norm": 263.45233154296875, "learning_rate": 7.944853505210766e-06, "loss": 21.674, "step": 184790 }, { "epoch": 0.3733076919969133, "grad_norm": 226.17564392089844, "learning_rate": 7.944571398936193e-06, "loss": 19.5489, "step": 184800 }, { "epoch": 0.37332789262959715, "grad_norm": 278.71405029296875, "learning_rate": 7.94428927831014e-06, "loss": 23.5682, "step": 184810 }, { "epoch": 0.37334809326228097, "grad_norm": 247.7705841064453, "learning_rate": 7.944007143333976e-06, "loss": 16.4044, "step": 184820 }, { "epoch": 0.3733682938949648, "grad_norm": 199.451416015625, "learning_rate": 7.943724994009078e-06, "loss": 19.6303, "step": 184830 }, { "epoch": 0.3733884945276486, "grad_norm": 427.4113464355469, "learning_rate": 7.943442830336822e-06, "loss": 29.7668, "step": 184840 }, { "epoch": 0.3734086951603324, "grad_norm": 309.8070068359375, "learning_rate": 7.943160652318585e-06, "loss": 28.3996, "step": 184850 }, { "epoch": 0.37342889579301625, "grad_norm": 246.4385223388672, "learning_rate": 7.942878459955737e-06, "loss": 15.2405, "step": 184860 }, { "epoch": 0.37344909642570007, "grad_norm": 1155.147216796875, "learning_rate": 7.942596253249658e-06, "loss": 28.3222, "step": 184870 }, { "epoch": 0.3734692970583839, "grad_norm": 220.36927795410156, "learning_rate": 7.94231403220172e-06, "loss": 10.6979, "step": 184880 }, { "epoch": 0.3734894976910677, "grad_norm": 284.5457763671875, "learning_rate": 7.942031796813302e-06, "loss": 24.6864, "step": 184890 }, { "epoch": 0.3735096983237515, "grad_norm": 236.20889282226562, "learning_rate": 7.941749547085778e-06, "loss": 17.2292, "step": 184900 }, { "epoch": 0.3735298989564353, "grad_norm": 244.38455200195312, "learning_rate": 7.941467283020521e-06, "loss": 31.1537, "step": 184910 }, { "epoch": 0.3735500995891191, "grad_norm": 92.53909301757812, "learning_rate": 7.941185004618911e-06, "loss": 19.7693, "step": 184920 }, { "epoch": 0.37357030022180293, "grad_norm": 199.9776611328125, "learning_rate": 7.940902711882321e-06, "loss": 10.4553, "step": 184930 }, { "epoch": 0.37359050085448675, "grad_norm": 114.70950317382812, "learning_rate": 7.940620404812129e-06, "loss": 19.3286, "step": 184940 }, { "epoch": 0.37361070148717057, "grad_norm": 122.59977722167969, "learning_rate": 7.94033808340971e-06, "loss": 15.793, "step": 184950 }, { "epoch": 0.3736309021198544, "grad_norm": 496.680419921875, "learning_rate": 7.940055747676439e-06, "loss": 44.2226, "step": 184960 }, { "epoch": 0.3736511027525382, "grad_norm": 102.9199447631836, "learning_rate": 7.939773397613692e-06, "loss": 23.5337, "step": 184970 }, { "epoch": 0.37367130338522203, "grad_norm": 250.1410675048828, "learning_rate": 7.939491033222848e-06, "loss": 16.7879, "step": 184980 }, { "epoch": 0.37369150401790585, "grad_norm": 77.61206817626953, "learning_rate": 7.939208654505281e-06, "loss": 11.2551, "step": 184990 }, { "epoch": 0.37371170465058967, "grad_norm": 347.7478332519531, "learning_rate": 7.938926261462366e-06, "loss": 28.8498, "step": 185000 }, { "epoch": 0.3737319052832735, "grad_norm": 337.070068359375, "learning_rate": 7.938643854095482e-06, "loss": 53.4416, "step": 185010 }, { "epoch": 0.3737521059159573, "grad_norm": 633.2750244140625, "learning_rate": 7.938361432406005e-06, "loss": 19.5428, "step": 185020 }, { "epoch": 0.37377230654864113, "grad_norm": 52.884273529052734, "learning_rate": 7.93807899639531e-06, "loss": 12.7865, "step": 185030 }, { "epoch": 0.3737925071813249, "grad_norm": 62.2898063659668, "learning_rate": 7.937796546064773e-06, "loss": 24.0253, "step": 185040 }, { "epoch": 0.3738127078140087, "grad_norm": 526.62158203125, "learning_rate": 7.937514081415773e-06, "loss": 38.3259, "step": 185050 }, { "epoch": 0.37383290844669254, "grad_norm": 158.3021697998047, "learning_rate": 7.937231602449687e-06, "loss": 21.6599, "step": 185060 }, { "epoch": 0.37385310907937636, "grad_norm": 373.909423828125, "learning_rate": 7.936949109167887e-06, "loss": 39.3606, "step": 185070 }, { "epoch": 0.3738733097120602, "grad_norm": 191.75303649902344, "learning_rate": 7.936666601571756e-06, "loss": 15.6142, "step": 185080 }, { "epoch": 0.373893510344744, "grad_norm": 12.135784149169922, "learning_rate": 7.936384079662666e-06, "loss": 17.1578, "step": 185090 }, { "epoch": 0.3739137109774278, "grad_norm": 232.1132354736328, "learning_rate": 7.936101543441998e-06, "loss": 15.4894, "step": 185100 }, { "epoch": 0.37393391161011164, "grad_norm": 219.03277587890625, "learning_rate": 7.935818992911129e-06, "loss": 34.4901, "step": 185110 }, { "epoch": 0.37395411224279546, "grad_norm": 158.148193359375, "learning_rate": 7.935536428071431e-06, "loss": 13.8688, "step": 185120 }, { "epoch": 0.3739743128754793, "grad_norm": 255.30038452148438, "learning_rate": 7.935253848924285e-06, "loss": 16.5437, "step": 185130 }, { "epoch": 0.3739945135081631, "grad_norm": 241.79319763183594, "learning_rate": 7.93497125547107e-06, "loss": 17.6727, "step": 185140 }, { "epoch": 0.3740147141408469, "grad_norm": 327.62823486328125, "learning_rate": 7.934688647713158e-06, "loss": 17.1483, "step": 185150 }, { "epoch": 0.3740349147735307, "grad_norm": 313.69183349609375, "learning_rate": 7.93440602565193e-06, "loss": 16.6495, "step": 185160 }, { "epoch": 0.3740551154062145, "grad_norm": 118.04862213134766, "learning_rate": 7.934123389288765e-06, "loss": 18.0619, "step": 185170 }, { "epoch": 0.3740753160388983, "grad_norm": 39.635711669921875, "learning_rate": 7.933840738625035e-06, "loss": 9.1773, "step": 185180 }, { "epoch": 0.37409551667158214, "grad_norm": 280.9408874511719, "learning_rate": 7.933558073662125e-06, "loss": 19.29, "step": 185190 }, { "epoch": 0.37411571730426596, "grad_norm": 81.98887634277344, "learning_rate": 7.933275394401407e-06, "loss": 13.2514, "step": 185200 }, { "epoch": 0.3741359179369498, "grad_norm": 269.8737487792969, "learning_rate": 7.932992700844261e-06, "loss": 15.9975, "step": 185210 }, { "epoch": 0.3741561185696336, "grad_norm": 365.34173583984375, "learning_rate": 7.932709992992063e-06, "loss": 25.5277, "step": 185220 }, { "epoch": 0.3741763192023174, "grad_norm": 263.77081298828125, "learning_rate": 7.932427270846194e-06, "loss": 8.9371, "step": 185230 }, { "epoch": 0.37419651983500124, "grad_norm": 361.0832824707031, "learning_rate": 7.932144534408028e-06, "loss": 11.2786, "step": 185240 }, { "epoch": 0.37421672046768506, "grad_norm": 301.998291015625, "learning_rate": 7.931861783678946e-06, "loss": 20.0566, "step": 185250 }, { "epoch": 0.3742369211003689, "grad_norm": 184.75521850585938, "learning_rate": 7.931579018660327e-06, "loss": 30.3256, "step": 185260 }, { "epoch": 0.3742571217330527, "grad_norm": 120.67028045654297, "learning_rate": 7.931296239353546e-06, "loss": 10.0224, "step": 185270 }, { "epoch": 0.3742773223657365, "grad_norm": 328.575927734375, "learning_rate": 7.931013445759984e-06, "loss": 17.9088, "step": 185280 }, { "epoch": 0.3742975229984203, "grad_norm": 271.0068054199219, "learning_rate": 7.930730637881016e-06, "loss": 24.6827, "step": 185290 }, { "epoch": 0.3743177236311041, "grad_norm": 20.137245178222656, "learning_rate": 7.930447815718022e-06, "loss": 20.3838, "step": 185300 }, { "epoch": 0.3743379242637879, "grad_norm": 285.2964172363281, "learning_rate": 7.93016497927238e-06, "loss": 18.2215, "step": 185310 }, { "epoch": 0.37435812489647174, "grad_norm": 221.62429809570312, "learning_rate": 7.929882128545474e-06, "loss": 29.0239, "step": 185320 }, { "epoch": 0.37437832552915556, "grad_norm": 421.0754699707031, "learning_rate": 7.929599263538674e-06, "loss": 13.2906, "step": 185330 }, { "epoch": 0.3743985261618394, "grad_norm": 193.9161376953125, "learning_rate": 7.929316384253363e-06, "loss": 36.1366, "step": 185340 }, { "epoch": 0.3744187267945232, "grad_norm": 187.2124786376953, "learning_rate": 7.929033490690921e-06, "loss": 13.5693, "step": 185350 }, { "epoch": 0.374438927427207, "grad_norm": 439.8352966308594, "learning_rate": 7.928750582852722e-06, "loss": 23.5807, "step": 185360 }, { "epoch": 0.37445912805989084, "grad_norm": 157.73751831054688, "learning_rate": 7.92846766074015e-06, "loss": 13.0055, "step": 185370 }, { "epoch": 0.37447932869257466, "grad_norm": 39.84578323364258, "learning_rate": 7.928184724354581e-06, "loss": 16.693, "step": 185380 }, { "epoch": 0.3744995293252585, "grad_norm": 113.8099365234375, "learning_rate": 7.927901773697396e-06, "loss": 19.1534, "step": 185390 }, { "epoch": 0.3745197299579423, "grad_norm": 315.4614562988281, "learning_rate": 7.927618808769971e-06, "loss": 13.0848, "step": 185400 }, { "epoch": 0.3745399305906261, "grad_norm": 135.65469360351562, "learning_rate": 7.927335829573688e-06, "loss": 25.1562, "step": 185410 }, { "epoch": 0.3745601312233099, "grad_norm": 248.0388641357422, "learning_rate": 7.927052836109925e-06, "loss": 21.1852, "step": 185420 }, { "epoch": 0.3745803318559937, "grad_norm": 414.81536865234375, "learning_rate": 7.926769828380062e-06, "loss": 25.4115, "step": 185430 }, { "epoch": 0.37460053248867753, "grad_norm": 290.8090515136719, "learning_rate": 7.926486806385479e-06, "loss": 14.0105, "step": 185440 }, { "epoch": 0.37462073312136135, "grad_norm": 402.22869873046875, "learning_rate": 7.926203770127552e-06, "loss": 16.3543, "step": 185450 }, { "epoch": 0.37464093375404517, "grad_norm": 189.4053192138672, "learning_rate": 7.925920719607663e-06, "loss": 19.4546, "step": 185460 }, { "epoch": 0.374661134386729, "grad_norm": 384.5511779785156, "learning_rate": 7.925637654827192e-06, "loss": 17.1587, "step": 185470 }, { "epoch": 0.3746813350194128, "grad_norm": 233.11221313476562, "learning_rate": 7.925354575787517e-06, "loss": 18.7068, "step": 185480 }, { "epoch": 0.37470153565209663, "grad_norm": 2.587369918823242, "learning_rate": 7.925071482490018e-06, "loss": 22.7074, "step": 185490 }, { "epoch": 0.37472173628478045, "grad_norm": 85.40553283691406, "learning_rate": 7.92478837493608e-06, "loss": 26.1757, "step": 185500 }, { "epoch": 0.37474193691746427, "grad_norm": 85.49195098876953, "learning_rate": 7.924505253127072e-06, "loss": 17.4311, "step": 185510 }, { "epoch": 0.3747621375501481, "grad_norm": 122.28205871582031, "learning_rate": 7.924222117064385e-06, "loss": 10.086, "step": 185520 }, { "epoch": 0.3747823381828319, "grad_norm": 506.0031433105469, "learning_rate": 7.92393896674939e-06, "loss": 12.787, "step": 185530 }, { "epoch": 0.37480253881551573, "grad_norm": 277.2618103027344, "learning_rate": 7.923655802183475e-06, "loss": 26.1065, "step": 185540 }, { "epoch": 0.3748227394481995, "grad_norm": 444.6019592285156, "learning_rate": 7.923372623368014e-06, "loss": 22.7634, "step": 185550 }, { "epoch": 0.3748429400808833, "grad_norm": 387.7466125488281, "learning_rate": 7.92308943030439e-06, "loss": 27.8994, "step": 185560 }, { "epoch": 0.37486314071356713, "grad_norm": 244.80221557617188, "learning_rate": 7.922806222993981e-06, "loss": 15.4222, "step": 185570 }, { "epoch": 0.37488334134625095, "grad_norm": 283.90771484375, "learning_rate": 7.92252300143817e-06, "loss": 34.1183, "step": 185580 }, { "epoch": 0.3749035419789348, "grad_norm": 175.4582977294922, "learning_rate": 7.922239765638338e-06, "loss": 19.7774, "step": 185590 }, { "epoch": 0.3749237426116186, "grad_norm": 284.7312927246094, "learning_rate": 7.921956515595861e-06, "loss": 23.3967, "step": 185600 }, { "epoch": 0.3749439432443024, "grad_norm": 708.48388671875, "learning_rate": 7.921673251312124e-06, "loss": 35.8806, "step": 185610 }, { "epoch": 0.37496414387698623, "grad_norm": 16.169157028198242, "learning_rate": 7.921389972788505e-06, "loss": 12.2874, "step": 185620 }, { "epoch": 0.37498434450967005, "grad_norm": 187.00140380859375, "learning_rate": 7.921106680026388e-06, "loss": 17.9011, "step": 185630 }, { "epoch": 0.3750045451423539, "grad_norm": 246.0491943359375, "learning_rate": 7.920823373027149e-06, "loss": 13.4608, "step": 185640 }, { "epoch": 0.3750247457750377, "grad_norm": 277.2389221191406, "learning_rate": 7.920540051792171e-06, "loss": 22.0776, "step": 185650 }, { "epoch": 0.3750449464077215, "grad_norm": 373.6636657714844, "learning_rate": 7.920256716322837e-06, "loss": 10.6582, "step": 185660 }, { "epoch": 0.3750651470404053, "grad_norm": 329.0796203613281, "learning_rate": 7.919973366620525e-06, "loss": 22.3193, "step": 185670 }, { "epoch": 0.3750853476730891, "grad_norm": 235.19625854492188, "learning_rate": 7.919690002686615e-06, "loss": 22.2559, "step": 185680 }, { "epoch": 0.3751055483057729, "grad_norm": 250.4349822998047, "learning_rate": 7.919406624522492e-06, "loss": 27.1338, "step": 185690 }, { "epoch": 0.37512574893845674, "grad_norm": 273.69146728515625, "learning_rate": 7.919123232129535e-06, "loss": 16.1475, "step": 185700 }, { "epoch": 0.37514594957114056, "grad_norm": 426.8612365722656, "learning_rate": 7.918839825509126e-06, "loss": 19.9313, "step": 185710 }, { "epoch": 0.3751661502038244, "grad_norm": 2.136836051940918, "learning_rate": 7.918556404662645e-06, "loss": 8.9198, "step": 185720 }, { "epoch": 0.3751863508365082, "grad_norm": 333.435302734375, "learning_rate": 7.918272969591474e-06, "loss": 31.7761, "step": 185730 }, { "epoch": 0.375206551469192, "grad_norm": 451.25714111328125, "learning_rate": 7.917989520296996e-06, "loss": 18.5472, "step": 185740 }, { "epoch": 0.37522675210187584, "grad_norm": 157.28224182128906, "learning_rate": 7.917706056780588e-06, "loss": 29.4271, "step": 185750 }, { "epoch": 0.37524695273455966, "grad_norm": 308.7602844238281, "learning_rate": 7.917422579043637e-06, "loss": 11.4206, "step": 185760 }, { "epoch": 0.3752671533672435, "grad_norm": 436.5746765136719, "learning_rate": 7.91713908708752e-06, "loss": 22.6611, "step": 185770 }, { "epoch": 0.3752873539999273, "grad_norm": 424.0115051269531, "learning_rate": 7.916855580913622e-06, "loss": 25.717, "step": 185780 }, { "epoch": 0.3753075546326111, "grad_norm": 152.55250549316406, "learning_rate": 7.916572060523326e-06, "loss": 24.1854, "step": 185790 }, { "epoch": 0.3753277552652949, "grad_norm": 117.04178619384766, "learning_rate": 7.916288525918008e-06, "loss": 19.5701, "step": 185800 }, { "epoch": 0.3753479558979787, "grad_norm": 311.39178466796875, "learning_rate": 7.916004977099054e-06, "loss": 27.6695, "step": 185810 }, { "epoch": 0.3753681565306625, "grad_norm": 510.76214599609375, "learning_rate": 7.915721414067847e-06, "loss": 21.4665, "step": 185820 }, { "epoch": 0.37538835716334634, "grad_norm": 306.6426696777344, "learning_rate": 7.915437836825767e-06, "loss": 28.1763, "step": 185830 }, { "epoch": 0.37540855779603016, "grad_norm": 111.72242736816406, "learning_rate": 7.915154245374197e-06, "loss": 10.9436, "step": 185840 }, { "epoch": 0.375428758428714, "grad_norm": 190.03533935546875, "learning_rate": 7.914870639714517e-06, "loss": 16.0454, "step": 185850 }, { "epoch": 0.3754489590613978, "grad_norm": 154.81063842773438, "learning_rate": 7.914587019848113e-06, "loss": 22.2748, "step": 185860 }, { "epoch": 0.3754691596940816, "grad_norm": 369.34857177734375, "learning_rate": 7.914303385776365e-06, "loss": 14.7787, "step": 185870 }, { "epoch": 0.37548936032676544, "grad_norm": 130.79006958007812, "learning_rate": 7.914019737500655e-06, "loss": 20.2075, "step": 185880 }, { "epoch": 0.37550956095944926, "grad_norm": 416.4476013183594, "learning_rate": 7.913736075022366e-06, "loss": 19.2695, "step": 185890 }, { "epoch": 0.3755297615921331, "grad_norm": 323.06298828125, "learning_rate": 7.913452398342882e-06, "loss": 17.0022, "step": 185900 }, { "epoch": 0.3755499622248169, "grad_norm": 271.4762878417969, "learning_rate": 7.913168707463583e-06, "loss": 15.5294, "step": 185910 }, { "epoch": 0.3755701628575007, "grad_norm": 418.99835205078125, "learning_rate": 7.912885002385852e-06, "loss": 25.924, "step": 185920 }, { "epoch": 0.3755903634901845, "grad_norm": 741.998291015625, "learning_rate": 7.912601283111076e-06, "loss": 24.3933, "step": 185930 }, { "epoch": 0.3756105641228683, "grad_norm": 334.1812438964844, "learning_rate": 7.912317549640632e-06, "loss": 12.7267, "step": 185940 }, { "epoch": 0.3756307647555521, "grad_norm": 336.8026123046875, "learning_rate": 7.912033801975907e-06, "loss": 30.6624, "step": 185950 }, { "epoch": 0.37565096538823595, "grad_norm": 348.30810546875, "learning_rate": 7.911750040118282e-06, "loss": 25.4599, "step": 185960 }, { "epoch": 0.37567116602091977, "grad_norm": 390.4669494628906, "learning_rate": 7.91146626406914e-06, "loss": 15.6367, "step": 185970 }, { "epoch": 0.3756913666536036, "grad_norm": 597.0534057617188, "learning_rate": 7.911182473829865e-06, "loss": 14.134, "step": 185980 }, { "epoch": 0.3757115672862874, "grad_norm": 224.79171752929688, "learning_rate": 7.91089866940184e-06, "loss": 24.2641, "step": 185990 }, { "epoch": 0.3757317679189712, "grad_norm": 641.9180297851562, "learning_rate": 7.910614850786448e-06, "loss": 28.1794, "step": 186000 }, { "epoch": 0.37575196855165505, "grad_norm": 512.9077758789062, "learning_rate": 7.910331017985072e-06, "loss": 15.7285, "step": 186010 }, { "epoch": 0.37577216918433887, "grad_norm": 351.925048828125, "learning_rate": 7.910047170999095e-06, "loss": 29.4474, "step": 186020 }, { "epoch": 0.3757923698170227, "grad_norm": 65.32705688476562, "learning_rate": 7.9097633098299e-06, "loss": 42.512, "step": 186030 }, { "epoch": 0.3758125704497065, "grad_norm": 266.1769714355469, "learning_rate": 7.909479434478874e-06, "loss": 17.044, "step": 186040 }, { "epoch": 0.3758327710823903, "grad_norm": 333.8954162597656, "learning_rate": 7.909195544947398e-06, "loss": 27.1441, "step": 186050 }, { "epoch": 0.3758529717150741, "grad_norm": 236.4756317138672, "learning_rate": 7.908911641236855e-06, "loss": 21.2787, "step": 186060 }, { "epoch": 0.3758731723477579, "grad_norm": 165.28330993652344, "learning_rate": 7.908627723348628e-06, "loss": 23.4746, "step": 186070 }, { "epoch": 0.37589337298044173, "grad_norm": 201.89808654785156, "learning_rate": 7.908343791284104e-06, "loss": 29.5584, "step": 186080 }, { "epoch": 0.37591357361312555, "grad_norm": 306.7787780761719, "learning_rate": 7.908059845044665e-06, "loss": 13.0966, "step": 186090 }, { "epoch": 0.37593377424580937, "grad_norm": 359.8141784667969, "learning_rate": 7.907775884631694e-06, "loss": 15.9871, "step": 186100 }, { "epoch": 0.3759539748784932, "grad_norm": 132.39022827148438, "learning_rate": 7.907491910046578e-06, "loss": 34.8949, "step": 186110 }, { "epoch": 0.375974175511177, "grad_norm": 130.3917999267578, "learning_rate": 7.907207921290698e-06, "loss": 24.0736, "step": 186120 }, { "epoch": 0.37599437614386083, "grad_norm": 113.33358764648438, "learning_rate": 7.906923918365439e-06, "loss": 25.4645, "step": 186130 }, { "epoch": 0.37601457677654465, "grad_norm": 123.42782592773438, "learning_rate": 7.906639901272183e-06, "loss": 39.9559, "step": 186140 }, { "epoch": 0.37603477740922847, "grad_norm": 264.7405090332031, "learning_rate": 7.90635587001232e-06, "loss": 25.6556, "step": 186150 }, { "epoch": 0.3760549780419123, "grad_norm": 95.81927490234375, "learning_rate": 7.906071824587231e-06, "loss": 19.0319, "step": 186160 }, { "epoch": 0.3760751786745961, "grad_norm": 81.0352783203125, "learning_rate": 7.9057877649983e-06, "loss": 25.8791, "step": 186170 }, { "epoch": 0.37609537930727993, "grad_norm": 321.3166198730469, "learning_rate": 7.905503691246909e-06, "loss": 16.1987, "step": 186180 }, { "epoch": 0.3761155799399637, "grad_norm": 194.05514526367188, "learning_rate": 7.905219603334449e-06, "loss": 13.2131, "step": 186190 }, { "epoch": 0.3761357805726475, "grad_norm": 37.614803314208984, "learning_rate": 7.904935501262301e-06, "loss": 18.0454, "step": 186200 }, { "epoch": 0.37615598120533134, "grad_norm": 203.42010498046875, "learning_rate": 7.904651385031847e-06, "loss": 37.955, "step": 186210 }, { "epoch": 0.37617618183801516, "grad_norm": 529.5454711914062, "learning_rate": 7.904367254644475e-06, "loss": 20.5918, "step": 186220 }, { "epoch": 0.376196382470699, "grad_norm": 121.18347930908203, "learning_rate": 7.90408311010157e-06, "loss": 19.9113, "step": 186230 }, { "epoch": 0.3762165831033828, "grad_norm": 435.0130920410156, "learning_rate": 7.903798951404518e-06, "loss": 20.4026, "step": 186240 }, { "epoch": 0.3762367837360666, "grad_norm": 259.0460510253906, "learning_rate": 7.903514778554699e-06, "loss": 26.3641, "step": 186250 }, { "epoch": 0.37625698436875044, "grad_norm": 293.35992431640625, "learning_rate": 7.903230591553504e-06, "loss": 51.925, "step": 186260 }, { "epoch": 0.37627718500143426, "grad_norm": 576.8926391601562, "learning_rate": 7.902946390402313e-06, "loss": 33.8142, "step": 186270 }, { "epoch": 0.3762973856341181, "grad_norm": 115.98161315917969, "learning_rate": 7.902662175102514e-06, "loss": 11.9172, "step": 186280 }, { "epoch": 0.3763175862668019, "grad_norm": 225.28875732421875, "learning_rate": 7.90237794565549e-06, "loss": 33.6001, "step": 186290 }, { "epoch": 0.3763377868994857, "grad_norm": 241.9381103515625, "learning_rate": 7.90209370206263e-06, "loss": 13.248, "step": 186300 }, { "epoch": 0.3763579875321695, "grad_norm": 221.50494384765625, "learning_rate": 7.901809444325318e-06, "loss": 13.1992, "step": 186310 }, { "epoch": 0.3763781881648533, "grad_norm": 204.31094360351562, "learning_rate": 7.901525172444938e-06, "loss": 6.4794, "step": 186320 }, { "epoch": 0.3763983887975371, "grad_norm": 717.9684448242188, "learning_rate": 7.901240886422875e-06, "loss": 37.8508, "step": 186330 }, { "epoch": 0.37641858943022094, "grad_norm": 112.0523910522461, "learning_rate": 7.900956586260516e-06, "loss": 14.9296, "step": 186340 }, { "epoch": 0.37643879006290476, "grad_norm": 236.0635986328125, "learning_rate": 7.900672271959247e-06, "loss": 15.1673, "step": 186350 }, { "epoch": 0.3764589906955886, "grad_norm": 400.28521728515625, "learning_rate": 7.900387943520453e-06, "loss": 19.4627, "step": 186360 }, { "epoch": 0.3764791913282724, "grad_norm": 357.4207763671875, "learning_rate": 7.900103600945521e-06, "loss": 22.4554, "step": 186370 }, { "epoch": 0.3764993919609562, "grad_norm": 563.9426879882812, "learning_rate": 7.899819244235835e-06, "loss": 18.7488, "step": 186380 }, { "epoch": 0.37651959259364004, "grad_norm": 221.34249877929688, "learning_rate": 7.899534873392781e-06, "loss": 11.4356, "step": 186390 }, { "epoch": 0.37653979322632386, "grad_norm": 283.7043151855469, "learning_rate": 7.899250488417746e-06, "loss": 13.1127, "step": 186400 }, { "epoch": 0.3765599938590077, "grad_norm": 166.46102905273438, "learning_rate": 7.898966089312117e-06, "loss": 21.4531, "step": 186410 }, { "epoch": 0.3765801944916915, "grad_norm": 198.0901641845703, "learning_rate": 7.898681676077278e-06, "loss": 25.4644, "step": 186420 }, { "epoch": 0.3766003951243753, "grad_norm": 62.17695999145508, "learning_rate": 7.898397248714615e-06, "loss": 25.1601, "step": 186430 }, { "epoch": 0.3766205957570591, "grad_norm": 176.72970581054688, "learning_rate": 7.898112807225517e-06, "loss": 8.4735, "step": 186440 }, { "epoch": 0.3766407963897429, "grad_norm": 199.9274444580078, "learning_rate": 7.897828351611368e-06, "loss": 19.3175, "step": 186450 }, { "epoch": 0.3766609970224267, "grad_norm": 726.919189453125, "learning_rate": 7.897543881873555e-06, "loss": 30.8832, "step": 186460 }, { "epoch": 0.37668119765511054, "grad_norm": 401.5469055175781, "learning_rate": 7.897259398013465e-06, "loss": 19.6869, "step": 186470 }, { "epoch": 0.37670139828779436, "grad_norm": 47.72447967529297, "learning_rate": 7.896974900032483e-06, "loss": 46.1767, "step": 186480 }, { "epoch": 0.3767215989204782, "grad_norm": 20.12388801574707, "learning_rate": 7.896690387931997e-06, "loss": 30.3331, "step": 186490 }, { "epoch": 0.376741799553162, "grad_norm": 220.92852783203125, "learning_rate": 7.896405861713393e-06, "loss": 23.0494, "step": 186500 }, { "epoch": 0.3767620001858458, "grad_norm": 257.28021240234375, "learning_rate": 7.89612132137806e-06, "loss": 17.7899, "step": 186510 }, { "epoch": 0.37678220081852964, "grad_norm": 56.87631607055664, "learning_rate": 7.895836766927383e-06, "loss": 18.4828, "step": 186520 }, { "epoch": 0.37680240145121346, "grad_norm": 98.27031707763672, "learning_rate": 7.895552198362748e-06, "loss": 5.5348, "step": 186530 }, { "epoch": 0.3768226020838973, "grad_norm": 238.3933868408203, "learning_rate": 7.895267615685542e-06, "loss": 13.0739, "step": 186540 }, { "epoch": 0.3768428027165811, "grad_norm": 417.88739013671875, "learning_rate": 7.894983018897153e-06, "loss": 19.767, "step": 186550 }, { "epoch": 0.3768630033492649, "grad_norm": 368.0034484863281, "learning_rate": 7.89469840799897e-06, "loss": 19.2896, "step": 186560 }, { "epoch": 0.3768832039819487, "grad_norm": 312.037841796875, "learning_rate": 7.894413782992375e-06, "loss": 19.7305, "step": 186570 }, { "epoch": 0.3769034046146325, "grad_norm": 187.1287841796875, "learning_rate": 7.894129143878758e-06, "loss": 15.5544, "step": 186580 }, { "epoch": 0.37692360524731633, "grad_norm": 394.219482421875, "learning_rate": 7.89384449065951e-06, "loss": 15.7889, "step": 186590 }, { "epoch": 0.37694380588000015, "grad_norm": 801.2200927734375, "learning_rate": 7.893559823336013e-06, "loss": 25.3626, "step": 186600 }, { "epoch": 0.37696400651268397, "grad_norm": 414.70343017578125, "learning_rate": 7.893275141909655e-06, "loss": 12.1482, "step": 186610 }, { "epoch": 0.3769842071453678, "grad_norm": 33.069400787353516, "learning_rate": 7.892990446381828e-06, "loss": 20.7809, "step": 186620 }, { "epoch": 0.3770044077780516, "grad_norm": 420.0350646972656, "learning_rate": 7.892705736753913e-06, "loss": 13.7748, "step": 186630 }, { "epoch": 0.37702460841073543, "grad_norm": 159.99136352539062, "learning_rate": 7.892421013027302e-06, "loss": 24.7651, "step": 186640 }, { "epoch": 0.37704480904341925, "grad_norm": 271.33380126953125, "learning_rate": 7.892136275203383e-06, "loss": 16.0727, "step": 186650 }, { "epoch": 0.37706500967610307, "grad_norm": 809.55517578125, "learning_rate": 7.891851523283542e-06, "loss": 23.4556, "step": 186660 }, { "epoch": 0.3770852103087869, "grad_norm": 270.4852600097656, "learning_rate": 7.891566757269169e-06, "loss": 31.132, "step": 186670 }, { "epoch": 0.3771054109414707, "grad_norm": 491.2929992675781, "learning_rate": 7.891281977161648e-06, "loss": 21.6557, "step": 186680 }, { "epoch": 0.37712561157415453, "grad_norm": 416.9807434082031, "learning_rate": 7.89099718296237e-06, "loss": 23.0531, "step": 186690 }, { "epoch": 0.3771458122068383, "grad_norm": 563.9158935546875, "learning_rate": 7.890712374672724e-06, "loss": 14.6643, "step": 186700 }, { "epoch": 0.3771660128395221, "grad_norm": 156.35617065429688, "learning_rate": 7.890427552294093e-06, "loss": 14.5247, "step": 186710 }, { "epoch": 0.37718621347220593, "grad_norm": 585.1102905273438, "learning_rate": 7.890142715827871e-06, "loss": 20.162, "step": 186720 }, { "epoch": 0.37720641410488975, "grad_norm": 81.15010070800781, "learning_rate": 7.889857865275445e-06, "loss": 18.9025, "step": 186730 }, { "epoch": 0.3772266147375736, "grad_norm": 449.5769958496094, "learning_rate": 7.8895730006382e-06, "loss": 22.7428, "step": 186740 }, { "epoch": 0.3772468153702574, "grad_norm": 468.1115417480469, "learning_rate": 7.889288121917528e-06, "loss": 27.9427, "step": 186750 }, { "epoch": 0.3772670160029412, "grad_norm": 56.81266784667969, "learning_rate": 7.889003229114816e-06, "loss": 21.6837, "step": 186760 }, { "epoch": 0.37728721663562503, "grad_norm": 144.44442749023438, "learning_rate": 7.888718322231452e-06, "loss": 10.1322, "step": 186770 }, { "epoch": 0.37730741726830885, "grad_norm": 245.5484161376953, "learning_rate": 7.888433401268825e-06, "loss": 18.0232, "step": 186780 }, { "epoch": 0.3773276179009927, "grad_norm": 52.25250244140625, "learning_rate": 7.888148466228325e-06, "loss": 10.0442, "step": 186790 }, { "epoch": 0.3773478185336765, "grad_norm": 0.0, "learning_rate": 7.887863517111337e-06, "loss": 24.1766, "step": 186800 }, { "epoch": 0.3773680191663603, "grad_norm": 183.34756469726562, "learning_rate": 7.887578553919256e-06, "loss": 17.4428, "step": 186810 }, { "epoch": 0.37738821979904413, "grad_norm": 239.47459411621094, "learning_rate": 7.887293576653467e-06, "loss": 19.1699, "step": 186820 }, { "epoch": 0.3774084204317279, "grad_norm": 120.63771057128906, "learning_rate": 7.887008585315358e-06, "loss": 13.7408, "step": 186830 }, { "epoch": 0.3774286210644117, "grad_norm": 548.652099609375, "learning_rate": 7.88672357990632e-06, "loss": 34.2154, "step": 186840 }, { "epoch": 0.37744882169709554, "grad_norm": 378.44140625, "learning_rate": 7.88643856042774e-06, "loss": 17.7672, "step": 186850 }, { "epoch": 0.37746902232977936, "grad_norm": 586.1436767578125, "learning_rate": 7.886153526881011e-06, "loss": 27.0217, "step": 186860 }, { "epoch": 0.3774892229624632, "grad_norm": 303.7994689941406, "learning_rate": 7.885868479267517e-06, "loss": 10.8073, "step": 186870 }, { "epoch": 0.377509423595147, "grad_norm": 53.14724349975586, "learning_rate": 7.885583417588652e-06, "loss": 20.2567, "step": 186880 }, { "epoch": 0.3775296242278308, "grad_norm": 307.54254150390625, "learning_rate": 7.885298341845803e-06, "loss": 17.835, "step": 186890 }, { "epoch": 0.37754982486051464, "grad_norm": 381.0361633300781, "learning_rate": 7.88501325204036e-06, "loss": 20.6379, "step": 186900 }, { "epoch": 0.37757002549319846, "grad_norm": 249.66018676757812, "learning_rate": 7.88472814817371e-06, "loss": 17.7086, "step": 186910 }, { "epoch": 0.3775902261258823, "grad_norm": 327.5065002441406, "learning_rate": 7.884443030247248e-06, "loss": 7.7614, "step": 186920 }, { "epoch": 0.3776104267585661, "grad_norm": 175.55088806152344, "learning_rate": 7.88415789826236e-06, "loss": 12.0613, "step": 186930 }, { "epoch": 0.3776306273912499, "grad_norm": 200.0560302734375, "learning_rate": 7.883872752220434e-06, "loss": 24.1615, "step": 186940 }, { "epoch": 0.3776508280239337, "grad_norm": 72.640625, "learning_rate": 7.883587592122864e-06, "loss": 19.4451, "step": 186950 }, { "epoch": 0.3776710286566175, "grad_norm": 252.3730926513672, "learning_rate": 7.883302417971037e-06, "loss": 17.0961, "step": 186960 }, { "epoch": 0.3776912292893013, "grad_norm": 85.8256607055664, "learning_rate": 7.883017229766344e-06, "loss": 14.2206, "step": 186970 }, { "epoch": 0.37771142992198514, "grad_norm": 275.2225341796875, "learning_rate": 7.882732027510174e-06, "loss": 16.3081, "step": 186980 }, { "epoch": 0.37773163055466896, "grad_norm": 270.32806396484375, "learning_rate": 7.88244681120392e-06, "loss": 16.3678, "step": 186990 }, { "epoch": 0.3777518311873528, "grad_norm": 531.0042724609375, "learning_rate": 7.882161580848966e-06, "loss": 21.7932, "step": 187000 }, { "epoch": 0.3777720318200366, "grad_norm": 261.992919921875, "learning_rate": 7.88187633644671e-06, "loss": 22.2311, "step": 187010 }, { "epoch": 0.3777922324527204, "grad_norm": 131.06915283203125, "learning_rate": 7.881591077998536e-06, "loss": 30.0002, "step": 187020 }, { "epoch": 0.37781243308540424, "grad_norm": 553.0531005859375, "learning_rate": 7.881305805505836e-06, "loss": 24.4456, "step": 187030 }, { "epoch": 0.37783263371808806, "grad_norm": 248.23385620117188, "learning_rate": 7.881020518970003e-06, "loss": 10.2194, "step": 187040 }, { "epoch": 0.3778528343507719, "grad_norm": 380.9588928222656, "learning_rate": 7.880735218392424e-06, "loss": 25.3641, "step": 187050 }, { "epoch": 0.3778730349834557, "grad_norm": 564.7748413085938, "learning_rate": 7.880449903774492e-06, "loss": 49.3787, "step": 187060 }, { "epoch": 0.3778932356161395, "grad_norm": 446.4306640625, "learning_rate": 7.880164575117596e-06, "loss": 12.4321, "step": 187070 }, { "epoch": 0.3779134362488233, "grad_norm": 261.9026184082031, "learning_rate": 7.879879232423127e-06, "loss": 16.2894, "step": 187080 }, { "epoch": 0.3779336368815071, "grad_norm": 414.2398681640625, "learning_rate": 7.879593875692476e-06, "loss": 28.9286, "step": 187090 }, { "epoch": 0.3779538375141909, "grad_norm": 294.09210205078125, "learning_rate": 7.879308504927034e-06, "loss": 16.1029, "step": 187100 }, { "epoch": 0.37797403814687475, "grad_norm": 420.6872863769531, "learning_rate": 7.879023120128191e-06, "loss": 16.9602, "step": 187110 }, { "epoch": 0.37799423877955857, "grad_norm": 455.44366455078125, "learning_rate": 7.87873772129734e-06, "loss": 16.9659, "step": 187120 }, { "epoch": 0.3780144394122424, "grad_norm": 429.95159912109375, "learning_rate": 7.878452308435868e-06, "loss": 20.4883, "step": 187130 }, { "epoch": 0.3780346400449262, "grad_norm": 392.82489013671875, "learning_rate": 7.878166881545171e-06, "loss": 17.7292, "step": 187140 }, { "epoch": 0.37805484067761, "grad_norm": 594.9468994140625, "learning_rate": 7.877881440626635e-06, "loss": 17.4213, "step": 187150 }, { "epoch": 0.37807504131029385, "grad_norm": 321.25604248046875, "learning_rate": 7.877595985681656e-06, "loss": 23.6587, "step": 187160 }, { "epoch": 0.37809524194297767, "grad_norm": 444.82318115234375, "learning_rate": 7.877310516711623e-06, "loss": 27.5952, "step": 187170 }, { "epoch": 0.3781154425756615, "grad_norm": 289.4245910644531, "learning_rate": 7.877025033717926e-06, "loss": 18.8632, "step": 187180 }, { "epoch": 0.3781356432083453, "grad_norm": 287.969482421875, "learning_rate": 7.876739536701961e-06, "loss": 11.4843, "step": 187190 }, { "epoch": 0.3781558438410291, "grad_norm": 49.247135162353516, "learning_rate": 7.876454025665114e-06, "loss": 18.1612, "step": 187200 }, { "epoch": 0.3781760444737129, "grad_norm": 8.03761100769043, "learning_rate": 7.87616850060878e-06, "loss": 17.9355, "step": 187210 }, { "epoch": 0.3781962451063967, "grad_norm": 702.925537109375, "learning_rate": 7.875882961534347e-06, "loss": 42.1984, "step": 187220 }, { "epoch": 0.37821644573908053, "grad_norm": 66.88533020019531, "learning_rate": 7.875597408443212e-06, "loss": 8.2173, "step": 187230 }, { "epoch": 0.37823664637176435, "grad_norm": 282.1229248046875, "learning_rate": 7.875311841336763e-06, "loss": 20.4465, "step": 187240 }, { "epoch": 0.37825684700444817, "grad_norm": 217.71224975585938, "learning_rate": 7.875026260216395e-06, "loss": 13.7172, "step": 187250 }, { "epoch": 0.378277047637132, "grad_norm": 24.92588996887207, "learning_rate": 7.874740665083494e-06, "loss": 15.9615, "step": 187260 }, { "epoch": 0.3782972482698158, "grad_norm": 524.1507568359375, "learning_rate": 7.874455055939458e-06, "loss": 33.2417, "step": 187270 }, { "epoch": 0.37831744890249963, "grad_norm": 93.35189056396484, "learning_rate": 7.874169432785677e-06, "loss": 23.9342, "step": 187280 }, { "epoch": 0.37833764953518345, "grad_norm": 64.13849639892578, "learning_rate": 7.87388379562354e-06, "loss": 25.6536, "step": 187290 }, { "epoch": 0.37835785016786727, "grad_norm": 161.29470825195312, "learning_rate": 7.873598144454444e-06, "loss": 20.3868, "step": 187300 }, { "epoch": 0.3783780508005511, "grad_norm": 212.1283721923828, "learning_rate": 7.87331247927978e-06, "loss": 13.839, "step": 187310 }, { "epoch": 0.3783982514332349, "grad_norm": 165.12596130371094, "learning_rate": 7.873026800100937e-06, "loss": 17.4013, "step": 187320 }, { "epoch": 0.37841845206591873, "grad_norm": 211.37698364257812, "learning_rate": 7.872741106919313e-06, "loss": 25.3075, "step": 187330 }, { "epoch": 0.3784386526986025, "grad_norm": 265.1394348144531, "learning_rate": 7.872455399736295e-06, "loss": 19.939, "step": 187340 }, { "epoch": 0.3784588533312863, "grad_norm": 461.2636413574219, "learning_rate": 7.872169678553279e-06, "loss": 25.2502, "step": 187350 }, { "epoch": 0.37847905396397014, "grad_norm": 248.21490478515625, "learning_rate": 7.871883943371656e-06, "loss": 16.5622, "step": 187360 }, { "epoch": 0.37849925459665396, "grad_norm": 352.67138671875, "learning_rate": 7.871598194192817e-06, "loss": 15.2348, "step": 187370 }, { "epoch": 0.3785194552293378, "grad_norm": 199.8921356201172, "learning_rate": 7.871312431018158e-06, "loss": 9.7503, "step": 187380 }, { "epoch": 0.3785396558620216, "grad_norm": 313.0063171386719, "learning_rate": 7.871026653849071e-06, "loss": 24.3796, "step": 187390 }, { "epoch": 0.3785598564947054, "grad_norm": 338.9541015625, "learning_rate": 7.87074086268695e-06, "loss": 28.172, "step": 187400 }, { "epoch": 0.37858005712738924, "grad_norm": 111.48477935791016, "learning_rate": 7.870455057533184e-06, "loss": 16.271, "step": 187410 }, { "epoch": 0.37860025776007306, "grad_norm": 741.3116455078125, "learning_rate": 7.870169238389168e-06, "loss": 36.7561, "step": 187420 }, { "epoch": 0.3786204583927569, "grad_norm": 338.1759338378906, "learning_rate": 7.869883405256296e-06, "loss": 22.2131, "step": 187430 }, { "epoch": 0.3786406590254407, "grad_norm": 279.7348937988281, "learning_rate": 7.869597558135959e-06, "loss": 19.6798, "step": 187440 }, { "epoch": 0.3786608596581245, "grad_norm": 497.94775390625, "learning_rate": 7.869311697029553e-06, "loss": 23.199, "step": 187450 }, { "epoch": 0.37868106029080834, "grad_norm": 175.31829833984375, "learning_rate": 7.86902582193847e-06, "loss": 26.8558, "step": 187460 }, { "epoch": 0.3787012609234921, "grad_norm": 209.74476623535156, "learning_rate": 7.868739932864102e-06, "loss": 32.2394, "step": 187470 }, { "epoch": 0.3787214615561759, "grad_norm": 167.7667236328125, "learning_rate": 7.868454029807843e-06, "loss": 18.1855, "step": 187480 }, { "epoch": 0.37874166218885974, "grad_norm": 193.8592071533203, "learning_rate": 7.86816811277109e-06, "loss": 11.1593, "step": 187490 }, { "epoch": 0.37876186282154356, "grad_norm": 434.243896484375, "learning_rate": 7.86788218175523e-06, "loss": 23.9012, "step": 187500 }, { "epoch": 0.3787820634542274, "grad_norm": 346.6788024902344, "learning_rate": 7.867596236761663e-06, "loss": 28.7039, "step": 187510 }, { "epoch": 0.3788022640869112, "grad_norm": 156.46286010742188, "learning_rate": 7.867310277791778e-06, "loss": 12.1411, "step": 187520 }, { "epoch": 0.378822464719595, "grad_norm": 478.2828674316406, "learning_rate": 7.867024304846971e-06, "loss": 16.2423, "step": 187530 }, { "epoch": 0.37884266535227884, "grad_norm": 366.1106262207031, "learning_rate": 7.866738317928636e-06, "loss": 20.9038, "step": 187540 }, { "epoch": 0.37886286598496266, "grad_norm": 127.5936050415039, "learning_rate": 7.866452317038164e-06, "loss": 23.6495, "step": 187550 }, { "epoch": 0.3788830666176465, "grad_norm": 420.37908935546875, "learning_rate": 7.866166302176952e-06, "loss": 20.7163, "step": 187560 }, { "epoch": 0.3789032672503303, "grad_norm": 361.0739440917969, "learning_rate": 7.865880273346393e-06, "loss": 20.1822, "step": 187570 }, { "epoch": 0.3789234678830141, "grad_norm": 166.63406372070312, "learning_rate": 7.865594230547882e-06, "loss": 48.0889, "step": 187580 }, { "epoch": 0.3789436685156979, "grad_norm": 196.0758514404297, "learning_rate": 7.865308173782812e-06, "loss": 31.5443, "step": 187590 }, { "epoch": 0.3789638691483817, "grad_norm": 478.86419677734375, "learning_rate": 7.865022103052578e-06, "loss": 29.8794, "step": 187600 }, { "epoch": 0.3789840697810655, "grad_norm": 178.57083129882812, "learning_rate": 7.864736018358571e-06, "loss": 23.2072, "step": 187610 }, { "epoch": 0.37900427041374934, "grad_norm": 327.35235595703125, "learning_rate": 7.864449919702192e-06, "loss": 25.8881, "step": 187620 }, { "epoch": 0.37902447104643316, "grad_norm": 372.609130859375, "learning_rate": 7.864163807084831e-06, "loss": 22.5308, "step": 187630 }, { "epoch": 0.379044671679117, "grad_norm": 587.9843139648438, "learning_rate": 7.863877680507879e-06, "loss": 36.0277, "step": 187640 }, { "epoch": 0.3790648723118008, "grad_norm": 269.98089599609375, "learning_rate": 7.863591539972739e-06, "loss": 18.6639, "step": 187650 }, { "epoch": 0.3790850729444846, "grad_norm": 25.6934757232666, "learning_rate": 7.863305385480798e-06, "loss": 30.6338, "step": 187660 }, { "epoch": 0.37910527357716844, "grad_norm": 513.131591796875, "learning_rate": 7.863019217033456e-06, "loss": 22.7409, "step": 187670 }, { "epoch": 0.37912547420985226, "grad_norm": 385.01824951171875, "learning_rate": 7.862733034632105e-06, "loss": 23.6474, "step": 187680 }, { "epoch": 0.3791456748425361, "grad_norm": 1382.2589111328125, "learning_rate": 7.862446838278139e-06, "loss": 56.6964, "step": 187690 }, { "epoch": 0.3791658754752199, "grad_norm": 144.92486572265625, "learning_rate": 7.862160627972956e-06, "loss": 26.4031, "step": 187700 }, { "epoch": 0.3791860761079037, "grad_norm": 165.25994873046875, "learning_rate": 7.861874403717948e-06, "loss": 13.0505, "step": 187710 }, { "epoch": 0.3792062767405875, "grad_norm": 204.55722045898438, "learning_rate": 7.86158816551451e-06, "loss": 23.8193, "step": 187720 }, { "epoch": 0.3792264773732713, "grad_norm": 286.3887634277344, "learning_rate": 7.861301913364043e-06, "loss": 26.4976, "step": 187730 }, { "epoch": 0.37924667800595513, "grad_norm": 168.34889221191406, "learning_rate": 7.861015647267934e-06, "loss": 10.2569, "step": 187740 }, { "epoch": 0.37926687863863895, "grad_norm": 91.03985595703125, "learning_rate": 7.860729367227582e-06, "loss": 22.8745, "step": 187750 }, { "epoch": 0.37928707927132277, "grad_norm": 407.2997741699219, "learning_rate": 7.860443073244383e-06, "loss": 22.0248, "step": 187760 }, { "epoch": 0.3793072799040066, "grad_norm": 318.0643310546875, "learning_rate": 7.86015676531973e-06, "loss": 16.311, "step": 187770 }, { "epoch": 0.3793274805366904, "grad_norm": 137.506103515625, "learning_rate": 7.859870443455021e-06, "loss": 26.2462, "step": 187780 }, { "epoch": 0.37934768116937423, "grad_norm": 329.81475830078125, "learning_rate": 7.85958410765165e-06, "loss": 19.5564, "step": 187790 }, { "epoch": 0.37936788180205805, "grad_norm": 98.25946044921875, "learning_rate": 7.859297757911013e-06, "loss": 12.4747, "step": 187800 }, { "epoch": 0.37938808243474187, "grad_norm": 181.89390563964844, "learning_rate": 7.859011394234506e-06, "loss": 26.4729, "step": 187810 }, { "epoch": 0.3794082830674257, "grad_norm": 125.72621154785156, "learning_rate": 7.858725016623523e-06, "loss": 7.9051, "step": 187820 }, { "epoch": 0.3794284837001095, "grad_norm": 1475.867919921875, "learning_rate": 7.85843862507946e-06, "loss": 15.5508, "step": 187830 }, { "epoch": 0.37944868433279333, "grad_norm": 338.02703857421875, "learning_rate": 7.858152219603718e-06, "loss": 22.2253, "step": 187840 }, { "epoch": 0.3794688849654771, "grad_norm": 184.7093963623047, "learning_rate": 7.857865800197684e-06, "loss": 20.8725, "step": 187850 }, { "epoch": 0.3794890855981609, "grad_norm": 497.63275146484375, "learning_rate": 7.857579366862761e-06, "loss": 17.4115, "step": 187860 }, { "epoch": 0.37950928623084473, "grad_norm": 169.56068420410156, "learning_rate": 7.857292919600343e-06, "loss": 18.0911, "step": 187870 }, { "epoch": 0.37952948686352855, "grad_norm": 233.9649200439453, "learning_rate": 7.857006458411826e-06, "loss": 16.6976, "step": 187880 }, { "epoch": 0.3795496874962124, "grad_norm": 362.0227355957031, "learning_rate": 7.856719983298606e-06, "loss": 29.1188, "step": 187890 }, { "epoch": 0.3795698881288962, "grad_norm": 380.69952392578125, "learning_rate": 7.856433494262078e-06, "loss": 14.8565, "step": 187900 }, { "epoch": 0.37959008876158, "grad_norm": 285.18402099609375, "learning_rate": 7.856146991303641e-06, "loss": 24.2458, "step": 187910 }, { "epoch": 0.37961028939426383, "grad_norm": 195.2510223388672, "learning_rate": 7.85586047442469e-06, "loss": 26.4201, "step": 187920 }, { "epoch": 0.37963049002694765, "grad_norm": 442.2331237792969, "learning_rate": 7.85557394362662e-06, "loss": 23.95, "step": 187930 }, { "epoch": 0.3796506906596315, "grad_norm": 280.6783142089844, "learning_rate": 7.85528739891083e-06, "loss": 15.8332, "step": 187940 }, { "epoch": 0.3796708912923153, "grad_norm": 412.4226379394531, "learning_rate": 7.855000840278715e-06, "loss": 14.5837, "step": 187950 }, { "epoch": 0.3796910919249991, "grad_norm": 233.58099365234375, "learning_rate": 7.854714267731673e-06, "loss": 15.4633, "step": 187960 }, { "epoch": 0.37971129255768293, "grad_norm": 258.21435546875, "learning_rate": 7.8544276812711e-06, "loss": 31.6521, "step": 187970 }, { "epoch": 0.3797314931903667, "grad_norm": 31.6546688079834, "learning_rate": 7.85414108089839e-06, "loss": 11.8301, "step": 187980 }, { "epoch": 0.3797516938230505, "grad_norm": 348.6761474609375, "learning_rate": 7.853854466614945e-06, "loss": 25.8725, "step": 187990 }, { "epoch": 0.37977189445573434, "grad_norm": 4.186829090118408, "learning_rate": 7.85356783842216e-06, "loss": 25.387, "step": 188000 }, { "epoch": 0.37979209508841816, "grad_norm": 200.9108428955078, "learning_rate": 7.85328119632143e-06, "loss": 10.8614, "step": 188010 }, { "epoch": 0.379812295721102, "grad_norm": 287.229736328125, "learning_rate": 7.852994540314154e-06, "loss": 20.1935, "step": 188020 }, { "epoch": 0.3798324963537858, "grad_norm": 301.43133544921875, "learning_rate": 7.852707870401728e-06, "loss": 18.1211, "step": 188030 }, { "epoch": 0.3798526969864696, "grad_norm": 341.2064208984375, "learning_rate": 7.85242118658555e-06, "loss": 14.7123, "step": 188040 }, { "epoch": 0.37987289761915344, "grad_norm": 214.68600463867188, "learning_rate": 7.852134488867017e-06, "loss": 7.6885, "step": 188050 }, { "epoch": 0.37989309825183726, "grad_norm": 296.4685363769531, "learning_rate": 7.851847777247528e-06, "loss": 23.0847, "step": 188060 }, { "epoch": 0.3799132988845211, "grad_norm": 198.93344116210938, "learning_rate": 7.851561051728478e-06, "loss": 24.9719, "step": 188070 }, { "epoch": 0.3799334995172049, "grad_norm": 317.0389709472656, "learning_rate": 7.851274312311266e-06, "loss": 22.1381, "step": 188080 }, { "epoch": 0.3799537001498887, "grad_norm": 326.4126281738281, "learning_rate": 7.850987558997287e-06, "loss": 29.1025, "step": 188090 }, { "epoch": 0.37997390078257254, "grad_norm": 339.1131286621094, "learning_rate": 7.850700791787941e-06, "loss": 27.6823, "step": 188100 }, { "epoch": 0.3799941014152563, "grad_norm": 231.143798828125, "learning_rate": 7.850414010684626e-06, "loss": 11.523, "step": 188110 }, { "epoch": 0.3800143020479401, "grad_norm": 375.0612487792969, "learning_rate": 7.85012721568874e-06, "loss": 25.3161, "step": 188120 }, { "epoch": 0.38003450268062394, "grad_norm": 183.82325744628906, "learning_rate": 7.849840406801676e-06, "loss": 15.4342, "step": 188130 }, { "epoch": 0.38005470331330776, "grad_norm": 370.74981689453125, "learning_rate": 7.849553584024836e-06, "loss": 26.7288, "step": 188140 }, { "epoch": 0.3800749039459916, "grad_norm": 358.6734619140625, "learning_rate": 7.849266747359619e-06, "loss": 29.8715, "step": 188150 }, { "epoch": 0.3800951045786754, "grad_norm": 159.21055603027344, "learning_rate": 7.848979896807422e-06, "loss": 9.3639, "step": 188160 }, { "epoch": 0.3801153052113592, "grad_norm": 94.87987518310547, "learning_rate": 7.848693032369641e-06, "loss": 11.0378, "step": 188170 }, { "epoch": 0.38013550584404304, "grad_norm": 8.769743919372559, "learning_rate": 7.848406154047677e-06, "loss": 9.0954, "step": 188180 }, { "epoch": 0.38015570647672686, "grad_norm": 347.58673095703125, "learning_rate": 7.848119261842926e-06, "loss": 20.7473, "step": 188190 }, { "epoch": 0.3801759071094107, "grad_norm": 184.85934448242188, "learning_rate": 7.847832355756788e-06, "loss": 15.1273, "step": 188200 }, { "epoch": 0.3801961077420945, "grad_norm": 183.23202514648438, "learning_rate": 7.84754543579066e-06, "loss": 21.0695, "step": 188210 }, { "epoch": 0.3802163083747783, "grad_norm": 554.5150756835938, "learning_rate": 7.84725850194594e-06, "loss": 27.623, "step": 188220 }, { "epoch": 0.3802365090074621, "grad_norm": 20.296607971191406, "learning_rate": 7.84697155422403e-06, "loss": 16.1551, "step": 188230 }, { "epoch": 0.3802567096401459, "grad_norm": 41.41106033325195, "learning_rate": 7.846684592626324e-06, "loss": 17.2898, "step": 188240 }, { "epoch": 0.3802769102728297, "grad_norm": 676.966796875, "learning_rate": 7.846397617154223e-06, "loss": 48.0686, "step": 188250 }, { "epoch": 0.38029711090551355, "grad_norm": 257.82611083984375, "learning_rate": 7.846110627809123e-06, "loss": 22.1233, "step": 188260 }, { "epoch": 0.38031731153819737, "grad_norm": 502.7943115234375, "learning_rate": 7.845823624592427e-06, "loss": 20.2908, "step": 188270 }, { "epoch": 0.3803375121708812, "grad_norm": 594.6303100585938, "learning_rate": 7.845536607505533e-06, "loss": 22.8426, "step": 188280 }, { "epoch": 0.380357712803565, "grad_norm": 354.0299987792969, "learning_rate": 7.845249576549836e-06, "loss": 17.523, "step": 188290 }, { "epoch": 0.3803779134362488, "grad_norm": 427.1607971191406, "learning_rate": 7.844962531726742e-06, "loss": 18.2503, "step": 188300 }, { "epoch": 0.38039811406893265, "grad_norm": 159.6795654296875, "learning_rate": 7.844675473037641e-06, "loss": 10.183, "step": 188310 }, { "epoch": 0.38041831470161647, "grad_norm": 563.2664184570312, "learning_rate": 7.844388400483938e-06, "loss": 26.5594, "step": 188320 }, { "epoch": 0.3804385153343003, "grad_norm": 191.60902404785156, "learning_rate": 7.844101314067031e-06, "loss": 13.1436, "step": 188330 }, { "epoch": 0.3804587159669841, "grad_norm": 479.7440490722656, "learning_rate": 7.843814213788322e-06, "loss": 19.1327, "step": 188340 }, { "epoch": 0.3804789165996679, "grad_norm": 344.97552490234375, "learning_rate": 7.843527099649204e-06, "loss": 34.8778, "step": 188350 }, { "epoch": 0.3804991172323517, "grad_norm": 412.5584716796875, "learning_rate": 7.84323997165108e-06, "loss": 19.549, "step": 188360 }, { "epoch": 0.3805193178650355, "grad_norm": 256.4246520996094, "learning_rate": 7.842952829795352e-06, "loss": 31.2805, "step": 188370 }, { "epoch": 0.38053951849771933, "grad_norm": 239.4498748779297, "learning_rate": 7.842665674083413e-06, "loss": 12.7073, "step": 188380 }, { "epoch": 0.38055971913040315, "grad_norm": 223.3286895751953, "learning_rate": 7.842378504516669e-06, "loss": 15.2433, "step": 188390 }, { "epoch": 0.38057991976308697, "grad_norm": 89.0384292602539, "learning_rate": 7.842091321096515e-06, "loss": 12.6955, "step": 188400 }, { "epoch": 0.3806001203957708, "grad_norm": 216.61500549316406, "learning_rate": 7.841804123824354e-06, "loss": 17.2373, "step": 188410 }, { "epoch": 0.3806203210284546, "grad_norm": 203.37425231933594, "learning_rate": 7.841516912701585e-06, "loss": 31.8982, "step": 188420 }, { "epoch": 0.38064052166113843, "grad_norm": 425.505615234375, "learning_rate": 7.841229687729606e-06, "loss": 18.7633, "step": 188430 }, { "epoch": 0.38066072229382225, "grad_norm": 20.167072296142578, "learning_rate": 7.840942448909818e-06, "loss": 22.7873, "step": 188440 }, { "epoch": 0.38068092292650607, "grad_norm": 191.54022216796875, "learning_rate": 7.84065519624362e-06, "loss": 19.4611, "step": 188450 }, { "epoch": 0.3807011235591899, "grad_norm": 0.0, "learning_rate": 7.840367929732415e-06, "loss": 9.1188, "step": 188460 }, { "epoch": 0.3807213241918737, "grad_norm": 5.200672149658203, "learning_rate": 7.840080649377602e-06, "loss": 16.5978, "step": 188470 }, { "epoch": 0.38074152482455753, "grad_norm": 25.730377197265625, "learning_rate": 7.839793355180578e-06, "loss": 19.5453, "step": 188480 }, { "epoch": 0.3807617254572413, "grad_norm": 705.5593872070312, "learning_rate": 7.839506047142747e-06, "loss": 22.4301, "step": 188490 }, { "epoch": 0.3807819260899251, "grad_norm": 289.8410339355469, "learning_rate": 7.839218725265507e-06, "loss": 45.5734, "step": 188500 }, { "epoch": 0.38080212672260894, "grad_norm": 89.24897766113281, "learning_rate": 7.83893138955026e-06, "loss": 12.0118, "step": 188510 }, { "epoch": 0.38082232735529276, "grad_norm": 396.4593200683594, "learning_rate": 7.838644039998405e-06, "loss": 15.0735, "step": 188520 }, { "epoch": 0.3808425279879766, "grad_norm": 243.7600860595703, "learning_rate": 7.838356676611345e-06, "loss": 20.4574, "step": 188530 }, { "epoch": 0.3808627286206604, "grad_norm": 263.8718566894531, "learning_rate": 7.838069299390476e-06, "loss": 14.8737, "step": 188540 }, { "epoch": 0.3808829292533442, "grad_norm": 7.0934648513793945, "learning_rate": 7.837781908337204e-06, "loss": 10.8483, "step": 188550 }, { "epoch": 0.38090312988602804, "grad_norm": 673.3877563476562, "learning_rate": 7.837494503452925e-06, "loss": 24.6019, "step": 188560 }, { "epoch": 0.38092333051871186, "grad_norm": 190.78594970703125, "learning_rate": 7.837207084739044e-06, "loss": 28.2139, "step": 188570 }, { "epoch": 0.3809435311513957, "grad_norm": 437.1221618652344, "learning_rate": 7.83691965219696e-06, "loss": 14.5288, "step": 188580 }, { "epoch": 0.3809637317840795, "grad_norm": 1.64443838596344, "learning_rate": 7.836632205828072e-06, "loss": 22.5905, "step": 188590 }, { "epoch": 0.3809839324167633, "grad_norm": 326.869140625, "learning_rate": 7.836344745633785e-06, "loss": 28.7622, "step": 188600 }, { "epoch": 0.38100413304944714, "grad_norm": 263.68682861328125, "learning_rate": 7.836057271615496e-06, "loss": 16.929, "step": 188610 }, { "epoch": 0.3810243336821309, "grad_norm": 479.7554016113281, "learning_rate": 7.835769783774606e-06, "loss": 27.1058, "step": 188620 }, { "epoch": 0.3810445343148147, "grad_norm": 376.3658142089844, "learning_rate": 7.83548228211252e-06, "loss": 11.1179, "step": 188630 }, { "epoch": 0.38106473494749854, "grad_norm": 407.0502014160156, "learning_rate": 7.835194766630638e-06, "loss": 22.9546, "step": 188640 }, { "epoch": 0.38108493558018236, "grad_norm": 257.5386962890625, "learning_rate": 7.834907237330359e-06, "loss": 14.5117, "step": 188650 }, { "epoch": 0.3811051362128662, "grad_norm": 411.3096008300781, "learning_rate": 7.834619694213087e-06, "loss": 19.1848, "step": 188660 }, { "epoch": 0.38112533684555, "grad_norm": 398.0209655761719, "learning_rate": 7.83433213728022e-06, "loss": 21.6484, "step": 188670 }, { "epoch": 0.3811455374782338, "grad_norm": 335.4093017578125, "learning_rate": 7.834044566533166e-06, "loss": 15.6502, "step": 188680 }, { "epoch": 0.38116573811091764, "grad_norm": 305.3774108886719, "learning_rate": 7.833756981973321e-06, "loss": 47.7982, "step": 188690 }, { "epoch": 0.38118593874360146, "grad_norm": 414.6569519042969, "learning_rate": 7.833469383602086e-06, "loss": 18.7999, "step": 188700 }, { "epoch": 0.3812061393762853, "grad_norm": 258.8258056640625, "learning_rate": 7.833181771420869e-06, "loss": 28.8747, "step": 188710 }, { "epoch": 0.3812263400089691, "grad_norm": 500.5426940917969, "learning_rate": 7.832894145431062e-06, "loss": 15.8872, "step": 188720 }, { "epoch": 0.3812465406416529, "grad_norm": 124.81086730957031, "learning_rate": 7.832606505634077e-06, "loss": 14.3049, "step": 188730 }, { "epoch": 0.3812667412743367, "grad_norm": 498.0943603515625, "learning_rate": 7.832318852031311e-06, "loss": 38.6162, "step": 188740 }, { "epoch": 0.3812869419070205, "grad_norm": 347.20220947265625, "learning_rate": 7.832031184624165e-06, "loss": 11.5305, "step": 188750 }, { "epoch": 0.3813071425397043, "grad_norm": 345.37457275390625, "learning_rate": 7.831743503414043e-06, "loss": 24.0428, "step": 188760 }, { "epoch": 0.38132734317238814, "grad_norm": 64.19772338867188, "learning_rate": 7.831455808402348e-06, "loss": 17.8616, "step": 188770 }, { "epoch": 0.38134754380507196, "grad_norm": 117.52814483642578, "learning_rate": 7.831168099590478e-06, "loss": 11.869, "step": 188780 }, { "epoch": 0.3813677444377558, "grad_norm": 220.64796447753906, "learning_rate": 7.83088037697984e-06, "loss": 19.4358, "step": 188790 }, { "epoch": 0.3813879450704396, "grad_norm": 1010.0936279296875, "learning_rate": 7.830592640571833e-06, "loss": 23.1459, "step": 188800 }, { "epoch": 0.3814081457031234, "grad_norm": 291.99200439453125, "learning_rate": 7.830304890367862e-06, "loss": 12.9779, "step": 188810 }, { "epoch": 0.38142834633580724, "grad_norm": 431.78533935546875, "learning_rate": 7.83001712636933e-06, "loss": 24.7909, "step": 188820 }, { "epoch": 0.38144854696849106, "grad_norm": 663.5818481445312, "learning_rate": 7.829729348577636e-06, "loss": 26.1589, "step": 188830 }, { "epoch": 0.3814687476011749, "grad_norm": 198.33767700195312, "learning_rate": 7.829441556994182e-06, "loss": 17.9756, "step": 188840 }, { "epoch": 0.3814889482338587, "grad_norm": 255.27883911132812, "learning_rate": 7.829153751620375e-06, "loss": 10.1571, "step": 188850 }, { "epoch": 0.3815091488665425, "grad_norm": 182.56240844726562, "learning_rate": 7.828865932457617e-06, "loss": 34.9142, "step": 188860 }, { "epoch": 0.3815293494992263, "grad_norm": 393.34228515625, "learning_rate": 7.828578099507308e-06, "loss": 19.5887, "step": 188870 }, { "epoch": 0.3815495501319101, "grad_norm": 164.738525390625, "learning_rate": 7.828290252770852e-06, "loss": 27.7429, "step": 188880 }, { "epoch": 0.38156975076459393, "grad_norm": 457.4913635253906, "learning_rate": 7.828002392249654e-06, "loss": 20.3474, "step": 188890 }, { "epoch": 0.38158995139727775, "grad_norm": 227.4642791748047, "learning_rate": 7.827714517945116e-06, "loss": 15.9031, "step": 188900 }, { "epoch": 0.38161015202996157, "grad_norm": 295.95452880859375, "learning_rate": 7.827426629858636e-06, "loss": 9.3283, "step": 188910 }, { "epoch": 0.3816303526626454, "grad_norm": 437.8561096191406, "learning_rate": 7.827138727991625e-06, "loss": 19.6301, "step": 188920 }, { "epoch": 0.3816505532953292, "grad_norm": 493.2328796386719, "learning_rate": 7.826850812345484e-06, "loss": 18.632, "step": 188930 }, { "epoch": 0.38167075392801303, "grad_norm": 283.30999755859375, "learning_rate": 7.826562882921613e-06, "loss": 33.2296, "step": 188940 }, { "epoch": 0.38169095456069685, "grad_norm": 342.0257873535156, "learning_rate": 7.826274939721417e-06, "loss": 16.7926, "step": 188950 }, { "epoch": 0.38171115519338067, "grad_norm": 239.33985900878906, "learning_rate": 7.8259869827463e-06, "loss": 19.4174, "step": 188960 }, { "epoch": 0.3817313558260645, "grad_norm": 143.8975067138672, "learning_rate": 7.825699011997665e-06, "loss": 20.5529, "step": 188970 }, { "epoch": 0.3817515564587483, "grad_norm": 345.70050048828125, "learning_rate": 7.825411027476917e-06, "loss": 14.2311, "step": 188980 }, { "epoch": 0.38177175709143213, "grad_norm": 229.07493591308594, "learning_rate": 7.825123029185457e-06, "loss": 33.1764, "step": 188990 }, { "epoch": 0.3817919577241159, "grad_norm": 273.68994140625, "learning_rate": 7.82483501712469e-06, "loss": 15.8392, "step": 189000 }, { "epoch": 0.3818121583567997, "grad_norm": 395.2253112792969, "learning_rate": 7.824546991296021e-06, "loss": 27.5266, "step": 189010 }, { "epoch": 0.38183235898948353, "grad_norm": 167.81861877441406, "learning_rate": 7.824258951700852e-06, "loss": 14.8126, "step": 189020 }, { "epoch": 0.38185255962216735, "grad_norm": 233.30056762695312, "learning_rate": 7.823970898340587e-06, "loss": 25.8611, "step": 189030 }, { "epoch": 0.3818727602548512, "grad_norm": 341.90069580078125, "learning_rate": 7.82368283121663e-06, "loss": 26.3729, "step": 189040 }, { "epoch": 0.381892960887535, "grad_norm": 196.7256622314453, "learning_rate": 7.823394750330386e-06, "loss": 10.3041, "step": 189050 }, { "epoch": 0.3819131615202188, "grad_norm": 438.32391357421875, "learning_rate": 7.823106655683259e-06, "loss": 20.6065, "step": 189060 }, { "epoch": 0.38193336215290263, "grad_norm": 333.6849365234375, "learning_rate": 7.822818547276652e-06, "loss": 10.7622, "step": 189070 }, { "epoch": 0.38195356278558645, "grad_norm": 264.6697082519531, "learning_rate": 7.822530425111969e-06, "loss": 7.1328, "step": 189080 }, { "epoch": 0.3819737634182703, "grad_norm": 194.39158630371094, "learning_rate": 7.822242289190615e-06, "loss": 21.4957, "step": 189090 }, { "epoch": 0.3819939640509541, "grad_norm": 255.44224548339844, "learning_rate": 7.821954139513997e-06, "loss": 11.2411, "step": 189100 }, { "epoch": 0.3820141646836379, "grad_norm": 112.19602966308594, "learning_rate": 7.821665976083515e-06, "loss": 11.4495, "step": 189110 }, { "epoch": 0.38203436531632173, "grad_norm": 348.06817626953125, "learning_rate": 7.821377798900574e-06, "loss": 14.886, "step": 189120 }, { "epoch": 0.3820545659490055, "grad_norm": 320.6820068359375, "learning_rate": 7.82108960796658e-06, "loss": 34.5319, "step": 189130 }, { "epoch": 0.3820747665816893, "grad_norm": 500.6260070800781, "learning_rate": 7.82080140328294e-06, "loss": 18.5901, "step": 189140 }, { "epoch": 0.38209496721437314, "grad_norm": 46.95943832397461, "learning_rate": 7.820513184851052e-06, "loss": 35.735, "step": 189150 }, { "epoch": 0.38211516784705696, "grad_norm": 4.805820941925049, "learning_rate": 7.820224952672329e-06, "loss": 19.3777, "step": 189160 }, { "epoch": 0.3821353684797408, "grad_norm": 42.5538330078125, "learning_rate": 7.819936706748168e-06, "loss": 25.6616, "step": 189170 }, { "epoch": 0.3821555691124246, "grad_norm": 209.58544921875, "learning_rate": 7.81964844707998e-06, "loss": 25.7231, "step": 189180 }, { "epoch": 0.3821757697451084, "grad_norm": 399.9967041015625, "learning_rate": 7.819360173669168e-06, "loss": 19.52, "step": 189190 }, { "epoch": 0.38219597037779224, "grad_norm": 363.89886474609375, "learning_rate": 7.819071886517134e-06, "loss": 15.0135, "step": 189200 }, { "epoch": 0.38221617101047606, "grad_norm": 378.2880554199219, "learning_rate": 7.818783585625287e-06, "loss": 38.6341, "step": 189210 }, { "epoch": 0.3822363716431599, "grad_norm": 184.58001708984375, "learning_rate": 7.818495270995031e-06, "loss": 16.3344, "step": 189220 }, { "epoch": 0.3822565722758437, "grad_norm": 191.4387664794922, "learning_rate": 7.81820694262777e-06, "loss": 25.9883, "step": 189230 }, { "epoch": 0.3822767729085275, "grad_norm": 120.21363830566406, "learning_rate": 7.81791860052491e-06, "loss": 12.8679, "step": 189240 }, { "epoch": 0.38229697354121134, "grad_norm": 317.79876708984375, "learning_rate": 7.817630244687857e-06, "loss": 22.0221, "step": 189250 }, { "epoch": 0.3823171741738951, "grad_norm": 319.2288513183594, "learning_rate": 7.817341875118016e-06, "loss": 20.4518, "step": 189260 }, { "epoch": 0.3823373748065789, "grad_norm": 204.496826171875, "learning_rate": 7.817053491816794e-06, "loss": 13.0705, "step": 189270 }, { "epoch": 0.38235757543926274, "grad_norm": 220.59739685058594, "learning_rate": 7.816765094785593e-06, "loss": 11.7069, "step": 189280 }, { "epoch": 0.38237777607194656, "grad_norm": 88.51126098632812, "learning_rate": 7.81647668402582e-06, "loss": 13.5182, "step": 189290 }, { "epoch": 0.3823979767046304, "grad_norm": 94.82627868652344, "learning_rate": 7.816188259538885e-06, "loss": 17.1684, "step": 189300 }, { "epoch": 0.3824181773373142, "grad_norm": 81.74517059326172, "learning_rate": 7.815899821326185e-06, "loss": 27.4373, "step": 189310 }, { "epoch": 0.382438377969998, "grad_norm": 1723.1722412109375, "learning_rate": 7.815611369389134e-06, "loss": 19.7561, "step": 189320 }, { "epoch": 0.38245857860268184, "grad_norm": 190.59263610839844, "learning_rate": 7.815322903729133e-06, "loss": 17.1776, "step": 189330 }, { "epoch": 0.38247877923536566, "grad_norm": 230.8385009765625, "learning_rate": 7.81503442434759e-06, "loss": 14.2021, "step": 189340 }, { "epoch": 0.3824989798680495, "grad_norm": 375.6081237792969, "learning_rate": 7.814745931245911e-06, "loss": 31.2461, "step": 189350 }, { "epoch": 0.3825191805007333, "grad_norm": 307.5336608886719, "learning_rate": 7.814457424425501e-06, "loss": 22.9752, "step": 189360 }, { "epoch": 0.3825393811334171, "grad_norm": 33.234859466552734, "learning_rate": 7.814168903887768e-06, "loss": 11.9272, "step": 189370 }, { "epoch": 0.3825595817661009, "grad_norm": 325.43927001953125, "learning_rate": 7.813880369634114e-06, "loss": 18.9073, "step": 189380 }, { "epoch": 0.3825797823987847, "grad_norm": 341.5445251464844, "learning_rate": 7.813591821665953e-06, "loss": 22.4223, "step": 189390 }, { "epoch": 0.3825999830314685, "grad_norm": 97.49735260009766, "learning_rate": 7.813303259984685e-06, "loss": 12.5746, "step": 189400 }, { "epoch": 0.38262018366415235, "grad_norm": 440.65142822265625, "learning_rate": 7.813014684591718e-06, "loss": 62.7057, "step": 189410 }, { "epoch": 0.38264038429683617, "grad_norm": 228.19708251953125, "learning_rate": 7.812726095488457e-06, "loss": 24.607, "step": 189420 }, { "epoch": 0.38266058492952, "grad_norm": 341.644287109375, "learning_rate": 7.812437492676312e-06, "loss": 16.3769, "step": 189430 }, { "epoch": 0.3826807855622038, "grad_norm": 227.7577667236328, "learning_rate": 7.812148876156687e-06, "loss": 15.3016, "step": 189440 }, { "epoch": 0.3827009861948876, "grad_norm": 23.760311126708984, "learning_rate": 7.81186024593099e-06, "loss": 31.487, "step": 189450 }, { "epoch": 0.38272118682757145, "grad_norm": 1633.0228271484375, "learning_rate": 7.811571602000628e-06, "loss": 35.1215, "step": 189460 }, { "epoch": 0.38274138746025527, "grad_norm": 33.92759323120117, "learning_rate": 7.811282944367004e-06, "loss": 14.6683, "step": 189470 }, { "epoch": 0.3827615880929391, "grad_norm": 357.4024963378906, "learning_rate": 7.810994273031532e-06, "loss": 22.3616, "step": 189480 }, { "epoch": 0.3827817887256229, "grad_norm": 257.3802490234375, "learning_rate": 7.81070558799561e-06, "loss": 6.1518, "step": 189490 }, { "epoch": 0.3828019893583067, "grad_norm": 395.04638671875, "learning_rate": 7.810416889260653e-06, "loss": 28.7303, "step": 189500 }, { "epoch": 0.3828221899909905, "grad_norm": 578.2666015625, "learning_rate": 7.810128176828065e-06, "loss": 26.6905, "step": 189510 }, { "epoch": 0.3828423906236743, "grad_norm": 326.9017639160156, "learning_rate": 7.809839450699253e-06, "loss": 17.0463, "step": 189520 }, { "epoch": 0.38286259125635813, "grad_norm": 0.0, "learning_rate": 7.809550710875624e-06, "loss": 8.6508, "step": 189530 }, { "epoch": 0.38288279188904195, "grad_norm": 525.8245849609375, "learning_rate": 7.809261957358585e-06, "loss": 19.747, "step": 189540 }, { "epoch": 0.38290299252172577, "grad_norm": 488.80059814453125, "learning_rate": 7.808973190149544e-06, "loss": 22.3579, "step": 189550 }, { "epoch": 0.3829231931544096, "grad_norm": 621.4241333007812, "learning_rate": 7.80868440924991e-06, "loss": 21.5327, "step": 189560 }, { "epoch": 0.3829433937870934, "grad_norm": 52.53449249267578, "learning_rate": 7.808395614661086e-06, "loss": 4.379, "step": 189570 }, { "epoch": 0.38296359441977723, "grad_norm": 172.646240234375, "learning_rate": 7.808106806384484e-06, "loss": 22.3911, "step": 189580 }, { "epoch": 0.38298379505246105, "grad_norm": 342.46331787109375, "learning_rate": 7.80781798442151e-06, "loss": 29.5103, "step": 189590 }, { "epoch": 0.38300399568514487, "grad_norm": 260.1891174316406, "learning_rate": 7.807529148773572e-06, "loss": 21.2511, "step": 189600 }, { "epoch": 0.3830241963178287, "grad_norm": 332.2121276855469, "learning_rate": 7.807240299442078e-06, "loss": 19.3463, "step": 189610 }, { "epoch": 0.3830443969505125, "grad_norm": 266.1343078613281, "learning_rate": 7.806951436428433e-06, "loss": 20.6977, "step": 189620 }, { "epoch": 0.38306459758319633, "grad_norm": 156.08102416992188, "learning_rate": 7.80666255973405e-06, "loss": 34.969, "step": 189630 }, { "epoch": 0.3830847982158801, "grad_norm": 205.93748474121094, "learning_rate": 7.806373669360332e-06, "loss": 23.4018, "step": 189640 }, { "epoch": 0.3831049988485639, "grad_norm": 392.34814453125, "learning_rate": 7.80608476530869e-06, "loss": 17.8724, "step": 189650 }, { "epoch": 0.38312519948124774, "grad_norm": 229.1925811767578, "learning_rate": 7.80579584758053e-06, "loss": 27.4994, "step": 189660 }, { "epoch": 0.38314540011393156, "grad_norm": 243.60528564453125, "learning_rate": 7.805506916177263e-06, "loss": 8.9475, "step": 189670 }, { "epoch": 0.3831656007466154, "grad_norm": 591.6468505859375, "learning_rate": 7.805217971100295e-06, "loss": 35.2381, "step": 189680 }, { "epoch": 0.3831858013792992, "grad_norm": 161.00865173339844, "learning_rate": 7.804929012351034e-06, "loss": 10.5985, "step": 189690 }, { "epoch": 0.383206002011983, "grad_norm": 439.59637451171875, "learning_rate": 7.80464003993089e-06, "loss": 21.1403, "step": 189700 }, { "epoch": 0.38322620264466684, "grad_norm": 147.86199951171875, "learning_rate": 7.80435105384127e-06, "loss": 15.119, "step": 189710 }, { "epoch": 0.38324640327735066, "grad_norm": 255.6207733154297, "learning_rate": 7.804062054083585e-06, "loss": 16.9672, "step": 189720 }, { "epoch": 0.3832666039100345, "grad_norm": 0.0, "learning_rate": 7.803773040659239e-06, "loss": 21.8953, "step": 189730 }, { "epoch": 0.3832868045427183, "grad_norm": 304.88604736328125, "learning_rate": 7.803484013569644e-06, "loss": 20.9893, "step": 189740 }, { "epoch": 0.3833070051754021, "grad_norm": 358.0574035644531, "learning_rate": 7.80319497281621e-06, "loss": 29.0053, "step": 189750 }, { "epoch": 0.38332720580808594, "grad_norm": 20.54576301574707, "learning_rate": 7.802905918400342e-06, "loss": 18.5024, "step": 189760 }, { "epoch": 0.3833474064407697, "grad_norm": 257.09124755859375, "learning_rate": 7.80261685032345e-06, "loss": 20.824, "step": 189770 }, { "epoch": 0.3833676070734535, "grad_norm": 201.9254913330078, "learning_rate": 7.802327768586944e-06, "loss": 10.7037, "step": 189780 }, { "epoch": 0.38338780770613734, "grad_norm": 248.52548217773438, "learning_rate": 7.802038673192233e-06, "loss": 14.9945, "step": 189790 }, { "epoch": 0.38340800833882116, "grad_norm": 153.8669891357422, "learning_rate": 7.801749564140724e-06, "loss": 39.8081, "step": 189800 }, { "epoch": 0.383428208971505, "grad_norm": 231.83741760253906, "learning_rate": 7.801460441433828e-06, "loss": 15.2513, "step": 189810 }, { "epoch": 0.3834484096041888, "grad_norm": 357.7398986816406, "learning_rate": 7.801171305072954e-06, "loss": 52.1384, "step": 189820 }, { "epoch": 0.3834686102368726, "grad_norm": 293.14703369140625, "learning_rate": 7.80088215505951e-06, "loss": 30.7611, "step": 189830 }, { "epoch": 0.38348881086955644, "grad_norm": 193.69400024414062, "learning_rate": 7.800592991394906e-06, "loss": 19.2349, "step": 189840 }, { "epoch": 0.38350901150224026, "grad_norm": 185.81674194335938, "learning_rate": 7.800303814080552e-06, "loss": 16.8238, "step": 189850 }, { "epoch": 0.3835292121349241, "grad_norm": 0.0, "learning_rate": 7.800014623117858e-06, "loss": 37.2292, "step": 189860 }, { "epoch": 0.3835494127676079, "grad_norm": 202.38710021972656, "learning_rate": 7.799725418508231e-06, "loss": 15.8446, "step": 189870 }, { "epoch": 0.3835696134002917, "grad_norm": 233.6189422607422, "learning_rate": 7.799436200253082e-06, "loss": 16.8288, "step": 189880 }, { "epoch": 0.38358981403297554, "grad_norm": 339.3306884765625, "learning_rate": 7.79914696835382e-06, "loss": 26.4671, "step": 189890 }, { "epoch": 0.3836100146656593, "grad_norm": 226.34478759765625, "learning_rate": 7.798857722811857e-06, "loss": 32.8667, "step": 189900 }, { "epoch": 0.3836302152983431, "grad_norm": 491.3672790527344, "learning_rate": 7.798568463628597e-06, "loss": 14.8282, "step": 189910 }, { "epoch": 0.38365041593102694, "grad_norm": 323.1797180175781, "learning_rate": 7.798279190805458e-06, "loss": 15.0925, "step": 189920 }, { "epoch": 0.38367061656371076, "grad_norm": 309.21942138671875, "learning_rate": 7.797989904343844e-06, "loss": 17.8119, "step": 189930 }, { "epoch": 0.3836908171963946, "grad_norm": 255.4132080078125, "learning_rate": 7.797700604245166e-06, "loss": 22.8069, "step": 189940 }, { "epoch": 0.3837110178290784, "grad_norm": 259.75732421875, "learning_rate": 7.797411290510836e-06, "loss": 21.2743, "step": 189950 }, { "epoch": 0.3837312184617622, "grad_norm": 203.40597534179688, "learning_rate": 7.797121963142263e-06, "loss": 16.1005, "step": 189960 }, { "epoch": 0.38375141909444604, "grad_norm": 764.6876831054688, "learning_rate": 7.796832622140854e-06, "loss": 16.8986, "step": 189970 }, { "epoch": 0.38377161972712986, "grad_norm": 32.20394515991211, "learning_rate": 7.796543267508023e-06, "loss": 24.8967, "step": 189980 }, { "epoch": 0.3837918203598137, "grad_norm": 105.36257934570312, "learning_rate": 7.79625389924518e-06, "loss": 17.0771, "step": 189990 }, { "epoch": 0.3838120209924975, "grad_norm": 376.1561279296875, "learning_rate": 7.795964517353734e-06, "loss": 24.2949, "step": 190000 }, { "epoch": 0.3838322216251813, "grad_norm": 15.198107719421387, "learning_rate": 7.795675121835099e-06, "loss": 11.3698, "step": 190010 }, { "epoch": 0.3838524222578651, "grad_norm": 388.4515380859375, "learning_rate": 7.795385712690678e-06, "loss": 17.3752, "step": 190020 }, { "epoch": 0.3838726228905489, "grad_norm": 212.92649841308594, "learning_rate": 7.795096289921888e-06, "loss": 41.4644, "step": 190030 }, { "epoch": 0.38389282352323273, "grad_norm": 322.86376953125, "learning_rate": 7.794806853530139e-06, "loss": 26.8104, "step": 190040 }, { "epoch": 0.38391302415591655, "grad_norm": 346.3350830078125, "learning_rate": 7.79451740351684e-06, "loss": 21.2462, "step": 190050 }, { "epoch": 0.38393322478860037, "grad_norm": 374.8015441894531, "learning_rate": 7.7942279398834e-06, "loss": 33.6896, "step": 190060 }, { "epoch": 0.3839534254212842, "grad_norm": 521.8737182617188, "learning_rate": 7.793938462631233e-06, "loss": 21.4431, "step": 190070 }, { "epoch": 0.383973626053968, "grad_norm": 468.3540954589844, "learning_rate": 7.79364897176175e-06, "loss": 18.1617, "step": 190080 }, { "epoch": 0.38399382668665183, "grad_norm": 533.8782958984375, "learning_rate": 7.79335946727636e-06, "loss": 26.4409, "step": 190090 }, { "epoch": 0.38401402731933565, "grad_norm": 259.8974304199219, "learning_rate": 7.793069949176474e-06, "loss": 20.1672, "step": 190100 }, { "epoch": 0.38403422795201947, "grad_norm": 378.27947998046875, "learning_rate": 7.792780417463505e-06, "loss": 30.8578, "step": 190110 }, { "epoch": 0.3840544285847033, "grad_norm": 362.5474853515625, "learning_rate": 7.792490872138861e-06, "loss": 30.0389, "step": 190120 }, { "epoch": 0.3840746292173871, "grad_norm": 267.0578918457031, "learning_rate": 7.792201313203957e-06, "loss": 22.9391, "step": 190130 }, { "epoch": 0.38409482985007093, "grad_norm": 150.80775451660156, "learning_rate": 7.791911740660203e-06, "loss": 17.2142, "step": 190140 }, { "epoch": 0.3841150304827547, "grad_norm": 55.9863395690918, "learning_rate": 7.791622154509008e-06, "loss": 23.3326, "step": 190150 }, { "epoch": 0.3841352311154385, "grad_norm": 142.45651245117188, "learning_rate": 7.791332554751784e-06, "loss": 11.7437, "step": 190160 }, { "epoch": 0.38415543174812233, "grad_norm": 312.29107666015625, "learning_rate": 7.791042941389948e-06, "loss": 16.2627, "step": 190170 }, { "epoch": 0.38417563238080615, "grad_norm": 364.7142639160156, "learning_rate": 7.790753314424903e-06, "loss": 27.7604, "step": 190180 }, { "epoch": 0.38419583301349, "grad_norm": 308.6564636230469, "learning_rate": 7.790463673858069e-06, "loss": 25.4585, "step": 190190 }, { "epoch": 0.3842160336461738, "grad_norm": 232.73402404785156, "learning_rate": 7.79017401969085e-06, "loss": 17.5717, "step": 190200 }, { "epoch": 0.3842362342788576, "grad_norm": 356.84375, "learning_rate": 7.789884351924662e-06, "loss": 21.5409, "step": 190210 }, { "epoch": 0.38425643491154143, "grad_norm": 262.44921875, "learning_rate": 7.789594670560917e-06, "loss": 25.7987, "step": 190220 }, { "epoch": 0.38427663554422525, "grad_norm": 726.6702270507812, "learning_rate": 7.789304975601025e-06, "loss": 22.7348, "step": 190230 }, { "epoch": 0.3842968361769091, "grad_norm": 186.14329528808594, "learning_rate": 7.789015267046399e-06, "loss": 9.8924, "step": 190240 }, { "epoch": 0.3843170368095929, "grad_norm": 283.9210205078125, "learning_rate": 7.788725544898452e-06, "loss": 24.2809, "step": 190250 }, { "epoch": 0.3843372374422767, "grad_norm": 464.6257629394531, "learning_rate": 7.788435809158593e-06, "loss": 31.0973, "step": 190260 }, { "epoch": 0.38435743807496053, "grad_norm": 132.0215301513672, "learning_rate": 7.788146059828238e-06, "loss": 22.5246, "step": 190270 }, { "epoch": 0.3843776387076443, "grad_norm": 373.3683776855469, "learning_rate": 7.787856296908795e-06, "loss": 23.282, "step": 190280 }, { "epoch": 0.3843978393403281, "grad_norm": 203.931884765625, "learning_rate": 7.787566520401681e-06, "loss": 32.0366, "step": 190290 }, { "epoch": 0.38441803997301194, "grad_norm": 348.469970703125, "learning_rate": 7.787276730308304e-06, "loss": 30.7335, "step": 190300 }, { "epoch": 0.38443824060569576, "grad_norm": 242.3341522216797, "learning_rate": 7.786986926630079e-06, "loss": 23.9149, "step": 190310 }, { "epoch": 0.3844584412383796, "grad_norm": 494.2724609375, "learning_rate": 7.786697109368418e-06, "loss": 18.1179, "step": 190320 }, { "epoch": 0.3844786418710634, "grad_norm": 180.49270629882812, "learning_rate": 7.786407278524733e-06, "loss": 14.9536, "step": 190330 }, { "epoch": 0.3844988425037472, "grad_norm": 240.28819274902344, "learning_rate": 7.786117434100438e-06, "loss": 13.966, "step": 190340 }, { "epoch": 0.38451904313643104, "grad_norm": 355.9014587402344, "learning_rate": 7.785827576096943e-06, "loss": 18.6192, "step": 190350 }, { "epoch": 0.38453924376911486, "grad_norm": 378.7132568359375, "learning_rate": 7.785537704515662e-06, "loss": 11.2951, "step": 190360 }, { "epoch": 0.3845594444017987, "grad_norm": 472.87030029296875, "learning_rate": 7.785247819358009e-06, "loss": 26.0953, "step": 190370 }, { "epoch": 0.3845796450344825, "grad_norm": 174.11288452148438, "learning_rate": 7.784957920625396e-06, "loss": 38.2453, "step": 190380 }, { "epoch": 0.3845998456671663, "grad_norm": 583.6879272460938, "learning_rate": 7.784668008319235e-06, "loss": 19.9744, "step": 190390 }, { "epoch": 0.38462004629985014, "grad_norm": 244.9833221435547, "learning_rate": 7.78437808244094e-06, "loss": 25.444, "step": 190400 }, { "epoch": 0.3846402469325339, "grad_norm": 72.69369506835938, "learning_rate": 7.784088142991926e-06, "loss": 24.1741, "step": 190410 }, { "epoch": 0.3846604475652177, "grad_norm": 348.1921081542969, "learning_rate": 7.783798189973601e-06, "loss": 17.1039, "step": 190420 }, { "epoch": 0.38468064819790154, "grad_norm": 249.26316833496094, "learning_rate": 7.783508223387384e-06, "loss": 7.9494, "step": 190430 }, { "epoch": 0.38470084883058536, "grad_norm": 1587.608154296875, "learning_rate": 7.783218243234684e-06, "loss": 12.0243, "step": 190440 }, { "epoch": 0.3847210494632692, "grad_norm": 171.93418884277344, "learning_rate": 7.782928249516915e-06, "loss": 14.462, "step": 190450 }, { "epoch": 0.384741250095953, "grad_norm": 39.958946228027344, "learning_rate": 7.782638242235493e-06, "loss": 18.5136, "step": 190460 }, { "epoch": 0.3847614507286368, "grad_norm": 86.62773132324219, "learning_rate": 7.782348221391828e-06, "loss": 19.4601, "step": 190470 }, { "epoch": 0.38478165136132064, "grad_norm": 478.40203857421875, "learning_rate": 7.782058186987337e-06, "loss": 15.8402, "step": 190480 }, { "epoch": 0.38480185199400446, "grad_norm": 500.8219299316406, "learning_rate": 7.781768139023431e-06, "loss": 18.598, "step": 190490 }, { "epoch": 0.3848220526266883, "grad_norm": 176.76065063476562, "learning_rate": 7.781478077501526e-06, "loss": 24.7446, "step": 190500 }, { "epoch": 0.3848422532593721, "grad_norm": 525.25537109375, "learning_rate": 7.78118800242303e-06, "loss": 19.0369, "step": 190510 }, { "epoch": 0.3848624538920559, "grad_norm": 215.05828857421875, "learning_rate": 7.780897913789364e-06, "loss": 12.3977, "step": 190520 }, { "epoch": 0.38488265452473974, "grad_norm": 492.6200256347656, "learning_rate": 7.780607811601939e-06, "loss": 27.7902, "step": 190530 }, { "epoch": 0.3849028551574235, "grad_norm": 386.2331848144531, "learning_rate": 7.78031769586217e-06, "loss": 35.324, "step": 190540 }, { "epoch": 0.3849230557901073, "grad_norm": 104.83644104003906, "learning_rate": 7.780027566571467e-06, "loss": 18.1014, "step": 190550 }, { "epoch": 0.38494325642279115, "grad_norm": 207.39920043945312, "learning_rate": 7.779737423731248e-06, "loss": 14.7118, "step": 190560 }, { "epoch": 0.38496345705547497, "grad_norm": 554.3973388671875, "learning_rate": 7.779447267342926e-06, "loss": 25.6141, "step": 190570 }, { "epoch": 0.3849836576881588, "grad_norm": 204.90586853027344, "learning_rate": 7.779157097407914e-06, "loss": 27.1342, "step": 190580 }, { "epoch": 0.3850038583208426, "grad_norm": 601.329345703125, "learning_rate": 7.77886691392763e-06, "loss": 15.772, "step": 190590 }, { "epoch": 0.3850240589535264, "grad_norm": 429.49871826171875, "learning_rate": 7.778576716903484e-06, "loss": 18.6083, "step": 190600 }, { "epoch": 0.38504425958621025, "grad_norm": 240.8666534423828, "learning_rate": 7.778286506336892e-06, "loss": 12.254, "step": 190610 }, { "epoch": 0.38506446021889407, "grad_norm": 331.68927001953125, "learning_rate": 7.777996282229267e-06, "loss": 16.7441, "step": 190620 }, { "epoch": 0.3850846608515779, "grad_norm": 444.00140380859375, "learning_rate": 7.777706044582027e-06, "loss": 22.8881, "step": 190630 }, { "epoch": 0.3851048614842617, "grad_norm": 1690.7781982421875, "learning_rate": 7.777415793396585e-06, "loss": 31.0503, "step": 190640 }, { "epoch": 0.3851250621169455, "grad_norm": 782.3836669921875, "learning_rate": 7.777125528674356e-06, "loss": 16.3224, "step": 190650 }, { "epoch": 0.3851452627496293, "grad_norm": 437.8273010253906, "learning_rate": 7.776835250416752e-06, "loss": 35.5877, "step": 190660 }, { "epoch": 0.3851654633823131, "grad_norm": 171.25006103515625, "learning_rate": 7.776544958625189e-06, "loss": 17.5099, "step": 190670 }, { "epoch": 0.38518566401499693, "grad_norm": 421.9563293457031, "learning_rate": 7.776254653301086e-06, "loss": 18.0364, "step": 190680 }, { "epoch": 0.38520586464768075, "grad_norm": 573.5492553710938, "learning_rate": 7.775964334445851e-06, "loss": 25.1693, "step": 190690 }, { "epoch": 0.38522606528036457, "grad_norm": 284.5480041503906, "learning_rate": 7.775674002060905e-06, "loss": 26.2877, "step": 190700 }, { "epoch": 0.3852462659130484, "grad_norm": 193.10023498535156, "learning_rate": 7.775383656147659e-06, "loss": 10.5967, "step": 190710 }, { "epoch": 0.3852664665457322, "grad_norm": 314.179443359375, "learning_rate": 7.77509329670753e-06, "loss": 17.553, "step": 190720 }, { "epoch": 0.38528666717841603, "grad_norm": 520.2635498046875, "learning_rate": 7.774802923741936e-06, "loss": 22.0596, "step": 190730 }, { "epoch": 0.38530686781109985, "grad_norm": 130.1132049560547, "learning_rate": 7.774512537252284e-06, "loss": 26.2724, "step": 190740 }, { "epoch": 0.38532706844378367, "grad_norm": 378.4585876464844, "learning_rate": 7.774222137239998e-06, "loss": 12.7258, "step": 190750 }, { "epoch": 0.3853472690764675, "grad_norm": 601.0895385742188, "learning_rate": 7.773931723706487e-06, "loss": 16.7666, "step": 190760 }, { "epoch": 0.3853674697091513, "grad_norm": 228.34921264648438, "learning_rate": 7.773641296653171e-06, "loss": 23.5775, "step": 190770 }, { "epoch": 0.38538767034183513, "grad_norm": 405.3677673339844, "learning_rate": 7.773350856081464e-06, "loss": 20.8397, "step": 190780 }, { "epoch": 0.3854078709745189, "grad_norm": 81.24579620361328, "learning_rate": 7.773060401992781e-06, "loss": 15.0614, "step": 190790 }, { "epoch": 0.3854280716072027, "grad_norm": 470.1910095214844, "learning_rate": 7.772769934388537e-06, "loss": 13.7391, "step": 190800 }, { "epoch": 0.38544827223988654, "grad_norm": 92.21508026123047, "learning_rate": 7.772479453270149e-06, "loss": 15.935, "step": 190810 }, { "epoch": 0.38546847287257036, "grad_norm": 214.7903289794922, "learning_rate": 7.772188958639034e-06, "loss": 17.1995, "step": 190820 }, { "epoch": 0.3854886735052542, "grad_norm": 267.424560546875, "learning_rate": 7.771898450496605e-06, "loss": 23.7148, "step": 190830 }, { "epoch": 0.385508874137938, "grad_norm": 425.11846923828125, "learning_rate": 7.771607928844278e-06, "loss": 16.9268, "step": 190840 }, { "epoch": 0.3855290747706218, "grad_norm": 228.52731323242188, "learning_rate": 7.771317393683471e-06, "loss": 18.6659, "step": 190850 }, { "epoch": 0.38554927540330564, "grad_norm": 502.8092956542969, "learning_rate": 7.7710268450156e-06, "loss": 19.0829, "step": 190860 }, { "epoch": 0.38556947603598946, "grad_norm": 171.64120483398438, "learning_rate": 7.770736282842079e-06, "loss": 24.9719, "step": 190870 }, { "epoch": 0.3855896766686733, "grad_norm": 399.7977294921875, "learning_rate": 7.770445707164325e-06, "loss": 20.8826, "step": 190880 }, { "epoch": 0.3856098773013571, "grad_norm": 104.05833435058594, "learning_rate": 7.770155117983757e-06, "loss": 13.9597, "step": 190890 }, { "epoch": 0.3856300779340409, "grad_norm": 134.9064483642578, "learning_rate": 7.769864515301787e-06, "loss": 17.2102, "step": 190900 }, { "epoch": 0.38565027856672474, "grad_norm": 379.99853515625, "learning_rate": 7.769573899119834e-06, "loss": 34.4059, "step": 190910 }, { "epoch": 0.3856704791994085, "grad_norm": 184.9699249267578, "learning_rate": 7.769283269439314e-06, "loss": 19.1921, "step": 190920 }, { "epoch": 0.3856906798320923, "grad_norm": 230.49176025390625, "learning_rate": 7.768992626261642e-06, "loss": 20.2383, "step": 190930 }, { "epoch": 0.38571088046477614, "grad_norm": 312.4473876953125, "learning_rate": 7.768701969588237e-06, "loss": 15.7273, "step": 190940 }, { "epoch": 0.38573108109745996, "grad_norm": 74.35285186767578, "learning_rate": 7.768411299420513e-06, "loss": 14.2671, "step": 190950 }, { "epoch": 0.3857512817301438, "grad_norm": 349.5299987792969, "learning_rate": 7.76812061575989e-06, "loss": 20.0125, "step": 190960 }, { "epoch": 0.3857714823628276, "grad_norm": 208.8681182861328, "learning_rate": 7.767829918607782e-06, "loss": 15.1067, "step": 190970 }, { "epoch": 0.3857916829955114, "grad_norm": 117.26119995117188, "learning_rate": 7.767539207965606e-06, "loss": 20.3782, "step": 190980 }, { "epoch": 0.38581188362819524, "grad_norm": 159.76853942871094, "learning_rate": 7.767248483834781e-06, "loss": 7.4541, "step": 190990 }, { "epoch": 0.38583208426087906, "grad_norm": 729.2867431640625, "learning_rate": 7.76695774621672e-06, "loss": 17.4021, "step": 191000 }, { "epoch": 0.3858522848935629, "grad_norm": 234.7200927734375, "learning_rate": 7.766666995112846e-06, "loss": 13.9221, "step": 191010 }, { "epoch": 0.3858724855262467, "grad_norm": 272.76513671875, "learning_rate": 7.76637623052457e-06, "loss": 20.4481, "step": 191020 }, { "epoch": 0.3858926861589305, "grad_norm": 373.59344482421875, "learning_rate": 7.766085452453312e-06, "loss": 17.1985, "step": 191030 }, { "epoch": 0.38591288679161434, "grad_norm": 324.0469055175781, "learning_rate": 7.765794660900489e-06, "loss": 26.7433, "step": 191040 }, { "epoch": 0.3859330874242981, "grad_norm": 204.33006286621094, "learning_rate": 7.76550385586752e-06, "loss": 11.3316, "step": 191050 }, { "epoch": 0.3859532880569819, "grad_norm": 417.6356506347656, "learning_rate": 7.76521303735582e-06, "loss": 22.1993, "step": 191060 }, { "epoch": 0.38597348868966574, "grad_norm": 606.5404663085938, "learning_rate": 7.764922205366807e-06, "loss": 27.6923, "step": 191070 }, { "epoch": 0.38599368932234956, "grad_norm": 186.98336791992188, "learning_rate": 7.764631359901897e-06, "loss": 23.3811, "step": 191080 }, { "epoch": 0.3860138899550334, "grad_norm": 390.24957275390625, "learning_rate": 7.764340500962511e-06, "loss": 28.4372, "step": 191090 }, { "epoch": 0.3860340905877172, "grad_norm": 170.90921020507812, "learning_rate": 7.764049628550063e-06, "loss": 9.9554, "step": 191100 }, { "epoch": 0.386054291220401, "grad_norm": 101.16758728027344, "learning_rate": 7.763758742665973e-06, "loss": 35.2447, "step": 191110 }, { "epoch": 0.38607449185308484, "grad_norm": 315.2352294921875, "learning_rate": 7.763467843311658e-06, "loss": 27.0785, "step": 191120 }, { "epoch": 0.38609469248576866, "grad_norm": 325.4975891113281, "learning_rate": 7.763176930488537e-06, "loss": 23.8233, "step": 191130 }, { "epoch": 0.3861148931184525, "grad_norm": 355.9261169433594, "learning_rate": 7.762886004198024e-06, "loss": 18.2954, "step": 191140 }, { "epoch": 0.3861350937511363, "grad_norm": 233.7310028076172, "learning_rate": 7.762595064441542e-06, "loss": 16.1788, "step": 191150 }, { "epoch": 0.3861552943838201, "grad_norm": 290.951171875, "learning_rate": 7.762304111220506e-06, "loss": 10.9946, "step": 191160 }, { "epoch": 0.38617549501650394, "grad_norm": 213.33009338378906, "learning_rate": 7.762013144536337e-06, "loss": 32.5143, "step": 191170 }, { "epoch": 0.3861956956491877, "grad_norm": 220.18475341796875, "learning_rate": 7.761722164390448e-06, "loss": 16.3928, "step": 191180 }, { "epoch": 0.38621589628187153, "grad_norm": 623.9146728515625, "learning_rate": 7.761431170784261e-06, "loss": 39.2043, "step": 191190 }, { "epoch": 0.38623609691455535, "grad_norm": 455.8894958496094, "learning_rate": 7.761140163719194e-06, "loss": 17.405, "step": 191200 }, { "epoch": 0.38625629754723917, "grad_norm": 349.6864929199219, "learning_rate": 7.760849143196664e-06, "loss": 15.4879, "step": 191210 }, { "epoch": 0.386276498179923, "grad_norm": 209.12913513183594, "learning_rate": 7.76055810921809e-06, "loss": 17.8421, "step": 191220 }, { "epoch": 0.3862966988126068, "grad_norm": 707.8167724609375, "learning_rate": 7.760267061784891e-06, "loss": 25.0078, "step": 191230 }, { "epoch": 0.38631689944529063, "grad_norm": 363.41021728515625, "learning_rate": 7.759976000898486e-06, "loss": 21.2288, "step": 191240 }, { "epoch": 0.38633710007797445, "grad_norm": 213.58877563476562, "learning_rate": 7.759684926560292e-06, "loss": 13.5569, "step": 191250 }, { "epoch": 0.38635730071065827, "grad_norm": 161.18182373046875, "learning_rate": 7.759393838771728e-06, "loss": 17.3642, "step": 191260 }, { "epoch": 0.3863775013433421, "grad_norm": 137.2944793701172, "learning_rate": 7.759102737534214e-06, "loss": 26.8406, "step": 191270 }, { "epoch": 0.3863977019760259, "grad_norm": 430.2156677246094, "learning_rate": 7.758811622849167e-06, "loss": 12.7229, "step": 191280 }, { "epoch": 0.38641790260870973, "grad_norm": 458.86419677734375, "learning_rate": 7.758520494718006e-06, "loss": 25.8501, "step": 191290 }, { "epoch": 0.3864381032413935, "grad_norm": 111.54752349853516, "learning_rate": 7.758229353142153e-06, "loss": 16.5112, "step": 191300 }, { "epoch": 0.3864583038740773, "grad_norm": 754.3347778320312, "learning_rate": 7.757938198123024e-06, "loss": 20.2217, "step": 191310 }, { "epoch": 0.38647850450676113, "grad_norm": 358.7660217285156, "learning_rate": 7.757647029662037e-06, "loss": 16.5582, "step": 191320 }, { "epoch": 0.38649870513944495, "grad_norm": 82.18794250488281, "learning_rate": 7.757355847760614e-06, "loss": 11.3892, "step": 191330 }, { "epoch": 0.3865189057721288, "grad_norm": 98.63973999023438, "learning_rate": 7.757064652420172e-06, "loss": 27.956, "step": 191340 }, { "epoch": 0.3865391064048126, "grad_norm": 227.98011779785156, "learning_rate": 7.756773443642132e-06, "loss": 159.1444, "step": 191350 }, { "epoch": 0.3865593070374964, "grad_norm": 192.70172119140625, "learning_rate": 7.756482221427914e-06, "loss": 43.8589, "step": 191360 }, { "epoch": 0.38657950767018023, "grad_norm": 118.09973907470703, "learning_rate": 7.756190985778933e-06, "loss": 20.7906, "step": 191370 }, { "epoch": 0.38659970830286405, "grad_norm": 722.4407958984375, "learning_rate": 7.755899736696613e-06, "loss": 27.3598, "step": 191380 }, { "epoch": 0.3866199089355479, "grad_norm": 436.3580322265625, "learning_rate": 7.755608474182372e-06, "loss": 29.1565, "step": 191390 }, { "epoch": 0.3866401095682317, "grad_norm": 25.297212600708008, "learning_rate": 7.755317198237631e-06, "loss": 23.107, "step": 191400 }, { "epoch": 0.3866603102009155, "grad_norm": 798.80712890625, "learning_rate": 7.755025908863807e-06, "loss": 30.8362, "step": 191410 }, { "epoch": 0.38668051083359933, "grad_norm": 232.66844177246094, "learning_rate": 7.75473460606232e-06, "loss": 12.973, "step": 191420 }, { "epoch": 0.3867007114662831, "grad_norm": 5.787586688995361, "learning_rate": 7.75444328983459e-06, "loss": 18.2142, "step": 191430 }, { "epoch": 0.3867209120989669, "grad_norm": 408.4725036621094, "learning_rate": 7.75415196018204e-06, "loss": 38.1794, "step": 191440 }, { "epoch": 0.38674111273165074, "grad_norm": 399.9501037597656, "learning_rate": 7.753860617106085e-06, "loss": 17.444, "step": 191450 }, { "epoch": 0.38676131336433456, "grad_norm": 174.24473571777344, "learning_rate": 7.75356926060815e-06, "loss": 21.1196, "step": 191460 }, { "epoch": 0.3867815139970184, "grad_norm": 173.15296936035156, "learning_rate": 7.75327789068965e-06, "loss": 15.4223, "step": 191470 }, { "epoch": 0.3868017146297022, "grad_norm": 309.2243957519531, "learning_rate": 7.752986507352009e-06, "loss": 22.8944, "step": 191480 }, { "epoch": 0.386821915262386, "grad_norm": 388.70758056640625, "learning_rate": 7.752695110596644e-06, "loss": 18.7242, "step": 191490 }, { "epoch": 0.38684211589506984, "grad_norm": 442.8129577636719, "learning_rate": 7.752403700424978e-06, "loss": 23.6696, "step": 191500 }, { "epoch": 0.38686231652775366, "grad_norm": 719.2984619140625, "learning_rate": 7.75211227683843e-06, "loss": 14.8973, "step": 191510 }, { "epoch": 0.3868825171604375, "grad_norm": 329.86669921875, "learning_rate": 7.751820839838423e-06, "loss": 17.4997, "step": 191520 }, { "epoch": 0.3869027177931213, "grad_norm": 64.41311645507812, "learning_rate": 7.751529389426372e-06, "loss": 13.9133, "step": 191530 }, { "epoch": 0.3869229184258051, "grad_norm": 335.64422607421875, "learning_rate": 7.7512379256037e-06, "loss": 29.7995, "step": 191540 }, { "epoch": 0.38694311905848894, "grad_norm": 257.32025146484375, "learning_rate": 7.75094644837183e-06, "loss": 9.564, "step": 191550 }, { "epoch": 0.3869633196911727, "grad_norm": 229.3131103515625, "learning_rate": 7.750654957732179e-06, "loss": 14.7669, "step": 191560 }, { "epoch": 0.3869835203238565, "grad_norm": 380.4468078613281, "learning_rate": 7.75036345368617e-06, "loss": 16.2458, "step": 191570 }, { "epoch": 0.38700372095654034, "grad_norm": 355.1098937988281, "learning_rate": 7.750071936235223e-06, "loss": 23.0267, "step": 191580 }, { "epoch": 0.38702392158922416, "grad_norm": 149.6070098876953, "learning_rate": 7.74978040538076e-06, "loss": 9.4818, "step": 191590 }, { "epoch": 0.387044122221908, "grad_norm": 75.49007415771484, "learning_rate": 7.7494888611242e-06, "loss": 20.2942, "step": 191600 }, { "epoch": 0.3870643228545918, "grad_norm": 123.99274444580078, "learning_rate": 7.749197303466964e-06, "loss": 21.4896, "step": 191610 }, { "epoch": 0.3870845234872756, "grad_norm": 161.93531799316406, "learning_rate": 7.748905732410475e-06, "loss": 27.9756, "step": 191620 }, { "epoch": 0.38710472411995944, "grad_norm": 559.0492553710938, "learning_rate": 7.748614147956153e-06, "loss": 20.2356, "step": 191630 }, { "epoch": 0.38712492475264326, "grad_norm": 259.5020751953125, "learning_rate": 7.748322550105419e-06, "loss": 14.9904, "step": 191640 }, { "epoch": 0.3871451253853271, "grad_norm": 279.73980712890625, "learning_rate": 7.748030938859692e-06, "loss": 24.28, "step": 191650 }, { "epoch": 0.3871653260180109, "grad_norm": 341.34295654296875, "learning_rate": 7.747739314220398e-06, "loss": 14.5461, "step": 191660 }, { "epoch": 0.3871855266506947, "grad_norm": 75.42556762695312, "learning_rate": 7.747447676188955e-06, "loss": 25.2818, "step": 191670 }, { "epoch": 0.38720572728337854, "grad_norm": 179.56692504882812, "learning_rate": 7.747156024766785e-06, "loss": 31.3081, "step": 191680 }, { "epoch": 0.3872259279160623, "grad_norm": 125.3934326171875, "learning_rate": 7.74686435995531e-06, "loss": 17.4237, "step": 191690 }, { "epoch": 0.3872461285487461, "grad_norm": 225.60592651367188, "learning_rate": 7.74657268175595e-06, "loss": 17.7469, "step": 191700 }, { "epoch": 0.38726632918142995, "grad_norm": 192.5275421142578, "learning_rate": 7.74628099017013e-06, "loss": 21.5523, "step": 191710 }, { "epoch": 0.38728652981411377, "grad_norm": 201.87120056152344, "learning_rate": 7.74598928519927e-06, "loss": 24.7757, "step": 191720 }, { "epoch": 0.3873067304467976, "grad_norm": 303.92498779296875, "learning_rate": 7.745697566844788e-06, "loss": 20.0947, "step": 191730 }, { "epoch": 0.3873269310794814, "grad_norm": 112.59220123291016, "learning_rate": 7.745405835108112e-06, "loss": 22.649, "step": 191740 }, { "epoch": 0.3873471317121652, "grad_norm": 98.92120361328125, "learning_rate": 7.74511408999066e-06, "loss": 21.7379, "step": 191750 }, { "epoch": 0.38736733234484905, "grad_norm": 262.8480224609375, "learning_rate": 7.744822331493855e-06, "loss": 12.7217, "step": 191760 }, { "epoch": 0.38738753297753287, "grad_norm": 386.04766845703125, "learning_rate": 7.744530559619117e-06, "loss": 29.9491, "step": 191770 }, { "epoch": 0.3874077336102167, "grad_norm": 221.23648071289062, "learning_rate": 7.744238774367873e-06, "loss": 18.0562, "step": 191780 }, { "epoch": 0.3874279342429005, "grad_norm": 293.9769287109375, "learning_rate": 7.743946975741541e-06, "loss": 16.6403, "step": 191790 }, { "epoch": 0.3874481348755843, "grad_norm": 187.405517578125, "learning_rate": 7.743655163741544e-06, "loss": 27.9307, "step": 191800 }, { "epoch": 0.3874683355082681, "grad_norm": 186.2740478515625, "learning_rate": 7.743363338369303e-06, "loss": 27.5766, "step": 191810 }, { "epoch": 0.3874885361409519, "grad_norm": 341.5776672363281, "learning_rate": 7.743071499626244e-06, "loss": 17.248, "step": 191820 }, { "epoch": 0.38750873677363573, "grad_norm": 394.06500244140625, "learning_rate": 7.742779647513785e-06, "loss": 20.7685, "step": 191830 }, { "epoch": 0.38752893740631955, "grad_norm": 166.6071319580078, "learning_rate": 7.742487782033352e-06, "loss": 15.7632, "step": 191840 }, { "epoch": 0.38754913803900337, "grad_norm": 197.57403564453125, "learning_rate": 7.742195903186366e-06, "loss": 13.2974, "step": 191850 }, { "epoch": 0.3875693386716872, "grad_norm": 151.60096740722656, "learning_rate": 7.74190401097425e-06, "loss": 20.8045, "step": 191860 }, { "epoch": 0.387589539304371, "grad_norm": 397.1344909667969, "learning_rate": 7.741612105398429e-06, "loss": 23.9867, "step": 191870 }, { "epoch": 0.38760973993705483, "grad_norm": 171.3758087158203, "learning_rate": 7.74132018646032e-06, "loss": 16.3928, "step": 191880 }, { "epoch": 0.38762994056973865, "grad_norm": 133.99484252929688, "learning_rate": 7.741028254161349e-06, "loss": 12.3706, "step": 191890 }, { "epoch": 0.38765014120242247, "grad_norm": 180.68759155273438, "learning_rate": 7.740736308502939e-06, "loss": 21.6957, "step": 191900 }, { "epoch": 0.3876703418351063, "grad_norm": 95.6943588256836, "learning_rate": 7.740444349486512e-06, "loss": 13.9147, "step": 191910 }, { "epoch": 0.3876905424677901, "grad_norm": 478.73419189453125, "learning_rate": 7.740152377113493e-06, "loss": 22.6322, "step": 191920 }, { "epoch": 0.38771074310047393, "grad_norm": 284.2960510253906, "learning_rate": 7.739860391385303e-06, "loss": 22.8102, "step": 191930 }, { "epoch": 0.3877309437331577, "grad_norm": 330.6656799316406, "learning_rate": 7.739568392303364e-06, "loss": 32.3959, "step": 191940 }, { "epoch": 0.3877511443658415, "grad_norm": 1104.9599609375, "learning_rate": 7.739276379869105e-06, "loss": 26.9811, "step": 191950 }, { "epoch": 0.38777134499852534, "grad_norm": 331.3559875488281, "learning_rate": 7.738984354083942e-06, "loss": 42.3927, "step": 191960 }, { "epoch": 0.38779154563120916, "grad_norm": 404.5426940917969, "learning_rate": 7.738692314949304e-06, "loss": 22.57, "step": 191970 }, { "epoch": 0.387811746263893, "grad_norm": 412.352783203125, "learning_rate": 7.738400262466608e-06, "loss": 15.2895, "step": 191980 }, { "epoch": 0.3878319468965768, "grad_norm": 305.0199890136719, "learning_rate": 7.738108196637284e-06, "loss": 23.5704, "step": 191990 }, { "epoch": 0.3878521475292606, "grad_norm": 233.75970458984375, "learning_rate": 7.737816117462752e-06, "loss": 16.3054, "step": 192000 }, { "epoch": 0.38787234816194444, "grad_norm": 183.88137817382812, "learning_rate": 7.737524024944437e-06, "loss": 22.3913, "step": 192010 }, { "epoch": 0.38789254879462826, "grad_norm": 739.701416015625, "learning_rate": 7.737231919083761e-06, "loss": 37.5644, "step": 192020 }, { "epoch": 0.3879127494273121, "grad_norm": 401.48101806640625, "learning_rate": 7.736939799882149e-06, "loss": 17.2885, "step": 192030 }, { "epoch": 0.3879329500599959, "grad_norm": 305.9223327636719, "learning_rate": 7.736647667341025e-06, "loss": 36.8294, "step": 192040 }, { "epoch": 0.3879531506926797, "grad_norm": 889.1023559570312, "learning_rate": 7.736355521461812e-06, "loss": 36.097, "step": 192050 }, { "epoch": 0.38797335132536354, "grad_norm": 394.557861328125, "learning_rate": 7.736063362245931e-06, "loss": 28.2617, "step": 192060 }, { "epoch": 0.3879935519580473, "grad_norm": 614.652587890625, "learning_rate": 7.735771189694813e-06, "loss": 17.6581, "step": 192070 }, { "epoch": 0.3880137525907311, "grad_norm": 179.66131591796875, "learning_rate": 7.735479003809876e-06, "loss": 9.9457, "step": 192080 }, { "epoch": 0.38803395322341494, "grad_norm": 144.80239868164062, "learning_rate": 7.735186804592548e-06, "loss": 11.2095, "step": 192090 }, { "epoch": 0.38805415385609876, "grad_norm": 361.1067199707031, "learning_rate": 7.734894592044249e-06, "loss": 24.6209, "step": 192100 }, { "epoch": 0.3880743544887826, "grad_norm": 398.0833740234375, "learning_rate": 7.734602366166406e-06, "loss": 17.7591, "step": 192110 }, { "epoch": 0.3880945551214664, "grad_norm": 197.4931182861328, "learning_rate": 7.734310126960444e-06, "loss": 26.0308, "step": 192120 }, { "epoch": 0.3881147557541502, "grad_norm": 262.24951171875, "learning_rate": 7.734017874427786e-06, "loss": 19.6965, "step": 192130 }, { "epoch": 0.38813495638683404, "grad_norm": 15.633508682250977, "learning_rate": 7.733725608569856e-06, "loss": 13.7338, "step": 192140 }, { "epoch": 0.38815515701951786, "grad_norm": 252.8040008544922, "learning_rate": 7.733433329388079e-06, "loss": 12.8804, "step": 192150 }, { "epoch": 0.3881753576522017, "grad_norm": 295.9927673339844, "learning_rate": 7.733141036883878e-06, "loss": 15.3605, "step": 192160 }, { "epoch": 0.3881955582848855, "grad_norm": 338.4163513183594, "learning_rate": 7.73284873105868e-06, "loss": 28.1346, "step": 192170 }, { "epoch": 0.3882157589175693, "grad_norm": 132.29185485839844, "learning_rate": 7.73255641191391e-06, "loss": 24.7882, "step": 192180 }, { "epoch": 0.38823595955025314, "grad_norm": 310.9661865234375, "learning_rate": 7.73226407945099e-06, "loss": 13.8218, "step": 192190 }, { "epoch": 0.3882561601829369, "grad_norm": 450.2756042480469, "learning_rate": 7.731971733671347e-06, "loss": 24.5412, "step": 192200 }, { "epoch": 0.3882763608156207, "grad_norm": 281.07818603515625, "learning_rate": 7.731679374576404e-06, "loss": 12.587, "step": 192210 }, { "epoch": 0.38829656144830454, "grad_norm": 330.68890380859375, "learning_rate": 7.731387002167587e-06, "loss": 13.8512, "step": 192220 }, { "epoch": 0.38831676208098836, "grad_norm": 535.4540405273438, "learning_rate": 7.731094616446323e-06, "loss": 26.0862, "step": 192230 }, { "epoch": 0.3883369627136722, "grad_norm": 513.419189453125, "learning_rate": 7.730802217414034e-06, "loss": 18.8331, "step": 192240 }, { "epoch": 0.388357163346356, "grad_norm": 159.6501007080078, "learning_rate": 7.730509805072146e-06, "loss": 22.9805, "step": 192250 }, { "epoch": 0.3883773639790398, "grad_norm": 266.5825500488281, "learning_rate": 7.730217379422084e-06, "loss": 32.1138, "step": 192260 }, { "epoch": 0.38839756461172364, "grad_norm": 238.41355895996094, "learning_rate": 7.729924940465275e-06, "loss": 17.9281, "step": 192270 }, { "epoch": 0.38841776524440746, "grad_norm": 788.0181884765625, "learning_rate": 7.729632488203142e-06, "loss": 42.753, "step": 192280 }, { "epoch": 0.3884379658770913, "grad_norm": 285.4902038574219, "learning_rate": 7.729340022637111e-06, "loss": 9.903, "step": 192290 }, { "epoch": 0.3884581665097751, "grad_norm": 336.642333984375, "learning_rate": 7.729047543768608e-06, "loss": 25.862, "step": 192300 }, { "epoch": 0.3884783671424589, "grad_norm": 570.6214599609375, "learning_rate": 7.72875505159906e-06, "loss": 29.1715, "step": 192310 }, { "epoch": 0.38849856777514274, "grad_norm": 155.3254852294922, "learning_rate": 7.728462546129888e-06, "loss": 22.7431, "step": 192320 }, { "epoch": 0.3885187684078265, "grad_norm": 164.51100158691406, "learning_rate": 7.728170027362523e-06, "loss": 19.8067, "step": 192330 }, { "epoch": 0.38853896904051033, "grad_norm": 170.8310546875, "learning_rate": 7.727877495298386e-06, "loss": 34.7659, "step": 192340 }, { "epoch": 0.38855916967319415, "grad_norm": 330.7498474121094, "learning_rate": 7.727584949938907e-06, "loss": 21.3973, "step": 192350 }, { "epoch": 0.38857937030587797, "grad_norm": 1366.435546875, "learning_rate": 7.727292391285507e-06, "loss": 45.4149, "step": 192360 }, { "epoch": 0.3885995709385618, "grad_norm": 51.65348434448242, "learning_rate": 7.726999819339618e-06, "loss": 37.7116, "step": 192370 }, { "epoch": 0.3886197715712456, "grad_norm": 333.90643310546875, "learning_rate": 7.72670723410266e-06, "loss": 12.0313, "step": 192380 }, { "epoch": 0.38863997220392943, "grad_norm": 250.7086181640625, "learning_rate": 7.726414635576062e-06, "loss": 21.835, "step": 192390 }, { "epoch": 0.38866017283661325, "grad_norm": 99.44264221191406, "learning_rate": 7.726122023761252e-06, "loss": 16.7722, "step": 192400 }, { "epoch": 0.38868037346929707, "grad_norm": 316.75994873046875, "learning_rate": 7.72582939865965e-06, "loss": 21.675, "step": 192410 }, { "epoch": 0.3887005741019809, "grad_norm": 250.49037170410156, "learning_rate": 7.72553676027269e-06, "loss": 16.0169, "step": 192420 }, { "epoch": 0.3887207747346647, "grad_norm": 103.27822875976562, "learning_rate": 7.725244108601793e-06, "loss": 33.0113, "step": 192430 }, { "epoch": 0.38874097536734853, "grad_norm": 316.40185546875, "learning_rate": 7.724951443648386e-06, "loss": 18.3506, "step": 192440 }, { "epoch": 0.3887611760000323, "grad_norm": 277.0000915527344, "learning_rate": 7.724658765413897e-06, "loss": 15.9658, "step": 192450 }, { "epoch": 0.3887813766327161, "grad_norm": 52.55702209472656, "learning_rate": 7.72436607389975e-06, "loss": 26.9331, "step": 192460 }, { "epoch": 0.38880157726539993, "grad_norm": 278.3560791015625, "learning_rate": 7.724073369107376e-06, "loss": 10.8066, "step": 192470 }, { "epoch": 0.38882177789808375, "grad_norm": 223.42715454101562, "learning_rate": 7.723780651038196e-06, "loss": 33.8277, "step": 192480 }, { "epoch": 0.3888419785307676, "grad_norm": 301.831787109375, "learning_rate": 7.723487919693642e-06, "loss": 15.6647, "step": 192490 }, { "epoch": 0.3888621791634514, "grad_norm": 375.55218505859375, "learning_rate": 7.723195175075136e-06, "loss": 22.0178, "step": 192500 }, { "epoch": 0.3888823797961352, "grad_norm": 0.0, "learning_rate": 7.722902417184109e-06, "loss": 15.3054, "step": 192510 }, { "epoch": 0.38890258042881903, "grad_norm": 251.27955627441406, "learning_rate": 7.722609646021984e-06, "loss": 21.3906, "step": 192520 }, { "epoch": 0.38892278106150285, "grad_norm": 302.0898742675781, "learning_rate": 7.72231686159019e-06, "loss": 14.5807, "step": 192530 }, { "epoch": 0.3889429816941867, "grad_norm": 109.38668823242188, "learning_rate": 7.722024063890154e-06, "loss": 12.8443, "step": 192540 }, { "epoch": 0.3889631823268705, "grad_norm": 157.08822631835938, "learning_rate": 7.721731252923305e-06, "loss": 13.3732, "step": 192550 }, { "epoch": 0.3889833829595543, "grad_norm": 299.6491394042969, "learning_rate": 7.721438428691065e-06, "loss": 25.7093, "step": 192560 }, { "epoch": 0.38900358359223813, "grad_norm": 175.39622497558594, "learning_rate": 7.721145591194865e-06, "loss": 14.7934, "step": 192570 }, { "epoch": 0.3890237842249219, "grad_norm": 544.25634765625, "learning_rate": 7.720852740436134e-06, "loss": 14.4889, "step": 192580 }, { "epoch": 0.3890439848576057, "grad_norm": 307.2697448730469, "learning_rate": 7.720559876416293e-06, "loss": 21.4278, "step": 192590 }, { "epoch": 0.38906418549028954, "grad_norm": 174.79664611816406, "learning_rate": 7.720266999136774e-06, "loss": 24.0739, "step": 192600 }, { "epoch": 0.38908438612297336, "grad_norm": 370.8664245605469, "learning_rate": 7.719974108599005e-06, "loss": 23.76, "step": 192610 }, { "epoch": 0.3891045867556572, "grad_norm": 401.61175537109375, "learning_rate": 7.719681204804413e-06, "loss": 36.486, "step": 192620 }, { "epoch": 0.389124787388341, "grad_norm": 371.7220764160156, "learning_rate": 7.71938828775442e-06, "loss": 15.2999, "step": 192630 }, { "epoch": 0.3891449880210248, "grad_norm": 283.34259033203125, "learning_rate": 7.719095357450462e-06, "loss": 18.679, "step": 192640 }, { "epoch": 0.38916518865370864, "grad_norm": 249.94131469726562, "learning_rate": 7.718802413893963e-06, "loss": 19.8644, "step": 192650 }, { "epoch": 0.38918538928639246, "grad_norm": 79.08025360107422, "learning_rate": 7.718509457086351e-06, "loss": 23.0439, "step": 192660 }, { "epoch": 0.3892055899190763, "grad_norm": 281.539306640625, "learning_rate": 7.718216487029051e-06, "loss": 13.7148, "step": 192670 }, { "epoch": 0.3892257905517601, "grad_norm": 600.9617919921875, "learning_rate": 7.717923503723496e-06, "loss": 31.4343, "step": 192680 }, { "epoch": 0.3892459911844439, "grad_norm": 188.7032470703125, "learning_rate": 7.71763050717111e-06, "loss": 18.6346, "step": 192690 }, { "epoch": 0.38926619181712774, "grad_norm": 112.60848236083984, "learning_rate": 7.717337497373324e-06, "loss": 12.1749, "step": 192700 }, { "epoch": 0.3892863924498115, "grad_norm": 271.02740478515625, "learning_rate": 7.717044474331565e-06, "loss": 32.2515, "step": 192710 }, { "epoch": 0.3893065930824953, "grad_norm": 453.67803955078125, "learning_rate": 7.716751438047259e-06, "loss": 16.5761, "step": 192720 }, { "epoch": 0.38932679371517914, "grad_norm": 432.2919616699219, "learning_rate": 7.716458388521837e-06, "loss": 35.737, "step": 192730 }, { "epoch": 0.38934699434786296, "grad_norm": 415.10748291015625, "learning_rate": 7.716165325756727e-06, "loss": 16.1025, "step": 192740 }, { "epoch": 0.3893671949805468, "grad_norm": 278.7324523925781, "learning_rate": 7.715872249753353e-06, "loss": 16.0946, "step": 192750 }, { "epoch": 0.3893873956132306, "grad_norm": 255.84722900390625, "learning_rate": 7.715579160513152e-06, "loss": 23.5336, "step": 192760 }, { "epoch": 0.3894075962459144, "grad_norm": 17.803544998168945, "learning_rate": 7.715286058037544e-06, "loss": 14.4603, "step": 192770 }, { "epoch": 0.38942779687859824, "grad_norm": 146.4586181640625, "learning_rate": 7.714992942327962e-06, "loss": 18.2356, "step": 192780 }, { "epoch": 0.38944799751128206, "grad_norm": 249.72447204589844, "learning_rate": 7.714699813385834e-06, "loss": 21.4348, "step": 192790 }, { "epoch": 0.3894681981439659, "grad_norm": 218.7489471435547, "learning_rate": 7.714406671212589e-06, "loss": 36.5761, "step": 192800 }, { "epoch": 0.3894883987766497, "grad_norm": 338.7288818359375, "learning_rate": 7.714113515809653e-06, "loss": 14.1156, "step": 192810 }, { "epoch": 0.3895085994093335, "grad_norm": 254.4302520751953, "learning_rate": 7.71382034717846e-06, "loss": 16.6875, "step": 192820 }, { "epoch": 0.38952880004201734, "grad_norm": 82.7022933959961, "learning_rate": 7.713527165320432e-06, "loss": 26.9583, "step": 192830 }, { "epoch": 0.3895490006747011, "grad_norm": 176.47909545898438, "learning_rate": 7.713233970237004e-06, "loss": 9.9317, "step": 192840 }, { "epoch": 0.3895692013073849, "grad_norm": 21.919960021972656, "learning_rate": 7.712940761929604e-06, "loss": 16.7429, "step": 192850 }, { "epoch": 0.38958940194006875, "grad_norm": 244.1616973876953, "learning_rate": 7.712647540399658e-06, "loss": 14.7949, "step": 192860 }, { "epoch": 0.38960960257275257, "grad_norm": 358.0343933105469, "learning_rate": 7.712354305648597e-06, "loss": 12.4139, "step": 192870 }, { "epoch": 0.3896298032054364, "grad_norm": 234.85606384277344, "learning_rate": 7.71206105767785e-06, "loss": 22.4984, "step": 192880 }, { "epoch": 0.3896500038381202, "grad_norm": 271.9931335449219, "learning_rate": 7.711767796488847e-06, "loss": 13.2198, "step": 192890 }, { "epoch": 0.389670204470804, "grad_norm": 539.104736328125, "learning_rate": 7.711474522083015e-06, "loss": 25.7552, "step": 192900 }, { "epoch": 0.38969040510348785, "grad_norm": 371.15887451171875, "learning_rate": 7.711181234461786e-06, "loss": 12.0025, "step": 192910 }, { "epoch": 0.38971060573617167, "grad_norm": 384.42218017578125, "learning_rate": 7.71088793362659e-06, "loss": 21.4849, "step": 192920 }, { "epoch": 0.3897308063688555, "grad_norm": 323.582275390625, "learning_rate": 7.710594619578853e-06, "loss": 16.4594, "step": 192930 }, { "epoch": 0.3897510070015393, "grad_norm": 356.45379638671875, "learning_rate": 7.710301292320007e-06, "loss": 17.2747, "step": 192940 }, { "epoch": 0.3897712076342231, "grad_norm": 418.76873779296875, "learning_rate": 7.710007951851482e-06, "loss": 13.6653, "step": 192950 }, { "epoch": 0.38979140826690695, "grad_norm": 4.333935260772705, "learning_rate": 7.709714598174706e-06, "loss": 8.9145, "step": 192960 }, { "epoch": 0.3898116088995907, "grad_norm": 363.8253173828125, "learning_rate": 7.709421231291112e-06, "loss": 26.4408, "step": 192970 }, { "epoch": 0.38983180953227453, "grad_norm": 1192.8935546875, "learning_rate": 7.709127851202126e-06, "loss": 23.751, "step": 192980 }, { "epoch": 0.38985201016495835, "grad_norm": 192.88636779785156, "learning_rate": 7.708834457909179e-06, "loss": 15.3698, "step": 192990 }, { "epoch": 0.38987221079764217, "grad_norm": 594.34375, "learning_rate": 7.7085410514137e-06, "loss": 17.9119, "step": 193000 }, { "epoch": 0.389892411430326, "grad_norm": 58.0769157409668, "learning_rate": 7.708247631717122e-06, "loss": 20.6228, "step": 193010 }, { "epoch": 0.3899126120630098, "grad_norm": 408.14361572265625, "learning_rate": 7.707954198820873e-06, "loss": 15.4974, "step": 193020 }, { "epoch": 0.38993281269569363, "grad_norm": 68.30350494384766, "learning_rate": 7.707660752726384e-06, "loss": 16.1015, "step": 193030 }, { "epoch": 0.38995301332837745, "grad_norm": 53.01427459716797, "learning_rate": 7.707367293435086e-06, "loss": 26.6339, "step": 193040 }, { "epoch": 0.38997321396106127, "grad_norm": 209.24917602539062, "learning_rate": 7.707073820948407e-06, "loss": 17.308, "step": 193050 }, { "epoch": 0.3899934145937451, "grad_norm": 545.6006469726562, "learning_rate": 7.706780335267778e-06, "loss": 28.3598, "step": 193060 }, { "epoch": 0.3900136152264289, "grad_norm": 416.3035583496094, "learning_rate": 7.706486836394632e-06, "loss": 29.2072, "step": 193070 }, { "epoch": 0.39003381585911273, "grad_norm": 899.1925048828125, "learning_rate": 7.706193324330396e-06, "loss": 34.1971, "step": 193080 }, { "epoch": 0.3900540164917965, "grad_norm": 134.35385131835938, "learning_rate": 7.705899799076502e-06, "loss": 16.0578, "step": 193090 }, { "epoch": 0.3900742171244803, "grad_norm": 7.396317005157471, "learning_rate": 7.70560626063438e-06, "loss": 13.3698, "step": 193100 }, { "epoch": 0.39009441775716414, "grad_norm": 304.11370849609375, "learning_rate": 7.70531270900546e-06, "loss": 21.4184, "step": 193110 }, { "epoch": 0.39011461838984796, "grad_norm": 190.4987030029297, "learning_rate": 7.705019144191178e-06, "loss": 14.925, "step": 193120 }, { "epoch": 0.3901348190225318, "grad_norm": 160.68768310546875, "learning_rate": 7.704725566192959e-06, "loss": 11.1276, "step": 193130 }, { "epoch": 0.3901550196552156, "grad_norm": 273.1963806152344, "learning_rate": 7.704431975012234e-06, "loss": 25.7875, "step": 193140 }, { "epoch": 0.3901752202878994, "grad_norm": 113.93478393554688, "learning_rate": 7.704138370650437e-06, "loss": 15.6928, "step": 193150 }, { "epoch": 0.39019542092058324, "grad_norm": 674.81494140625, "learning_rate": 7.703844753108997e-06, "loss": 24.5866, "step": 193160 }, { "epoch": 0.39021562155326706, "grad_norm": 426.4443359375, "learning_rate": 7.703551122389345e-06, "loss": 20.443, "step": 193170 }, { "epoch": 0.3902358221859509, "grad_norm": 394.1851501464844, "learning_rate": 7.703257478492915e-06, "loss": 17.2254, "step": 193180 }, { "epoch": 0.3902560228186347, "grad_norm": 415.3370056152344, "learning_rate": 7.702963821421135e-06, "loss": 19.7609, "step": 193190 }, { "epoch": 0.3902762234513185, "grad_norm": 154.19558715820312, "learning_rate": 7.702670151175435e-06, "loss": 18.946, "step": 193200 }, { "epoch": 0.39029642408400234, "grad_norm": 26.77177619934082, "learning_rate": 7.70237646775725e-06, "loss": 36.0303, "step": 193210 }, { "epoch": 0.3903166247166861, "grad_norm": 820.5164794921875, "learning_rate": 7.70208277116801e-06, "loss": 15.5564, "step": 193220 }, { "epoch": 0.3903368253493699, "grad_norm": 0.0, "learning_rate": 7.701789061409148e-06, "loss": 26.2978, "step": 193230 }, { "epoch": 0.39035702598205374, "grad_norm": 371.2290344238281, "learning_rate": 7.701495338482093e-06, "loss": 25.6886, "step": 193240 }, { "epoch": 0.39037722661473756, "grad_norm": 38.680423736572266, "learning_rate": 7.701201602388276e-06, "loss": 26.4571, "step": 193250 }, { "epoch": 0.3903974272474214, "grad_norm": 356.66925048828125, "learning_rate": 7.70090785312913e-06, "loss": 24.5346, "step": 193260 }, { "epoch": 0.3904176278801052, "grad_norm": 324.1791076660156, "learning_rate": 7.700614090706087e-06, "loss": 45.8998, "step": 193270 }, { "epoch": 0.390437828512789, "grad_norm": 657.3759155273438, "learning_rate": 7.70032031512058e-06, "loss": 16.6499, "step": 193280 }, { "epoch": 0.39045802914547284, "grad_norm": 130.220458984375, "learning_rate": 7.700026526374038e-06, "loss": 18.0542, "step": 193290 }, { "epoch": 0.39047822977815666, "grad_norm": 332.5142517089844, "learning_rate": 7.699732724467894e-06, "loss": 35.5659, "step": 193300 }, { "epoch": 0.3904984304108405, "grad_norm": 268.6617126464844, "learning_rate": 7.69943890940358e-06, "loss": 19.9344, "step": 193310 }, { "epoch": 0.3905186310435243, "grad_norm": 336.018310546875, "learning_rate": 7.699145081182528e-06, "loss": 10.6341, "step": 193320 }, { "epoch": 0.3905388316762081, "grad_norm": 381.0491943359375, "learning_rate": 7.69885123980617e-06, "loss": 23.8663, "step": 193330 }, { "epoch": 0.39055903230889194, "grad_norm": 432.4365234375, "learning_rate": 7.69855738527594e-06, "loss": 25.3256, "step": 193340 }, { "epoch": 0.3905792329415757, "grad_norm": 530.2938232421875, "learning_rate": 7.698263517593268e-06, "loss": 33.9724, "step": 193350 }, { "epoch": 0.3905994335742595, "grad_norm": 300.0419616699219, "learning_rate": 7.697969636759586e-06, "loss": 37.3941, "step": 193360 }, { "epoch": 0.39061963420694334, "grad_norm": 243.92738342285156, "learning_rate": 7.69767574277633e-06, "loss": 19.4604, "step": 193370 }, { "epoch": 0.39063983483962716, "grad_norm": 286.27398681640625, "learning_rate": 7.697381835644926e-06, "loss": 19.7006, "step": 193380 }, { "epoch": 0.390660035472311, "grad_norm": 988.9487915039062, "learning_rate": 7.697087915366811e-06, "loss": 26.7049, "step": 193390 }, { "epoch": 0.3906802361049948, "grad_norm": 332.3198547363281, "learning_rate": 7.696793981943418e-06, "loss": 19.1312, "step": 193400 }, { "epoch": 0.3907004367376786, "grad_norm": 336.0328369140625, "learning_rate": 7.696500035376177e-06, "loss": 25.8598, "step": 193410 }, { "epoch": 0.39072063737036244, "grad_norm": 128.2937774658203, "learning_rate": 7.696206075666523e-06, "loss": 18.4666, "step": 193420 }, { "epoch": 0.39074083800304626, "grad_norm": 379.9452209472656, "learning_rate": 7.695912102815886e-06, "loss": 15.9851, "step": 193430 }, { "epoch": 0.3907610386357301, "grad_norm": 183.00648498535156, "learning_rate": 7.6956181168257e-06, "loss": 22.2109, "step": 193440 }, { "epoch": 0.3907812392684139, "grad_norm": 170.57254028320312, "learning_rate": 7.695324117697401e-06, "loss": 40.3447, "step": 193450 }, { "epoch": 0.3908014399010977, "grad_norm": 170.14598083496094, "learning_rate": 7.695030105432417e-06, "loss": 16.5873, "step": 193460 }, { "epoch": 0.39082164053378154, "grad_norm": 243.78009033203125, "learning_rate": 7.694736080032185e-06, "loss": 14.3712, "step": 193470 }, { "epoch": 0.3908418411664653, "grad_norm": 217.4012451171875, "learning_rate": 7.694442041498133e-06, "loss": 13.3284, "step": 193480 }, { "epoch": 0.39086204179914913, "grad_norm": 377.2984619140625, "learning_rate": 7.6941479898317e-06, "loss": 24.7592, "step": 193490 }, { "epoch": 0.39088224243183295, "grad_norm": 219.76495361328125, "learning_rate": 7.693853925034316e-06, "loss": 27.3446, "step": 193500 }, { "epoch": 0.39090244306451677, "grad_norm": 108.59087371826172, "learning_rate": 7.693559847107415e-06, "loss": 9.9749, "step": 193510 }, { "epoch": 0.3909226436972006, "grad_norm": 157.78631591796875, "learning_rate": 7.693265756052427e-06, "loss": 24.3156, "step": 193520 }, { "epoch": 0.3909428443298844, "grad_norm": 323.11810302734375, "learning_rate": 7.692971651870793e-06, "loss": 12.3596, "step": 193530 }, { "epoch": 0.39096304496256823, "grad_norm": 328.6010437011719, "learning_rate": 7.692677534563939e-06, "loss": 21.4681, "step": 193540 }, { "epoch": 0.39098324559525205, "grad_norm": 517.076171875, "learning_rate": 7.692383404133302e-06, "loss": 24.5201, "step": 193550 }, { "epoch": 0.39100344622793587, "grad_norm": 318.320556640625, "learning_rate": 7.692089260580315e-06, "loss": 13.5581, "step": 193560 }, { "epoch": 0.3910236468606197, "grad_norm": 303.8072509765625, "learning_rate": 7.69179510390641e-06, "loss": 15.1158, "step": 193570 }, { "epoch": 0.3910438474933035, "grad_norm": 19.806184768676758, "learning_rate": 7.691500934113022e-06, "loss": 24.1892, "step": 193580 }, { "epoch": 0.39106404812598733, "grad_norm": 416.0664367675781, "learning_rate": 7.691206751201588e-06, "loss": 22.2586, "step": 193590 }, { "epoch": 0.39108424875867115, "grad_norm": 392.65863037109375, "learning_rate": 7.690912555173536e-06, "loss": 36.6606, "step": 193600 }, { "epoch": 0.3911044493913549, "grad_norm": 529.1873779296875, "learning_rate": 7.690618346030303e-06, "loss": 26.553, "step": 193610 }, { "epoch": 0.39112465002403873, "grad_norm": 80.73873138427734, "learning_rate": 7.690324123773324e-06, "loss": 20.4123, "step": 193620 }, { "epoch": 0.39114485065672255, "grad_norm": 338.83380126953125, "learning_rate": 7.69002988840403e-06, "loss": 16.8388, "step": 193630 }, { "epoch": 0.3911650512894064, "grad_norm": 154.91665649414062, "learning_rate": 7.689735639923857e-06, "loss": 14.6388, "step": 193640 }, { "epoch": 0.3911852519220902, "grad_norm": 369.33013916015625, "learning_rate": 7.689441378334239e-06, "loss": 15.0412, "step": 193650 }, { "epoch": 0.391205452554774, "grad_norm": 457.6107482910156, "learning_rate": 7.68914710363661e-06, "loss": 18.2355, "step": 193660 }, { "epoch": 0.39122565318745783, "grad_norm": 185.09840393066406, "learning_rate": 7.688852815832405e-06, "loss": 7.8946, "step": 193670 }, { "epoch": 0.39124585382014165, "grad_norm": 349.3139343261719, "learning_rate": 7.688558514923055e-06, "loss": 24.1781, "step": 193680 }, { "epoch": 0.3912660544528255, "grad_norm": 315.4298400878906, "learning_rate": 7.688264200909998e-06, "loss": 21.8047, "step": 193690 }, { "epoch": 0.3912862550855093, "grad_norm": 221.2463836669922, "learning_rate": 7.687969873794667e-06, "loss": 23.2387, "step": 193700 }, { "epoch": 0.3913064557181931, "grad_norm": 931.820068359375, "learning_rate": 7.687675533578497e-06, "loss": 22.7389, "step": 193710 }, { "epoch": 0.39132665635087693, "grad_norm": 0.0, "learning_rate": 7.687381180262924e-06, "loss": 28.6526, "step": 193720 }, { "epoch": 0.3913468569835607, "grad_norm": 539.36083984375, "learning_rate": 7.687086813849378e-06, "loss": 18.2785, "step": 193730 }, { "epoch": 0.3913670576162445, "grad_norm": 206.40650939941406, "learning_rate": 7.6867924343393e-06, "loss": 22.5074, "step": 193740 }, { "epoch": 0.39138725824892834, "grad_norm": 211.206787109375, "learning_rate": 7.686498041734121e-06, "loss": 33.2014, "step": 193750 }, { "epoch": 0.39140745888161216, "grad_norm": 113.89512634277344, "learning_rate": 7.686203636035274e-06, "loss": 17.043, "step": 193760 }, { "epoch": 0.391427659514296, "grad_norm": 925.3001708984375, "learning_rate": 7.685909217244198e-06, "loss": 32.8062, "step": 193770 }, { "epoch": 0.3914478601469798, "grad_norm": 104.15580749511719, "learning_rate": 7.685614785362325e-06, "loss": 18.1203, "step": 193780 }, { "epoch": 0.3914680607796636, "grad_norm": 140.74188232421875, "learning_rate": 7.685320340391093e-06, "loss": 11.0914, "step": 193790 }, { "epoch": 0.39148826141234744, "grad_norm": 224.26979064941406, "learning_rate": 7.685025882331936e-06, "loss": 32.6111, "step": 193800 }, { "epoch": 0.39150846204503126, "grad_norm": 471.6272888183594, "learning_rate": 7.684731411186285e-06, "loss": 20.8601, "step": 193810 }, { "epoch": 0.3915286626777151, "grad_norm": 0.0, "learning_rate": 7.684436926955584e-06, "loss": 17.9405, "step": 193820 }, { "epoch": 0.3915488633103989, "grad_norm": 191.8218231201172, "learning_rate": 7.684142429641258e-06, "loss": 16.1021, "step": 193830 }, { "epoch": 0.3915690639430827, "grad_norm": 196.4030303955078, "learning_rate": 7.683847919244748e-06, "loss": 19.2488, "step": 193840 }, { "epoch": 0.39158926457576654, "grad_norm": 119.58429718017578, "learning_rate": 7.683553395767492e-06, "loss": 17.1304, "step": 193850 }, { "epoch": 0.3916094652084503, "grad_norm": 381.9131774902344, "learning_rate": 7.683258859210921e-06, "loss": 35.6521, "step": 193860 }, { "epoch": 0.3916296658411341, "grad_norm": 29.023494720458984, "learning_rate": 7.68296430957647e-06, "loss": 19.444, "step": 193870 }, { "epoch": 0.39164986647381794, "grad_norm": 190.25953674316406, "learning_rate": 7.682669746865577e-06, "loss": 26.655, "step": 193880 }, { "epoch": 0.39167006710650176, "grad_norm": 86.31261444091797, "learning_rate": 7.682375171079677e-06, "loss": 12.3824, "step": 193890 }, { "epoch": 0.3916902677391856, "grad_norm": 441.6230163574219, "learning_rate": 7.682080582220206e-06, "loss": 19.9106, "step": 193900 }, { "epoch": 0.3917104683718694, "grad_norm": 119.77789306640625, "learning_rate": 7.681785980288601e-06, "loss": 25.1635, "step": 193910 }, { "epoch": 0.3917306690045532, "grad_norm": 195.38034057617188, "learning_rate": 7.681491365286294e-06, "loss": 16.7662, "step": 193920 }, { "epoch": 0.39175086963723704, "grad_norm": 390.7822265625, "learning_rate": 7.681196737214725e-06, "loss": 25.7287, "step": 193930 }, { "epoch": 0.39177107026992086, "grad_norm": 231.7421417236328, "learning_rate": 7.680902096075327e-06, "loss": 10.6734, "step": 193940 }, { "epoch": 0.3917912709026047, "grad_norm": 220.8636932373047, "learning_rate": 7.680607441869538e-06, "loss": 15.7929, "step": 193950 }, { "epoch": 0.3918114715352885, "grad_norm": 454.4981994628906, "learning_rate": 7.680312774598794e-06, "loss": 24.9773, "step": 193960 }, { "epoch": 0.3918316721679723, "grad_norm": 86.79987335205078, "learning_rate": 7.68001809426453e-06, "loss": 18.7966, "step": 193970 }, { "epoch": 0.39185187280065614, "grad_norm": 153.4053497314453, "learning_rate": 7.679723400868181e-06, "loss": 18.0449, "step": 193980 }, { "epoch": 0.3918720734333399, "grad_norm": 450.51513671875, "learning_rate": 7.679428694411188e-06, "loss": 17.3701, "step": 193990 }, { "epoch": 0.3918922740660237, "grad_norm": 298.091552734375, "learning_rate": 7.679133974894984e-06, "loss": 18.7065, "step": 194000 }, { "epoch": 0.39191247469870755, "grad_norm": 609.201904296875, "learning_rate": 7.678839242321005e-06, "loss": 19.1568, "step": 194010 }, { "epoch": 0.39193267533139137, "grad_norm": 190.18017578125, "learning_rate": 7.67854449669069e-06, "loss": 17.4619, "step": 194020 }, { "epoch": 0.3919528759640752, "grad_norm": 537.0602416992188, "learning_rate": 7.678249738005473e-06, "loss": 23.4716, "step": 194030 }, { "epoch": 0.391973076596759, "grad_norm": 184.1594696044922, "learning_rate": 7.677954966266791e-06, "loss": 18.4125, "step": 194040 }, { "epoch": 0.3919932772294428, "grad_norm": 306.44744873046875, "learning_rate": 7.67766018147608e-06, "loss": 19.1926, "step": 194050 }, { "epoch": 0.39201347786212665, "grad_norm": 83.82701110839844, "learning_rate": 7.677365383634782e-06, "loss": 13.0075, "step": 194060 }, { "epoch": 0.39203367849481047, "grad_norm": 38.41979217529297, "learning_rate": 7.677070572744327e-06, "loss": 14.5475, "step": 194070 }, { "epoch": 0.3920538791274943, "grad_norm": 271.3953552246094, "learning_rate": 7.676775748806156e-06, "loss": 18.126, "step": 194080 }, { "epoch": 0.3920740797601781, "grad_norm": 610.7979125976562, "learning_rate": 7.676480911821705e-06, "loss": 19.6157, "step": 194090 }, { "epoch": 0.3920942803928619, "grad_norm": 218.88763427734375, "learning_rate": 7.676186061792408e-06, "loss": 27.5681, "step": 194100 }, { "epoch": 0.39211448102554575, "grad_norm": 149.15049743652344, "learning_rate": 7.675891198719707e-06, "loss": 39.6398, "step": 194110 }, { "epoch": 0.3921346816582295, "grad_norm": 231.4601593017578, "learning_rate": 7.675596322605036e-06, "loss": 10.3035, "step": 194120 }, { "epoch": 0.39215488229091333, "grad_norm": 454.0722961425781, "learning_rate": 7.675301433449833e-06, "loss": 20.0926, "step": 194130 }, { "epoch": 0.39217508292359715, "grad_norm": 129.3306121826172, "learning_rate": 7.675006531255537e-06, "loss": 18.3034, "step": 194140 }, { "epoch": 0.39219528355628097, "grad_norm": 317.0411071777344, "learning_rate": 7.67471161602358e-06, "loss": 22.1259, "step": 194150 }, { "epoch": 0.3922154841889648, "grad_norm": 180.41403198242188, "learning_rate": 7.674416687755406e-06, "loss": 12.2259, "step": 194160 }, { "epoch": 0.3922356848216486, "grad_norm": 263.5476379394531, "learning_rate": 7.67412174645245e-06, "loss": 29.1061, "step": 194170 }, { "epoch": 0.39225588545433243, "grad_norm": 440.67718505859375, "learning_rate": 7.673826792116146e-06, "loss": 19.8042, "step": 194180 }, { "epoch": 0.39227608608701625, "grad_norm": 689.5050048828125, "learning_rate": 7.673531824747937e-06, "loss": 33.1369, "step": 194190 }, { "epoch": 0.39229628671970007, "grad_norm": 228.04107666015625, "learning_rate": 7.673236844349257e-06, "loss": 17.0653, "step": 194200 }, { "epoch": 0.3923164873523839, "grad_norm": 263.9972839355469, "learning_rate": 7.672941850921545e-06, "loss": 17.4847, "step": 194210 }, { "epoch": 0.3923366879850677, "grad_norm": 658.2273559570312, "learning_rate": 7.67264684446624e-06, "loss": 16.4903, "step": 194220 }, { "epoch": 0.39235688861775153, "grad_norm": 239.1370391845703, "learning_rate": 7.672351824984777e-06, "loss": 27.2624, "step": 194230 }, { "epoch": 0.39237708925043535, "grad_norm": 211.70648193359375, "learning_rate": 7.672056792478595e-06, "loss": 13.3679, "step": 194240 }, { "epoch": 0.3923972898831191, "grad_norm": 115.9996337890625, "learning_rate": 7.671761746949133e-06, "loss": 27.3641, "step": 194250 }, { "epoch": 0.39241749051580294, "grad_norm": 245.15211486816406, "learning_rate": 7.671466688397828e-06, "loss": 18.3405, "step": 194260 }, { "epoch": 0.39243769114848676, "grad_norm": 102.13873291015625, "learning_rate": 7.671171616826117e-06, "loss": 19.8608, "step": 194270 }, { "epoch": 0.3924578917811706, "grad_norm": 413.1636962890625, "learning_rate": 7.670876532235444e-06, "loss": 18.5905, "step": 194280 }, { "epoch": 0.3924780924138544, "grad_norm": 210.2196044921875, "learning_rate": 7.670581434627237e-06, "loss": 44.8951, "step": 194290 }, { "epoch": 0.3924982930465382, "grad_norm": 598.1609497070312, "learning_rate": 7.670286324002943e-06, "loss": 9.6196, "step": 194300 }, { "epoch": 0.39251849367922204, "grad_norm": 121.0558090209961, "learning_rate": 7.669991200363997e-06, "loss": 15.9144, "step": 194310 }, { "epoch": 0.39253869431190586, "grad_norm": 26.014314651489258, "learning_rate": 7.669696063711837e-06, "loss": 20.8719, "step": 194320 }, { "epoch": 0.3925588949445897, "grad_norm": 318.3029479980469, "learning_rate": 7.669400914047903e-06, "loss": 17.9225, "step": 194330 }, { "epoch": 0.3925790955772735, "grad_norm": 242.23634338378906, "learning_rate": 7.669105751373633e-06, "loss": 17.7954, "step": 194340 }, { "epoch": 0.3925992962099573, "grad_norm": 341.5506591796875, "learning_rate": 7.668810575690465e-06, "loss": 21.6148, "step": 194350 }, { "epoch": 0.39261949684264114, "grad_norm": 263.9589538574219, "learning_rate": 7.668515386999837e-06, "loss": 13.6862, "step": 194360 }, { "epoch": 0.3926396974753249, "grad_norm": 318.4580078125, "learning_rate": 7.66822018530319e-06, "loss": 23.7706, "step": 194370 }, { "epoch": 0.3926598981080087, "grad_norm": 196.20858764648438, "learning_rate": 7.667924970601961e-06, "loss": 27.2758, "step": 194380 }, { "epoch": 0.39268009874069254, "grad_norm": 0.0, "learning_rate": 7.667629742897589e-06, "loss": 24.2493, "step": 194390 }, { "epoch": 0.39270029937337636, "grad_norm": 167.59475708007812, "learning_rate": 7.667334502191514e-06, "loss": 12.8018, "step": 194400 }, { "epoch": 0.3927205000060602, "grad_norm": 380.3171691894531, "learning_rate": 7.667039248485173e-06, "loss": 14.7757, "step": 194410 }, { "epoch": 0.392740700638744, "grad_norm": 193.55014038085938, "learning_rate": 7.666743981780007e-06, "loss": 19.8224, "step": 194420 }, { "epoch": 0.3927609012714278, "grad_norm": 42.686370849609375, "learning_rate": 7.666448702077454e-06, "loss": 12.9949, "step": 194430 }, { "epoch": 0.39278110190411164, "grad_norm": 93.37039947509766, "learning_rate": 7.666153409378954e-06, "loss": 12.8639, "step": 194440 }, { "epoch": 0.39280130253679546, "grad_norm": 471.76947021484375, "learning_rate": 7.665858103685944e-06, "loss": 20.9049, "step": 194450 }, { "epoch": 0.3928215031694793, "grad_norm": 386.82537841796875, "learning_rate": 7.665562784999865e-06, "loss": 27.0353, "step": 194460 }, { "epoch": 0.3928417038021631, "grad_norm": 537.5155029296875, "learning_rate": 7.665267453322158e-06, "loss": 16.1122, "step": 194470 }, { "epoch": 0.3928619044348469, "grad_norm": 190.0880889892578, "learning_rate": 7.664972108654261e-06, "loss": 22.8019, "step": 194480 }, { "epoch": 0.39288210506753074, "grad_norm": 199.64041137695312, "learning_rate": 7.664676750997611e-06, "loss": 15.9239, "step": 194490 }, { "epoch": 0.3929023057002145, "grad_norm": 54.16620635986328, "learning_rate": 7.66438138035365e-06, "loss": 9.1593, "step": 194500 }, { "epoch": 0.3929225063328983, "grad_norm": 133.44947814941406, "learning_rate": 7.664085996723819e-06, "loss": 15.7618, "step": 194510 }, { "epoch": 0.39294270696558214, "grad_norm": 32.27739715576172, "learning_rate": 7.663790600109554e-06, "loss": 29.1076, "step": 194520 }, { "epoch": 0.39296290759826596, "grad_norm": 440.6134338378906, "learning_rate": 7.663495190512297e-06, "loss": 24.6156, "step": 194530 }, { "epoch": 0.3929831082309498, "grad_norm": 578.973388671875, "learning_rate": 7.663199767933489e-06, "loss": 21.6806, "step": 194540 }, { "epoch": 0.3930033088636336, "grad_norm": 497.9605407714844, "learning_rate": 7.662904332374568e-06, "loss": 32.29, "step": 194550 }, { "epoch": 0.3930235094963174, "grad_norm": 597.739990234375, "learning_rate": 7.662608883836975e-06, "loss": 26.1673, "step": 194560 }, { "epoch": 0.39304371012900124, "grad_norm": 183.5893096923828, "learning_rate": 7.662313422322147e-06, "loss": 13.3268, "step": 194570 }, { "epoch": 0.39306391076168506, "grad_norm": 397.7215270996094, "learning_rate": 7.662017947831528e-06, "loss": 18.2192, "step": 194580 }, { "epoch": 0.3930841113943689, "grad_norm": 710.2374877929688, "learning_rate": 7.661722460366556e-06, "loss": 37.5838, "step": 194590 }, { "epoch": 0.3931043120270527, "grad_norm": 293.24566650390625, "learning_rate": 7.66142695992867e-06, "loss": 16.3148, "step": 194600 }, { "epoch": 0.3931245126597365, "grad_norm": 167.86123657226562, "learning_rate": 7.661131446519314e-06, "loss": 20.9999, "step": 194610 }, { "epoch": 0.39314471329242034, "grad_norm": 609.3424682617188, "learning_rate": 7.660835920139926e-06, "loss": 31.1597, "step": 194620 }, { "epoch": 0.3931649139251041, "grad_norm": 406.4717712402344, "learning_rate": 7.660540380791944e-06, "loss": 14.7817, "step": 194630 }, { "epoch": 0.39318511455778793, "grad_norm": 146.87686157226562, "learning_rate": 7.660244828476812e-06, "loss": 16.7037, "step": 194640 }, { "epoch": 0.39320531519047175, "grad_norm": 310.5599060058594, "learning_rate": 7.659949263195971e-06, "loss": 28.7355, "step": 194650 }, { "epoch": 0.39322551582315557, "grad_norm": 319.1809387207031, "learning_rate": 7.659653684950859e-06, "loss": 13.6016, "step": 194660 }, { "epoch": 0.3932457164558394, "grad_norm": 243.95379638671875, "learning_rate": 7.659358093742917e-06, "loss": 15.9031, "step": 194670 }, { "epoch": 0.3932659170885232, "grad_norm": 579.4546508789062, "learning_rate": 7.659062489573585e-06, "loss": 27.5262, "step": 194680 }, { "epoch": 0.39328611772120703, "grad_norm": 165.79025268554688, "learning_rate": 7.658766872444307e-06, "loss": 13.1114, "step": 194690 }, { "epoch": 0.39330631835389085, "grad_norm": 312.02532958984375, "learning_rate": 7.658471242356521e-06, "loss": 14.4709, "step": 194700 }, { "epoch": 0.39332651898657467, "grad_norm": 143.44891357421875, "learning_rate": 7.658175599311667e-06, "loss": 23.3184, "step": 194710 }, { "epoch": 0.3933467196192585, "grad_norm": 403.1902160644531, "learning_rate": 7.65787994331119e-06, "loss": 24.6021, "step": 194720 }, { "epoch": 0.3933669202519423, "grad_norm": 380.4971618652344, "learning_rate": 7.657584274356529e-06, "loss": 24.2974, "step": 194730 }, { "epoch": 0.39338712088462613, "grad_norm": 468.9192810058594, "learning_rate": 7.657288592449124e-06, "loss": 22.2887, "step": 194740 }, { "epoch": 0.39340732151730995, "grad_norm": 229.05140686035156, "learning_rate": 7.656992897590416e-06, "loss": 22.3392, "step": 194750 }, { "epoch": 0.3934275221499937, "grad_norm": 283.75421142578125, "learning_rate": 7.656697189781846e-06, "loss": 16.9436, "step": 194760 }, { "epoch": 0.39344772278267753, "grad_norm": 149.7548065185547, "learning_rate": 7.656401469024856e-06, "loss": 15.3834, "step": 194770 }, { "epoch": 0.39346792341536135, "grad_norm": 177.55157470703125, "learning_rate": 7.65610573532089e-06, "loss": 25.2313, "step": 194780 }, { "epoch": 0.3934881240480452, "grad_norm": 179.21536254882812, "learning_rate": 7.655809988671383e-06, "loss": 49.5092, "step": 194790 }, { "epoch": 0.393508324680729, "grad_norm": 271.7005310058594, "learning_rate": 7.655514229077784e-06, "loss": 17.1618, "step": 194800 }, { "epoch": 0.3935285253134128, "grad_norm": 356.3199157714844, "learning_rate": 7.65521845654153e-06, "loss": 11.5489, "step": 194810 }, { "epoch": 0.39354872594609663, "grad_norm": 253.8485870361328, "learning_rate": 7.654922671064062e-06, "loss": 18.3566, "step": 194820 }, { "epoch": 0.39356892657878045, "grad_norm": 117.10907745361328, "learning_rate": 7.654626872646824e-06, "loss": 25.6376, "step": 194830 }, { "epoch": 0.3935891272114643, "grad_norm": 335.3909606933594, "learning_rate": 7.654331061291254e-06, "loss": 25.6247, "step": 194840 }, { "epoch": 0.3936093278441481, "grad_norm": 166.45440673828125, "learning_rate": 7.6540352369988e-06, "loss": 28.8478, "step": 194850 }, { "epoch": 0.3936295284768319, "grad_norm": 206.85145568847656, "learning_rate": 7.653739399770897e-06, "loss": 15.4822, "step": 194860 }, { "epoch": 0.39364972910951573, "grad_norm": 258.195556640625, "learning_rate": 7.653443549608993e-06, "loss": 35.2904, "step": 194870 }, { "epoch": 0.3936699297421995, "grad_norm": 600.34912109375, "learning_rate": 7.653147686514523e-06, "loss": 29.3278, "step": 194880 }, { "epoch": 0.3936901303748833, "grad_norm": 444.9201965332031, "learning_rate": 7.652851810488937e-06, "loss": 31.3569, "step": 194890 }, { "epoch": 0.39371033100756714, "grad_norm": 70.63555145263672, "learning_rate": 7.652555921533671e-06, "loss": 12.7931, "step": 194900 }, { "epoch": 0.39373053164025096, "grad_norm": 528.3826904296875, "learning_rate": 7.65226001965017e-06, "loss": 24.9259, "step": 194910 }, { "epoch": 0.3937507322729348, "grad_norm": 203.94825744628906, "learning_rate": 7.651964104839876e-06, "loss": 17.4038, "step": 194920 }, { "epoch": 0.3937709329056186, "grad_norm": 562.08056640625, "learning_rate": 7.651668177104227e-06, "loss": 15.559, "step": 194930 }, { "epoch": 0.3937911335383024, "grad_norm": 38.24285125732422, "learning_rate": 7.651372236444673e-06, "loss": 22.0156, "step": 194940 }, { "epoch": 0.39381133417098624, "grad_norm": 289.3218994140625, "learning_rate": 7.65107628286265e-06, "loss": 24.2907, "step": 194950 }, { "epoch": 0.39383153480367006, "grad_norm": 538.4490966796875, "learning_rate": 7.650780316359604e-06, "loss": 18.612, "step": 194960 }, { "epoch": 0.3938517354363539, "grad_norm": 141.97787475585938, "learning_rate": 7.650484336936976e-06, "loss": 13.2621, "step": 194970 }, { "epoch": 0.3938719360690377, "grad_norm": 230.7985076904297, "learning_rate": 7.650188344596207e-06, "loss": 14.7206, "step": 194980 }, { "epoch": 0.3938921367017215, "grad_norm": 551.9215087890625, "learning_rate": 7.649892339338743e-06, "loss": 24.8613, "step": 194990 }, { "epoch": 0.39391233733440534, "grad_norm": 56.51571273803711, "learning_rate": 7.649596321166024e-06, "loss": 15.4644, "step": 195000 }, { "epoch": 0.3939325379670891, "grad_norm": 424.7755432128906, "learning_rate": 7.649300290079497e-06, "loss": 25.3022, "step": 195010 }, { "epoch": 0.3939527385997729, "grad_norm": 995.9240112304688, "learning_rate": 7.6490042460806e-06, "loss": 30.9905, "step": 195020 }, { "epoch": 0.39397293923245674, "grad_norm": 497.32464599609375, "learning_rate": 7.648708189170777e-06, "loss": 31.8876, "step": 195030 }, { "epoch": 0.39399313986514056, "grad_norm": 230.71151733398438, "learning_rate": 7.648412119351471e-06, "loss": 19.4944, "step": 195040 }, { "epoch": 0.3940133404978244, "grad_norm": 287.7003479003906, "learning_rate": 7.648116036624125e-06, "loss": 35.7516, "step": 195050 }, { "epoch": 0.3940335411305082, "grad_norm": 226.27464294433594, "learning_rate": 7.647819940990184e-06, "loss": 15.0225, "step": 195060 }, { "epoch": 0.394053741763192, "grad_norm": 148.74713134765625, "learning_rate": 7.647523832451091e-06, "loss": 29.7381, "step": 195070 }, { "epoch": 0.39407394239587584, "grad_norm": 225.23306274414062, "learning_rate": 7.647227711008288e-06, "loss": 18.5852, "step": 195080 }, { "epoch": 0.39409414302855966, "grad_norm": 31.996891021728516, "learning_rate": 7.646931576663215e-06, "loss": 21.6829, "step": 195090 }, { "epoch": 0.3941143436612435, "grad_norm": 9.77858829498291, "learning_rate": 7.646635429417322e-06, "loss": 12.6305, "step": 195100 }, { "epoch": 0.3941345442939273, "grad_norm": 269.21429443359375, "learning_rate": 7.646339269272045e-06, "loss": 18.9455, "step": 195110 }, { "epoch": 0.3941547449266111, "grad_norm": 283.38262939453125, "learning_rate": 7.646043096228835e-06, "loss": 17.1071, "step": 195120 }, { "epoch": 0.39417494555929494, "grad_norm": 347.9460144042969, "learning_rate": 7.645746910289128e-06, "loss": 19.1558, "step": 195130 }, { "epoch": 0.3941951461919787, "grad_norm": 139.70343017578125, "learning_rate": 7.645450711454377e-06, "loss": 14.3769, "step": 195140 }, { "epoch": 0.3942153468246625, "grad_norm": 454.0701599121094, "learning_rate": 7.645154499726017e-06, "loss": 21.3303, "step": 195150 }, { "epoch": 0.39423554745734635, "grad_norm": 246.4590606689453, "learning_rate": 7.644858275105494e-06, "loss": 11.95, "step": 195160 }, { "epoch": 0.39425574809003017, "grad_norm": 201.2974395751953, "learning_rate": 7.644562037594254e-06, "loss": 18.6055, "step": 195170 }, { "epoch": 0.394275948722714, "grad_norm": 595.9337158203125, "learning_rate": 7.644265787193739e-06, "loss": 22.5723, "step": 195180 }, { "epoch": 0.3942961493553978, "grad_norm": 454.85797119140625, "learning_rate": 7.643969523905392e-06, "loss": 22.2373, "step": 195190 }, { "epoch": 0.3943163499880816, "grad_norm": 315.9944152832031, "learning_rate": 7.64367324773066e-06, "loss": 30.295, "step": 195200 }, { "epoch": 0.39433655062076545, "grad_norm": 180.69094848632812, "learning_rate": 7.643376958670983e-06, "loss": 20.1329, "step": 195210 }, { "epoch": 0.39435675125344927, "grad_norm": 293.5047302246094, "learning_rate": 7.643080656727809e-06, "loss": 16.0287, "step": 195220 }, { "epoch": 0.3943769518861331, "grad_norm": 231.29153442382812, "learning_rate": 7.642784341902581e-06, "loss": 11.1602, "step": 195230 }, { "epoch": 0.3943971525188169, "grad_norm": 228.00096130371094, "learning_rate": 7.642488014196742e-06, "loss": 24.4023, "step": 195240 }, { "epoch": 0.3944173531515007, "grad_norm": 501.7188720703125, "learning_rate": 7.642191673611737e-06, "loss": 26.6407, "step": 195250 }, { "epoch": 0.39443755378418455, "grad_norm": 335.8973693847656, "learning_rate": 7.641895320149008e-06, "loss": 20.8122, "step": 195260 }, { "epoch": 0.3944577544168683, "grad_norm": 152.6516876220703, "learning_rate": 7.641598953810006e-06, "loss": 13.811, "step": 195270 }, { "epoch": 0.39447795504955213, "grad_norm": 364.88018798828125, "learning_rate": 7.641302574596168e-06, "loss": 23.1165, "step": 195280 }, { "epoch": 0.39449815568223595, "grad_norm": 193.8957977294922, "learning_rate": 7.64100618250894e-06, "loss": 21.9596, "step": 195290 }, { "epoch": 0.39451835631491977, "grad_norm": 165.779296875, "learning_rate": 7.640709777549773e-06, "loss": 38.2931, "step": 195300 }, { "epoch": 0.3945385569476036, "grad_norm": 1140.3321533203125, "learning_rate": 7.640413359720105e-06, "loss": 23.4998, "step": 195310 }, { "epoch": 0.3945587575802874, "grad_norm": 149.1036376953125, "learning_rate": 7.64011692902138e-06, "loss": 12.7853, "step": 195320 }, { "epoch": 0.39457895821297123, "grad_norm": 412.5675048828125, "learning_rate": 7.639820485455047e-06, "loss": 20.4548, "step": 195330 }, { "epoch": 0.39459915884565505, "grad_norm": 691.0968627929688, "learning_rate": 7.639524029022552e-06, "loss": 29.7139, "step": 195340 }, { "epoch": 0.39461935947833887, "grad_norm": 384.7610168457031, "learning_rate": 7.639227559725333e-06, "loss": 19.8274, "step": 195350 }, { "epoch": 0.3946395601110227, "grad_norm": 701.662353515625, "learning_rate": 7.63893107756484e-06, "loss": 18.853, "step": 195360 }, { "epoch": 0.3946597607437065, "grad_norm": 583.7568359375, "learning_rate": 7.638634582542516e-06, "loss": 18.1129, "step": 195370 }, { "epoch": 0.39467996137639033, "grad_norm": 555.453857421875, "learning_rate": 7.63833807465981e-06, "loss": 17.4225, "step": 195380 }, { "epoch": 0.39470016200907415, "grad_norm": 367.0882568359375, "learning_rate": 7.638041553918162e-06, "loss": 17.4292, "step": 195390 }, { "epoch": 0.3947203626417579, "grad_norm": 262.5814208984375, "learning_rate": 7.637745020319019e-06, "loss": 11.2565, "step": 195400 }, { "epoch": 0.39474056327444174, "grad_norm": 320.02313232421875, "learning_rate": 7.63744847386383e-06, "loss": 16.5876, "step": 195410 }, { "epoch": 0.39476076390712556, "grad_norm": 270.3629455566406, "learning_rate": 7.637151914554033e-06, "loss": 26.3514, "step": 195420 }, { "epoch": 0.3947809645398094, "grad_norm": 318.1084899902344, "learning_rate": 7.63685534239108e-06, "loss": 19.9079, "step": 195430 }, { "epoch": 0.3948011651724932, "grad_norm": 143.34129333496094, "learning_rate": 7.636558757376413e-06, "loss": 15.1242, "step": 195440 }, { "epoch": 0.394821365805177, "grad_norm": 723.2462768554688, "learning_rate": 7.636262159511479e-06, "loss": 24.2107, "step": 195450 }, { "epoch": 0.39484156643786084, "grad_norm": 5.736114978790283, "learning_rate": 7.63596554879772e-06, "loss": 28.7018, "step": 195460 }, { "epoch": 0.39486176707054466, "grad_norm": 193.23330688476562, "learning_rate": 7.635668925236588e-06, "loss": 14.4767, "step": 195470 }, { "epoch": 0.3948819677032285, "grad_norm": 296.91253662109375, "learning_rate": 7.635372288829524e-06, "loss": 29.0185, "step": 195480 }, { "epoch": 0.3949021683359123, "grad_norm": 433.8604431152344, "learning_rate": 7.635075639577976e-06, "loss": 16.8856, "step": 195490 }, { "epoch": 0.3949223689685961, "grad_norm": 839.92529296875, "learning_rate": 7.634778977483389e-06, "loss": 41.8922, "step": 195500 }, { "epoch": 0.39494256960127994, "grad_norm": 353.88128662109375, "learning_rate": 7.634482302547208e-06, "loss": 26.8454, "step": 195510 }, { "epoch": 0.3949627702339637, "grad_norm": 109.0381851196289, "learning_rate": 7.63418561477088e-06, "loss": 19.9252, "step": 195520 }, { "epoch": 0.3949829708666475, "grad_norm": 107.25232696533203, "learning_rate": 7.63388891415585e-06, "loss": 13.6184, "step": 195530 }, { "epoch": 0.39500317149933134, "grad_norm": 344.1968994140625, "learning_rate": 7.633592200703566e-06, "loss": 24.5713, "step": 195540 }, { "epoch": 0.39502337213201516, "grad_norm": 514.9895629882812, "learning_rate": 7.633295474415473e-06, "loss": 22.2372, "step": 195550 }, { "epoch": 0.395043572764699, "grad_norm": 370.78948974609375, "learning_rate": 7.632998735293016e-06, "loss": 18.9996, "step": 195560 }, { "epoch": 0.3950637733973828, "grad_norm": 263.2669677734375, "learning_rate": 7.632701983337645e-06, "loss": 33.432, "step": 195570 }, { "epoch": 0.3950839740300666, "grad_norm": 384.9313659667969, "learning_rate": 7.632405218550801e-06, "loss": 20.5556, "step": 195580 }, { "epoch": 0.39510417466275044, "grad_norm": 66.5440444946289, "learning_rate": 7.632108440933934e-06, "loss": 12.5785, "step": 195590 }, { "epoch": 0.39512437529543426, "grad_norm": 279.04925537109375, "learning_rate": 7.63181165048849e-06, "loss": 13.7486, "step": 195600 }, { "epoch": 0.3951445759281181, "grad_norm": 168.29052734375, "learning_rate": 7.631514847215914e-06, "loss": 33.1621, "step": 195610 }, { "epoch": 0.3951647765608019, "grad_norm": 516.8004760742188, "learning_rate": 7.631218031117658e-06, "loss": 23.866, "step": 195620 }, { "epoch": 0.3951849771934857, "grad_norm": 607.4094848632812, "learning_rate": 7.630921202195161e-06, "loss": 25.3501, "step": 195630 }, { "epoch": 0.39520517782616954, "grad_norm": 196.5140380859375, "learning_rate": 7.630624360449875e-06, "loss": 14.7553, "step": 195640 }, { "epoch": 0.3952253784588533, "grad_norm": 275.20574951171875, "learning_rate": 7.630327505883243e-06, "loss": 12.9907, "step": 195650 }, { "epoch": 0.3952455790915371, "grad_norm": 162.9631805419922, "learning_rate": 7.630030638496714e-06, "loss": 19.8624, "step": 195660 }, { "epoch": 0.39526577972422094, "grad_norm": 403.0829772949219, "learning_rate": 7.629733758291736e-06, "loss": 23.3691, "step": 195670 }, { "epoch": 0.39528598035690476, "grad_norm": 603.3643188476562, "learning_rate": 7.629436865269753e-06, "loss": 23.9639, "step": 195680 }, { "epoch": 0.3953061809895886, "grad_norm": 271.5706787109375, "learning_rate": 7.629139959432215e-06, "loss": 16.865, "step": 195690 }, { "epoch": 0.3953263816222724, "grad_norm": 169.62759399414062, "learning_rate": 7.628843040780567e-06, "loss": 18.4141, "step": 195700 }, { "epoch": 0.3953465822549562, "grad_norm": 130.78997802734375, "learning_rate": 7.628546109316257e-06, "loss": 18.1482, "step": 195710 }, { "epoch": 0.39536678288764004, "grad_norm": 245.90194702148438, "learning_rate": 7.628249165040731e-06, "loss": 18.589, "step": 195720 }, { "epoch": 0.39538698352032386, "grad_norm": 258.561767578125, "learning_rate": 7.627952207955439e-06, "loss": 21.4238, "step": 195730 }, { "epoch": 0.3954071841530077, "grad_norm": 298.8541259765625, "learning_rate": 7.627655238061825e-06, "loss": 33.3295, "step": 195740 }, { "epoch": 0.3954273847856915, "grad_norm": 532.6821899414062, "learning_rate": 7.627358255361339e-06, "loss": 16.7866, "step": 195750 }, { "epoch": 0.3954475854183753, "grad_norm": 194.97183227539062, "learning_rate": 7.627061259855428e-06, "loss": 7.5935, "step": 195760 }, { "epoch": 0.39546778605105914, "grad_norm": 310.5850830078125, "learning_rate": 7.626764251545539e-06, "loss": 21.454, "step": 195770 }, { "epoch": 0.3954879866837429, "grad_norm": 447.12158203125, "learning_rate": 7.62646723043312e-06, "loss": 21.5435, "step": 195780 }, { "epoch": 0.39550818731642673, "grad_norm": 271.2187194824219, "learning_rate": 7.626170196519618e-06, "loss": 31.3261, "step": 195790 }, { "epoch": 0.39552838794911055, "grad_norm": 419.5432434082031, "learning_rate": 7.6258731498064796e-06, "loss": 47.6234, "step": 195800 }, { "epoch": 0.39554858858179437, "grad_norm": 138.79046630859375, "learning_rate": 7.625576090295155e-06, "loss": 23.1665, "step": 195810 }, { "epoch": 0.3955687892144782, "grad_norm": 253.0830535888672, "learning_rate": 7.625279017987091e-06, "loss": 17.261, "step": 195820 }, { "epoch": 0.395588989847162, "grad_norm": 313.8537902832031, "learning_rate": 7.624981932883735e-06, "loss": 14.6143, "step": 195830 }, { "epoch": 0.39560919047984583, "grad_norm": 77.01154327392578, "learning_rate": 7.624684834986536e-06, "loss": 14.8828, "step": 195840 }, { "epoch": 0.39562939111252965, "grad_norm": 119.32646179199219, "learning_rate": 7.624387724296941e-06, "loss": 17.1623, "step": 195850 }, { "epoch": 0.39564959174521347, "grad_norm": 160.1696319580078, "learning_rate": 7.6240906008163985e-06, "loss": 24.0905, "step": 195860 }, { "epoch": 0.3956697923778973, "grad_norm": 85.5107650756836, "learning_rate": 7.623793464546359e-06, "loss": 36.3991, "step": 195870 }, { "epoch": 0.3956899930105811, "grad_norm": 630.5499877929688, "learning_rate": 7.623496315488264e-06, "loss": 33.1544, "step": 195880 }, { "epoch": 0.39571019364326493, "grad_norm": 312.9757080078125, "learning_rate": 7.623199153643569e-06, "loss": 15.6533, "step": 195890 }, { "epoch": 0.39573039427594875, "grad_norm": 132.02232360839844, "learning_rate": 7.622901979013717e-06, "loss": 16.3631, "step": 195900 }, { "epoch": 0.3957505949086325, "grad_norm": 257.602783203125, "learning_rate": 7.6226047916001624e-06, "loss": 16.5492, "step": 195910 }, { "epoch": 0.39577079554131633, "grad_norm": 247.9805450439453, "learning_rate": 7.622307591404347e-06, "loss": 23.2062, "step": 195920 }, { "epoch": 0.39579099617400015, "grad_norm": 152.7764892578125, "learning_rate": 7.622010378427725e-06, "loss": 27.1087, "step": 195930 }, { "epoch": 0.395811196806684, "grad_norm": 230.0106658935547, "learning_rate": 7.621713152671742e-06, "loss": 13.4086, "step": 195940 }, { "epoch": 0.3958313974393678, "grad_norm": 182.54153442382812, "learning_rate": 7.6214159141378465e-06, "loss": 21.2977, "step": 195950 }, { "epoch": 0.3958515980720516, "grad_norm": 389.78118896484375, "learning_rate": 7.621118662827487e-06, "loss": 14.0395, "step": 195960 }, { "epoch": 0.39587179870473543, "grad_norm": 283.34515380859375, "learning_rate": 7.620821398742114e-06, "loss": 11.1751, "step": 195970 }, { "epoch": 0.39589199933741925, "grad_norm": 572.1862182617188, "learning_rate": 7.620524121883175e-06, "loss": 15.4908, "step": 195980 }, { "epoch": 0.3959121999701031, "grad_norm": 214.23191833496094, "learning_rate": 7.62022683225212e-06, "loss": 19.4062, "step": 195990 }, { "epoch": 0.3959324006027869, "grad_norm": 60.46697998046875, "learning_rate": 7.619929529850397e-06, "loss": 31.9195, "step": 196000 }, { "epoch": 0.3959526012354707, "grad_norm": 318.0898132324219, "learning_rate": 7.6196322146794534e-06, "loss": 11.4277, "step": 196010 }, { "epoch": 0.39597280186815453, "grad_norm": 443.96533203125, "learning_rate": 7.619334886740744e-06, "loss": 14.0452, "step": 196020 }, { "epoch": 0.39599300250083835, "grad_norm": 327.90496826171875, "learning_rate": 7.61903754603571e-06, "loss": 24.3004, "step": 196030 }, { "epoch": 0.3960132031335221, "grad_norm": 103.91563415527344, "learning_rate": 7.618740192565806e-06, "loss": 19.8119, "step": 196040 }, { "epoch": 0.39603340376620594, "grad_norm": 561.7388305664062, "learning_rate": 7.6184428263324815e-06, "loss": 38.8107, "step": 196050 }, { "epoch": 0.39605360439888976, "grad_norm": 344.4429016113281, "learning_rate": 7.618145447337182e-06, "loss": 16.7449, "step": 196060 }, { "epoch": 0.3960738050315736, "grad_norm": 270.0063781738281, "learning_rate": 7.617848055581361e-06, "loss": 18.05, "step": 196070 }, { "epoch": 0.3960940056642574, "grad_norm": 134.54653930664062, "learning_rate": 7.6175506510664645e-06, "loss": 12.9702, "step": 196080 }, { "epoch": 0.3961142062969412, "grad_norm": 180.42955017089844, "learning_rate": 7.617253233793944e-06, "loss": 11.3876, "step": 196090 }, { "epoch": 0.39613440692962504, "grad_norm": 390.9675598144531, "learning_rate": 7.616955803765249e-06, "loss": 24.5607, "step": 196100 }, { "epoch": 0.39615460756230886, "grad_norm": 494.6651611328125, "learning_rate": 7.616658360981828e-06, "loss": 17.2424, "step": 196110 }, { "epoch": 0.3961748081949927, "grad_norm": 249.0922088623047, "learning_rate": 7.616360905445132e-06, "loss": 24.3302, "step": 196120 }, { "epoch": 0.3961950088276765, "grad_norm": 585.2081909179688, "learning_rate": 7.616063437156611e-06, "loss": 37.3783, "step": 196130 }, { "epoch": 0.3962152094603603, "grad_norm": 299.4219665527344, "learning_rate": 7.615765956117714e-06, "loss": 12.0669, "step": 196140 }, { "epoch": 0.39623541009304414, "grad_norm": 363.14520263671875, "learning_rate": 7.61546846232989e-06, "loss": 21.7385, "step": 196150 }, { "epoch": 0.3962556107257279, "grad_norm": 74.28112030029297, "learning_rate": 7.615170955794592e-06, "loss": 35.9823, "step": 196160 }, { "epoch": 0.3962758113584117, "grad_norm": 196.0849609375, "learning_rate": 7.614873436513265e-06, "loss": 14.3376, "step": 196170 }, { "epoch": 0.39629601199109554, "grad_norm": 318.63739013671875, "learning_rate": 7.614575904487365e-06, "loss": 21.5833, "step": 196180 }, { "epoch": 0.39631621262377936, "grad_norm": 383.2812805175781, "learning_rate": 7.6142783597183365e-06, "loss": 20.8216, "step": 196190 }, { "epoch": 0.3963364132564632, "grad_norm": 436.72119140625, "learning_rate": 7.613980802207633e-06, "loss": 20.6293, "step": 196200 }, { "epoch": 0.396356613889147, "grad_norm": 175.2423553466797, "learning_rate": 7.613683231956705e-06, "loss": 22.0344, "step": 196210 }, { "epoch": 0.3963768145218308, "grad_norm": 221.42440795898438, "learning_rate": 7.613385648967002e-06, "loss": 18.5197, "step": 196220 }, { "epoch": 0.39639701515451464, "grad_norm": 217.03355407714844, "learning_rate": 7.613088053239974e-06, "loss": 12.2641, "step": 196230 }, { "epoch": 0.39641721578719846, "grad_norm": 154.56585693359375, "learning_rate": 7.612790444777072e-06, "loss": 25.0173, "step": 196240 }, { "epoch": 0.3964374164198823, "grad_norm": 422.5224609375, "learning_rate": 7.612492823579744e-06, "loss": 17.6931, "step": 196250 }, { "epoch": 0.3964576170525661, "grad_norm": 592.8372192382812, "learning_rate": 7.612195189649445e-06, "loss": 37.7053, "step": 196260 }, { "epoch": 0.3964778176852499, "grad_norm": 171.89598083496094, "learning_rate": 7.611897542987623e-06, "loss": 15.4513, "step": 196270 }, { "epoch": 0.39649801831793374, "grad_norm": 268.6927490234375, "learning_rate": 7.611599883595731e-06, "loss": 12.9699, "step": 196280 }, { "epoch": 0.3965182189506175, "grad_norm": 150.1835174560547, "learning_rate": 7.611302211475216e-06, "loss": 19.4415, "step": 196290 }, { "epoch": 0.3965384195833013, "grad_norm": 411.337646484375, "learning_rate": 7.6110045266275305e-06, "loss": 16.5364, "step": 196300 }, { "epoch": 0.39655862021598515, "grad_norm": 171.4298858642578, "learning_rate": 7.610706829054126e-06, "loss": 25.6389, "step": 196310 }, { "epoch": 0.39657882084866897, "grad_norm": 314.48297119140625, "learning_rate": 7.610409118756454e-06, "loss": 15.2863, "step": 196320 }, { "epoch": 0.3965990214813528, "grad_norm": 394.3216247558594, "learning_rate": 7.610111395735962e-06, "loss": 34.4372, "step": 196330 }, { "epoch": 0.3966192221140366, "grad_norm": 280.4970397949219, "learning_rate": 7.609813659994107e-06, "loss": 15.4254, "step": 196340 }, { "epoch": 0.3966394227467204, "grad_norm": 453.7530517578125, "learning_rate": 7.6095159115323335e-06, "loss": 17.0022, "step": 196350 }, { "epoch": 0.39665962337940425, "grad_norm": 274.3356018066406, "learning_rate": 7.609218150352098e-06, "loss": 26.8366, "step": 196360 }, { "epoch": 0.39667982401208807, "grad_norm": 267.62493896484375, "learning_rate": 7.608920376454849e-06, "loss": 20.6224, "step": 196370 }, { "epoch": 0.3967000246447719, "grad_norm": 366.4648742675781, "learning_rate": 7.608622589842039e-06, "loss": 15.2126, "step": 196380 }, { "epoch": 0.3967202252774557, "grad_norm": 228.87850952148438, "learning_rate": 7.608324790515119e-06, "loss": 15.1592, "step": 196390 }, { "epoch": 0.3967404259101395, "grad_norm": 384.29730224609375, "learning_rate": 7.6080269784755405e-06, "loss": 27.2037, "step": 196400 }, { "epoch": 0.39676062654282335, "grad_norm": 223.87950134277344, "learning_rate": 7.607729153724755e-06, "loss": 16.3489, "step": 196410 }, { "epoch": 0.3967808271755071, "grad_norm": 296.6397399902344, "learning_rate": 7.607431316264211e-06, "loss": 14.7221, "step": 196420 }, { "epoch": 0.39680102780819093, "grad_norm": 181.79135131835938, "learning_rate": 7.607133466095365e-06, "loss": 10.7117, "step": 196430 }, { "epoch": 0.39682122844087475, "grad_norm": 264.9047546386719, "learning_rate": 7.606835603219666e-06, "loss": 17.1049, "step": 196440 }, { "epoch": 0.39684142907355857, "grad_norm": 293.80487060546875, "learning_rate": 7.60653772763857e-06, "loss": 14.3569, "step": 196450 }, { "epoch": 0.3968616297062424, "grad_norm": 242.92227172851562, "learning_rate": 7.606239839353522e-06, "loss": 17.0347, "step": 196460 }, { "epoch": 0.3968818303389262, "grad_norm": 328.82623291015625, "learning_rate": 7.605941938365977e-06, "loss": 12.1774, "step": 196470 }, { "epoch": 0.39690203097161003, "grad_norm": 604.5963745117188, "learning_rate": 7.6056440246773884e-06, "loss": 24.8425, "step": 196480 }, { "epoch": 0.39692223160429385, "grad_norm": 319.56884765625, "learning_rate": 7.605346098289206e-06, "loss": 19.212, "step": 196490 }, { "epoch": 0.39694243223697767, "grad_norm": 376.6470947265625, "learning_rate": 7.605048159202884e-06, "loss": 22.6171, "step": 196500 }, { "epoch": 0.3969626328696615, "grad_norm": 0.0, "learning_rate": 7.60475020741987e-06, "loss": 24.1404, "step": 196510 }, { "epoch": 0.3969828335023453, "grad_norm": 413.4575500488281, "learning_rate": 7.604452242941622e-06, "loss": 17.3267, "step": 196520 }, { "epoch": 0.39700303413502913, "grad_norm": 397.43878173828125, "learning_rate": 7.60415426576959e-06, "loss": 21.0242, "step": 196530 }, { "epoch": 0.39702323476771295, "grad_norm": 194.7661895751953, "learning_rate": 7.603856275905223e-06, "loss": 16.3005, "step": 196540 }, { "epoch": 0.3970434354003967, "grad_norm": 596.8765258789062, "learning_rate": 7.6035582733499805e-06, "loss": 25.6689, "step": 196550 }, { "epoch": 0.39706363603308054, "grad_norm": 730.8421630859375, "learning_rate": 7.6032602581053075e-06, "loss": 17.0999, "step": 196560 }, { "epoch": 0.39708383666576436, "grad_norm": 220.65560913085938, "learning_rate": 7.602962230172661e-06, "loss": 13.4715, "step": 196570 }, { "epoch": 0.3971040372984482, "grad_norm": 116.64985656738281, "learning_rate": 7.6026641895534925e-06, "loss": 21.9579, "step": 196580 }, { "epoch": 0.397124237931132, "grad_norm": 242.88543701171875, "learning_rate": 7.602366136249254e-06, "loss": 14.7921, "step": 196590 }, { "epoch": 0.3971444385638158, "grad_norm": 98.62287139892578, "learning_rate": 7.6020680702613995e-06, "loss": 17.4644, "step": 196600 }, { "epoch": 0.39716463919649964, "grad_norm": 288.7069396972656, "learning_rate": 7.60176999159138e-06, "loss": 23.0549, "step": 196610 }, { "epoch": 0.39718483982918346, "grad_norm": 380.5025329589844, "learning_rate": 7.601471900240648e-06, "loss": 21.8005, "step": 196620 }, { "epoch": 0.3972050404618673, "grad_norm": 255.35775756835938, "learning_rate": 7.601173796210659e-06, "loss": 21.7958, "step": 196630 }, { "epoch": 0.3972252410945511, "grad_norm": 201.38038635253906, "learning_rate": 7.600875679502864e-06, "loss": 17.8924, "step": 196640 }, { "epoch": 0.3972454417272349, "grad_norm": 288.1910400390625, "learning_rate": 7.6005775501187165e-06, "loss": 19.966, "step": 196650 }, { "epoch": 0.39726564235991874, "grad_norm": 148.360595703125, "learning_rate": 7.60027940805967e-06, "loss": 14.0948, "step": 196660 }, { "epoch": 0.39728584299260256, "grad_norm": 540.915771484375, "learning_rate": 7.5999812533271755e-06, "loss": 15.9216, "step": 196670 }, { "epoch": 0.3973060436252863, "grad_norm": 173.6855010986328, "learning_rate": 7.599683085922689e-06, "loss": 15.3806, "step": 196680 }, { "epoch": 0.39732624425797014, "grad_norm": 289.3764953613281, "learning_rate": 7.599384905847662e-06, "loss": 20.5498, "step": 196690 }, { "epoch": 0.39734644489065396, "grad_norm": 276.78265380859375, "learning_rate": 7.5990867131035474e-06, "loss": 12.5067, "step": 196700 }, { "epoch": 0.3973666455233378, "grad_norm": 626.9718627929688, "learning_rate": 7.598788507691801e-06, "loss": 22.2889, "step": 196710 }, { "epoch": 0.3973868461560216, "grad_norm": 309.85113525390625, "learning_rate": 7.5984902896138736e-06, "loss": 32.3097, "step": 196720 }, { "epoch": 0.3974070467887054, "grad_norm": 76.18515014648438, "learning_rate": 7.598192058871221e-06, "loss": 26.232, "step": 196730 }, { "epoch": 0.39742724742138924, "grad_norm": 133.7754364013672, "learning_rate": 7.597893815465294e-06, "loss": 12.1932, "step": 196740 }, { "epoch": 0.39744744805407306, "grad_norm": 286.5175476074219, "learning_rate": 7.597595559397548e-06, "loss": 17.4878, "step": 196750 }, { "epoch": 0.3974676486867569, "grad_norm": 233.51309204101562, "learning_rate": 7.597297290669437e-06, "loss": 8.8278, "step": 196760 }, { "epoch": 0.3974878493194407, "grad_norm": 323.18310546875, "learning_rate": 7.596999009282413e-06, "loss": 20.5059, "step": 196770 }, { "epoch": 0.3975080499521245, "grad_norm": 365.426025390625, "learning_rate": 7.5967007152379305e-06, "loss": 29.5079, "step": 196780 }, { "epoch": 0.39752825058480834, "grad_norm": 229.75201416015625, "learning_rate": 7.596402408537444e-06, "loss": 23.8807, "step": 196790 }, { "epoch": 0.3975484512174921, "grad_norm": 628.5294799804688, "learning_rate": 7.596104089182408e-06, "loss": 26.1366, "step": 196800 }, { "epoch": 0.3975686518501759, "grad_norm": 575.3751831054688, "learning_rate": 7.595805757174275e-06, "loss": 20.856, "step": 196810 }, { "epoch": 0.39758885248285974, "grad_norm": 260.66180419921875, "learning_rate": 7.5955074125145e-06, "loss": 21.6481, "step": 196820 }, { "epoch": 0.39760905311554356, "grad_norm": 231.44540405273438, "learning_rate": 7.595209055204534e-06, "loss": 12.2188, "step": 196830 }, { "epoch": 0.3976292537482274, "grad_norm": 333.1389465332031, "learning_rate": 7.594910685245837e-06, "loss": 25.1314, "step": 196840 }, { "epoch": 0.3976494543809112, "grad_norm": 200.00396728515625, "learning_rate": 7.594612302639859e-06, "loss": 23.7266, "step": 196850 }, { "epoch": 0.397669655013595, "grad_norm": 252.30804443359375, "learning_rate": 7.5943139073880555e-06, "loss": 18.3158, "step": 196860 }, { "epoch": 0.39768985564627884, "grad_norm": 367.0006103515625, "learning_rate": 7.5940154994918806e-06, "loss": 12.7885, "step": 196870 }, { "epoch": 0.39771005627896266, "grad_norm": 197.39724731445312, "learning_rate": 7.593717078952788e-06, "loss": 23.9666, "step": 196880 }, { "epoch": 0.3977302569116465, "grad_norm": 352.8096923828125, "learning_rate": 7.593418645772235e-06, "loss": 28.0981, "step": 196890 }, { "epoch": 0.3977504575443303, "grad_norm": 515.5092163085938, "learning_rate": 7.5931201999516715e-06, "loss": 24.0515, "step": 196900 }, { "epoch": 0.3977706581770141, "grad_norm": 749.9255981445312, "learning_rate": 7.592821741492555e-06, "loss": 19.9823, "step": 196910 }, { "epoch": 0.39779085880969794, "grad_norm": 505.81317138671875, "learning_rate": 7.592523270396342e-06, "loss": 58.5766, "step": 196920 }, { "epoch": 0.3978110594423817, "grad_norm": 315.5837097167969, "learning_rate": 7.592224786664484e-06, "loss": 19.9954, "step": 196930 }, { "epoch": 0.39783126007506553, "grad_norm": 212.61859130859375, "learning_rate": 7.591926290298435e-06, "loss": 23.0713, "step": 196940 }, { "epoch": 0.39785146070774935, "grad_norm": 148.7266082763672, "learning_rate": 7.591627781299654e-06, "loss": 15.0862, "step": 196950 }, { "epoch": 0.39787166134043317, "grad_norm": 153.5541534423828, "learning_rate": 7.5913292596695906e-06, "loss": 15.4249, "step": 196960 }, { "epoch": 0.397891861973117, "grad_norm": 325.7481994628906, "learning_rate": 7.5910307254097075e-06, "loss": 9.5312, "step": 196970 }, { "epoch": 0.3979120626058008, "grad_norm": 647.8545532226562, "learning_rate": 7.590732178521451e-06, "loss": 19.4572, "step": 196980 }, { "epoch": 0.39793226323848463, "grad_norm": 390.50286865234375, "learning_rate": 7.590433619006281e-06, "loss": 22.1838, "step": 196990 }, { "epoch": 0.39795246387116845, "grad_norm": 166.29832458496094, "learning_rate": 7.590135046865652e-06, "loss": 17.0849, "step": 197000 }, { "epoch": 0.39797266450385227, "grad_norm": 358.026611328125, "learning_rate": 7.589836462101019e-06, "loss": 16.3701, "step": 197010 }, { "epoch": 0.3979928651365361, "grad_norm": 257.49114990234375, "learning_rate": 7.589537864713836e-06, "loss": 22.2205, "step": 197020 }, { "epoch": 0.3980130657692199, "grad_norm": 215.25628662109375, "learning_rate": 7.58923925470556e-06, "loss": 11.6027, "step": 197030 }, { "epoch": 0.39803326640190373, "grad_norm": 171.20823669433594, "learning_rate": 7.588940632077647e-06, "loss": 15.7667, "step": 197040 }, { "epoch": 0.39805346703458755, "grad_norm": 20.094932556152344, "learning_rate": 7.588641996831551e-06, "loss": 16.6386, "step": 197050 }, { "epoch": 0.3980736676672713, "grad_norm": 0.0, "learning_rate": 7.588343348968728e-06, "loss": 20.067, "step": 197060 }, { "epoch": 0.39809386829995513, "grad_norm": 418.7384338378906, "learning_rate": 7.588044688490633e-06, "loss": 18.5833, "step": 197070 }, { "epoch": 0.39811406893263895, "grad_norm": 710.0728149414062, "learning_rate": 7.587746015398723e-06, "loss": 42.0858, "step": 197080 }, { "epoch": 0.3981342695653228, "grad_norm": 141.45550537109375, "learning_rate": 7.587447329694451e-06, "loss": 10.9955, "step": 197090 }, { "epoch": 0.3981544701980066, "grad_norm": 316.5946350097656, "learning_rate": 7.587148631379276e-06, "loss": 26.3005, "step": 197100 }, { "epoch": 0.3981746708306904, "grad_norm": 200.53729248046875, "learning_rate": 7.586849920454652e-06, "loss": 17.9916, "step": 197110 }, { "epoch": 0.39819487146337423, "grad_norm": 143.71287536621094, "learning_rate": 7.586551196922034e-06, "loss": 23.2799, "step": 197120 }, { "epoch": 0.39821507209605805, "grad_norm": 2.3539798259735107, "learning_rate": 7.586252460782882e-06, "loss": 12.1689, "step": 197130 }, { "epoch": 0.3982352727287419, "grad_norm": 210.6708221435547, "learning_rate": 7.585953712038646e-06, "loss": 9.6649, "step": 197140 }, { "epoch": 0.3982554733614257, "grad_norm": 1021.3026123046875, "learning_rate": 7.585654950690786e-06, "loss": 34.4251, "step": 197150 }, { "epoch": 0.3982756739941095, "grad_norm": 280.7657775878906, "learning_rate": 7.585356176740759e-06, "loss": 14.9469, "step": 197160 }, { "epoch": 0.39829587462679333, "grad_norm": 304.1202697753906, "learning_rate": 7.5850573901900185e-06, "loss": 21.0546, "step": 197170 }, { "epoch": 0.39831607525947715, "grad_norm": 372.9208068847656, "learning_rate": 7.584758591040022e-06, "loss": 23.661, "step": 197180 }, { "epoch": 0.3983362758921609, "grad_norm": 425.7402038574219, "learning_rate": 7.584459779292226e-06, "loss": 22.7648, "step": 197190 }, { "epoch": 0.39835647652484474, "grad_norm": 145.73182678222656, "learning_rate": 7.5841609549480854e-06, "loss": 28.4098, "step": 197200 }, { "epoch": 0.39837667715752856, "grad_norm": 319.3498229980469, "learning_rate": 7.583862118009058e-06, "loss": 26.5046, "step": 197210 }, { "epoch": 0.3983968777902124, "grad_norm": 553.8629760742188, "learning_rate": 7.583563268476602e-06, "loss": 23.0763, "step": 197220 }, { "epoch": 0.3984170784228962, "grad_norm": 354.61126708984375, "learning_rate": 7.583264406352169e-06, "loss": 12.8738, "step": 197230 }, { "epoch": 0.39843727905558, "grad_norm": 248.4993438720703, "learning_rate": 7.582965531637221e-06, "loss": 13.3738, "step": 197240 }, { "epoch": 0.39845747968826384, "grad_norm": 561.8800048828125, "learning_rate": 7.58266664433321e-06, "loss": 28.2738, "step": 197250 }, { "epoch": 0.39847768032094766, "grad_norm": 147.6110076904297, "learning_rate": 7.582367744441597e-06, "loss": 17.1136, "step": 197260 }, { "epoch": 0.3984978809536315, "grad_norm": 33.31745529174805, "learning_rate": 7.582068831963836e-06, "loss": 15.3232, "step": 197270 }, { "epoch": 0.3985180815863153, "grad_norm": 78.95259094238281, "learning_rate": 7.5817699069013835e-06, "loss": 18.289, "step": 197280 }, { "epoch": 0.3985382822189991, "grad_norm": 236.0439453125, "learning_rate": 7.5814709692557e-06, "loss": 22.0516, "step": 197290 }, { "epoch": 0.39855848285168294, "grad_norm": 295.2016906738281, "learning_rate": 7.581172019028238e-06, "loss": 11.5531, "step": 197300 }, { "epoch": 0.39857868348436676, "grad_norm": 230.6957244873047, "learning_rate": 7.580873056220458e-06, "loss": 25.9474, "step": 197310 }, { "epoch": 0.3985988841170505, "grad_norm": 212.35694885253906, "learning_rate": 7.580574080833816e-06, "loss": 11.5842, "step": 197320 }, { "epoch": 0.39861908474973434, "grad_norm": 146.1280975341797, "learning_rate": 7.580275092869766e-06, "loss": 10.0423, "step": 197330 }, { "epoch": 0.39863928538241816, "grad_norm": 223.53643798828125, "learning_rate": 7.579976092329772e-06, "loss": 19.0149, "step": 197340 }, { "epoch": 0.398659486015102, "grad_norm": 110.02352142333984, "learning_rate": 7.579677079215286e-06, "loss": 21.5929, "step": 197350 }, { "epoch": 0.3986796866477858, "grad_norm": 264.9632568359375, "learning_rate": 7.5793780535277665e-06, "loss": 16.988, "step": 197360 }, { "epoch": 0.3986998872804696, "grad_norm": 168.6101531982422, "learning_rate": 7.579079015268671e-06, "loss": 18.0216, "step": 197370 }, { "epoch": 0.39872008791315344, "grad_norm": 509.2603454589844, "learning_rate": 7.5787799644394576e-06, "loss": 28.0463, "step": 197380 }, { "epoch": 0.39874028854583726, "grad_norm": 9.884483337402344, "learning_rate": 7.578480901041583e-06, "loss": 15.8237, "step": 197390 }, { "epoch": 0.3987604891785211, "grad_norm": 265.16949462890625, "learning_rate": 7.578181825076506e-06, "loss": 18.874, "step": 197400 }, { "epoch": 0.3987806898112049, "grad_norm": 325.5540771484375, "learning_rate": 7.577882736545683e-06, "loss": 16.8648, "step": 197410 }, { "epoch": 0.3988008904438887, "grad_norm": 60.85361099243164, "learning_rate": 7.577583635450572e-06, "loss": 35.5535, "step": 197420 }, { "epoch": 0.39882109107657254, "grad_norm": 369.356201171875, "learning_rate": 7.577284521792632e-06, "loss": 45.7981, "step": 197430 }, { "epoch": 0.3988412917092563, "grad_norm": 218.2154541015625, "learning_rate": 7.576985395573318e-06, "loss": 17.3711, "step": 197440 }, { "epoch": 0.3988614923419401, "grad_norm": 29.592369079589844, "learning_rate": 7.576686256794092e-06, "loss": 19.7378, "step": 197450 }, { "epoch": 0.39888169297462395, "grad_norm": 738.89208984375, "learning_rate": 7.576387105456408e-06, "loss": 23.0383, "step": 197460 }, { "epoch": 0.39890189360730777, "grad_norm": 241.32232666015625, "learning_rate": 7.576087941561725e-06, "loss": 10.139, "step": 197470 }, { "epoch": 0.3989220942399916, "grad_norm": 263.060546875, "learning_rate": 7.575788765111504e-06, "loss": 20.7429, "step": 197480 }, { "epoch": 0.3989422948726754, "grad_norm": 358.0314025878906, "learning_rate": 7.5754895761072e-06, "loss": 19.4394, "step": 197490 }, { "epoch": 0.3989624955053592, "grad_norm": 28.549489974975586, "learning_rate": 7.575190374550272e-06, "loss": 19.1117, "step": 197500 }, { "epoch": 0.39898269613804305, "grad_norm": 847.1609497070312, "learning_rate": 7.574891160442179e-06, "loss": 33.4823, "step": 197510 }, { "epoch": 0.39900289677072687, "grad_norm": 407.9217529296875, "learning_rate": 7.574591933784378e-06, "loss": 24.3847, "step": 197520 }, { "epoch": 0.3990230974034107, "grad_norm": 378.9081726074219, "learning_rate": 7.574292694578329e-06, "loss": 17.4001, "step": 197530 }, { "epoch": 0.3990432980360945, "grad_norm": 38.607337951660156, "learning_rate": 7.573993442825489e-06, "loss": 13.2309, "step": 197540 }, { "epoch": 0.3990634986687783, "grad_norm": 103.85604858398438, "learning_rate": 7.573694178527317e-06, "loss": 12.0034, "step": 197550 }, { "epoch": 0.39908369930146215, "grad_norm": 237.58819580078125, "learning_rate": 7.573394901685271e-06, "loss": 11.4367, "step": 197560 }, { "epoch": 0.3991038999341459, "grad_norm": 192.59043884277344, "learning_rate": 7.573095612300813e-06, "loss": 27.3578, "step": 197570 }, { "epoch": 0.39912410056682973, "grad_norm": 29.524765014648438, "learning_rate": 7.572796310375397e-06, "loss": 27.3232, "step": 197580 }, { "epoch": 0.39914430119951355, "grad_norm": 13.357166290283203, "learning_rate": 7.5724969959104835e-06, "loss": 13.3151, "step": 197590 }, { "epoch": 0.39916450183219737, "grad_norm": 143.96530151367188, "learning_rate": 7.572197668907533e-06, "loss": 14.4776, "step": 197600 }, { "epoch": 0.3991847024648812, "grad_norm": 192.93910217285156, "learning_rate": 7.571898329368004e-06, "loss": 16.6859, "step": 197610 }, { "epoch": 0.399204903097565, "grad_norm": 214.7217254638672, "learning_rate": 7.571598977293351e-06, "loss": 19.9036, "step": 197620 }, { "epoch": 0.39922510373024883, "grad_norm": 233.1929168701172, "learning_rate": 7.571299612685039e-06, "loss": 18.5402, "step": 197630 }, { "epoch": 0.39924530436293265, "grad_norm": 940.0118408203125, "learning_rate": 7.571000235544524e-06, "loss": 25.7483, "step": 197640 }, { "epoch": 0.39926550499561647, "grad_norm": 560.79443359375, "learning_rate": 7.570700845873265e-06, "loss": 32.4813, "step": 197650 }, { "epoch": 0.3992857056283003, "grad_norm": 111.63124084472656, "learning_rate": 7.570401443672723e-06, "loss": 19.6049, "step": 197660 }, { "epoch": 0.3993059062609841, "grad_norm": 233.95162963867188, "learning_rate": 7.570102028944356e-06, "loss": 32.2108, "step": 197670 }, { "epoch": 0.39932610689366793, "grad_norm": 281.4308776855469, "learning_rate": 7.569802601689623e-06, "loss": 34.8472, "step": 197680 }, { "epoch": 0.39934630752635175, "grad_norm": 436.92584228515625, "learning_rate": 7.569503161909984e-06, "loss": 10.5465, "step": 197690 }, { "epoch": 0.3993665081590355, "grad_norm": 294.0990295410156, "learning_rate": 7.569203709606898e-06, "loss": 31.3854, "step": 197700 }, { "epoch": 0.39938670879171934, "grad_norm": 0.0, "learning_rate": 7.568904244781825e-06, "loss": 19.2889, "step": 197710 }, { "epoch": 0.39940690942440316, "grad_norm": 143.31314086914062, "learning_rate": 7.568604767436225e-06, "loss": 15.4806, "step": 197720 }, { "epoch": 0.399427110057087, "grad_norm": 252.13414001464844, "learning_rate": 7.5683052775715545e-06, "loss": 27.985, "step": 197730 }, { "epoch": 0.3994473106897708, "grad_norm": 208.4791259765625, "learning_rate": 7.568005775189278e-06, "loss": 18.1576, "step": 197740 }, { "epoch": 0.3994675113224546, "grad_norm": 144.7069854736328, "learning_rate": 7.5677062602908515e-06, "loss": 20.829, "step": 197750 }, { "epoch": 0.39948771195513844, "grad_norm": 367.3947448730469, "learning_rate": 7.567406732877735e-06, "loss": 14.1067, "step": 197760 }, { "epoch": 0.39950791258782226, "grad_norm": 67.12899780273438, "learning_rate": 7.567107192951393e-06, "loss": 16.1314, "step": 197770 }, { "epoch": 0.3995281132205061, "grad_norm": 149.95945739746094, "learning_rate": 7.566807640513278e-06, "loss": 9.8657, "step": 197780 }, { "epoch": 0.3995483138531899, "grad_norm": 0.0, "learning_rate": 7.5665080755648575e-06, "loss": 18.364, "step": 197790 }, { "epoch": 0.3995685144858737, "grad_norm": 178.62435913085938, "learning_rate": 7.566208498107586e-06, "loss": 22.888, "step": 197800 }, { "epoch": 0.39958871511855754, "grad_norm": 278.9465637207031, "learning_rate": 7.5659089081429245e-06, "loss": 21.9758, "step": 197810 }, { "epoch": 0.39960891575124136, "grad_norm": 386.1084289550781, "learning_rate": 7.565609305672336e-06, "loss": 20.9704, "step": 197820 }, { "epoch": 0.3996291163839251, "grad_norm": 437.77642822265625, "learning_rate": 7.565309690697279e-06, "loss": 26.9996, "step": 197830 }, { "epoch": 0.39964931701660894, "grad_norm": 339.415771484375, "learning_rate": 7.565010063219214e-06, "loss": 17.1962, "step": 197840 }, { "epoch": 0.39966951764929276, "grad_norm": 130.03709411621094, "learning_rate": 7.5647104232395985e-06, "loss": 22.0512, "step": 197850 }, { "epoch": 0.3996897182819766, "grad_norm": 342.1125793457031, "learning_rate": 7.564410770759897e-06, "loss": 29.3938, "step": 197860 }, { "epoch": 0.3997099189146604, "grad_norm": 221.88661193847656, "learning_rate": 7.564111105781568e-06, "loss": 26.883, "step": 197870 }, { "epoch": 0.3997301195473442, "grad_norm": 317.0343933105469, "learning_rate": 7.5638114283060735e-06, "loss": 19.923, "step": 197880 }, { "epoch": 0.39975032018002804, "grad_norm": 72.91720581054688, "learning_rate": 7.5635117383348725e-06, "loss": 16.9683, "step": 197890 }, { "epoch": 0.39977052081271186, "grad_norm": 265.1347961425781, "learning_rate": 7.563212035869426e-06, "loss": 15.2266, "step": 197900 }, { "epoch": 0.3997907214453957, "grad_norm": 407.9832763671875, "learning_rate": 7.5629123209111955e-06, "loss": 21.7793, "step": 197910 }, { "epoch": 0.3998109220780795, "grad_norm": 189.5552215576172, "learning_rate": 7.56261259346164e-06, "loss": 25.8723, "step": 197920 }, { "epoch": 0.3998311227107633, "grad_norm": 509.00439453125, "learning_rate": 7.5623128535222224e-06, "loss": 21.2374, "step": 197930 }, { "epoch": 0.39985132334344714, "grad_norm": 324.2961730957031, "learning_rate": 7.562013101094403e-06, "loss": 12.4009, "step": 197940 }, { "epoch": 0.3998715239761309, "grad_norm": 451.033447265625, "learning_rate": 7.561713336179642e-06, "loss": 40.8254, "step": 197950 }, { "epoch": 0.3998917246088147, "grad_norm": 615.39453125, "learning_rate": 7.561413558779401e-06, "loss": 35.7418, "step": 197960 }, { "epoch": 0.39991192524149854, "grad_norm": 437.7013244628906, "learning_rate": 7.5611137688951405e-06, "loss": 26.8291, "step": 197970 }, { "epoch": 0.39993212587418236, "grad_norm": 259.8296813964844, "learning_rate": 7.560813966528323e-06, "loss": 14.1063, "step": 197980 }, { "epoch": 0.3999523265068662, "grad_norm": 251.0192413330078, "learning_rate": 7.560514151680409e-06, "loss": 15.4545, "step": 197990 }, { "epoch": 0.39997252713955, "grad_norm": 833.412841796875, "learning_rate": 7.560214324352858e-06, "loss": 31.0604, "step": 198000 }, { "epoch": 0.3999927277722338, "grad_norm": 440.1564025878906, "learning_rate": 7.559914484547135e-06, "loss": 25.0369, "step": 198010 }, { "epoch": 0.40001292840491764, "grad_norm": 459.42205810546875, "learning_rate": 7.559614632264698e-06, "loss": 21.5419, "step": 198020 }, { "epoch": 0.40003312903760146, "grad_norm": 179.53684997558594, "learning_rate": 7.559314767507009e-06, "loss": 30.1356, "step": 198030 }, { "epoch": 0.4000533296702853, "grad_norm": 624.1626586914062, "learning_rate": 7.559014890275533e-06, "loss": 20.3844, "step": 198040 }, { "epoch": 0.4000735303029691, "grad_norm": 437.28936767578125, "learning_rate": 7.5587150005717256e-06, "loss": 32.7551, "step": 198050 }, { "epoch": 0.4000937309356529, "grad_norm": 289.4705505371094, "learning_rate": 7.558415098397054e-06, "loss": 21.8476, "step": 198060 }, { "epoch": 0.40011393156833674, "grad_norm": 413.32037353515625, "learning_rate": 7.558115183752975e-06, "loss": 15.6188, "step": 198070 }, { "epoch": 0.4001341322010205, "grad_norm": 457.376220703125, "learning_rate": 7.557815256640954e-06, "loss": 19.0615, "step": 198080 }, { "epoch": 0.40015433283370433, "grad_norm": 274.9142150878906, "learning_rate": 7.557515317062451e-06, "loss": 21.5506, "step": 198090 }, { "epoch": 0.40017453346638815, "grad_norm": 1022.1998291015625, "learning_rate": 7.55721536501893e-06, "loss": 16.0603, "step": 198100 }, { "epoch": 0.40019473409907197, "grad_norm": 323.8982849121094, "learning_rate": 7.556915400511853e-06, "loss": 8.3824, "step": 198110 }, { "epoch": 0.4002149347317558, "grad_norm": 134.51675415039062, "learning_rate": 7.556615423542677e-06, "loss": 16.716, "step": 198120 }, { "epoch": 0.4002351353644396, "grad_norm": 198.5113525390625, "learning_rate": 7.5563154341128695e-06, "loss": 18.7452, "step": 198130 }, { "epoch": 0.40025533599712343, "grad_norm": 43.27518081665039, "learning_rate": 7.55601543222389e-06, "loss": 25.0763, "step": 198140 }, { "epoch": 0.40027553662980725, "grad_norm": 136.52850341796875, "learning_rate": 7.555715417877201e-06, "loss": 16.0831, "step": 198150 }, { "epoch": 0.40029573726249107, "grad_norm": 314.7955627441406, "learning_rate": 7.5554153910742655e-06, "loss": 16.898, "step": 198160 }, { "epoch": 0.4003159378951749, "grad_norm": 179.41659545898438, "learning_rate": 7.555115351816545e-06, "loss": 17.799, "step": 198170 }, { "epoch": 0.4003361385278587, "grad_norm": 279.18768310546875, "learning_rate": 7.554815300105502e-06, "loss": 22.907, "step": 198180 }, { "epoch": 0.40035633916054253, "grad_norm": 12.507867813110352, "learning_rate": 7.5545152359426e-06, "loss": 22.522, "step": 198190 }, { "epoch": 0.40037653979322635, "grad_norm": 146.4342041015625, "learning_rate": 7.5542151593293e-06, "loss": 16.0112, "step": 198200 }, { "epoch": 0.4003967404259101, "grad_norm": 215.08714294433594, "learning_rate": 7.553915070267065e-06, "loss": 26.7081, "step": 198210 }, { "epoch": 0.40041694105859393, "grad_norm": 302.6405334472656, "learning_rate": 7.553614968757359e-06, "loss": 17.0812, "step": 198220 }, { "epoch": 0.40043714169127775, "grad_norm": 278.9008483886719, "learning_rate": 7.553314854801641e-06, "loss": 23.6622, "step": 198230 }, { "epoch": 0.4004573423239616, "grad_norm": 23.49720573425293, "learning_rate": 7.553014728401378e-06, "loss": 21.264, "step": 198240 }, { "epoch": 0.4004775429566454, "grad_norm": 416.87109375, "learning_rate": 7.55271458955803e-06, "loss": 28.4586, "step": 198250 }, { "epoch": 0.4004977435893292, "grad_norm": 205.1663818359375, "learning_rate": 7.5524144382730605e-06, "loss": 13.1268, "step": 198260 }, { "epoch": 0.40051794422201303, "grad_norm": 240.08218383789062, "learning_rate": 7.552114274547933e-06, "loss": 9.4288, "step": 198270 }, { "epoch": 0.40053814485469685, "grad_norm": 0.0, "learning_rate": 7.5518140983841095e-06, "loss": 21.8534, "step": 198280 }, { "epoch": 0.4005583454873807, "grad_norm": 357.9220275878906, "learning_rate": 7.551513909783055e-06, "loss": 13.0981, "step": 198290 }, { "epoch": 0.4005785461200645, "grad_norm": 464.34820556640625, "learning_rate": 7.55121370874623e-06, "loss": 16.1463, "step": 198300 }, { "epoch": 0.4005987467527483, "grad_norm": 368.53045654296875, "learning_rate": 7.550913495275098e-06, "loss": 20.7134, "step": 198310 }, { "epoch": 0.40061894738543213, "grad_norm": 203.9601593017578, "learning_rate": 7.550613269371124e-06, "loss": 19.6209, "step": 198320 }, { "epoch": 0.40063914801811595, "grad_norm": 389.8086242675781, "learning_rate": 7.550313031035772e-06, "loss": 13.8423, "step": 198330 }, { "epoch": 0.4006593486507997, "grad_norm": 219.64041137695312, "learning_rate": 7.550012780270499e-06, "loss": 17.4917, "step": 198340 }, { "epoch": 0.40067954928348354, "grad_norm": 441.5366516113281, "learning_rate": 7.549712517076777e-06, "loss": 19.8357, "step": 198350 }, { "epoch": 0.40069974991616736, "grad_norm": 519.1427612304688, "learning_rate": 7.5494122414560645e-06, "loss": 19.3545, "step": 198360 }, { "epoch": 0.4007199505488512, "grad_norm": 337.1890869140625, "learning_rate": 7.549111953409827e-06, "loss": 17.6045, "step": 198370 }, { "epoch": 0.400740151181535, "grad_norm": 240.98548889160156, "learning_rate": 7.548811652939525e-06, "loss": 19.2288, "step": 198380 }, { "epoch": 0.4007603518142188, "grad_norm": 253.1871795654297, "learning_rate": 7.548511340046625e-06, "loss": 29.9426, "step": 198390 }, { "epoch": 0.40078055244690264, "grad_norm": 421.31060791015625, "learning_rate": 7.548211014732589e-06, "loss": 15.5932, "step": 198400 }, { "epoch": 0.40080075307958646, "grad_norm": 160.997314453125, "learning_rate": 7.547910676998883e-06, "loss": 21.7233, "step": 198410 }, { "epoch": 0.4008209537122703, "grad_norm": 500.6905212402344, "learning_rate": 7.547610326846968e-06, "loss": 23.2721, "step": 198420 }, { "epoch": 0.4008411543449541, "grad_norm": 196.02830505371094, "learning_rate": 7.547309964278311e-06, "loss": 19.3975, "step": 198430 }, { "epoch": 0.4008613549776379, "grad_norm": 153.66464233398438, "learning_rate": 7.547009589294374e-06, "loss": 15.3255, "step": 198440 }, { "epoch": 0.40088155561032174, "grad_norm": 21.808988571166992, "learning_rate": 7.546709201896619e-06, "loss": 10.2376, "step": 198450 }, { "epoch": 0.40090175624300556, "grad_norm": 13.942381858825684, "learning_rate": 7.546408802086513e-06, "loss": 17.0496, "step": 198460 }, { "epoch": 0.4009219568756893, "grad_norm": 66.741455078125, "learning_rate": 7.546108389865521e-06, "loss": 18.2338, "step": 198470 }, { "epoch": 0.40094215750837314, "grad_norm": 267.86328125, "learning_rate": 7.545807965235103e-06, "loss": 22.8528, "step": 198480 }, { "epoch": 0.40096235814105696, "grad_norm": 22.518783569335938, "learning_rate": 7.5455075281967295e-06, "loss": 11.5267, "step": 198490 }, { "epoch": 0.4009825587737408, "grad_norm": 0.0, "learning_rate": 7.545207078751858e-06, "loss": 20.538, "step": 198500 }, { "epoch": 0.4010027594064246, "grad_norm": 209.03773498535156, "learning_rate": 7.544906616901957e-06, "loss": 10.4156, "step": 198510 }, { "epoch": 0.4010229600391084, "grad_norm": 534.1510620117188, "learning_rate": 7.544606142648489e-06, "loss": 24.4829, "step": 198520 }, { "epoch": 0.40104316067179224, "grad_norm": 73.2962875366211, "learning_rate": 7.544305655992918e-06, "loss": 28.9997, "step": 198530 }, { "epoch": 0.40106336130447606, "grad_norm": 252.25941467285156, "learning_rate": 7.544005156936713e-06, "loss": 38.6591, "step": 198540 }, { "epoch": 0.4010835619371599, "grad_norm": 339.1732482910156, "learning_rate": 7.543704645481333e-06, "loss": 23.5624, "step": 198550 }, { "epoch": 0.4011037625698437, "grad_norm": 209.32640075683594, "learning_rate": 7.543404121628246e-06, "loss": 39.1869, "step": 198560 }, { "epoch": 0.4011239632025275, "grad_norm": 971.4519653320312, "learning_rate": 7.543103585378917e-06, "loss": 19.2603, "step": 198570 }, { "epoch": 0.40114416383521134, "grad_norm": 306.6560974121094, "learning_rate": 7.5428030367348075e-06, "loss": 36.669, "step": 198580 }, { "epoch": 0.4011643644678951, "grad_norm": 326.9656066894531, "learning_rate": 7.542502475697385e-06, "loss": 16.7784, "step": 198590 }, { "epoch": 0.4011845651005789, "grad_norm": 136.67721557617188, "learning_rate": 7.542201902268115e-06, "loss": 15.5119, "step": 198600 }, { "epoch": 0.40120476573326275, "grad_norm": 165.49078369140625, "learning_rate": 7.541901316448459e-06, "loss": 16.2937, "step": 198610 }, { "epoch": 0.40122496636594657, "grad_norm": 333.9935302734375, "learning_rate": 7.541600718239887e-06, "loss": 13.3817, "step": 198620 }, { "epoch": 0.4012451669986304, "grad_norm": 197.2796173095703, "learning_rate": 7.541300107643858e-06, "loss": 25.5073, "step": 198630 }, { "epoch": 0.4012653676313142, "grad_norm": 123.6022720336914, "learning_rate": 7.540999484661844e-06, "loss": 14.9782, "step": 198640 }, { "epoch": 0.401285568263998, "grad_norm": 270.4721984863281, "learning_rate": 7.5406988492953045e-06, "loss": 18.5914, "step": 198650 }, { "epoch": 0.40130576889668185, "grad_norm": 73.2373046875, "learning_rate": 7.5403982015457065e-06, "loss": 18.5683, "step": 198660 }, { "epoch": 0.40132596952936567, "grad_norm": 225.48089599609375, "learning_rate": 7.540097541414518e-06, "loss": 20.7806, "step": 198670 }, { "epoch": 0.4013461701620495, "grad_norm": 375.59674072265625, "learning_rate": 7.5397968689032e-06, "loss": 20.0703, "step": 198680 }, { "epoch": 0.4013663707947333, "grad_norm": 206.65509033203125, "learning_rate": 7.539496184013222e-06, "loss": 15.0459, "step": 198690 }, { "epoch": 0.4013865714274171, "grad_norm": 440.2734375, "learning_rate": 7.539195486746047e-06, "loss": 20.94, "step": 198700 }, { "epoch": 0.40140677206010095, "grad_norm": 370.8414306640625, "learning_rate": 7.538894777103141e-06, "loss": 12.4493, "step": 198710 }, { "epoch": 0.4014269726927847, "grad_norm": 65.81218719482422, "learning_rate": 7.538594055085971e-06, "loss": 18.7305, "step": 198720 }, { "epoch": 0.40144717332546853, "grad_norm": 731.241943359375, "learning_rate": 7.538293320696e-06, "loss": 17.3078, "step": 198730 }, { "epoch": 0.40146737395815235, "grad_norm": 168.3695831298828, "learning_rate": 7.537992573934694e-06, "loss": 17.3226, "step": 198740 }, { "epoch": 0.40148757459083617, "grad_norm": 177.513916015625, "learning_rate": 7.537691814803522e-06, "loss": 13.6803, "step": 198750 }, { "epoch": 0.40150777522352, "grad_norm": 212.62582397460938, "learning_rate": 7.537391043303947e-06, "loss": 17.987, "step": 198760 }, { "epoch": 0.4015279758562038, "grad_norm": 316.54486083984375, "learning_rate": 7.537090259437436e-06, "loss": 18.4334, "step": 198770 }, { "epoch": 0.40154817648888763, "grad_norm": 328.6372375488281, "learning_rate": 7.536789463205455e-06, "loss": 13.8243, "step": 198780 }, { "epoch": 0.40156837712157145, "grad_norm": 90.42134857177734, "learning_rate": 7.536488654609469e-06, "loss": 13.7505, "step": 198790 }, { "epoch": 0.40158857775425527, "grad_norm": 105.0622329711914, "learning_rate": 7.536187833650947e-06, "loss": 22.2151, "step": 198800 }, { "epoch": 0.4016087783869391, "grad_norm": 0.0, "learning_rate": 7.535887000331352e-06, "loss": 28.9561, "step": 198810 }, { "epoch": 0.4016289790196229, "grad_norm": 161.89015197753906, "learning_rate": 7.53558615465215e-06, "loss": 19.4202, "step": 198820 }, { "epoch": 0.40164917965230673, "grad_norm": 401.1818542480469, "learning_rate": 7.535285296614809e-06, "loss": 21.21, "step": 198830 }, { "epoch": 0.40166938028499055, "grad_norm": 300.74761962890625, "learning_rate": 7.534984426220795e-06, "loss": 14.014, "step": 198840 }, { "epoch": 0.4016895809176743, "grad_norm": 464.4872741699219, "learning_rate": 7.534683543471577e-06, "loss": 27.894, "step": 198850 }, { "epoch": 0.40170978155035814, "grad_norm": 245.05462646484375, "learning_rate": 7.534382648368617e-06, "loss": 11.7077, "step": 198860 }, { "epoch": 0.40172998218304196, "grad_norm": 404.2679443359375, "learning_rate": 7.534081740913382e-06, "loss": 31.2255, "step": 198870 }, { "epoch": 0.4017501828157258, "grad_norm": 340.21539306640625, "learning_rate": 7.5337808211073415e-06, "loss": 17.8677, "step": 198880 }, { "epoch": 0.4017703834484096, "grad_norm": 406.598388671875, "learning_rate": 7.53347988895196e-06, "loss": 13.7886, "step": 198890 }, { "epoch": 0.4017905840810934, "grad_norm": 65.28353881835938, "learning_rate": 7.533178944448705e-06, "loss": 23.6478, "step": 198900 }, { "epoch": 0.40181078471377724, "grad_norm": 498.2472229003906, "learning_rate": 7.532877987599043e-06, "loss": 23.0583, "step": 198910 }, { "epoch": 0.40183098534646106, "grad_norm": 137.30589294433594, "learning_rate": 7.532577018404441e-06, "loss": 19.2113, "step": 198920 }, { "epoch": 0.4018511859791449, "grad_norm": 230.93463134765625, "learning_rate": 7.532276036866366e-06, "loss": 34.9574, "step": 198930 }, { "epoch": 0.4018713866118287, "grad_norm": 404.3818664550781, "learning_rate": 7.531975042986285e-06, "loss": 14.2791, "step": 198940 }, { "epoch": 0.4018915872445125, "grad_norm": 643.3522338867188, "learning_rate": 7.531674036765662e-06, "loss": 12.0863, "step": 198950 }, { "epoch": 0.40191178787719634, "grad_norm": 455.3923034667969, "learning_rate": 7.531373018205971e-06, "loss": 8.242, "step": 198960 }, { "epoch": 0.40193198850988016, "grad_norm": 195.4125518798828, "learning_rate": 7.531071987308672e-06, "loss": 23.8133, "step": 198970 }, { "epoch": 0.4019521891425639, "grad_norm": 178.51112365722656, "learning_rate": 7.5307709440752364e-06, "loss": 15.1408, "step": 198980 }, { "epoch": 0.40197238977524774, "grad_norm": 145.88925170898438, "learning_rate": 7.530469888507131e-06, "loss": 14.8435, "step": 198990 }, { "epoch": 0.40199259040793156, "grad_norm": 167.15345764160156, "learning_rate": 7.530168820605819e-06, "loss": 20.2583, "step": 199000 }, { "epoch": 0.4020127910406154, "grad_norm": 244.78744506835938, "learning_rate": 7.529867740372774e-06, "loss": 10.1715, "step": 199010 }, { "epoch": 0.4020329916732992, "grad_norm": 487.5633850097656, "learning_rate": 7.52956664780946e-06, "loss": 10.1829, "step": 199020 }, { "epoch": 0.402053192305983, "grad_norm": 301.6481018066406, "learning_rate": 7.529265542917343e-06, "loss": 20.7432, "step": 199030 }, { "epoch": 0.40207339293866684, "grad_norm": 428.47900390625, "learning_rate": 7.528964425697895e-06, "loss": 24.435, "step": 199040 }, { "epoch": 0.40209359357135066, "grad_norm": 238.72862243652344, "learning_rate": 7.5286632961525806e-06, "loss": 31.5494, "step": 199050 }, { "epoch": 0.4021137942040345, "grad_norm": 216.0967559814453, "learning_rate": 7.528362154282868e-06, "loss": 15.1123, "step": 199060 }, { "epoch": 0.4021339948367183, "grad_norm": 480.82415771484375, "learning_rate": 7.528061000090223e-06, "loss": 19.4999, "step": 199070 }, { "epoch": 0.4021541954694021, "grad_norm": 124.0877456665039, "learning_rate": 7.527759833576118e-06, "loss": 27.3579, "step": 199080 }, { "epoch": 0.40217439610208594, "grad_norm": 601.0496215820312, "learning_rate": 7.527458654742017e-06, "loss": 21.5035, "step": 199090 }, { "epoch": 0.40219459673476976, "grad_norm": 419.7713623046875, "learning_rate": 7.527157463589389e-06, "loss": 22.1963, "step": 199100 }, { "epoch": 0.4022147973674535, "grad_norm": 175.3748016357422, "learning_rate": 7.526856260119702e-06, "loss": 14.2104, "step": 199110 }, { "epoch": 0.40223499800013734, "grad_norm": 339.481201171875, "learning_rate": 7.526555044334424e-06, "loss": 18.2481, "step": 199120 }, { "epoch": 0.40225519863282116, "grad_norm": 1495.420654296875, "learning_rate": 7.526253816235023e-06, "loss": 20.1963, "step": 199130 }, { "epoch": 0.402275399265505, "grad_norm": 81.70526885986328, "learning_rate": 7.525952575822967e-06, "loss": 19.0099, "step": 199140 }, { "epoch": 0.4022955998981888, "grad_norm": 192.27442932128906, "learning_rate": 7.525651323099725e-06, "loss": 20.4819, "step": 199150 }, { "epoch": 0.4023158005308726, "grad_norm": 195.0780487060547, "learning_rate": 7.525350058066765e-06, "loss": 14.3676, "step": 199160 }, { "epoch": 0.40233600116355644, "grad_norm": 251.3155517578125, "learning_rate": 7.525048780725554e-06, "loss": 10.903, "step": 199170 }, { "epoch": 0.40235620179624026, "grad_norm": 160.53997802734375, "learning_rate": 7.524747491077563e-06, "loss": 23.6308, "step": 199180 }, { "epoch": 0.4023764024289241, "grad_norm": 379.61480712890625, "learning_rate": 7.524446189124259e-06, "loss": 20.9685, "step": 199190 }, { "epoch": 0.4023966030616079, "grad_norm": 708.205810546875, "learning_rate": 7.52414487486711e-06, "loss": 18.9456, "step": 199200 }, { "epoch": 0.4024168036942917, "grad_norm": 545.3832397460938, "learning_rate": 7.523843548307583e-06, "loss": 15.3875, "step": 199210 }, { "epoch": 0.40243700432697554, "grad_norm": 110.79427337646484, "learning_rate": 7.523542209447152e-06, "loss": 10.1563, "step": 199220 }, { "epoch": 0.4024572049596593, "grad_norm": 208.44070434570312, "learning_rate": 7.5232408582872805e-06, "loss": 6.9952, "step": 199230 }, { "epoch": 0.40247740559234313, "grad_norm": 290.46954345703125, "learning_rate": 7.52293949482944e-06, "loss": 32.3481, "step": 199240 }, { "epoch": 0.40249760622502695, "grad_norm": 398.9017639160156, "learning_rate": 7.522638119075097e-06, "loss": 22.3843, "step": 199250 }, { "epoch": 0.40251780685771077, "grad_norm": 187.2017059326172, "learning_rate": 7.522336731025723e-06, "loss": 26.2361, "step": 199260 }, { "epoch": 0.4025380074903946, "grad_norm": 156.52920532226562, "learning_rate": 7.522035330682784e-06, "loss": 19.5941, "step": 199270 }, { "epoch": 0.4025582081230784, "grad_norm": 602.2750244140625, "learning_rate": 7.521733918047753e-06, "loss": 14.8073, "step": 199280 }, { "epoch": 0.40257840875576223, "grad_norm": 409.26556396484375, "learning_rate": 7.5214324931220936e-06, "loss": 19.8077, "step": 199290 }, { "epoch": 0.40259860938844605, "grad_norm": 338.9705810546875, "learning_rate": 7.521131055907283e-06, "loss": 13.3657, "step": 199300 }, { "epoch": 0.40261881002112987, "grad_norm": 212.8897705078125, "learning_rate": 7.520829606404781e-06, "loss": 19.79, "step": 199310 }, { "epoch": 0.4026390106538137, "grad_norm": 403.32763671875, "learning_rate": 7.520528144616063e-06, "loss": 16.5278, "step": 199320 }, { "epoch": 0.4026592112864975, "grad_norm": 468.1279602050781, "learning_rate": 7.520226670542597e-06, "loss": 20.4785, "step": 199330 }, { "epoch": 0.40267941191918133, "grad_norm": 574.6720581054688, "learning_rate": 7.519925184185851e-06, "loss": 18.8178, "step": 199340 }, { "epoch": 0.40269961255186515, "grad_norm": 196.21714782714844, "learning_rate": 7.5196236855472945e-06, "loss": 12.1465, "step": 199350 }, { "epoch": 0.4027198131845489, "grad_norm": 184.18675231933594, "learning_rate": 7.519322174628399e-06, "loss": 28.0402, "step": 199360 }, { "epoch": 0.40274001381723273, "grad_norm": 489.64111328125, "learning_rate": 7.519020651430633e-06, "loss": 16.027, "step": 199370 }, { "epoch": 0.40276021444991655, "grad_norm": 130.7205810546875, "learning_rate": 7.518719115955464e-06, "loss": 13.7062, "step": 199380 }, { "epoch": 0.4027804150826004, "grad_norm": 47.02116394042969, "learning_rate": 7.518417568204366e-06, "loss": 22.034, "step": 199390 }, { "epoch": 0.4028006157152842, "grad_norm": 422.4451904296875, "learning_rate": 7.518116008178805e-06, "loss": 23.8998, "step": 199400 }, { "epoch": 0.402820816347968, "grad_norm": 119.76779174804688, "learning_rate": 7.517814435880252e-06, "loss": 23.1736, "step": 199410 }, { "epoch": 0.40284101698065183, "grad_norm": 399.29791259765625, "learning_rate": 7.517512851310178e-06, "loss": 17.1684, "step": 199420 }, { "epoch": 0.40286121761333565, "grad_norm": 55.60679244995117, "learning_rate": 7.517211254470049e-06, "loss": 7.8456, "step": 199430 }, { "epoch": 0.4028814182460195, "grad_norm": 255.8061981201172, "learning_rate": 7.51690964536134e-06, "loss": 23.0327, "step": 199440 }, { "epoch": 0.4029016188787033, "grad_norm": 502.9276123046875, "learning_rate": 7.516608023985516e-06, "loss": 33.3897, "step": 199450 }, { "epoch": 0.4029218195113871, "grad_norm": 291.9715576171875, "learning_rate": 7.516306390344052e-06, "loss": 26.5959, "step": 199460 }, { "epoch": 0.40294202014407093, "grad_norm": 1663.9923095703125, "learning_rate": 7.516004744438417e-06, "loss": 28.4637, "step": 199470 }, { "epoch": 0.40296222077675475, "grad_norm": 238.53408813476562, "learning_rate": 7.515703086270078e-06, "loss": 21.6418, "step": 199480 }, { "epoch": 0.4029824214094385, "grad_norm": 402.2608947753906, "learning_rate": 7.515401415840509e-06, "loss": 21.5059, "step": 199490 }, { "epoch": 0.40300262204212234, "grad_norm": 435.546142578125, "learning_rate": 7.515099733151177e-06, "loss": 13.315, "step": 199500 }, { "epoch": 0.40302282267480616, "grad_norm": 484.3211669921875, "learning_rate": 7.514798038203553e-06, "loss": 31.4937, "step": 199510 }, { "epoch": 0.40304302330749, "grad_norm": 123.31654357910156, "learning_rate": 7.514496330999111e-06, "loss": 21.4371, "step": 199520 }, { "epoch": 0.4030632239401738, "grad_norm": 510.3427429199219, "learning_rate": 7.514194611539316e-06, "loss": 14.3239, "step": 199530 }, { "epoch": 0.4030834245728576, "grad_norm": 584.1061401367188, "learning_rate": 7.513892879825643e-06, "loss": 20.2045, "step": 199540 }, { "epoch": 0.40310362520554144, "grad_norm": 208.0918426513672, "learning_rate": 7.5135911358595615e-06, "loss": 22.7163, "step": 199550 }, { "epoch": 0.40312382583822526, "grad_norm": 104.84254455566406, "learning_rate": 7.513289379642541e-06, "loss": 16.7673, "step": 199560 }, { "epoch": 0.4031440264709091, "grad_norm": 1478.080322265625, "learning_rate": 7.512987611176052e-06, "loss": 26.2568, "step": 199570 }, { "epoch": 0.4031642271035929, "grad_norm": 372.0586853027344, "learning_rate": 7.512685830461568e-06, "loss": 20.1462, "step": 199580 }, { "epoch": 0.4031844277362767, "grad_norm": 412.786865234375, "learning_rate": 7.5123840375005565e-06, "loss": 22.621, "step": 199590 }, { "epoch": 0.40320462836896054, "grad_norm": 1281.910888671875, "learning_rate": 7.512082232294491e-06, "loss": 36.3618, "step": 199600 }, { "epoch": 0.40322482900164436, "grad_norm": 0.0, "learning_rate": 7.51178041484484e-06, "loss": 31.8541, "step": 199610 }, { "epoch": 0.4032450296343281, "grad_norm": 412.55023193359375, "learning_rate": 7.511478585153078e-06, "loss": 22.7852, "step": 199620 }, { "epoch": 0.40326523026701194, "grad_norm": 335.76385498046875, "learning_rate": 7.511176743220672e-06, "loss": 22.0988, "step": 199630 }, { "epoch": 0.40328543089969576, "grad_norm": 100.4019775390625, "learning_rate": 7.510874889049096e-06, "loss": 17.002, "step": 199640 }, { "epoch": 0.4033056315323796, "grad_norm": 282.4847106933594, "learning_rate": 7.51057302263982e-06, "loss": 17.4236, "step": 199650 }, { "epoch": 0.4033258321650634, "grad_norm": 873.4385375976562, "learning_rate": 7.510271143994317e-06, "loss": 22.0166, "step": 199660 }, { "epoch": 0.4033460327977472, "grad_norm": 4.611435890197754, "learning_rate": 7.509969253114056e-06, "loss": 17.4929, "step": 199670 }, { "epoch": 0.40336623343043104, "grad_norm": 348.3924560546875, "learning_rate": 7.509667350000509e-06, "loss": 15.5707, "step": 199680 }, { "epoch": 0.40338643406311486, "grad_norm": 263.3329772949219, "learning_rate": 7.509365434655147e-06, "loss": 16.1694, "step": 199690 }, { "epoch": 0.4034066346957987, "grad_norm": 78.46598815917969, "learning_rate": 7.509063507079443e-06, "loss": 15.8515, "step": 199700 }, { "epoch": 0.4034268353284825, "grad_norm": 156.7500457763672, "learning_rate": 7.508761567274868e-06, "loss": 25.2279, "step": 199710 }, { "epoch": 0.4034470359611663, "grad_norm": 228.83567810058594, "learning_rate": 7.5084596152428925e-06, "loss": 18.9538, "step": 199720 }, { "epoch": 0.40346723659385014, "grad_norm": 379.5489196777344, "learning_rate": 7.508157650984991e-06, "loss": 22.4164, "step": 199730 }, { "epoch": 0.40348743722653396, "grad_norm": 236.03543090820312, "learning_rate": 7.507855674502631e-06, "loss": 18.0695, "step": 199740 }, { "epoch": 0.4035076378592177, "grad_norm": 133.21343994140625, "learning_rate": 7.507553685797288e-06, "loss": 15.7685, "step": 199750 }, { "epoch": 0.40352783849190155, "grad_norm": 769.5610961914062, "learning_rate": 7.507251684870433e-06, "loss": 19.0652, "step": 199760 }, { "epoch": 0.40354803912458537, "grad_norm": 256.3149719238281, "learning_rate": 7.506949671723534e-06, "loss": 19.0435, "step": 199770 }, { "epoch": 0.4035682397572692, "grad_norm": 913.3328247070312, "learning_rate": 7.506647646358069e-06, "loss": 23.5891, "step": 199780 }, { "epoch": 0.403588440389953, "grad_norm": 345.80987548828125, "learning_rate": 7.5063456087755085e-06, "loss": 25.7175, "step": 199790 }, { "epoch": 0.4036086410226368, "grad_norm": 158.8275146484375, "learning_rate": 7.5060435589773215e-06, "loss": 21.927, "step": 199800 }, { "epoch": 0.40362884165532065, "grad_norm": 332.47442626953125, "learning_rate": 7.505741496964984e-06, "loss": 22.1789, "step": 199810 }, { "epoch": 0.40364904228800447, "grad_norm": 572.9505004882812, "learning_rate": 7.505439422739964e-06, "loss": 17.3011, "step": 199820 }, { "epoch": 0.4036692429206883, "grad_norm": 319.12103271484375, "learning_rate": 7.505137336303739e-06, "loss": 40.8066, "step": 199830 }, { "epoch": 0.4036894435533721, "grad_norm": 419.4090576171875, "learning_rate": 7.504835237657776e-06, "loss": 25.4572, "step": 199840 }, { "epoch": 0.4037096441860559, "grad_norm": 671.3436279296875, "learning_rate": 7.5045331268035505e-06, "loss": 22.3972, "step": 199850 }, { "epoch": 0.40372984481873975, "grad_norm": 564.0520629882812, "learning_rate": 7.5042310037425345e-06, "loss": 34.5035, "step": 199860 }, { "epoch": 0.4037500454514235, "grad_norm": 502.3507080078125, "learning_rate": 7.5039288684762e-06, "loss": 25.1555, "step": 199870 }, { "epoch": 0.40377024608410733, "grad_norm": 0.0, "learning_rate": 7.50362672100602e-06, "loss": 19.3063, "step": 199880 }, { "epoch": 0.40379044671679115, "grad_norm": 603.8942260742188, "learning_rate": 7.503324561333467e-06, "loss": 17.7634, "step": 199890 }, { "epoch": 0.40381064734947497, "grad_norm": 209.19766235351562, "learning_rate": 7.503022389460014e-06, "loss": 22.3699, "step": 199900 }, { "epoch": 0.4038308479821588, "grad_norm": 140.9329376220703, "learning_rate": 7.502720205387133e-06, "loss": 19.5344, "step": 199910 }, { "epoch": 0.4038510486148426, "grad_norm": 494.5918273925781, "learning_rate": 7.5024180091162976e-06, "loss": 21.5009, "step": 199920 }, { "epoch": 0.40387124924752643, "grad_norm": 2.1553947925567627, "learning_rate": 7.50211580064898e-06, "loss": 18.7201, "step": 199930 }, { "epoch": 0.40389144988021025, "grad_norm": 264.9994201660156, "learning_rate": 7.501813579986655e-06, "loss": 19.8931, "step": 199940 }, { "epoch": 0.40391165051289407, "grad_norm": 178.32191467285156, "learning_rate": 7.501511347130792e-06, "loss": 19.8835, "step": 199950 }, { "epoch": 0.4039318511455779, "grad_norm": 250.5416259765625, "learning_rate": 7.501209102082867e-06, "loss": 11.4166, "step": 199960 }, { "epoch": 0.4039520517782617, "grad_norm": 381.7201843261719, "learning_rate": 7.500906844844352e-06, "loss": 23.8153, "step": 199970 }, { "epoch": 0.40397225241094553, "grad_norm": 280.6001892089844, "learning_rate": 7.5006045754167216e-06, "loss": 16.4223, "step": 199980 }, { "epoch": 0.40399245304362935, "grad_norm": 385.7357482910156, "learning_rate": 7.5003022938014445e-06, "loss": 23.3681, "step": 199990 }, { "epoch": 0.4040126536763131, "grad_norm": 354.80657958984375, "learning_rate": 7.500000000000001e-06, "loss": 25.2868, "step": 200000 }, { "epoch": 0.40403285430899694, "grad_norm": 0.0, "learning_rate": 7.499697694013859e-06, "loss": 8.9787, "step": 200010 }, { "epoch": 0.40405305494168076, "grad_norm": 460.0044250488281, "learning_rate": 7.499395375844494e-06, "loss": 13.9768, "step": 200020 }, { "epoch": 0.4040732555743646, "grad_norm": 659.6134033203125, "learning_rate": 7.49909304549338e-06, "loss": 26.5935, "step": 200030 }, { "epoch": 0.4040934562070484, "grad_norm": 314.9097900390625, "learning_rate": 7.498790702961987e-06, "loss": 7.3007, "step": 200040 }, { "epoch": 0.4041136568397322, "grad_norm": 348.9200134277344, "learning_rate": 7.498488348251793e-06, "loss": 25.4908, "step": 200050 }, { "epoch": 0.40413385747241604, "grad_norm": 301.13604736328125, "learning_rate": 7.498185981364268e-06, "loss": 12.6463, "step": 200060 }, { "epoch": 0.40415405810509986, "grad_norm": 0.0, "learning_rate": 7.497883602300891e-06, "loss": 23.4154, "step": 200070 }, { "epoch": 0.4041742587377837, "grad_norm": 291.0962219238281, "learning_rate": 7.49758121106313e-06, "loss": 19.7582, "step": 200080 }, { "epoch": 0.4041944593704675, "grad_norm": 113.79582977294922, "learning_rate": 7.49727880765246e-06, "loss": 21.0909, "step": 200090 }, { "epoch": 0.4042146600031513, "grad_norm": 486.31005859375, "learning_rate": 7.496976392070358e-06, "loss": 29.4842, "step": 200100 }, { "epoch": 0.40423486063583514, "grad_norm": 393.4908142089844, "learning_rate": 7.496673964318295e-06, "loss": 10.0424, "step": 200110 }, { "epoch": 0.40425506126851896, "grad_norm": 307.1171875, "learning_rate": 7.496371524397747e-06, "loss": 42.8031, "step": 200120 }, { "epoch": 0.4042752619012027, "grad_norm": 160.3863983154297, "learning_rate": 7.496069072310185e-06, "loss": 13.6151, "step": 200130 }, { "epoch": 0.40429546253388654, "grad_norm": 251.89181518554688, "learning_rate": 7.495766608057087e-06, "loss": 19.5588, "step": 200140 }, { "epoch": 0.40431566316657036, "grad_norm": 147.1830596923828, "learning_rate": 7.495464131639923e-06, "loss": 11.1564, "step": 200150 }, { "epoch": 0.4043358637992542, "grad_norm": 318.1654052734375, "learning_rate": 7.495161643060171e-06, "loss": 25.0333, "step": 200160 }, { "epoch": 0.404356064431938, "grad_norm": 338.8375244140625, "learning_rate": 7.494859142319304e-06, "loss": 29.2886, "step": 200170 }, { "epoch": 0.4043762650646218, "grad_norm": 213.86183166503906, "learning_rate": 7.494556629418796e-06, "loss": 31.1467, "step": 200180 }, { "epoch": 0.40439646569730564, "grad_norm": 264.6633605957031, "learning_rate": 7.494254104360121e-06, "loss": 25.5135, "step": 200190 }, { "epoch": 0.40441666632998946, "grad_norm": 269.99652099609375, "learning_rate": 7.493951567144755e-06, "loss": 16.2605, "step": 200200 }, { "epoch": 0.4044368669626733, "grad_norm": 206.89358520507812, "learning_rate": 7.493649017774172e-06, "loss": 15.9692, "step": 200210 }, { "epoch": 0.4044570675953571, "grad_norm": 62.03826141357422, "learning_rate": 7.493346456249844e-06, "loss": 17.2481, "step": 200220 }, { "epoch": 0.4044772682280409, "grad_norm": 484.0910339355469, "learning_rate": 7.493043882573249e-06, "loss": 13.5144, "step": 200230 }, { "epoch": 0.40449746886072474, "grad_norm": 410.0087585449219, "learning_rate": 7.49274129674586e-06, "loss": 29.6309, "step": 200240 }, { "epoch": 0.40451766949340856, "grad_norm": 283.734130859375, "learning_rate": 7.4924386987691525e-06, "loss": 15.6167, "step": 200250 }, { "epoch": 0.4045378701260923, "grad_norm": 303.5086364746094, "learning_rate": 7.492136088644601e-06, "loss": 13.3896, "step": 200260 }, { "epoch": 0.40455807075877614, "grad_norm": 249.42576599121094, "learning_rate": 7.4918334663736805e-06, "loss": 26.8, "step": 200270 }, { "epoch": 0.40457827139145996, "grad_norm": 233.20301818847656, "learning_rate": 7.491530831957866e-06, "loss": 26.0301, "step": 200280 }, { "epoch": 0.4045984720241438, "grad_norm": 319.3768310546875, "learning_rate": 7.491228185398633e-06, "loss": 26.7866, "step": 200290 }, { "epoch": 0.4046186726568276, "grad_norm": 425.7076110839844, "learning_rate": 7.490925526697455e-06, "loss": 32.2912, "step": 200300 }, { "epoch": 0.4046388732895114, "grad_norm": 0.0, "learning_rate": 7.490622855855808e-06, "loss": 29.7672, "step": 200310 }, { "epoch": 0.40465907392219524, "grad_norm": 193.47201538085938, "learning_rate": 7.490320172875169e-06, "loss": 21.3807, "step": 200320 }, { "epoch": 0.40467927455487906, "grad_norm": 52.747344970703125, "learning_rate": 7.490017477757009e-06, "loss": 14.4457, "step": 200330 }, { "epoch": 0.4046994751875629, "grad_norm": 140.22921752929688, "learning_rate": 7.489714770502807e-06, "loss": 19.8915, "step": 200340 }, { "epoch": 0.4047196758202467, "grad_norm": 246.37094116210938, "learning_rate": 7.489412051114038e-06, "loss": 23.6253, "step": 200350 }, { "epoch": 0.4047398764529305, "grad_norm": 302.69775390625, "learning_rate": 7.4891093195921764e-06, "loss": 11.8308, "step": 200360 }, { "epoch": 0.40476007708561434, "grad_norm": 300.3870544433594, "learning_rate": 7.488806575938697e-06, "loss": 14.3703, "step": 200370 }, { "epoch": 0.40478027771829816, "grad_norm": 304.1172180175781, "learning_rate": 7.488503820155075e-06, "loss": 17.8708, "step": 200380 }, { "epoch": 0.40480047835098193, "grad_norm": 214.02464294433594, "learning_rate": 7.488201052242791e-06, "loss": 28.1682, "step": 200390 }, { "epoch": 0.40482067898366575, "grad_norm": 349.71112060546875, "learning_rate": 7.487898272203314e-06, "loss": 19.4524, "step": 200400 }, { "epoch": 0.40484087961634957, "grad_norm": 156.09033203125, "learning_rate": 7.487595480038122e-06, "loss": 23.0463, "step": 200410 }, { "epoch": 0.4048610802490334, "grad_norm": 249.72958374023438, "learning_rate": 7.487292675748693e-06, "loss": 21.4146, "step": 200420 }, { "epoch": 0.4048812808817172, "grad_norm": 197.47256469726562, "learning_rate": 7.4869898593365e-06, "loss": 15.5335, "step": 200430 }, { "epoch": 0.40490148151440103, "grad_norm": 180.19688415527344, "learning_rate": 7.4866870308030215e-06, "loss": 25.9209, "step": 200440 }, { "epoch": 0.40492168214708485, "grad_norm": 94.06522369384766, "learning_rate": 7.486384190149731e-06, "loss": 27.7618, "step": 200450 }, { "epoch": 0.40494188277976867, "grad_norm": 303.9765625, "learning_rate": 7.486081337378106e-06, "loss": 14.9389, "step": 200460 }, { "epoch": 0.4049620834124525, "grad_norm": 426.7656555175781, "learning_rate": 7.485778472489622e-06, "loss": 16.4719, "step": 200470 }, { "epoch": 0.4049822840451363, "grad_norm": 470.8321838378906, "learning_rate": 7.485475595485756e-06, "loss": 14.9174, "step": 200480 }, { "epoch": 0.40500248467782013, "grad_norm": 412.90875244140625, "learning_rate": 7.4851727063679806e-06, "loss": 9.5622, "step": 200490 }, { "epoch": 0.40502268531050395, "grad_norm": 229.27427673339844, "learning_rate": 7.484869805137778e-06, "loss": 18.7808, "step": 200500 }, { "epoch": 0.4050428859431877, "grad_norm": 232.1036376953125, "learning_rate": 7.48456689179662e-06, "loss": 19.4411, "step": 200510 }, { "epoch": 0.40506308657587153, "grad_norm": 149.75094604492188, "learning_rate": 7.484263966345984e-06, "loss": 14.6361, "step": 200520 }, { "epoch": 0.40508328720855535, "grad_norm": 373.34405517578125, "learning_rate": 7.483961028787346e-06, "loss": 16.7336, "step": 200530 }, { "epoch": 0.4051034878412392, "grad_norm": 254.04225158691406, "learning_rate": 7.483658079122185e-06, "loss": 17.7092, "step": 200540 }, { "epoch": 0.405123688473923, "grad_norm": 471.966064453125, "learning_rate": 7.483355117351976e-06, "loss": 14.8871, "step": 200550 }, { "epoch": 0.4051438891066068, "grad_norm": 360.5208435058594, "learning_rate": 7.483052143478193e-06, "loss": 16.6812, "step": 200560 }, { "epoch": 0.40516408973929063, "grad_norm": 262.086181640625, "learning_rate": 7.482749157502317e-06, "loss": 28.2642, "step": 200570 }, { "epoch": 0.40518429037197445, "grad_norm": 274.7903137207031, "learning_rate": 7.482446159425822e-06, "loss": 9.6046, "step": 200580 }, { "epoch": 0.4052044910046583, "grad_norm": 883.265625, "learning_rate": 7.482143149250185e-06, "loss": 21.6702, "step": 200590 }, { "epoch": 0.4052246916373421, "grad_norm": 210.96331787109375, "learning_rate": 7.481840126976885e-06, "loss": 12.0101, "step": 200600 }, { "epoch": 0.4052448922700259, "grad_norm": 290.23822021484375, "learning_rate": 7.481537092607396e-06, "loss": 15.6284, "step": 200610 }, { "epoch": 0.40526509290270973, "grad_norm": 3.2993383407592773, "learning_rate": 7.4812340461431965e-06, "loss": 23.0564, "step": 200620 }, { "epoch": 0.40528529353539355, "grad_norm": 193.82957458496094, "learning_rate": 7.480930987585763e-06, "loss": 15.4396, "step": 200630 }, { "epoch": 0.4053054941680773, "grad_norm": 695.3344116210938, "learning_rate": 7.480627916936574e-06, "loss": 25.3935, "step": 200640 }, { "epoch": 0.40532569480076114, "grad_norm": 172.79408264160156, "learning_rate": 7.480324834197103e-06, "loss": 20.299, "step": 200650 }, { "epoch": 0.40534589543344496, "grad_norm": 77.49116516113281, "learning_rate": 7.480021739368831e-06, "loss": 18.4435, "step": 200660 }, { "epoch": 0.4053660960661288, "grad_norm": 416.256591796875, "learning_rate": 7.479718632453233e-06, "loss": 19.1791, "step": 200670 }, { "epoch": 0.4053862966988126, "grad_norm": 250.17233276367188, "learning_rate": 7.4794155134517885e-06, "loss": 19.6475, "step": 200680 }, { "epoch": 0.4054064973314964, "grad_norm": 385.2862548828125, "learning_rate": 7.479112382365973e-06, "loss": 25.5815, "step": 200690 }, { "epoch": 0.40542669796418024, "grad_norm": 373.034912109375, "learning_rate": 7.478809239197264e-06, "loss": 30.7924, "step": 200700 }, { "epoch": 0.40544689859686406, "grad_norm": 76.58561706542969, "learning_rate": 7.47850608394714e-06, "loss": 12.6703, "step": 200710 }, { "epoch": 0.4054670992295479, "grad_norm": 404.6880798339844, "learning_rate": 7.478202916617077e-06, "loss": 13.6554, "step": 200720 }, { "epoch": 0.4054872998622317, "grad_norm": 0.0, "learning_rate": 7.477899737208555e-06, "loss": 18.8262, "step": 200730 }, { "epoch": 0.4055075004949155, "grad_norm": 401.7245788574219, "learning_rate": 7.477596545723049e-06, "loss": 23.878, "step": 200740 }, { "epoch": 0.40552770112759934, "grad_norm": 239.3308563232422, "learning_rate": 7.477293342162038e-06, "loss": 20.9405, "step": 200750 }, { "epoch": 0.40554790176028316, "grad_norm": 178.26229858398438, "learning_rate": 7.476990126527e-06, "loss": 17.6077, "step": 200760 }, { "epoch": 0.4055681023929669, "grad_norm": 139.00753784179688, "learning_rate": 7.476686898819414e-06, "loss": 7.7755, "step": 200770 }, { "epoch": 0.40558830302565074, "grad_norm": 30.433666229248047, "learning_rate": 7.476383659040754e-06, "loss": 20.168, "step": 200780 }, { "epoch": 0.40560850365833456, "grad_norm": 251.48321533203125, "learning_rate": 7.476080407192502e-06, "loss": 15.128, "step": 200790 }, { "epoch": 0.4056287042910184, "grad_norm": 144.9492645263672, "learning_rate": 7.475777143276133e-06, "loss": 15.847, "step": 200800 }, { "epoch": 0.4056489049237022, "grad_norm": 441.35675048828125, "learning_rate": 7.475473867293127e-06, "loss": 24.0854, "step": 200810 }, { "epoch": 0.405669105556386, "grad_norm": 175.20635986328125, "learning_rate": 7.475170579244964e-06, "loss": 16.3253, "step": 200820 }, { "epoch": 0.40568930618906984, "grad_norm": 598.854248046875, "learning_rate": 7.474867279133115e-06, "loss": 25.5684, "step": 200830 }, { "epoch": 0.40570950682175366, "grad_norm": 392.0216979980469, "learning_rate": 7.474563966959068e-06, "loss": 16.0376, "step": 200840 }, { "epoch": 0.4057297074544375, "grad_norm": 52.39712905883789, "learning_rate": 7.4742606427242935e-06, "loss": 23.8842, "step": 200850 }, { "epoch": 0.4057499080871213, "grad_norm": 427.8211975097656, "learning_rate": 7.473957306430273e-06, "loss": 22.3839, "step": 200860 }, { "epoch": 0.4057701087198051, "grad_norm": 184.8780059814453, "learning_rate": 7.473653958078484e-06, "loss": 15.8045, "step": 200870 }, { "epoch": 0.40579030935248894, "grad_norm": 319.7393493652344, "learning_rate": 7.473350597670407e-06, "loss": 26.9051, "step": 200880 }, { "epoch": 0.40581050998517276, "grad_norm": 92.69673919677734, "learning_rate": 7.47304722520752e-06, "loss": 14.5637, "step": 200890 }, { "epoch": 0.4058307106178565, "grad_norm": 348.9046936035156, "learning_rate": 7.4727438406912986e-06, "loss": 19.9684, "step": 200900 }, { "epoch": 0.40585091125054035, "grad_norm": 448.63043212890625, "learning_rate": 7.472440444123224e-06, "loss": 19.9968, "step": 200910 }, { "epoch": 0.40587111188322417, "grad_norm": 91.20626831054688, "learning_rate": 7.472137035504776e-06, "loss": 11.94, "step": 200920 }, { "epoch": 0.405891312515908, "grad_norm": 265.0384521484375, "learning_rate": 7.471833614837431e-06, "loss": 19.9643, "step": 200930 }, { "epoch": 0.4059115131485918, "grad_norm": 150.7137451171875, "learning_rate": 7.471530182122668e-06, "loss": 25.3677, "step": 200940 }, { "epoch": 0.4059317137812756, "grad_norm": 402.5342712402344, "learning_rate": 7.471226737361968e-06, "loss": 18.9408, "step": 200950 }, { "epoch": 0.40595191441395945, "grad_norm": 295.8528747558594, "learning_rate": 7.470923280556808e-06, "loss": 22.684, "step": 200960 }, { "epoch": 0.40597211504664327, "grad_norm": 308.02191162109375, "learning_rate": 7.4706198117086685e-06, "loss": 9.8793, "step": 200970 }, { "epoch": 0.4059923156793271, "grad_norm": 319.5994873046875, "learning_rate": 7.4703163308190275e-06, "loss": 14.5145, "step": 200980 }, { "epoch": 0.4060125163120109, "grad_norm": 551.9739379882812, "learning_rate": 7.470012837889362e-06, "loss": 25.0849, "step": 200990 }, { "epoch": 0.4060327169446947, "grad_norm": 369.46575927734375, "learning_rate": 7.469709332921155e-06, "loss": 16.7495, "step": 201000 }, { "epoch": 0.40605291757737855, "grad_norm": 288.7779846191406, "learning_rate": 7.469405815915885e-06, "loss": 22.974, "step": 201010 }, { "epoch": 0.4060731182100623, "grad_norm": 560.9701538085938, "learning_rate": 7.469102286875029e-06, "loss": 23.4036, "step": 201020 }, { "epoch": 0.40609331884274613, "grad_norm": 337.5340576171875, "learning_rate": 7.46879874580007e-06, "loss": 17.0708, "step": 201030 }, { "epoch": 0.40611351947542995, "grad_norm": 427.7958984375, "learning_rate": 7.468495192692484e-06, "loss": 27.3462, "step": 201040 }, { "epoch": 0.40613372010811377, "grad_norm": 236.15048217773438, "learning_rate": 7.468191627553752e-06, "loss": 16.4916, "step": 201050 }, { "epoch": 0.4061539207407976, "grad_norm": 599.4652709960938, "learning_rate": 7.467888050385355e-06, "loss": 32.5534, "step": 201060 }, { "epoch": 0.4061741213734814, "grad_norm": 245.82057189941406, "learning_rate": 7.467584461188769e-06, "loss": 17.8026, "step": 201070 }, { "epoch": 0.40619432200616523, "grad_norm": 176.00413513183594, "learning_rate": 7.467280859965476e-06, "loss": 20.9547, "step": 201080 }, { "epoch": 0.40621452263884905, "grad_norm": 198.5925750732422, "learning_rate": 7.4669772467169555e-06, "loss": 18.6221, "step": 201090 }, { "epoch": 0.40623472327153287, "grad_norm": 227.9332733154297, "learning_rate": 7.4666736214446855e-06, "loss": 11.2847, "step": 201100 }, { "epoch": 0.4062549239042167, "grad_norm": 117.17353057861328, "learning_rate": 7.46636998415015e-06, "loss": 31.6461, "step": 201110 }, { "epoch": 0.4062751245369005, "grad_norm": 184.00904846191406, "learning_rate": 7.466066334834825e-06, "loss": 14.1346, "step": 201120 }, { "epoch": 0.40629532516958433, "grad_norm": 152.1755828857422, "learning_rate": 7.465762673500192e-06, "loss": 22.5796, "step": 201130 }, { "epoch": 0.40631552580226815, "grad_norm": 217.8818359375, "learning_rate": 7.465459000147731e-06, "loss": 16.6648, "step": 201140 }, { "epoch": 0.4063357264349519, "grad_norm": 254.74468994140625, "learning_rate": 7.46515531477892e-06, "loss": 17.0713, "step": 201150 }, { "epoch": 0.40635592706763574, "grad_norm": 58.350135803222656, "learning_rate": 7.464851617395244e-06, "loss": 11.6619, "step": 201160 }, { "epoch": 0.40637612770031956, "grad_norm": 229.7838592529297, "learning_rate": 7.464547907998179e-06, "loss": 22.2018, "step": 201170 }, { "epoch": 0.4063963283330034, "grad_norm": 292.2922058105469, "learning_rate": 7.464244186589206e-06, "loss": 17.7155, "step": 201180 }, { "epoch": 0.4064165289656872, "grad_norm": 680.5963134765625, "learning_rate": 7.4639404531698054e-06, "loss": 17.7068, "step": 201190 }, { "epoch": 0.406436729598371, "grad_norm": 380.7636413574219, "learning_rate": 7.463636707741458e-06, "loss": 22.7946, "step": 201200 }, { "epoch": 0.40645693023105484, "grad_norm": 199.4380645751953, "learning_rate": 7.463332950305646e-06, "loss": 21.9893, "step": 201210 }, { "epoch": 0.40647713086373866, "grad_norm": 483.46173095703125, "learning_rate": 7.463029180863846e-06, "loss": 20.2809, "step": 201220 }, { "epoch": 0.4064973314964225, "grad_norm": 680.7317504882812, "learning_rate": 7.462725399417541e-06, "loss": 35.4942, "step": 201230 }, { "epoch": 0.4065175321291063, "grad_norm": 479.9726257324219, "learning_rate": 7.4624216059682106e-06, "loss": 33.2931, "step": 201240 }, { "epoch": 0.4065377327617901, "grad_norm": 302.7505187988281, "learning_rate": 7.462117800517337e-06, "loss": 24.6516, "step": 201250 }, { "epoch": 0.40655793339447394, "grad_norm": 350.97198486328125, "learning_rate": 7.461813983066398e-06, "loss": 27.9183, "step": 201260 }, { "epoch": 0.40657813402715776, "grad_norm": 145.878662109375, "learning_rate": 7.461510153616879e-06, "loss": 12.231, "step": 201270 }, { "epoch": 0.4065983346598415, "grad_norm": 4.10416841506958, "learning_rate": 7.461206312170255e-06, "loss": 13.8297, "step": 201280 }, { "epoch": 0.40661853529252534, "grad_norm": 355.8040466308594, "learning_rate": 7.460902458728012e-06, "loss": 23.4143, "step": 201290 }, { "epoch": 0.40663873592520916, "grad_norm": 198.96527099609375, "learning_rate": 7.460598593291628e-06, "loss": 30.4962, "step": 201300 }, { "epoch": 0.406658936557893, "grad_norm": 217.9447784423828, "learning_rate": 7.460294715862586e-06, "loss": 22.8561, "step": 201310 }, { "epoch": 0.4066791371905768, "grad_norm": 449.6788635253906, "learning_rate": 7.459990826442366e-06, "loss": 26.5574, "step": 201320 }, { "epoch": 0.4066993378232606, "grad_norm": 277.52349853515625, "learning_rate": 7.459686925032446e-06, "loss": 21.5419, "step": 201330 }, { "epoch": 0.40671953845594444, "grad_norm": 186.83250427246094, "learning_rate": 7.459383011634314e-06, "loss": 14.2965, "step": 201340 }, { "epoch": 0.40673973908862826, "grad_norm": 412.6858215332031, "learning_rate": 7.459079086249445e-06, "loss": 29.0029, "step": 201350 }, { "epoch": 0.4067599397213121, "grad_norm": 775.7713012695312, "learning_rate": 7.458775148879325e-06, "loss": 25.5583, "step": 201360 }, { "epoch": 0.4067801403539959, "grad_norm": 289.3417663574219, "learning_rate": 7.458471199525431e-06, "loss": 25.4264, "step": 201370 }, { "epoch": 0.4068003409866797, "grad_norm": 370.2702941894531, "learning_rate": 7.458167238189249e-06, "loss": 29.1446, "step": 201380 }, { "epoch": 0.40682054161936354, "grad_norm": 248.77935791015625, "learning_rate": 7.457863264872256e-06, "loss": 12.6867, "step": 201390 }, { "epoch": 0.40684074225204736, "grad_norm": 0.0, "learning_rate": 7.4575592795759356e-06, "loss": 8.3878, "step": 201400 }, { "epoch": 0.4068609428847311, "grad_norm": 115.4716567993164, "learning_rate": 7.45725528230177e-06, "loss": 15.3985, "step": 201410 }, { "epoch": 0.40688114351741494, "grad_norm": 382.2156982421875, "learning_rate": 7.456951273051239e-06, "loss": 15.2451, "step": 201420 }, { "epoch": 0.40690134415009876, "grad_norm": 326.85577392578125, "learning_rate": 7.456647251825828e-06, "loss": 22.7506, "step": 201430 }, { "epoch": 0.4069215447827826, "grad_norm": 76.8209457397461, "learning_rate": 7.4563432186270135e-06, "loss": 10.5146, "step": 201440 }, { "epoch": 0.4069417454154664, "grad_norm": 62.84104919433594, "learning_rate": 7.456039173456282e-06, "loss": 16.7703, "step": 201450 }, { "epoch": 0.4069619460481502, "grad_norm": 364.99658203125, "learning_rate": 7.455735116315113e-06, "loss": 18.1091, "step": 201460 }, { "epoch": 0.40698214668083404, "grad_norm": 49.99381637573242, "learning_rate": 7.455431047204988e-06, "loss": 14.7735, "step": 201470 }, { "epoch": 0.40700234731351786, "grad_norm": 210.0736846923828, "learning_rate": 7.455126966127392e-06, "loss": 15.806, "step": 201480 }, { "epoch": 0.4070225479462017, "grad_norm": 127.91079711914062, "learning_rate": 7.4548228730838025e-06, "loss": 14.0307, "step": 201490 }, { "epoch": 0.4070427485788855, "grad_norm": 363.0612487792969, "learning_rate": 7.454518768075705e-06, "loss": 18.7536, "step": 201500 }, { "epoch": 0.4070629492115693, "grad_norm": 196.97535705566406, "learning_rate": 7.454214651104581e-06, "loss": 10.0935, "step": 201510 }, { "epoch": 0.40708314984425314, "grad_norm": 429.4825439453125, "learning_rate": 7.453910522171912e-06, "loss": 17.0095, "step": 201520 }, { "epoch": 0.40710335047693696, "grad_norm": 295.8457946777344, "learning_rate": 7.453606381279181e-06, "loss": 18.0869, "step": 201530 }, { "epoch": 0.40712355110962073, "grad_norm": 327.6145324707031, "learning_rate": 7.45330222842787e-06, "loss": 26.0005, "step": 201540 }, { "epoch": 0.40714375174230455, "grad_norm": 560.4935913085938, "learning_rate": 7.452998063619461e-06, "loss": 11.0935, "step": 201550 }, { "epoch": 0.40716395237498837, "grad_norm": 384.98907470703125, "learning_rate": 7.452693886855438e-06, "loss": 19.3453, "step": 201560 }, { "epoch": 0.4071841530076722, "grad_norm": 250.84930419921875, "learning_rate": 7.452389698137281e-06, "loss": 37.6146, "step": 201570 }, { "epoch": 0.407204353640356, "grad_norm": 28.037702560424805, "learning_rate": 7.452085497466476e-06, "loss": 14.4218, "step": 201580 }, { "epoch": 0.40722455427303983, "grad_norm": 189.10501098632812, "learning_rate": 7.451781284844503e-06, "loss": 7.5377, "step": 201590 }, { "epoch": 0.40724475490572365, "grad_norm": 130.6306610107422, "learning_rate": 7.451477060272844e-06, "loss": 18.1011, "step": 201600 }, { "epoch": 0.40726495553840747, "grad_norm": 49.98479080200195, "learning_rate": 7.4511728237529845e-06, "loss": 22.9648, "step": 201610 }, { "epoch": 0.4072851561710913, "grad_norm": 547.0013427734375, "learning_rate": 7.450868575286405e-06, "loss": 28.3463, "step": 201620 }, { "epoch": 0.4073053568037751, "grad_norm": 218.30516052246094, "learning_rate": 7.450564314874591e-06, "loss": 21.936, "step": 201630 }, { "epoch": 0.40732555743645893, "grad_norm": 318.4761962890625, "learning_rate": 7.450260042519022e-06, "loss": 21.4097, "step": 201640 }, { "epoch": 0.40734575806914275, "grad_norm": 125.58501434326172, "learning_rate": 7.449955758221184e-06, "loss": 16.0479, "step": 201650 }, { "epoch": 0.4073659587018265, "grad_norm": 248.72921752929688, "learning_rate": 7.449651461982559e-06, "loss": 32.3734, "step": 201660 }, { "epoch": 0.40738615933451033, "grad_norm": 267.4306640625, "learning_rate": 7.449347153804628e-06, "loss": 18.6635, "step": 201670 }, { "epoch": 0.40740635996719415, "grad_norm": 297.6438903808594, "learning_rate": 7.4490428336888775e-06, "loss": 29.2152, "step": 201680 }, { "epoch": 0.407426560599878, "grad_norm": 296.4525451660156, "learning_rate": 7.4487385016367885e-06, "loss": 44.6551, "step": 201690 }, { "epoch": 0.4074467612325618, "grad_norm": 268.97021484375, "learning_rate": 7.448434157649846e-06, "loss": 24.2784, "step": 201700 }, { "epoch": 0.4074669618652456, "grad_norm": 310.8606262207031, "learning_rate": 7.448129801729531e-06, "loss": 26.5189, "step": 201710 }, { "epoch": 0.40748716249792943, "grad_norm": 309.08660888671875, "learning_rate": 7.447825433877329e-06, "loss": 15.8835, "step": 201720 }, { "epoch": 0.40750736313061325, "grad_norm": 334.3938293457031, "learning_rate": 7.447521054094723e-06, "loss": 15.521, "step": 201730 }, { "epoch": 0.4075275637632971, "grad_norm": 208.1516876220703, "learning_rate": 7.447216662383196e-06, "loss": 30.1816, "step": 201740 }, { "epoch": 0.4075477643959809, "grad_norm": 279.76397705078125, "learning_rate": 7.446912258744232e-06, "loss": 21.2723, "step": 201750 }, { "epoch": 0.4075679650286647, "grad_norm": 446.3786315917969, "learning_rate": 7.446607843179314e-06, "loss": 25.6034, "step": 201760 }, { "epoch": 0.40758816566134853, "grad_norm": 0.0, "learning_rate": 7.446303415689927e-06, "loss": 19.0322, "step": 201770 }, { "epoch": 0.40760836629403235, "grad_norm": 144.20736694335938, "learning_rate": 7.4459989762775516e-06, "loss": 27.8846, "step": 201780 }, { "epoch": 0.4076285669267161, "grad_norm": 208.34288024902344, "learning_rate": 7.445694524943677e-06, "loss": 15.4608, "step": 201790 }, { "epoch": 0.40764876755939994, "grad_norm": 333.7382507324219, "learning_rate": 7.445390061689782e-06, "loss": 13.5286, "step": 201800 }, { "epoch": 0.40766896819208376, "grad_norm": 278.0628967285156, "learning_rate": 7.4450855865173534e-06, "loss": 20.9035, "step": 201810 }, { "epoch": 0.4076891688247676, "grad_norm": 276.7947692871094, "learning_rate": 7.444781099427873e-06, "loss": 18.9411, "step": 201820 }, { "epoch": 0.4077093694574514, "grad_norm": 305.7211608886719, "learning_rate": 7.444476600422827e-06, "loss": 17.1297, "step": 201830 }, { "epoch": 0.4077295700901352, "grad_norm": 242.77749633789062, "learning_rate": 7.444172089503698e-06, "loss": 9.1844, "step": 201840 }, { "epoch": 0.40774977072281904, "grad_norm": 40.39383316040039, "learning_rate": 7.443867566671971e-06, "loss": 6.9335, "step": 201850 }, { "epoch": 0.40776997135550286, "grad_norm": 207.13015747070312, "learning_rate": 7.4435630319291295e-06, "loss": 19.1076, "step": 201860 }, { "epoch": 0.4077901719881867, "grad_norm": 106.8858413696289, "learning_rate": 7.4432584852766575e-06, "loss": 17.5557, "step": 201870 }, { "epoch": 0.4078103726208705, "grad_norm": 402.64410400390625, "learning_rate": 7.442953926716042e-06, "loss": 28.3276, "step": 201880 }, { "epoch": 0.4078305732535543, "grad_norm": 416.41033935546875, "learning_rate": 7.442649356248765e-06, "loss": 23.3642, "step": 201890 }, { "epoch": 0.40785077388623814, "grad_norm": 272.0313720703125, "learning_rate": 7.44234477387631e-06, "loss": 14.0464, "step": 201900 }, { "epoch": 0.40787097451892196, "grad_norm": 143.98280334472656, "learning_rate": 7.442040179600163e-06, "loss": 12.7754, "step": 201910 }, { "epoch": 0.4078911751516057, "grad_norm": 1199.779052734375, "learning_rate": 7.4417355734218085e-06, "loss": 43.9639, "step": 201920 }, { "epoch": 0.40791137578428954, "grad_norm": 0.0, "learning_rate": 7.441430955342733e-06, "loss": 12.5379, "step": 201930 }, { "epoch": 0.40793157641697336, "grad_norm": 0.0, "learning_rate": 7.441126325364415e-06, "loss": 14.4754, "step": 201940 }, { "epoch": 0.4079517770496572, "grad_norm": 384.8746032714844, "learning_rate": 7.440821683488346e-06, "loss": 24.8033, "step": 201950 }, { "epoch": 0.407971977682341, "grad_norm": 179.961669921875, "learning_rate": 7.440517029716008e-06, "loss": 23.1304, "step": 201960 }, { "epoch": 0.4079921783150248, "grad_norm": 235.22872924804688, "learning_rate": 7.4402123640488855e-06, "loss": 21.3821, "step": 201970 }, { "epoch": 0.40801237894770864, "grad_norm": 35.12517547607422, "learning_rate": 7.439907686488463e-06, "loss": 15.5233, "step": 201980 }, { "epoch": 0.40803257958039246, "grad_norm": 825.1240234375, "learning_rate": 7.439602997036229e-06, "loss": 14.8238, "step": 201990 }, { "epoch": 0.4080527802130763, "grad_norm": 322.73779296875, "learning_rate": 7.4392982956936644e-06, "loss": 27.556, "step": 202000 }, { "epoch": 0.4080729808457601, "grad_norm": 0.0, "learning_rate": 7.438993582462255e-06, "loss": 33.179, "step": 202010 }, { "epoch": 0.4080931814784439, "grad_norm": 223.98135375976562, "learning_rate": 7.438688857343488e-06, "loss": 16.7736, "step": 202020 }, { "epoch": 0.40811338211112774, "grad_norm": 475.3682861328125, "learning_rate": 7.438384120338846e-06, "loss": 14.4623, "step": 202030 }, { "epoch": 0.40813358274381156, "grad_norm": 317.32476806640625, "learning_rate": 7.4380793714498175e-06, "loss": 11.4131, "step": 202040 }, { "epoch": 0.4081537833764953, "grad_norm": 456.9071350097656, "learning_rate": 7.437774610677884e-06, "loss": 29.8613, "step": 202050 }, { "epoch": 0.40817398400917915, "grad_norm": 159.4449920654297, "learning_rate": 7.437469838024534e-06, "loss": 16.0196, "step": 202060 }, { "epoch": 0.40819418464186297, "grad_norm": 359.5242004394531, "learning_rate": 7.437165053491251e-06, "loss": 17.7385, "step": 202070 }, { "epoch": 0.4082143852745468, "grad_norm": 48.9443473815918, "learning_rate": 7.43686025707952e-06, "loss": 13.7438, "step": 202080 }, { "epoch": 0.4082345859072306, "grad_norm": 522.9203491210938, "learning_rate": 7.43655544879083e-06, "loss": 10.3101, "step": 202090 }, { "epoch": 0.4082547865399144, "grad_norm": 175.78424072265625, "learning_rate": 7.436250628626662e-06, "loss": 16.092, "step": 202100 }, { "epoch": 0.40827498717259825, "grad_norm": 454.2933349609375, "learning_rate": 7.4359457965885066e-06, "loss": 23.8741, "step": 202110 }, { "epoch": 0.40829518780528207, "grad_norm": 429.9792785644531, "learning_rate": 7.435640952677844e-06, "loss": 23.4487, "step": 202120 }, { "epoch": 0.4083153884379659, "grad_norm": 469.8943176269531, "learning_rate": 7.435336096896164e-06, "loss": 32.5806, "step": 202130 }, { "epoch": 0.4083355890706497, "grad_norm": 6.043949127197266, "learning_rate": 7.435031229244951e-06, "loss": 22.1368, "step": 202140 }, { "epoch": 0.4083557897033335, "grad_norm": 267.8232116699219, "learning_rate": 7.434726349725692e-06, "loss": 27.6577, "step": 202150 }, { "epoch": 0.40837599033601735, "grad_norm": 174.5907745361328, "learning_rate": 7.434421458339871e-06, "loss": 20.5765, "step": 202160 }, { "epoch": 0.40839619096870117, "grad_norm": 298.5137023925781, "learning_rate": 7.434116555088975e-06, "loss": 24.6516, "step": 202170 }, { "epoch": 0.40841639160138493, "grad_norm": 348.0345764160156, "learning_rate": 7.4338116399744905e-06, "loss": 21.3567, "step": 202180 }, { "epoch": 0.40843659223406875, "grad_norm": 347.6243591308594, "learning_rate": 7.433506712997903e-06, "loss": 27.7789, "step": 202190 }, { "epoch": 0.40845679286675257, "grad_norm": 410.5999755859375, "learning_rate": 7.433201774160701e-06, "loss": 17.546, "step": 202200 }, { "epoch": 0.4084769934994364, "grad_norm": 319.9814758300781, "learning_rate": 7.432896823464366e-06, "loss": 14.433, "step": 202210 }, { "epoch": 0.4084971941321202, "grad_norm": 311.9885559082031, "learning_rate": 7.432591860910389e-06, "loss": 29.9081, "step": 202220 }, { "epoch": 0.40851739476480403, "grad_norm": 432.7333679199219, "learning_rate": 7.432286886500253e-06, "loss": 26.9117, "step": 202230 }, { "epoch": 0.40853759539748785, "grad_norm": 507.9525146484375, "learning_rate": 7.431981900235446e-06, "loss": 20.8449, "step": 202240 }, { "epoch": 0.40855779603017167, "grad_norm": 213.31509399414062, "learning_rate": 7.431676902117453e-06, "loss": 15.625, "step": 202250 }, { "epoch": 0.4085779966628555, "grad_norm": 324.9443359375, "learning_rate": 7.431371892147763e-06, "loss": 21.3856, "step": 202260 }, { "epoch": 0.4085981972955393, "grad_norm": 10.106415748596191, "learning_rate": 7.431066870327861e-06, "loss": 19.3375, "step": 202270 }, { "epoch": 0.40861839792822313, "grad_norm": 1494.6552734375, "learning_rate": 7.430761836659235e-06, "loss": 26.8062, "step": 202280 }, { "epoch": 0.40863859856090695, "grad_norm": 310.5498962402344, "learning_rate": 7.430456791143369e-06, "loss": 29.5094, "step": 202290 }, { "epoch": 0.4086587991935907, "grad_norm": 302.2179260253906, "learning_rate": 7.430151733781752e-06, "loss": 14.8611, "step": 202300 }, { "epoch": 0.40867899982627454, "grad_norm": 138.7933807373047, "learning_rate": 7.42984666457587e-06, "loss": 10.3499, "step": 202310 }, { "epoch": 0.40869920045895836, "grad_norm": 171.27093505859375, "learning_rate": 7.42954158352721e-06, "loss": 34.8057, "step": 202320 }, { "epoch": 0.4087194010916422, "grad_norm": 165.5064239501953, "learning_rate": 7.42923649063726e-06, "loss": 17.7142, "step": 202330 }, { "epoch": 0.408739601724326, "grad_norm": 515.0400390625, "learning_rate": 7.428931385907505e-06, "loss": 23.6431, "step": 202340 }, { "epoch": 0.4087598023570098, "grad_norm": 187.26495361328125, "learning_rate": 7.428626269339433e-06, "loss": 19.025, "step": 202350 }, { "epoch": 0.40878000298969364, "grad_norm": 309.9441223144531, "learning_rate": 7.428321140934532e-06, "loss": 31.6632, "step": 202360 }, { "epoch": 0.40880020362237746, "grad_norm": 280.3335266113281, "learning_rate": 7.428016000694287e-06, "loss": 15.2578, "step": 202370 }, { "epoch": 0.4088204042550613, "grad_norm": 0.0, "learning_rate": 7.427710848620188e-06, "loss": 15.6843, "step": 202380 }, { "epoch": 0.4088406048877451, "grad_norm": 415.28118896484375, "learning_rate": 7.4274056847137185e-06, "loss": 34.8269, "step": 202390 }, { "epoch": 0.4088608055204289, "grad_norm": 103.96068572998047, "learning_rate": 7.42710050897637e-06, "loss": 40.055, "step": 202400 }, { "epoch": 0.40888100615311274, "grad_norm": 191.7279510498047, "learning_rate": 7.426795321409628e-06, "loss": 16.3247, "step": 202410 }, { "epoch": 0.40890120678579656, "grad_norm": 350.35870361328125, "learning_rate": 7.42649012201498e-06, "loss": 28.8513, "step": 202420 }, { "epoch": 0.4089214074184803, "grad_norm": 285.3521423339844, "learning_rate": 7.426184910793914e-06, "loss": 10.1632, "step": 202430 }, { "epoch": 0.40894160805116414, "grad_norm": 297.0989074707031, "learning_rate": 7.425879687747915e-06, "loss": 17.4253, "step": 202440 }, { "epoch": 0.40896180868384796, "grad_norm": 175.1661376953125, "learning_rate": 7.425574452878474e-06, "loss": 21.7561, "step": 202450 }, { "epoch": 0.4089820093165318, "grad_norm": 417.1772766113281, "learning_rate": 7.425269206187076e-06, "loss": 13.9367, "step": 202460 }, { "epoch": 0.4090022099492156, "grad_norm": 279.9949951171875, "learning_rate": 7.42496394767521e-06, "loss": 11.3857, "step": 202470 }, { "epoch": 0.4090224105818994, "grad_norm": 123.1187744140625, "learning_rate": 7.424658677344365e-06, "loss": 17.1499, "step": 202480 }, { "epoch": 0.40904261121458324, "grad_norm": 189.03054809570312, "learning_rate": 7.424353395196029e-06, "loss": 11.5167, "step": 202490 }, { "epoch": 0.40906281184726706, "grad_norm": 359.3128662109375, "learning_rate": 7.424048101231687e-06, "loss": 25.0759, "step": 202500 }, { "epoch": 0.4090830124799509, "grad_norm": 357.05609130859375, "learning_rate": 7.423742795452827e-06, "loss": 17.5194, "step": 202510 }, { "epoch": 0.4091032131126347, "grad_norm": 372.687255859375, "learning_rate": 7.423437477860941e-06, "loss": 16.4191, "step": 202520 }, { "epoch": 0.4091234137453185, "grad_norm": 172.88124084472656, "learning_rate": 7.423132148457512e-06, "loss": 14.5427, "step": 202530 }, { "epoch": 0.40914361437800234, "grad_norm": 137.24090576171875, "learning_rate": 7.422826807244034e-06, "loss": 14.1777, "step": 202540 }, { "epoch": 0.40916381501068616, "grad_norm": 389.3085632324219, "learning_rate": 7.42252145422199e-06, "loss": 23.6549, "step": 202550 }, { "epoch": 0.4091840156433699, "grad_norm": 264.5079040527344, "learning_rate": 7.422216089392872e-06, "loss": 19.855, "step": 202560 }, { "epoch": 0.40920421627605374, "grad_norm": 200.31024169921875, "learning_rate": 7.421910712758165e-06, "loss": 17.6888, "step": 202570 }, { "epoch": 0.40922441690873756, "grad_norm": 335.3602600097656, "learning_rate": 7.421605324319359e-06, "loss": 22.6221, "step": 202580 }, { "epoch": 0.4092446175414214, "grad_norm": 560.1017456054688, "learning_rate": 7.421299924077943e-06, "loss": 40.1226, "step": 202590 }, { "epoch": 0.4092648181741052, "grad_norm": 83.44961547851562, "learning_rate": 7.4209945120354045e-06, "loss": 17.057, "step": 202600 }, { "epoch": 0.409285018806789, "grad_norm": 167.87405395507812, "learning_rate": 7.420689088193232e-06, "loss": 24.7383, "step": 202610 }, { "epoch": 0.40930521943947284, "grad_norm": 368.8326721191406, "learning_rate": 7.420383652552915e-06, "loss": 16.5248, "step": 202620 }, { "epoch": 0.40932542007215666, "grad_norm": 696.7982788085938, "learning_rate": 7.420078205115942e-06, "loss": 23.3668, "step": 202630 }, { "epoch": 0.4093456207048405, "grad_norm": 506.15008544921875, "learning_rate": 7.4197727458837995e-06, "loss": 19.1929, "step": 202640 }, { "epoch": 0.4093658213375243, "grad_norm": 387.203125, "learning_rate": 7.419467274857981e-06, "loss": 12.44, "step": 202650 }, { "epoch": 0.4093860219702081, "grad_norm": 405.4519348144531, "learning_rate": 7.419161792039969e-06, "loss": 21.3904, "step": 202660 }, { "epoch": 0.40940622260289194, "grad_norm": 550.1858520507812, "learning_rate": 7.41885629743126e-06, "loss": 29.7676, "step": 202670 }, { "epoch": 0.40942642323557576, "grad_norm": 210.41612243652344, "learning_rate": 7.418550791033335e-06, "loss": 26.285, "step": 202680 }, { "epoch": 0.40944662386825953, "grad_norm": 156.86526489257812, "learning_rate": 7.418245272847688e-06, "loss": 23.6969, "step": 202690 }, { "epoch": 0.40946682450094335, "grad_norm": 403.2210998535156, "learning_rate": 7.4179397428758085e-06, "loss": 25.2275, "step": 202700 }, { "epoch": 0.40948702513362717, "grad_norm": 108.23860168457031, "learning_rate": 7.4176342011191816e-06, "loss": 12.1721, "step": 202710 }, { "epoch": 0.409507225766311, "grad_norm": 360.3715515136719, "learning_rate": 7.417328647579301e-06, "loss": 15.3639, "step": 202720 }, { "epoch": 0.4095274263989948, "grad_norm": 405.38720703125, "learning_rate": 7.417023082257653e-06, "loss": 21.6179, "step": 202730 }, { "epoch": 0.40954762703167863, "grad_norm": 313.8208312988281, "learning_rate": 7.416717505155726e-06, "loss": 22.6346, "step": 202740 }, { "epoch": 0.40956782766436245, "grad_norm": 159.28472900390625, "learning_rate": 7.416411916275012e-06, "loss": 12.5881, "step": 202750 }, { "epoch": 0.40958802829704627, "grad_norm": 246.0618133544922, "learning_rate": 7.416106315617e-06, "loss": 12.7646, "step": 202760 }, { "epoch": 0.4096082289297301, "grad_norm": 211.38795471191406, "learning_rate": 7.415800703183179e-06, "loss": 16.5058, "step": 202770 }, { "epoch": 0.4096284295624139, "grad_norm": 437.5416259765625, "learning_rate": 7.415495078975038e-06, "loss": 22.9677, "step": 202780 }, { "epoch": 0.40964863019509773, "grad_norm": 424.41790771484375, "learning_rate": 7.415189442994066e-06, "loss": 10.9922, "step": 202790 }, { "epoch": 0.40966883082778155, "grad_norm": 61.68048095703125, "learning_rate": 7.414883795241754e-06, "loss": 22.2669, "step": 202800 }, { "epoch": 0.40968903146046537, "grad_norm": 218.9053192138672, "learning_rate": 7.4145781357195936e-06, "loss": 15.2872, "step": 202810 }, { "epoch": 0.40970923209314913, "grad_norm": 0.0, "learning_rate": 7.414272464429068e-06, "loss": 14.0027, "step": 202820 }, { "epoch": 0.40972943272583295, "grad_norm": 499.3514404296875, "learning_rate": 7.413966781371676e-06, "loss": 20.4586, "step": 202830 }, { "epoch": 0.4097496333585168, "grad_norm": 962.5541381835938, "learning_rate": 7.413661086548899e-06, "loss": 20.8497, "step": 202840 }, { "epoch": 0.4097698339912006, "grad_norm": 300.2274169921875, "learning_rate": 7.413355379962231e-06, "loss": 31.6723, "step": 202850 }, { "epoch": 0.4097900346238844, "grad_norm": 315.60491943359375, "learning_rate": 7.413049661613163e-06, "loss": 12.5624, "step": 202860 }, { "epoch": 0.40981023525656823, "grad_norm": 416.9862365722656, "learning_rate": 7.412743931503182e-06, "loss": 12.1045, "step": 202870 }, { "epoch": 0.40983043588925205, "grad_norm": 285.10101318359375, "learning_rate": 7.412438189633781e-06, "loss": 15.6862, "step": 202880 }, { "epoch": 0.4098506365219359, "grad_norm": 152.4774627685547, "learning_rate": 7.412132436006449e-06, "loss": 12.9348, "step": 202890 }, { "epoch": 0.4098708371546197, "grad_norm": 315.7986755371094, "learning_rate": 7.411826670622676e-06, "loss": 18.6132, "step": 202900 }, { "epoch": 0.4098910377873035, "grad_norm": 315.5300598144531, "learning_rate": 7.411520893483952e-06, "loss": 9.2022, "step": 202910 }, { "epoch": 0.40991123841998733, "grad_norm": 248.45164489746094, "learning_rate": 7.411215104591767e-06, "loss": 9.4805, "step": 202920 }, { "epoch": 0.40993143905267115, "grad_norm": 67.37311553955078, "learning_rate": 7.410909303947613e-06, "loss": 21.1455, "step": 202930 }, { "epoch": 0.4099516396853549, "grad_norm": 129.3515625, "learning_rate": 7.4106034915529786e-06, "loss": 26.2065, "step": 202940 }, { "epoch": 0.40997184031803874, "grad_norm": 6.785626411437988, "learning_rate": 7.410297667409356e-06, "loss": 15.8955, "step": 202950 }, { "epoch": 0.40999204095072256, "grad_norm": 115.67985534667969, "learning_rate": 7.409991831518235e-06, "loss": 22.6355, "step": 202960 }, { "epoch": 0.4100122415834064, "grad_norm": 75.98882293701172, "learning_rate": 7.409685983881107e-06, "loss": 17.0622, "step": 202970 }, { "epoch": 0.4100324422160902, "grad_norm": 713.0682983398438, "learning_rate": 7.409380124499459e-06, "loss": 28.8033, "step": 202980 }, { "epoch": 0.410052642848774, "grad_norm": 442.6023254394531, "learning_rate": 7.409074253374786e-06, "loss": 16.5796, "step": 202990 }, { "epoch": 0.41007284348145784, "grad_norm": 310.48883056640625, "learning_rate": 7.408768370508577e-06, "loss": 20.5485, "step": 203000 }, { "epoch": 0.41009304411414166, "grad_norm": 345.45928955078125, "learning_rate": 7.408462475902324e-06, "loss": 23.7525, "step": 203010 }, { "epoch": 0.4101132447468255, "grad_norm": 612.8237915039062, "learning_rate": 7.408156569557515e-06, "loss": 23.3818, "step": 203020 }, { "epoch": 0.4101334453795093, "grad_norm": 573.7870483398438, "learning_rate": 7.407850651475645e-06, "loss": 28.077, "step": 203030 }, { "epoch": 0.4101536460121931, "grad_norm": 407.9990539550781, "learning_rate": 7.407544721658203e-06, "loss": 19.9041, "step": 203040 }, { "epoch": 0.41017384664487694, "grad_norm": 8.264093399047852, "learning_rate": 7.407238780106679e-06, "loss": 9.8761, "step": 203050 }, { "epoch": 0.41019404727756076, "grad_norm": 176.06838989257812, "learning_rate": 7.406932826822565e-06, "loss": 20.9991, "step": 203060 }, { "epoch": 0.4102142479102445, "grad_norm": 329.43524169921875, "learning_rate": 7.406626861807352e-06, "loss": 23.1037, "step": 203070 }, { "epoch": 0.41023444854292834, "grad_norm": 257.8447570800781, "learning_rate": 7.406320885062532e-06, "loss": 13.4388, "step": 203080 }, { "epoch": 0.41025464917561216, "grad_norm": 149.50701904296875, "learning_rate": 7.406014896589597e-06, "loss": 11.3881, "step": 203090 }, { "epoch": 0.410274849808296, "grad_norm": 300.67462158203125, "learning_rate": 7.405708896390037e-06, "loss": 30.4883, "step": 203100 }, { "epoch": 0.4102950504409798, "grad_norm": 261.0084533691406, "learning_rate": 7.405402884465342e-06, "loss": 14.3874, "step": 203110 }, { "epoch": 0.4103152510736636, "grad_norm": 454.1365966796875, "learning_rate": 7.405096860817007e-06, "loss": 28.2591, "step": 203120 }, { "epoch": 0.41033545170634744, "grad_norm": 99.49092864990234, "learning_rate": 7.404790825446522e-06, "loss": 21.5628, "step": 203130 }, { "epoch": 0.41035565233903126, "grad_norm": 102.48304748535156, "learning_rate": 7.404484778355374e-06, "loss": 22.1249, "step": 203140 }, { "epoch": 0.4103758529717151, "grad_norm": 0.0, "learning_rate": 7.404178719545063e-06, "loss": 13.7441, "step": 203150 }, { "epoch": 0.4103960536043989, "grad_norm": 394.6719665527344, "learning_rate": 7.403872649017074e-06, "loss": 11.3515, "step": 203160 }, { "epoch": 0.4104162542370827, "grad_norm": 0.0, "learning_rate": 7.403566566772904e-06, "loss": 37.7648, "step": 203170 }, { "epoch": 0.41043645486976654, "grad_norm": 358.02703857421875, "learning_rate": 7.40326047281404e-06, "loss": 25.9071, "step": 203180 }, { "epoch": 0.41045665550245036, "grad_norm": 522.4255981445312, "learning_rate": 7.4029543671419765e-06, "loss": 36.1616, "step": 203190 }, { "epoch": 0.4104768561351341, "grad_norm": 524.76416015625, "learning_rate": 7.402648249758204e-06, "loss": 13.6104, "step": 203200 }, { "epoch": 0.41049705676781795, "grad_norm": 531.1576538085938, "learning_rate": 7.4023421206642176e-06, "loss": 25.5932, "step": 203210 }, { "epoch": 0.41051725740050177, "grad_norm": 402.12225341796875, "learning_rate": 7.402035979861505e-06, "loss": 19.942, "step": 203220 }, { "epoch": 0.4105374580331856, "grad_norm": 914.9557495117188, "learning_rate": 7.401729827351561e-06, "loss": 18.3288, "step": 203230 }, { "epoch": 0.4105576586658694, "grad_norm": 291.556884765625, "learning_rate": 7.401423663135877e-06, "loss": 15.2257, "step": 203240 }, { "epoch": 0.4105778592985532, "grad_norm": 168.8301544189453, "learning_rate": 7.401117487215945e-06, "loss": 19.1808, "step": 203250 }, { "epoch": 0.41059805993123705, "grad_norm": 205.13941955566406, "learning_rate": 7.400811299593259e-06, "loss": 26.6728, "step": 203260 }, { "epoch": 0.41061826056392087, "grad_norm": 240.63104248046875, "learning_rate": 7.400505100269307e-06, "loss": 14.6706, "step": 203270 }, { "epoch": 0.4106384611966047, "grad_norm": 235.5559844970703, "learning_rate": 7.4001988892455875e-06, "loss": 25.0444, "step": 203280 }, { "epoch": 0.4106586618292885, "grad_norm": 327.7813415527344, "learning_rate": 7.399892666523588e-06, "loss": 14.1107, "step": 203290 }, { "epoch": 0.4106788624619723, "grad_norm": 326.0201416015625, "learning_rate": 7.3995864321048036e-06, "loss": 20.7706, "step": 203300 }, { "epoch": 0.41069906309465615, "grad_norm": 610.1513061523438, "learning_rate": 7.399280185990726e-06, "loss": 27.6324, "step": 203310 }, { "epoch": 0.41071926372733997, "grad_norm": 312.6311950683594, "learning_rate": 7.3989739281828466e-06, "loss": 26.6213, "step": 203320 }, { "epoch": 0.41073946436002373, "grad_norm": 127.47289276123047, "learning_rate": 7.39866765868266e-06, "loss": 15.1707, "step": 203330 }, { "epoch": 0.41075966499270755, "grad_norm": 62.87796401977539, "learning_rate": 7.398361377491659e-06, "loss": 17.7179, "step": 203340 }, { "epoch": 0.41077986562539137, "grad_norm": 5.42194938659668, "learning_rate": 7.398055084611333e-06, "loss": 14.6872, "step": 203350 }, { "epoch": 0.4108000662580752, "grad_norm": 202.76097106933594, "learning_rate": 7.397748780043179e-06, "loss": 14.3895, "step": 203360 }, { "epoch": 0.410820266890759, "grad_norm": 138.66746520996094, "learning_rate": 7.3974424637886886e-06, "loss": 14.1814, "step": 203370 }, { "epoch": 0.41084046752344283, "grad_norm": 496.3817443847656, "learning_rate": 7.397136135849354e-06, "loss": 13.4596, "step": 203380 }, { "epoch": 0.41086066815612665, "grad_norm": 68.78467559814453, "learning_rate": 7.3968297962266696e-06, "loss": 28.7486, "step": 203390 }, { "epoch": 0.41088086878881047, "grad_norm": 81.5542221069336, "learning_rate": 7.396523444922126e-06, "loss": 12.904, "step": 203400 }, { "epoch": 0.4109010694214943, "grad_norm": 289.6869812011719, "learning_rate": 7.396217081937218e-06, "loss": 11.475, "step": 203410 }, { "epoch": 0.4109212700541781, "grad_norm": 87.90916442871094, "learning_rate": 7.395910707273439e-06, "loss": 11.007, "step": 203420 }, { "epoch": 0.41094147068686193, "grad_norm": 432.1829833984375, "learning_rate": 7.395604320932281e-06, "loss": 11.0307, "step": 203430 }, { "epoch": 0.41096167131954575, "grad_norm": 83.71272277832031, "learning_rate": 7.39529792291524e-06, "loss": 16.5259, "step": 203440 }, { "epoch": 0.41098187195222957, "grad_norm": 87.71505737304688, "learning_rate": 7.394991513223806e-06, "loss": 30.9097, "step": 203450 }, { "epoch": 0.41100207258491334, "grad_norm": 232.15628051757812, "learning_rate": 7.394685091859474e-06, "loss": 23.0117, "step": 203460 }, { "epoch": 0.41102227321759716, "grad_norm": 385.1955261230469, "learning_rate": 7.394378658823738e-06, "loss": 11.9156, "step": 203470 }, { "epoch": 0.411042473850281, "grad_norm": 325.1283264160156, "learning_rate": 7.3940722141180885e-06, "loss": 15.1534, "step": 203480 }, { "epoch": 0.4110626744829648, "grad_norm": 526.6800537109375, "learning_rate": 7.393765757744025e-06, "loss": 14.89, "step": 203490 }, { "epoch": 0.4110828751156486, "grad_norm": 270.0126647949219, "learning_rate": 7.393459289703035e-06, "loss": 9.0824, "step": 203500 }, { "epoch": 0.41110307574833244, "grad_norm": 268.830078125, "learning_rate": 7.393152809996616e-06, "loss": 19.276, "step": 203510 }, { "epoch": 0.41112327638101626, "grad_norm": 227.9228057861328, "learning_rate": 7.392846318626259e-06, "loss": 21.2644, "step": 203520 }, { "epoch": 0.4111434770137001, "grad_norm": 380.7354736328125, "learning_rate": 7.392539815593459e-06, "loss": 12.7125, "step": 203530 }, { "epoch": 0.4111636776463839, "grad_norm": 374.5453186035156, "learning_rate": 7.392233300899712e-06, "loss": 12.81, "step": 203540 }, { "epoch": 0.4111838782790677, "grad_norm": 351.98895263671875, "learning_rate": 7.391926774546509e-06, "loss": 22.2162, "step": 203550 }, { "epoch": 0.41120407891175154, "grad_norm": 487.41864013671875, "learning_rate": 7.391620236535345e-06, "loss": 18.4534, "step": 203560 }, { "epoch": 0.41122427954443536, "grad_norm": 340.775634765625, "learning_rate": 7.391313686867715e-06, "loss": 23.9022, "step": 203570 }, { "epoch": 0.4112444801771191, "grad_norm": 235.37982177734375, "learning_rate": 7.391007125545111e-06, "loss": 16.9134, "step": 203580 }, { "epoch": 0.41126468080980294, "grad_norm": 69.06404876708984, "learning_rate": 7.390700552569028e-06, "loss": 11.5614, "step": 203590 }, { "epoch": 0.41128488144248676, "grad_norm": 365.2658996582031, "learning_rate": 7.390393967940962e-06, "loss": 24.5155, "step": 203600 }, { "epoch": 0.4113050820751706, "grad_norm": 163.42892456054688, "learning_rate": 7.390087371662403e-06, "loss": 10.0215, "step": 203610 }, { "epoch": 0.4113252827078544, "grad_norm": 541.3896484375, "learning_rate": 7.389780763734851e-06, "loss": 24.7475, "step": 203620 }, { "epoch": 0.4113454833405382, "grad_norm": 219.10179138183594, "learning_rate": 7.3894741441597965e-06, "loss": 18.6311, "step": 203630 }, { "epoch": 0.41136568397322204, "grad_norm": 666.78271484375, "learning_rate": 7.389167512938734e-06, "loss": 30.5525, "step": 203640 }, { "epoch": 0.41138588460590586, "grad_norm": 653.0774536132812, "learning_rate": 7.38886087007316e-06, "loss": 19.5639, "step": 203650 }, { "epoch": 0.4114060852385897, "grad_norm": 373.59393310546875, "learning_rate": 7.388554215564567e-06, "loss": 16.3985, "step": 203660 }, { "epoch": 0.4114262858712735, "grad_norm": 346.81640625, "learning_rate": 7.388247549414451e-06, "loss": 25.9684, "step": 203670 }, { "epoch": 0.4114464865039573, "grad_norm": 950.396484375, "learning_rate": 7.387940871624306e-06, "loss": 15.2841, "step": 203680 }, { "epoch": 0.41146668713664114, "grad_norm": 606.906982421875, "learning_rate": 7.387634182195626e-06, "loss": 21.5458, "step": 203690 }, { "epoch": 0.41148688776932496, "grad_norm": 163.8409881591797, "learning_rate": 7.3873274811299065e-06, "loss": 13.7479, "step": 203700 }, { "epoch": 0.4115070884020087, "grad_norm": 264.5555725097656, "learning_rate": 7.387020768428644e-06, "loss": 12.151, "step": 203710 }, { "epoch": 0.41152728903469254, "grad_norm": 140.77288818359375, "learning_rate": 7.386714044093331e-06, "loss": 24.0381, "step": 203720 }, { "epoch": 0.41154748966737636, "grad_norm": 101.73542785644531, "learning_rate": 7.3864073081254625e-06, "loss": 21.759, "step": 203730 }, { "epoch": 0.4115676903000602, "grad_norm": 385.21795654296875, "learning_rate": 7.386100560526537e-06, "loss": 33.2145, "step": 203740 }, { "epoch": 0.411587890932744, "grad_norm": 303.6557312011719, "learning_rate": 7.3857938012980425e-06, "loss": 20.1732, "step": 203750 }, { "epoch": 0.4116080915654278, "grad_norm": 58.27839660644531, "learning_rate": 7.385487030441482e-06, "loss": 13.8074, "step": 203760 }, { "epoch": 0.41162829219811164, "grad_norm": 8.041471481323242, "learning_rate": 7.385180247958343e-06, "loss": 14.7887, "step": 203770 }, { "epoch": 0.41164849283079546, "grad_norm": 277.2176818847656, "learning_rate": 7.384873453850128e-06, "loss": 13.6498, "step": 203780 }, { "epoch": 0.4116686934634793, "grad_norm": 313.611328125, "learning_rate": 7.3845666481183285e-06, "loss": 15.7543, "step": 203790 }, { "epoch": 0.4116888940961631, "grad_norm": 544.3148193359375, "learning_rate": 7.3842598307644396e-06, "loss": 16.5751, "step": 203800 }, { "epoch": 0.4117090947288469, "grad_norm": 483.9716491699219, "learning_rate": 7.38395300178996e-06, "loss": 16.7201, "step": 203810 }, { "epoch": 0.41172929536153074, "grad_norm": 561.1036376953125, "learning_rate": 7.383646161196379e-06, "loss": 24.7235, "step": 203820 }, { "epoch": 0.41174949599421456, "grad_norm": 47.11389923095703, "learning_rate": 7.3833393089851975e-06, "loss": 30.07, "step": 203830 }, { "epoch": 0.41176969662689833, "grad_norm": 240.0993194580078, "learning_rate": 7.383032445157909e-06, "loss": 16.512, "step": 203840 }, { "epoch": 0.41178989725958215, "grad_norm": 83.08414459228516, "learning_rate": 7.382725569716009e-06, "loss": 32.1523, "step": 203850 }, { "epoch": 0.41181009789226597, "grad_norm": 22.421377182006836, "learning_rate": 7.3824186826609945e-06, "loss": 15.6681, "step": 203860 }, { "epoch": 0.4118302985249498, "grad_norm": 111.38150024414062, "learning_rate": 7.382111783994361e-06, "loss": 22.6536, "step": 203870 }, { "epoch": 0.4118504991576336, "grad_norm": 494.5090637207031, "learning_rate": 7.381804873717602e-06, "loss": 31.3807, "step": 203880 }, { "epoch": 0.41187069979031743, "grad_norm": 34.09320831298828, "learning_rate": 7.381497951832215e-06, "loss": 27.2106, "step": 203890 }, { "epoch": 0.41189090042300125, "grad_norm": 58.55751037597656, "learning_rate": 7.381191018339697e-06, "loss": 29.7797, "step": 203900 }, { "epoch": 0.41191110105568507, "grad_norm": 474.6679382324219, "learning_rate": 7.380884073241541e-06, "loss": 17.9899, "step": 203910 }, { "epoch": 0.4119313016883689, "grad_norm": 120.73897552490234, "learning_rate": 7.380577116539247e-06, "loss": 19.6223, "step": 203920 }, { "epoch": 0.4119515023210527, "grad_norm": 99.71367645263672, "learning_rate": 7.380270148234306e-06, "loss": 16.4969, "step": 203930 }, { "epoch": 0.41197170295373653, "grad_norm": 200.96847534179688, "learning_rate": 7.379963168328219e-06, "loss": 21.4217, "step": 203940 }, { "epoch": 0.41199190358642035, "grad_norm": 375.8184509277344, "learning_rate": 7.379656176822481e-06, "loss": 22.9137, "step": 203950 }, { "epoch": 0.41201210421910417, "grad_norm": 259.62274169921875, "learning_rate": 7.379349173718585e-06, "loss": 8.3552, "step": 203960 }, { "epoch": 0.41203230485178793, "grad_norm": 206.06546020507812, "learning_rate": 7.379042159018031e-06, "loss": 24.1159, "step": 203970 }, { "epoch": 0.41205250548447175, "grad_norm": 651.1390380859375, "learning_rate": 7.378735132722313e-06, "loss": 29.297, "step": 203980 }, { "epoch": 0.4120727061171556, "grad_norm": 242.53668212890625, "learning_rate": 7.3784280948329315e-06, "loss": 27.1348, "step": 203990 }, { "epoch": 0.4120929067498394, "grad_norm": 15.449658393859863, "learning_rate": 7.378121045351378e-06, "loss": 13.8822, "step": 204000 }, { "epoch": 0.4121131073825232, "grad_norm": 572.482666015625, "learning_rate": 7.37781398427915e-06, "loss": 22.2015, "step": 204010 }, { "epoch": 0.41213330801520703, "grad_norm": 423.80560302734375, "learning_rate": 7.377506911617746e-06, "loss": 12.15, "step": 204020 }, { "epoch": 0.41215350864789085, "grad_norm": 333.690185546875, "learning_rate": 7.3771998273686615e-06, "loss": 10.3966, "step": 204030 }, { "epoch": 0.4121737092805747, "grad_norm": 104.61331176757812, "learning_rate": 7.376892731533392e-06, "loss": 36.7318, "step": 204040 }, { "epoch": 0.4121939099132585, "grad_norm": 423.7516174316406, "learning_rate": 7.376585624113438e-06, "loss": 22.6514, "step": 204050 }, { "epoch": 0.4122141105459423, "grad_norm": 363.3681945800781, "learning_rate": 7.3762785051102924e-06, "loss": 11.4511, "step": 204060 }, { "epoch": 0.41223431117862613, "grad_norm": 182.66326904296875, "learning_rate": 7.375971374525454e-06, "loss": 24.1933, "step": 204070 }, { "epoch": 0.41225451181130995, "grad_norm": 186.41079711914062, "learning_rate": 7.375664232360421e-06, "loss": 58.8916, "step": 204080 }, { "epoch": 0.4122747124439937, "grad_norm": 104.02167510986328, "learning_rate": 7.375357078616685e-06, "loss": 21.9016, "step": 204090 }, { "epoch": 0.41229491307667754, "grad_norm": 8.429420471191406, "learning_rate": 7.37504991329575e-06, "loss": 6.6171, "step": 204100 }, { "epoch": 0.41231511370936136, "grad_norm": 307.2082824707031, "learning_rate": 7.374742736399108e-06, "loss": 26.284, "step": 204110 }, { "epoch": 0.4123353143420452, "grad_norm": 104.54947662353516, "learning_rate": 7.374435547928258e-06, "loss": 24.0655, "step": 204120 }, { "epoch": 0.412355514974729, "grad_norm": 195.92245483398438, "learning_rate": 7.374128347884698e-06, "loss": 20.7598, "step": 204130 }, { "epoch": 0.4123757156074128, "grad_norm": 29.02288246154785, "learning_rate": 7.373821136269924e-06, "loss": 22.3593, "step": 204140 }, { "epoch": 0.41239591624009664, "grad_norm": 289.1363525390625, "learning_rate": 7.373513913085434e-06, "loss": 12.975, "step": 204150 }, { "epoch": 0.41241611687278046, "grad_norm": 296.1258544921875, "learning_rate": 7.3732066783327246e-06, "loss": 28.3176, "step": 204160 }, { "epoch": 0.4124363175054643, "grad_norm": 256.05511474609375, "learning_rate": 7.372899432013294e-06, "loss": 17.2703, "step": 204170 }, { "epoch": 0.4124565181381481, "grad_norm": 560.12451171875, "learning_rate": 7.372592174128639e-06, "loss": 23.799, "step": 204180 }, { "epoch": 0.4124767187708319, "grad_norm": 335.15972900390625, "learning_rate": 7.372284904680258e-06, "loss": 21.6153, "step": 204190 }, { "epoch": 0.41249691940351574, "grad_norm": 252.37322998046875, "learning_rate": 7.371977623669646e-06, "loss": 32.5616, "step": 204200 }, { "epoch": 0.41251712003619956, "grad_norm": 41.52864074707031, "learning_rate": 7.371670331098306e-06, "loss": 20.0235, "step": 204210 }, { "epoch": 0.4125373206688833, "grad_norm": 399.67071533203125, "learning_rate": 7.371363026967731e-06, "loss": 26.6496, "step": 204220 }, { "epoch": 0.41255752130156714, "grad_norm": 221.12005615234375, "learning_rate": 7.3710557112794205e-06, "loss": 9.717, "step": 204230 }, { "epoch": 0.41257772193425096, "grad_norm": 167.55307006835938, "learning_rate": 7.370748384034871e-06, "loss": 17.5768, "step": 204240 }, { "epoch": 0.4125979225669348, "grad_norm": 196.6586151123047, "learning_rate": 7.370441045235581e-06, "loss": 16.8564, "step": 204250 }, { "epoch": 0.4126181231996186, "grad_norm": 201.33041381835938, "learning_rate": 7.370133694883052e-06, "loss": 13.2552, "step": 204260 }, { "epoch": 0.4126383238323024, "grad_norm": 16.27325439453125, "learning_rate": 7.369826332978776e-06, "loss": 25.8773, "step": 204270 }, { "epoch": 0.41265852446498624, "grad_norm": 256.61187744140625, "learning_rate": 7.369518959524254e-06, "loss": 17.932, "step": 204280 }, { "epoch": 0.41267872509767006, "grad_norm": 472.1260070800781, "learning_rate": 7.369211574520985e-06, "loss": 18.1781, "step": 204290 }, { "epoch": 0.4126989257303539, "grad_norm": 0.7232304811477661, "learning_rate": 7.368904177970466e-06, "loss": 12.7231, "step": 204300 }, { "epoch": 0.4127191263630377, "grad_norm": 189.30003356933594, "learning_rate": 7.368596769874194e-06, "loss": 27.7163, "step": 204310 }, { "epoch": 0.4127393269957215, "grad_norm": 485.16656494140625, "learning_rate": 7.368289350233672e-06, "loss": 16.4379, "step": 204320 }, { "epoch": 0.41275952762840534, "grad_norm": 0.0, "learning_rate": 7.367981919050392e-06, "loss": 33.6204, "step": 204330 }, { "epoch": 0.41277972826108916, "grad_norm": 225.47848510742188, "learning_rate": 7.367674476325856e-06, "loss": 21.7402, "step": 204340 }, { "epoch": 0.4127999288937729, "grad_norm": 570.21337890625, "learning_rate": 7.3673670220615615e-06, "loss": 29.9381, "step": 204350 }, { "epoch": 0.41282012952645675, "grad_norm": 215.88026428222656, "learning_rate": 7.367059556259008e-06, "loss": 25.7043, "step": 204360 }, { "epoch": 0.41284033015914057, "grad_norm": 428.2153015136719, "learning_rate": 7.366752078919693e-06, "loss": 17.9188, "step": 204370 }, { "epoch": 0.4128605307918244, "grad_norm": 168.07803344726562, "learning_rate": 7.366444590045114e-06, "loss": 10.9499, "step": 204380 }, { "epoch": 0.4128807314245082, "grad_norm": 19.843788146972656, "learning_rate": 7.366137089636774e-06, "loss": 27.3845, "step": 204390 }, { "epoch": 0.412900932057192, "grad_norm": 138.6793212890625, "learning_rate": 7.365829577696166e-06, "loss": 13.4005, "step": 204400 }, { "epoch": 0.41292113268987585, "grad_norm": 534.2427368164062, "learning_rate": 7.365522054224793e-06, "loss": 26.5131, "step": 204410 }, { "epoch": 0.41294133332255967, "grad_norm": 98.3174819946289, "learning_rate": 7.365214519224153e-06, "loss": 25.1214, "step": 204420 }, { "epoch": 0.4129615339552435, "grad_norm": 463.2063293457031, "learning_rate": 7.3649069726957426e-06, "loss": 13.5309, "step": 204430 }, { "epoch": 0.4129817345879273, "grad_norm": 286.7100830078125, "learning_rate": 7.364599414641064e-06, "loss": 18.5391, "step": 204440 }, { "epoch": 0.4130019352206111, "grad_norm": 420.7353515625, "learning_rate": 7.364291845061613e-06, "loss": 15.5523, "step": 204450 }, { "epoch": 0.41302213585329495, "grad_norm": 39.123321533203125, "learning_rate": 7.3639842639588924e-06, "loss": 20.7259, "step": 204460 }, { "epoch": 0.41304233648597877, "grad_norm": 433.3465576171875, "learning_rate": 7.363676671334397e-06, "loss": 25.0828, "step": 204470 }, { "epoch": 0.41306253711866253, "grad_norm": 0.0, "learning_rate": 7.36336906718963e-06, "loss": 47.5208, "step": 204480 }, { "epoch": 0.41308273775134635, "grad_norm": 368.8836975097656, "learning_rate": 7.363061451526088e-06, "loss": 33.26, "step": 204490 }, { "epoch": 0.41310293838403017, "grad_norm": 230.3015899658203, "learning_rate": 7.362753824345271e-06, "loss": 17.8627, "step": 204500 }, { "epoch": 0.413123139016714, "grad_norm": 32.407352447509766, "learning_rate": 7.362446185648678e-06, "loss": 17.0221, "step": 204510 }, { "epoch": 0.4131433396493978, "grad_norm": 162.64370727539062, "learning_rate": 7.362138535437809e-06, "loss": 13.4251, "step": 204520 }, { "epoch": 0.41316354028208163, "grad_norm": 440.64678955078125, "learning_rate": 7.361830873714165e-06, "loss": 16.428, "step": 204530 }, { "epoch": 0.41318374091476545, "grad_norm": 0.0, "learning_rate": 7.361523200479241e-06, "loss": 18.4399, "step": 204540 }, { "epoch": 0.41320394154744927, "grad_norm": 275.8545837402344, "learning_rate": 7.361215515734542e-06, "loss": 18.3342, "step": 204550 }, { "epoch": 0.4132241421801331, "grad_norm": 495.301025390625, "learning_rate": 7.360907819481564e-06, "loss": 23.0418, "step": 204560 }, { "epoch": 0.4132443428128169, "grad_norm": 316.9502258300781, "learning_rate": 7.360600111721807e-06, "loss": 21.6263, "step": 204570 }, { "epoch": 0.41326454344550073, "grad_norm": 274.69866943359375, "learning_rate": 7.360292392456772e-06, "loss": 18.9345, "step": 204580 }, { "epoch": 0.41328474407818455, "grad_norm": 419.72955322265625, "learning_rate": 7.359984661687957e-06, "loss": 19.2842, "step": 204590 }, { "epoch": 0.41330494471086837, "grad_norm": 267.0309753417969, "learning_rate": 7.3596769194168646e-06, "loss": 12.6231, "step": 204600 }, { "epoch": 0.41332514534355214, "grad_norm": 314.3584899902344, "learning_rate": 7.359369165644992e-06, "loss": 41.3065, "step": 204610 }, { "epoch": 0.41334534597623596, "grad_norm": 359.59820556640625, "learning_rate": 7.359061400373841e-06, "loss": 11.1777, "step": 204620 }, { "epoch": 0.4133655466089198, "grad_norm": 175.6316375732422, "learning_rate": 7.358753623604911e-06, "loss": 15.3772, "step": 204630 }, { "epoch": 0.4133857472416036, "grad_norm": 368.94873046875, "learning_rate": 7.358445835339702e-06, "loss": 23.6798, "step": 204640 }, { "epoch": 0.4134059478742874, "grad_norm": 34.366722106933594, "learning_rate": 7.358138035579712e-06, "loss": 18.1346, "step": 204650 }, { "epoch": 0.41342614850697124, "grad_norm": 169.16346740722656, "learning_rate": 7.357830224326445e-06, "loss": 18.7972, "step": 204660 }, { "epoch": 0.41344634913965506, "grad_norm": 103.52387237548828, "learning_rate": 7.357522401581398e-06, "loss": 22.1057, "step": 204670 }, { "epoch": 0.4134665497723389, "grad_norm": 167.9623565673828, "learning_rate": 7.357214567346075e-06, "loss": 16.5391, "step": 204680 }, { "epoch": 0.4134867504050227, "grad_norm": 165.42784118652344, "learning_rate": 7.356906721621974e-06, "loss": 20.7015, "step": 204690 }, { "epoch": 0.4135069510377065, "grad_norm": 462.42529296875, "learning_rate": 7.3565988644105926e-06, "loss": 21.3778, "step": 204700 }, { "epoch": 0.41352715167039034, "grad_norm": 33.7977409362793, "learning_rate": 7.356290995713436e-06, "loss": 25.1027, "step": 204710 }, { "epoch": 0.41354735230307416, "grad_norm": 263.3853759765625, "learning_rate": 7.355983115532004e-06, "loss": 12.0336, "step": 204720 }, { "epoch": 0.4135675529357579, "grad_norm": 212.0270538330078, "learning_rate": 7.355675223867794e-06, "loss": 23.7431, "step": 204730 }, { "epoch": 0.41358775356844174, "grad_norm": 0.0, "learning_rate": 7.35536732072231e-06, "loss": 9.8658, "step": 204740 }, { "epoch": 0.41360795420112556, "grad_norm": 239.8069305419922, "learning_rate": 7.35505940609705e-06, "loss": 27.1398, "step": 204750 }, { "epoch": 0.4136281548338094, "grad_norm": 157.71556091308594, "learning_rate": 7.354751479993518e-06, "loss": 21.3431, "step": 204760 }, { "epoch": 0.4136483554664932, "grad_norm": 77.94368743896484, "learning_rate": 7.354443542413212e-06, "loss": 16.7014, "step": 204770 }, { "epoch": 0.413668556099177, "grad_norm": 318.4212341308594, "learning_rate": 7.3541355933576345e-06, "loss": 19.467, "step": 204780 }, { "epoch": 0.41368875673186084, "grad_norm": 220.1785125732422, "learning_rate": 7.353827632828283e-06, "loss": 14.3416, "step": 204790 }, { "epoch": 0.41370895736454466, "grad_norm": 408.7906799316406, "learning_rate": 7.353519660826665e-06, "loss": 16.9213, "step": 204800 }, { "epoch": 0.4137291579972285, "grad_norm": 195.95521545410156, "learning_rate": 7.353211677354274e-06, "loss": 13.7401, "step": 204810 }, { "epoch": 0.4137493586299123, "grad_norm": 267.3808898925781, "learning_rate": 7.352903682412617e-06, "loss": 13.4629, "step": 204820 }, { "epoch": 0.4137695592625961, "grad_norm": 852.6129150390625, "learning_rate": 7.352595676003191e-06, "loss": 18.7779, "step": 204830 }, { "epoch": 0.41378975989527994, "grad_norm": 178.7244415283203, "learning_rate": 7.3522876581275e-06, "loss": 21.3874, "step": 204840 }, { "epoch": 0.41380996052796376, "grad_norm": 0.0, "learning_rate": 7.351979628787045e-06, "loss": 24.5079, "step": 204850 }, { "epoch": 0.4138301611606475, "grad_norm": 387.8885803222656, "learning_rate": 7.351671587983325e-06, "loss": 10.6546, "step": 204860 }, { "epoch": 0.41385036179333134, "grad_norm": 131.01918029785156, "learning_rate": 7.351363535717845e-06, "loss": 9.9297, "step": 204870 }, { "epoch": 0.41387056242601516, "grad_norm": 289.492431640625, "learning_rate": 7.3510554719921015e-06, "loss": 27.3346, "step": 204880 }, { "epoch": 0.413890763058699, "grad_norm": 205.06744384765625, "learning_rate": 7.350747396807601e-06, "loss": 14.5753, "step": 204890 }, { "epoch": 0.4139109636913828, "grad_norm": 277.4912109375, "learning_rate": 7.350439310165842e-06, "loss": 12.5957, "step": 204900 }, { "epoch": 0.4139311643240666, "grad_norm": 180.942626953125, "learning_rate": 7.350131212068328e-06, "loss": 21.7488, "step": 204910 }, { "epoch": 0.41395136495675044, "grad_norm": 584.9170532226562, "learning_rate": 7.349823102516558e-06, "loss": 21.0482, "step": 204920 }, { "epoch": 0.41397156558943426, "grad_norm": 262.3549499511719, "learning_rate": 7.349514981512036e-06, "loss": 17.5213, "step": 204930 }, { "epoch": 0.4139917662221181, "grad_norm": 227.3072967529297, "learning_rate": 7.349206849056263e-06, "loss": 29.1081, "step": 204940 }, { "epoch": 0.4140119668548019, "grad_norm": 276.5566711425781, "learning_rate": 7.34889870515074e-06, "loss": 18.3444, "step": 204950 }, { "epoch": 0.4140321674874857, "grad_norm": 219.87550354003906, "learning_rate": 7.34859054979697e-06, "loss": 44.4981, "step": 204960 }, { "epoch": 0.41405236812016954, "grad_norm": 190.48406982421875, "learning_rate": 7.348282382996454e-06, "loss": 27.7155, "step": 204970 }, { "epoch": 0.41407256875285336, "grad_norm": 277.020751953125, "learning_rate": 7.3479742047506955e-06, "loss": 26.0071, "step": 204980 }, { "epoch": 0.41409276938553713, "grad_norm": 426.6697692871094, "learning_rate": 7.347666015061195e-06, "loss": 22.4134, "step": 204990 }, { "epoch": 0.41411297001822095, "grad_norm": 234.8662872314453, "learning_rate": 7.347357813929455e-06, "loss": 21.8143, "step": 205000 }, { "epoch": 0.41413317065090477, "grad_norm": 489.7890319824219, "learning_rate": 7.347049601356977e-06, "loss": 31.2474, "step": 205010 }, { "epoch": 0.4141533712835886, "grad_norm": 285.2772216796875, "learning_rate": 7.346741377345264e-06, "loss": 24.9465, "step": 205020 }, { "epoch": 0.4141735719162724, "grad_norm": 1.8811410665512085, "learning_rate": 7.34643314189582e-06, "loss": 10.8244, "step": 205030 }, { "epoch": 0.41419377254895623, "grad_norm": 250.7355194091797, "learning_rate": 7.346124895010144e-06, "loss": 12.6483, "step": 205040 }, { "epoch": 0.41421397318164005, "grad_norm": 251.49728393554688, "learning_rate": 7.345816636689741e-06, "loss": 21.3302, "step": 205050 }, { "epoch": 0.41423417381432387, "grad_norm": 125.16238403320312, "learning_rate": 7.345508366936111e-06, "loss": 11.915, "step": 205060 }, { "epoch": 0.4142543744470077, "grad_norm": 230.04171752929688, "learning_rate": 7.345200085750758e-06, "loss": 8.947, "step": 205070 }, { "epoch": 0.4142745750796915, "grad_norm": 128.422607421875, "learning_rate": 7.344891793135184e-06, "loss": 40.8065, "step": 205080 }, { "epoch": 0.41429477571237533, "grad_norm": 265.7334289550781, "learning_rate": 7.344583489090893e-06, "loss": 17.3982, "step": 205090 }, { "epoch": 0.41431497634505915, "grad_norm": 402.27801513671875, "learning_rate": 7.344275173619385e-06, "loss": 11.953, "step": 205100 }, { "epoch": 0.41433517697774297, "grad_norm": 233.81332397460938, "learning_rate": 7.343966846722164e-06, "loss": 34.0643, "step": 205110 }, { "epoch": 0.41435537761042673, "grad_norm": 290.8232727050781, "learning_rate": 7.343658508400734e-06, "loss": 13.2563, "step": 205120 }, { "epoch": 0.41437557824311055, "grad_norm": 46.01779556274414, "learning_rate": 7.343350158656596e-06, "loss": 25.1186, "step": 205130 }, { "epoch": 0.4143957788757944, "grad_norm": 40.113651275634766, "learning_rate": 7.343041797491253e-06, "loss": 11.4984, "step": 205140 }, { "epoch": 0.4144159795084782, "grad_norm": 600.0860595703125, "learning_rate": 7.3427334249062085e-06, "loss": 40.364, "step": 205150 }, { "epoch": 0.414436180141162, "grad_norm": 144.65505981445312, "learning_rate": 7.342425040902967e-06, "loss": 20.4215, "step": 205160 }, { "epoch": 0.41445638077384583, "grad_norm": 691.25439453125, "learning_rate": 7.3421166454830295e-06, "loss": 27.9647, "step": 205170 }, { "epoch": 0.41447658140652965, "grad_norm": 391.4794006347656, "learning_rate": 7.341808238647898e-06, "loss": 23.508, "step": 205180 }, { "epoch": 0.4144967820392135, "grad_norm": 385.577880859375, "learning_rate": 7.3414998203990784e-06, "loss": 15.8461, "step": 205190 }, { "epoch": 0.4145169826718973, "grad_norm": 216.5390625, "learning_rate": 7.341191390738073e-06, "loss": 17.7234, "step": 205200 }, { "epoch": 0.4145371833045811, "grad_norm": 203.39524841308594, "learning_rate": 7.340882949666385e-06, "loss": 16.6133, "step": 205210 }, { "epoch": 0.41455738393726493, "grad_norm": 234.35284423828125, "learning_rate": 7.340574497185516e-06, "loss": 16.5258, "step": 205220 }, { "epoch": 0.41457758456994875, "grad_norm": 450.99700927734375, "learning_rate": 7.340266033296972e-06, "loss": 23.4899, "step": 205230 }, { "epoch": 0.4145977852026326, "grad_norm": 117.01457214355469, "learning_rate": 7.339957558002254e-06, "loss": 23.645, "step": 205240 }, { "epoch": 0.41461798583531634, "grad_norm": 0.0, "learning_rate": 7.3396490713028674e-06, "loss": 24.2873, "step": 205250 }, { "epoch": 0.41463818646800016, "grad_norm": 229.01571655273438, "learning_rate": 7.339340573200314e-06, "loss": 16.8973, "step": 205260 }, { "epoch": 0.414658387100684, "grad_norm": 409.83221435546875, "learning_rate": 7.339032063696101e-06, "loss": 17.4457, "step": 205270 }, { "epoch": 0.4146785877333678, "grad_norm": 507.9785461425781, "learning_rate": 7.3387235427917266e-06, "loss": 14.5862, "step": 205280 }, { "epoch": 0.4146987883660516, "grad_norm": 193.5515899658203, "learning_rate": 7.338415010488699e-06, "loss": 17.2647, "step": 205290 }, { "epoch": 0.41471898899873544, "grad_norm": 49.979248046875, "learning_rate": 7.33810646678852e-06, "loss": 11.1292, "step": 205300 }, { "epoch": 0.41473918963141926, "grad_norm": 310.4367370605469, "learning_rate": 7.3377979116926925e-06, "loss": 16.2777, "step": 205310 }, { "epoch": 0.4147593902641031, "grad_norm": 715.353515625, "learning_rate": 7.337489345202723e-06, "loss": 22.822, "step": 205320 }, { "epoch": 0.4147795908967869, "grad_norm": 485.4568786621094, "learning_rate": 7.337180767320113e-06, "loss": 21.2194, "step": 205330 }, { "epoch": 0.4147997915294707, "grad_norm": 470.0102233886719, "learning_rate": 7.336872178046368e-06, "loss": 36.536, "step": 205340 }, { "epoch": 0.41481999216215454, "grad_norm": 533.8746337890625, "learning_rate": 7.33656357738299e-06, "loss": 27.8516, "step": 205350 }, { "epoch": 0.41484019279483836, "grad_norm": 138.65545654296875, "learning_rate": 7.336254965331486e-06, "loss": 12.0903, "step": 205360 }, { "epoch": 0.4148603934275221, "grad_norm": 321.1905212402344, "learning_rate": 7.335946341893359e-06, "loss": 22.5003, "step": 205370 }, { "epoch": 0.41488059406020594, "grad_norm": 466.1109924316406, "learning_rate": 7.335637707070111e-06, "loss": 39.15, "step": 205380 }, { "epoch": 0.41490079469288976, "grad_norm": 465.2254333496094, "learning_rate": 7.33532906086325e-06, "loss": 9.898, "step": 205390 }, { "epoch": 0.4149209953255736, "grad_norm": 249.19764709472656, "learning_rate": 7.335020403274277e-06, "loss": 20.5745, "step": 205400 }, { "epoch": 0.4149411959582574, "grad_norm": 345.005126953125, "learning_rate": 7.334711734304698e-06, "loss": 27.5963, "step": 205410 }, { "epoch": 0.4149613965909412, "grad_norm": 88.71549224853516, "learning_rate": 7.334403053956018e-06, "loss": 16.4582, "step": 205420 }, { "epoch": 0.41498159722362504, "grad_norm": 246.57797241210938, "learning_rate": 7.334094362229741e-06, "loss": 15.2067, "step": 205430 }, { "epoch": 0.41500179785630886, "grad_norm": 310.0328063964844, "learning_rate": 7.333785659127371e-06, "loss": 14.8752, "step": 205440 }, { "epoch": 0.4150219984889927, "grad_norm": 220.1820068359375, "learning_rate": 7.333476944650411e-06, "loss": 19.2921, "step": 205450 }, { "epoch": 0.4150421991216765, "grad_norm": 400.4833984375, "learning_rate": 7.333168218800369e-06, "loss": 19.4952, "step": 205460 }, { "epoch": 0.4150623997543603, "grad_norm": 360.54461669921875, "learning_rate": 7.332859481578747e-06, "loss": 23.1957, "step": 205470 }, { "epoch": 0.41508260038704414, "grad_norm": 78.1609115600586, "learning_rate": 7.332550732987051e-06, "loss": 20.2901, "step": 205480 }, { "epoch": 0.41510280101972796, "grad_norm": 326.21807861328125, "learning_rate": 7.332241973026786e-06, "loss": 21.2341, "step": 205490 }, { "epoch": 0.4151230016524117, "grad_norm": 249.06698608398438, "learning_rate": 7.3319332016994575e-06, "loss": 12.8499, "step": 205500 }, { "epoch": 0.41514320228509555, "grad_norm": 420.4228515625, "learning_rate": 7.331624419006568e-06, "loss": 17.018, "step": 205510 }, { "epoch": 0.41516340291777937, "grad_norm": 351.95233154296875, "learning_rate": 7.331315624949624e-06, "loss": 55.2029, "step": 205520 }, { "epoch": 0.4151836035504632, "grad_norm": 0.0, "learning_rate": 7.33100681953013e-06, "loss": 7.4809, "step": 205530 }, { "epoch": 0.415203804183147, "grad_norm": 221.49917602539062, "learning_rate": 7.330698002749593e-06, "loss": 13.4213, "step": 205540 }, { "epoch": 0.4152240048158308, "grad_norm": 85.44388580322266, "learning_rate": 7.330389174609516e-06, "loss": 13.0862, "step": 205550 }, { "epoch": 0.41524420544851465, "grad_norm": 487.2480163574219, "learning_rate": 7.330080335111405e-06, "loss": 16.3758, "step": 205560 }, { "epoch": 0.41526440608119847, "grad_norm": 0.0, "learning_rate": 7.329771484256764e-06, "loss": 9.5928, "step": 205570 }, { "epoch": 0.4152846067138823, "grad_norm": 506.5587463378906, "learning_rate": 7.3294626220470984e-06, "loss": 14.2606, "step": 205580 }, { "epoch": 0.4153048073465661, "grad_norm": 158.94607543945312, "learning_rate": 7.329153748483918e-06, "loss": 16.2691, "step": 205590 }, { "epoch": 0.4153250079792499, "grad_norm": 350.6422424316406, "learning_rate": 7.3288448635687215e-06, "loss": 21.245, "step": 205600 }, { "epoch": 0.41534520861193375, "grad_norm": 190.71241760253906, "learning_rate": 7.32853596730302e-06, "loss": 20.808, "step": 205610 }, { "epoch": 0.41536540924461757, "grad_norm": 197.82662963867188, "learning_rate": 7.3282270596883155e-06, "loss": 22.7761, "step": 205620 }, { "epoch": 0.41538560987730133, "grad_norm": 288.343017578125, "learning_rate": 7.327918140726115e-06, "loss": 15.4726, "step": 205630 }, { "epoch": 0.41540581050998515, "grad_norm": 260.0469055175781, "learning_rate": 7.327609210417923e-06, "loss": 20.8571, "step": 205640 }, { "epoch": 0.41542601114266897, "grad_norm": 266.2338562011719, "learning_rate": 7.327300268765246e-06, "loss": 18.2455, "step": 205650 }, { "epoch": 0.4154462117753528, "grad_norm": 248.40672302246094, "learning_rate": 7.3269913157695915e-06, "loss": 36.5179, "step": 205660 }, { "epoch": 0.4154664124080366, "grad_norm": 411.75634765625, "learning_rate": 7.326682351432462e-06, "loss": 29.7699, "step": 205670 }, { "epoch": 0.41548661304072043, "grad_norm": 247.29388427734375, "learning_rate": 7.326373375755365e-06, "loss": 14.2005, "step": 205680 }, { "epoch": 0.41550681367340425, "grad_norm": 409.9481201171875, "learning_rate": 7.326064388739806e-06, "loss": 20.8848, "step": 205690 }, { "epoch": 0.41552701430608807, "grad_norm": 355.793212890625, "learning_rate": 7.325755390387293e-06, "loss": 20.0798, "step": 205700 }, { "epoch": 0.4155472149387719, "grad_norm": 184.0315399169922, "learning_rate": 7.325446380699329e-06, "loss": 30.6741, "step": 205710 }, { "epoch": 0.4155674155714557, "grad_norm": 155.53282165527344, "learning_rate": 7.3251373596774214e-06, "loss": 16.5689, "step": 205720 }, { "epoch": 0.41558761620413953, "grad_norm": 521.6328735351562, "learning_rate": 7.324828327323077e-06, "loss": 17.6317, "step": 205730 }, { "epoch": 0.41560781683682335, "grad_norm": 606.6430053710938, "learning_rate": 7.3245192836378e-06, "loss": 42.5909, "step": 205740 }, { "epoch": 0.41562801746950717, "grad_norm": 283.1160583496094, "learning_rate": 7.3242102286231e-06, "loss": 8.6435, "step": 205750 }, { "epoch": 0.41564821810219094, "grad_norm": 80.5193099975586, "learning_rate": 7.323901162280478e-06, "loss": 10.8742, "step": 205760 }, { "epoch": 0.41566841873487476, "grad_norm": 333.7984619140625, "learning_rate": 7.323592084611446e-06, "loss": 20.262, "step": 205770 }, { "epoch": 0.4156886193675586, "grad_norm": 217.42288208007812, "learning_rate": 7.3232829956175074e-06, "loss": 14.0363, "step": 205780 }, { "epoch": 0.4157088200002424, "grad_norm": 329.5908203125, "learning_rate": 7.32297389530017e-06, "loss": 26.6044, "step": 205790 }, { "epoch": 0.4157290206329262, "grad_norm": 308.5736389160156, "learning_rate": 7.32266478366094e-06, "loss": 12.6128, "step": 205800 }, { "epoch": 0.41574922126561004, "grad_norm": 664.8925170898438, "learning_rate": 7.322355660701321e-06, "loss": 22.9849, "step": 205810 }, { "epoch": 0.41576942189829386, "grad_norm": 155.80039978027344, "learning_rate": 7.322046526422824e-06, "loss": 17.4677, "step": 205820 }, { "epoch": 0.4157896225309777, "grad_norm": 343.0555114746094, "learning_rate": 7.321737380826954e-06, "loss": 29.26, "step": 205830 }, { "epoch": 0.4158098231636615, "grad_norm": 151.739990234375, "learning_rate": 7.321428223915217e-06, "loss": 31.4684, "step": 205840 }, { "epoch": 0.4158300237963453, "grad_norm": 106.23629760742188, "learning_rate": 7.321119055689121e-06, "loss": 16.2133, "step": 205850 }, { "epoch": 0.41585022442902914, "grad_norm": 364.6474304199219, "learning_rate": 7.3208098761501714e-06, "loss": 18.9127, "step": 205860 }, { "epoch": 0.41587042506171296, "grad_norm": 411.35552978515625, "learning_rate": 7.320500685299876e-06, "loss": 21.5928, "step": 205870 }, { "epoch": 0.4158906256943968, "grad_norm": 219.50405883789062, "learning_rate": 7.320191483139742e-06, "loss": 24.6386, "step": 205880 }, { "epoch": 0.41591082632708054, "grad_norm": 91.75994873046875, "learning_rate": 7.319882269671277e-06, "loss": 21.9877, "step": 205890 }, { "epoch": 0.41593102695976436, "grad_norm": 157.8224639892578, "learning_rate": 7.319573044895986e-06, "loss": 20.8262, "step": 205900 }, { "epoch": 0.4159512275924482, "grad_norm": 267.9029235839844, "learning_rate": 7.319263808815378e-06, "loss": 19.6867, "step": 205910 }, { "epoch": 0.415971428225132, "grad_norm": 304.75018310546875, "learning_rate": 7.318954561430959e-06, "loss": 14.3852, "step": 205920 }, { "epoch": 0.4159916288578158, "grad_norm": 578.9948120117188, "learning_rate": 7.318645302744237e-06, "loss": 22.8863, "step": 205930 }, { "epoch": 0.41601182949049964, "grad_norm": 850.388671875, "learning_rate": 7.318336032756717e-06, "loss": 26.7475, "step": 205940 }, { "epoch": 0.41603203012318346, "grad_norm": 433.115966796875, "learning_rate": 7.318026751469912e-06, "loss": 9.2737, "step": 205950 }, { "epoch": 0.4160522307558673, "grad_norm": 200.549072265625, "learning_rate": 7.317717458885324e-06, "loss": 19.6071, "step": 205960 }, { "epoch": 0.4160724313885511, "grad_norm": 192.6115264892578, "learning_rate": 7.317408155004462e-06, "loss": 21.9371, "step": 205970 }, { "epoch": 0.4160926320212349, "grad_norm": 62.48785400390625, "learning_rate": 7.317098839828835e-06, "loss": 35.2809, "step": 205980 }, { "epoch": 0.41611283265391874, "grad_norm": 227.67335510253906, "learning_rate": 7.316789513359948e-06, "loss": 22.3525, "step": 205990 }, { "epoch": 0.41613303328660256, "grad_norm": 263.6271667480469, "learning_rate": 7.31648017559931e-06, "loss": 17.6568, "step": 206000 }, { "epoch": 0.4161532339192863, "grad_norm": 318.1611022949219, "learning_rate": 7.316170826548428e-06, "loss": 21.2894, "step": 206010 }, { "epoch": 0.41617343455197015, "grad_norm": 141.9016571044922, "learning_rate": 7.315861466208811e-06, "loss": 12.5801, "step": 206020 }, { "epoch": 0.41619363518465397, "grad_norm": 131.27987670898438, "learning_rate": 7.315552094581966e-06, "loss": 19.7895, "step": 206030 }, { "epoch": 0.4162138358173378, "grad_norm": 113.2379379272461, "learning_rate": 7.315242711669401e-06, "loss": 22.4765, "step": 206040 }, { "epoch": 0.4162340364500216, "grad_norm": 142.2930908203125, "learning_rate": 7.3149333174726246e-06, "loss": 11.2174, "step": 206050 }, { "epoch": 0.4162542370827054, "grad_norm": 376.2331848144531, "learning_rate": 7.314623911993143e-06, "loss": 21.8237, "step": 206060 }, { "epoch": 0.41627443771538924, "grad_norm": 8.397297859191895, "learning_rate": 7.314314495232467e-06, "loss": 21.7181, "step": 206070 }, { "epoch": 0.41629463834807306, "grad_norm": 257.24383544921875, "learning_rate": 7.314005067192099e-06, "loss": 26.2114, "step": 206080 }, { "epoch": 0.4163148389807569, "grad_norm": 1051.022705078125, "learning_rate": 7.313695627873554e-06, "loss": 24.4851, "step": 206090 }, { "epoch": 0.4163350396134407, "grad_norm": 331.6260986328125, "learning_rate": 7.313386177278335e-06, "loss": 24.6697, "step": 206100 }, { "epoch": 0.4163552402461245, "grad_norm": 306.7793273925781, "learning_rate": 7.3130767154079555e-06, "loss": 30.5777, "step": 206110 }, { "epoch": 0.41637544087880834, "grad_norm": 230.6069793701172, "learning_rate": 7.312767242263919e-06, "loss": 8.828, "step": 206120 }, { "epoch": 0.41639564151149216, "grad_norm": 322.91485595703125, "learning_rate": 7.312457757847734e-06, "loss": 15.9705, "step": 206130 }, { "epoch": 0.41641584214417593, "grad_norm": 362.5783386230469, "learning_rate": 7.312148262160913e-06, "loss": 19.4925, "step": 206140 }, { "epoch": 0.41643604277685975, "grad_norm": 398.41046142578125, "learning_rate": 7.31183875520496e-06, "loss": 8.2413, "step": 206150 }, { "epoch": 0.41645624340954357, "grad_norm": 727.8536987304688, "learning_rate": 7.311529236981385e-06, "loss": 56.7082, "step": 206160 }, { "epoch": 0.4164764440422274, "grad_norm": 548.3096313476562, "learning_rate": 7.3112197074916975e-06, "loss": 22.7387, "step": 206170 }, { "epoch": 0.4164966446749112, "grad_norm": 283.8753662109375, "learning_rate": 7.310910166737406e-06, "loss": 22.4877, "step": 206180 }, { "epoch": 0.41651684530759503, "grad_norm": 315.47393798828125, "learning_rate": 7.3106006147200185e-06, "loss": 21.0235, "step": 206190 }, { "epoch": 0.41653704594027885, "grad_norm": 1186.2174072265625, "learning_rate": 7.310291051441044e-06, "loss": 26.9795, "step": 206200 }, { "epoch": 0.41655724657296267, "grad_norm": 245.65757751464844, "learning_rate": 7.30998147690199e-06, "loss": 15.5666, "step": 206210 }, { "epoch": 0.4165774472056465, "grad_norm": 118.85250091552734, "learning_rate": 7.3096718911043675e-06, "loss": 14.0039, "step": 206220 }, { "epoch": 0.4165976478383303, "grad_norm": 176.1197509765625, "learning_rate": 7.309362294049683e-06, "loss": 27.8719, "step": 206230 }, { "epoch": 0.41661784847101413, "grad_norm": 286.16680908203125, "learning_rate": 7.309052685739448e-06, "loss": 14.599, "step": 206240 }, { "epoch": 0.41663804910369795, "grad_norm": 158.51046752929688, "learning_rate": 7.308743066175172e-06, "loss": 22.6672, "step": 206250 }, { "epoch": 0.41665824973638177, "grad_norm": 214.65892028808594, "learning_rate": 7.308433435358357e-06, "loss": 19.7271, "step": 206260 }, { "epoch": 0.41667845036906553, "grad_norm": 246.73822021484375, "learning_rate": 7.308123793290523e-06, "loss": 22.5343, "step": 206270 }, { "epoch": 0.41669865100174935, "grad_norm": 0.0, "learning_rate": 7.307814139973171e-06, "loss": 8.0718, "step": 206280 }, { "epoch": 0.4167188516344332, "grad_norm": 254.7047576904297, "learning_rate": 7.307504475407813e-06, "loss": 29.1165, "step": 206290 }, { "epoch": 0.416739052267117, "grad_norm": 37.85653305053711, "learning_rate": 7.307194799595958e-06, "loss": 20.2401, "step": 206300 }, { "epoch": 0.4167592528998008, "grad_norm": 455.5197448730469, "learning_rate": 7.306885112539116e-06, "loss": 18.9998, "step": 206310 }, { "epoch": 0.41677945353248463, "grad_norm": 97.05670166015625, "learning_rate": 7.306575414238794e-06, "loss": 16.9997, "step": 206320 }, { "epoch": 0.41679965416516845, "grad_norm": 783.0582885742188, "learning_rate": 7.306265704696505e-06, "loss": 31.7786, "step": 206330 }, { "epoch": 0.4168198547978523, "grad_norm": 226.2368621826172, "learning_rate": 7.305955983913756e-06, "loss": 17.0617, "step": 206340 }, { "epoch": 0.4168400554305361, "grad_norm": 154.26406860351562, "learning_rate": 7.305646251892058e-06, "loss": 13.1304, "step": 206350 }, { "epoch": 0.4168602560632199, "grad_norm": 85.54403686523438, "learning_rate": 7.30533650863292e-06, "loss": 16.6437, "step": 206360 }, { "epoch": 0.41688045669590373, "grad_norm": 429.21343994140625, "learning_rate": 7.305026754137849e-06, "loss": 15.7678, "step": 206370 }, { "epoch": 0.41690065732858755, "grad_norm": 467.5975646972656, "learning_rate": 7.304716988408359e-06, "loss": 21.1085, "step": 206380 }, { "epoch": 0.4169208579612714, "grad_norm": 492.19964599609375, "learning_rate": 7.3044072114459585e-06, "loss": 27.7149, "step": 206390 }, { "epoch": 0.41694105859395514, "grad_norm": 513.038818359375, "learning_rate": 7.3040974232521555e-06, "loss": 16.9692, "step": 206400 }, { "epoch": 0.41696125922663896, "grad_norm": 319.0800476074219, "learning_rate": 7.3037876238284625e-06, "loss": 12.5587, "step": 206410 }, { "epoch": 0.4169814598593228, "grad_norm": 262.45556640625, "learning_rate": 7.303477813176385e-06, "loss": 21.4171, "step": 206420 }, { "epoch": 0.4170016604920066, "grad_norm": 93.37469482421875, "learning_rate": 7.303167991297439e-06, "loss": 21.2198, "step": 206430 }, { "epoch": 0.4170218611246904, "grad_norm": 341.7030029296875, "learning_rate": 7.302858158193131e-06, "loss": 24.4291, "step": 206440 }, { "epoch": 0.41704206175737424, "grad_norm": 404.665771484375, "learning_rate": 7.302548313864971e-06, "loss": 14.3783, "step": 206450 }, { "epoch": 0.41706226239005806, "grad_norm": 267.8836669921875, "learning_rate": 7.30223845831447e-06, "loss": 15.2472, "step": 206460 }, { "epoch": 0.4170824630227419, "grad_norm": 433.713623046875, "learning_rate": 7.301928591543137e-06, "loss": 19.1483, "step": 206470 }, { "epoch": 0.4171026636554257, "grad_norm": 434.7796630859375, "learning_rate": 7.301618713552485e-06, "loss": 20.0062, "step": 206480 }, { "epoch": 0.4171228642881095, "grad_norm": 415.1568298339844, "learning_rate": 7.301308824344022e-06, "loss": 23.3544, "step": 206490 }, { "epoch": 0.41714306492079334, "grad_norm": 339.5324401855469, "learning_rate": 7.300998923919259e-06, "loss": 24.4786, "step": 206500 }, { "epoch": 0.41716326555347716, "grad_norm": 95.99866485595703, "learning_rate": 7.300689012279706e-06, "loss": 19.7179, "step": 206510 }, { "epoch": 0.4171834661861609, "grad_norm": 74.35858154296875, "learning_rate": 7.300379089426874e-06, "loss": 22.5468, "step": 206520 }, { "epoch": 0.41720366681884474, "grad_norm": 239.240966796875, "learning_rate": 7.300069155362272e-06, "loss": 26.1255, "step": 206530 }, { "epoch": 0.41722386745152856, "grad_norm": 336.8525085449219, "learning_rate": 7.299759210087415e-06, "loss": 15.1832, "step": 206540 }, { "epoch": 0.4172440680842124, "grad_norm": 403.6529235839844, "learning_rate": 7.299449253603807e-06, "loss": 27.7613, "step": 206550 }, { "epoch": 0.4172642687168962, "grad_norm": 23.99775505065918, "learning_rate": 7.299139285912965e-06, "loss": 12.5595, "step": 206560 }, { "epoch": 0.41728446934958, "grad_norm": 327.5641784667969, "learning_rate": 7.298829307016395e-06, "loss": 10.5379, "step": 206570 }, { "epoch": 0.41730466998226384, "grad_norm": 161.5351104736328, "learning_rate": 7.298519316915611e-06, "loss": 31.4756, "step": 206580 }, { "epoch": 0.41732487061494766, "grad_norm": 270.6864318847656, "learning_rate": 7.298209315612123e-06, "loss": 20.0125, "step": 206590 }, { "epoch": 0.4173450712476315, "grad_norm": 270.8692626953125, "learning_rate": 7.297899303107441e-06, "loss": 23.7548, "step": 206600 }, { "epoch": 0.4173652718803153, "grad_norm": 490.5101318359375, "learning_rate": 7.297589279403076e-06, "loss": 24.9465, "step": 206610 }, { "epoch": 0.4173854725129991, "grad_norm": 407.35784912109375, "learning_rate": 7.297279244500539e-06, "loss": 21.2708, "step": 206620 }, { "epoch": 0.41740567314568294, "grad_norm": 173.758544921875, "learning_rate": 7.296969198401342e-06, "loss": 15.0551, "step": 206630 }, { "epoch": 0.41742587377836676, "grad_norm": 344.61297607421875, "learning_rate": 7.296659141106996e-06, "loss": 32.1319, "step": 206640 }, { "epoch": 0.4174460744110505, "grad_norm": 217.84144592285156, "learning_rate": 7.2963490726190134e-06, "loss": 11.4993, "step": 206650 }, { "epoch": 0.41746627504373435, "grad_norm": 592.6160888671875, "learning_rate": 7.296038992938902e-06, "loss": 19.3051, "step": 206660 }, { "epoch": 0.41748647567641817, "grad_norm": 557.9229736328125, "learning_rate": 7.2957289020681755e-06, "loss": 44.4147, "step": 206670 }, { "epoch": 0.417506676309102, "grad_norm": 297.8149719238281, "learning_rate": 7.295418800008345e-06, "loss": 31.2286, "step": 206680 }, { "epoch": 0.4175268769417858, "grad_norm": 489.1856994628906, "learning_rate": 7.295108686760921e-06, "loss": 23.6133, "step": 206690 }, { "epoch": 0.4175470775744696, "grad_norm": 376.3635559082031, "learning_rate": 7.294798562327417e-06, "loss": 17.9782, "step": 206700 }, { "epoch": 0.41756727820715345, "grad_norm": 563.873046875, "learning_rate": 7.2944884267093405e-06, "loss": 20.2396, "step": 206710 }, { "epoch": 0.41758747883983727, "grad_norm": 13.3892183303833, "learning_rate": 7.294178279908208e-06, "loss": 42.8439, "step": 206720 }, { "epoch": 0.4176076794725211, "grad_norm": 396.2105407714844, "learning_rate": 7.293868121925528e-06, "loss": 11.7718, "step": 206730 }, { "epoch": 0.4176278801052049, "grad_norm": 222.01210021972656, "learning_rate": 7.293557952762813e-06, "loss": 15.6826, "step": 206740 }, { "epoch": 0.4176480807378887, "grad_norm": 343.85357666015625, "learning_rate": 7.293247772421577e-06, "loss": 16.2857, "step": 206750 }, { "epoch": 0.41766828137057255, "grad_norm": 243.59780883789062, "learning_rate": 7.292937580903326e-06, "loss": 14.8061, "step": 206760 }, { "epoch": 0.41768848200325637, "grad_norm": 10.68545913696289, "learning_rate": 7.2926273782095766e-06, "loss": 22.0627, "step": 206770 }, { "epoch": 0.41770868263594013, "grad_norm": 210.8752899169922, "learning_rate": 7.29231716434184e-06, "loss": 19.5811, "step": 206780 }, { "epoch": 0.41772888326862395, "grad_norm": 308.42840576171875, "learning_rate": 7.292006939301627e-06, "loss": 28.7006, "step": 206790 }, { "epoch": 0.41774908390130777, "grad_norm": 287.4221496582031, "learning_rate": 7.291696703090449e-06, "loss": 14.2412, "step": 206800 }, { "epoch": 0.4177692845339916, "grad_norm": 210.3727569580078, "learning_rate": 7.291386455709823e-06, "loss": 15.9892, "step": 206810 }, { "epoch": 0.4177894851666754, "grad_norm": 121.81523895263672, "learning_rate": 7.291076197161253e-06, "loss": 19.1965, "step": 206820 }, { "epoch": 0.41780968579935923, "grad_norm": 197.18771362304688, "learning_rate": 7.290765927446258e-06, "loss": 27.4721, "step": 206830 }, { "epoch": 0.41782988643204305, "grad_norm": 182.56834411621094, "learning_rate": 7.290455646566347e-06, "loss": 30.144, "step": 206840 }, { "epoch": 0.41785008706472687, "grad_norm": 326.1869201660156, "learning_rate": 7.2901453545230325e-06, "loss": 8.9949, "step": 206850 }, { "epoch": 0.4178702876974107, "grad_norm": 320.1968688964844, "learning_rate": 7.289835051317828e-06, "loss": 15.4973, "step": 206860 }, { "epoch": 0.4178904883300945, "grad_norm": 536.5492553710938, "learning_rate": 7.289524736952245e-06, "loss": 19.1071, "step": 206870 }, { "epoch": 0.41791068896277833, "grad_norm": 451.21441650390625, "learning_rate": 7.289214411427796e-06, "loss": 16.233, "step": 206880 }, { "epoch": 0.41793088959546215, "grad_norm": 1147.4725341796875, "learning_rate": 7.288904074745994e-06, "loss": 20.669, "step": 206890 }, { "epoch": 0.41795109022814597, "grad_norm": 216.52194213867188, "learning_rate": 7.288593726908351e-06, "loss": 14.6417, "step": 206900 }, { "epoch": 0.41797129086082974, "grad_norm": 294.5904235839844, "learning_rate": 7.28828336791638e-06, "loss": 19.6402, "step": 206910 }, { "epoch": 0.41799149149351356, "grad_norm": 269.4460754394531, "learning_rate": 7.287972997771592e-06, "loss": 18.9895, "step": 206920 }, { "epoch": 0.4180116921261974, "grad_norm": 94.86051940917969, "learning_rate": 7.287662616475504e-06, "loss": 18.379, "step": 206930 }, { "epoch": 0.4180318927588812, "grad_norm": 394.1547546386719, "learning_rate": 7.287352224029623e-06, "loss": 37.444, "step": 206940 }, { "epoch": 0.418052093391565, "grad_norm": 401.5856018066406, "learning_rate": 7.287041820435465e-06, "loss": 28.2786, "step": 206950 }, { "epoch": 0.41807229402424884, "grad_norm": 142.1681671142578, "learning_rate": 7.286731405694544e-06, "loss": 5.3537, "step": 206960 }, { "epoch": 0.41809249465693266, "grad_norm": 174.61636352539062, "learning_rate": 7.28642097980837e-06, "loss": 24.2253, "step": 206970 }, { "epoch": 0.4181126952896165, "grad_norm": 396.6031494140625, "learning_rate": 7.286110542778459e-06, "loss": 25.0322, "step": 206980 }, { "epoch": 0.4181328959223003, "grad_norm": 281.1639404296875, "learning_rate": 7.285800094606321e-06, "loss": 11.6152, "step": 206990 }, { "epoch": 0.4181530965549841, "grad_norm": 307.69317626953125, "learning_rate": 7.285489635293472e-06, "loss": 21.1079, "step": 207000 }, { "epoch": 0.41817329718766794, "grad_norm": 561.9136352539062, "learning_rate": 7.2851791648414226e-06, "loss": 26.6682, "step": 207010 }, { "epoch": 0.41819349782035176, "grad_norm": 199.8949737548828, "learning_rate": 7.284868683251688e-06, "loss": 18.1372, "step": 207020 }, { "epoch": 0.4182136984530356, "grad_norm": 98.99993133544922, "learning_rate": 7.284558190525779e-06, "loss": 16.5955, "step": 207030 }, { "epoch": 0.41823389908571934, "grad_norm": 207.32949829101562, "learning_rate": 7.284247686665212e-06, "loss": 15.3724, "step": 207040 }, { "epoch": 0.41825409971840316, "grad_norm": 439.5002746582031, "learning_rate": 7.283937171671498e-06, "loss": 22.2698, "step": 207050 }, { "epoch": 0.418274300351087, "grad_norm": 109.05426025390625, "learning_rate": 7.283626645546152e-06, "loss": 18.2678, "step": 207060 }, { "epoch": 0.4182945009837708, "grad_norm": 207.6930694580078, "learning_rate": 7.283316108290685e-06, "loss": 11.7266, "step": 207070 }, { "epoch": 0.4183147016164546, "grad_norm": 233.33383178710938, "learning_rate": 7.283005559906614e-06, "loss": 14.2277, "step": 207080 }, { "epoch": 0.41833490224913844, "grad_norm": 51.24480056762695, "learning_rate": 7.282695000395451e-06, "loss": 20.5501, "step": 207090 }, { "epoch": 0.41835510288182226, "grad_norm": 156.79656982421875, "learning_rate": 7.282384429758709e-06, "loss": 16.2376, "step": 207100 }, { "epoch": 0.4183753035145061, "grad_norm": 154.17478942871094, "learning_rate": 7.282073847997901e-06, "loss": 10.9338, "step": 207110 }, { "epoch": 0.4183955041471899, "grad_norm": 7.515901565551758, "learning_rate": 7.281763255114542e-06, "loss": 22.8859, "step": 207120 }, { "epoch": 0.4184157047798737, "grad_norm": 359.1336669921875, "learning_rate": 7.281452651110148e-06, "loss": 19.6651, "step": 207130 }, { "epoch": 0.41843590541255754, "grad_norm": 304.8218688964844, "learning_rate": 7.281142035986227e-06, "loss": 18.6777, "step": 207140 }, { "epoch": 0.41845610604524136, "grad_norm": 154.61358642578125, "learning_rate": 7.280831409744299e-06, "loss": 18.0453, "step": 207150 }, { "epoch": 0.4184763066779251, "grad_norm": 399.9915466308594, "learning_rate": 7.280520772385875e-06, "loss": 8.4157, "step": 207160 }, { "epoch": 0.41849650731060895, "grad_norm": 577.450927734375, "learning_rate": 7.280210123912468e-06, "loss": 15.1861, "step": 207170 }, { "epoch": 0.41851670794329277, "grad_norm": 226.8422088623047, "learning_rate": 7.2798994643255945e-06, "loss": 21.1062, "step": 207180 }, { "epoch": 0.4185369085759766, "grad_norm": 561.8253784179688, "learning_rate": 7.279588793626767e-06, "loss": 26.1197, "step": 207190 }, { "epoch": 0.4185571092086604, "grad_norm": 277.5335693359375, "learning_rate": 7.279278111817502e-06, "loss": 15.6091, "step": 207200 }, { "epoch": 0.4185773098413442, "grad_norm": 181.7730712890625, "learning_rate": 7.2789674188993096e-06, "loss": 20.6322, "step": 207210 }, { "epoch": 0.41859751047402805, "grad_norm": 3.0093138217926025, "learning_rate": 7.278656714873707e-06, "loss": 18.2728, "step": 207220 }, { "epoch": 0.41861771110671187, "grad_norm": 252.72630310058594, "learning_rate": 7.2783459997422075e-06, "loss": 23.6404, "step": 207230 }, { "epoch": 0.4186379117393957, "grad_norm": 186.35110473632812, "learning_rate": 7.278035273506327e-06, "loss": 25.2977, "step": 207240 }, { "epoch": 0.4186581123720795, "grad_norm": 399.3916931152344, "learning_rate": 7.2777245361675786e-06, "loss": 25.8033, "step": 207250 }, { "epoch": 0.4186783130047633, "grad_norm": 137.1000518798828, "learning_rate": 7.277413787727478e-06, "loss": 21.4045, "step": 207260 }, { "epoch": 0.41869851363744715, "grad_norm": 433.19085693359375, "learning_rate": 7.277103028187536e-06, "loss": 32.8902, "step": 207270 }, { "epoch": 0.41871871427013097, "grad_norm": 558.540283203125, "learning_rate": 7.276792257549273e-06, "loss": 13.3337, "step": 207280 }, { "epoch": 0.41873891490281473, "grad_norm": 143.8072509765625, "learning_rate": 7.276481475814199e-06, "loss": 24.6033, "step": 207290 }, { "epoch": 0.41875911553549855, "grad_norm": 389.7957458496094, "learning_rate": 7.27617068298383e-06, "loss": 23.4558, "step": 207300 }, { "epoch": 0.41877931616818237, "grad_norm": 688.309814453125, "learning_rate": 7.2758598790596836e-06, "loss": 29.055, "step": 207310 }, { "epoch": 0.4187995168008662, "grad_norm": 129.86865234375, "learning_rate": 7.275549064043269e-06, "loss": 28.291, "step": 207320 }, { "epoch": 0.41881971743355, "grad_norm": 303.3720703125, "learning_rate": 7.275238237936106e-06, "loss": 19.5779, "step": 207330 }, { "epoch": 0.41883991806623383, "grad_norm": 243.81149291992188, "learning_rate": 7.274927400739708e-06, "loss": 24.6308, "step": 207340 }, { "epoch": 0.41886011869891765, "grad_norm": 220.48731994628906, "learning_rate": 7.274616552455589e-06, "loss": 22.2188, "step": 207350 }, { "epoch": 0.41888031933160147, "grad_norm": 158.1676788330078, "learning_rate": 7.274305693085266e-06, "loss": 10.5716, "step": 207360 }, { "epoch": 0.4189005199642853, "grad_norm": 518.6220703125, "learning_rate": 7.273994822630251e-06, "loss": 18.9787, "step": 207370 }, { "epoch": 0.4189207205969691, "grad_norm": 474.80657958984375, "learning_rate": 7.273683941092063e-06, "loss": 21.6741, "step": 207380 }, { "epoch": 0.41894092122965293, "grad_norm": 349.7982482910156, "learning_rate": 7.273373048472214e-06, "loss": 19.4303, "step": 207390 }, { "epoch": 0.41896112186233675, "grad_norm": 370.5306701660156, "learning_rate": 7.27306214477222e-06, "loss": 23.7624, "step": 207400 }, { "epoch": 0.41898132249502057, "grad_norm": 197.60379028320312, "learning_rate": 7.272751229993598e-06, "loss": 12.3, "step": 207410 }, { "epoch": 0.41900152312770433, "grad_norm": 462.4417724609375, "learning_rate": 7.272440304137862e-06, "loss": 26.4219, "step": 207420 }, { "epoch": 0.41902172376038815, "grad_norm": 119.20293426513672, "learning_rate": 7.2721293672065275e-06, "loss": 35.3381, "step": 207430 }, { "epoch": 0.419041924393072, "grad_norm": 518.6134643554688, "learning_rate": 7.27181841920111e-06, "loss": 19.8565, "step": 207440 }, { "epoch": 0.4190621250257558, "grad_norm": 220.7142333984375, "learning_rate": 7.271507460123124e-06, "loss": 13.2324, "step": 207450 }, { "epoch": 0.4190823256584396, "grad_norm": 311.3455505371094, "learning_rate": 7.271196489974087e-06, "loss": 25.9017, "step": 207460 }, { "epoch": 0.41910252629112343, "grad_norm": 181.4228057861328, "learning_rate": 7.270885508755515e-06, "loss": 22.2446, "step": 207470 }, { "epoch": 0.41912272692380725, "grad_norm": 322.5124816894531, "learning_rate": 7.27057451646892e-06, "loss": 13.1965, "step": 207480 }, { "epoch": 0.4191429275564911, "grad_norm": 197.50765991210938, "learning_rate": 7.270263513115823e-06, "loss": 9.8446, "step": 207490 }, { "epoch": 0.4191631281891749, "grad_norm": 177.5516815185547, "learning_rate": 7.269952498697734e-06, "loss": 9.4513, "step": 207500 }, { "epoch": 0.4191833288218587, "grad_norm": 278.1107482910156, "learning_rate": 7.269641473216174e-06, "loss": 20.766, "step": 207510 }, { "epoch": 0.41920352945454253, "grad_norm": 183.46688842773438, "learning_rate": 7.269330436672656e-06, "loss": 15.0118, "step": 207520 }, { "epoch": 0.41922373008722635, "grad_norm": 202.0924530029297, "learning_rate": 7.269019389068697e-06, "loss": 23.0774, "step": 207530 }, { "epoch": 0.4192439307199102, "grad_norm": 429.0045471191406, "learning_rate": 7.2687083304058125e-06, "loss": 30.0506, "step": 207540 }, { "epoch": 0.41926413135259394, "grad_norm": 59.426509857177734, "learning_rate": 7.268397260685518e-06, "loss": 20.5187, "step": 207550 }, { "epoch": 0.41928433198527776, "grad_norm": 135.19903564453125, "learning_rate": 7.268086179909331e-06, "loss": 17.1043, "step": 207560 }, { "epoch": 0.4193045326179616, "grad_norm": 562.0259399414062, "learning_rate": 7.267775088078768e-06, "loss": 22.161, "step": 207570 }, { "epoch": 0.4193247332506454, "grad_norm": 214.99696350097656, "learning_rate": 7.267463985195343e-06, "loss": 31.2766, "step": 207580 }, { "epoch": 0.4193449338833292, "grad_norm": 366.89410400390625, "learning_rate": 7.267152871260573e-06, "loss": 20.4169, "step": 207590 }, { "epoch": 0.41936513451601304, "grad_norm": 192.9465789794922, "learning_rate": 7.266841746275977e-06, "loss": 20.8616, "step": 207600 }, { "epoch": 0.41938533514869686, "grad_norm": 242.76910400390625, "learning_rate": 7.266530610243068e-06, "loss": 13.643, "step": 207610 }, { "epoch": 0.4194055357813807, "grad_norm": 56.5727424621582, "learning_rate": 7.266219463163363e-06, "loss": 23.8141, "step": 207620 }, { "epoch": 0.4194257364140645, "grad_norm": 5.056003570556641, "learning_rate": 7.265908305038381e-06, "loss": 13.4956, "step": 207630 }, { "epoch": 0.4194459370467483, "grad_norm": 175.9929962158203, "learning_rate": 7.265597135869635e-06, "loss": 13.3963, "step": 207640 }, { "epoch": 0.41946613767943214, "grad_norm": 0.0, "learning_rate": 7.265285955658645e-06, "loss": 16.8123, "step": 207650 }, { "epoch": 0.41948633831211596, "grad_norm": 347.4056396484375, "learning_rate": 7.264974764406924e-06, "loss": 10.6286, "step": 207660 }, { "epoch": 0.4195065389447998, "grad_norm": 544.3998413085938, "learning_rate": 7.2646635621159925e-06, "loss": 17.3291, "step": 207670 }, { "epoch": 0.41952673957748354, "grad_norm": 429.03985595703125, "learning_rate": 7.264352348787364e-06, "loss": 17.9437, "step": 207680 }, { "epoch": 0.41954694021016736, "grad_norm": 382.8370666503906, "learning_rate": 7.2640411244225576e-06, "loss": 14.7673, "step": 207690 }, { "epoch": 0.4195671408428512, "grad_norm": 214.17189025878906, "learning_rate": 7.26372988902309e-06, "loss": 30.3906, "step": 207700 }, { "epoch": 0.419587341475535, "grad_norm": 325.3562316894531, "learning_rate": 7.263418642590476e-06, "loss": 14.1428, "step": 207710 }, { "epoch": 0.4196075421082188, "grad_norm": 247.222412109375, "learning_rate": 7.263107385126236e-06, "loss": 15.0379, "step": 207720 }, { "epoch": 0.41962774274090264, "grad_norm": 548.447509765625, "learning_rate": 7.262796116631882e-06, "loss": 15.2464, "step": 207730 }, { "epoch": 0.41964794337358646, "grad_norm": 235.99559020996094, "learning_rate": 7.262484837108937e-06, "loss": 33.5099, "step": 207740 }, { "epoch": 0.4196681440062703, "grad_norm": 163.82022094726562, "learning_rate": 7.262173546558914e-06, "loss": 17.3102, "step": 207750 }, { "epoch": 0.4196883446389541, "grad_norm": 252.9697723388672, "learning_rate": 7.261862244983333e-06, "loss": 20.8391, "step": 207760 }, { "epoch": 0.4197085452716379, "grad_norm": 132.9105682373047, "learning_rate": 7.261550932383707e-06, "loss": 6.3647, "step": 207770 }, { "epoch": 0.41972874590432174, "grad_norm": 114.45140838623047, "learning_rate": 7.2612396087615586e-06, "loss": 18.495, "step": 207780 }, { "epoch": 0.41974894653700556, "grad_norm": 209.42050170898438, "learning_rate": 7.260928274118402e-06, "loss": 28.5052, "step": 207790 }, { "epoch": 0.4197691471696893, "grad_norm": 485.1256103515625, "learning_rate": 7.260616928455754e-06, "loss": 18.97, "step": 207800 }, { "epoch": 0.41978934780237315, "grad_norm": 375.1117858886719, "learning_rate": 7.260305571775135e-06, "loss": 29.413, "step": 207810 }, { "epoch": 0.41980954843505697, "grad_norm": 238.83375549316406, "learning_rate": 7.2599942040780605e-06, "loss": 20.7452, "step": 207820 }, { "epoch": 0.4198297490677408, "grad_norm": 601.5579223632812, "learning_rate": 7.259682825366047e-06, "loss": 16.0197, "step": 207830 }, { "epoch": 0.4198499497004246, "grad_norm": 406.24560546875, "learning_rate": 7.2593714356406146e-06, "loss": 23.8918, "step": 207840 }, { "epoch": 0.4198701503331084, "grad_norm": 249.09622192382812, "learning_rate": 7.259060034903278e-06, "loss": 17.1813, "step": 207850 }, { "epoch": 0.41989035096579225, "grad_norm": 79.64942932128906, "learning_rate": 7.258748623155558e-06, "loss": 14.6886, "step": 207860 }, { "epoch": 0.41991055159847607, "grad_norm": 103.80247497558594, "learning_rate": 7.258437200398974e-06, "loss": 26.8912, "step": 207870 }, { "epoch": 0.4199307522311599, "grad_norm": 704.4451904296875, "learning_rate": 7.258125766635038e-06, "loss": 20.7482, "step": 207880 }, { "epoch": 0.4199509528638437, "grad_norm": 113.96632385253906, "learning_rate": 7.257814321865271e-06, "loss": 12.6618, "step": 207890 }, { "epoch": 0.4199711534965275, "grad_norm": 248.24281311035156, "learning_rate": 7.257502866091192e-06, "loss": 22.3103, "step": 207900 }, { "epoch": 0.41999135412921135, "grad_norm": 480.1746826171875, "learning_rate": 7.257191399314315e-06, "loss": 14.8609, "step": 207910 }, { "epoch": 0.42001155476189517, "grad_norm": 24.040891647338867, "learning_rate": 7.256879921536164e-06, "loss": 23.866, "step": 207920 }, { "epoch": 0.42003175539457893, "grad_norm": 179.66920471191406, "learning_rate": 7.256568432758252e-06, "loss": 15.963, "step": 207930 }, { "epoch": 0.42005195602726275, "grad_norm": 465.275390625, "learning_rate": 7.256256932982101e-06, "loss": 19.6779, "step": 207940 }, { "epoch": 0.42007215665994657, "grad_norm": 65.21052551269531, "learning_rate": 7.2559454222092265e-06, "loss": 17.0778, "step": 207950 }, { "epoch": 0.4200923572926304, "grad_norm": 323.18841552734375, "learning_rate": 7.255633900441147e-06, "loss": 25.4911, "step": 207960 }, { "epoch": 0.4201125579253142, "grad_norm": 0.0, "learning_rate": 7.255322367679382e-06, "loss": 13.4181, "step": 207970 }, { "epoch": 0.42013275855799803, "grad_norm": 126.66571807861328, "learning_rate": 7.255010823925448e-06, "loss": 24.602, "step": 207980 }, { "epoch": 0.42015295919068185, "grad_norm": 326.78173828125, "learning_rate": 7.254699269180867e-06, "loss": 17.0724, "step": 207990 }, { "epoch": 0.42017315982336567, "grad_norm": 351.76123046875, "learning_rate": 7.254387703447154e-06, "loss": 20.4942, "step": 208000 }, { "epoch": 0.4201933604560495, "grad_norm": 604.0069580078125, "learning_rate": 7.25407612672583e-06, "loss": 13.7497, "step": 208010 }, { "epoch": 0.4202135610887333, "grad_norm": 183.9239044189453, "learning_rate": 7.253764539018411e-06, "loss": 21.2994, "step": 208020 }, { "epoch": 0.42023376172141713, "grad_norm": 31.0350341796875, "learning_rate": 7.253452940326418e-06, "loss": 18.8755, "step": 208030 }, { "epoch": 0.42025396235410095, "grad_norm": 467.78173828125, "learning_rate": 7.253141330651367e-06, "loss": 20.2604, "step": 208040 }, { "epoch": 0.42027416298678477, "grad_norm": 144.03231811523438, "learning_rate": 7.2528297099947796e-06, "loss": 19.7643, "step": 208050 }, { "epoch": 0.42029436361946854, "grad_norm": 291.4289245605469, "learning_rate": 7.252518078358173e-06, "loss": 29.1008, "step": 208060 }, { "epoch": 0.42031456425215236, "grad_norm": 405.0200500488281, "learning_rate": 7.252206435743067e-06, "loss": 20.147, "step": 208070 }, { "epoch": 0.4203347648848362, "grad_norm": 173.55471801757812, "learning_rate": 7.251894782150981e-06, "loss": 11.2244, "step": 208080 }, { "epoch": 0.42035496551752, "grad_norm": 275.7857666015625, "learning_rate": 7.251583117583429e-06, "loss": 17.153, "step": 208090 }, { "epoch": 0.4203751661502038, "grad_norm": 400.6690979003906, "learning_rate": 7.251271442041938e-06, "loss": 31.9011, "step": 208100 }, { "epoch": 0.42039536678288764, "grad_norm": 389.43804931640625, "learning_rate": 7.250959755528022e-06, "loss": 17.362, "step": 208110 }, { "epoch": 0.42041556741557146, "grad_norm": 646.1773071289062, "learning_rate": 7.2506480580432005e-06, "loss": 23.0935, "step": 208120 }, { "epoch": 0.4204357680482553, "grad_norm": 765.6721801757812, "learning_rate": 7.250336349588995e-06, "loss": 28.1013, "step": 208130 }, { "epoch": 0.4204559686809391, "grad_norm": 34.49343490600586, "learning_rate": 7.250024630166921e-06, "loss": 25.5272, "step": 208140 }, { "epoch": 0.4204761693136229, "grad_norm": 361.8033752441406, "learning_rate": 7.2497128997785e-06, "loss": 11.5341, "step": 208150 }, { "epoch": 0.42049636994630674, "grad_norm": 188.29981994628906, "learning_rate": 7.249401158425252e-06, "loss": 20.4569, "step": 208160 }, { "epoch": 0.42051657057899056, "grad_norm": 230.90499877929688, "learning_rate": 7.249089406108696e-06, "loss": 11.9958, "step": 208170 }, { "epoch": 0.4205367712116744, "grad_norm": 197.0443572998047, "learning_rate": 7.248777642830351e-06, "loss": 13.2978, "step": 208180 }, { "epoch": 0.42055697184435814, "grad_norm": 314.84832763671875, "learning_rate": 7.248465868591735e-06, "loss": 20.0842, "step": 208190 }, { "epoch": 0.42057717247704196, "grad_norm": 82.34967041015625, "learning_rate": 7.24815408339437e-06, "loss": 8.472, "step": 208200 }, { "epoch": 0.4205973731097258, "grad_norm": 260.7162170410156, "learning_rate": 7.247842287239775e-06, "loss": 16.221, "step": 208210 }, { "epoch": 0.4206175737424096, "grad_norm": 51.487396240234375, "learning_rate": 7.247530480129469e-06, "loss": 15.7676, "step": 208220 }, { "epoch": 0.4206377743750934, "grad_norm": 361.9606628417969, "learning_rate": 7.247218662064972e-06, "loss": 29.6978, "step": 208230 }, { "epoch": 0.42065797500777724, "grad_norm": 372.94659423828125, "learning_rate": 7.2469068330478046e-06, "loss": 14.128, "step": 208240 }, { "epoch": 0.42067817564046106, "grad_norm": 159.37596130371094, "learning_rate": 7.246594993079483e-06, "loss": 17.6873, "step": 208250 }, { "epoch": 0.4206983762731449, "grad_norm": 26.09391975402832, "learning_rate": 7.246283142161533e-06, "loss": 10.4402, "step": 208260 }, { "epoch": 0.4207185769058287, "grad_norm": 21.074125289916992, "learning_rate": 7.245971280295469e-06, "loss": 22.6484, "step": 208270 }, { "epoch": 0.4207387775385125, "grad_norm": 138.3171844482422, "learning_rate": 7.245659407482815e-06, "loss": 16.4843, "step": 208280 }, { "epoch": 0.42075897817119634, "grad_norm": 149.5893096923828, "learning_rate": 7.2453475237250895e-06, "loss": 19.4389, "step": 208290 }, { "epoch": 0.42077917880388016, "grad_norm": 596.9138793945312, "learning_rate": 7.245035629023812e-06, "loss": 22.865, "step": 208300 }, { "epoch": 0.420799379436564, "grad_norm": 118.26265716552734, "learning_rate": 7.244723723380504e-06, "loss": 25.8048, "step": 208310 }, { "epoch": 0.42081958006924775, "grad_norm": 184.2266845703125, "learning_rate": 7.244411806796684e-06, "loss": 17.4876, "step": 208320 }, { "epoch": 0.42083978070193157, "grad_norm": 308.9013366699219, "learning_rate": 7.244099879273873e-06, "loss": 28.3907, "step": 208330 }, { "epoch": 0.4208599813346154, "grad_norm": 106.57027435302734, "learning_rate": 7.243787940813591e-06, "loss": 22.3949, "step": 208340 }, { "epoch": 0.4208801819672992, "grad_norm": 89.88408660888672, "learning_rate": 7.24347599141736e-06, "loss": 18.5721, "step": 208350 }, { "epoch": 0.420900382599983, "grad_norm": 285.4138488769531, "learning_rate": 7.243164031086697e-06, "loss": 29.1839, "step": 208360 }, { "epoch": 0.42092058323266685, "grad_norm": 199.2744903564453, "learning_rate": 7.242852059823127e-06, "loss": 14.3809, "step": 208370 }, { "epoch": 0.42094078386535067, "grad_norm": 116.57947540283203, "learning_rate": 7.2425400776281665e-06, "loss": 15.584, "step": 208380 }, { "epoch": 0.4209609844980345, "grad_norm": 493.33953857421875, "learning_rate": 7.242228084503338e-06, "loss": 13.0162, "step": 208390 }, { "epoch": 0.4209811851307183, "grad_norm": 358.26507568359375, "learning_rate": 7.241916080450163e-06, "loss": 10.4708, "step": 208400 }, { "epoch": 0.4210013857634021, "grad_norm": 395.22869873046875, "learning_rate": 7.241604065470158e-06, "loss": 23.2537, "step": 208410 }, { "epoch": 0.42102158639608595, "grad_norm": 98.81809997558594, "learning_rate": 7.24129203956485e-06, "loss": 15.9572, "step": 208420 }, { "epoch": 0.42104178702876977, "grad_norm": 268.452392578125, "learning_rate": 7.240980002735754e-06, "loss": 10.7321, "step": 208430 }, { "epoch": 0.42106198766145353, "grad_norm": 446.9203186035156, "learning_rate": 7.240667954984395e-06, "loss": 24.6626, "step": 208440 }, { "epoch": 0.42108218829413735, "grad_norm": 542.419189453125, "learning_rate": 7.24035589631229e-06, "loss": 22.4491, "step": 208450 }, { "epoch": 0.42110238892682117, "grad_norm": 250.5294647216797, "learning_rate": 7.240043826720964e-06, "loss": 26.6069, "step": 208460 }, { "epoch": 0.421122589559505, "grad_norm": 1683.912353515625, "learning_rate": 7.239731746211936e-06, "loss": 27.9319, "step": 208470 }, { "epoch": 0.4211427901921888, "grad_norm": 46.853206634521484, "learning_rate": 7.239419654786727e-06, "loss": 10.3063, "step": 208480 }, { "epoch": 0.42116299082487263, "grad_norm": 388.7464904785156, "learning_rate": 7.239107552446858e-06, "loss": 23.5464, "step": 208490 }, { "epoch": 0.42118319145755645, "grad_norm": 181.59056091308594, "learning_rate": 7.238795439193849e-06, "loss": 12.7317, "step": 208500 }, { "epoch": 0.42120339209024027, "grad_norm": 180.44216918945312, "learning_rate": 7.2384833150292234e-06, "loss": 24.1294, "step": 208510 }, { "epoch": 0.4212235927229241, "grad_norm": 265.2977600097656, "learning_rate": 7.238171179954502e-06, "loss": 21.3883, "step": 208520 }, { "epoch": 0.4212437933556079, "grad_norm": 463.5624084472656, "learning_rate": 7.237859033971206e-06, "loss": 16.7439, "step": 208530 }, { "epoch": 0.42126399398829173, "grad_norm": 678.9560546875, "learning_rate": 7.2375468770808555e-06, "loss": 32.5567, "step": 208540 }, { "epoch": 0.42128419462097555, "grad_norm": 391.43048095703125, "learning_rate": 7.2372347092849744e-06, "loss": 17.2615, "step": 208550 }, { "epoch": 0.42130439525365937, "grad_norm": 164.27000427246094, "learning_rate": 7.236922530585082e-06, "loss": 18.1402, "step": 208560 }, { "epoch": 0.42132459588634313, "grad_norm": 185.44174194335938, "learning_rate": 7.236610340982699e-06, "loss": 14.8023, "step": 208570 }, { "epoch": 0.42134479651902695, "grad_norm": 184.92828369140625, "learning_rate": 7.236298140479352e-06, "loss": 26.6432, "step": 208580 }, { "epoch": 0.4213649971517108, "grad_norm": 80.50387573242188, "learning_rate": 7.235985929076556e-06, "loss": 24.4503, "step": 208590 }, { "epoch": 0.4213851977843946, "grad_norm": 366.666259765625, "learning_rate": 7.235673706775837e-06, "loss": 17.2325, "step": 208600 }, { "epoch": 0.4214053984170784, "grad_norm": 105.18376159667969, "learning_rate": 7.235361473578715e-06, "loss": 13.7788, "step": 208610 }, { "epoch": 0.42142559904976223, "grad_norm": 119.50759887695312, "learning_rate": 7.235049229486713e-06, "loss": 24.8205, "step": 208620 }, { "epoch": 0.42144579968244605, "grad_norm": 303.9819641113281, "learning_rate": 7.23473697450135e-06, "loss": 13.606, "step": 208630 }, { "epoch": 0.4214660003151299, "grad_norm": 259.4858703613281, "learning_rate": 7.234424708624152e-06, "loss": 22.5136, "step": 208640 }, { "epoch": 0.4214862009478137, "grad_norm": 406.2663879394531, "learning_rate": 7.234112431856639e-06, "loss": 12.5032, "step": 208650 }, { "epoch": 0.4215064015804975, "grad_norm": 130.26148986816406, "learning_rate": 7.233800144200332e-06, "loss": 6.6625, "step": 208660 }, { "epoch": 0.42152660221318133, "grad_norm": 178.67352294921875, "learning_rate": 7.233487845656755e-06, "loss": 21.6145, "step": 208670 }, { "epoch": 0.42154680284586515, "grad_norm": 176.14096069335938, "learning_rate": 7.233175536227428e-06, "loss": 14.7197, "step": 208680 }, { "epoch": 0.421567003478549, "grad_norm": 230.51040649414062, "learning_rate": 7.2328632159138764e-06, "loss": 17.2008, "step": 208690 }, { "epoch": 0.42158720411123274, "grad_norm": 413.6365661621094, "learning_rate": 7.2325508847176175e-06, "loss": 21.817, "step": 208700 }, { "epoch": 0.42160740474391656, "grad_norm": 148.48524475097656, "learning_rate": 7.232238542640178e-06, "loss": 8.2442, "step": 208710 }, { "epoch": 0.4216276053766004, "grad_norm": 191.02044677734375, "learning_rate": 7.23192618968308e-06, "loss": 26.1476, "step": 208720 }, { "epoch": 0.4216478060092842, "grad_norm": 228.0900115966797, "learning_rate": 7.231613825847842e-06, "loss": 21.2992, "step": 208730 }, { "epoch": 0.421668006641968, "grad_norm": 164.08668518066406, "learning_rate": 7.23130145113599e-06, "loss": 15.5707, "step": 208740 }, { "epoch": 0.42168820727465184, "grad_norm": 158.39845275878906, "learning_rate": 7.2309890655490446e-06, "loss": 14.2396, "step": 208750 }, { "epoch": 0.42170840790733566, "grad_norm": 256.40460205078125, "learning_rate": 7.23067666908853e-06, "loss": 15.624, "step": 208760 }, { "epoch": 0.4217286085400195, "grad_norm": 92.12860107421875, "learning_rate": 7.230364261755967e-06, "loss": 17.7092, "step": 208770 }, { "epoch": 0.4217488091727033, "grad_norm": 399.0726623535156, "learning_rate": 7.230051843552879e-06, "loss": 14.366, "step": 208780 }, { "epoch": 0.4217690098053871, "grad_norm": 276.1424255371094, "learning_rate": 7.22973941448079e-06, "loss": 13.3954, "step": 208790 }, { "epoch": 0.42178921043807094, "grad_norm": 274.2366638183594, "learning_rate": 7.2294269745412214e-06, "loss": 15.4772, "step": 208800 }, { "epoch": 0.42180941107075476, "grad_norm": 0.0, "learning_rate": 7.229114523735695e-06, "loss": 22.0458, "step": 208810 }, { "epoch": 0.4218296117034386, "grad_norm": 476.23968505859375, "learning_rate": 7.228802062065735e-06, "loss": 34.0573, "step": 208820 }, { "epoch": 0.42184981233612234, "grad_norm": 201.3818817138672, "learning_rate": 7.228489589532865e-06, "loss": 14.6082, "step": 208830 }, { "epoch": 0.42187001296880616, "grad_norm": 197.51583862304688, "learning_rate": 7.228177106138605e-06, "loss": 16.8244, "step": 208840 }, { "epoch": 0.42189021360149, "grad_norm": 971.824462890625, "learning_rate": 7.227864611884483e-06, "loss": 28.4531, "step": 208850 }, { "epoch": 0.4219104142341738, "grad_norm": 321.6971740722656, "learning_rate": 7.227552106772015e-06, "loss": 6.8873, "step": 208860 }, { "epoch": 0.4219306148668576, "grad_norm": 317.76702880859375, "learning_rate": 7.227239590802733e-06, "loss": 12.1487, "step": 208870 }, { "epoch": 0.42195081549954144, "grad_norm": 140.68832397460938, "learning_rate": 7.226927063978153e-06, "loss": 19.1866, "step": 208880 }, { "epoch": 0.42197101613222526, "grad_norm": 449.8706970214844, "learning_rate": 7.2266145262998e-06, "loss": 32.2991, "step": 208890 }, { "epoch": 0.4219912167649091, "grad_norm": 482.41888427734375, "learning_rate": 7.226301977769199e-06, "loss": 17.0586, "step": 208900 }, { "epoch": 0.4220114173975929, "grad_norm": 356.9971923828125, "learning_rate": 7.225989418387871e-06, "loss": 13.1084, "step": 208910 }, { "epoch": 0.4220316180302767, "grad_norm": 172.78213500976562, "learning_rate": 7.2256768481573414e-06, "loss": 8.3046, "step": 208920 }, { "epoch": 0.42205181866296054, "grad_norm": 287.916748046875, "learning_rate": 7.225364267079134e-06, "loss": 23.398, "step": 208930 }, { "epoch": 0.42207201929564436, "grad_norm": 92.01688385009766, "learning_rate": 7.225051675154768e-06, "loss": 23.6847, "step": 208940 }, { "epoch": 0.4220922199283282, "grad_norm": 324.4417419433594, "learning_rate": 7.224739072385773e-06, "loss": 14.5546, "step": 208950 }, { "epoch": 0.42211242056101195, "grad_norm": 411.3382568359375, "learning_rate": 7.224426458773668e-06, "loss": 47.4799, "step": 208960 }, { "epoch": 0.42213262119369577, "grad_norm": 294.321044921875, "learning_rate": 7.224113834319978e-06, "loss": 18.6417, "step": 208970 }, { "epoch": 0.4221528218263796, "grad_norm": 630.4127807617188, "learning_rate": 7.223801199026228e-06, "loss": 18.5683, "step": 208980 }, { "epoch": 0.4221730224590634, "grad_norm": 136.45762634277344, "learning_rate": 7.22348855289394e-06, "loss": 22.9135, "step": 208990 }, { "epoch": 0.4221932230917472, "grad_norm": 189.09071350097656, "learning_rate": 7.223175895924638e-06, "loss": 19.0446, "step": 209000 }, { "epoch": 0.42221342372443105, "grad_norm": 35.44863510131836, "learning_rate": 7.2228632281198475e-06, "loss": 23.6993, "step": 209010 }, { "epoch": 0.42223362435711487, "grad_norm": 310.8429870605469, "learning_rate": 7.22255054948109e-06, "loss": 12.9231, "step": 209020 }, { "epoch": 0.4222538249897987, "grad_norm": 124.09288024902344, "learning_rate": 7.222237860009892e-06, "loss": 14.2788, "step": 209030 }, { "epoch": 0.4222740256224825, "grad_norm": 103.65320587158203, "learning_rate": 7.2219251597077745e-06, "loss": 19.6619, "step": 209040 }, { "epoch": 0.4222942262551663, "grad_norm": 103.98101806640625, "learning_rate": 7.221612448576265e-06, "loss": 14.8514, "step": 209050 }, { "epoch": 0.42231442688785015, "grad_norm": 257.23223876953125, "learning_rate": 7.221299726616885e-06, "loss": 19.6836, "step": 209060 }, { "epoch": 0.42233462752053397, "grad_norm": 329.54620361328125, "learning_rate": 7.220986993831159e-06, "loss": 20.4935, "step": 209070 }, { "epoch": 0.42235482815321773, "grad_norm": 177.7585906982422, "learning_rate": 7.220674250220614e-06, "loss": 12.3472, "step": 209080 }, { "epoch": 0.42237502878590155, "grad_norm": 371.9766845703125, "learning_rate": 7.220361495786769e-06, "loss": 27.9731, "step": 209090 }, { "epoch": 0.42239522941858537, "grad_norm": 337.5389099121094, "learning_rate": 7.220048730531154e-06, "loss": 10.8747, "step": 209100 }, { "epoch": 0.4224154300512692, "grad_norm": 95.08724212646484, "learning_rate": 7.219735954455289e-06, "loss": 23.0402, "step": 209110 }, { "epoch": 0.422435630683953, "grad_norm": 230.83555603027344, "learning_rate": 7.219423167560701e-06, "loss": 22.7921, "step": 209120 }, { "epoch": 0.42245583131663683, "grad_norm": 130.54432678222656, "learning_rate": 7.219110369848913e-06, "loss": 15.1396, "step": 209130 }, { "epoch": 0.42247603194932065, "grad_norm": 8.229086875915527, "learning_rate": 7.218797561321451e-06, "loss": 18.0505, "step": 209140 }, { "epoch": 0.42249623258200447, "grad_norm": 232.97003173828125, "learning_rate": 7.2184847419798384e-06, "loss": 17.3058, "step": 209150 }, { "epoch": 0.4225164332146883, "grad_norm": 341.20611572265625, "learning_rate": 7.2181719118256e-06, "loss": 13.4055, "step": 209160 }, { "epoch": 0.4225366338473721, "grad_norm": 370.28790283203125, "learning_rate": 7.217859070860261e-06, "loss": 24.0835, "step": 209170 }, { "epoch": 0.42255683448005593, "grad_norm": 174.5486602783203, "learning_rate": 7.217546219085346e-06, "loss": 14.3117, "step": 209180 }, { "epoch": 0.42257703511273975, "grad_norm": 335.2047119140625, "learning_rate": 7.21723335650238e-06, "loss": 22.7938, "step": 209190 }, { "epoch": 0.42259723574542357, "grad_norm": 112.47090911865234, "learning_rate": 7.216920483112886e-06, "loss": 17.0549, "step": 209200 }, { "epoch": 0.42261743637810734, "grad_norm": 367.8523864746094, "learning_rate": 7.216607598918392e-06, "loss": 21.4397, "step": 209210 }, { "epoch": 0.42263763701079116, "grad_norm": 85.30694580078125, "learning_rate": 7.216294703920421e-06, "loss": 23.8722, "step": 209220 }, { "epoch": 0.422657837643475, "grad_norm": 169.58953857421875, "learning_rate": 7.215981798120499e-06, "loss": 14.9267, "step": 209230 }, { "epoch": 0.4226780382761588, "grad_norm": 288.9798583984375, "learning_rate": 7.215668881520149e-06, "loss": 15.4207, "step": 209240 }, { "epoch": 0.4226982389088426, "grad_norm": 0.0, "learning_rate": 7.215355954120899e-06, "loss": 23.0729, "step": 209250 }, { "epoch": 0.42271843954152644, "grad_norm": 285.6874084472656, "learning_rate": 7.2150430159242724e-06, "loss": 23.5276, "step": 209260 }, { "epoch": 0.42273864017421026, "grad_norm": 266.3119812011719, "learning_rate": 7.214730066931794e-06, "loss": 15.5106, "step": 209270 }, { "epoch": 0.4227588408068941, "grad_norm": 465.8023376464844, "learning_rate": 7.214417107144991e-06, "loss": 20.6717, "step": 209280 }, { "epoch": 0.4227790414395779, "grad_norm": 198.12197875976562, "learning_rate": 7.2141041365653876e-06, "loss": 22.6535, "step": 209290 }, { "epoch": 0.4227992420722617, "grad_norm": 374.4003601074219, "learning_rate": 7.21379115519451e-06, "loss": 30.1927, "step": 209300 }, { "epoch": 0.42281944270494554, "grad_norm": 369.00799560546875, "learning_rate": 7.213478163033879e-06, "loss": 9.964, "step": 209310 }, { "epoch": 0.42283964333762936, "grad_norm": 124.21248626708984, "learning_rate": 7.213165160085027e-06, "loss": 19.2604, "step": 209320 }, { "epoch": 0.4228598439703132, "grad_norm": 204.09532165527344, "learning_rate": 7.212852146349476e-06, "loss": 10.7469, "step": 209330 }, { "epoch": 0.42288004460299694, "grad_norm": 187.04380798339844, "learning_rate": 7.212539121828752e-06, "loss": 23.6163, "step": 209340 }, { "epoch": 0.42290024523568076, "grad_norm": 257.26043701171875, "learning_rate": 7.212226086524381e-06, "loss": 15.1203, "step": 209350 }, { "epoch": 0.4229204458683646, "grad_norm": 458.1248474121094, "learning_rate": 7.211913040437887e-06, "loss": 30.244, "step": 209360 }, { "epoch": 0.4229406465010484, "grad_norm": 57.24076843261719, "learning_rate": 7.211599983570799e-06, "loss": 12.094, "step": 209370 }, { "epoch": 0.4229608471337322, "grad_norm": 32.499908447265625, "learning_rate": 7.211286915924639e-06, "loss": 26.3457, "step": 209380 }, { "epoch": 0.42298104776641604, "grad_norm": 554.9761962890625, "learning_rate": 7.210973837500936e-06, "loss": 21.282, "step": 209390 }, { "epoch": 0.42300124839909986, "grad_norm": 485.9480895996094, "learning_rate": 7.210660748301214e-06, "loss": 19.3023, "step": 209400 }, { "epoch": 0.4230214490317837, "grad_norm": 389.592041015625, "learning_rate": 7.210347648327001e-06, "loss": 21.9284, "step": 209410 }, { "epoch": 0.4230416496644675, "grad_norm": 258.4899597167969, "learning_rate": 7.21003453757982e-06, "loss": 28.2935, "step": 209420 }, { "epoch": 0.4230618502971513, "grad_norm": 562.930908203125, "learning_rate": 7.209721416061199e-06, "loss": 19.4533, "step": 209430 }, { "epoch": 0.42308205092983514, "grad_norm": 256.5397644042969, "learning_rate": 7.209408283772664e-06, "loss": 17.2897, "step": 209440 }, { "epoch": 0.42310225156251896, "grad_norm": 355.2579040527344, "learning_rate": 7.209095140715742e-06, "loss": 19.1613, "step": 209450 }, { "epoch": 0.4231224521952028, "grad_norm": 383.28564453125, "learning_rate": 7.208781986891957e-06, "loss": 29.7756, "step": 209460 }, { "epoch": 0.42314265282788655, "grad_norm": 329.2330322265625, "learning_rate": 7.208468822302837e-06, "loss": 24.5137, "step": 209470 }, { "epoch": 0.42316285346057037, "grad_norm": 366.9367370605469, "learning_rate": 7.208155646949908e-06, "loss": 23.9124, "step": 209480 }, { "epoch": 0.4231830540932542, "grad_norm": 175.47128295898438, "learning_rate": 7.207842460834695e-06, "loss": 24.5936, "step": 209490 }, { "epoch": 0.423203254725938, "grad_norm": 970.8670654296875, "learning_rate": 7.207529263958727e-06, "loss": 22.305, "step": 209500 }, { "epoch": 0.4232234553586218, "grad_norm": 165.945068359375, "learning_rate": 7.2072160563235285e-06, "loss": 5.6988, "step": 209510 }, { "epoch": 0.42324365599130565, "grad_norm": 186.55113220214844, "learning_rate": 7.206902837930626e-06, "loss": 13.922, "step": 209520 }, { "epoch": 0.42326385662398947, "grad_norm": 477.7577819824219, "learning_rate": 7.206589608781549e-06, "loss": 41.1079, "step": 209530 }, { "epoch": 0.4232840572566733, "grad_norm": 145.6040496826172, "learning_rate": 7.206276368877821e-06, "loss": 17.2299, "step": 209540 }, { "epoch": 0.4233042578893571, "grad_norm": 100.45630645751953, "learning_rate": 7.2059631182209676e-06, "loss": 12.2171, "step": 209550 }, { "epoch": 0.4233244585220409, "grad_norm": 258.6007080078125, "learning_rate": 7.205649856812519e-06, "loss": 25.1818, "step": 209560 }, { "epoch": 0.42334465915472475, "grad_norm": 451.7904052734375, "learning_rate": 7.205336584653999e-06, "loss": 35.6905, "step": 209570 }, { "epoch": 0.42336485978740857, "grad_norm": 153.0631866455078, "learning_rate": 7.205023301746938e-06, "loss": 32.4119, "step": 209580 }, { "epoch": 0.42338506042009233, "grad_norm": 351.3072509765625, "learning_rate": 7.204710008092861e-06, "loss": 24.4407, "step": 209590 }, { "epoch": 0.42340526105277615, "grad_norm": 685.1107177734375, "learning_rate": 7.2043967036932935e-06, "loss": 22.6757, "step": 209600 }, { "epoch": 0.42342546168545997, "grad_norm": 192.15965270996094, "learning_rate": 7.204083388549764e-06, "loss": 19.1175, "step": 209610 }, { "epoch": 0.4234456623181438, "grad_norm": 422.2698974609375, "learning_rate": 7.203770062663801e-06, "loss": 31.9044, "step": 209620 }, { "epoch": 0.4234658629508276, "grad_norm": 415.322265625, "learning_rate": 7.203456726036927e-06, "loss": 15.6594, "step": 209630 }, { "epoch": 0.42348606358351143, "grad_norm": 93.75147247314453, "learning_rate": 7.203143378670675e-06, "loss": 20.5807, "step": 209640 }, { "epoch": 0.42350626421619525, "grad_norm": 170.8455810546875, "learning_rate": 7.202830020566567e-06, "loss": 18.1365, "step": 209650 }, { "epoch": 0.42352646484887907, "grad_norm": 152.6268768310547, "learning_rate": 7.202516651726135e-06, "loss": 29.554, "step": 209660 }, { "epoch": 0.4235466654815629, "grad_norm": 460.4654846191406, "learning_rate": 7.202203272150903e-06, "loss": 31.2123, "step": 209670 }, { "epoch": 0.4235668661142467, "grad_norm": 122.9192123413086, "learning_rate": 7.2018898818423985e-06, "loss": 15.3384, "step": 209680 }, { "epoch": 0.42358706674693053, "grad_norm": 280.1474304199219, "learning_rate": 7.201576480802151e-06, "loss": 13.6137, "step": 209690 }, { "epoch": 0.42360726737961435, "grad_norm": 171.86985778808594, "learning_rate": 7.201263069031686e-06, "loss": 21.2956, "step": 209700 }, { "epoch": 0.42362746801229817, "grad_norm": 251.3374786376953, "learning_rate": 7.200949646532532e-06, "loss": 17.6283, "step": 209710 }, { "epoch": 0.42364766864498193, "grad_norm": 497.8843994140625, "learning_rate": 7.200636213306216e-06, "loss": 24.2281, "step": 209720 }, { "epoch": 0.42366786927766575, "grad_norm": 312.8103942871094, "learning_rate": 7.200322769354267e-06, "loss": 27.9171, "step": 209730 }, { "epoch": 0.4236880699103496, "grad_norm": 156.49002075195312, "learning_rate": 7.20000931467821e-06, "loss": 14.7604, "step": 209740 }, { "epoch": 0.4237082705430334, "grad_norm": 268.81573486328125, "learning_rate": 7.199695849279576e-06, "loss": 20.9434, "step": 209750 }, { "epoch": 0.4237284711757172, "grad_norm": 227.0083465576172, "learning_rate": 7.199382373159891e-06, "loss": 12.3226, "step": 209760 }, { "epoch": 0.42374867180840103, "grad_norm": 136.42681884765625, "learning_rate": 7.1990688863206835e-06, "loss": 9.9145, "step": 209770 }, { "epoch": 0.42376887244108485, "grad_norm": 390.649169921875, "learning_rate": 7.19875538876348e-06, "loss": 30.654, "step": 209780 }, { "epoch": 0.4237890730737687, "grad_norm": 285.2930603027344, "learning_rate": 7.198441880489809e-06, "loss": 15.8566, "step": 209790 }, { "epoch": 0.4238092737064525, "grad_norm": 245.26341247558594, "learning_rate": 7.1981283615012e-06, "loss": 11.8039, "step": 209800 }, { "epoch": 0.4238294743391363, "grad_norm": 156.446533203125, "learning_rate": 7.197814831799179e-06, "loss": 14.6797, "step": 209810 }, { "epoch": 0.42384967497182013, "grad_norm": 350.9565124511719, "learning_rate": 7.197501291385276e-06, "loss": 26.2112, "step": 209820 }, { "epoch": 0.42386987560450395, "grad_norm": 177.53599548339844, "learning_rate": 7.197187740261018e-06, "loss": 17.7712, "step": 209830 }, { "epoch": 0.4238900762371878, "grad_norm": 715.6395874023438, "learning_rate": 7.196874178427933e-06, "loss": 20.0539, "step": 209840 }, { "epoch": 0.42391027686987154, "grad_norm": 23.22755241394043, "learning_rate": 7.196560605887551e-06, "loss": 16.443, "step": 209850 }, { "epoch": 0.42393047750255536, "grad_norm": 215.2098846435547, "learning_rate": 7.196247022641398e-06, "loss": 11.9472, "step": 209860 }, { "epoch": 0.4239506781352392, "grad_norm": 376.1112365722656, "learning_rate": 7.195933428691003e-06, "loss": 24.9291, "step": 209870 }, { "epoch": 0.423970878767923, "grad_norm": 402.9820861816406, "learning_rate": 7.195619824037895e-06, "loss": 20.6287, "step": 209880 }, { "epoch": 0.4239910794006068, "grad_norm": 117.93054962158203, "learning_rate": 7.195306208683602e-06, "loss": 20.4834, "step": 209890 }, { "epoch": 0.42401128003329064, "grad_norm": 432.49542236328125, "learning_rate": 7.194992582629654e-06, "loss": 29.5177, "step": 209900 }, { "epoch": 0.42403148066597446, "grad_norm": 461.4139404296875, "learning_rate": 7.194678945877578e-06, "loss": 15.0305, "step": 209910 }, { "epoch": 0.4240516812986583, "grad_norm": 33.335548400878906, "learning_rate": 7.194365298428901e-06, "loss": 21.888, "step": 209920 }, { "epoch": 0.4240718819313421, "grad_norm": 23.093856811523438, "learning_rate": 7.194051640285156e-06, "loss": 22.6835, "step": 209930 }, { "epoch": 0.4240920825640259, "grad_norm": 273.06707763671875, "learning_rate": 7.1937379714478696e-06, "loss": 13.9063, "step": 209940 }, { "epoch": 0.42411228319670974, "grad_norm": 133.49578857421875, "learning_rate": 7.19342429191857e-06, "loss": 18.4754, "step": 209950 }, { "epoch": 0.42413248382939356, "grad_norm": 384.3269958496094, "learning_rate": 7.193110601698785e-06, "loss": 33.6514, "step": 209960 }, { "epoch": 0.4241526844620774, "grad_norm": 478.7608642578125, "learning_rate": 7.192796900790046e-06, "loss": 28.551, "step": 209970 }, { "epoch": 0.42417288509476114, "grad_norm": 223.3717041015625, "learning_rate": 7.192483189193881e-06, "loss": 21.127, "step": 209980 }, { "epoch": 0.42419308572744496, "grad_norm": 112.8879165649414, "learning_rate": 7.192169466911818e-06, "loss": 10.7619, "step": 209990 }, { "epoch": 0.4242132863601288, "grad_norm": 28.1068172454834, "learning_rate": 7.191855733945388e-06, "loss": 18.6221, "step": 210000 }, { "epoch": 0.4242334869928126, "grad_norm": 237.60415649414062, "learning_rate": 7.191541990296118e-06, "loss": 26.5385, "step": 210010 }, { "epoch": 0.4242536876254964, "grad_norm": 136.3446044921875, "learning_rate": 7.191228235965539e-06, "loss": 9.8216, "step": 210020 }, { "epoch": 0.42427388825818024, "grad_norm": 245.97743225097656, "learning_rate": 7.190914470955179e-06, "loss": 13.5155, "step": 210030 }, { "epoch": 0.42429408889086406, "grad_norm": 62.12400817871094, "learning_rate": 7.190600695266567e-06, "loss": 24.7152, "step": 210040 }, { "epoch": 0.4243142895235479, "grad_norm": 157.30845642089844, "learning_rate": 7.190286908901234e-06, "loss": 14.3003, "step": 210050 }, { "epoch": 0.4243344901562317, "grad_norm": 118.48530578613281, "learning_rate": 7.189973111860708e-06, "loss": 19.7104, "step": 210060 }, { "epoch": 0.4243546907889155, "grad_norm": 287.3713073730469, "learning_rate": 7.189659304146519e-06, "loss": 11.1554, "step": 210070 }, { "epoch": 0.42437489142159934, "grad_norm": 191.65191650390625, "learning_rate": 7.189345485760194e-06, "loss": 15.8713, "step": 210080 }, { "epoch": 0.42439509205428316, "grad_norm": 0.0, "learning_rate": 7.189031656703267e-06, "loss": 9.6758, "step": 210090 }, { "epoch": 0.424415292686967, "grad_norm": 229.73046875, "learning_rate": 7.188717816977264e-06, "loss": 23.0963, "step": 210100 }, { "epoch": 0.42443549331965075, "grad_norm": 174.34913635253906, "learning_rate": 7.1884039665837165e-06, "loss": 8.5012, "step": 210110 }, { "epoch": 0.42445569395233457, "grad_norm": 33.016136169433594, "learning_rate": 7.188090105524152e-06, "loss": 27.853, "step": 210120 }, { "epoch": 0.4244758945850184, "grad_norm": 231.1636505126953, "learning_rate": 7.187776233800104e-06, "loss": 16.6867, "step": 210130 }, { "epoch": 0.4244960952177022, "grad_norm": 229.30934143066406, "learning_rate": 7.187462351413099e-06, "loss": 20.9089, "step": 210140 }, { "epoch": 0.424516295850386, "grad_norm": 215.28372192382812, "learning_rate": 7.187148458364668e-06, "loss": 8.3754, "step": 210150 }, { "epoch": 0.42453649648306985, "grad_norm": 258.3394470214844, "learning_rate": 7.18683455465634e-06, "loss": 11.1373, "step": 210160 }, { "epoch": 0.42455669711575367, "grad_norm": 231.74925231933594, "learning_rate": 7.1865206402896455e-06, "loss": 18.007, "step": 210170 }, { "epoch": 0.4245768977484375, "grad_norm": 131.7431640625, "learning_rate": 7.1862067152661155e-06, "loss": 13.8254, "step": 210180 }, { "epoch": 0.4245970983811213, "grad_norm": 304.91827392578125, "learning_rate": 7.185892779587278e-06, "loss": 18.9593, "step": 210190 }, { "epoch": 0.4246172990138051, "grad_norm": 321.6558837890625, "learning_rate": 7.185578833254665e-06, "loss": 13.4849, "step": 210200 }, { "epoch": 0.42463749964648895, "grad_norm": 233.89035034179688, "learning_rate": 7.185264876269806e-06, "loss": 21.4638, "step": 210210 }, { "epoch": 0.42465770027917277, "grad_norm": 274.2545166015625, "learning_rate": 7.18495090863423e-06, "loss": 20.0721, "step": 210220 }, { "epoch": 0.42467790091185653, "grad_norm": 588.8479614257812, "learning_rate": 7.184636930349471e-06, "loss": 25.6499, "step": 210230 }, { "epoch": 0.42469810154454035, "grad_norm": 660.7930908203125, "learning_rate": 7.184322941417052e-06, "loss": 22.416, "step": 210240 }, { "epoch": 0.42471830217722417, "grad_norm": 200.8104705810547, "learning_rate": 7.184008941838512e-06, "loss": 23.0715, "step": 210250 }, { "epoch": 0.424738502809908, "grad_norm": 19.885658264160156, "learning_rate": 7.183694931615374e-06, "loss": 7.2076, "step": 210260 }, { "epoch": 0.4247587034425918, "grad_norm": 387.86273193359375, "learning_rate": 7.183380910749176e-06, "loss": 23.5066, "step": 210270 }, { "epoch": 0.42477890407527563, "grad_norm": 380.25469970703125, "learning_rate": 7.1830668792414405e-06, "loss": 15.568, "step": 210280 }, { "epoch": 0.42479910470795945, "grad_norm": 185.95164489746094, "learning_rate": 7.182752837093704e-06, "loss": 11.4259, "step": 210290 }, { "epoch": 0.42481930534064327, "grad_norm": 126.13370513916016, "learning_rate": 7.182438784307495e-06, "loss": 11.9976, "step": 210300 }, { "epoch": 0.4248395059733271, "grad_norm": 280.63690185546875, "learning_rate": 7.182124720884342e-06, "loss": 12.342, "step": 210310 }, { "epoch": 0.4248597066060109, "grad_norm": 290.5371398925781, "learning_rate": 7.181810646825779e-06, "loss": 17.6882, "step": 210320 }, { "epoch": 0.42487990723869473, "grad_norm": 542.0457763671875, "learning_rate": 7.1814965621333355e-06, "loss": 31.2093, "step": 210330 }, { "epoch": 0.42490010787137855, "grad_norm": 218.0406494140625, "learning_rate": 7.181182466808542e-06, "loss": 19.6717, "step": 210340 }, { "epoch": 0.42492030850406237, "grad_norm": 0.0, "learning_rate": 7.18086836085293e-06, "loss": 14.7079, "step": 210350 }, { "epoch": 0.42494050913674614, "grad_norm": 74.43020629882812, "learning_rate": 7.18055424426803e-06, "loss": 14.2388, "step": 210360 }, { "epoch": 0.42496070976942996, "grad_norm": 226.8289794921875, "learning_rate": 7.180240117055372e-06, "loss": 12.7486, "step": 210370 }, { "epoch": 0.4249809104021138, "grad_norm": 255.152587890625, "learning_rate": 7.1799259792164914e-06, "loss": 17.6921, "step": 210380 }, { "epoch": 0.4250011110347976, "grad_norm": 267.7167053222656, "learning_rate": 7.179611830752914e-06, "loss": 17.3208, "step": 210390 }, { "epoch": 0.4250213116674814, "grad_norm": 17.928421020507812, "learning_rate": 7.179297671666171e-06, "loss": 9.2888, "step": 210400 }, { "epoch": 0.42504151230016524, "grad_norm": 318.06591796875, "learning_rate": 7.178983501957798e-06, "loss": 20.4739, "step": 210410 }, { "epoch": 0.42506171293284906, "grad_norm": 91.31649780273438, "learning_rate": 7.178669321629321e-06, "loss": 21.3793, "step": 210420 }, { "epoch": 0.4250819135655329, "grad_norm": 0.0, "learning_rate": 7.178355130682278e-06, "loss": 14.5328, "step": 210430 }, { "epoch": 0.4251021141982167, "grad_norm": 304.348876953125, "learning_rate": 7.178040929118193e-06, "loss": 112.1826, "step": 210440 }, { "epoch": 0.4251223148309005, "grad_norm": 395.1297912597656, "learning_rate": 7.177726716938602e-06, "loss": 14.2612, "step": 210450 }, { "epoch": 0.42514251546358434, "grad_norm": 373.11163330078125, "learning_rate": 7.177412494145035e-06, "loss": 21.4792, "step": 210460 }, { "epoch": 0.42516271609626816, "grad_norm": 623.6839599609375, "learning_rate": 7.177098260739024e-06, "loss": 15.5262, "step": 210470 }, { "epoch": 0.425182916728952, "grad_norm": 491.85186767578125, "learning_rate": 7.176784016722099e-06, "loss": 14.3433, "step": 210480 }, { "epoch": 0.42520311736163574, "grad_norm": 130.1648712158203, "learning_rate": 7.1764697620957935e-06, "loss": 15.2087, "step": 210490 }, { "epoch": 0.42522331799431956, "grad_norm": 152.86862182617188, "learning_rate": 7.176155496861639e-06, "loss": 21.2045, "step": 210500 }, { "epoch": 0.4252435186270034, "grad_norm": 287.9232482910156, "learning_rate": 7.175841221021165e-06, "loss": 14.8733, "step": 210510 }, { "epoch": 0.4252637192596872, "grad_norm": 272.0674743652344, "learning_rate": 7.175526934575906e-06, "loss": 23.3937, "step": 210520 }, { "epoch": 0.425283919892371, "grad_norm": 384.3063659667969, "learning_rate": 7.175212637527391e-06, "loss": 16.3844, "step": 210530 }, { "epoch": 0.42530412052505484, "grad_norm": 429.4483642578125, "learning_rate": 7.174898329877156e-06, "loss": 20.5714, "step": 210540 }, { "epoch": 0.42532432115773866, "grad_norm": 343.37786865234375, "learning_rate": 7.174584011626729e-06, "loss": 14.8663, "step": 210550 }, { "epoch": 0.4253445217904225, "grad_norm": 303.0111389160156, "learning_rate": 7.1742696827776415e-06, "loss": 22.3332, "step": 210560 }, { "epoch": 0.4253647224231063, "grad_norm": 616.0396728515625, "learning_rate": 7.17395534333143e-06, "loss": 25.812, "step": 210570 }, { "epoch": 0.4253849230557901, "grad_norm": 363.1705017089844, "learning_rate": 7.173640993289621e-06, "loss": 19.4889, "step": 210580 }, { "epoch": 0.42540512368847394, "grad_norm": 50.54901123046875, "learning_rate": 7.173326632653752e-06, "loss": 13.9667, "step": 210590 }, { "epoch": 0.42542532432115776, "grad_norm": 614.775390625, "learning_rate": 7.173012261425352e-06, "loss": 18.191, "step": 210600 }, { "epoch": 0.4254455249538416, "grad_norm": 315.2774963378906, "learning_rate": 7.172697879605954e-06, "loss": 18.2491, "step": 210610 }, { "epoch": 0.42546572558652535, "grad_norm": 384.29534912109375, "learning_rate": 7.1723834871970885e-06, "loss": 26.6588, "step": 210620 }, { "epoch": 0.42548592621920917, "grad_norm": 166.99932861328125, "learning_rate": 7.172069084200291e-06, "loss": 20.3508, "step": 210630 }, { "epoch": 0.425506126851893, "grad_norm": 199.97264099121094, "learning_rate": 7.171754670617093e-06, "loss": 21.1075, "step": 210640 }, { "epoch": 0.4255263274845768, "grad_norm": 497.3868408203125, "learning_rate": 7.171440246449024e-06, "loss": 26.794, "step": 210650 }, { "epoch": 0.4255465281172606, "grad_norm": 426.8668518066406, "learning_rate": 7.171125811697619e-06, "loss": 17.2615, "step": 210660 }, { "epoch": 0.42556672874994445, "grad_norm": 221.47088623046875, "learning_rate": 7.1708113663644105e-06, "loss": 24.0453, "step": 210670 }, { "epoch": 0.42558692938262827, "grad_norm": 387.373046875, "learning_rate": 7.170496910450932e-06, "loss": 21.7356, "step": 210680 }, { "epoch": 0.4256071300153121, "grad_norm": 117.55718231201172, "learning_rate": 7.170182443958712e-06, "loss": 36.6815, "step": 210690 }, { "epoch": 0.4256273306479959, "grad_norm": 123.97139739990234, "learning_rate": 7.169867966889288e-06, "loss": 28.3183, "step": 210700 }, { "epoch": 0.4256475312806797, "grad_norm": 502.1156311035156, "learning_rate": 7.16955347924419e-06, "loss": 31.4013, "step": 210710 }, { "epoch": 0.42566773191336355, "grad_norm": 276.0956726074219, "learning_rate": 7.169238981024952e-06, "loss": 14.6568, "step": 210720 }, { "epoch": 0.42568793254604737, "grad_norm": 399.0640563964844, "learning_rate": 7.168924472233107e-06, "loss": 20.1769, "step": 210730 }, { "epoch": 0.4257081331787312, "grad_norm": 146.53030395507812, "learning_rate": 7.168609952870185e-06, "loss": 18.1486, "step": 210740 }, { "epoch": 0.42572833381141495, "grad_norm": 421.3998718261719, "learning_rate": 7.168295422937723e-06, "loss": 17.241, "step": 210750 }, { "epoch": 0.42574853444409877, "grad_norm": 282.74053955078125, "learning_rate": 7.167980882437251e-06, "loss": 24.346, "step": 210760 }, { "epoch": 0.4257687350767826, "grad_norm": 321.1767272949219, "learning_rate": 7.167666331370303e-06, "loss": 29.5556, "step": 210770 }, { "epoch": 0.4257889357094664, "grad_norm": 491.2781982421875, "learning_rate": 7.167351769738413e-06, "loss": 10.0029, "step": 210780 }, { "epoch": 0.42580913634215023, "grad_norm": 262.0843505859375, "learning_rate": 7.167037197543112e-06, "loss": 16.6583, "step": 210790 }, { "epoch": 0.42582933697483405, "grad_norm": 89.12399291992188, "learning_rate": 7.166722614785937e-06, "loss": 5.7623, "step": 210800 }, { "epoch": 0.42584953760751787, "grad_norm": 271.04931640625, "learning_rate": 7.1664080214684176e-06, "loss": 24.0223, "step": 210810 }, { "epoch": 0.4258697382402017, "grad_norm": 180.9813995361328, "learning_rate": 7.166093417592087e-06, "loss": 13.1182, "step": 210820 }, { "epoch": 0.4258899388728855, "grad_norm": 329.6639709472656, "learning_rate": 7.165778803158481e-06, "loss": 14.8679, "step": 210830 }, { "epoch": 0.42591013950556933, "grad_norm": 377.5058898925781, "learning_rate": 7.165464178169133e-06, "loss": 15.6499, "step": 210840 }, { "epoch": 0.42593034013825315, "grad_norm": 213.0333709716797, "learning_rate": 7.1651495426255725e-06, "loss": 18.6177, "step": 210850 }, { "epoch": 0.42595054077093697, "grad_norm": 216.04312133789062, "learning_rate": 7.164834896529338e-06, "loss": 18.953, "step": 210860 }, { "epoch": 0.42597074140362073, "grad_norm": 220.18817138671875, "learning_rate": 7.164520239881958e-06, "loss": 32.388, "step": 210870 }, { "epoch": 0.42599094203630455, "grad_norm": 307.2826843261719, "learning_rate": 7.164205572684971e-06, "loss": 20.8141, "step": 210880 }, { "epoch": 0.4260111426689884, "grad_norm": 63.51272964477539, "learning_rate": 7.163890894939909e-06, "loss": 16.9065, "step": 210890 }, { "epoch": 0.4260313433016722, "grad_norm": 383.6554870605469, "learning_rate": 7.1635762066483035e-06, "loss": 16.1063, "step": 210900 }, { "epoch": 0.426051543934356, "grad_norm": 286.2699279785156, "learning_rate": 7.163261507811692e-06, "loss": 26.1984, "step": 210910 }, { "epoch": 0.42607174456703983, "grad_norm": 344.9894104003906, "learning_rate": 7.162946798431605e-06, "loss": 15.4793, "step": 210920 }, { "epoch": 0.42609194519972365, "grad_norm": 749.8561401367188, "learning_rate": 7.162632078509578e-06, "loss": 20.5732, "step": 210930 }, { "epoch": 0.4261121458324075, "grad_norm": 154.98153686523438, "learning_rate": 7.162317348047144e-06, "loss": 11.8352, "step": 210940 }, { "epoch": 0.4261323464650913, "grad_norm": 312.3851013183594, "learning_rate": 7.162002607045838e-06, "loss": 14.7004, "step": 210950 }, { "epoch": 0.4261525470977751, "grad_norm": 371.6275939941406, "learning_rate": 7.161687855507193e-06, "loss": 12.9178, "step": 210960 }, { "epoch": 0.42617274773045893, "grad_norm": 326.66534423828125, "learning_rate": 7.161373093432745e-06, "loss": 17.8498, "step": 210970 }, { "epoch": 0.42619294836314275, "grad_norm": 112.00231170654297, "learning_rate": 7.161058320824026e-06, "loss": 21.5978, "step": 210980 }, { "epoch": 0.4262131489958266, "grad_norm": 638.4155883789062, "learning_rate": 7.160743537682569e-06, "loss": 30.8009, "step": 210990 }, { "epoch": 0.42623334962851034, "grad_norm": 332.0113525390625, "learning_rate": 7.160428744009913e-06, "loss": 13.5238, "step": 211000 }, { "epoch": 0.42625355026119416, "grad_norm": 124.74098205566406, "learning_rate": 7.160113939807587e-06, "loss": 15.3167, "step": 211010 }, { "epoch": 0.426273750893878, "grad_norm": 19.364486694335938, "learning_rate": 7.159799125077129e-06, "loss": 9.8914, "step": 211020 }, { "epoch": 0.4262939515265618, "grad_norm": 316.47320556640625, "learning_rate": 7.159484299820071e-06, "loss": 20.7994, "step": 211030 }, { "epoch": 0.4263141521592456, "grad_norm": 157.97311401367188, "learning_rate": 7.15916946403795e-06, "loss": 18.957, "step": 211040 }, { "epoch": 0.42633435279192944, "grad_norm": 268.0684509277344, "learning_rate": 7.1588546177322975e-06, "loss": 22.1392, "step": 211050 }, { "epoch": 0.42635455342461326, "grad_norm": 190.57839965820312, "learning_rate": 7.158539760904649e-06, "loss": 14.8748, "step": 211060 }, { "epoch": 0.4263747540572971, "grad_norm": 31.57967185974121, "learning_rate": 7.158224893556541e-06, "loss": 31.2669, "step": 211070 }, { "epoch": 0.4263949546899809, "grad_norm": 0.0, "learning_rate": 7.157910015689505e-06, "loss": 6.8713, "step": 211080 }, { "epoch": 0.4264151553226647, "grad_norm": 202.49928283691406, "learning_rate": 7.157595127305079e-06, "loss": 40.7484, "step": 211090 }, { "epoch": 0.42643535595534854, "grad_norm": 142.66787719726562, "learning_rate": 7.157280228404796e-06, "loss": 14.7037, "step": 211100 }, { "epoch": 0.42645555658803236, "grad_norm": 101.28871154785156, "learning_rate": 7.15696531899019e-06, "loss": 15.6252, "step": 211110 }, { "epoch": 0.4264757572207162, "grad_norm": 275.408447265625, "learning_rate": 7.156650399062797e-06, "loss": 20.5819, "step": 211120 }, { "epoch": 0.42649595785339994, "grad_norm": 305.0511474609375, "learning_rate": 7.156335468624151e-06, "loss": 16.9842, "step": 211130 }, { "epoch": 0.42651615848608376, "grad_norm": 310.3803405761719, "learning_rate": 7.156020527675788e-06, "loss": 18.7121, "step": 211140 }, { "epoch": 0.4265363591187676, "grad_norm": 140.221923828125, "learning_rate": 7.155705576219242e-06, "loss": 9.4043, "step": 211150 }, { "epoch": 0.4265565597514514, "grad_norm": 208.40548706054688, "learning_rate": 7.155390614256048e-06, "loss": 16.5081, "step": 211160 }, { "epoch": 0.4265767603841352, "grad_norm": 151.4898681640625, "learning_rate": 7.1550756417877436e-06, "loss": 24.7238, "step": 211170 }, { "epoch": 0.42659696101681904, "grad_norm": 198.06996154785156, "learning_rate": 7.15476065881586e-06, "loss": 21.7476, "step": 211180 }, { "epoch": 0.42661716164950286, "grad_norm": 132.30784606933594, "learning_rate": 7.154445665341933e-06, "loss": 19.0648, "step": 211190 }, { "epoch": 0.4266373622821867, "grad_norm": 542.8726806640625, "learning_rate": 7.154130661367503e-06, "loss": 17.6397, "step": 211200 }, { "epoch": 0.4266575629148705, "grad_norm": 511.3194885253906, "learning_rate": 7.1538156468940986e-06, "loss": 28.6541, "step": 211210 }, { "epoch": 0.4266777635475543, "grad_norm": 329.9306335449219, "learning_rate": 7.15350062192326e-06, "loss": 10.8363, "step": 211220 }, { "epoch": 0.42669796418023814, "grad_norm": 303.32037353515625, "learning_rate": 7.153185586456518e-06, "loss": 17.8723, "step": 211230 }, { "epoch": 0.42671816481292196, "grad_norm": 54.578975677490234, "learning_rate": 7.152870540495413e-06, "loss": 17.6301, "step": 211240 }, { "epoch": 0.4267383654456058, "grad_norm": 77.7297134399414, "learning_rate": 7.1525554840414765e-06, "loss": 16.0623, "step": 211250 }, { "epoch": 0.42675856607828955, "grad_norm": 103.90699005126953, "learning_rate": 7.152240417096247e-06, "loss": 17.8029, "step": 211260 }, { "epoch": 0.42677876671097337, "grad_norm": 267.3092041015625, "learning_rate": 7.151925339661256e-06, "loss": 25.9411, "step": 211270 }, { "epoch": 0.4267989673436572, "grad_norm": 294.2054443359375, "learning_rate": 7.151610251738045e-06, "loss": 13.5685, "step": 211280 }, { "epoch": 0.426819167976341, "grad_norm": 190.93319702148438, "learning_rate": 7.151295153328146e-06, "loss": 17.1952, "step": 211290 }, { "epoch": 0.4268393686090248, "grad_norm": 242.74440002441406, "learning_rate": 7.150980044433094e-06, "loss": 38.746, "step": 211300 }, { "epoch": 0.42685956924170865, "grad_norm": 492.8813781738281, "learning_rate": 7.150664925054427e-06, "loss": 23.2812, "step": 211310 }, { "epoch": 0.42687976987439247, "grad_norm": 144.62705993652344, "learning_rate": 7.1503497951936794e-06, "loss": 26.5672, "step": 211320 }, { "epoch": 0.4268999705070763, "grad_norm": 452.99078369140625, "learning_rate": 7.1500346548523894e-06, "loss": 34.6904, "step": 211330 }, { "epoch": 0.4269201711397601, "grad_norm": 147.33229064941406, "learning_rate": 7.14971950403209e-06, "loss": 8.258, "step": 211340 }, { "epoch": 0.4269403717724439, "grad_norm": 290.10284423828125, "learning_rate": 7.149404342734317e-06, "loss": 25.2229, "step": 211350 }, { "epoch": 0.42696057240512775, "grad_norm": 369.0220642089844, "learning_rate": 7.14908917096061e-06, "loss": 17.3543, "step": 211360 }, { "epoch": 0.42698077303781157, "grad_norm": 161.92572021484375, "learning_rate": 7.148773988712503e-06, "loss": 19.2011, "step": 211370 }, { "epoch": 0.4270009736704954, "grad_norm": 39.406803131103516, "learning_rate": 7.148458795991531e-06, "loss": 20.0006, "step": 211380 }, { "epoch": 0.42702117430317915, "grad_norm": 420.61785888671875, "learning_rate": 7.148143592799232e-06, "loss": 15.0052, "step": 211390 }, { "epoch": 0.42704137493586297, "grad_norm": 193.6995849609375, "learning_rate": 7.1478283791371415e-06, "loss": 36.1865, "step": 211400 }, { "epoch": 0.4270615755685468, "grad_norm": 346.0033264160156, "learning_rate": 7.147513155006798e-06, "loss": 16.4156, "step": 211410 }, { "epoch": 0.4270817762012306, "grad_norm": 651.3091430664062, "learning_rate": 7.147197920409733e-06, "loss": 24.334, "step": 211420 }, { "epoch": 0.42710197683391443, "grad_norm": 134.62872314453125, "learning_rate": 7.146882675347486e-06, "loss": 17.2105, "step": 211430 }, { "epoch": 0.42712217746659825, "grad_norm": 531.6235961914062, "learning_rate": 7.146567419821595e-06, "loss": 26.1683, "step": 211440 }, { "epoch": 0.42714237809928207, "grad_norm": 344.92718505859375, "learning_rate": 7.146252153833594e-06, "loss": 33.2233, "step": 211450 }, { "epoch": 0.4271625787319659, "grad_norm": 209.5555419921875, "learning_rate": 7.145936877385019e-06, "loss": 29.9197, "step": 211460 }, { "epoch": 0.4271827793646497, "grad_norm": 487.1298828125, "learning_rate": 7.145621590477409e-06, "loss": 18.1851, "step": 211470 }, { "epoch": 0.42720297999733353, "grad_norm": 75.56934356689453, "learning_rate": 7.1453062931123e-06, "loss": 9.3361, "step": 211480 }, { "epoch": 0.42722318063001735, "grad_norm": 133.0726776123047, "learning_rate": 7.144990985291228e-06, "loss": 19.5141, "step": 211490 }, { "epoch": 0.42724338126270117, "grad_norm": 248.1111602783203, "learning_rate": 7.1446756670157306e-06, "loss": 22.3643, "step": 211500 }, { "epoch": 0.42726358189538494, "grad_norm": 379.4938049316406, "learning_rate": 7.144360338287343e-06, "loss": 18.0909, "step": 211510 }, { "epoch": 0.42728378252806876, "grad_norm": 302.9147033691406, "learning_rate": 7.1440449991076045e-06, "loss": 27.0998, "step": 211520 }, { "epoch": 0.4273039831607526, "grad_norm": 234.14622497558594, "learning_rate": 7.143729649478049e-06, "loss": 25.8313, "step": 211530 }, { "epoch": 0.4273241837934364, "grad_norm": 132.7403106689453, "learning_rate": 7.143414289400217e-06, "loss": 10.3976, "step": 211540 }, { "epoch": 0.4273443844261202, "grad_norm": 387.9154968261719, "learning_rate": 7.143098918875643e-06, "loss": 13.6952, "step": 211550 }, { "epoch": 0.42736458505880404, "grad_norm": 261.7142028808594, "learning_rate": 7.142783537905864e-06, "loss": 16.3299, "step": 211560 }, { "epoch": 0.42738478569148786, "grad_norm": 259.0485534667969, "learning_rate": 7.1424681464924185e-06, "loss": 23.9799, "step": 211570 }, { "epoch": 0.4274049863241717, "grad_norm": 434.5473937988281, "learning_rate": 7.142152744636843e-06, "loss": 28.9769, "step": 211580 }, { "epoch": 0.4274251869568555, "grad_norm": 235.84396362304688, "learning_rate": 7.141837332340675e-06, "loss": 22.6452, "step": 211590 }, { "epoch": 0.4274453875895393, "grad_norm": 691.1411743164062, "learning_rate": 7.141521909605452e-06, "loss": 34.8928, "step": 211600 }, { "epoch": 0.42746558822222314, "grad_norm": 824.0036010742188, "learning_rate": 7.141206476432711e-06, "loss": 22.4814, "step": 211610 }, { "epoch": 0.42748578885490696, "grad_norm": 67.79928588867188, "learning_rate": 7.140891032823989e-06, "loss": 25.5031, "step": 211620 }, { "epoch": 0.4275059894875908, "grad_norm": 81.88496398925781, "learning_rate": 7.140575578780824e-06, "loss": 23.0096, "step": 211630 }, { "epoch": 0.42752619012027454, "grad_norm": 213.28233337402344, "learning_rate": 7.1402601143047514e-06, "loss": 25.9773, "step": 211640 }, { "epoch": 0.42754639075295836, "grad_norm": 353.75628662109375, "learning_rate": 7.139944639397313e-06, "loss": 22.3387, "step": 211650 }, { "epoch": 0.4275665913856422, "grad_norm": 156.1005401611328, "learning_rate": 7.1396291540600435e-06, "loss": 29.0979, "step": 211660 }, { "epoch": 0.427586792018326, "grad_norm": 131.66302490234375, "learning_rate": 7.13931365829448e-06, "loss": 19.5581, "step": 211670 }, { "epoch": 0.4276069926510098, "grad_norm": 336.60308837890625, "learning_rate": 7.138998152102162e-06, "loss": 16.4413, "step": 211680 }, { "epoch": 0.42762719328369364, "grad_norm": 233.72540283203125, "learning_rate": 7.138682635484626e-06, "loss": 19.1636, "step": 211690 }, { "epoch": 0.42764739391637746, "grad_norm": 0.0, "learning_rate": 7.138367108443411e-06, "loss": 9.7191, "step": 211700 }, { "epoch": 0.4276675945490613, "grad_norm": 26.237369537353516, "learning_rate": 7.138051570980053e-06, "loss": 20.0578, "step": 211710 }, { "epoch": 0.4276877951817451, "grad_norm": 219.63172912597656, "learning_rate": 7.137736023096091e-06, "loss": 19.0695, "step": 211720 }, { "epoch": 0.4277079958144289, "grad_norm": 513.9913940429688, "learning_rate": 7.1374204647930636e-06, "loss": 29.6529, "step": 211730 }, { "epoch": 0.42772819644711274, "grad_norm": 0.0, "learning_rate": 7.137104896072508e-06, "loss": 17.7257, "step": 211740 }, { "epoch": 0.42774839707979656, "grad_norm": 13.632720947265625, "learning_rate": 7.1367893169359636e-06, "loss": 14.1868, "step": 211750 }, { "epoch": 0.4277685977124804, "grad_norm": 171.5211181640625, "learning_rate": 7.136473727384965e-06, "loss": 28.8381, "step": 211760 }, { "epoch": 0.42778879834516415, "grad_norm": 295.8318786621094, "learning_rate": 7.136158127421053e-06, "loss": 16.8614, "step": 211770 }, { "epoch": 0.42780899897784797, "grad_norm": 42.41835021972656, "learning_rate": 7.1358425170457655e-06, "loss": 12.9116, "step": 211780 }, { "epoch": 0.4278291996105318, "grad_norm": 798.959228515625, "learning_rate": 7.135526896260643e-06, "loss": 23.8272, "step": 211790 }, { "epoch": 0.4278494002432156, "grad_norm": 265.05633544921875, "learning_rate": 7.135211265067217e-06, "loss": 9.2032, "step": 211800 }, { "epoch": 0.4278696008758994, "grad_norm": 278.96624755859375, "learning_rate": 7.1348956234670345e-06, "loss": 18.8835, "step": 211810 }, { "epoch": 0.42788980150858325, "grad_norm": 69.62640380859375, "learning_rate": 7.134579971461627e-06, "loss": 18.9868, "step": 211820 }, { "epoch": 0.42791000214126707, "grad_norm": 233.5619659423828, "learning_rate": 7.134264309052537e-06, "loss": 16.7004, "step": 211830 }, { "epoch": 0.4279302027739509, "grad_norm": 251.17672729492188, "learning_rate": 7.1339486362413005e-06, "loss": 18.2198, "step": 211840 }, { "epoch": 0.4279504034066347, "grad_norm": 408.78363037109375, "learning_rate": 7.133632953029457e-06, "loss": 8.8347, "step": 211850 }, { "epoch": 0.4279706040393185, "grad_norm": 214.22122192382812, "learning_rate": 7.133317259418546e-06, "loss": 28.2217, "step": 211860 }, { "epoch": 0.42799080467200235, "grad_norm": 281.55120849609375, "learning_rate": 7.133001555410106e-06, "loss": 19.9013, "step": 211870 }, { "epoch": 0.42801100530468617, "grad_norm": 549.3682861328125, "learning_rate": 7.132685841005674e-06, "loss": 19.5295, "step": 211880 }, { "epoch": 0.42803120593737, "grad_norm": 668.198486328125, "learning_rate": 7.1323701162067905e-06, "loss": 32.8887, "step": 211890 }, { "epoch": 0.42805140657005375, "grad_norm": 499.3558044433594, "learning_rate": 7.1320543810149945e-06, "loss": 19.7586, "step": 211900 }, { "epoch": 0.42807160720273757, "grad_norm": 125.52816009521484, "learning_rate": 7.131738635431822e-06, "loss": 23.3607, "step": 211910 }, { "epoch": 0.4280918078354214, "grad_norm": 128.2690887451172, "learning_rate": 7.131422879458815e-06, "loss": 14.3206, "step": 211920 }, { "epoch": 0.4281120084681052, "grad_norm": 743.8948364257812, "learning_rate": 7.131107113097512e-06, "loss": 23.3363, "step": 211930 }, { "epoch": 0.42813220910078903, "grad_norm": 297.2441711425781, "learning_rate": 7.13079133634945e-06, "loss": 15.6553, "step": 211940 }, { "epoch": 0.42815240973347285, "grad_norm": 4.181972980499268, "learning_rate": 7.130475549216171e-06, "loss": 8.6928, "step": 211950 }, { "epoch": 0.42817261036615667, "grad_norm": 476.68548583984375, "learning_rate": 7.130159751699211e-06, "loss": 20.3302, "step": 211960 }, { "epoch": 0.4281928109988405, "grad_norm": 365.27423095703125, "learning_rate": 7.129843943800112e-06, "loss": 20.8436, "step": 211970 }, { "epoch": 0.4282130116315243, "grad_norm": 27.002361297607422, "learning_rate": 7.129528125520411e-06, "loss": 20.4163, "step": 211980 }, { "epoch": 0.42823321226420813, "grad_norm": 155.55963134765625, "learning_rate": 7.129212296861649e-06, "loss": 14.6075, "step": 211990 }, { "epoch": 0.42825341289689195, "grad_norm": 306.83746337890625, "learning_rate": 7.128896457825364e-06, "loss": 18.8569, "step": 212000 }, { "epoch": 0.42827361352957577, "grad_norm": 261.2841491699219, "learning_rate": 7.128580608413096e-06, "loss": 15.6103, "step": 212010 }, { "epoch": 0.4282938141622596, "grad_norm": 359.9295654296875, "learning_rate": 7.128264748626385e-06, "loss": 12.4031, "step": 212020 }, { "epoch": 0.42831401479494335, "grad_norm": 351.07208251953125, "learning_rate": 7.127948878466768e-06, "loss": 19.3463, "step": 212030 }, { "epoch": 0.4283342154276272, "grad_norm": 105.96043395996094, "learning_rate": 7.127632997935787e-06, "loss": 23.8856, "step": 212040 }, { "epoch": 0.428354416060311, "grad_norm": 200.9671173095703, "learning_rate": 7.127317107034982e-06, "loss": 20.2345, "step": 212050 }, { "epoch": 0.4283746166929948, "grad_norm": 708.6273193359375, "learning_rate": 7.12700120576589e-06, "loss": 28.3726, "step": 212060 }, { "epoch": 0.42839481732567863, "grad_norm": 127.94776916503906, "learning_rate": 7.126685294130053e-06, "loss": 12.0016, "step": 212070 }, { "epoch": 0.42841501795836245, "grad_norm": 373.4671325683594, "learning_rate": 7.126369372129009e-06, "loss": 23.2183, "step": 212080 }, { "epoch": 0.4284352185910463, "grad_norm": 9.083283424377441, "learning_rate": 7.1260534397643e-06, "loss": 6.1618, "step": 212090 }, { "epoch": 0.4284554192237301, "grad_norm": 171.49757385253906, "learning_rate": 7.125737497037464e-06, "loss": 11.7739, "step": 212100 }, { "epoch": 0.4284756198564139, "grad_norm": 386.94293212890625, "learning_rate": 7.125421543950039e-06, "loss": 13.5176, "step": 212110 }, { "epoch": 0.42849582048909773, "grad_norm": 270.9695739746094, "learning_rate": 7.12510558050357e-06, "loss": 28.8975, "step": 212120 }, { "epoch": 0.42851602112178155, "grad_norm": 241.26148986816406, "learning_rate": 7.124789606699594e-06, "loss": 16.9602, "step": 212130 }, { "epoch": 0.4285362217544654, "grad_norm": 827.15234375, "learning_rate": 7.1244736225396485e-06, "loss": 20.9474, "step": 212140 }, { "epoch": 0.42855642238714914, "grad_norm": 487.1937561035156, "learning_rate": 7.124157628025279e-06, "loss": 25.9331, "step": 212150 }, { "epoch": 0.42857662301983296, "grad_norm": 280.6077575683594, "learning_rate": 7.123841623158021e-06, "loss": 25.394, "step": 212160 }, { "epoch": 0.4285968236525168, "grad_norm": 611.3958740234375, "learning_rate": 7.123525607939418e-06, "loss": 29.7861, "step": 212170 }, { "epoch": 0.4286170242852006, "grad_norm": 96.37902069091797, "learning_rate": 7.1232095823710064e-06, "loss": 9.0783, "step": 212180 }, { "epoch": 0.4286372249178844, "grad_norm": 117.46464538574219, "learning_rate": 7.1228935464543325e-06, "loss": 17.0959, "step": 212190 }, { "epoch": 0.42865742555056824, "grad_norm": 462.5782470703125, "learning_rate": 7.12257750019093e-06, "loss": 25.8158, "step": 212200 }, { "epoch": 0.42867762618325206, "grad_norm": 35.850101470947266, "learning_rate": 7.122261443582343e-06, "loss": 21.1911, "step": 212210 }, { "epoch": 0.4286978268159359, "grad_norm": 232.48800659179688, "learning_rate": 7.12194537663011e-06, "loss": 24.3604, "step": 212220 }, { "epoch": 0.4287180274486197, "grad_norm": 173.88681030273438, "learning_rate": 7.121629299335775e-06, "loss": 22.8349, "step": 212230 }, { "epoch": 0.4287382280813035, "grad_norm": 0.0, "learning_rate": 7.121313211700875e-06, "loss": 16.0512, "step": 212240 }, { "epoch": 0.42875842871398734, "grad_norm": 166.6842041015625, "learning_rate": 7.120997113726951e-06, "loss": 11.2806, "step": 212250 }, { "epoch": 0.42877862934667116, "grad_norm": 129.62106323242188, "learning_rate": 7.120681005415546e-06, "loss": 12.4012, "step": 212260 }, { "epoch": 0.428798829979355, "grad_norm": 365.7792053222656, "learning_rate": 7.120364886768197e-06, "loss": 16.4327, "step": 212270 }, { "epoch": 0.42881903061203874, "grad_norm": 452.4293518066406, "learning_rate": 7.120048757786448e-06, "loss": 31.3573, "step": 212280 }, { "epoch": 0.42883923124472256, "grad_norm": 417.0277099609375, "learning_rate": 7.119732618471838e-06, "loss": 38.5172, "step": 212290 }, { "epoch": 0.4288594318774064, "grad_norm": 112.3646011352539, "learning_rate": 7.119416468825908e-06, "loss": 29.3779, "step": 212300 }, { "epoch": 0.4288796325100902, "grad_norm": 256.39739990234375, "learning_rate": 7.119100308850201e-06, "loss": 36.0894, "step": 212310 }, { "epoch": 0.428899833142774, "grad_norm": 541.7203979492188, "learning_rate": 7.118784138546254e-06, "loss": 15.2062, "step": 212320 }, { "epoch": 0.42892003377545784, "grad_norm": 32.3232307434082, "learning_rate": 7.1184679579156115e-06, "loss": 31.0791, "step": 212330 }, { "epoch": 0.42894023440814166, "grad_norm": 406.9208068847656, "learning_rate": 7.118151766959811e-06, "loss": 25.2834, "step": 212340 }, { "epoch": 0.4289604350408255, "grad_norm": 267.8036193847656, "learning_rate": 7.117835565680399e-06, "loss": 22.8294, "step": 212350 }, { "epoch": 0.4289806356735093, "grad_norm": 184.96133422851562, "learning_rate": 7.11751935407891e-06, "loss": 21.5855, "step": 212360 }, { "epoch": 0.4290008363061931, "grad_norm": 117.63150024414062, "learning_rate": 7.11720313215689e-06, "loss": 21.6815, "step": 212370 }, { "epoch": 0.42902103693887694, "grad_norm": 225.78302001953125, "learning_rate": 7.116886899915879e-06, "loss": 26.1858, "step": 212380 }, { "epoch": 0.42904123757156076, "grad_norm": 566.1354370117188, "learning_rate": 7.116570657357418e-06, "loss": 31.6858, "step": 212390 }, { "epoch": 0.4290614382042446, "grad_norm": 271.49151611328125, "learning_rate": 7.116254404483049e-06, "loss": 9.052, "step": 212400 }, { "epoch": 0.42908163883692835, "grad_norm": 213.4356689453125, "learning_rate": 7.115938141294309e-06, "loss": 30.7031, "step": 212410 }, { "epoch": 0.42910183946961217, "grad_norm": 176.2161865234375, "learning_rate": 7.1156218677927465e-06, "loss": 21.7297, "step": 212420 }, { "epoch": 0.429122040102296, "grad_norm": 286.7548828125, "learning_rate": 7.115305583979899e-06, "loss": 15.3676, "step": 212430 }, { "epoch": 0.4291422407349798, "grad_norm": 13.529806137084961, "learning_rate": 7.114989289857308e-06, "loss": 22.8874, "step": 212440 }, { "epoch": 0.4291624413676636, "grad_norm": 182.17904663085938, "learning_rate": 7.114672985426516e-06, "loss": 24.5365, "step": 212450 }, { "epoch": 0.42918264200034745, "grad_norm": 257.1654052734375, "learning_rate": 7.114356670689065e-06, "loss": 10.4939, "step": 212460 }, { "epoch": 0.42920284263303127, "grad_norm": 400.21258544921875, "learning_rate": 7.114040345646497e-06, "loss": 23.7578, "step": 212470 }, { "epoch": 0.4292230432657151, "grad_norm": 121.98100280761719, "learning_rate": 7.113724010300351e-06, "loss": 20.2218, "step": 212480 }, { "epoch": 0.4292432438983989, "grad_norm": 377.3930358886719, "learning_rate": 7.113407664652171e-06, "loss": 14.2062, "step": 212490 }, { "epoch": 0.4292634445310827, "grad_norm": 126.98472595214844, "learning_rate": 7.113091308703498e-06, "loss": 17.1785, "step": 212500 }, { "epoch": 0.42928364516376655, "grad_norm": 37.18383026123047, "learning_rate": 7.1127749424558755e-06, "loss": 19.3199, "step": 212510 }, { "epoch": 0.42930384579645037, "grad_norm": 308.546875, "learning_rate": 7.112458565910841e-06, "loss": 11.6683, "step": 212520 }, { "epoch": 0.4293240464291342, "grad_norm": 494.0459899902344, "learning_rate": 7.112142179069943e-06, "loss": 23.9329, "step": 212530 }, { "epoch": 0.42934424706181795, "grad_norm": 1039.6826171875, "learning_rate": 7.111825781934719e-06, "loss": 25.8302, "step": 212540 }, { "epoch": 0.42936444769450177, "grad_norm": 315.1341552734375, "learning_rate": 7.111509374506712e-06, "loss": 21.7369, "step": 212550 }, { "epoch": 0.4293846483271856, "grad_norm": 212.78622436523438, "learning_rate": 7.111192956787466e-06, "loss": 11.7799, "step": 212560 }, { "epoch": 0.4294048489598694, "grad_norm": 153.45648193359375, "learning_rate": 7.1108765287785185e-06, "loss": 17.7408, "step": 212570 }, { "epoch": 0.42942504959255323, "grad_norm": 185.0307159423828, "learning_rate": 7.110560090481418e-06, "loss": 19.4982, "step": 212580 }, { "epoch": 0.42944525022523705, "grad_norm": 171.0176544189453, "learning_rate": 7.1102436418977e-06, "loss": 34.7137, "step": 212590 }, { "epoch": 0.42946545085792087, "grad_norm": 616.1397705078125, "learning_rate": 7.1099271830289155e-06, "loss": 27.175, "step": 212600 }, { "epoch": 0.4294856514906047, "grad_norm": 85.49983215332031, "learning_rate": 7.109610713876598e-06, "loss": 20.4642, "step": 212610 }, { "epoch": 0.4295058521232885, "grad_norm": 151.2859344482422, "learning_rate": 7.109294234442294e-06, "loss": 23.843, "step": 212620 }, { "epoch": 0.42952605275597233, "grad_norm": 140.93470764160156, "learning_rate": 7.108977744727548e-06, "loss": 20.7521, "step": 212630 }, { "epoch": 0.42954625338865615, "grad_norm": 541.8628540039062, "learning_rate": 7.108661244733899e-06, "loss": 21.1802, "step": 212640 }, { "epoch": 0.42956645402133997, "grad_norm": 63.40633773803711, "learning_rate": 7.10834473446289e-06, "loss": 20.4283, "step": 212650 }, { "epoch": 0.42958665465402374, "grad_norm": 783.7577514648438, "learning_rate": 7.108028213916065e-06, "loss": 35.8806, "step": 212660 }, { "epoch": 0.42960685528670756, "grad_norm": 309.9933166503906, "learning_rate": 7.107711683094966e-06, "loss": 12.0363, "step": 212670 }, { "epoch": 0.4296270559193914, "grad_norm": 332.6861877441406, "learning_rate": 7.107395142001135e-06, "loss": 25.5969, "step": 212680 }, { "epoch": 0.4296472565520752, "grad_norm": 1080.428955078125, "learning_rate": 7.107078590636118e-06, "loss": 25.4658, "step": 212690 }, { "epoch": 0.429667457184759, "grad_norm": 463.9501953125, "learning_rate": 7.106762029001455e-06, "loss": 11.2113, "step": 212700 }, { "epoch": 0.42968765781744284, "grad_norm": 248.3670196533203, "learning_rate": 7.106445457098687e-06, "loss": 16.1101, "step": 212710 }, { "epoch": 0.42970785845012666, "grad_norm": 360.8236999511719, "learning_rate": 7.106128874929364e-06, "loss": 22.7373, "step": 212720 }, { "epoch": 0.4297280590828105, "grad_norm": 306.7997741699219, "learning_rate": 7.10581228249502e-06, "loss": 21.6671, "step": 212730 }, { "epoch": 0.4297482597154943, "grad_norm": 162.12937927246094, "learning_rate": 7.105495679797203e-06, "loss": 15.4331, "step": 212740 }, { "epoch": 0.4297684603481781, "grad_norm": 202.7210693359375, "learning_rate": 7.105179066837456e-06, "loss": 16.1146, "step": 212750 }, { "epoch": 0.42978866098086194, "grad_norm": 160.81214904785156, "learning_rate": 7.104862443617322e-06, "loss": 15.3864, "step": 212760 }, { "epoch": 0.42980886161354576, "grad_norm": 652.774658203125, "learning_rate": 7.104545810138343e-06, "loss": 16.8305, "step": 212770 }, { "epoch": 0.4298290622462296, "grad_norm": 265.049072265625, "learning_rate": 7.1042291664020635e-06, "loss": 20.8192, "step": 212780 }, { "epoch": 0.42984926287891334, "grad_norm": 188.65890502929688, "learning_rate": 7.103912512410025e-06, "loss": 22.953, "step": 212790 }, { "epoch": 0.42986946351159716, "grad_norm": 238.45358276367188, "learning_rate": 7.103595848163775e-06, "loss": 22.5891, "step": 212800 }, { "epoch": 0.429889664144281, "grad_norm": 120.81053924560547, "learning_rate": 7.103279173664851e-06, "loss": 17.1552, "step": 212810 }, { "epoch": 0.4299098647769648, "grad_norm": 383.380859375, "learning_rate": 7.1029624889148005e-06, "loss": 14.6005, "step": 212820 }, { "epoch": 0.4299300654096486, "grad_norm": 326.6242980957031, "learning_rate": 7.102645793915166e-06, "loss": 27.0537, "step": 212830 }, { "epoch": 0.42995026604233244, "grad_norm": 255.0185546875, "learning_rate": 7.10232908866749e-06, "loss": 28.3321, "step": 212840 }, { "epoch": 0.42997046667501626, "grad_norm": 209.60604858398438, "learning_rate": 7.102012373173319e-06, "loss": 29.8141, "step": 212850 }, { "epoch": 0.4299906673077001, "grad_norm": 222.7012176513672, "learning_rate": 7.101695647434193e-06, "loss": 18.2258, "step": 212860 }, { "epoch": 0.4300108679403839, "grad_norm": 73.76668548583984, "learning_rate": 7.101378911451659e-06, "loss": 12.7992, "step": 212870 }, { "epoch": 0.4300310685730677, "grad_norm": 453.4419860839844, "learning_rate": 7.101062165227257e-06, "loss": 21.3499, "step": 212880 }, { "epoch": 0.43005126920575154, "grad_norm": 410.2442626953125, "learning_rate": 7.100745408762534e-06, "loss": 12.2976, "step": 212890 }, { "epoch": 0.43007146983843536, "grad_norm": 853.5909423828125, "learning_rate": 7.100428642059033e-06, "loss": 30.0637, "step": 212900 }, { "epoch": 0.4300916704711192, "grad_norm": 222.78494262695312, "learning_rate": 7.100111865118295e-06, "loss": 17.4231, "step": 212910 }, { "epoch": 0.43011187110380295, "grad_norm": 334.7839050292969, "learning_rate": 7.099795077941869e-06, "loss": 24.856, "step": 212920 }, { "epoch": 0.43013207173648677, "grad_norm": 185.11868286132812, "learning_rate": 7.099478280531296e-06, "loss": 10.5208, "step": 212930 }, { "epoch": 0.4301522723691706, "grad_norm": 102.82315826416016, "learning_rate": 7.0991614728881205e-06, "loss": 25.1961, "step": 212940 }, { "epoch": 0.4301724730018544, "grad_norm": 158.32159423828125, "learning_rate": 7.098844655013886e-06, "loss": 16.8613, "step": 212950 }, { "epoch": 0.4301926736345382, "grad_norm": 45.05907440185547, "learning_rate": 7.098527826910138e-06, "loss": 44.6265, "step": 212960 }, { "epoch": 0.43021287426722205, "grad_norm": 257.8794250488281, "learning_rate": 7.09821098857842e-06, "loss": 24.8087, "step": 212970 }, { "epoch": 0.43023307489990587, "grad_norm": 378.0870361328125, "learning_rate": 7.097894140020276e-06, "loss": 36.2741, "step": 212980 }, { "epoch": 0.4302532755325897, "grad_norm": 293.7903137207031, "learning_rate": 7.097577281237249e-06, "loss": 15.581, "step": 212990 }, { "epoch": 0.4302734761652735, "grad_norm": 336.5411071777344, "learning_rate": 7.0972604122308865e-06, "loss": 19.9438, "step": 213000 }, { "epoch": 0.4302936767979573, "grad_norm": 335.5711669921875, "learning_rate": 7.096943533002732e-06, "loss": 17.4428, "step": 213010 }, { "epoch": 0.43031387743064115, "grad_norm": 203.89344787597656, "learning_rate": 7.096626643554325e-06, "loss": 21.9546, "step": 213020 }, { "epoch": 0.43033407806332497, "grad_norm": 350.3420104980469, "learning_rate": 7.0963097438872175e-06, "loss": 37.9893, "step": 213030 }, { "epoch": 0.4303542786960088, "grad_norm": 154.0564727783203, "learning_rate": 7.09599283400295e-06, "loss": 33.0371, "step": 213040 }, { "epoch": 0.43037447932869255, "grad_norm": 184.82508850097656, "learning_rate": 7.095675913903067e-06, "loss": 18.5827, "step": 213050 }, { "epoch": 0.43039467996137637, "grad_norm": 171.3484344482422, "learning_rate": 7.095358983589115e-06, "loss": 10.3562, "step": 213060 }, { "epoch": 0.4304148805940602, "grad_norm": 180.84117126464844, "learning_rate": 7.095042043062635e-06, "loss": 13.4579, "step": 213070 }, { "epoch": 0.430435081226744, "grad_norm": 334.8918762207031, "learning_rate": 7.094725092325177e-06, "loss": 30.9517, "step": 213080 }, { "epoch": 0.43045528185942783, "grad_norm": 399.44061279296875, "learning_rate": 7.094408131378281e-06, "loss": 20.2123, "step": 213090 }, { "epoch": 0.43047548249211165, "grad_norm": 482.45709228515625, "learning_rate": 7.094091160223493e-06, "loss": 12.4586, "step": 213100 }, { "epoch": 0.43049568312479547, "grad_norm": 187.94287109375, "learning_rate": 7.093774178862361e-06, "loss": 13.8199, "step": 213110 }, { "epoch": 0.4305158837574793, "grad_norm": 199.9274444580078, "learning_rate": 7.0934571872964265e-06, "loss": 27.3783, "step": 213120 }, { "epoch": 0.4305360843901631, "grad_norm": 114.7433853149414, "learning_rate": 7.093140185527236e-06, "loss": 17.9515, "step": 213130 }, { "epoch": 0.43055628502284693, "grad_norm": 344.6724548339844, "learning_rate": 7.092823173556333e-06, "loss": 21.1026, "step": 213140 }, { "epoch": 0.43057648565553075, "grad_norm": 290.3861083984375, "learning_rate": 7.092506151385265e-06, "loss": 17.3791, "step": 213150 }, { "epoch": 0.43059668628821457, "grad_norm": 16.06027603149414, "learning_rate": 7.092189119015575e-06, "loss": 13.801, "step": 213160 }, { "epoch": 0.4306168869208984, "grad_norm": 207.0883331298828, "learning_rate": 7.09187207644881e-06, "loss": 22.2985, "step": 213170 }, { "epoch": 0.43063708755358215, "grad_norm": 103.95832061767578, "learning_rate": 7.091555023686512e-06, "loss": 31.3376, "step": 213180 }, { "epoch": 0.430657288186266, "grad_norm": 307.6065979003906, "learning_rate": 7.091237960730231e-06, "loss": 21.6436, "step": 213190 }, { "epoch": 0.4306774888189498, "grad_norm": 206.03057861328125, "learning_rate": 7.090920887581507e-06, "loss": 23.6381, "step": 213200 }, { "epoch": 0.4306976894516336, "grad_norm": 235.21315002441406, "learning_rate": 7.090603804241892e-06, "loss": 14.5831, "step": 213210 }, { "epoch": 0.43071789008431743, "grad_norm": 631.27685546875, "learning_rate": 7.090286710712925e-06, "loss": 34.6523, "step": 213220 }, { "epoch": 0.43073809071700125, "grad_norm": 304.72430419921875, "learning_rate": 7.089969606996155e-06, "loss": 10.6992, "step": 213230 }, { "epoch": 0.4307582913496851, "grad_norm": 578.2530517578125, "learning_rate": 7.089652493093127e-06, "loss": 37.0656, "step": 213240 }, { "epoch": 0.4307784919823689, "grad_norm": 180.32998657226562, "learning_rate": 7.0893353690053845e-06, "loss": 15.9158, "step": 213250 }, { "epoch": 0.4307986926150527, "grad_norm": 337.01708984375, "learning_rate": 7.089018234734476e-06, "loss": 16.8199, "step": 213260 }, { "epoch": 0.43081889324773653, "grad_norm": 97.22914123535156, "learning_rate": 7.088701090281945e-06, "loss": 11.6004, "step": 213270 }, { "epoch": 0.43083909388042035, "grad_norm": 326.46209716796875, "learning_rate": 7.088383935649339e-06, "loss": 20.5215, "step": 213280 }, { "epoch": 0.4308592945131042, "grad_norm": 653.8909912109375, "learning_rate": 7.088066770838204e-06, "loss": 19.3477, "step": 213290 }, { "epoch": 0.43087949514578794, "grad_norm": 433.4763488769531, "learning_rate": 7.087749595850084e-06, "loss": 25.7452, "step": 213300 }, { "epoch": 0.43089969577847176, "grad_norm": 382.6387939453125, "learning_rate": 7.087432410686526e-06, "loss": 22.3321, "step": 213310 }, { "epoch": 0.4309198964111556, "grad_norm": 155.4407196044922, "learning_rate": 7.087115215349074e-06, "loss": 17.175, "step": 213320 }, { "epoch": 0.4309400970438394, "grad_norm": 224.74295043945312, "learning_rate": 7.086798009839278e-06, "loss": 12.0474, "step": 213330 }, { "epoch": 0.4309602976765232, "grad_norm": 191.77935791015625, "learning_rate": 7.086480794158679e-06, "loss": 16.7441, "step": 213340 }, { "epoch": 0.43098049830920704, "grad_norm": 131.4822540283203, "learning_rate": 7.086163568308828e-06, "loss": 14.3358, "step": 213350 }, { "epoch": 0.43100069894189086, "grad_norm": 222.73692321777344, "learning_rate": 7.085846332291267e-06, "loss": 26.2976, "step": 213360 }, { "epoch": 0.4310208995745747, "grad_norm": 458.41009521484375, "learning_rate": 7.085529086107545e-06, "loss": 27.2396, "step": 213370 }, { "epoch": 0.4310411002072585, "grad_norm": 179.7150421142578, "learning_rate": 7.085211829759207e-06, "loss": 21.2832, "step": 213380 }, { "epoch": 0.4310613008399423, "grad_norm": 405.43695068359375, "learning_rate": 7.084894563247798e-06, "loss": 16.4852, "step": 213390 }, { "epoch": 0.43108150147262614, "grad_norm": 142.2472686767578, "learning_rate": 7.0845772865748684e-06, "loss": 11.3402, "step": 213400 }, { "epoch": 0.43110170210530996, "grad_norm": 169.24847412109375, "learning_rate": 7.0842599997419606e-06, "loss": 31.7533, "step": 213410 }, { "epoch": 0.4311219027379938, "grad_norm": 593.2669067382812, "learning_rate": 7.083942702750622e-06, "loss": 11.5997, "step": 213420 }, { "epoch": 0.43114210337067754, "grad_norm": 149.34898376464844, "learning_rate": 7.083625395602401e-06, "loss": 21.7311, "step": 213430 }, { "epoch": 0.43116230400336136, "grad_norm": 285.866943359375, "learning_rate": 7.08330807829884e-06, "loss": 17.4123, "step": 213440 }, { "epoch": 0.4311825046360452, "grad_norm": 256.8253479003906, "learning_rate": 7.0829907508414894e-06, "loss": 24.5615, "step": 213450 }, { "epoch": 0.431202705268729, "grad_norm": 153.95066833496094, "learning_rate": 7.082673413231896e-06, "loss": 17.5665, "step": 213460 }, { "epoch": 0.4312229059014128, "grad_norm": 20.731021881103516, "learning_rate": 7.082356065471603e-06, "loss": 19.7676, "step": 213470 }, { "epoch": 0.43124310653409664, "grad_norm": 49.13193893432617, "learning_rate": 7.082038707562159e-06, "loss": 15.0109, "step": 213480 }, { "epoch": 0.43126330716678046, "grad_norm": 377.2490539550781, "learning_rate": 7.081721339505112e-06, "loss": 24.0881, "step": 213490 }, { "epoch": 0.4312835077994643, "grad_norm": 1.8049124479293823, "learning_rate": 7.081403961302007e-06, "loss": 16.4306, "step": 213500 }, { "epoch": 0.4313037084321481, "grad_norm": 240.13873291015625, "learning_rate": 7.081086572954392e-06, "loss": 22.3406, "step": 213510 }, { "epoch": 0.4313239090648319, "grad_norm": 396.0218505859375, "learning_rate": 7.080769174463812e-06, "loss": 28.9023, "step": 213520 }, { "epoch": 0.43134410969751574, "grad_norm": 252.62973022460938, "learning_rate": 7.080451765831817e-06, "loss": 17.2433, "step": 213530 }, { "epoch": 0.43136431033019956, "grad_norm": 194.14468383789062, "learning_rate": 7.0801343470599525e-06, "loss": 19.8916, "step": 213540 }, { "epoch": 0.4313845109628834, "grad_norm": 474.9602966308594, "learning_rate": 7.079816918149765e-06, "loss": 25.2776, "step": 213550 }, { "epoch": 0.43140471159556715, "grad_norm": 485.9934387207031, "learning_rate": 7.079499479102802e-06, "loss": 24.135, "step": 213560 }, { "epoch": 0.43142491222825097, "grad_norm": 461.6441955566406, "learning_rate": 7.07918202992061e-06, "loss": 10.1274, "step": 213570 }, { "epoch": 0.4314451128609348, "grad_norm": 31.50727653503418, "learning_rate": 7.0788645706047384e-06, "loss": 9.8385, "step": 213580 }, { "epoch": 0.4314653134936186, "grad_norm": 214.09979248046875, "learning_rate": 7.078547101156732e-06, "loss": 19.3702, "step": 213590 }, { "epoch": 0.4314855141263024, "grad_norm": 321.2070007324219, "learning_rate": 7.07822962157814e-06, "loss": 30.3991, "step": 213600 }, { "epoch": 0.43150571475898625, "grad_norm": 238.95513916015625, "learning_rate": 7.077912131870508e-06, "loss": 18.6852, "step": 213610 }, { "epoch": 0.43152591539167007, "grad_norm": 162.11097717285156, "learning_rate": 7.077594632035385e-06, "loss": 12.4419, "step": 213620 }, { "epoch": 0.4315461160243539, "grad_norm": 87.54935455322266, "learning_rate": 7.077277122074317e-06, "loss": 19.408, "step": 213630 }, { "epoch": 0.4315663166570377, "grad_norm": 71.52122497558594, "learning_rate": 7.076959601988853e-06, "loss": 10.1619, "step": 213640 }, { "epoch": 0.4315865172897215, "grad_norm": 12.894814491271973, "learning_rate": 7.076642071780539e-06, "loss": 17.1068, "step": 213650 }, { "epoch": 0.43160671792240535, "grad_norm": 273.3442687988281, "learning_rate": 7.076324531450924e-06, "loss": 9.8164, "step": 213660 }, { "epoch": 0.43162691855508917, "grad_norm": 274.76959228515625, "learning_rate": 7.076006981001556e-06, "loss": 23.1394, "step": 213670 }, { "epoch": 0.431647119187773, "grad_norm": 314.98486328125, "learning_rate": 7.07568942043398e-06, "loss": 36.1535, "step": 213680 }, { "epoch": 0.43166731982045675, "grad_norm": 334.3439636230469, "learning_rate": 7.075371849749747e-06, "loss": 24.542, "step": 213690 }, { "epoch": 0.43168752045314057, "grad_norm": 210.07199096679688, "learning_rate": 7.075054268950402e-06, "loss": 21.5774, "step": 213700 }, { "epoch": 0.4317077210858244, "grad_norm": 187.89193725585938, "learning_rate": 7.074736678037495e-06, "loss": 19.2322, "step": 213710 }, { "epoch": 0.4317279217185082, "grad_norm": 394.22119140625, "learning_rate": 7.074419077012572e-06, "loss": 11.5345, "step": 213720 }, { "epoch": 0.43174812235119203, "grad_norm": 795.6603393554688, "learning_rate": 7.074101465877183e-06, "loss": 27.045, "step": 213730 }, { "epoch": 0.43176832298387585, "grad_norm": 363.9726257324219, "learning_rate": 7.073783844632875e-06, "loss": 26.6971, "step": 213740 }, { "epoch": 0.43178852361655967, "grad_norm": 329.17120361328125, "learning_rate": 7.073466213281196e-06, "loss": 14.9573, "step": 213750 }, { "epoch": 0.4318087242492435, "grad_norm": 213.96507263183594, "learning_rate": 7.073148571823694e-06, "loss": 19.4148, "step": 213760 }, { "epoch": 0.4318289248819273, "grad_norm": 296.2957458496094, "learning_rate": 7.072830920261918e-06, "loss": 23.1831, "step": 213770 }, { "epoch": 0.43184912551461113, "grad_norm": 539.728515625, "learning_rate": 7.072513258597416e-06, "loss": 18.7623, "step": 213780 }, { "epoch": 0.43186932614729495, "grad_norm": 151.15383911132812, "learning_rate": 7.072195586831733e-06, "loss": 15.9247, "step": 213790 }, { "epoch": 0.43188952677997877, "grad_norm": 5.4107208251953125, "learning_rate": 7.071877904966422e-06, "loss": 15.1105, "step": 213800 }, { "epoch": 0.4319097274126626, "grad_norm": 490.4677429199219, "learning_rate": 7.071560213003028e-06, "loss": 17.591, "step": 213810 }, { "epoch": 0.43192992804534636, "grad_norm": 157.35092163085938, "learning_rate": 7.071242510943101e-06, "loss": 24.9597, "step": 213820 }, { "epoch": 0.4319501286780302, "grad_norm": 79.19880676269531, "learning_rate": 7.070924798788191e-06, "loss": 11.3494, "step": 213830 }, { "epoch": 0.431970329310714, "grad_norm": 278.0061950683594, "learning_rate": 7.070607076539844e-06, "loss": 14.2525, "step": 213840 }, { "epoch": 0.4319905299433978, "grad_norm": 358.8413391113281, "learning_rate": 7.070289344199609e-06, "loss": 27.918, "step": 213850 }, { "epoch": 0.43201073057608164, "grad_norm": 381.8889465332031, "learning_rate": 7.069971601769035e-06, "loss": 19.3892, "step": 213860 }, { "epoch": 0.43203093120876546, "grad_norm": 115.72716522216797, "learning_rate": 7.069653849249671e-06, "loss": 15.377, "step": 213870 }, { "epoch": 0.4320511318414493, "grad_norm": 545.5418701171875, "learning_rate": 7.069336086643064e-06, "loss": 16.4671, "step": 213880 }, { "epoch": 0.4320713324741331, "grad_norm": 274.7618713378906, "learning_rate": 7.0690183139507625e-06, "loss": 8.4756, "step": 213890 }, { "epoch": 0.4320915331068169, "grad_norm": 291.2508239746094, "learning_rate": 7.0687005311743195e-06, "loss": 30.3292, "step": 213900 }, { "epoch": 0.43211173373950074, "grad_norm": 157.2443389892578, "learning_rate": 7.068382738315281e-06, "loss": 9.9654, "step": 213910 }, { "epoch": 0.43213193437218456, "grad_norm": 1161.6968994140625, "learning_rate": 7.068064935375194e-06, "loss": 21.8224, "step": 213920 }, { "epoch": 0.4321521350048684, "grad_norm": 247.69422912597656, "learning_rate": 7.06774712235561e-06, "loss": 23.1509, "step": 213930 }, { "epoch": 0.43217233563755214, "grad_norm": 143.9205322265625, "learning_rate": 7.067429299258078e-06, "loss": 10.3108, "step": 213940 }, { "epoch": 0.43219253627023596, "grad_norm": 201.99806213378906, "learning_rate": 7.0671114660841455e-06, "loss": 10.091, "step": 213950 }, { "epoch": 0.4322127369029198, "grad_norm": 368.875732421875, "learning_rate": 7.066793622835364e-06, "loss": 26.0801, "step": 213960 }, { "epoch": 0.4322329375356036, "grad_norm": 145.4344024658203, "learning_rate": 7.066475769513278e-06, "loss": 14.1262, "step": 213970 }, { "epoch": 0.4322531381682874, "grad_norm": 292.13751220703125, "learning_rate": 7.066157906119442e-06, "loss": 15.5936, "step": 213980 }, { "epoch": 0.43227333880097124, "grad_norm": 154.63304138183594, "learning_rate": 7.0658400326554025e-06, "loss": 18.8874, "step": 213990 }, { "epoch": 0.43229353943365506, "grad_norm": 302.5494079589844, "learning_rate": 7.06552214912271e-06, "loss": 22.8152, "step": 214000 }, { "epoch": 0.4323137400663389, "grad_norm": 216.5895233154297, "learning_rate": 7.065204255522913e-06, "loss": 15.8323, "step": 214010 }, { "epoch": 0.4323339406990227, "grad_norm": 414.3367919921875, "learning_rate": 7.064886351857559e-06, "loss": 15.9155, "step": 214020 }, { "epoch": 0.4323541413317065, "grad_norm": 467.2950744628906, "learning_rate": 7.064568438128201e-06, "loss": 22.9209, "step": 214030 }, { "epoch": 0.43237434196439034, "grad_norm": 337.51947021484375, "learning_rate": 7.064250514336386e-06, "loss": 17.7857, "step": 214040 }, { "epoch": 0.43239454259707416, "grad_norm": 348.55145263671875, "learning_rate": 7.063932580483665e-06, "loss": 30.159, "step": 214050 }, { "epoch": 0.432414743229758, "grad_norm": 199.5076141357422, "learning_rate": 7.063614636571586e-06, "loss": 19.7252, "step": 214060 }, { "epoch": 0.43243494386244175, "grad_norm": 0.7484303712844849, "learning_rate": 7.063296682601701e-06, "loss": 33.5495, "step": 214070 }, { "epoch": 0.43245514449512557, "grad_norm": 136.85079956054688, "learning_rate": 7.062978718575558e-06, "loss": 26.4516, "step": 214080 }, { "epoch": 0.4324753451278094, "grad_norm": 175.97999572753906, "learning_rate": 7.062660744494706e-06, "loss": 31.81, "step": 214090 }, { "epoch": 0.4324955457604932, "grad_norm": 126.60018157958984, "learning_rate": 7.0623427603606965e-06, "loss": 16.7557, "step": 214100 }, { "epoch": 0.432515746393177, "grad_norm": 296.2315673828125, "learning_rate": 7.062024766175077e-06, "loss": 15.7991, "step": 214110 }, { "epoch": 0.43253594702586085, "grad_norm": 656.7332153320312, "learning_rate": 7.061706761939402e-06, "loss": 20.205, "step": 214120 }, { "epoch": 0.43255614765854467, "grad_norm": 360.67547607421875, "learning_rate": 7.0613887476552145e-06, "loss": 16.667, "step": 214130 }, { "epoch": 0.4325763482912285, "grad_norm": 91.0293960571289, "learning_rate": 7.061070723324071e-06, "loss": 11.0471, "step": 214140 }, { "epoch": 0.4325965489239123, "grad_norm": 390.89752197265625, "learning_rate": 7.060752688947519e-06, "loss": 16.7789, "step": 214150 }, { "epoch": 0.4326167495565961, "grad_norm": 238.47105407714844, "learning_rate": 7.060434644527105e-06, "loss": 23.8286, "step": 214160 }, { "epoch": 0.43263695018927995, "grad_norm": 235.2141571044922, "learning_rate": 7.060116590064385e-06, "loss": 15.2151, "step": 214170 }, { "epoch": 0.43265715082196377, "grad_norm": 212.2063446044922, "learning_rate": 7.059798525560907e-06, "loss": 19.2578, "step": 214180 }, { "epoch": 0.4326773514546476, "grad_norm": 90.86183166503906, "learning_rate": 7.05948045101822e-06, "loss": 9.8909, "step": 214190 }, { "epoch": 0.43269755208733135, "grad_norm": 61.98146057128906, "learning_rate": 7.059162366437875e-06, "loss": 16.7566, "step": 214200 }, { "epoch": 0.43271775272001517, "grad_norm": 461.389892578125, "learning_rate": 7.058844271821423e-06, "loss": 21.8443, "step": 214210 }, { "epoch": 0.432737953352699, "grad_norm": 20.96030044555664, "learning_rate": 7.058526167170413e-06, "loss": 17.3532, "step": 214220 }, { "epoch": 0.4327581539853828, "grad_norm": 294.7895812988281, "learning_rate": 7.058208052486397e-06, "loss": 14.1136, "step": 214230 }, { "epoch": 0.43277835461806663, "grad_norm": 390.11444091796875, "learning_rate": 7.057889927770922e-06, "loss": 12.2071, "step": 214240 }, { "epoch": 0.43279855525075045, "grad_norm": 284.3426208496094, "learning_rate": 7.057571793025545e-06, "loss": 14.4783, "step": 214250 }, { "epoch": 0.43281875588343427, "grad_norm": 442.78369140625, "learning_rate": 7.057253648251811e-06, "loss": 25.0507, "step": 214260 }, { "epoch": 0.4328389565161181, "grad_norm": 212.68060302734375, "learning_rate": 7.056935493451271e-06, "loss": 10.1077, "step": 214270 }, { "epoch": 0.4328591571488019, "grad_norm": 121.29270935058594, "learning_rate": 7.056617328625479e-06, "loss": 28.8945, "step": 214280 }, { "epoch": 0.43287935778148573, "grad_norm": 100.73682403564453, "learning_rate": 7.056299153775981e-06, "loss": 15.1384, "step": 214290 }, { "epoch": 0.43289955841416955, "grad_norm": 387.51654052734375, "learning_rate": 7.0559809689043325e-06, "loss": 18.2134, "step": 214300 }, { "epoch": 0.43291975904685337, "grad_norm": 13.077478408813477, "learning_rate": 7.055662774012081e-06, "loss": 20.5539, "step": 214310 }, { "epoch": 0.4329399596795372, "grad_norm": 0.0, "learning_rate": 7.0553445691007785e-06, "loss": 16.7423, "step": 214320 }, { "epoch": 0.43296016031222095, "grad_norm": 574.614501953125, "learning_rate": 7.055026354171976e-06, "loss": 24.7338, "step": 214330 }, { "epoch": 0.4329803609449048, "grad_norm": 119.8930435180664, "learning_rate": 7.054708129227225e-06, "loss": 29.7437, "step": 214340 }, { "epoch": 0.4330005615775886, "grad_norm": 284.2837829589844, "learning_rate": 7.0543898942680755e-06, "loss": 29.1253, "step": 214350 }, { "epoch": 0.4330207622102724, "grad_norm": 166.3603973388672, "learning_rate": 7.054071649296078e-06, "loss": 14.1886, "step": 214360 }, { "epoch": 0.43304096284295623, "grad_norm": 80.72021484375, "learning_rate": 7.053753394312786e-06, "loss": 17.989, "step": 214370 }, { "epoch": 0.43306116347564005, "grad_norm": 178.58872985839844, "learning_rate": 7.053435129319746e-06, "loss": 13.3796, "step": 214380 }, { "epoch": 0.4330813641083239, "grad_norm": 122.52217864990234, "learning_rate": 7.0531168543185155e-06, "loss": 24.072, "step": 214390 }, { "epoch": 0.4331015647410077, "grad_norm": 152.51174926757812, "learning_rate": 7.052798569310641e-06, "loss": 12.8871, "step": 214400 }, { "epoch": 0.4331217653736915, "grad_norm": 161.40206909179688, "learning_rate": 7.052480274297675e-06, "loss": 22.4849, "step": 214410 }, { "epoch": 0.43314196600637533, "grad_norm": 184.04579162597656, "learning_rate": 7.0521619692811705e-06, "loss": 19.348, "step": 214420 }, { "epoch": 0.43316216663905915, "grad_norm": 202.3116912841797, "learning_rate": 7.051843654262675e-06, "loss": 11.751, "step": 214430 }, { "epoch": 0.433182367271743, "grad_norm": 45.445533752441406, "learning_rate": 7.051525329243745e-06, "loss": 10.5655, "step": 214440 }, { "epoch": 0.4332025679044268, "grad_norm": 226.3568878173828, "learning_rate": 7.0512069942259275e-06, "loss": 18.1085, "step": 214450 }, { "epoch": 0.43322276853711056, "grad_norm": 623.3917236328125, "learning_rate": 7.050888649210778e-06, "loss": 20.2542, "step": 214460 }, { "epoch": 0.4332429691697944, "grad_norm": 327.1083679199219, "learning_rate": 7.050570294199843e-06, "loss": 27.8403, "step": 214470 }, { "epoch": 0.4332631698024782, "grad_norm": 150.01368713378906, "learning_rate": 7.050251929194679e-06, "loss": 23.1885, "step": 214480 }, { "epoch": 0.433283370435162, "grad_norm": 171.1770782470703, "learning_rate": 7.049933554196835e-06, "loss": 31.1592, "step": 214490 }, { "epoch": 0.43330357106784584, "grad_norm": 118.5301513671875, "learning_rate": 7.049615169207864e-06, "loss": 9.8971, "step": 214500 }, { "epoch": 0.43332377170052966, "grad_norm": 790.8311157226562, "learning_rate": 7.049296774229317e-06, "loss": 18.5455, "step": 214510 }, { "epoch": 0.4333439723332135, "grad_norm": 363.1620788574219, "learning_rate": 7.048978369262747e-06, "loss": 9.6922, "step": 214520 }, { "epoch": 0.4333641729658973, "grad_norm": 444.4999084472656, "learning_rate": 7.048659954309704e-06, "loss": 25.2896, "step": 214530 }, { "epoch": 0.4333843735985811, "grad_norm": 457.74847412109375, "learning_rate": 7.048341529371741e-06, "loss": 20.0222, "step": 214540 }, { "epoch": 0.43340457423126494, "grad_norm": 75.98638916015625, "learning_rate": 7.048023094450412e-06, "loss": 25.2772, "step": 214550 }, { "epoch": 0.43342477486394876, "grad_norm": 508.5006103515625, "learning_rate": 7.047704649547263e-06, "loss": 31.5986, "step": 214560 }, { "epoch": 0.4334449754966326, "grad_norm": 305.576904296875, "learning_rate": 7.047386194663852e-06, "loss": 36.8521, "step": 214570 }, { "epoch": 0.43346517612931634, "grad_norm": 71.35709381103516, "learning_rate": 7.047067729801727e-06, "loss": 23.8657, "step": 214580 }, { "epoch": 0.43348537676200016, "grad_norm": 49.142967224121094, "learning_rate": 7.046749254962445e-06, "loss": 25.4872, "step": 214590 }, { "epoch": 0.433505577394684, "grad_norm": 346.8016662597656, "learning_rate": 7.0464307701475544e-06, "loss": 20.161, "step": 214600 }, { "epoch": 0.4335257780273678, "grad_norm": 239.65213012695312, "learning_rate": 7.046112275358607e-06, "loss": 11.3539, "step": 214610 }, { "epoch": 0.4335459786600516, "grad_norm": 263.7312927246094, "learning_rate": 7.045793770597159e-06, "loss": 14.6619, "step": 214620 }, { "epoch": 0.43356617929273544, "grad_norm": 238.74024963378906, "learning_rate": 7.045475255864757e-06, "loss": 16.2724, "step": 214630 }, { "epoch": 0.43358637992541926, "grad_norm": 468.49481201171875, "learning_rate": 7.0451567311629596e-06, "loss": 19.8366, "step": 214640 }, { "epoch": 0.4336065805581031, "grad_norm": 158.48744201660156, "learning_rate": 7.044838196493315e-06, "loss": 15.7521, "step": 214650 }, { "epoch": 0.4336267811907869, "grad_norm": 0.0, "learning_rate": 7.044519651857378e-06, "loss": 16.3107, "step": 214660 }, { "epoch": 0.4336469818234707, "grad_norm": 231.6261749267578, "learning_rate": 7.0442010972567e-06, "loss": 38.2635, "step": 214670 }, { "epoch": 0.43366718245615454, "grad_norm": 264.6258850097656, "learning_rate": 7.043882532692834e-06, "loss": 18.0443, "step": 214680 }, { "epoch": 0.43368738308883836, "grad_norm": 431.02166748046875, "learning_rate": 7.043563958167331e-06, "loss": 22.9399, "step": 214690 }, { "epoch": 0.4337075837215222, "grad_norm": 152.3155517578125, "learning_rate": 7.043245373681746e-06, "loss": 29.7354, "step": 214700 }, { "epoch": 0.43372778435420595, "grad_norm": 358.1011962890625, "learning_rate": 7.042926779237631e-06, "loss": 18.6358, "step": 214710 }, { "epoch": 0.43374798498688977, "grad_norm": 456.7130432128906, "learning_rate": 7.0426081748365386e-06, "loss": 23.5871, "step": 214720 }, { "epoch": 0.4337681856195736, "grad_norm": 246.7151641845703, "learning_rate": 7.042289560480023e-06, "loss": 10.1544, "step": 214730 }, { "epoch": 0.4337883862522574, "grad_norm": 118.59222412109375, "learning_rate": 7.041970936169632e-06, "loss": 11.8522, "step": 214740 }, { "epoch": 0.4338085868849412, "grad_norm": 247.9459991455078, "learning_rate": 7.041652301906925e-06, "loss": 11.2003, "step": 214750 }, { "epoch": 0.43382878751762505, "grad_norm": 25.268665313720703, "learning_rate": 7.041333657693452e-06, "loss": 25.5613, "step": 214760 }, { "epoch": 0.43384898815030887, "grad_norm": 550.7073364257812, "learning_rate": 7.041015003530766e-06, "loss": 21.4596, "step": 214770 }, { "epoch": 0.4338691887829927, "grad_norm": 252.6304473876953, "learning_rate": 7.040696339420421e-06, "loss": 18.2151, "step": 214780 }, { "epoch": 0.4338893894156765, "grad_norm": 171.03323364257812, "learning_rate": 7.0403776653639685e-06, "loss": 63.6352, "step": 214790 }, { "epoch": 0.4339095900483603, "grad_norm": 316.4093017578125, "learning_rate": 7.0400589813629645e-06, "loss": 26.6686, "step": 214800 }, { "epoch": 0.43392979068104415, "grad_norm": 133.22238159179688, "learning_rate": 7.039740287418959e-06, "loss": 10.8382, "step": 214810 }, { "epoch": 0.43394999131372797, "grad_norm": 289.9556579589844, "learning_rate": 7.039421583533508e-06, "loss": 28.1795, "step": 214820 }, { "epoch": 0.4339701919464118, "grad_norm": 213.07522583007812, "learning_rate": 7.039102869708162e-06, "loss": 22.7816, "step": 214830 }, { "epoch": 0.43399039257909555, "grad_norm": 329.1095275878906, "learning_rate": 7.038784145944477e-06, "loss": 11.6074, "step": 214840 }, { "epoch": 0.43401059321177937, "grad_norm": 656.9208984375, "learning_rate": 7.038465412244005e-06, "loss": 17.5226, "step": 214850 }, { "epoch": 0.4340307938444632, "grad_norm": 102.11808013916016, "learning_rate": 7.0381466686083e-06, "loss": 18.6036, "step": 214860 }, { "epoch": 0.434050994477147, "grad_norm": 297.678955078125, "learning_rate": 7.037827915038915e-06, "loss": 15.5507, "step": 214870 }, { "epoch": 0.43407119510983083, "grad_norm": 203.6171417236328, "learning_rate": 7.037509151537404e-06, "loss": 16.3592, "step": 214880 }, { "epoch": 0.43409139574251465, "grad_norm": 135.59332275390625, "learning_rate": 7.0371903781053215e-06, "loss": 28.4097, "step": 214890 }, { "epoch": 0.43411159637519847, "grad_norm": 677.1357421875, "learning_rate": 7.036871594744218e-06, "loss": 25.3455, "step": 214900 }, { "epoch": 0.4341317970078823, "grad_norm": 500.0668640136719, "learning_rate": 7.036552801455652e-06, "loss": 16.8251, "step": 214910 }, { "epoch": 0.4341519976405661, "grad_norm": 45.664981842041016, "learning_rate": 7.0362339982411735e-06, "loss": 19.9703, "step": 214920 }, { "epoch": 0.43417219827324993, "grad_norm": 308.1906433105469, "learning_rate": 7.035915185102337e-06, "loss": 31.8074, "step": 214930 }, { "epoch": 0.43419239890593375, "grad_norm": 473.4103698730469, "learning_rate": 7.035596362040697e-06, "loss": 24.8082, "step": 214940 }, { "epoch": 0.43421259953861757, "grad_norm": 419.897705078125, "learning_rate": 7.035277529057807e-06, "loss": 20.7737, "step": 214950 }, { "epoch": 0.4342328001713014, "grad_norm": 247.57821655273438, "learning_rate": 7.034958686155222e-06, "loss": 13.8373, "step": 214960 }, { "epoch": 0.43425300080398516, "grad_norm": 367.4712219238281, "learning_rate": 7.0346398333344945e-06, "loss": 18.9678, "step": 214970 }, { "epoch": 0.434273201436669, "grad_norm": 640.169921875, "learning_rate": 7.03432097059718e-06, "loss": 23.3512, "step": 214980 }, { "epoch": 0.4342934020693528, "grad_norm": 417.4309387207031, "learning_rate": 7.03400209794483e-06, "loss": 21.6158, "step": 214990 }, { "epoch": 0.4343136027020366, "grad_norm": 256.1366271972656, "learning_rate": 7.033683215379002e-06, "loss": 16.1505, "step": 215000 }, { "epoch": 0.43433380333472044, "grad_norm": 334.5623474121094, "learning_rate": 7.033364322901248e-06, "loss": 22.7598, "step": 215010 }, { "epoch": 0.43435400396740426, "grad_norm": 256.7177429199219, "learning_rate": 7.0330454205131235e-06, "loss": 20.8307, "step": 215020 }, { "epoch": 0.4343742046000881, "grad_norm": 0.0, "learning_rate": 7.032726508216182e-06, "loss": 14.7437, "step": 215030 }, { "epoch": 0.4343944052327719, "grad_norm": 344.3137512207031, "learning_rate": 7.032407586011978e-06, "loss": 19.4405, "step": 215040 }, { "epoch": 0.4344146058654557, "grad_norm": 586.20947265625, "learning_rate": 7.0320886539020674e-06, "loss": 22.5719, "step": 215050 }, { "epoch": 0.43443480649813954, "grad_norm": 247.5828399658203, "learning_rate": 7.031769711887999e-06, "loss": 30.5688, "step": 215060 }, { "epoch": 0.43445500713082336, "grad_norm": 270.5705261230469, "learning_rate": 7.031450759971335e-06, "loss": 15.486, "step": 215070 }, { "epoch": 0.4344752077635072, "grad_norm": 183.8125457763672, "learning_rate": 7.031131798153625e-06, "loss": 20.0332, "step": 215080 }, { "epoch": 0.434495408396191, "grad_norm": 169.62062072753906, "learning_rate": 7.030812826436426e-06, "loss": 18.6815, "step": 215090 }, { "epoch": 0.43451560902887476, "grad_norm": 189.5517120361328, "learning_rate": 7.030493844821291e-06, "loss": 19.3401, "step": 215100 }, { "epoch": 0.4345358096615586, "grad_norm": 111.4783706665039, "learning_rate": 7.0301748533097745e-06, "loss": 16.6068, "step": 215110 }, { "epoch": 0.4345560102942424, "grad_norm": 304.1570739746094, "learning_rate": 7.029855851903433e-06, "loss": 27.1507, "step": 215120 }, { "epoch": 0.4345762109269262, "grad_norm": 168.10325622558594, "learning_rate": 7.029536840603821e-06, "loss": 19.783, "step": 215130 }, { "epoch": 0.43459641155961004, "grad_norm": 440.1117248535156, "learning_rate": 7.029217819412491e-06, "loss": 15.6989, "step": 215140 }, { "epoch": 0.43461661219229386, "grad_norm": 332.8207092285156, "learning_rate": 7.028898788331e-06, "loss": 14.8859, "step": 215150 }, { "epoch": 0.4346368128249777, "grad_norm": 167.9835968017578, "learning_rate": 7.028579747360903e-06, "loss": 7.3236, "step": 215160 }, { "epoch": 0.4346570134576615, "grad_norm": 251.2379150390625, "learning_rate": 7.028260696503754e-06, "loss": 16.343, "step": 215170 }, { "epoch": 0.4346772140903453, "grad_norm": 278.6465759277344, "learning_rate": 7.027941635761109e-06, "loss": 11.3214, "step": 215180 }, { "epoch": 0.43469741472302914, "grad_norm": 185.0143280029297, "learning_rate": 7.02762256513452e-06, "loss": 20.664, "step": 215190 }, { "epoch": 0.43471761535571296, "grad_norm": 406.9326171875, "learning_rate": 7.027303484625547e-06, "loss": 17.4016, "step": 215200 }, { "epoch": 0.4347378159883968, "grad_norm": 1243.183837890625, "learning_rate": 7.026984394235742e-06, "loss": 21.6856, "step": 215210 }, { "epoch": 0.43475801662108055, "grad_norm": 303.80584716796875, "learning_rate": 7.0266652939666605e-06, "loss": 16.3521, "step": 215220 }, { "epoch": 0.43477821725376437, "grad_norm": 392.8240966796875, "learning_rate": 7.026346183819859e-06, "loss": 30.9287, "step": 215230 }, { "epoch": 0.4347984178864482, "grad_norm": 178.8391571044922, "learning_rate": 7.026027063796891e-06, "loss": 21.1364, "step": 215240 }, { "epoch": 0.434818618519132, "grad_norm": 220.93446350097656, "learning_rate": 7.025707933899314e-06, "loss": 15.1593, "step": 215250 }, { "epoch": 0.4348388191518158, "grad_norm": 501.018310546875, "learning_rate": 7.0253887941286816e-06, "loss": 18.3226, "step": 215260 }, { "epoch": 0.43485901978449965, "grad_norm": 291.4002990722656, "learning_rate": 7.025069644486549e-06, "loss": 24.6238, "step": 215270 }, { "epoch": 0.43487922041718347, "grad_norm": 510.5931701660156, "learning_rate": 7.024750484974473e-06, "loss": 15.9882, "step": 215280 }, { "epoch": 0.4348994210498673, "grad_norm": 294.44598388671875, "learning_rate": 7.02443131559401e-06, "loss": 36.2244, "step": 215290 }, { "epoch": 0.4349196216825511, "grad_norm": 324.10906982421875, "learning_rate": 7.024112136346713e-06, "loss": 26.0995, "step": 215300 }, { "epoch": 0.4349398223152349, "grad_norm": 441.0466003417969, "learning_rate": 7.023792947234139e-06, "loss": 18.826, "step": 215310 }, { "epoch": 0.43496002294791875, "grad_norm": 214.3717803955078, "learning_rate": 7.023473748257844e-06, "loss": 12.7352, "step": 215320 }, { "epoch": 0.43498022358060257, "grad_norm": 142.53643798828125, "learning_rate": 7.023154539419383e-06, "loss": 12.5537, "step": 215330 }, { "epoch": 0.4350004242132864, "grad_norm": 276.1595458984375, "learning_rate": 7.0228353207203136e-06, "loss": 14.3753, "step": 215340 }, { "epoch": 0.43502062484597015, "grad_norm": 408.0410461425781, "learning_rate": 7.022516092162187e-06, "loss": 18.0317, "step": 215350 }, { "epoch": 0.43504082547865397, "grad_norm": 393.6905822753906, "learning_rate": 7.022196853746565e-06, "loss": 21.4598, "step": 215360 }, { "epoch": 0.4350610261113378, "grad_norm": 355.596923828125, "learning_rate": 7.0218776054750004e-06, "loss": 16.2121, "step": 215370 }, { "epoch": 0.4350812267440216, "grad_norm": 509.7431640625, "learning_rate": 7.021558347349049e-06, "loss": 14.2409, "step": 215380 }, { "epoch": 0.43510142737670543, "grad_norm": 364.8689270019531, "learning_rate": 7.0212390793702675e-06, "loss": 17.5498, "step": 215390 }, { "epoch": 0.43512162800938925, "grad_norm": 49.157867431640625, "learning_rate": 7.0209198015402115e-06, "loss": 17.9513, "step": 215400 }, { "epoch": 0.43514182864207307, "grad_norm": 157.30287170410156, "learning_rate": 7.02060051386044e-06, "loss": 18.856, "step": 215410 }, { "epoch": 0.4351620292747569, "grad_norm": 763.6799926757812, "learning_rate": 7.020281216332503e-06, "loss": 26.089, "step": 215420 }, { "epoch": 0.4351822299074407, "grad_norm": 356.6274108886719, "learning_rate": 7.019961908957962e-06, "loss": 21.3404, "step": 215430 }, { "epoch": 0.43520243054012453, "grad_norm": 352.75567626953125, "learning_rate": 7.019642591738372e-06, "loss": 20.9827, "step": 215440 }, { "epoch": 0.43522263117280835, "grad_norm": 163.4756317138672, "learning_rate": 7.01932326467529e-06, "loss": 18.9826, "step": 215450 }, { "epoch": 0.43524283180549217, "grad_norm": 245.24078369140625, "learning_rate": 7.0190039277702685e-06, "loss": 14.4162, "step": 215460 }, { "epoch": 0.435263032438176, "grad_norm": 243.39328002929688, "learning_rate": 7.018684581024868e-06, "loss": 15.2495, "step": 215470 }, { "epoch": 0.43528323307085975, "grad_norm": 603.317138671875, "learning_rate": 7.018365224440644e-06, "loss": 33.8597, "step": 215480 }, { "epoch": 0.4353034337035436, "grad_norm": 396.7857971191406, "learning_rate": 7.018045858019154e-06, "loss": 45.7229, "step": 215490 }, { "epoch": 0.4353236343362274, "grad_norm": 333.40826416015625, "learning_rate": 7.0177264817619514e-06, "loss": 26.9476, "step": 215500 }, { "epoch": 0.4353438349689112, "grad_norm": 422.90472412109375, "learning_rate": 7.017407095670594e-06, "loss": 23.0073, "step": 215510 }, { "epoch": 0.43536403560159503, "grad_norm": 61.612796783447266, "learning_rate": 7.0170876997466406e-06, "loss": 21.299, "step": 215520 }, { "epoch": 0.43538423623427885, "grad_norm": 301.858642578125, "learning_rate": 7.016768293991647e-06, "loss": 14.3707, "step": 215530 }, { "epoch": 0.4354044368669627, "grad_norm": 183.82823181152344, "learning_rate": 7.016448878407167e-06, "loss": 17.5999, "step": 215540 }, { "epoch": 0.4354246374996465, "grad_norm": 178.40806579589844, "learning_rate": 7.016129452994761e-06, "loss": 16.7827, "step": 215550 }, { "epoch": 0.4354448381323303, "grad_norm": 1083.2467041015625, "learning_rate": 7.015810017755985e-06, "loss": 15.3123, "step": 215560 }, { "epoch": 0.43546503876501413, "grad_norm": 267.93829345703125, "learning_rate": 7.015490572692396e-06, "loss": 11.4674, "step": 215570 }, { "epoch": 0.43548523939769795, "grad_norm": 184.74610900878906, "learning_rate": 7.015171117805549e-06, "loss": 13.9653, "step": 215580 }, { "epoch": 0.4355054400303818, "grad_norm": 393.5919189453125, "learning_rate": 7.014851653097003e-06, "loss": 18.8942, "step": 215590 }, { "epoch": 0.4355256406630656, "grad_norm": 92.4108657836914, "learning_rate": 7.014532178568314e-06, "loss": 14.6793, "step": 215600 }, { "epoch": 0.43554584129574936, "grad_norm": 1741.7396240234375, "learning_rate": 7.014212694221041e-06, "loss": 41.174, "step": 215610 }, { "epoch": 0.4355660419284332, "grad_norm": 750.9249267578125, "learning_rate": 7.013893200056736e-06, "loss": 29.3777, "step": 215620 }, { "epoch": 0.435586242561117, "grad_norm": 419.9778747558594, "learning_rate": 7.013573696076964e-06, "loss": 12.3385, "step": 215630 }, { "epoch": 0.4356064431938008, "grad_norm": 318.4319763183594, "learning_rate": 7.013254182283275e-06, "loss": 13.0365, "step": 215640 }, { "epoch": 0.43562664382648464, "grad_norm": 12.773296356201172, "learning_rate": 7.01293465867723e-06, "loss": 10.8718, "step": 215650 }, { "epoch": 0.43564684445916846, "grad_norm": 71.82109069824219, "learning_rate": 7.012615125260388e-06, "loss": 9.6645, "step": 215660 }, { "epoch": 0.4356670450918523, "grad_norm": 261.8061828613281, "learning_rate": 7.012295582034301e-06, "loss": 26.2101, "step": 215670 }, { "epoch": 0.4356872457245361, "grad_norm": 473.7610778808594, "learning_rate": 7.011976029000531e-06, "loss": 25.7495, "step": 215680 }, { "epoch": 0.4357074463572199, "grad_norm": 69.41788482666016, "learning_rate": 7.011656466160633e-06, "loss": 13.1046, "step": 215690 }, { "epoch": 0.43572764698990374, "grad_norm": 893.3094482421875, "learning_rate": 7.011336893516167e-06, "loss": 18.2674, "step": 215700 }, { "epoch": 0.43574784762258756, "grad_norm": 157.2799835205078, "learning_rate": 7.011017311068686e-06, "loss": 15.548, "step": 215710 }, { "epoch": 0.4357680482552714, "grad_norm": 495.2589111328125, "learning_rate": 7.010697718819753e-06, "loss": 17.3996, "step": 215720 }, { "epoch": 0.43578824888795514, "grad_norm": 11.50699520111084, "learning_rate": 7.0103781167709224e-06, "loss": 8.6028, "step": 215730 }, { "epoch": 0.43580844952063896, "grad_norm": 148.05963134765625, "learning_rate": 7.010058504923753e-06, "loss": 16.9938, "step": 215740 }, { "epoch": 0.4358286501533228, "grad_norm": 300.656005859375, "learning_rate": 7.009738883279802e-06, "loss": 20.4639, "step": 215750 }, { "epoch": 0.4358488507860066, "grad_norm": 198.98207092285156, "learning_rate": 7.009419251840627e-06, "loss": 23.8778, "step": 215760 }, { "epoch": 0.4358690514186904, "grad_norm": 194.3978729248047, "learning_rate": 7.0090996106077855e-06, "loss": 18.0164, "step": 215770 }, { "epoch": 0.43588925205137424, "grad_norm": 435.6606750488281, "learning_rate": 7.008779959582838e-06, "loss": 31.1274, "step": 215780 }, { "epoch": 0.43590945268405806, "grad_norm": 285.09722900390625, "learning_rate": 7.008460298767341e-06, "loss": 14.0945, "step": 215790 }, { "epoch": 0.4359296533167419, "grad_norm": 125.36673736572266, "learning_rate": 7.008140628162851e-06, "loss": 27.3267, "step": 215800 }, { "epoch": 0.4359498539494257, "grad_norm": 327.7503662109375, "learning_rate": 7.007820947770927e-06, "loss": 13.4958, "step": 215810 }, { "epoch": 0.4359700545821095, "grad_norm": 715.1808471679688, "learning_rate": 7.007501257593126e-06, "loss": 28.0054, "step": 215820 }, { "epoch": 0.43599025521479334, "grad_norm": 404.6307067871094, "learning_rate": 7.0071815576310085e-06, "loss": 21.5436, "step": 215830 }, { "epoch": 0.43601045584747716, "grad_norm": 207.91273498535156, "learning_rate": 7.006861847886133e-06, "loss": 16.7104, "step": 215840 }, { "epoch": 0.436030656480161, "grad_norm": 462.0039367675781, "learning_rate": 7.006542128360054e-06, "loss": 20.1898, "step": 215850 }, { "epoch": 0.43605085711284475, "grad_norm": 15.930275917053223, "learning_rate": 7.006222399054334e-06, "loss": 13.6113, "step": 215860 }, { "epoch": 0.43607105774552857, "grad_norm": 309.4143981933594, "learning_rate": 7.005902659970528e-06, "loss": 20.6773, "step": 215870 }, { "epoch": 0.4360912583782124, "grad_norm": 93.15816497802734, "learning_rate": 7.0055829111101954e-06, "loss": 22.25, "step": 215880 }, { "epoch": 0.4361114590108962, "grad_norm": 379.2583923339844, "learning_rate": 7.005263152474896e-06, "loss": 14.5099, "step": 215890 }, { "epoch": 0.43613165964358, "grad_norm": 128.00901794433594, "learning_rate": 7.0049433840661875e-06, "loss": 14.541, "step": 215900 }, { "epoch": 0.43615186027626385, "grad_norm": 116.20124816894531, "learning_rate": 7.004623605885628e-06, "loss": 17.3502, "step": 215910 }, { "epoch": 0.43617206090894767, "grad_norm": 859.9505004882812, "learning_rate": 7.004303817934775e-06, "loss": 16.946, "step": 215920 }, { "epoch": 0.4361922615416315, "grad_norm": 684.4489135742188, "learning_rate": 7.003984020215189e-06, "loss": 17.412, "step": 215930 }, { "epoch": 0.4362124621743153, "grad_norm": 72.29467010498047, "learning_rate": 7.003664212728427e-06, "loss": 13.5961, "step": 215940 }, { "epoch": 0.4362326628069991, "grad_norm": 497.1455993652344, "learning_rate": 7.003344395476051e-06, "loss": 19.1541, "step": 215950 }, { "epoch": 0.43625286343968295, "grad_norm": 1026.9884033203125, "learning_rate": 7.003024568459614e-06, "loss": 36.7228, "step": 215960 }, { "epoch": 0.43627306407236677, "grad_norm": 222.2440643310547, "learning_rate": 7.002704731680682e-06, "loss": 19.9814, "step": 215970 }, { "epoch": 0.4362932647050506, "grad_norm": 430.69677734375, "learning_rate": 7.0023848851408076e-06, "loss": 18.5001, "step": 215980 }, { "epoch": 0.43631346533773435, "grad_norm": 428.6326599121094, "learning_rate": 7.002065028841553e-06, "loss": 12.8981, "step": 215990 }, { "epoch": 0.43633366597041817, "grad_norm": 74.66525268554688, "learning_rate": 7.0017451627844765e-06, "loss": 15.2977, "step": 216000 }, { "epoch": 0.436353866603102, "grad_norm": 142.62408447265625, "learning_rate": 7.001425286971135e-06, "loss": 17.3344, "step": 216010 }, { "epoch": 0.4363740672357858, "grad_norm": 262.3960266113281, "learning_rate": 7.001105401403091e-06, "loss": 14.4236, "step": 216020 }, { "epoch": 0.43639426786846963, "grad_norm": 242.39064025878906, "learning_rate": 7.000785506081902e-06, "loss": 24.9744, "step": 216030 }, { "epoch": 0.43641446850115345, "grad_norm": 325.9830017089844, "learning_rate": 7.000465601009126e-06, "loss": 14.1368, "step": 216040 }, { "epoch": 0.43643466913383727, "grad_norm": 587.1112060546875, "learning_rate": 7.0001456861863236e-06, "loss": 22.6376, "step": 216050 }, { "epoch": 0.4364548697665211, "grad_norm": 304.29974365234375, "learning_rate": 6.999825761615055e-06, "loss": 15.381, "step": 216060 }, { "epoch": 0.4364750703992049, "grad_norm": 13.087930679321289, "learning_rate": 6.999505827296877e-06, "loss": 18.9327, "step": 216070 }, { "epoch": 0.43649527103188873, "grad_norm": 423.27294921875, "learning_rate": 6.999185883233351e-06, "loss": 22.6607, "step": 216080 }, { "epoch": 0.43651547166457255, "grad_norm": 134.41635131835938, "learning_rate": 6.998865929426035e-06, "loss": 9.2228, "step": 216090 }, { "epoch": 0.43653567229725637, "grad_norm": 304.0077209472656, "learning_rate": 6.998545965876489e-06, "loss": 15.8826, "step": 216100 }, { "epoch": 0.4365558729299402, "grad_norm": 441.8407287597656, "learning_rate": 6.998225992586273e-06, "loss": 29.861, "step": 216110 }, { "epoch": 0.43657607356262396, "grad_norm": 249.66897583007812, "learning_rate": 6.997906009556943e-06, "loss": 17.5095, "step": 216120 }, { "epoch": 0.4365962741953078, "grad_norm": 362.1988525390625, "learning_rate": 6.997586016790065e-06, "loss": 13.5223, "step": 216130 }, { "epoch": 0.4366164748279916, "grad_norm": 187.70925903320312, "learning_rate": 6.997266014287193e-06, "loss": 17.0663, "step": 216140 }, { "epoch": 0.4366366754606754, "grad_norm": 186.5120849609375, "learning_rate": 6.996946002049889e-06, "loss": 13.7245, "step": 216150 }, { "epoch": 0.43665687609335924, "grad_norm": 419.76300048828125, "learning_rate": 6.9966259800797124e-06, "loss": 28.8798, "step": 216160 }, { "epoch": 0.43667707672604306, "grad_norm": 156.83245849609375, "learning_rate": 6.996305948378223e-06, "loss": 18.7459, "step": 216170 }, { "epoch": 0.4366972773587269, "grad_norm": 143.71060180664062, "learning_rate": 6.995985906946982e-06, "loss": 32.858, "step": 216180 }, { "epoch": 0.4367174779914107, "grad_norm": 309.574951171875, "learning_rate": 6.995665855787546e-06, "loss": 13.9285, "step": 216190 }, { "epoch": 0.4367376786240945, "grad_norm": 333.0892028808594, "learning_rate": 6.995345794901477e-06, "loss": 10.8511, "step": 216200 }, { "epoch": 0.43675787925677834, "grad_norm": 302.8669738769531, "learning_rate": 6.995025724290334e-06, "loss": 11.6484, "step": 216210 }, { "epoch": 0.43677807988946216, "grad_norm": 343.58465576171875, "learning_rate": 6.994705643955678e-06, "loss": 20.8458, "step": 216220 }, { "epoch": 0.436798280522146, "grad_norm": 342.8822326660156, "learning_rate": 6.994385553899069e-06, "loss": 19.6768, "step": 216230 }, { "epoch": 0.4368184811548298, "grad_norm": 281.4910888671875, "learning_rate": 6.9940654541220675e-06, "loss": 24.9373, "step": 216240 }, { "epoch": 0.43683868178751356, "grad_norm": 329.94854736328125, "learning_rate": 6.993745344626232e-06, "loss": 26.5525, "step": 216250 }, { "epoch": 0.4368588824201974, "grad_norm": 60.6929931640625, "learning_rate": 6.993425225413123e-06, "loss": 21.9992, "step": 216260 }, { "epoch": 0.4368790830528812, "grad_norm": 135.46096801757812, "learning_rate": 6.993105096484303e-06, "loss": 32.1377, "step": 216270 }, { "epoch": 0.436899283685565, "grad_norm": 212.33242797851562, "learning_rate": 6.992784957841328e-06, "loss": 11.7194, "step": 216280 }, { "epoch": 0.43691948431824884, "grad_norm": 300.6083068847656, "learning_rate": 6.9924648094857625e-06, "loss": 21.0319, "step": 216290 }, { "epoch": 0.43693968495093266, "grad_norm": 324.2675476074219, "learning_rate": 6.992144651419163e-06, "loss": 10.6814, "step": 216300 }, { "epoch": 0.4369598855836165, "grad_norm": 177.3191375732422, "learning_rate": 6.991824483643095e-06, "loss": 21.482, "step": 216310 }, { "epoch": 0.4369800862163003, "grad_norm": 262.4141540527344, "learning_rate": 6.991504306159115e-06, "loss": 10.3618, "step": 216320 }, { "epoch": 0.4370002868489841, "grad_norm": 175.207763671875, "learning_rate": 6.991184118968783e-06, "loss": 20.5907, "step": 216330 }, { "epoch": 0.43702048748166794, "grad_norm": 0.0, "learning_rate": 6.990863922073664e-06, "loss": 14.1202, "step": 216340 }, { "epoch": 0.43704068811435176, "grad_norm": 343.4942932128906, "learning_rate": 6.990543715475314e-06, "loss": 21.2943, "step": 216350 }, { "epoch": 0.4370608887470356, "grad_norm": 253.36492919921875, "learning_rate": 6.9902234991752945e-06, "loss": 15.3818, "step": 216360 }, { "epoch": 0.43708108937971935, "grad_norm": 346.9786376953125, "learning_rate": 6.989903273175169e-06, "loss": 61.2612, "step": 216370 }, { "epoch": 0.43710129001240317, "grad_norm": 336.92626953125, "learning_rate": 6.989583037476495e-06, "loss": 18.6497, "step": 216380 }, { "epoch": 0.437121490645087, "grad_norm": 1314.62353515625, "learning_rate": 6.989262792080834e-06, "loss": 19.664, "step": 216390 }, { "epoch": 0.4371416912777708, "grad_norm": 48.15350341796875, "learning_rate": 6.98894253698975e-06, "loss": 17.2409, "step": 216400 }, { "epoch": 0.4371618919104546, "grad_norm": 177.6345977783203, "learning_rate": 6.988622272204799e-06, "loss": 21.4271, "step": 216410 }, { "epoch": 0.43718209254313845, "grad_norm": 148.08287048339844, "learning_rate": 6.9883019977275446e-06, "loss": 19.9227, "step": 216420 }, { "epoch": 0.43720229317582227, "grad_norm": 293.5564880371094, "learning_rate": 6.987981713559548e-06, "loss": 17.7272, "step": 216430 }, { "epoch": 0.4372224938085061, "grad_norm": 514.6180419921875, "learning_rate": 6.98766141970237e-06, "loss": 14.3237, "step": 216440 }, { "epoch": 0.4372426944411899, "grad_norm": 120.87969970703125, "learning_rate": 6.987341116157571e-06, "loss": 14.1545, "step": 216450 }, { "epoch": 0.4372628950738737, "grad_norm": 134.6324005126953, "learning_rate": 6.987020802926711e-06, "loss": 19.2817, "step": 216460 }, { "epoch": 0.43728309570655755, "grad_norm": 476.8459167480469, "learning_rate": 6.986700480011353e-06, "loss": 11.6963, "step": 216470 }, { "epoch": 0.43730329633924137, "grad_norm": 53.31153869628906, "learning_rate": 6.986380147413059e-06, "loss": 17.8827, "step": 216480 }, { "epoch": 0.4373234969719252, "grad_norm": 221.1599884033203, "learning_rate": 6.986059805133389e-06, "loss": 11.8437, "step": 216490 }, { "epoch": 0.43734369760460895, "grad_norm": 92.4846420288086, "learning_rate": 6.985739453173903e-06, "loss": 18.5632, "step": 216500 }, { "epoch": 0.43736389823729277, "grad_norm": 12.228981971740723, "learning_rate": 6.985419091536167e-06, "loss": 49.6591, "step": 216510 }, { "epoch": 0.4373840988699766, "grad_norm": 237.54812622070312, "learning_rate": 6.985098720221736e-06, "loss": 35.8917, "step": 216520 }, { "epoch": 0.4374042995026604, "grad_norm": 428.269287109375, "learning_rate": 6.984778339232174e-06, "loss": 12.0743, "step": 216530 }, { "epoch": 0.43742450013534423, "grad_norm": 245.52420043945312, "learning_rate": 6.984457948569045e-06, "loss": 22.1537, "step": 216540 }, { "epoch": 0.43744470076802805, "grad_norm": 444.3219299316406, "learning_rate": 6.984137548233909e-06, "loss": 19.2422, "step": 216550 }, { "epoch": 0.43746490140071187, "grad_norm": 314.3553161621094, "learning_rate": 6.983817138228327e-06, "loss": 23.1165, "step": 216560 }, { "epoch": 0.4374851020333957, "grad_norm": 434.1788330078125, "learning_rate": 6.983496718553859e-06, "loss": 23.8593, "step": 216570 }, { "epoch": 0.4375053026660795, "grad_norm": 245.72901916503906, "learning_rate": 6.98317628921207e-06, "loss": 16.2533, "step": 216580 }, { "epoch": 0.43752550329876333, "grad_norm": 128.78802490234375, "learning_rate": 6.982855850204521e-06, "loss": 13.2012, "step": 216590 }, { "epoch": 0.43754570393144715, "grad_norm": 241.71250915527344, "learning_rate": 6.9825354015327715e-06, "loss": 41.7371, "step": 216600 }, { "epoch": 0.43756590456413097, "grad_norm": 124.66697692871094, "learning_rate": 6.9822149431983865e-06, "loss": 20.4151, "step": 216610 }, { "epoch": 0.4375861051968148, "grad_norm": 330.076904296875, "learning_rate": 6.981894475202924e-06, "loss": 16.3693, "step": 216620 }, { "epoch": 0.43760630582949855, "grad_norm": 389.4688415527344, "learning_rate": 6.981573997547951e-06, "loss": 23.2989, "step": 216630 }, { "epoch": 0.4376265064621824, "grad_norm": 236.78781127929688, "learning_rate": 6.981253510235025e-06, "loss": 25.7445, "step": 216640 }, { "epoch": 0.4376467070948662, "grad_norm": 446.3905334472656, "learning_rate": 6.98093301326571e-06, "loss": 44.9723, "step": 216650 }, { "epoch": 0.43766690772755, "grad_norm": 358.5753173828125, "learning_rate": 6.980612506641567e-06, "loss": 15.6247, "step": 216660 }, { "epoch": 0.43768710836023383, "grad_norm": 304.6280212402344, "learning_rate": 6.9802919903641605e-06, "loss": 11.4491, "step": 216670 }, { "epoch": 0.43770730899291765, "grad_norm": 274.2132873535156, "learning_rate": 6.9799714644350504e-06, "loss": 10.5955, "step": 216680 }, { "epoch": 0.4377275096256015, "grad_norm": 199.38565063476562, "learning_rate": 6.979650928855799e-06, "loss": 16.7536, "step": 216690 }, { "epoch": 0.4377477102582853, "grad_norm": 394.798828125, "learning_rate": 6.979330383627969e-06, "loss": 28.2482, "step": 216700 }, { "epoch": 0.4377679108909691, "grad_norm": 347.6001281738281, "learning_rate": 6.9790098287531225e-06, "loss": 9.9508, "step": 216710 }, { "epoch": 0.43778811152365293, "grad_norm": 0.0, "learning_rate": 6.978689264232824e-06, "loss": 27.3367, "step": 216720 }, { "epoch": 0.43780831215633675, "grad_norm": 305.5069580078125, "learning_rate": 6.978368690068632e-06, "loss": 12.6773, "step": 216730 }, { "epoch": 0.4378285127890206, "grad_norm": 88.37347412109375, "learning_rate": 6.9780481062621115e-06, "loss": 23.8881, "step": 216740 }, { "epoch": 0.4378487134217044, "grad_norm": 422.849365234375, "learning_rate": 6.977727512814826e-06, "loss": 33.9144, "step": 216750 }, { "epoch": 0.43786891405438816, "grad_norm": 318.004150390625, "learning_rate": 6.977406909728335e-06, "loss": 21.1258, "step": 216760 }, { "epoch": 0.437889114687072, "grad_norm": 85.90986633300781, "learning_rate": 6.9770862970042015e-06, "loss": 10.5534, "step": 216770 }, { "epoch": 0.4379093153197558, "grad_norm": 158.95144653320312, "learning_rate": 6.97676567464399e-06, "loss": 14.9924, "step": 216780 }, { "epoch": 0.4379295159524396, "grad_norm": 274.92889404296875, "learning_rate": 6.976445042649265e-06, "loss": 13.0459, "step": 216790 }, { "epoch": 0.43794971658512344, "grad_norm": 567.2266235351562, "learning_rate": 6.976124401021583e-06, "loss": 50.1125, "step": 216800 }, { "epoch": 0.43796991721780726, "grad_norm": 203.92388916015625, "learning_rate": 6.975803749762512e-06, "loss": 16.2417, "step": 216810 }, { "epoch": 0.4379901178504911, "grad_norm": 401.3974914550781, "learning_rate": 6.975483088873613e-06, "loss": 26.1295, "step": 216820 }, { "epoch": 0.4380103184831749, "grad_norm": 240.92103576660156, "learning_rate": 6.975162418356448e-06, "loss": 31.7336, "step": 216830 }, { "epoch": 0.4380305191158587, "grad_norm": 3193.0078125, "learning_rate": 6.974841738212581e-06, "loss": 25.2816, "step": 216840 }, { "epoch": 0.43805071974854254, "grad_norm": 128.3775177001953, "learning_rate": 6.974521048443577e-06, "loss": 9.5089, "step": 216850 }, { "epoch": 0.43807092038122636, "grad_norm": 182.8789825439453, "learning_rate": 6.974200349050996e-06, "loss": 11.6577, "step": 216860 }, { "epoch": 0.4380911210139102, "grad_norm": 706.62353515625, "learning_rate": 6.9738796400364005e-06, "loss": 17.7439, "step": 216870 }, { "epoch": 0.438111321646594, "grad_norm": 199.5137939453125, "learning_rate": 6.973558921401356e-06, "loss": 16.4295, "step": 216880 }, { "epoch": 0.43813152227927776, "grad_norm": 110.73387908935547, "learning_rate": 6.973238193147425e-06, "loss": 20.8794, "step": 216890 }, { "epoch": 0.4381517229119616, "grad_norm": 102.72981262207031, "learning_rate": 6.97291745527617e-06, "loss": 26.6903, "step": 216900 }, { "epoch": 0.4381719235446454, "grad_norm": 0.0, "learning_rate": 6.972596707789154e-06, "loss": 14.4281, "step": 216910 }, { "epoch": 0.4381921241773292, "grad_norm": 10.576709747314453, "learning_rate": 6.972275950687942e-06, "loss": 19.0183, "step": 216920 }, { "epoch": 0.43821232481001304, "grad_norm": 206.65184020996094, "learning_rate": 6.9719551839740964e-06, "loss": 12.8681, "step": 216930 }, { "epoch": 0.43823252544269686, "grad_norm": 332.3371276855469, "learning_rate": 6.971634407649179e-06, "loss": 26.9299, "step": 216940 }, { "epoch": 0.4382527260753807, "grad_norm": 5.158157825469971, "learning_rate": 6.971313621714756e-06, "loss": 23.0883, "step": 216950 }, { "epoch": 0.4382729267080645, "grad_norm": 327.34625244140625, "learning_rate": 6.970992826172389e-06, "loss": 43.1785, "step": 216960 }, { "epoch": 0.4382931273407483, "grad_norm": 345.0984191894531, "learning_rate": 6.970672021023641e-06, "loss": 11.2089, "step": 216970 }, { "epoch": 0.43831332797343214, "grad_norm": 312.06304931640625, "learning_rate": 6.970351206270079e-06, "loss": 29.0811, "step": 216980 }, { "epoch": 0.43833352860611596, "grad_norm": 265.03778076171875, "learning_rate": 6.970030381913262e-06, "loss": 18.8137, "step": 216990 }, { "epoch": 0.4383537292387998, "grad_norm": 181.86972045898438, "learning_rate": 6.9697095479547564e-06, "loss": 15.9436, "step": 217000 }, { "epoch": 0.43837392987148355, "grad_norm": 65.0184326171875, "learning_rate": 6.969388704396126e-06, "loss": 20.0296, "step": 217010 }, { "epoch": 0.43839413050416737, "grad_norm": 366.57452392578125, "learning_rate": 6.969067851238933e-06, "loss": 12.6096, "step": 217020 }, { "epoch": 0.4384143311368512, "grad_norm": 46.77051544189453, "learning_rate": 6.968746988484742e-06, "loss": 24.9369, "step": 217030 }, { "epoch": 0.438434531769535, "grad_norm": 225.16705322265625, "learning_rate": 6.968426116135118e-06, "loss": 26.6589, "step": 217040 }, { "epoch": 0.4384547324022188, "grad_norm": 394.2978820800781, "learning_rate": 6.968105234191623e-06, "loss": 30.7515, "step": 217050 }, { "epoch": 0.43847493303490265, "grad_norm": 375.0109558105469, "learning_rate": 6.9677843426558235e-06, "loss": 20.7878, "step": 217060 }, { "epoch": 0.43849513366758647, "grad_norm": 187.0615234375, "learning_rate": 6.967463441529278e-06, "loss": 20.4119, "step": 217070 }, { "epoch": 0.4385153343002703, "grad_norm": 336.0079345703125, "learning_rate": 6.967142530813558e-06, "loss": 11.3782, "step": 217080 }, { "epoch": 0.4385355349329541, "grad_norm": 85.40040588378906, "learning_rate": 6.966821610510222e-06, "loss": 17.1216, "step": 217090 }, { "epoch": 0.4385557355656379, "grad_norm": 342.46990966796875, "learning_rate": 6.966500680620837e-06, "loss": 30.2829, "step": 217100 }, { "epoch": 0.43857593619832175, "grad_norm": 164.8860321044922, "learning_rate": 6.966179741146966e-06, "loss": 14.2625, "step": 217110 }, { "epoch": 0.43859613683100557, "grad_norm": 261.3809509277344, "learning_rate": 6.965858792090174e-06, "loss": 16.1254, "step": 217120 }, { "epoch": 0.4386163374636894, "grad_norm": 91.49510955810547, "learning_rate": 6.965537833452024e-06, "loss": 32.2327, "step": 217130 }, { "epoch": 0.43863653809637315, "grad_norm": 227.63314819335938, "learning_rate": 6.9652168652340804e-06, "loss": 24.4534, "step": 217140 }, { "epoch": 0.43865673872905697, "grad_norm": 565.2056884765625, "learning_rate": 6.9648958874379084e-06, "loss": 18.4406, "step": 217150 }, { "epoch": 0.4386769393617408, "grad_norm": 134.83474731445312, "learning_rate": 6.964574900065072e-06, "loss": 16.7033, "step": 217160 }, { "epoch": 0.4386971399944246, "grad_norm": 620.0621948242188, "learning_rate": 6.964253903117138e-06, "loss": 12.6393, "step": 217170 }, { "epoch": 0.43871734062710843, "grad_norm": 272.5164489746094, "learning_rate": 6.963932896595665e-06, "loss": 33.4615, "step": 217180 }, { "epoch": 0.43873754125979225, "grad_norm": 44.18101501464844, "learning_rate": 6.963611880502225e-06, "loss": 10.2502, "step": 217190 }, { "epoch": 0.43875774189247607, "grad_norm": 304.0390930175781, "learning_rate": 6.963290854838376e-06, "loss": 18.4155, "step": 217200 }, { "epoch": 0.4387779425251599, "grad_norm": 392.530517578125, "learning_rate": 6.962969819605686e-06, "loss": 16.4954, "step": 217210 }, { "epoch": 0.4387981431578437, "grad_norm": 19.74650001525879, "learning_rate": 6.96264877480572e-06, "loss": 14.7667, "step": 217220 }, { "epoch": 0.43881834379052753, "grad_norm": 5.164533615112305, "learning_rate": 6.96232772044004e-06, "loss": 21.211, "step": 217230 }, { "epoch": 0.43883854442321135, "grad_norm": 774.5592651367188, "learning_rate": 6.962006656510216e-06, "loss": 22.8492, "step": 217240 }, { "epoch": 0.43885874505589517, "grad_norm": 376.98931884765625, "learning_rate": 6.961685583017808e-06, "loss": 14.8284, "step": 217250 }, { "epoch": 0.438878945688579, "grad_norm": 276.6520080566406, "learning_rate": 6.961364499964383e-06, "loss": 20.5822, "step": 217260 }, { "epoch": 0.43889914632126276, "grad_norm": 232.26983642578125, "learning_rate": 6.961043407351505e-06, "loss": 29.3182, "step": 217270 }, { "epoch": 0.4389193469539466, "grad_norm": 108.15164947509766, "learning_rate": 6.960722305180737e-06, "loss": 17.9535, "step": 217280 }, { "epoch": 0.4389395475866304, "grad_norm": 472.78656005859375, "learning_rate": 6.9604011934536495e-06, "loss": 23.1308, "step": 217290 }, { "epoch": 0.4389597482193142, "grad_norm": 78.6825942993164, "learning_rate": 6.960080072171802e-06, "loss": 13.1421, "step": 217300 }, { "epoch": 0.43897994885199804, "grad_norm": 296.48126220703125, "learning_rate": 6.959758941336762e-06, "loss": 27.6591, "step": 217310 }, { "epoch": 0.43900014948468186, "grad_norm": 441.0160827636719, "learning_rate": 6.959437800950097e-06, "loss": 26.6677, "step": 217320 }, { "epoch": 0.4390203501173657, "grad_norm": 751.2532958984375, "learning_rate": 6.959116651013369e-06, "loss": 42.6366, "step": 217330 }, { "epoch": 0.4390405507500495, "grad_norm": 1093.8497314453125, "learning_rate": 6.958795491528142e-06, "loss": 45.2656, "step": 217340 }, { "epoch": 0.4390607513827333, "grad_norm": 341.9880676269531, "learning_rate": 6.9584743224959846e-06, "loss": 16.6363, "step": 217350 }, { "epoch": 0.43908095201541714, "grad_norm": 320.728515625, "learning_rate": 6.958153143918462e-06, "loss": 14.5058, "step": 217360 }, { "epoch": 0.43910115264810096, "grad_norm": 157.25701904296875, "learning_rate": 6.957831955797137e-06, "loss": 13.9639, "step": 217370 }, { "epoch": 0.4391213532807848, "grad_norm": 423.9961242675781, "learning_rate": 6.957510758133579e-06, "loss": 23.9766, "step": 217380 }, { "epoch": 0.4391415539134686, "grad_norm": 200.01654052734375, "learning_rate": 6.957189550929346e-06, "loss": 21.7223, "step": 217390 }, { "epoch": 0.43916175454615236, "grad_norm": 187.51596069335938, "learning_rate": 6.9568683341860135e-06, "loss": 13.6135, "step": 217400 }, { "epoch": 0.4391819551788362, "grad_norm": 281.9305725097656, "learning_rate": 6.9565471079051395e-06, "loss": 18.2908, "step": 217410 }, { "epoch": 0.43920215581152, "grad_norm": 274.11712646484375, "learning_rate": 6.956225872088292e-06, "loss": 10.4901, "step": 217420 }, { "epoch": 0.4392223564442038, "grad_norm": 161.9459991455078, "learning_rate": 6.9559046267370375e-06, "loss": 17.4598, "step": 217430 }, { "epoch": 0.43924255707688764, "grad_norm": 180.14833068847656, "learning_rate": 6.955583371852942e-06, "loss": 18.9559, "step": 217440 }, { "epoch": 0.43926275770957146, "grad_norm": 522.2338256835938, "learning_rate": 6.95526210743757e-06, "loss": 26.9909, "step": 217450 }, { "epoch": 0.4392829583422553, "grad_norm": 118.70719146728516, "learning_rate": 6.954940833492487e-06, "loss": 8.4854, "step": 217460 }, { "epoch": 0.4393031589749391, "grad_norm": 298.576904296875, "learning_rate": 6.954619550019259e-06, "loss": 21.9083, "step": 217470 }, { "epoch": 0.4393233596076229, "grad_norm": 323.90386962890625, "learning_rate": 6.954298257019454e-06, "loss": 12.8559, "step": 217480 }, { "epoch": 0.43934356024030674, "grad_norm": 964.5706176757812, "learning_rate": 6.953976954494636e-06, "loss": 23.2359, "step": 217490 }, { "epoch": 0.43936376087299056, "grad_norm": 129.5939178466797, "learning_rate": 6.953655642446368e-06, "loss": 20.0249, "step": 217500 }, { "epoch": 0.4393839615056744, "grad_norm": 188.68711853027344, "learning_rate": 6.953334320876224e-06, "loss": 16.445, "step": 217510 }, { "epoch": 0.4394041621383582, "grad_norm": 298.38897705078125, "learning_rate": 6.9530129897857626e-06, "loss": 14.8967, "step": 217520 }, { "epoch": 0.43942436277104197, "grad_norm": 347.3416748046875, "learning_rate": 6.952691649176554e-06, "loss": 42.5064, "step": 217530 }, { "epoch": 0.4394445634037258, "grad_norm": 209.84986877441406, "learning_rate": 6.952370299050163e-06, "loss": 15.3902, "step": 217540 }, { "epoch": 0.4394647640364096, "grad_norm": 0.0, "learning_rate": 6.952048939408156e-06, "loss": 22.2496, "step": 217550 }, { "epoch": 0.4394849646690934, "grad_norm": 258.4037780761719, "learning_rate": 6.9517275702521e-06, "loss": 13.9008, "step": 217560 }, { "epoch": 0.43950516530177725, "grad_norm": 1403.2706298828125, "learning_rate": 6.9514061915835584e-06, "loss": 47.764, "step": 217570 }, { "epoch": 0.43952536593446107, "grad_norm": 442.0766906738281, "learning_rate": 6.9510848034041e-06, "loss": 39.9094, "step": 217580 }, { "epoch": 0.4395455665671449, "grad_norm": 208.8603973388672, "learning_rate": 6.950763405715292e-06, "loss": 21.7397, "step": 217590 }, { "epoch": 0.4395657671998287, "grad_norm": 140.8825225830078, "learning_rate": 6.950441998518699e-06, "loss": 22.0363, "step": 217600 }, { "epoch": 0.4395859678325125, "grad_norm": 633.6480712890625, "learning_rate": 6.950120581815889e-06, "loss": 44.3826, "step": 217610 }, { "epoch": 0.43960616846519635, "grad_norm": 180.97268676757812, "learning_rate": 6.9497991556084275e-06, "loss": 15.7391, "step": 217620 }, { "epoch": 0.43962636909788017, "grad_norm": 418.5404968261719, "learning_rate": 6.9494777198978815e-06, "loss": 17.5379, "step": 217630 }, { "epoch": 0.439646569730564, "grad_norm": 169.9822235107422, "learning_rate": 6.949156274685818e-06, "loss": 15.0035, "step": 217640 }, { "epoch": 0.43966677036324775, "grad_norm": 82.10293579101562, "learning_rate": 6.948834819973803e-06, "loss": 20.2223, "step": 217650 }, { "epoch": 0.43968697099593157, "grad_norm": 208.22096252441406, "learning_rate": 6.948513355763402e-06, "loss": 22.0515, "step": 217660 }, { "epoch": 0.4397071716286154, "grad_norm": 309.8206481933594, "learning_rate": 6.948191882056185e-06, "loss": 28.0477, "step": 217670 }, { "epoch": 0.4397273722612992, "grad_norm": 868.6277465820312, "learning_rate": 6.947870398853716e-06, "loss": 36.1528, "step": 217680 }, { "epoch": 0.43974757289398303, "grad_norm": 2.3124332427978516, "learning_rate": 6.947548906157563e-06, "loss": 16.7257, "step": 217690 }, { "epoch": 0.43976777352666685, "grad_norm": 208.30235290527344, "learning_rate": 6.947227403969293e-06, "loss": 15.5571, "step": 217700 }, { "epoch": 0.43978797415935067, "grad_norm": 300.9740295410156, "learning_rate": 6.946905892290473e-06, "loss": 29.8091, "step": 217710 }, { "epoch": 0.4398081747920345, "grad_norm": 52.441192626953125, "learning_rate": 6.946584371122671e-06, "loss": 8.6313, "step": 217720 }, { "epoch": 0.4398283754247183, "grad_norm": 383.3055725097656, "learning_rate": 6.94626284046745e-06, "loss": 16.1593, "step": 217730 }, { "epoch": 0.43984857605740213, "grad_norm": 538.35546875, "learning_rate": 6.945941300326382e-06, "loss": 23.0782, "step": 217740 }, { "epoch": 0.43986877669008595, "grad_norm": 382.81597900390625, "learning_rate": 6.9456197507010315e-06, "loss": 25.2598, "step": 217750 }, { "epoch": 0.43988897732276977, "grad_norm": 37.523929595947266, "learning_rate": 6.945298191592967e-06, "loss": 25.0721, "step": 217760 }, { "epoch": 0.4399091779554536, "grad_norm": 257.01123046875, "learning_rate": 6.944976623003754e-06, "loss": 19.8579, "step": 217770 }, { "epoch": 0.43992937858813735, "grad_norm": 780.4937133789062, "learning_rate": 6.944655044934962e-06, "loss": 16.2912, "step": 217780 }, { "epoch": 0.4399495792208212, "grad_norm": 233.5996856689453, "learning_rate": 6.944333457388156e-06, "loss": 19.5669, "step": 217790 }, { "epoch": 0.439969779853505, "grad_norm": 339.4852294921875, "learning_rate": 6.944011860364905e-06, "loss": 31.5886, "step": 217800 }, { "epoch": 0.4399899804861888, "grad_norm": 225.94215393066406, "learning_rate": 6.9436902538667775e-06, "loss": 21.4075, "step": 217810 }, { "epoch": 0.44001018111887263, "grad_norm": 340.7861328125, "learning_rate": 6.943368637895338e-06, "loss": 26.7931, "step": 217820 }, { "epoch": 0.44003038175155645, "grad_norm": 376.00054931640625, "learning_rate": 6.943047012452156e-06, "loss": 13.3222, "step": 217830 }, { "epoch": 0.4400505823842403, "grad_norm": 186.74745178222656, "learning_rate": 6.942725377538797e-06, "loss": 27.0506, "step": 217840 }, { "epoch": 0.4400707830169241, "grad_norm": 314.2313232421875, "learning_rate": 6.942403733156832e-06, "loss": 16.2805, "step": 217850 }, { "epoch": 0.4400909836496079, "grad_norm": 128.09048461914062, "learning_rate": 6.942082079307826e-06, "loss": 21.0371, "step": 217860 }, { "epoch": 0.44011118428229173, "grad_norm": 209.68136596679688, "learning_rate": 6.941760415993346e-06, "loss": 19.1523, "step": 217870 }, { "epoch": 0.44013138491497555, "grad_norm": 262.0122985839844, "learning_rate": 6.941438743214963e-06, "loss": 16.9196, "step": 217880 }, { "epoch": 0.4401515855476594, "grad_norm": 274.64306640625, "learning_rate": 6.941117060974243e-06, "loss": 20.5924, "step": 217890 }, { "epoch": 0.4401717861803432, "grad_norm": 275.0093078613281, "learning_rate": 6.940795369272754e-06, "loss": 23.17, "step": 217900 }, { "epoch": 0.44019198681302696, "grad_norm": 142.55621337890625, "learning_rate": 6.940473668112063e-06, "loss": 11.5992, "step": 217910 }, { "epoch": 0.4402121874457108, "grad_norm": 368.8808288574219, "learning_rate": 6.940151957493739e-06, "loss": 21.8491, "step": 217920 }, { "epoch": 0.4402323880783946, "grad_norm": 19.031354904174805, "learning_rate": 6.939830237419349e-06, "loss": 15.1922, "step": 217930 }, { "epoch": 0.4402525887110784, "grad_norm": 232.90509033203125, "learning_rate": 6.939508507890464e-06, "loss": 22.5398, "step": 217940 }, { "epoch": 0.44027278934376224, "grad_norm": 438.17608642578125, "learning_rate": 6.939186768908647e-06, "loss": 37.4612, "step": 217950 }, { "epoch": 0.44029298997644606, "grad_norm": 375.556640625, "learning_rate": 6.938865020475471e-06, "loss": 28.2924, "step": 217960 }, { "epoch": 0.4403131906091299, "grad_norm": 608.4413452148438, "learning_rate": 6.9385432625925006e-06, "loss": 21.3958, "step": 217970 }, { "epoch": 0.4403333912418137, "grad_norm": 276.5838623046875, "learning_rate": 6.938221495261306e-06, "loss": 18.93, "step": 217980 }, { "epoch": 0.4403535918744975, "grad_norm": 336.1136779785156, "learning_rate": 6.937899718483456e-06, "loss": 17.3046, "step": 217990 }, { "epoch": 0.44037379250718134, "grad_norm": 194.71876525878906, "learning_rate": 6.9375779322605154e-06, "loss": 21.4464, "step": 218000 }, { "epoch": 0.44039399313986516, "grad_norm": 366.64129638671875, "learning_rate": 6.937256136594057e-06, "loss": 20.6972, "step": 218010 }, { "epoch": 0.440414193772549, "grad_norm": 410.6239318847656, "learning_rate": 6.936934331485646e-06, "loss": 19.7789, "step": 218020 }, { "epoch": 0.4404343944052328, "grad_norm": 218.31607055664062, "learning_rate": 6.936612516936852e-06, "loss": 25.4519, "step": 218030 }, { "epoch": 0.44045459503791656, "grad_norm": 95.38946533203125, "learning_rate": 6.936290692949243e-06, "loss": 14.6427, "step": 218040 }, { "epoch": 0.4404747956706004, "grad_norm": 171.65023803710938, "learning_rate": 6.935968859524389e-06, "loss": 20.5687, "step": 218050 }, { "epoch": 0.4404949963032842, "grad_norm": 222.28530883789062, "learning_rate": 6.935647016663859e-06, "loss": 20.9326, "step": 218060 }, { "epoch": 0.440515196935968, "grad_norm": 225.63076782226562, "learning_rate": 6.935325164369219e-06, "loss": 28.5849, "step": 218070 }, { "epoch": 0.44053539756865184, "grad_norm": 93.61392211914062, "learning_rate": 6.935003302642038e-06, "loss": 17.8967, "step": 218080 }, { "epoch": 0.44055559820133566, "grad_norm": 340.5914306640625, "learning_rate": 6.934681431483886e-06, "loss": 23.8724, "step": 218090 }, { "epoch": 0.4405757988340195, "grad_norm": 224.6217498779297, "learning_rate": 6.934359550896332e-06, "loss": 19.5253, "step": 218100 }, { "epoch": 0.4405959994667033, "grad_norm": 284.46240234375, "learning_rate": 6.934037660880942e-06, "loss": 19.5175, "step": 218110 }, { "epoch": 0.4406162000993871, "grad_norm": 419.5478515625, "learning_rate": 6.93371576143929e-06, "loss": 25.0711, "step": 218120 }, { "epoch": 0.44063640073207094, "grad_norm": 279.0348815917969, "learning_rate": 6.9333938525729396e-06, "loss": 19.4943, "step": 218130 }, { "epoch": 0.44065660136475476, "grad_norm": 177.0353546142578, "learning_rate": 6.9330719342834644e-06, "loss": 22.9689, "step": 218140 }, { "epoch": 0.4406768019974386, "grad_norm": 115.56510162353516, "learning_rate": 6.932750006572428e-06, "loss": 17.8898, "step": 218150 }, { "epoch": 0.4406970026301224, "grad_norm": 203.51744079589844, "learning_rate": 6.932428069441405e-06, "loss": 12.1089, "step": 218160 }, { "epoch": 0.44071720326280617, "grad_norm": 173.99234008789062, "learning_rate": 6.932106122891961e-06, "loss": 27.594, "step": 218170 }, { "epoch": 0.44073740389549, "grad_norm": 274.5644226074219, "learning_rate": 6.931784166925667e-06, "loss": 13.1661, "step": 218180 }, { "epoch": 0.4407576045281738, "grad_norm": 55.1752815246582, "learning_rate": 6.93146220154409e-06, "loss": 12.3557, "step": 218190 }, { "epoch": 0.4407778051608576, "grad_norm": 202.18247985839844, "learning_rate": 6.9311402267488004e-06, "loss": 17.378, "step": 218200 }, { "epoch": 0.44079800579354145, "grad_norm": 203.37399291992188, "learning_rate": 6.930818242541368e-06, "loss": 19.7324, "step": 218210 }, { "epoch": 0.44081820642622527, "grad_norm": 196.71746826171875, "learning_rate": 6.9304962489233615e-06, "loss": 10.7225, "step": 218220 }, { "epoch": 0.4408384070589091, "grad_norm": 394.018798828125, "learning_rate": 6.930174245896352e-06, "loss": 23.9348, "step": 218230 }, { "epoch": 0.4408586076915929, "grad_norm": 0.0, "learning_rate": 6.929852233461906e-06, "loss": 46.8157, "step": 218240 }, { "epoch": 0.4408788083242767, "grad_norm": 451.32177734375, "learning_rate": 6.929530211621593e-06, "loss": 30.6663, "step": 218250 }, { "epoch": 0.44089900895696055, "grad_norm": 346.55914306640625, "learning_rate": 6.9292081803769865e-06, "loss": 23.236, "step": 218260 }, { "epoch": 0.44091920958964437, "grad_norm": 773.6644287109375, "learning_rate": 6.928886139729652e-06, "loss": 19.3997, "step": 218270 }, { "epoch": 0.4409394102223282, "grad_norm": 180.7538604736328, "learning_rate": 6.92856408968116e-06, "loss": 19.3725, "step": 218280 }, { "epoch": 0.44095961085501195, "grad_norm": 200.36944580078125, "learning_rate": 6.92824203023308e-06, "loss": 13.7418, "step": 218290 }, { "epoch": 0.44097981148769577, "grad_norm": 290.6386413574219, "learning_rate": 6.927919961386984e-06, "loss": 23.0229, "step": 218300 }, { "epoch": 0.4410000121203796, "grad_norm": 196.5821533203125, "learning_rate": 6.927597883144439e-06, "loss": 14.8477, "step": 218310 }, { "epoch": 0.4410202127530634, "grad_norm": 215.07763671875, "learning_rate": 6.9272757955070146e-06, "loss": 26.6682, "step": 218320 }, { "epoch": 0.44104041338574723, "grad_norm": 220.06997680664062, "learning_rate": 6.926953698476284e-06, "loss": 9.4769, "step": 218330 }, { "epoch": 0.44106061401843105, "grad_norm": 490.44256591796875, "learning_rate": 6.926631592053812e-06, "loss": 24.2261, "step": 218340 }, { "epoch": 0.44108081465111487, "grad_norm": 356.5882263183594, "learning_rate": 6.926309476241174e-06, "loss": 11.4381, "step": 218350 }, { "epoch": 0.4411010152837987, "grad_norm": 161.61883544921875, "learning_rate": 6.925987351039936e-06, "loss": 20.0773, "step": 218360 }, { "epoch": 0.4411212159164825, "grad_norm": 186.00352478027344, "learning_rate": 6.925665216451669e-06, "loss": 12.5825, "step": 218370 }, { "epoch": 0.44114141654916633, "grad_norm": 239.29708862304688, "learning_rate": 6.925343072477943e-06, "loss": 19.314, "step": 218380 }, { "epoch": 0.44116161718185015, "grad_norm": 339.8052673339844, "learning_rate": 6.925020919120331e-06, "loss": 15.1566, "step": 218390 }, { "epoch": 0.44118181781453397, "grad_norm": 324.1094665527344, "learning_rate": 6.924698756380398e-06, "loss": 17.9576, "step": 218400 }, { "epoch": 0.4412020184472178, "grad_norm": 299.09136962890625, "learning_rate": 6.924376584259718e-06, "loss": 12.8414, "step": 218410 }, { "epoch": 0.44122221907990156, "grad_norm": 800.819580078125, "learning_rate": 6.924054402759858e-06, "loss": 24.6372, "step": 218420 }, { "epoch": 0.4412424197125854, "grad_norm": 5.239760398864746, "learning_rate": 6.923732211882391e-06, "loss": 16.5283, "step": 218430 }, { "epoch": 0.4412626203452692, "grad_norm": 70.60247039794922, "learning_rate": 6.9234100116288886e-06, "loss": 17.0883, "step": 218440 }, { "epoch": 0.441282820977953, "grad_norm": 489.7955017089844, "learning_rate": 6.923087802000916e-06, "loss": 23.2546, "step": 218450 }, { "epoch": 0.44130302161063684, "grad_norm": 0.0, "learning_rate": 6.9227655830000485e-06, "loss": 36.7951, "step": 218460 }, { "epoch": 0.44132322224332066, "grad_norm": 231.9298553466797, "learning_rate": 6.922443354627855e-06, "loss": 19.6094, "step": 218470 }, { "epoch": 0.4413434228760045, "grad_norm": 447.90234375, "learning_rate": 6.922121116885905e-06, "loss": 18.7565, "step": 218480 }, { "epoch": 0.4413636235086883, "grad_norm": 174.5920867919922, "learning_rate": 6.921798869775769e-06, "loss": 15.2054, "step": 218490 }, { "epoch": 0.4413838241413721, "grad_norm": 269.0977478027344, "learning_rate": 6.921476613299018e-06, "loss": 12.6699, "step": 218500 }, { "epoch": 0.44140402477405594, "grad_norm": 177.0853271484375, "learning_rate": 6.921154347457226e-06, "loss": 19.1755, "step": 218510 }, { "epoch": 0.44142422540673976, "grad_norm": 107.29742431640625, "learning_rate": 6.9208320722519594e-06, "loss": 22.1347, "step": 218520 }, { "epoch": 0.4414444260394236, "grad_norm": 463.7456970214844, "learning_rate": 6.920509787684789e-06, "loss": 18.2857, "step": 218530 }, { "epoch": 0.4414646266721074, "grad_norm": 447.5906677246094, "learning_rate": 6.920187493757288e-06, "loss": 21.9784, "step": 218540 }, { "epoch": 0.44148482730479116, "grad_norm": 807.1781616210938, "learning_rate": 6.919865190471027e-06, "loss": 32.7643, "step": 218550 }, { "epoch": 0.441505027937475, "grad_norm": 263.3524169921875, "learning_rate": 6.919542877827573e-06, "loss": 14.9693, "step": 218560 }, { "epoch": 0.4415252285701588, "grad_norm": 270.4111022949219, "learning_rate": 6.919220555828502e-06, "loss": 23.8892, "step": 218570 }, { "epoch": 0.4415454292028426, "grad_norm": 125.10990142822266, "learning_rate": 6.918898224475382e-06, "loss": 25.46, "step": 218580 }, { "epoch": 0.44156562983552644, "grad_norm": 250.6649627685547, "learning_rate": 6.918575883769784e-06, "loss": 16.1691, "step": 218590 }, { "epoch": 0.44158583046821026, "grad_norm": 251.0384521484375, "learning_rate": 6.9182535337132824e-06, "loss": 16.4404, "step": 218600 }, { "epoch": 0.4416060311008941, "grad_norm": 147.2530059814453, "learning_rate": 6.917931174307444e-06, "loss": 18.1937, "step": 218610 }, { "epoch": 0.4416262317335779, "grad_norm": 339.063720703125, "learning_rate": 6.917608805553843e-06, "loss": 19.7677, "step": 218620 }, { "epoch": 0.4416464323662617, "grad_norm": 651.6470947265625, "learning_rate": 6.917286427454048e-06, "loss": 20.4787, "step": 218630 }, { "epoch": 0.44166663299894554, "grad_norm": 263.90411376953125, "learning_rate": 6.916964040009631e-06, "loss": 20.6052, "step": 218640 }, { "epoch": 0.44168683363162936, "grad_norm": 118.68295288085938, "learning_rate": 6.9166416432221636e-06, "loss": 18.785, "step": 218650 }, { "epoch": 0.4417070342643132, "grad_norm": 115.31037139892578, "learning_rate": 6.916319237093219e-06, "loss": 24.5062, "step": 218660 }, { "epoch": 0.441727234896997, "grad_norm": 156.4493408203125, "learning_rate": 6.915996821624366e-06, "loss": 26.3891, "step": 218670 }, { "epoch": 0.44174743552968077, "grad_norm": 136.9975128173828, "learning_rate": 6.915674396817177e-06, "loss": 21.7255, "step": 218680 }, { "epoch": 0.4417676361623646, "grad_norm": 371.8674011230469, "learning_rate": 6.9153519626732225e-06, "loss": 27.9354, "step": 218690 }, { "epoch": 0.4417878367950484, "grad_norm": 318.1452941894531, "learning_rate": 6.915029519194076e-06, "loss": 13.1904, "step": 218700 }, { "epoch": 0.4418080374277322, "grad_norm": 269.3638916015625, "learning_rate": 6.914707066381308e-06, "loss": 19.6306, "step": 218710 }, { "epoch": 0.44182823806041605, "grad_norm": 112.7406005859375, "learning_rate": 6.914384604236488e-06, "loss": 14.5342, "step": 218720 }, { "epoch": 0.44184843869309987, "grad_norm": 127.05351257324219, "learning_rate": 6.914062132761192e-06, "loss": 30.4205, "step": 218730 }, { "epoch": 0.4418686393257837, "grad_norm": 643.4254150390625, "learning_rate": 6.913739651956989e-06, "loss": 18.0196, "step": 218740 }, { "epoch": 0.4418888399584675, "grad_norm": 238.68310546875, "learning_rate": 6.913417161825449e-06, "loss": 9.6224, "step": 218750 }, { "epoch": 0.4419090405911513, "grad_norm": 427.0183410644531, "learning_rate": 6.913094662368147e-06, "loss": 29.6948, "step": 218760 }, { "epoch": 0.44192924122383515, "grad_norm": 151.87985229492188, "learning_rate": 6.912772153586654e-06, "loss": 12.7632, "step": 218770 }, { "epoch": 0.44194944185651897, "grad_norm": 187.79779052734375, "learning_rate": 6.9124496354825435e-06, "loss": 16.9147, "step": 218780 }, { "epoch": 0.4419696424892028, "grad_norm": 312.7611999511719, "learning_rate": 6.912127108057381e-06, "loss": 19.734, "step": 218790 }, { "epoch": 0.44198984312188655, "grad_norm": 769.9107666015625, "learning_rate": 6.911804571312746e-06, "loss": 43.974, "step": 218800 }, { "epoch": 0.44201004375457037, "grad_norm": 278.66241455078125, "learning_rate": 6.911482025250207e-06, "loss": 16.4906, "step": 218810 }, { "epoch": 0.4420302443872542, "grad_norm": 152.0050811767578, "learning_rate": 6.911159469871335e-06, "loss": 21.5123, "step": 218820 }, { "epoch": 0.442050445019938, "grad_norm": 215.3417510986328, "learning_rate": 6.9108369051777045e-06, "loss": 28.0672, "step": 218830 }, { "epoch": 0.44207064565262183, "grad_norm": 131.7413330078125, "learning_rate": 6.910514331170888e-06, "loss": 17.0112, "step": 218840 }, { "epoch": 0.44209084628530565, "grad_norm": 451.4710998535156, "learning_rate": 6.910191747852455e-06, "loss": 30.0909, "step": 218850 }, { "epoch": 0.44211104691798947, "grad_norm": 149.04067993164062, "learning_rate": 6.909869155223978e-06, "loss": 19.8654, "step": 218860 }, { "epoch": 0.4421312475506733, "grad_norm": 219.50152587890625, "learning_rate": 6.909546553287032e-06, "loss": 31.2785, "step": 218870 }, { "epoch": 0.4421514481833571, "grad_norm": 393.47393798828125, "learning_rate": 6.909223942043187e-06, "loss": 17.2711, "step": 218880 }, { "epoch": 0.44217164881604093, "grad_norm": 408.3843994140625, "learning_rate": 6.908901321494017e-06, "loss": 26.6071, "step": 218890 }, { "epoch": 0.44219184944872475, "grad_norm": 310.6182861328125, "learning_rate": 6.908578691641092e-06, "loss": 15.2511, "step": 218900 }, { "epoch": 0.44221205008140857, "grad_norm": 351.9003601074219, "learning_rate": 6.9082560524859875e-06, "loss": 17.8014, "step": 218910 }, { "epoch": 0.4422322507140924, "grad_norm": 69.14232635498047, "learning_rate": 6.907933404030274e-06, "loss": 10.7853, "step": 218920 }, { "epoch": 0.44225245134677615, "grad_norm": 134.2755126953125, "learning_rate": 6.907610746275524e-06, "loss": 15.29, "step": 218930 }, { "epoch": 0.44227265197946, "grad_norm": 354.6544189453125, "learning_rate": 6.907288079223311e-06, "loss": 22.0718, "step": 218940 }, { "epoch": 0.4422928526121438, "grad_norm": 124.5386962890625, "learning_rate": 6.906965402875207e-06, "loss": 8.1279, "step": 218950 }, { "epoch": 0.4423130532448276, "grad_norm": 182.298828125, "learning_rate": 6.906642717232786e-06, "loss": 8.2896, "step": 218960 }, { "epoch": 0.44233325387751143, "grad_norm": 342.04400634765625, "learning_rate": 6.906320022297618e-06, "loss": 18.0675, "step": 218970 }, { "epoch": 0.44235345451019525, "grad_norm": 416.5542297363281, "learning_rate": 6.905997318071278e-06, "loss": 27.7081, "step": 218980 }, { "epoch": 0.4423736551428791, "grad_norm": 217.39553833007812, "learning_rate": 6.905674604555337e-06, "loss": 13.3624, "step": 218990 }, { "epoch": 0.4423938557755629, "grad_norm": 51.83039093017578, "learning_rate": 6.905351881751372e-06, "loss": 17.9873, "step": 219000 }, { "epoch": 0.4424140564082467, "grad_norm": 228.3551483154297, "learning_rate": 6.905029149660951e-06, "loss": 15.3454, "step": 219010 }, { "epoch": 0.44243425704093053, "grad_norm": 263.48504638671875, "learning_rate": 6.904706408285649e-06, "loss": 21.3637, "step": 219020 }, { "epoch": 0.44245445767361435, "grad_norm": 511.92279052734375, "learning_rate": 6.90438365762704e-06, "loss": 13.6314, "step": 219030 }, { "epoch": 0.4424746583062982, "grad_norm": 40.56866455078125, "learning_rate": 6.904060897686695e-06, "loss": 14.0699, "step": 219040 }, { "epoch": 0.442494858938982, "grad_norm": 723.4234008789062, "learning_rate": 6.903738128466189e-06, "loss": 28.4479, "step": 219050 }, { "epoch": 0.44251505957166576, "grad_norm": 283.917236328125, "learning_rate": 6.903415349967092e-06, "loss": 14.6362, "step": 219060 }, { "epoch": 0.4425352602043496, "grad_norm": 212.60858154296875, "learning_rate": 6.903092562190983e-06, "loss": 8.6673, "step": 219070 }, { "epoch": 0.4425554608370334, "grad_norm": 339.775390625, "learning_rate": 6.902769765139429e-06, "loss": 17.8167, "step": 219080 }, { "epoch": 0.4425756614697172, "grad_norm": 67.49806213378906, "learning_rate": 6.902446958814007e-06, "loss": 16.3418, "step": 219090 }, { "epoch": 0.44259586210240104, "grad_norm": 550.2808227539062, "learning_rate": 6.9021241432162886e-06, "loss": 21.5215, "step": 219100 }, { "epoch": 0.44261606273508486, "grad_norm": 212.4113006591797, "learning_rate": 6.901801318347848e-06, "loss": 25.4491, "step": 219110 }, { "epoch": 0.4426362633677687, "grad_norm": 371.4028015136719, "learning_rate": 6.90147848421026e-06, "loss": 45.7108, "step": 219120 }, { "epoch": 0.4426564640004525, "grad_norm": 190.96165466308594, "learning_rate": 6.901155640805095e-06, "loss": 25.1399, "step": 219130 }, { "epoch": 0.4426766646331363, "grad_norm": 263.519287109375, "learning_rate": 6.900832788133928e-06, "loss": 17.129, "step": 219140 }, { "epoch": 0.44269686526582014, "grad_norm": 554.9775390625, "learning_rate": 6.900509926198332e-06, "loss": 40.8004, "step": 219150 }, { "epoch": 0.44271706589850396, "grad_norm": 251.58816528320312, "learning_rate": 6.900187054999883e-06, "loss": 25.252, "step": 219160 }, { "epoch": 0.4427372665311878, "grad_norm": 331.86212158203125, "learning_rate": 6.899864174540151e-06, "loss": 15.8429, "step": 219170 }, { "epoch": 0.4427574671638716, "grad_norm": 455.2579040527344, "learning_rate": 6.899541284820712e-06, "loss": 21.5235, "step": 219180 }, { "epoch": 0.44277766779655536, "grad_norm": 403.8883972167969, "learning_rate": 6.899218385843139e-06, "loss": 11.5277, "step": 219190 }, { "epoch": 0.4427978684292392, "grad_norm": 442.9278259277344, "learning_rate": 6.898895477609007e-06, "loss": 25.5119, "step": 219200 }, { "epoch": 0.442818069061923, "grad_norm": 995.6471557617188, "learning_rate": 6.898572560119888e-06, "loss": 39.9742, "step": 219210 }, { "epoch": 0.4428382696946068, "grad_norm": 234.9128875732422, "learning_rate": 6.898249633377355e-06, "loss": 16.3113, "step": 219220 }, { "epoch": 0.44285847032729064, "grad_norm": 269.7539978027344, "learning_rate": 6.897926697382986e-06, "loss": 14.2974, "step": 219230 }, { "epoch": 0.44287867095997446, "grad_norm": 287.32330322265625, "learning_rate": 6.897603752138351e-06, "loss": 23.9699, "step": 219240 }, { "epoch": 0.4428988715926583, "grad_norm": 650.5316772460938, "learning_rate": 6.897280797645026e-06, "loss": 21.6368, "step": 219250 }, { "epoch": 0.4429190722253421, "grad_norm": 177.57948303222656, "learning_rate": 6.8969578339045855e-06, "loss": 23.0046, "step": 219260 }, { "epoch": 0.4429392728580259, "grad_norm": 419.2382507324219, "learning_rate": 6.8966348609186005e-06, "loss": 24.3207, "step": 219270 }, { "epoch": 0.44295947349070974, "grad_norm": 266.507568359375, "learning_rate": 6.896311878688648e-06, "loss": 21.5251, "step": 219280 }, { "epoch": 0.44297967412339356, "grad_norm": 119.17599487304688, "learning_rate": 6.895988887216303e-06, "loss": 5.8981, "step": 219290 }, { "epoch": 0.4429998747560774, "grad_norm": 666.9869384765625, "learning_rate": 6.895665886503136e-06, "loss": 18.8542, "step": 219300 }, { "epoch": 0.4430200753887612, "grad_norm": 196.952392578125, "learning_rate": 6.895342876550724e-06, "loss": 10.0179, "step": 219310 }, { "epoch": 0.44304027602144497, "grad_norm": 287.4341125488281, "learning_rate": 6.895019857360641e-06, "loss": 32.9301, "step": 219320 }, { "epoch": 0.4430604766541288, "grad_norm": 100.83110809326172, "learning_rate": 6.8946968289344605e-06, "loss": 12.1899, "step": 219330 }, { "epoch": 0.4430806772868126, "grad_norm": 326.4623107910156, "learning_rate": 6.894373791273758e-06, "loss": 18.7443, "step": 219340 }, { "epoch": 0.4431008779194964, "grad_norm": 6.750455856323242, "learning_rate": 6.8940507443801076e-06, "loss": 15.5074, "step": 219350 }, { "epoch": 0.44312107855218025, "grad_norm": 161.10768127441406, "learning_rate": 6.893727688255083e-06, "loss": 18.6688, "step": 219360 }, { "epoch": 0.44314127918486407, "grad_norm": 185.5940704345703, "learning_rate": 6.8934046229002605e-06, "loss": 11.5134, "step": 219370 }, { "epoch": 0.4431614798175479, "grad_norm": 567.3251953125, "learning_rate": 6.893081548317212e-06, "loss": 22.1718, "step": 219380 }, { "epoch": 0.4431816804502317, "grad_norm": 324.7868347167969, "learning_rate": 6.8927584645075154e-06, "loss": 11.1515, "step": 219390 }, { "epoch": 0.4432018810829155, "grad_norm": 246.28900146484375, "learning_rate": 6.892435371472741e-06, "loss": 21.1241, "step": 219400 }, { "epoch": 0.44322208171559935, "grad_norm": 282.7253112792969, "learning_rate": 6.892112269214468e-06, "loss": 38.5265, "step": 219410 }, { "epoch": 0.44324228234828317, "grad_norm": 200.94509887695312, "learning_rate": 6.8917891577342685e-06, "loss": 17.2774, "step": 219420 }, { "epoch": 0.443262482980967, "grad_norm": 319.1649169921875, "learning_rate": 6.891466037033718e-06, "loss": 41.8613, "step": 219430 }, { "epoch": 0.44328268361365075, "grad_norm": 320.2583923339844, "learning_rate": 6.891142907114392e-06, "loss": 20.717, "step": 219440 }, { "epoch": 0.44330288424633457, "grad_norm": 1155.951904296875, "learning_rate": 6.890819767977865e-06, "loss": 23.2716, "step": 219450 }, { "epoch": 0.4433230848790184, "grad_norm": 216.0127716064453, "learning_rate": 6.890496619625713e-06, "loss": 23.3859, "step": 219460 }, { "epoch": 0.4433432855117022, "grad_norm": 378.4197082519531, "learning_rate": 6.890173462059508e-06, "loss": 23.4377, "step": 219470 }, { "epoch": 0.44336348614438603, "grad_norm": 333.286865234375, "learning_rate": 6.889850295280827e-06, "loss": 8.8751, "step": 219480 }, { "epoch": 0.44338368677706985, "grad_norm": 422.5327453613281, "learning_rate": 6.8895271192912435e-06, "loss": 18.8026, "step": 219490 }, { "epoch": 0.44340388740975367, "grad_norm": 210.62657165527344, "learning_rate": 6.889203934092337e-06, "loss": 25.5267, "step": 219500 }, { "epoch": 0.4434240880424375, "grad_norm": 169.5663299560547, "learning_rate": 6.888880739685677e-06, "loss": 11.5406, "step": 219510 }, { "epoch": 0.4434442886751213, "grad_norm": 252.2335205078125, "learning_rate": 6.888557536072843e-06, "loss": 21.6475, "step": 219520 }, { "epoch": 0.44346448930780513, "grad_norm": 572.8245239257812, "learning_rate": 6.888234323255408e-06, "loss": 28.8109, "step": 219530 }, { "epoch": 0.44348468994048895, "grad_norm": 153.39547729492188, "learning_rate": 6.8879111012349475e-06, "loss": 20.7281, "step": 219540 }, { "epoch": 0.44350489057317277, "grad_norm": 166.20513916015625, "learning_rate": 6.887587870013039e-06, "loss": 14.6269, "step": 219550 }, { "epoch": 0.4435250912058566, "grad_norm": 122.04380798339844, "learning_rate": 6.887264629591254e-06, "loss": 22.553, "step": 219560 }, { "epoch": 0.44354529183854036, "grad_norm": 243.90231323242188, "learning_rate": 6.886941379971172e-06, "loss": 24.1196, "step": 219570 }, { "epoch": 0.4435654924712242, "grad_norm": 348.6385192871094, "learning_rate": 6.886618121154364e-06, "loss": 19.7478, "step": 219580 }, { "epoch": 0.443585693103908, "grad_norm": 0.0, "learning_rate": 6.88629485314241e-06, "loss": 23.3274, "step": 219590 }, { "epoch": 0.4436058937365918, "grad_norm": 251.77723693847656, "learning_rate": 6.885971575936884e-06, "loss": 21.6944, "step": 219600 }, { "epoch": 0.44362609436927564, "grad_norm": 308.3563232421875, "learning_rate": 6.885648289539362e-06, "loss": 12.2117, "step": 219610 }, { "epoch": 0.44364629500195946, "grad_norm": 500.6455993652344, "learning_rate": 6.8853249939514165e-06, "loss": 14.2641, "step": 219620 }, { "epoch": 0.4436664956346433, "grad_norm": 0.0, "learning_rate": 6.885001689174627e-06, "loss": 26.5802, "step": 219630 }, { "epoch": 0.4436866962673271, "grad_norm": 266.4641418457031, "learning_rate": 6.884678375210568e-06, "loss": 13.1196, "step": 219640 }, { "epoch": 0.4437068969000109, "grad_norm": 426.5528564453125, "learning_rate": 6.884355052060814e-06, "loss": 27.7974, "step": 219650 }, { "epoch": 0.44372709753269474, "grad_norm": 504.8208312988281, "learning_rate": 6.884031719726943e-06, "loss": 32.0167, "step": 219660 }, { "epoch": 0.44374729816537856, "grad_norm": 415.3638610839844, "learning_rate": 6.8837083782105296e-06, "loss": 13.8481, "step": 219670 }, { "epoch": 0.4437674987980624, "grad_norm": 143.79278564453125, "learning_rate": 6.883385027513151e-06, "loss": 27.3972, "step": 219680 }, { "epoch": 0.4437876994307462, "grad_norm": 351.2354736328125, "learning_rate": 6.88306166763638e-06, "loss": 12.8558, "step": 219690 }, { "epoch": 0.44380790006342996, "grad_norm": 289.7987060546875, "learning_rate": 6.882738298581797e-06, "loss": 27.1336, "step": 219700 }, { "epoch": 0.4438281006961138, "grad_norm": 373.96380615234375, "learning_rate": 6.882414920350975e-06, "loss": 20.3434, "step": 219710 }, { "epoch": 0.4438483013287976, "grad_norm": 456.5250549316406, "learning_rate": 6.882091532945491e-06, "loss": 20.2071, "step": 219720 }, { "epoch": 0.4438685019614814, "grad_norm": 350.40899658203125, "learning_rate": 6.881768136366922e-06, "loss": 18.8517, "step": 219730 }, { "epoch": 0.44388870259416524, "grad_norm": 587.7376098632812, "learning_rate": 6.881444730616842e-06, "loss": 15.2551, "step": 219740 }, { "epoch": 0.44390890322684906, "grad_norm": 184.942138671875, "learning_rate": 6.881121315696828e-06, "loss": 27.3904, "step": 219750 }, { "epoch": 0.4439291038595329, "grad_norm": 551.297607421875, "learning_rate": 6.880797891608458e-06, "loss": 25.0412, "step": 219760 }, { "epoch": 0.4439493044922167, "grad_norm": 171.62014770507812, "learning_rate": 6.880474458353309e-06, "loss": 16.111, "step": 219770 }, { "epoch": 0.4439695051249005, "grad_norm": 162.796875, "learning_rate": 6.880151015932952e-06, "loss": 16.7732, "step": 219780 }, { "epoch": 0.44398970575758434, "grad_norm": 130.9040069580078, "learning_rate": 6.87982756434897e-06, "loss": 9.1849, "step": 219790 }, { "epoch": 0.44400990639026816, "grad_norm": 364.22998046875, "learning_rate": 6.879504103602934e-06, "loss": 18.6645, "step": 219800 }, { "epoch": 0.444030107022952, "grad_norm": 125.7731704711914, "learning_rate": 6.879180633696425e-06, "loss": 7.6231, "step": 219810 }, { "epoch": 0.4440503076556358, "grad_norm": 702.6375732421875, "learning_rate": 6.878857154631016e-06, "loss": 38.6534, "step": 219820 }, { "epoch": 0.44407050828831957, "grad_norm": 291.79193115234375, "learning_rate": 6.878533666408286e-06, "loss": 13.3295, "step": 219830 }, { "epoch": 0.4440907089210034, "grad_norm": 644.588623046875, "learning_rate": 6.878210169029811e-06, "loss": 28.9708, "step": 219840 }, { "epoch": 0.4441109095536872, "grad_norm": 69.40763854980469, "learning_rate": 6.877886662497165e-06, "loss": 17.8097, "step": 219850 }, { "epoch": 0.444131110186371, "grad_norm": 127.46562957763672, "learning_rate": 6.877563146811931e-06, "loss": 21.8647, "step": 219860 }, { "epoch": 0.44415131081905485, "grad_norm": 0.0, "learning_rate": 6.87723962197568e-06, "loss": 9.2351, "step": 219870 }, { "epoch": 0.44417151145173867, "grad_norm": 175.95603942871094, "learning_rate": 6.87691608798999e-06, "loss": 32.1664, "step": 219880 }, { "epoch": 0.4441917120844225, "grad_norm": 332.9775085449219, "learning_rate": 6.87659254485644e-06, "loss": 22.4273, "step": 219890 }, { "epoch": 0.4442119127171063, "grad_norm": 550.5944213867188, "learning_rate": 6.876268992576605e-06, "loss": 18.5692, "step": 219900 }, { "epoch": 0.4442321133497901, "grad_norm": 173.42359924316406, "learning_rate": 6.875945431152063e-06, "loss": 21.9523, "step": 219910 }, { "epoch": 0.44425231398247395, "grad_norm": 428.17498779296875, "learning_rate": 6.875621860584389e-06, "loss": 20.4325, "step": 219920 }, { "epoch": 0.44427251461515777, "grad_norm": 229.55136108398438, "learning_rate": 6.875298280875163e-06, "loss": 28.1679, "step": 219930 }, { "epoch": 0.4442927152478416, "grad_norm": 230.51560974121094, "learning_rate": 6.874974692025959e-06, "loss": 27.3542, "step": 219940 }, { "epoch": 0.4443129158805254, "grad_norm": 259.1538391113281, "learning_rate": 6.874651094038358e-06, "loss": 16.8211, "step": 219950 }, { "epoch": 0.44433311651320917, "grad_norm": 363.3927001953125, "learning_rate": 6.874327486913933e-06, "loss": 22.6304, "step": 219960 }, { "epoch": 0.444353317145893, "grad_norm": 342.9845275878906, "learning_rate": 6.874003870654265e-06, "loss": 15.7835, "step": 219970 }, { "epoch": 0.4443735177785768, "grad_norm": 1375.023193359375, "learning_rate": 6.873680245260929e-06, "loss": 30.2904, "step": 219980 }, { "epoch": 0.44439371841126063, "grad_norm": 526.2301025390625, "learning_rate": 6.8733566107355e-06, "loss": 15.6316, "step": 219990 }, { "epoch": 0.44441391904394445, "grad_norm": 428.6099853515625, "learning_rate": 6.873032967079562e-06, "loss": 7.8897, "step": 220000 }, { "epoch": 0.44443411967662827, "grad_norm": 236.0324249267578, "learning_rate": 6.872709314294685e-06, "loss": 22.2423, "step": 220010 }, { "epoch": 0.4444543203093121, "grad_norm": 270.1660461425781, "learning_rate": 6.872385652382452e-06, "loss": 21.7317, "step": 220020 }, { "epoch": 0.4444745209419959, "grad_norm": 559.7174682617188, "learning_rate": 6.872061981344438e-06, "loss": 17.955, "step": 220030 }, { "epoch": 0.44449472157467973, "grad_norm": 611.444091796875, "learning_rate": 6.871738301182221e-06, "loss": 25.2589, "step": 220040 }, { "epoch": 0.44451492220736355, "grad_norm": 237.7333526611328, "learning_rate": 6.87141461189738e-06, "loss": 31.7295, "step": 220050 }, { "epoch": 0.44453512284004737, "grad_norm": 190.22268676757812, "learning_rate": 6.87109091349149e-06, "loss": 15.5567, "step": 220060 }, { "epoch": 0.4445553234727312, "grad_norm": 444.6222839355469, "learning_rate": 6.870767205966128e-06, "loss": 24.8071, "step": 220070 }, { "epoch": 0.44457552410541495, "grad_norm": 468.19940185546875, "learning_rate": 6.870443489322875e-06, "loss": 20.047, "step": 220080 }, { "epoch": 0.4445957247380988, "grad_norm": 238.69911193847656, "learning_rate": 6.870119763563308e-06, "loss": 11.2906, "step": 220090 }, { "epoch": 0.4446159253707826, "grad_norm": 138.72686767578125, "learning_rate": 6.869796028689002e-06, "loss": 9.4916, "step": 220100 }, { "epoch": 0.4446361260034664, "grad_norm": 374.0670166015625, "learning_rate": 6.86947228470154e-06, "loss": 12.5781, "step": 220110 }, { "epoch": 0.44465632663615023, "grad_norm": 154.71820068359375, "learning_rate": 6.8691485316024945e-06, "loss": 18.3121, "step": 220120 }, { "epoch": 0.44467652726883405, "grad_norm": 223.37200927734375, "learning_rate": 6.8688247693934465e-06, "loss": 6.8404, "step": 220130 }, { "epoch": 0.4446967279015179, "grad_norm": 290.0184631347656, "learning_rate": 6.868500998075973e-06, "loss": 43.5662, "step": 220140 }, { "epoch": 0.4447169285342017, "grad_norm": 322.1494140625, "learning_rate": 6.8681772176516525e-06, "loss": 29.9138, "step": 220150 }, { "epoch": 0.4447371291668855, "grad_norm": 413.7652893066406, "learning_rate": 6.867853428122063e-06, "loss": 17.2696, "step": 220160 }, { "epoch": 0.44475732979956933, "grad_norm": 231.98281860351562, "learning_rate": 6.867529629488782e-06, "loss": 18.5939, "step": 220170 }, { "epoch": 0.44477753043225315, "grad_norm": 257.0954895019531, "learning_rate": 6.867205821753389e-06, "loss": 30.6337, "step": 220180 }, { "epoch": 0.444797731064937, "grad_norm": 5.993592739105225, "learning_rate": 6.86688200491746e-06, "loss": 13.0801, "step": 220190 }, { "epoch": 0.4448179316976208, "grad_norm": 355.9172668457031, "learning_rate": 6.866558178982575e-06, "loss": 9.3403, "step": 220200 }, { "epoch": 0.44483813233030456, "grad_norm": 560.5873413085938, "learning_rate": 6.866234343950312e-06, "loss": 29.7043, "step": 220210 }, { "epoch": 0.4448583329629884, "grad_norm": 391.15673828125, "learning_rate": 6.86591049982225e-06, "loss": 28.6354, "step": 220220 }, { "epoch": 0.4448785335956722, "grad_norm": 405.93585205078125, "learning_rate": 6.865586646599965e-06, "loss": 26.6413, "step": 220230 }, { "epoch": 0.444898734228356, "grad_norm": 142.5735321044922, "learning_rate": 6.8652627842850374e-06, "loss": 16.952, "step": 220240 }, { "epoch": 0.44491893486103984, "grad_norm": 130.56082153320312, "learning_rate": 6.8649389128790455e-06, "loss": 14.0584, "step": 220250 }, { "epoch": 0.44493913549372366, "grad_norm": 250.5571746826172, "learning_rate": 6.864615032383567e-06, "loss": 8.3591, "step": 220260 }, { "epoch": 0.4449593361264075, "grad_norm": 429.1754150390625, "learning_rate": 6.864291142800183e-06, "loss": 11.5121, "step": 220270 }, { "epoch": 0.4449795367590913, "grad_norm": 364.4546203613281, "learning_rate": 6.863967244130467e-06, "loss": 14.4624, "step": 220280 }, { "epoch": 0.4449997373917751, "grad_norm": 246.76168823242188, "learning_rate": 6.8636433363760025e-06, "loss": 15.224, "step": 220290 }, { "epoch": 0.44501993802445894, "grad_norm": 149.62669372558594, "learning_rate": 6.863319419538366e-06, "loss": 16.2584, "step": 220300 }, { "epoch": 0.44504013865714276, "grad_norm": 28.291780471801758, "learning_rate": 6.862995493619137e-06, "loss": 16.0228, "step": 220310 }, { "epoch": 0.4450603392898266, "grad_norm": 86.73417663574219, "learning_rate": 6.862671558619894e-06, "loss": 13.0539, "step": 220320 }, { "epoch": 0.4450805399225104, "grad_norm": 376.6438903808594, "learning_rate": 6.862347614542214e-06, "loss": 34.9632, "step": 220330 }, { "epoch": 0.44510074055519416, "grad_norm": 259.069091796875, "learning_rate": 6.86202366138768e-06, "loss": 24.6732, "step": 220340 }, { "epoch": 0.445120941187878, "grad_norm": 461.8516845703125, "learning_rate": 6.861699699157868e-06, "loss": 13.8798, "step": 220350 }, { "epoch": 0.4451411418205618, "grad_norm": 261.21185302734375, "learning_rate": 6.861375727854356e-06, "loss": 19.3403, "step": 220360 }, { "epoch": 0.4451613424532456, "grad_norm": 370.5517578125, "learning_rate": 6.861051747478727e-06, "loss": 20.1416, "step": 220370 }, { "epoch": 0.44518154308592944, "grad_norm": 227.28636169433594, "learning_rate": 6.860727758032555e-06, "loss": 21.148, "step": 220380 }, { "epoch": 0.44520174371861326, "grad_norm": 46.66242980957031, "learning_rate": 6.860403759517422e-06, "loss": 10.8007, "step": 220390 }, { "epoch": 0.4452219443512971, "grad_norm": 614.117431640625, "learning_rate": 6.860079751934908e-06, "loss": 30.5097, "step": 220400 }, { "epoch": 0.4452421449839809, "grad_norm": 413.60723876953125, "learning_rate": 6.859755735286589e-06, "loss": 17.7818, "step": 220410 }, { "epoch": 0.4452623456166647, "grad_norm": 96.79912567138672, "learning_rate": 6.859431709574048e-06, "loss": 17.693, "step": 220420 }, { "epoch": 0.44528254624934854, "grad_norm": 317.6785888671875, "learning_rate": 6.859107674798863e-06, "loss": 38.8168, "step": 220430 }, { "epoch": 0.44530274688203236, "grad_norm": 342.97802734375, "learning_rate": 6.85878363096261e-06, "loss": 14.961, "step": 220440 }, { "epoch": 0.4453229475147162, "grad_norm": 424.007568359375, "learning_rate": 6.858459578066873e-06, "loss": 26.2545, "step": 220450 }, { "epoch": 0.4453431481474, "grad_norm": 392.7356872558594, "learning_rate": 6.858135516113226e-06, "loss": 30.8387, "step": 220460 }, { "epoch": 0.44536334878008377, "grad_norm": 394.7209777832031, "learning_rate": 6.857811445103257e-06, "loss": 23.2892, "step": 220470 }, { "epoch": 0.4453835494127676, "grad_norm": 602.2578125, "learning_rate": 6.857487365038537e-06, "loss": 29.0086, "step": 220480 }, { "epoch": 0.4454037500454514, "grad_norm": 53.756263732910156, "learning_rate": 6.857163275920651e-06, "loss": 13.1515, "step": 220490 }, { "epoch": 0.4454239506781352, "grad_norm": 0.0, "learning_rate": 6.856839177751175e-06, "loss": 14.5186, "step": 220500 }, { "epoch": 0.44544415131081905, "grad_norm": 509.0626525878906, "learning_rate": 6.85651507053169e-06, "loss": 16.6465, "step": 220510 }, { "epoch": 0.44546435194350287, "grad_norm": 655.0435791015625, "learning_rate": 6.856190954263776e-06, "loss": 22.9965, "step": 220520 }, { "epoch": 0.4454845525761867, "grad_norm": 101.1089859008789, "learning_rate": 6.8558668289490126e-06, "loss": 31.2643, "step": 220530 }, { "epoch": 0.4455047532088705, "grad_norm": 422.9068298339844, "learning_rate": 6.855542694588979e-06, "loss": 16.005, "step": 220540 }, { "epoch": 0.4455249538415543, "grad_norm": 280.0576477050781, "learning_rate": 6.8552185511852555e-06, "loss": 9.757, "step": 220550 }, { "epoch": 0.44554515447423815, "grad_norm": 0.0, "learning_rate": 6.854894398739422e-06, "loss": 12.2843, "step": 220560 }, { "epoch": 0.44556535510692197, "grad_norm": 67.0091781616211, "learning_rate": 6.854570237253059e-06, "loss": 13.5348, "step": 220570 }, { "epoch": 0.4455855557396058, "grad_norm": 476.281005859375, "learning_rate": 6.854246066727743e-06, "loss": 28.4624, "step": 220580 }, { "epoch": 0.4456057563722896, "grad_norm": 364.1082763671875, "learning_rate": 6.8539218871650605e-06, "loss": 18.269, "step": 220590 }, { "epoch": 0.44562595700497337, "grad_norm": 336.8706970214844, "learning_rate": 6.853597698566583e-06, "loss": 21.5583, "step": 220600 }, { "epoch": 0.4456461576376572, "grad_norm": 484.31121826171875, "learning_rate": 6.853273500933899e-06, "loss": 25.0975, "step": 220610 }, { "epoch": 0.445666358270341, "grad_norm": 301.8086242675781, "learning_rate": 6.852949294268582e-06, "loss": 25.7266, "step": 220620 }, { "epoch": 0.44568655890302483, "grad_norm": 698.1058349609375, "learning_rate": 6.852625078572217e-06, "loss": 26.2086, "step": 220630 }, { "epoch": 0.44570675953570865, "grad_norm": 157.7096405029297, "learning_rate": 6.852300853846381e-06, "loss": 8.5818, "step": 220640 }, { "epoch": 0.44572696016839247, "grad_norm": 435.9980163574219, "learning_rate": 6.851976620092655e-06, "loss": 18.1592, "step": 220650 }, { "epoch": 0.4457471608010763, "grad_norm": 327.7413330078125, "learning_rate": 6.851652377312621e-06, "loss": 29.7023, "step": 220660 }, { "epoch": 0.4457673614337601, "grad_norm": 33.3922119140625, "learning_rate": 6.851328125507856e-06, "loss": 41.8726, "step": 220670 }, { "epoch": 0.44578756206644393, "grad_norm": 534.6714477539062, "learning_rate": 6.851003864679943e-06, "loss": 21.0406, "step": 220680 }, { "epoch": 0.44580776269912775, "grad_norm": 632.1672973632812, "learning_rate": 6.850679594830461e-06, "loss": 27.493, "step": 220690 }, { "epoch": 0.44582796333181157, "grad_norm": 792.5498657226562, "learning_rate": 6.850355315960992e-06, "loss": 21.0863, "step": 220700 }, { "epoch": 0.4458481639644954, "grad_norm": 365.2892150878906, "learning_rate": 6.850031028073115e-06, "loss": 20.5482, "step": 220710 }, { "epoch": 0.44586836459717916, "grad_norm": 210.88490295410156, "learning_rate": 6.849706731168413e-06, "loss": 16.4967, "step": 220720 }, { "epoch": 0.445888565229863, "grad_norm": 368.0472412109375, "learning_rate": 6.849382425248463e-06, "loss": 19.4074, "step": 220730 }, { "epoch": 0.4459087658625468, "grad_norm": 222.28895568847656, "learning_rate": 6.849058110314848e-06, "loss": 7.9884, "step": 220740 }, { "epoch": 0.4459289664952306, "grad_norm": 462.76373291015625, "learning_rate": 6.848733786369147e-06, "loss": 17.387, "step": 220750 }, { "epoch": 0.44594916712791444, "grad_norm": 98.30000305175781, "learning_rate": 6.848409453412943e-06, "loss": 23.1439, "step": 220760 }, { "epoch": 0.44596936776059826, "grad_norm": 399.39404296875, "learning_rate": 6.848085111447815e-06, "loss": 13.5574, "step": 220770 }, { "epoch": 0.4459895683932821, "grad_norm": 255.11851501464844, "learning_rate": 6.847760760475344e-06, "loss": 22.1428, "step": 220780 }, { "epoch": 0.4460097690259659, "grad_norm": 144.56472778320312, "learning_rate": 6.8474364004971115e-06, "loss": 17.3237, "step": 220790 }, { "epoch": 0.4460299696586497, "grad_norm": 408.3194274902344, "learning_rate": 6.847112031514698e-06, "loss": 20.4446, "step": 220800 }, { "epoch": 0.44605017029133354, "grad_norm": 243.77967834472656, "learning_rate": 6.8467876535296855e-06, "loss": 9.7111, "step": 220810 }, { "epoch": 0.44607037092401736, "grad_norm": 292.7369079589844, "learning_rate": 6.846463266543653e-06, "loss": 34.0535, "step": 220820 }, { "epoch": 0.4460905715567012, "grad_norm": 203.61549377441406, "learning_rate": 6.846138870558181e-06, "loss": 21.2438, "step": 220830 }, { "epoch": 0.446110772189385, "grad_norm": 256.6507263183594, "learning_rate": 6.845814465574855e-06, "loss": 17.3512, "step": 220840 }, { "epoch": 0.44613097282206876, "grad_norm": 412.45904541015625, "learning_rate": 6.845490051595252e-06, "loss": 13.6812, "step": 220850 }, { "epoch": 0.4461511734547526, "grad_norm": 160.28065490722656, "learning_rate": 6.8451656286209535e-06, "loss": 29.6473, "step": 220860 }, { "epoch": 0.4461713740874364, "grad_norm": 278.0494689941406, "learning_rate": 6.844841196653541e-06, "loss": 10.014, "step": 220870 }, { "epoch": 0.4461915747201202, "grad_norm": 308.8392333984375, "learning_rate": 6.844516755694599e-06, "loss": 14.9234, "step": 220880 }, { "epoch": 0.44621177535280404, "grad_norm": 217.7829132080078, "learning_rate": 6.844192305745702e-06, "loss": 22.1488, "step": 220890 }, { "epoch": 0.44623197598548786, "grad_norm": 410.2087707519531, "learning_rate": 6.843867846808438e-06, "loss": 36.3341, "step": 220900 }, { "epoch": 0.4462521766181717, "grad_norm": 210.99456787109375, "learning_rate": 6.8435433788843865e-06, "loss": 10.2912, "step": 220910 }, { "epoch": 0.4462723772508555, "grad_norm": 78.91656494140625, "learning_rate": 6.843218901975127e-06, "loss": 21.5103, "step": 220920 }, { "epoch": 0.4462925778835393, "grad_norm": 309.2608947753906, "learning_rate": 6.842894416082243e-06, "loss": 11.5891, "step": 220930 }, { "epoch": 0.44631277851622314, "grad_norm": 128.9291534423828, "learning_rate": 6.842569921207314e-06, "loss": 13.7323, "step": 220940 }, { "epoch": 0.44633297914890696, "grad_norm": 103.19662475585938, "learning_rate": 6.842245417351923e-06, "loss": 10.751, "step": 220950 }, { "epoch": 0.4463531797815908, "grad_norm": 402.2106628417969, "learning_rate": 6.841920904517652e-06, "loss": 21.1649, "step": 220960 }, { "epoch": 0.4463733804142746, "grad_norm": 261.5542907714844, "learning_rate": 6.84159638270608e-06, "loss": 5.705, "step": 220970 }, { "epoch": 0.44639358104695837, "grad_norm": 248.47885131835938, "learning_rate": 6.8412718519187916e-06, "loss": 19.9731, "step": 220980 }, { "epoch": 0.4464137816796422, "grad_norm": 557.027099609375, "learning_rate": 6.840947312157367e-06, "loss": 20.2602, "step": 220990 }, { "epoch": 0.446433982312326, "grad_norm": 301.51312255859375, "learning_rate": 6.840622763423391e-06, "loss": 19.3299, "step": 221000 }, { "epoch": 0.4464541829450098, "grad_norm": 0.0, "learning_rate": 6.840298205718441e-06, "loss": 18.487, "step": 221010 }, { "epoch": 0.44647438357769365, "grad_norm": 227.14205932617188, "learning_rate": 6.839973639044101e-06, "loss": 18.0186, "step": 221020 }, { "epoch": 0.44649458421037747, "grad_norm": 204.76547241210938, "learning_rate": 6.839649063401952e-06, "loss": 21.7879, "step": 221030 }, { "epoch": 0.4465147848430613, "grad_norm": 88.6982650756836, "learning_rate": 6.8393244787935775e-06, "loss": 15.5854, "step": 221040 }, { "epoch": 0.4465349854757451, "grad_norm": 349.5867004394531, "learning_rate": 6.838999885220558e-06, "loss": 29.153, "step": 221050 }, { "epoch": 0.4465551861084289, "grad_norm": 374.8085021972656, "learning_rate": 6.838675282684477e-06, "loss": 15.2272, "step": 221060 }, { "epoch": 0.44657538674111275, "grad_norm": 142.6425018310547, "learning_rate": 6.838350671186914e-06, "loss": 20.8474, "step": 221070 }, { "epoch": 0.44659558737379657, "grad_norm": 503.947998046875, "learning_rate": 6.838026050729454e-06, "loss": 15.9964, "step": 221080 }, { "epoch": 0.4466157880064804, "grad_norm": 2743.62353515625, "learning_rate": 6.837701421313677e-06, "loss": 39.1657, "step": 221090 }, { "epoch": 0.4466359886391642, "grad_norm": 259.0085754394531, "learning_rate": 6.837376782941168e-06, "loss": 28.6891, "step": 221100 }, { "epoch": 0.44665618927184797, "grad_norm": 202.544677734375, "learning_rate": 6.837052135613507e-06, "loss": 11.4263, "step": 221110 }, { "epoch": 0.4466763899045318, "grad_norm": 295.37042236328125, "learning_rate": 6.8367274793322745e-06, "loss": 9.0491, "step": 221120 }, { "epoch": 0.4466965905372156, "grad_norm": 435.3619689941406, "learning_rate": 6.836402814099057e-06, "loss": 20.9846, "step": 221130 }, { "epoch": 0.44671679116989943, "grad_norm": 5.664072036743164, "learning_rate": 6.836078139915434e-06, "loss": 17.3578, "step": 221140 }, { "epoch": 0.44673699180258325, "grad_norm": 243.64767456054688, "learning_rate": 6.83575345678299e-06, "loss": 14.7543, "step": 221150 }, { "epoch": 0.44675719243526707, "grad_norm": 154.86424255371094, "learning_rate": 6.8354287647033046e-06, "loss": 16.4814, "step": 221160 }, { "epoch": 0.4467773930679509, "grad_norm": 388.90924072265625, "learning_rate": 6.835104063677964e-06, "loss": 21.4559, "step": 221170 }, { "epoch": 0.4467975937006347, "grad_norm": 360.7520751953125, "learning_rate": 6.8347793537085474e-06, "loss": 35.6618, "step": 221180 }, { "epoch": 0.44681779433331853, "grad_norm": 271.4325866699219, "learning_rate": 6.834454634796639e-06, "loss": 13.7186, "step": 221190 }, { "epoch": 0.44683799496600235, "grad_norm": 216.1304473876953, "learning_rate": 6.834129906943822e-06, "loss": 17.3229, "step": 221200 }, { "epoch": 0.44685819559868617, "grad_norm": 281.48858642578125, "learning_rate": 6.833805170151676e-06, "loss": 37.0812, "step": 221210 }, { "epoch": 0.44687839623137, "grad_norm": 434.1552734375, "learning_rate": 6.8334804244217885e-06, "loss": 19.9555, "step": 221220 }, { "epoch": 0.4468985968640538, "grad_norm": 286.05908203125, "learning_rate": 6.833155669755738e-06, "loss": 22.9072, "step": 221230 }, { "epoch": 0.4469187974967376, "grad_norm": 281.95068359375, "learning_rate": 6.8328309061551105e-06, "loss": 16.5704, "step": 221240 }, { "epoch": 0.4469389981294214, "grad_norm": 543.0027465820312, "learning_rate": 6.832506133621487e-06, "loss": 19.5303, "step": 221250 }, { "epoch": 0.4469591987621052, "grad_norm": 553.1766967773438, "learning_rate": 6.832181352156451e-06, "loss": 24.2245, "step": 221260 }, { "epoch": 0.44697939939478903, "grad_norm": 546.6942749023438, "learning_rate": 6.831856561761585e-06, "loss": 30.3733, "step": 221270 }, { "epoch": 0.44699960002747285, "grad_norm": 171.87525939941406, "learning_rate": 6.831531762438472e-06, "loss": 23.0356, "step": 221280 }, { "epoch": 0.4470198006601567, "grad_norm": 38.75459671020508, "learning_rate": 6.8312069541886964e-06, "loss": 19.9404, "step": 221290 }, { "epoch": 0.4470400012928405, "grad_norm": 158.6588592529297, "learning_rate": 6.830882137013839e-06, "loss": 26.1483, "step": 221300 }, { "epoch": 0.4470602019255243, "grad_norm": 475.39117431640625, "learning_rate": 6.830557310915484e-06, "loss": 39.3251, "step": 221310 }, { "epoch": 0.44708040255820813, "grad_norm": 384.4389953613281, "learning_rate": 6.830232475895215e-06, "loss": 33.9552, "step": 221320 }, { "epoch": 0.44710060319089195, "grad_norm": 112.24369049072266, "learning_rate": 6.829907631954618e-06, "loss": 18.1333, "step": 221330 }, { "epoch": 0.4471208038235758, "grad_norm": 275.853271484375, "learning_rate": 6.829582779095269e-06, "loss": 27.1296, "step": 221340 }, { "epoch": 0.4471410044562596, "grad_norm": 63.757240295410156, "learning_rate": 6.829257917318757e-06, "loss": 15.6934, "step": 221350 }, { "epoch": 0.44716120508894336, "grad_norm": 128.90174865722656, "learning_rate": 6.8289330466266635e-06, "loss": 13.3845, "step": 221360 }, { "epoch": 0.4471814057216272, "grad_norm": 176.8086395263672, "learning_rate": 6.828608167020572e-06, "loss": 15.1104, "step": 221370 }, { "epoch": 0.447201606354311, "grad_norm": 275.9834289550781, "learning_rate": 6.828283278502067e-06, "loss": 21.7175, "step": 221380 }, { "epoch": 0.4472218069869948, "grad_norm": 16.749267578125, "learning_rate": 6.827958381072729e-06, "loss": 23.7939, "step": 221390 }, { "epoch": 0.44724200761967864, "grad_norm": 389.1201477050781, "learning_rate": 6.827633474734145e-06, "loss": 19.5765, "step": 221400 }, { "epoch": 0.44726220825236246, "grad_norm": 792.5443725585938, "learning_rate": 6.827308559487897e-06, "loss": 24.9356, "step": 221410 }, { "epoch": 0.4472824088850463, "grad_norm": 163.55056762695312, "learning_rate": 6.826983635335569e-06, "loss": 16.5549, "step": 221420 }, { "epoch": 0.4473026095177301, "grad_norm": 300.1168518066406, "learning_rate": 6.826658702278745e-06, "loss": 12.938, "step": 221430 }, { "epoch": 0.4473228101504139, "grad_norm": 382.24798583984375, "learning_rate": 6.826333760319006e-06, "loss": 25.7735, "step": 221440 }, { "epoch": 0.44734301078309774, "grad_norm": 37.59864044189453, "learning_rate": 6.82600880945794e-06, "loss": 24.4246, "step": 221450 }, { "epoch": 0.44736321141578156, "grad_norm": 58.965736389160156, "learning_rate": 6.825683849697127e-06, "loss": 18.753, "step": 221460 }, { "epoch": 0.4473834120484654, "grad_norm": 297.2691345214844, "learning_rate": 6.825358881038153e-06, "loss": 15.4852, "step": 221470 }, { "epoch": 0.4474036126811492, "grad_norm": 168.39088439941406, "learning_rate": 6.825033903482601e-06, "loss": 31.1021, "step": 221480 }, { "epoch": 0.44742381331383296, "grad_norm": 138.25160217285156, "learning_rate": 6.824708917032056e-06, "loss": 17.4708, "step": 221490 }, { "epoch": 0.4474440139465168, "grad_norm": 351.35498046875, "learning_rate": 6.824383921688098e-06, "loss": 12.8274, "step": 221500 }, { "epoch": 0.4474642145792006, "grad_norm": 97.21036529541016, "learning_rate": 6.824058917452318e-06, "loss": 21.3601, "step": 221510 }, { "epoch": 0.4474844152118844, "grad_norm": 217.91661071777344, "learning_rate": 6.823733904326293e-06, "loss": 17.0232, "step": 221520 }, { "epoch": 0.44750461584456824, "grad_norm": 598.2664184570312, "learning_rate": 6.823408882311612e-06, "loss": 26.9103, "step": 221530 }, { "epoch": 0.44752481647725206, "grad_norm": 510.2764892578125, "learning_rate": 6.823083851409857e-06, "loss": 21.8113, "step": 221540 }, { "epoch": 0.4475450171099359, "grad_norm": 646.4940795898438, "learning_rate": 6.822758811622611e-06, "loss": 29.3044, "step": 221550 }, { "epoch": 0.4475652177426197, "grad_norm": 48.36207962036133, "learning_rate": 6.8224337629514615e-06, "loss": 14.155, "step": 221560 }, { "epoch": 0.4475854183753035, "grad_norm": 338.9132995605469, "learning_rate": 6.8221087053979894e-06, "loss": 10.8011, "step": 221570 }, { "epoch": 0.44760561900798734, "grad_norm": 686.234619140625, "learning_rate": 6.821783638963782e-06, "loss": 35.8445, "step": 221580 }, { "epoch": 0.44762581964067116, "grad_norm": 192.5373992919922, "learning_rate": 6.82145856365042e-06, "loss": 14.8452, "step": 221590 }, { "epoch": 0.447646020273355, "grad_norm": 218.4583740234375, "learning_rate": 6.821133479459492e-06, "loss": 16.9912, "step": 221600 }, { "epoch": 0.4476662209060388, "grad_norm": 233.96275329589844, "learning_rate": 6.820808386392579e-06, "loss": 10.4254, "step": 221610 }, { "epoch": 0.44768642153872257, "grad_norm": 116.82610321044922, "learning_rate": 6.820483284451267e-06, "loss": 24.3171, "step": 221620 }, { "epoch": 0.4477066221714064, "grad_norm": 214.61590576171875, "learning_rate": 6.820158173637142e-06, "loss": 21.2644, "step": 221630 }, { "epoch": 0.4477268228040902, "grad_norm": 212.19410705566406, "learning_rate": 6.819833053951783e-06, "loss": 16.2984, "step": 221640 }, { "epoch": 0.447747023436774, "grad_norm": 145.67677307128906, "learning_rate": 6.819507925396782e-06, "loss": 13.7821, "step": 221650 }, { "epoch": 0.44776722406945785, "grad_norm": 424.8347473144531, "learning_rate": 6.819182787973717e-06, "loss": 22.5041, "step": 221660 }, { "epoch": 0.44778742470214167, "grad_norm": 191.94371032714844, "learning_rate": 6.818857641684179e-06, "loss": 21.0426, "step": 221670 }, { "epoch": 0.4478076253348255, "grad_norm": 128.56163024902344, "learning_rate": 6.8185324865297475e-06, "loss": 16.2848, "step": 221680 }, { "epoch": 0.4478278259675093, "grad_norm": 93.82872772216797, "learning_rate": 6.81820732251201e-06, "loss": 9.8207, "step": 221690 }, { "epoch": 0.4478480266001931, "grad_norm": 536.3947143554688, "learning_rate": 6.81788214963255e-06, "loss": 30.5728, "step": 221700 }, { "epoch": 0.44786822723287695, "grad_norm": 130.286376953125, "learning_rate": 6.817556967892953e-06, "loss": 11.4336, "step": 221710 }, { "epoch": 0.44788842786556077, "grad_norm": 493.1657409667969, "learning_rate": 6.817231777294804e-06, "loss": 28.0526, "step": 221720 }, { "epoch": 0.4479086284982446, "grad_norm": 66.24658966064453, "learning_rate": 6.816906577839688e-06, "loss": 16.1713, "step": 221730 }, { "epoch": 0.4479288291309284, "grad_norm": 323.0931091308594, "learning_rate": 6.816581369529189e-06, "loss": 25.3335, "step": 221740 }, { "epoch": 0.44794902976361217, "grad_norm": 374.5180358886719, "learning_rate": 6.816256152364893e-06, "loss": 14.8483, "step": 221750 }, { "epoch": 0.447969230396296, "grad_norm": 228.1222381591797, "learning_rate": 6.815930926348384e-06, "loss": 20.98, "step": 221760 }, { "epoch": 0.4479894310289798, "grad_norm": 71.99800109863281, "learning_rate": 6.8156056914812486e-06, "loss": 16.193, "step": 221770 }, { "epoch": 0.44800963166166363, "grad_norm": 188.25364685058594, "learning_rate": 6.815280447765073e-06, "loss": 10.2666, "step": 221780 }, { "epoch": 0.44802983229434745, "grad_norm": 345.7408752441406, "learning_rate": 6.814955195201438e-06, "loss": 22.2993, "step": 221790 }, { "epoch": 0.44805003292703127, "grad_norm": 344.1194763183594, "learning_rate": 6.814629933791932e-06, "loss": 16.6214, "step": 221800 }, { "epoch": 0.4480702335597151, "grad_norm": 262.2949523925781, "learning_rate": 6.814304663538142e-06, "loss": 15.6742, "step": 221810 }, { "epoch": 0.4480904341923989, "grad_norm": 263.5743408203125, "learning_rate": 6.813979384441648e-06, "loss": 29.7934, "step": 221820 }, { "epoch": 0.44811063482508273, "grad_norm": 0.0, "learning_rate": 6.813654096504041e-06, "loss": 18.4247, "step": 221830 }, { "epoch": 0.44813083545776655, "grad_norm": 279.78668212890625, "learning_rate": 6.813328799726901e-06, "loss": 27.0966, "step": 221840 }, { "epoch": 0.44815103609045037, "grad_norm": 585.9711303710938, "learning_rate": 6.8130034941118185e-06, "loss": 34.3834, "step": 221850 }, { "epoch": 0.4481712367231342, "grad_norm": 434.531005859375, "learning_rate": 6.812678179660377e-06, "loss": 34.4552, "step": 221860 }, { "epoch": 0.44819143735581796, "grad_norm": 163.99009704589844, "learning_rate": 6.812352856374162e-06, "loss": 21.3287, "step": 221870 }, { "epoch": 0.4482116379885018, "grad_norm": 100.07765197753906, "learning_rate": 6.812027524254758e-06, "loss": 20.0607, "step": 221880 }, { "epoch": 0.4482318386211856, "grad_norm": 4.313264846801758, "learning_rate": 6.8117021833037514e-06, "loss": 22.13, "step": 221890 }, { "epoch": 0.4482520392538694, "grad_norm": 272.07073974609375, "learning_rate": 6.811376833522729e-06, "loss": 19.5213, "step": 221900 }, { "epoch": 0.44827223988655324, "grad_norm": 379.5329895019531, "learning_rate": 6.811051474913275e-06, "loss": 19.22, "step": 221910 }, { "epoch": 0.44829244051923706, "grad_norm": 451.68572998046875, "learning_rate": 6.810726107476977e-06, "loss": 18.5025, "step": 221920 }, { "epoch": 0.4483126411519209, "grad_norm": 568.9998779296875, "learning_rate": 6.8104007312154185e-06, "loss": 14.3982, "step": 221930 }, { "epoch": 0.4483328417846047, "grad_norm": 186.62966918945312, "learning_rate": 6.810075346130187e-06, "loss": 21.6484, "step": 221940 }, { "epoch": 0.4483530424172885, "grad_norm": 200.550048828125, "learning_rate": 6.809749952222867e-06, "loss": 15.5214, "step": 221950 }, { "epoch": 0.44837324304997234, "grad_norm": 155.2937469482422, "learning_rate": 6.809424549495045e-06, "loss": 14.434, "step": 221960 }, { "epoch": 0.44839344368265616, "grad_norm": 554.6494750976562, "learning_rate": 6.809099137948309e-06, "loss": 19.0924, "step": 221970 }, { "epoch": 0.44841364431534, "grad_norm": 176.33421325683594, "learning_rate": 6.80877371758424e-06, "loss": 23.27, "step": 221980 }, { "epoch": 0.4484338449480238, "grad_norm": 1389.0264892578125, "learning_rate": 6.808448288404431e-06, "loss": 35.4904, "step": 221990 }, { "epoch": 0.44845404558070756, "grad_norm": 168.4103240966797, "learning_rate": 6.808122850410461e-06, "loss": 28.8374, "step": 222000 }, { "epoch": 0.4484742462133914, "grad_norm": 184.36399841308594, "learning_rate": 6.807797403603923e-06, "loss": 22.2908, "step": 222010 }, { "epoch": 0.4484944468460752, "grad_norm": 253.2383575439453, "learning_rate": 6.8074719479863974e-06, "loss": 40.5112, "step": 222020 }, { "epoch": 0.448514647478759, "grad_norm": 243.66519165039062, "learning_rate": 6.8071464835594735e-06, "loss": 17.8686, "step": 222030 }, { "epoch": 0.44853484811144284, "grad_norm": 221.30386352539062, "learning_rate": 6.806821010324738e-06, "loss": 24.5186, "step": 222040 }, { "epoch": 0.44855504874412666, "grad_norm": 261.2215270996094, "learning_rate": 6.806495528283772e-06, "loss": 39.6042, "step": 222050 }, { "epoch": 0.4485752493768105, "grad_norm": 144.69737243652344, "learning_rate": 6.80617003743817e-06, "loss": 11.3323, "step": 222060 }, { "epoch": 0.4485954500094943, "grad_norm": 388.6412353515625, "learning_rate": 6.805844537789512e-06, "loss": 15.2675, "step": 222070 }, { "epoch": 0.4486156506421781, "grad_norm": 242.0518035888672, "learning_rate": 6.805519029339388e-06, "loss": 16.8463, "step": 222080 }, { "epoch": 0.44863585127486194, "grad_norm": 522.247802734375, "learning_rate": 6.805193512089383e-06, "loss": 18.4106, "step": 222090 }, { "epoch": 0.44865605190754576, "grad_norm": 436.8128967285156, "learning_rate": 6.804867986041084e-06, "loss": 17.0623, "step": 222100 }, { "epoch": 0.4486762525402296, "grad_norm": 545.8941040039062, "learning_rate": 6.804542451196075e-06, "loss": 22.2795, "step": 222110 }, { "epoch": 0.4486964531729134, "grad_norm": 140.649658203125, "learning_rate": 6.804216907555948e-06, "loss": 19.3531, "step": 222120 }, { "epoch": 0.44871665380559717, "grad_norm": 334.6215515136719, "learning_rate": 6.8038913551222864e-06, "loss": 34.8511, "step": 222130 }, { "epoch": 0.448736854438281, "grad_norm": 283.8335266113281, "learning_rate": 6.803565793896676e-06, "loss": 17.5133, "step": 222140 }, { "epoch": 0.4487570550709648, "grad_norm": 199.59738159179688, "learning_rate": 6.803240223880705e-06, "loss": 27.143, "step": 222150 }, { "epoch": 0.4487772557036486, "grad_norm": 190.07814025878906, "learning_rate": 6.802914645075959e-06, "loss": 10.8922, "step": 222160 }, { "epoch": 0.44879745633633245, "grad_norm": 755.8761596679688, "learning_rate": 6.802589057484027e-06, "loss": 22.1195, "step": 222170 }, { "epoch": 0.44881765696901627, "grad_norm": 136.75877380371094, "learning_rate": 6.8022634611064945e-06, "loss": 20.3089, "step": 222180 }, { "epoch": 0.4488378576017001, "grad_norm": 279.2505798339844, "learning_rate": 6.801937855944946e-06, "loss": 14.1231, "step": 222190 }, { "epoch": 0.4488580582343839, "grad_norm": 120.84229278564453, "learning_rate": 6.8016122420009745e-06, "loss": 23.3829, "step": 222200 }, { "epoch": 0.4488782588670677, "grad_norm": 277.6899719238281, "learning_rate": 6.801286619276161e-06, "loss": 22.35, "step": 222210 }, { "epoch": 0.44889845949975155, "grad_norm": 400.7077941894531, "learning_rate": 6.800960987772096e-06, "loss": 16.5454, "step": 222220 }, { "epoch": 0.44891866013243537, "grad_norm": 1001.50439453125, "learning_rate": 6.800635347490365e-06, "loss": 21.4915, "step": 222230 }, { "epoch": 0.4489388607651192, "grad_norm": 325.65679931640625, "learning_rate": 6.800309698432557e-06, "loss": 12.1442, "step": 222240 }, { "epoch": 0.448959061397803, "grad_norm": 342.43890380859375, "learning_rate": 6.799984040600257e-06, "loss": 17.8888, "step": 222250 }, { "epoch": 0.44897926203048677, "grad_norm": 549.8706665039062, "learning_rate": 6.799658373995054e-06, "loss": 17.7265, "step": 222260 }, { "epoch": 0.4489994626631706, "grad_norm": 239.1781463623047, "learning_rate": 6.7993326986185315e-06, "loss": 31.5893, "step": 222270 }, { "epoch": 0.4490196632958544, "grad_norm": 228.67588806152344, "learning_rate": 6.799007014472283e-06, "loss": 12.3204, "step": 222280 }, { "epoch": 0.44903986392853823, "grad_norm": 209.3374481201172, "learning_rate": 6.798681321557891e-06, "loss": 10.5922, "step": 222290 }, { "epoch": 0.44906006456122205, "grad_norm": 55.07126998901367, "learning_rate": 6.798355619876944e-06, "loss": 9.6531, "step": 222300 }, { "epoch": 0.44908026519390587, "grad_norm": 489.97918701171875, "learning_rate": 6.798029909431031e-06, "loss": 19.8032, "step": 222310 }, { "epoch": 0.4491004658265897, "grad_norm": 143.01641845703125, "learning_rate": 6.797704190221737e-06, "loss": 25.5288, "step": 222320 }, { "epoch": 0.4491206664592735, "grad_norm": 311.3023376464844, "learning_rate": 6.797378462250653e-06, "loss": 25.5981, "step": 222330 }, { "epoch": 0.44914086709195733, "grad_norm": 243.1894989013672, "learning_rate": 6.797052725519362e-06, "loss": 14.5059, "step": 222340 }, { "epoch": 0.44916106772464115, "grad_norm": 236.1436767578125, "learning_rate": 6.796726980029454e-06, "loss": 14.6954, "step": 222350 }, { "epoch": 0.44918126835732497, "grad_norm": 357.1648254394531, "learning_rate": 6.796401225782517e-06, "loss": 12.9038, "step": 222360 }, { "epoch": 0.4492014689900088, "grad_norm": 414.30517578125, "learning_rate": 6.796075462780139e-06, "loss": 22.2694, "step": 222370 }, { "epoch": 0.4492216696226926, "grad_norm": 35.32927322387695, "learning_rate": 6.7957496910239075e-06, "loss": 23.5891, "step": 222380 }, { "epoch": 0.4492418702553764, "grad_norm": 216.9486083984375, "learning_rate": 6.7954239105154084e-06, "loss": 27.8594, "step": 222390 }, { "epoch": 0.4492620708880602, "grad_norm": 301.8142395019531, "learning_rate": 6.7950981212562315e-06, "loss": 16.5139, "step": 222400 }, { "epoch": 0.449282271520744, "grad_norm": 290.4708557128906, "learning_rate": 6.794772323247965e-06, "loss": 13.9479, "step": 222410 }, { "epoch": 0.44930247215342783, "grad_norm": 620.34765625, "learning_rate": 6.794446516492195e-06, "loss": 25.1131, "step": 222420 }, { "epoch": 0.44932267278611165, "grad_norm": 575.1246948242188, "learning_rate": 6.794120700990509e-06, "loss": 15.895, "step": 222430 }, { "epoch": 0.4493428734187955, "grad_norm": 3.6468324661254883, "learning_rate": 6.793794876744499e-06, "loss": 12.9027, "step": 222440 }, { "epoch": 0.4493630740514793, "grad_norm": 275.5616149902344, "learning_rate": 6.793469043755747e-06, "loss": 39.1259, "step": 222450 }, { "epoch": 0.4493832746841631, "grad_norm": 480.3856506347656, "learning_rate": 6.793143202025848e-06, "loss": 23.2138, "step": 222460 }, { "epoch": 0.44940347531684693, "grad_norm": 234.0272674560547, "learning_rate": 6.792817351556384e-06, "loss": 12.7078, "step": 222470 }, { "epoch": 0.44942367594953075, "grad_norm": 712.126953125, "learning_rate": 6.792491492348947e-06, "loss": 15.9091, "step": 222480 }, { "epoch": 0.4494438765822146, "grad_norm": 292.57855224609375, "learning_rate": 6.792165624405124e-06, "loss": 17.6213, "step": 222490 }, { "epoch": 0.4494640772148984, "grad_norm": 533.4230346679688, "learning_rate": 6.7918397477265e-06, "loss": 17.2788, "step": 222500 }, { "epoch": 0.44948427784758216, "grad_norm": 380.9327697753906, "learning_rate": 6.791513862314672e-06, "loss": 13.1298, "step": 222510 }, { "epoch": 0.449504478480266, "grad_norm": 50.200950622558594, "learning_rate": 6.791187968171219e-06, "loss": 10.96, "step": 222520 }, { "epoch": 0.4495246791129498, "grad_norm": 278.3921813964844, "learning_rate": 6.790862065297733e-06, "loss": 22.5013, "step": 222530 }, { "epoch": 0.4495448797456336, "grad_norm": 310.1663513183594, "learning_rate": 6.7905361536958035e-06, "loss": 15.4553, "step": 222540 }, { "epoch": 0.44956508037831744, "grad_norm": 485.95074462890625, "learning_rate": 6.7902102333670185e-06, "loss": 8.4399, "step": 222550 }, { "epoch": 0.44958528101100126, "grad_norm": 151.86627197265625, "learning_rate": 6.789884304312965e-06, "loss": 16.7476, "step": 222560 }, { "epoch": 0.4496054816436851, "grad_norm": 309.07733154296875, "learning_rate": 6.789558366535232e-06, "loss": 16.0229, "step": 222570 }, { "epoch": 0.4496256822763689, "grad_norm": 182.07907104492188, "learning_rate": 6.78923242003541e-06, "loss": 13.1518, "step": 222580 }, { "epoch": 0.4496458829090527, "grad_norm": 116.81712341308594, "learning_rate": 6.788906464815085e-06, "loss": 23.6509, "step": 222590 }, { "epoch": 0.44966608354173654, "grad_norm": 477.9855041503906, "learning_rate": 6.788580500875848e-06, "loss": 26.8008, "step": 222600 }, { "epoch": 0.44968628417442036, "grad_norm": 5652.27734375, "learning_rate": 6.788254528219285e-06, "loss": 80.1002, "step": 222610 }, { "epoch": 0.4497064848071042, "grad_norm": 279.23236083984375, "learning_rate": 6.787928546846987e-06, "loss": 16.5252, "step": 222620 }, { "epoch": 0.449726685439788, "grad_norm": 301.28936767578125, "learning_rate": 6.787602556760542e-06, "loss": 26.9756, "step": 222630 }, { "epoch": 0.44974688607247176, "grad_norm": 305.542236328125, "learning_rate": 6.78727655796154e-06, "loss": 28.8211, "step": 222640 }, { "epoch": 0.4497670867051556, "grad_norm": 735.6256103515625, "learning_rate": 6.786950550451568e-06, "loss": 24.8427, "step": 222650 }, { "epoch": 0.4497872873378394, "grad_norm": 424.2669372558594, "learning_rate": 6.786624534232215e-06, "loss": 20.0792, "step": 222660 }, { "epoch": 0.4498074879705232, "grad_norm": 383.5442199707031, "learning_rate": 6.786298509305072e-06, "loss": 25.0528, "step": 222670 }, { "epoch": 0.44982768860320704, "grad_norm": 198.23223876953125, "learning_rate": 6.785972475671726e-06, "loss": 18.3491, "step": 222680 }, { "epoch": 0.44984788923589086, "grad_norm": 157.2979278564453, "learning_rate": 6.785646433333767e-06, "loss": 22.9201, "step": 222690 }, { "epoch": 0.4498680898685747, "grad_norm": 109.24655151367188, "learning_rate": 6.785320382292783e-06, "loss": 10.8549, "step": 222700 }, { "epoch": 0.4498882905012585, "grad_norm": 28.28339958190918, "learning_rate": 6.784994322550367e-06, "loss": 16.8792, "step": 222710 }, { "epoch": 0.4499084911339423, "grad_norm": 46.76184844970703, "learning_rate": 6.7846682541081024e-06, "loss": 22.9295, "step": 222720 }, { "epoch": 0.44992869176662614, "grad_norm": 119.49576568603516, "learning_rate": 6.784342176967581e-06, "loss": 9.5527, "step": 222730 }, { "epoch": 0.44994889239930996, "grad_norm": 738.69580078125, "learning_rate": 6.784016091130393e-06, "loss": 31.9805, "step": 222740 }, { "epoch": 0.4499690930319938, "grad_norm": 721.9114990234375, "learning_rate": 6.783689996598126e-06, "loss": 23.255, "step": 222750 }, { "epoch": 0.4499892936646776, "grad_norm": 186.63189697265625, "learning_rate": 6.783363893372372e-06, "loss": 23.2374, "step": 222760 }, { "epoch": 0.45000949429736137, "grad_norm": 283.92572021484375, "learning_rate": 6.783037781454718e-06, "loss": 21.427, "step": 222770 }, { "epoch": 0.4500296949300452, "grad_norm": 418.05584716796875, "learning_rate": 6.782711660846755e-06, "loss": 13.0179, "step": 222780 }, { "epoch": 0.450049895562729, "grad_norm": 450.6697998046875, "learning_rate": 6.78238553155007e-06, "loss": 29.9659, "step": 222790 }, { "epoch": 0.4500700961954128, "grad_norm": 43.35778045654297, "learning_rate": 6.782059393566254e-06, "loss": 16.7843, "step": 222800 }, { "epoch": 0.45009029682809665, "grad_norm": 335.8379821777344, "learning_rate": 6.781733246896898e-06, "loss": 20.5995, "step": 222810 }, { "epoch": 0.45011049746078047, "grad_norm": 0.0, "learning_rate": 6.781407091543589e-06, "loss": 18.0158, "step": 222820 }, { "epoch": 0.4501306980934643, "grad_norm": 482.1202087402344, "learning_rate": 6.781080927507919e-06, "loss": 15.0086, "step": 222830 }, { "epoch": 0.4501508987261481, "grad_norm": 260.6979675292969, "learning_rate": 6.780754754791476e-06, "loss": 19.5923, "step": 222840 }, { "epoch": 0.4501710993588319, "grad_norm": 20.957870483398438, "learning_rate": 6.7804285733958495e-06, "loss": 29.6976, "step": 222850 }, { "epoch": 0.45019129999151575, "grad_norm": 403.2077331542969, "learning_rate": 6.780102383322631e-06, "loss": 31.046, "step": 222860 }, { "epoch": 0.45021150062419957, "grad_norm": 461.8757019042969, "learning_rate": 6.7797761845734115e-06, "loss": 19.3612, "step": 222870 }, { "epoch": 0.4502317012568834, "grad_norm": 265.8474426269531, "learning_rate": 6.779449977149774e-06, "loss": 9.2527, "step": 222880 }, { "epoch": 0.4502519018895672, "grad_norm": 284.7185363769531, "learning_rate": 6.779123761053317e-06, "loss": 16.732, "step": 222890 }, { "epoch": 0.45027210252225097, "grad_norm": 57.78697204589844, "learning_rate": 6.778797536285625e-06, "loss": 11.9212, "step": 222900 }, { "epoch": 0.4502923031549348, "grad_norm": 207.17233276367188, "learning_rate": 6.778471302848291e-06, "loss": 10.1465, "step": 222910 }, { "epoch": 0.4503125037876186, "grad_norm": 367.02490234375, "learning_rate": 6.778145060742902e-06, "loss": 20.1792, "step": 222920 }, { "epoch": 0.45033270442030243, "grad_norm": 494.6500244140625, "learning_rate": 6.777818809971048e-06, "loss": 18.88, "step": 222930 }, { "epoch": 0.45035290505298625, "grad_norm": 131.50088500976562, "learning_rate": 6.777492550534325e-06, "loss": 22.9946, "step": 222940 }, { "epoch": 0.45037310568567007, "grad_norm": 464.0691223144531, "learning_rate": 6.777166282434316e-06, "loss": 21.0363, "step": 222950 }, { "epoch": 0.4503933063183539, "grad_norm": 309.8979797363281, "learning_rate": 6.776840005672615e-06, "loss": 13.4182, "step": 222960 }, { "epoch": 0.4504135069510377, "grad_norm": 113.18099212646484, "learning_rate": 6.77651372025081e-06, "loss": 15.7438, "step": 222970 }, { "epoch": 0.45043370758372153, "grad_norm": 202.8169403076172, "learning_rate": 6.776187426170494e-06, "loss": 20.5342, "step": 222980 }, { "epoch": 0.45045390821640535, "grad_norm": 664.1404418945312, "learning_rate": 6.775861123433256e-06, "loss": 27.7604, "step": 222990 }, { "epoch": 0.45047410884908917, "grad_norm": 282.0461120605469, "learning_rate": 6.775534812040686e-06, "loss": 13.7417, "step": 223000 }, { "epoch": 0.450494309481773, "grad_norm": 228.7432861328125, "learning_rate": 6.775208491994375e-06, "loss": 17.8704, "step": 223010 }, { "epoch": 0.4505145101144568, "grad_norm": 324.0185852050781, "learning_rate": 6.7748821632959126e-06, "loss": 17.5591, "step": 223020 }, { "epoch": 0.4505347107471406, "grad_norm": 181.77679443359375, "learning_rate": 6.774555825946889e-06, "loss": 26.5039, "step": 223030 }, { "epoch": 0.4505549113798244, "grad_norm": 319.7396545410156, "learning_rate": 6.7742294799488965e-06, "loss": 20.7049, "step": 223040 }, { "epoch": 0.4505751120125082, "grad_norm": 320.5530700683594, "learning_rate": 6.773903125303525e-06, "loss": 30.7097, "step": 223050 }, { "epoch": 0.45059531264519204, "grad_norm": 134.3300323486328, "learning_rate": 6.773576762012365e-06, "loss": 34.6144, "step": 223060 }, { "epoch": 0.45061551327787586, "grad_norm": 216.84324645996094, "learning_rate": 6.773250390077006e-06, "loss": 11.6515, "step": 223070 }, { "epoch": 0.4506357139105597, "grad_norm": 136.14862060546875, "learning_rate": 6.77292400949904e-06, "loss": 9.4426, "step": 223080 }, { "epoch": 0.4506559145432435, "grad_norm": 186.19973754882812, "learning_rate": 6.772597620280057e-06, "loss": 14.2148, "step": 223090 }, { "epoch": 0.4506761151759273, "grad_norm": 414.2188415527344, "learning_rate": 6.772271222421649e-06, "loss": 25.9209, "step": 223100 }, { "epoch": 0.45069631580861114, "grad_norm": 241.2484893798828, "learning_rate": 6.771944815925405e-06, "loss": 16.2145, "step": 223110 }, { "epoch": 0.45071651644129496, "grad_norm": 385.94976806640625, "learning_rate": 6.771618400792919e-06, "loss": 22.3504, "step": 223120 }, { "epoch": 0.4507367170739788, "grad_norm": 315.2563171386719, "learning_rate": 6.771291977025778e-06, "loss": 14.8424, "step": 223130 }, { "epoch": 0.4507569177066626, "grad_norm": 167.3798065185547, "learning_rate": 6.770965544625574e-06, "loss": 16.98, "step": 223140 }, { "epoch": 0.45077711833934636, "grad_norm": 694.1487426757812, "learning_rate": 6.7706391035939e-06, "loss": 38.6859, "step": 223150 }, { "epoch": 0.4507973189720302, "grad_norm": 242.53421020507812, "learning_rate": 6.770312653932346e-06, "loss": 23.5502, "step": 223160 }, { "epoch": 0.450817519604714, "grad_norm": 107.31112670898438, "learning_rate": 6.769986195642503e-06, "loss": 12.444, "step": 223170 }, { "epoch": 0.4508377202373978, "grad_norm": 253.68106079101562, "learning_rate": 6.76965972872596e-06, "loss": 13.2838, "step": 223180 }, { "epoch": 0.45085792087008164, "grad_norm": 823.3690185546875, "learning_rate": 6.769333253184312e-06, "loss": 10.6915, "step": 223190 }, { "epoch": 0.45087812150276546, "grad_norm": 284.2256164550781, "learning_rate": 6.769006769019147e-06, "loss": 16.8665, "step": 223200 }, { "epoch": 0.4508983221354493, "grad_norm": 266.001953125, "learning_rate": 6.76868027623206e-06, "loss": 11.9261, "step": 223210 }, { "epoch": 0.4509185227681331, "grad_norm": 373.257568359375, "learning_rate": 6.768353774824636e-06, "loss": 12.8614, "step": 223220 }, { "epoch": 0.4509387234008169, "grad_norm": 18.5451602935791, "learning_rate": 6.7680272647984734e-06, "loss": 13.9451, "step": 223230 }, { "epoch": 0.45095892403350074, "grad_norm": 359.0215148925781, "learning_rate": 6.767700746155159e-06, "loss": 24.8701, "step": 223240 }, { "epoch": 0.45097912466618456, "grad_norm": 29.881914138793945, "learning_rate": 6.767374218896286e-06, "loss": 19.1651, "step": 223250 }, { "epoch": 0.4509993252988684, "grad_norm": 322.7593688964844, "learning_rate": 6.767047683023447e-06, "loss": 28.1495, "step": 223260 }, { "epoch": 0.4510195259315522, "grad_norm": 156.9593963623047, "learning_rate": 6.766721138538228e-06, "loss": 15.3686, "step": 223270 }, { "epoch": 0.45103972656423597, "grad_norm": 309.34991455078125, "learning_rate": 6.766394585442228e-06, "loss": 13.9517, "step": 223280 }, { "epoch": 0.4510599271969198, "grad_norm": 480.9609375, "learning_rate": 6.766068023737034e-06, "loss": 32.878, "step": 223290 }, { "epoch": 0.4510801278296036, "grad_norm": 186.31228637695312, "learning_rate": 6.765741453424237e-06, "loss": 16.4616, "step": 223300 }, { "epoch": 0.4511003284622874, "grad_norm": 128.66212463378906, "learning_rate": 6.765414874505431e-06, "loss": 15.4189, "step": 223310 }, { "epoch": 0.45112052909497125, "grad_norm": 218.2823028564453, "learning_rate": 6.765088286982209e-06, "loss": 46.329, "step": 223320 }, { "epoch": 0.45114072972765507, "grad_norm": 111.33246612548828, "learning_rate": 6.7647616908561595e-06, "loss": 14.7394, "step": 223330 }, { "epoch": 0.4511609303603389, "grad_norm": 279.16363525390625, "learning_rate": 6.764435086128876e-06, "loss": 14.1848, "step": 223340 }, { "epoch": 0.4511811309930227, "grad_norm": 112.7638168334961, "learning_rate": 6.764108472801949e-06, "loss": 9.4838, "step": 223350 }, { "epoch": 0.4512013316257065, "grad_norm": 736.892822265625, "learning_rate": 6.763781850876972e-06, "loss": 11.8211, "step": 223360 }, { "epoch": 0.45122153225839035, "grad_norm": 221.809326171875, "learning_rate": 6.763455220355536e-06, "loss": 17.8638, "step": 223370 }, { "epoch": 0.45124173289107417, "grad_norm": 561.2167358398438, "learning_rate": 6.763128581239231e-06, "loss": 36.5211, "step": 223380 }, { "epoch": 0.451261933523758, "grad_norm": 323.39227294921875, "learning_rate": 6.762801933529655e-06, "loss": 12.6197, "step": 223390 }, { "epoch": 0.4512821341564418, "grad_norm": 308.34893798828125, "learning_rate": 6.762475277228393e-06, "loss": 16.9184, "step": 223400 }, { "epoch": 0.45130233478912557, "grad_norm": 319.2068786621094, "learning_rate": 6.762148612337042e-06, "loss": 27.4683, "step": 223410 }, { "epoch": 0.4513225354218094, "grad_norm": 0.0, "learning_rate": 6.761821938857191e-06, "loss": 10.3535, "step": 223420 }, { "epoch": 0.4513427360544932, "grad_norm": 436.40936279296875, "learning_rate": 6.761495256790434e-06, "loss": 15.1316, "step": 223430 }, { "epoch": 0.45136293668717703, "grad_norm": 182.41567993164062, "learning_rate": 6.761168566138366e-06, "loss": 15.0154, "step": 223440 }, { "epoch": 0.45138313731986085, "grad_norm": 273.11138916015625, "learning_rate": 6.760841866902572e-06, "loss": 50.7175, "step": 223450 }, { "epoch": 0.45140333795254467, "grad_norm": 222.64486694335938, "learning_rate": 6.7605151590846494e-06, "loss": 20.854, "step": 223460 }, { "epoch": 0.4514235385852285, "grad_norm": 336.5139465332031, "learning_rate": 6.760188442686189e-06, "loss": 17.5156, "step": 223470 }, { "epoch": 0.4514437392179123, "grad_norm": 89.60285186767578, "learning_rate": 6.759861717708785e-06, "loss": 8.8874, "step": 223480 }, { "epoch": 0.45146393985059613, "grad_norm": 441.04974365234375, "learning_rate": 6.759534984154027e-06, "loss": 15.6771, "step": 223490 }, { "epoch": 0.45148414048327995, "grad_norm": 183.0120849609375, "learning_rate": 6.759208242023509e-06, "loss": 15.7748, "step": 223500 }, { "epoch": 0.45150434111596377, "grad_norm": 250.5340118408203, "learning_rate": 6.758881491318825e-06, "loss": 14.9718, "step": 223510 }, { "epoch": 0.4515245417486476, "grad_norm": 519.3700561523438, "learning_rate": 6.758554732041564e-06, "loss": 12.9691, "step": 223520 }, { "epoch": 0.4515447423813314, "grad_norm": 559.9418334960938, "learning_rate": 6.758227964193323e-06, "loss": 17.9324, "step": 223530 }, { "epoch": 0.4515649430140152, "grad_norm": 262.2828369140625, "learning_rate": 6.757901187775689e-06, "loss": 11.0752, "step": 223540 }, { "epoch": 0.451585143646699, "grad_norm": 295.35845947265625, "learning_rate": 6.75757440279026e-06, "loss": 16.429, "step": 223550 }, { "epoch": 0.4516053442793828, "grad_norm": 356.8254699707031, "learning_rate": 6.757247609238625e-06, "loss": 9.6769, "step": 223560 }, { "epoch": 0.45162554491206663, "grad_norm": 127.39014434814453, "learning_rate": 6.75692080712238e-06, "loss": 14.9424, "step": 223570 }, { "epoch": 0.45164574554475045, "grad_norm": 535.056640625, "learning_rate": 6.756593996443115e-06, "loss": 23.0625, "step": 223580 }, { "epoch": 0.4516659461774343, "grad_norm": 238.56121826171875, "learning_rate": 6.756267177202425e-06, "loss": 5.687, "step": 223590 }, { "epoch": 0.4516861468101181, "grad_norm": 197.23890686035156, "learning_rate": 6.755940349401901e-06, "loss": 38.6246, "step": 223600 }, { "epoch": 0.4517063474428019, "grad_norm": 279.5742492675781, "learning_rate": 6.755613513043136e-06, "loss": 18.7514, "step": 223610 }, { "epoch": 0.45172654807548573, "grad_norm": 137.40830993652344, "learning_rate": 6.755286668127724e-06, "loss": 14.4725, "step": 223620 }, { "epoch": 0.45174674870816955, "grad_norm": 1295.7659912109375, "learning_rate": 6.7549598146572584e-06, "loss": 29.4107, "step": 223630 }, { "epoch": 0.4517669493408534, "grad_norm": 436.1981201171875, "learning_rate": 6.7546329526333305e-06, "loss": 20.8177, "step": 223640 }, { "epoch": 0.4517871499735372, "grad_norm": 162.65553283691406, "learning_rate": 6.754306082057534e-06, "loss": 18.3156, "step": 223650 }, { "epoch": 0.451807350606221, "grad_norm": 185.48007202148438, "learning_rate": 6.753979202931466e-06, "loss": 13.768, "step": 223660 }, { "epoch": 0.4518275512389048, "grad_norm": 185.57191467285156, "learning_rate": 6.753652315256712e-06, "loss": 14.6715, "step": 223670 }, { "epoch": 0.4518477518715886, "grad_norm": 346.201904296875, "learning_rate": 6.753325419034871e-06, "loss": 23.0726, "step": 223680 }, { "epoch": 0.4518679525042724, "grad_norm": 228.6507568359375, "learning_rate": 6.752998514267534e-06, "loss": 18.0172, "step": 223690 }, { "epoch": 0.45188815313695624, "grad_norm": 302.76190185546875, "learning_rate": 6.752671600956295e-06, "loss": 26.9774, "step": 223700 }, { "epoch": 0.45190835376964006, "grad_norm": 377.6747741699219, "learning_rate": 6.752344679102749e-06, "loss": 63.4588, "step": 223710 }, { "epoch": 0.4519285544023239, "grad_norm": 185.3396453857422, "learning_rate": 6.752017748708485e-06, "loss": 17.3156, "step": 223720 }, { "epoch": 0.4519487550350077, "grad_norm": 397.0793151855469, "learning_rate": 6.7516908097751e-06, "loss": 26.2128, "step": 223730 }, { "epoch": 0.4519689556676915, "grad_norm": 441.2061462402344, "learning_rate": 6.751363862304186e-06, "loss": 15.521, "step": 223740 }, { "epoch": 0.45198915630037534, "grad_norm": 180.538818359375, "learning_rate": 6.751036906297338e-06, "loss": 22.2805, "step": 223750 }, { "epoch": 0.45200935693305916, "grad_norm": 340.4770202636719, "learning_rate": 6.750709941756147e-06, "loss": 14.711, "step": 223760 }, { "epoch": 0.452029557565743, "grad_norm": 458.65155029296875, "learning_rate": 6.7503829686822095e-06, "loss": 23.8856, "step": 223770 }, { "epoch": 0.4520497581984268, "grad_norm": 431.040283203125, "learning_rate": 6.750055987077118e-06, "loss": 19.4014, "step": 223780 }, { "epoch": 0.45206995883111056, "grad_norm": 355.99664306640625, "learning_rate": 6.749728996942465e-06, "loss": 14.247, "step": 223790 }, { "epoch": 0.4520901594637944, "grad_norm": 313.8536376953125, "learning_rate": 6.749401998279845e-06, "loss": 27.4152, "step": 223800 }, { "epoch": 0.4521103600964782, "grad_norm": 513.1095581054688, "learning_rate": 6.749074991090852e-06, "loss": 24.1083, "step": 223810 }, { "epoch": 0.452130560729162, "grad_norm": 1037.04931640625, "learning_rate": 6.74874797537708e-06, "loss": 21.2988, "step": 223820 }, { "epoch": 0.45215076136184584, "grad_norm": 266.38848876953125, "learning_rate": 6.748420951140121e-06, "loss": 36.7425, "step": 223830 }, { "epoch": 0.45217096199452966, "grad_norm": 474.34454345703125, "learning_rate": 6.748093918381572e-06, "loss": 19.4062, "step": 223840 }, { "epoch": 0.4521911626272135, "grad_norm": 255.125732421875, "learning_rate": 6.747766877103025e-06, "loss": 21.7049, "step": 223850 }, { "epoch": 0.4522113632598973, "grad_norm": 283.06842041015625, "learning_rate": 6.7474398273060725e-06, "loss": 14.5425, "step": 223860 }, { "epoch": 0.4522315638925811, "grad_norm": 267.3173828125, "learning_rate": 6.747112768992313e-06, "loss": 13.0496, "step": 223870 }, { "epoch": 0.45225176452526494, "grad_norm": 801.7942504882812, "learning_rate": 6.7467857021633354e-06, "loss": 37.0498, "step": 223880 }, { "epoch": 0.45227196515794876, "grad_norm": 209.74928283691406, "learning_rate": 6.746458626820738e-06, "loss": 18.5093, "step": 223890 }, { "epoch": 0.4522921657906326, "grad_norm": 131.6996307373047, "learning_rate": 6.746131542966112e-06, "loss": 23.7755, "step": 223900 }, { "epoch": 0.4523123664233164, "grad_norm": 156.3144073486328, "learning_rate": 6.745804450601053e-06, "loss": 14.2204, "step": 223910 }, { "epoch": 0.45233256705600017, "grad_norm": 384.09613037109375, "learning_rate": 6.745477349727154e-06, "loss": 19.81, "step": 223920 }, { "epoch": 0.452352767688684, "grad_norm": 29.04810905456543, "learning_rate": 6.74515024034601e-06, "loss": 18.4215, "step": 223930 }, { "epoch": 0.4523729683213678, "grad_norm": 327.5838317871094, "learning_rate": 6.744823122459217e-06, "loss": 12.8739, "step": 223940 }, { "epoch": 0.4523931689540516, "grad_norm": 161.19676208496094, "learning_rate": 6.744495996068367e-06, "loss": 11.6625, "step": 223950 }, { "epoch": 0.45241336958673545, "grad_norm": 240.6019287109375, "learning_rate": 6.744168861175056e-06, "loss": 14.361, "step": 223960 }, { "epoch": 0.45243357021941927, "grad_norm": 233.1769561767578, "learning_rate": 6.743841717780876e-06, "loss": 8.1365, "step": 223970 }, { "epoch": 0.4524537708521031, "grad_norm": 277.39422607421875, "learning_rate": 6.743514565887424e-06, "loss": 13.391, "step": 223980 }, { "epoch": 0.4524739714847869, "grad_norm": 345.764892578125, "learning_rate": 6.743187405496292e-06, "loss": 22.3376, "step": 223990 }, { "epoch": 0.4524941721174707, "grad_norm": 190.06594848632812, "learning_rate": 6.7428602366090764e-06, "loss": 18.6737, "step": 224000 }, { "epoch": 0.45251437275015455, "grad_norm": 319.82745361328125, "learning_rate": 6.742533059227372e-06, "loss": 12.6998, "step": 224010 }, { "epoch": 0.45253457338283837, "grad_norm": 327.0186767578125, "learning_rate": 6.742205873352773e-06, "loss": 13.1831, "step": 224020 }, { "epoch": 0.4525547740155222, "grad_norm": 842.3953857421875, "learning_rate": 6.741878678986873e-06, "loss": 24.0312, "step": 224030 }, { "epoch": 0.452574974648206, "grad_norm": 371.538818359375, "learning_rate": 6.741551476131269e-06, "loss": 32.4339, "step": 224040 }, { "epoch": 0.45259517528088977, "grad_norm": 557.4412231445312, "learning_rate": 6.741224264787553e-06, "loss": 23.2674, "step": 224050 }, { "epoch": 0.4526153759135736, "grad_norm": 499.5594177246094, "learning_rate": 6.740897044957322e-06, "loss": 21.0672, "step": 224060 }, { "epoch": 0.4526355765462574, "grad_norm": 358.1495056152344, "learning_rate": 6.74056981664217e-06, "loss": 17.9927, "step": 224070 }, { "epoch": 0.45265577717894123, "grad_norm": 3079.692626953125, "learning_rate": 6.740242579843691e-06, "loss": 76.7351, "step": 224080 }, { "epoch": 0.45267597781162505, "grad_norm": 338.0431823730469, "learning_rate": 6.739915334563481e-06, "loss": 26.2316, "step": 224090 }, { "epoch": 0.45269617844430887, "grad_norm": 144.97410583496094, "learning_rate": 6.739588080803134e-06, "loss": 12.1381, "step": 224100 }, { "epoch": 0.4527163790769927, "grad_norm": 435.57196044921875, "learning_rate": 6.739260818564248e-06, "loss": 16.6067, "step": 224110 }, { "epoch": 0.4527365797096765, "grad_norm": 171.0533447265625, "learning_rate": 6.738933547848414e-06, "loss": 16.8726, "step": 224120 }, { "epoch": 0.45275678034236033, "grad_norm": 383.82354736328125, "learning_rate": 6.7386062686572286e-06, "loss": 21.3513, "step": 224130 }, { "epoch": 0.45277698097504415, "grad_norm": 31.025728225708008, "learning_rate": 6.738278980992289e-06, "loss": 23.0178, "step": 224140 }, { "epoch": 0.45279718160772797, "grad_norm": 236.8266143798828, "learning_rate": 6.737951684855185e-06, "loss": 17.2392, "step": 224150 }, { "epoch": 0.4528173822404118, "grad_norm": 0.0, "learning_rate": 6.737624380247519e-06, "loss": 10.3642, "step": 224160 }, { "epoch": 0.4528375828730956, "grad_norm": 81.08824920654297, "learning_rate": 6.737297067170879e-06, "loss": 18.0, "step": 224170 }, { "epoch": 0.4528577835057794, "grad_norm": 297.20867919921875, "learning_rate": 6.736969745626867e-06, "loss": 17.2966, "step": 224180 }, { "epoch": 0.4528779841384632, "grad_norm": 76.86700439453125, "learning_rate": 6.736642415617073e-06, "loss": 14.021, "step": 224190 }, { "epoch": 0.452898184771147, "grad_norm": 1.2270747423171997, "learning_rate": 6.736315077143095e-06, "loss": 26.7255, "step": 224200 }, { "epoch": 0.45291838540383084, "grad_norm": 379.5224304199219, "learning_rate": 6.735987730206529e-06, "loss": 12.1382, "step": 224210 }, { "epoch": 0.45293858603651466, "grad_norm": 457.1927185058594, "learning_rate": 6.735660374808969e-06, "loss": 21.9467, "step": 224220 }, { "epoch": 0.4529587866691985, "grad_norm": 21.699369430541992, "learning_rate": 6.73533301095201e-06, "loss": 16.4666, "step": 224230 }, { "epoch": 0.4529789873018823, "grad_norm": 284.0079650878906, "learning_rate": 6.7350056386372485e-06, "loss": 17.3536, "step": 224240 }, { "epoch": 0.4529991879345661, "grad_norm": 487.185791015625, "learning_rate": 6.7346782578662795e-06, "loss": 21.8053, "step": 224250 }, { "epoch": 0.45301938856724994, "grad_norm": 280.5442199707031, "learning_rate": 6.7343508686407e-06, "loss": 19.6916, "step": 224260 }, { "epoch": 0.45303958919993376, "grad_norm": 97.4669418334961, "learning_rate": 6.734023470962106e-06, "loss": 21.5326, "step": 224270 }, { "epoch": 0.4530597898326176, "grad_norm": 93.9043197631836, "learning_rate": 6.733696064832089e-06, "loss": 20.9347, "step": 224280 }, { "epoch": 0.4530799904653014, "grad_norm": 66.56202697753906, "learning_rate": 6.733368650252249e-06, "loss": 16.8225, "step": 224290 }, { "epoch": 0.4531001910979852, "grad_norm": 358.3203430175781, "learning_rate": 6.733041227224182e-06, "loss": 32.0849, "step": 224300 }, { "epoch": 0.453120391730669, "grad_norm": 138.67103576660156, "learning_rate": 6.732713795749479e-06, "loss": 18.2, "step": 224310 }, { "epoch": 0.4531405923633528, "grad_norm": 165.70054626464844, "learning_rate": 6.732386355829742e-06, "loss": 24.5123, "step": 224320 }, { "epoch": 0.4531607929960366, "grad_norm": 276.81585693359375, "learning_rate": 6.7320589074665606e-06, "loss": 25.164, "step": 224330 }, { "epoch": 0.45318099362872044, "grad_norm": 554.4995727539062, "learning_rate": 6.7317314506615385e-06, "loss": 10.1524, "step": 224340 }, { "epoch": 0.45320119426140426, "grad_norm": 313.37969970703125, "learning_rate": 6.731403985416265e-06, "loss": 23.614, "step": 224350 }, { "epoch": 0.4532213948940881, "grad_norm": 131.09384155273438, "learning_rate": 6.731076511732338e-06, "loss": 20.9021, "step": 224360 }, { "epoch": 0.4532415955267719, "grad_norm": 193.55792236328125, "learning_rate": 6.730749029611354e-06, "loss": 12.7903, "step": 224370 }, { "epoch": 0.4532617961594557, "grad_norm": 72.20606994628906, "learning_rate": 6.730421539054911e-06, "loss": 17.9589, "step": 224380 }, { "epoch": 0.45328199679213954, "grad_norm": 154.4459991455078, "learning_rate": 6.730094040064602e-06, "loss": 22.2844, "step": 224390 }, { "epoch": 0.45330219742482336, "grad_norm": 329.5399475097656, "learning_rate": 6.729766532642024e-06, "loss": 21.606, "step": 224400 }, { "epoch": 0.4533223980575072, "grad_norm": 395.9461669921875, "learning_rate": 6.729439016788774e-06, "loss": 14.8427, "step": 224410 }, { "epoch": 0.453342598690191, "grad_norm": 132.24024963378906, "learning_rate": 6.72911149250645e-06, "loss": 10.4387, "step": 224420 }, { "epoch": 0.45336279932287477, "grad_norm": 273.3148193359375, "learning_rate": 6.7287839597966444e-06, "loss": 21.8022, "step": 224430 }, { "epoch": 0.4533829999555586, "grad_norm": 154.6059112548828, "learning_rate": 6.728456418660954e-06, "loss": 15.8911, "step": 224440 }, { "epoch": 0.4534032005882424, "grad_norm": 549.2393188476562, "learning_rate": 6.7281288691009795e-06, "loss": 26.0252, "step": 224450 }, { "epoch": 0.4534234012209262, "grad_norm": 228.38174438476562, "learning_rate": 6.727801311118314e-06, "loss": 22.0219, "step": 224460 }, { "epoch": 0.45344360185361005, "grad_norm": 320.7374267578125, "learning_rate": 6.727473744714554e-06, "loss": 12.2585, "step": 224470 }, { "epoch": 0.45346380248629387, "grad_norm": 247.53517150878906, "learning_rate": 6.727146169891297e-06, "loss": 21.1046, "step": 224480 }, { "epoch": 0.4534840031189777, "grad_norm": 83.11063385009766, "learning_rate": 6.726818586650137e-06, "loss": 19.6884, "step": 224490 }, { "epoch": 0.4535042037516615, "grad_norm": 169.14244079589844, "learning_rate": 6.7264909949926735e-06, "loss": 17.395, "step": 224500 }, { "epoch": 0.4535244043843453, "grad_norm": 264.9266662597656, "learning_rate": 6.726163394920503e-06, "loss": 42.9943, "step": 224510 }, { "epoch": 0.45354460501702915, "grad_norm": 92.65164184570312, "learning_rate": 6.725835786435222e-06, "loss": 13.7732, "step": 224520 }, { "epoch": 0.45356480564971297, "grad_norm": 369.056640625, "learning_rate": 6.725508169538425e-06, "loss": 30.858, "step": 224530 }, { "epoch": 0.4535850062823968, "grad_norm": 426.50213623046875, "learning_rate": 6.725180544231711e-06, "loss": 15.2167, "step": 224540 }, { "epoch": 0.4536052069150806, "grad_norm": 278.1307678222656, "learning_rate": 6.7248529105166785e-06, "loss": 14.1333, "step": 224550 }, { "epoch": 0.45362540754776437, "grad_norm": 452.7124938964844, "learning_rate": 6.724525268394919e-06, "loss": 23.0509, "step": 224560 }, { "epoch": 0.4536456081804482, "grad_norm": 688.0189208984375, "learning_rate": 6.7241976178680335e-06, "loss": 24.9741, "step": 224570 }, { "epoch": 0.453665808813132, "grad_norm": 347.4973449707031, "learning_rate": 6.723869958937619e-06, "loss": 16.1488, "step": 224580 }, { "epoch": 0.45368600944581583, "grad_norm": 328.7215881347656, "learning_rate": 6.723542291605271e-06, "loss": 18.3644, "step": 224590 }, { "epoch": 0.45370621007849965, "grad_norm": 23.773012161254883, "learning_rate": 6.723214615872585e-06, "loss": 14.3738, "step": 224600 }, { "epoch": 0.45372641071118347, "grad_norm": 180.44186401367188, "learning_rate": 6.722886931741163e-06, "loss": 19.4989, "step": 224610 }, { "epoch": 0.4537466113438673, "grad_norm": 64.38660430908203, "learning_rate": 6.7225592392125975e-06, "loss": 22.1436, "step": 224620 }, { "epoch": 0.4537668119765511, "grad_norm": 223.8133087158203, "learning_rate": 6.722231538288486e-06, "loss": 18.1389, "step": 224630 }, { "epoch": 0.45378701260923493, "grad_norm": 292.5125732421875, "learning_rate": 6.7219038289704294e-06, "loss": 16.9042, "step": 224640 }, { "epoch": 0.45380721324191875, "grad_norm": 243.24888610839844, "learning_rate": 6.72157611126002e-06, "loss": 18.4081, "step": 224650 }, { "epoch": 0.45382741387460257, "grad_norm": 6.820436477661133, "learning_rate": 6.721248385158859e-06, "loss": 14.01, "step": 224660 }, { "epoch": 0.4538476145072864, "grad_norm": 350.4612731933594, "learning_rate": 6.720920650668542e-06, "loss": 20.3563, "step": 224670 }, { "epoch": 0.4538678151399702, "grad_norm": 378.66796875, "learning_rate": 6.720592907790667e-06, "loss": 16.2517, "step": 224680 }, { "epoch": 0.453888015772654, "grad_norm": 209.46824645996094, "learning_rate": 6.720265156526828e-06, "loss": 14.0006, "step": 224690 }, { "epoch": 0.4539082164053378, "grad_norm": 200.64306640625, "learning_rate": 6.719937396878628e-06, "loss": 22.1343, "step": 224700 }, { "epoch": 0.4539284170380216, "grad_norm": 141.2621612548828, "learning_rate": 6.719609628847662e-06, "loss": 10.8172, "step": 224710 }, { "epoch": 0.45394861767070543, "grad_norm": 31.67059326171875, "learning_rate": 6.7192818524355266e-06, "loss": 13.8222, "step": 224720 }, { "epoch": 0.45396881830338925, "grad_norm": 306.1316223144531, "learning_rate": 6.7189540676438195e-06, "loss": 15.7686, "step": 224730 }, { "epoch": 0.4539890189360731, "grad_norm": 440.9480285644531, "learning_rate": 6.718626274474138e-06, "loss": 15.7954, "step": 224740 }, { "epoch": 0.4540092195687569, "grad_norm": 189.2568817138672, "learning_rate": 6.718298472928082e-06, "loss": 21.2295, "step": 224750 }, { "epoch": 0.4540294202014407, "grad_norm": 361.3060607910156, "learning_rate": 6.717970663007245e-06, "loss": 14.7813, "step": 224760 }, { "epoch": 0.45404962083412453, "grad_norm": 761.9708251953125, "learning_rate": 6.71764284471323e-06, "loss": 23.4916, "step": 224770 }, { "epoch": 0.45406982146680835, "grad_norm": 251.84495544433594, "learning_rate": 6.717315018047631e-06, "loss": 19.2154, "step": 224780 }, { "epoch": 0.4540900220994922, "grad_norm": 435.4103698730469, "learning_rate": 6.716987183012048e-06, "loss": 23.8202, "step": 224790 }, { "epoch": 0.454110222732176, "grad_norm": 228.01394653320312, "learning_rate": 6.716659339608077e-06, "loss": 25.6222, "step": 224800 }, { "epoch": 0.4541304233648598, "grad_norm": 255.50051879882812, "learning_rate": 6.7163314878373166e-06, "loss": 19.654, "step": 224810 }, { "epoch": 0.4541506239975436, "grad_norm": 13.132115364074707, "learning_rate": 6.716003627701365e-06, "loss": 24.7085, "step": 224820 }, { "epoch": 0.4541708246302274, "grad_norm": 484.2520751953125, "learning_rate": 6.71567575920182e-06, "loss": 18.5799, "step": 224830 }, { "epoch": 0.4541910252629112, "grad_norm": 171.063720703125, "learning_rate": 6.715347882340278e-06, "loss": 18.6415, "step": 224840 }, { "epoch": 0.45421122589559504, "grad_norm": 251.44094848632812, "learning_rate": 6.7150199971183395e-06, "loss": 10.5689, "step": 224850 }, { "epoch": 0.45423142652827886, "grad_norm": 466.04681396484375, "learning_rate": 6.714692103537601e-06, "loss": 15.2128, "step": 224860 }, { "epoch": 0.4542516271609627, "grad_norm": 610.0632934570312, "learning_rate": 6.7143642015996626e-06, "loss": 21.9816, "step": 224870 }, { "epoch": 0.4542718277936465, "grad_norm": 48.134891510009766, "learning_rate": 6.714036291306121e-06, "loss": 18.9751, "step": 224880 }, { "epoch": 0.4542920284263303, "grad_norm": 383.6083984375, "learning_rate": 6.7137083726585724e-06, "loss": 15.2533, "step": 224890 }, { "epoch": 0.45431222905901414, "grad_norm": 397.1505126953125, "learning_rate": 6.713380445658618e-06, "loss": 10.6422, "step": 224900 }, { "epoch": 0.45433242969169796, "grad_norm": 409.5924377441406, "learning_rate": 6.713052510307856e-06, "loss": 15.5155, "step": 224910 }, { "epoch": 0.4543526303243818, "grad_norm": 386.540771484375, "learning_rate": 6.712724566607882e-06, "loss": 14.9988, "step": 224920 }, { "epoch": 0.4543728309570656, "grad_norm": 451.5548095703125, "learning_rate": 6.712396614560298e-06, "loss": 21.6033, "step": 224930 }, { "epoch": 0.45439303158974936, "grad_norm": 330.3294372558594, "learning_rate": 6.712068654166699e-06, "loss": 19.9754, "step": 224940 }, { "epoch": 0.4544132322224332, "grad_norm": 909.9692993164062, "learning_rate": 6.711740685428687e-06, "loss": 18.7836, "step": 224950 }, { "epoch": 0.454433432855117, "grad_norm": 497.5475769042969, "learning_rate": 6.711412708347857e-06, "loss": 38.2868, "step": 224960 }, { "epoch": 0.4544536334878008, "grad_norm": 415.42071533203125, "learning_rate": 6.711084722925809e-06, "loss": 24.854, "step": 224970 }, { "epoch": 0.45447383412048464, "grad_norm": 317.4911804199219, "learning_rate": 6.7107567291641425e-06, "loss": 22.7676, "step": 224980 }, { "epoch": 0.45449403475316846, "grad_norm": 471.6135559082031, "learning_rate": 6.710428727064454e-06, "loss": 9.7897, "step": 224990 }, { "epoch": 0.4545142353858523, "grad_norm": 73.7365951538086, "learning_rate": 6.710100716628345e-06, "loss": 13.559, "step": 225000 }, { "epoch": 0.4545344360185361, "grad_norm": 175.46624755859375, "learning_rate": 6.709772697857411e-06, "loss": 20.3643, "step": 225010 }, { "epoch": 0.4545546366512199, "grad_norm": 44.57848358154297, "learning_rate": 6.709444670753252e-06, "loss": 16.3039, "step": 225020 }, { "epoch": 0.45457483728390374, "grad_norm": 215.0579833984375, "learning_rate": 6.709116635317469e-06, "loss": 20.4954, "step": 225030 }, { "epoch": 0.45459503791658756, "grad_norm": 188.92933654785156, "learning_rate": 6.708788591551658e-06, "loss": 11.398, "step": 225040 }, { "epoch": 0.4546152385492714, "grad_norm": 202.376220703125, "learning_rate": 6.708460539457418e-06, "loss": 23.439, "step": 225050 }, { "epoch": 0.4546354391819552, "grad_norm": 157.07859802246094, "learning_rate": 6.708132479036349e-06, "loss": 17.6137, "step": 225060 }, { "epoch": 0.45465563981463897, "grad_norm": 102.0869369506836, "learning_rate": 6.707804410290049e-06, "loss": 14.3022, "step": 225070 }, { "epoch": 0.4546758404473228, "grad_norm": 482.3387756347656, "learning_rate": 6.707476333220116e-06, "loss": 17.2713, "step": 225080 }, { "epoch": 0.4546960410800066, "grad_norm": 703.6065063476562, "learning_rate": 6.707148247828153e-06, "loss": 27.4886, "step": 225090 }, { "epoch": 0.4547162417126904, "grad_norm": 199.82472229003906, "learning_rate": 6.7068201541157555e-06, "loss": 23.5698, "step": 225100 }, { "epoch": 0.45473644234537425, "grad_norm": 448.570068359375, "learning_rate": 6.706492052084524e-06, "loss": 20.7365, "step": 225110 }, { "epoch": 0.45475664297805807, "grad_norm": 395.7660217285156, "learning_rate": 6.706163941736057e-06, "loss": 18.4731, "step": 225120 }, { "epoch": 0.4547768436107419, "grad_norm": 172.19200134277344, "learning_rate": 6.705835823071953e-06, "loss": 15.0603, "step": 225130 }, { "epoch": 0.4547970442434257, "grad_norm": 191.87452697753906, "learning_rate": 6.7055076960938135e-06, "loss": 10.4905, "step": 225140 }, { "epoch": 0.4548172448761095, "grad_norm": 83.07159423828125, "learning_rate": 6.705179560803236e-06, "loss": 8.3425, "step": 225150 }, { "epoch": 0.45483744550879335, "grad_norm": 16.49951934814453, "learning_rate": 6.704851417201821e-06, "loss": 14.1119, "step": 225160 }, { "epoch": 0.45485764614147717, "grad_norm": 133.27191162109375, "learning_rate": 6.704523265291165e-06, "loss": 11.8241, "step": 225170 }, { "epoch": 0.454877846774161, "grad_norm": 531.5558471679688, "learning_rate": 6.704195105072871e-06, "loss": 10.8021, "step": 225180 }, { "epoch": 0.4548980474068448, "grad_norm": 56.5534553527832, "learning_rate": 6.703866936548534e-06, "loss": 20.476, "step": 225190 }, { "epoch": 0.45491824803952857, "grad_norm": 375.24951171875, "learning_rate": 6.70353875971976e-06, "loss": 27.2611, "step": 225200 }, { "epoch": 0.4549384486722124, "grad_norm": 439.99493408203125, "learning_rate": 6.703210574588142e-06, "loss": 15.3911, "step": 225210 }, { "epoch": 0.4549586493048962, "grad_norm": 541.160888671875, "learning_rate": 6.702882381155283e-06, "loss": 24.5996, "step": 225220 }, { "epoch": 0.45497884993758003, "grad_norm": 47.61753845214844, "learning_rate": 6.702554179422782e-06, "loss": 19.6953, "step": 225230 }, { "epoch": 0.45499905057026385, "grad_norm": 248.61502075195312, "learning_rate": 6.702225969392238e-06, "loss": 19.7184, "step": 225240 }, { "epoch": 0.45501925120294767, "grad_norm": 0.0, "learning_rate": 6.701897751065251e-06, "loss": 16.8707, "step": 225250 }, { "epoch": 0.4550394518356315, "grad_norm": 254.2016143798828, "learning_rate": 6.701569524443421e-06, "loss": 25.5803, "step": 225260 }, { "epoch": 0.4550596524683153, "grad_norm": 502.7754211425781, "learning_rate": 6.701241289528348e-06, "loss": 19.9257, "step": 225270 }, { "epoch": 0.45507985310099913, "grad_norm": 453.6603698730469, "learning_rate": 6.700913046321631e-06, "loss": 15.4176, "step": 225280 }, { "epoch": 0.45510005373368295, "grad_norm": 354.26800537109375, "learning_rate": 6.700584794824871e-06, "loss": 28.5461, "step": 225290 }, { "epoch": 0.45512025436636677, "grad_norm": 349.60858154296875, "learning_rate": 6.700256535039665e-06, "loss": 26.8998, "step": 225300 }, { "epoch": 0.4551404549990506, "grad_norm": 327.08026123046875, "learning_rate": 6.6999282669676155e-06, "loss": 12.6934, "step": 225310 }, { "epoch": 0.4551606556317344, "grad_norm": 10.2844820022583, "learning_rate": 6.699599990610324e-06, "loss": 20.0052, "step": 225320 }, { "epoch": 0.4551808562644182, "grad_norm": 283.82177734375, "learning_rate": 6.699271705969386e-06, "loss": 18.7831, "step": 225330 }, { "epoch": 0.455201056897102, "grad_norm": 72.27128601074219, "learning_rate": 6.698943413046404e-06, "loss": 18.5849, "step": 225340 }, { "epoch": 0.4552212575297858, "grad_norm": 275.93243408203125, "learning_rate": 6.698615111842977e-06, "loss": 19.3666, "step": 225350 }, { "epoch": 0.45524145816246964, "grad_norm": 313.5260009765625, "learning_rate": 6.698286802360708e-06, "loss": 23.2725, "step": 225360 }, { "epoch": 0.45526165879515346, "grad_norm": 141.9264678955078, "learning_rate": 6.697958484601193e-06, "loss": 20.1225, "step": 225370 }, { "epoch": 0.4552818594278373, "grad_norm": 182.740478515625, "learning_rate": 6.697630158566038e-06, "loss": 14.434, "step": 225380 }, { "epoch": 0.4553020600605211, "grad_norm": 324.5687255859375, "learning_rate": 6.697301824256836e-06, "loss": 22.5936, "step": 225390 }, { "epoch": 0.4553222606932049, "grad_norm": 408.19378662109375, "learning_rate": 6.6969734816751906e-06, "loss": 14.4005, "step": 225400 }, { "epoch": 0.45534246132588874, "grad_norm": 253.07643127441406, "learning_rate": 6.696645130822704e-06, "loss": 17.8391, "step": 225410 }, { "epoch": 0.45536266195857256, "grad_norm": 262.0939636230469, "learning_rate": 6.6963167717009745e-06, "loss": 15.1712, "step": 225420 }, { "epoch": 0.4553828625912564, "grad_norm": 238.91661071777344, "learning_rate": 6.695988404311603e-06, "loss": 30.2256, "step": 225430 }, { "epoch": 0.4554030632239402, "grad_norm": 436.8711853027344, "learning_rate": 6.695660028656189e-06, "loss": 22.0972, "step": 225440 }, { "epoch": 0.455423263856624, "grad_norm": 302.34710693359375, "learning_rate": 6.6953316447363335e-06, "loss": 25.4991, "step": 225450 }, { "epoch": 0.4554434644893078, "grad_norm": 313.46484375, "learning_rate": 6.695003252553638e-06, "loss": 20.7569, "step": 225460 }, { "epoch": 0.4554636651219916, "grad_norm": 75.2326431274414, "learning_rate": 6.694674852109701e-06, "loss": 9.8402, "step": 225470 }, { "epoch": 0.4554838657546754, "grad_norm": 181.7288055419922, "learning_rate": 6.694346443406126e-06, "loss": 16.5961, "step": 225480 }, { "epoch": 0.45550406638735924, "grad_norm": 405.8199157714844, "learning_rate": 6.694018026444511e-06, "loss": 19.7243, "step": 225490 }, { "epoch": 0.45552426702004306, "grad_norm": 382.2400207519531, "learning_rate": 6.693689601226458e-06, "loss": 11.9715, "step": 225500 }, { "epoch": 0.4555444676527269, "grad_norm": 483.54498291015625, "learning_rate": 6.693361167753567e-06, "loss": 40.3974, "step": 225510 }, { "epoch": 0.4555646682854107, "grad_norm": 722.4938354492188, "learning_rate": 6.693032726027438e-06, "loss": 20.1103, "step": 225520 }, { "epoch": 0.4555848689180945, "grad_norm": 20.55360221862793, "learning_rate": 6.692704276049674e-06, "loss": 17.6063, "step": 225530 }, { "epoch": 0.45560506955077834, "grad_norm": 302.9820251464844, "learning_rate": 6.6923758178218756e-06, "loss": 7.5893, "step": 225540 }, { "epoch": 0.45562527018346216, "grad_norm": 171.58509826660156, "learning_rate": 6.692047351345641e-06, "loss": 29.754, "step": 225550 }, { "epoch": 0.455645470816146, "grad_norm": 253.7074432373047, "learning_rate": 6.6917188766225736e-06, "loss": 7.86, "step": 225560 }, { "epoch": 0.4556656714488298, "grad_norm": 473.452880859375, "learning_rate": 6.691390393654274e-06, "loss": 29.99, "step": 225570 }, { "epoch": 0.45568587208151357, "grad_norm": 176.6035614013672, "learning_rate": 6.691061902442342e-06, "loss": 31.58, "step": 225580 }, { "epoch": 0.4557060727141974, "grad_norm": 260.5210876464844, "learning_rate": 6.69073340298838e-06, "loss": 11.0604, "step": 225590 }, { "epoch": 0.4557262733468812, "grad_norm": 274.85162353515625, "learning_rate": 6.690404895293987e-06, "loss": 16.1772, "step": 225600 }, { "epoch": 0.455746473979565, "grad_norm": 198.7578125, "learning_rate": 6.690076379360767e-06, "loss": 9.2646, "step": 225610 }, { "epoch": 0.45576667461224885, "grad_norm": 788.7828979492188, "learning_rate": 6.689747855190319e-06, "loss": 34.4636, "step": 225620 }, { "epoch": 0.45578687524493267, "grad_norm": 541.375, "learning_rate": 6.689419322784245e-06, "loss": 23.271, "step": 225630 }, { "epoch": 0.4558070758776165, "grad_norm": 318.9990539550781, "learning_rate": 6.689090782144146e-06, "loss": 7.6663, "step": 225640 }, { "epoch": 0.4558272765103003, "grad_norm": 831.2885131835938, "learning_rate": 6.688762233271625e-06, "loss": 17.2152, "step": 225650 }, { "epoch": 0.4558474771429841, "grad_norm": 416.7984619140625, "learning_rate": 6.68843367616828e-06, "loss": 23.6669, "step": 225660 }, { "epoch": 0.45586767777566795, "grad_norm": 864.1460571289062, "learning_rate": 6.6881051108357146e-06, "loss": 20.5649, "step": 225670 }, { "epoch": 0.45588787840835177, "grad_norm": 290.2496643066406, "learning_rate": 6.68777653727553e-06, "loss": 25.4255, "step": 225680 }, { "epoch": 0.4559080790410356, "grad_norm": 147.8663787841797, "learning_rate": 6.687447955489326e-06, "loss": 21.681, "step": 225690 }, { "epoch": 0.4559282796737194, "grad_norm": 475.4737854003906, "learning_rate": 6.687119365478707e-06, "loss": 15.5616, "step": 225700 }, { "epoch": 0.45594848030640317, "grad_norm": 194.4939422607422, "learning_rate": 6.68679076724527e-06, "loss": 16.3132, "step": 225710 }, { "epoch": 0.455968680939087, "grad_norm": 410.2565612792969, "learning_rate": 6.686462160790623e-06, "loss": 17.4807, "step": 225720 }, { "epoch": 0.4559888815717708, "grad_norm": 287.0472106933594, "learning_rate": 6.686133546116363e-06, "loss": 18.3348, "step": 225730 }, { "epoch": 0.45600908220445463, "grad_norm": 69.48603820800781, "learning_rate": 6.685804923224091e-06, "loss": 11.986, "step": 225740 }, { "epoch": 0.45602928283713845, "grad_norm": 227.08763122558594, "learning_rate": 6.685476292115411e-06, "loss": 13.659, "step": 225750 }, { "epoch": 0.45604948346982227, "grad_norm": 780.8092041015625, "learning_rate": 6.6851476527919235e-06, "loss": 12.7602, "step": 225760 }, { "epoch": 0.4560696841025061, "grad_norm": 180.04122924804688, "learning_rate": 6.684819005255232e-06, "loss": 28.2892, "step": 225770 }, { "epoch": 0.4560898847351899, "grad_norm": 406.2068176269531, "learning_rate": 6.684490349506937e-06, "loss": 18.8708, "step": 225780 }, { "epoch": 0.45611008536787373, "grad_norm": 266.5179443359375, "learning_rate": 6.6841616855486395e-06, "loss": 18.4806, "step": 225790 }, { "epoch": 0.45613028600055755, "grad_norm": 265.0420837402344, "learning_rate": 6.683833013381942e-06, "loss": 15.0159, "step": 225800 }, { "epoch": 0.45615048663324137, "grad_norm": 418.10406494140625, "learning_rate": 6.683504333008448e-06, "loss": 20.7211, "step": 225810 }, { "epoch": 0.4561706872659252, "grad_norm": 191.4464111328125, "learning_rate": 6.683175644429756e-06, "loss": 23.1995, "step": 225820 }, { "epoch": 0.456190887898609, "grad_norm": 481.1829833984375, "learning_rate": 6.682846947647472e-06, "loss": 12.516, "step": 225830 }, { "epoch": 0.4562110885312928, "grad_norm": 218.54554748535156, "learning_rate": 6.682518242663195e-06, "loss": 13.5078, "step": 225840 }, { "epoch": 0.4562312891639766, "grad_norm": 173.78797912597656, "learning_rate": 6.682189529478528e-06, "loss": 16.2012, "step": 225850 }, { "epoch": 0.4562514897966604, "grad_norm": 214.61837768554688, "learning_rate": 6.681860808095074e-06, "loss": 23.1255, "step": 225860 }, { "epoch": 0.45627169042934423, "grad_norm": 347.9977722167969, "learning_rate": 6.681532078514434e-06, "loss": 21.1638, "step": 225870 }, { "epoch": 0.45629189106202805, "grad_norm": 7.614153861999512, "learning_rate": 6.681203340738212e-06, "loss": 20.4432, "step": 225880 }, { "epoch": 0.4563120916947119, "grad_norm": 414.4118957519531, "learning_rate": 6.680874594768006e-06, "loss": 24.8513, "step": 225890 }, { "epoch": 0.4563322923273957, "grad_norm": 158.5020751953125, "learning_rate": 6.680545840605423e-06, "loss": 33.5187, "step": 225900 }, { "epoch": 0.4563524929600795, "grad_norm": 163.49578857421875, "learning_rate": 6.680217078252063e-06, "loss": 13.3013, "step": 225910 }, { "epoch": 0.45637269359276333, "grad_norm": 293.5594177246094, "learning_rate": 6.6798883077095276e-06, "loss": 20.5032, "step": 225920 }, { "epoch": 0.45639289422544715, "grad_norm": 83.85323333740234, "learning_rate": 6.679559528979423e-06, "loss": 22.0982, "step": 225930 }, { "epoch": 0.456413094858131, "grad_norm": 240.20172119140625, "learning_rate": 6.679230742063347e-06, "loss": 17.4161, "step": 225940 }, { "epoch": 0.4564332954908148, "grad_norm": 174.1883087158203, "learning_rate": 6.6789019469629034e-06, "loss": 21.6571, "step": 225950 }, { "epoch": 0.4564534961234986, "grad_norm": 973.362060546875, "learning_rate": 6.678573143679696e-06, "loss": 36.2131, "step": 225960 }, { "epoch": 0.4564736967561824, "grad_norm": 686.3291625976562, "learning_rate": 6.678244332215329e-06, "loss": 30.4992, "step": 225970 }, { "epoch": 0.4564938973888662, "grad_norm": 289.88458251953125, "learning_rate": 6.677915512571399e-06, "loss": 14.0987, "step": 225980 }, { "epoch": 0.45651409802155, "grad_norm": 488.4683532714844, "learning_rate": 6.6775866847495155e-06, "loss": 18.7541, "step": 225990 }, { "epoch": 0.45653429865423384, "grad_norm": 99.57408142089844, "learning_rate": 6.677257848751276e-06, "loss": 19.9188, "step": 226000 }, { "epoch": 0.45655449928691766, "grad_norm": 648.0419311523438, "learning_rate": 6.676929004578286e-06, "loss": 21.1461, "step": 226010 }, { "epoch": 0.4565746999196015, "grad_norm": 0.0, "learning_rate": 6.676600152232147e-06, "loss": 16.4294, "step": 226020 }, { "epoch": 0.4565949005522853, "grad_norm": 207.34487915039062, "learning_rate": 6.676271291714461e-06, "loss": 23.4921, "step": 226030 }, { "epoch": 0.4566151011849691, "grad_norm": 346.6456604003906, "learning_rate": 6.675942423026834e-06, "loss": 15.8842, "step": 226040 }, { "epoch": 0.45663530181765294, "grad_norm": 436.8177185058594, "learning_rate": 6.675613546170866e-06, "loss": 21.8989, "step": 226050 }, { "epoch": 0.45665550245033676, "grad_norm": 538.6521606445312, "learning_rate": 6.675284661148162e-06, "loss": 17.5397, "step": 226060 }, { "epoch": 0.4566757030830206, "grad_norm": 66.8575668334961, "learning_rate": 6.6749557679603225e-06, "loss": 13.832, "step": 226070 }, { "epoch": 0.4566959037157044, "grad_norm": 128.47731018066406, "learning_rate": 6.674626866608951e-06, "loss": 11.4451, "step": 226080 }, { "epoch": 0.4567161043483882, "grad_norm": 2.318305492401123, "learning_rate": 6.674297957095652e-06, "loss": 16.7003, "step": 226090 }, { "epoch": 0.456736304981072, "grad_norm": 416.4245910644531, "learning_rate": 6.673969039422029e-06, "loss": 17.613, "step": 226100 }, { "epoch": 0.4567565056137558, "grad_norm": 287.7579650878906, "learning_rate": 6.673640113589683e-06, "loss": 15.6811, "step": 226110 }, { "epoch": 0.4567767062464396, "grad_norm": 382.2228698730469, "learning_rate": 6.673311179600218e-06, "loss": 29.0028, "step": 226120 }, { "epoch": 0.45679690687912344, "grad_norm": 479.06439208984375, "learning_rate": 6.672982237455238e-06, "loss": 13.5325, "step": 226130 }, { "epoch": 0.45681710751180726, "grad_norm": 260.0084228515625, "learning_rate": 6.672653287156345e-06, "loss": 26.1459, "step": 226140 }, { "epoch": 0.4568373081444911, "grad_norm": 348.92608642578125, "learning_rate": 6.672324328705142e-06, "loss": 22.2114, "step": 226150 }, { "epoch": 0.4568575087771749, "grad_norm": 94.51671600341797, "learning_rate": 6.671995362103233e-06, "loss": 15.3836, "step": 226160 }, { "epoch": 0.4568777094098587, "grad_norm": 555.9740600585938, "learning_rate": 6.671666387352223e-06, "loss": 34.1753, "step": 226170 }, { "epoch": 0.45689791004254254, "grad_norm": 384.97906494140625, "learning_rate": 6.671337404453713e-06, "loss": 19.9848, "step": 226180 }, { "epoch": 0.45691811067522636, "grad_norm": 335.6540832519531, "learning_rate": 6.671008413409306e-06, "loss": 17.0068, "step": 226190 }, { "epoch": 0.4569383113079102, "grad_norm": 301.8160095214844, "learning_rate": 6.6706794142206085e-06, "loss": 24.7602, "step": 226200 }, { "epoch": 0.456958511940594, "grad_norm": 229.21365356445312, "learning_rate": 6.67035040688922e-06, "loss": 27.9957, "step": 226210 }, { "epoch": 0.45697871257327777, "grad_norm": 280.8311767578125, "learning_rate": 6.6700213914167485e-06, "loss": 23.3231, "step": 226220 }, { "epoch": 0.4569989132059616, "grad_norm": 347.31689453125, "learning_rate": 6.669692367804795e-06, "loss": 25.1831, "step": 226230 }, { "epoch": 0.4570191138386454, "grad_norm": 244.92111206054688, "learning_rate": 6.6693633360549615e-06, "loss": 28.6454, "step": 226240 }, { "epoch": 0.4570393144713292, "grad_norm": 333.6777648925781, "learning_rate": 6.669034296168855e-06, "loss": 8.7296, "step": 226250 }, { "epoch": 0.45705951510401305, "grad_norm": 81.96710968017578, "learning_rate": 6.668705248148079e-06, "loss": 19.1242, "step": 226260 }, { "epoch": 0.45707971573669687, "grad_norm": 345.7232971191406, "learning_rate": 6.668376191994234e-06, "loss": 25.2577, "step": 226270 }, { "epoch": 0.4570999163693807, "grad_norm": 52.651824951171875, "learning_rate": 6.668047127708927e-06, "loss": 22.1624, "step": 226280 }, { "epoch": 0.4571201170020645, "grad_norm": 126.20040130615234, "learning_rate": 6.667718055293759e-06, "loss": 12.6948, "step": 226290 }, { "epoch": 0.4571403176347483, "grad_norm": 291.4485168457031, "learning_rate": 6.6673889747503364e-06, "loss": 17.8082, "step": 226300 }, { "epoch": 0.45716051826743215, "grad_norm": 219.04345703125, "learning_rate": 6.667059886080263e-06, "loss": 17.0722, "step": 226310 }, { "epoch": 0.45718071890011597, "grad_norm": 113.95182800292969, "learning_rate": 6.66673078928514e-06, "loss": 12.1961, "step": 226320 }, { "epoch": 0.4572009195327998, "grad_norm": 265.8270263671875, "learning_rate": 6.666401684366575e-06, "loss": 17.3321, "step": 226330 }, { "epoch": 0.4572211201654836, "grad_norm": 227.69906616210938, "learning_rate": 6.66607257132617e-06, "loss": 23.0834, "step": 226340 }, { "epoch": 0.45724132079816737, "grad_norm": 1332.5640869140625, "learning_rate": 6.665743450165528e-06, "loss": 15.2803, "step": 226350 }, { "epoch": 0.4572615214308512, "grad_norm": 169.945068359375, "learning_rate": 6.665414320886256e-06, "loss": 10.8399, "step": 226360 }, { "epoch": 0.457281722063535, "grad_norm": 338.2686767578125, "learning_rate": 6.665085183489955e-06, "loss": 17.4794, "step": 226370 }, { "epoch": 0.45730192269621883, "grad_norm": 519.5269165039062, "learning_rate": 6.664756037978233e-06, "loss": 18.372, "step": 226380 }, { "epoch": 0.45732212332890265, "grad_norm": 244.2911834716797, "learning_rate": 6.664426884352691e-06, "loss": 29.1169, "step": 226390 }, { "epoch": 0.45734232396158647, "grad_norm": 121.04158020019531, "learning_rate": 6.664097722614934e-06, "loss": 9.1461, "step": 226400 }, { "epoch": 0.4573625245942703, "grad_norm": 344.4649658203125, "learning_rate": 6.663768552766566e-06, "loss": 28.1238, "step": 226410 }, { "epoch": 0.4573827252269541, "grad_norm": 103.74214935302734, "learning_rate": 6.663439374809194e-06, "loss": 27.5155, "step": 226420 }, { "epoch": 0.45740292585963793, "grad_norm": 228.40907287597656, "learning_rate": 6.663110188744417e-06, "loss": 13.6265, "step": 226430 }, { "epoch": 0.45742312649232175, "grad_norm": 413.7095031738281, "learning_rate": 6.662780994573846e-06, "loss": 19.2703, "step": 226440 }, { "epoch": 0.45744332712500557, "grad_norm": 440.5522155761719, "learning_rate": 6.6624517922990795e-06, "loss": 23.8339, "step": 226450 }, { "epoch": 0.4574635277576894, "grad_norm": 266.2892150878906, "learning_rate": 6.662122581921726e-06, "loss": 22.5644, "step": 226460 }, { "epoch": 0.4574837283903732, "grad_norm": 248.64434814453125, "learning_rate": 6.661793363443389e-06, "loss": 23.6745, "step": 226470 }, { "epoch": 0.457503929023057, "grad_norm": 255.54281616210938, "learning_rate": 6.661464136865671e-06, "loss": 13.6421, "step": 226480 }, { "epoch": 0.4575241296557408, "grad_norm": 304.6407165527344, "learning_rate": 6.6611349021901795e-06, "loss": 16.9253, "step": 226490 }, { "epoch": 0.4575443302884246, "grad_norm": 328.55096435546875, "learning_rate": 6.6608056594185166e-06, "loss": 22.0863, "step": 226500 }, { "epoch": 0.45756453092110844, "grad_norm": 218.3176727294922, "learning_rate": 6.66047640855229e-06, "loss": 15.4515, "step": 226510 }, { "epoch": 0.45758473155379226, "grad_norm": 162.76776123046875, "learning_rate": 6.660147149593102e-06, "loss": 20.768, "step": 226520 }, { "epoch": 0.4576049321864761, "grad_norm": 104.9988784790039, "learning_rate": 6.659817882542559e-06, "loss": 25.5314, "step": 226530 }, { "epoch": 0.4576251328191599, "grad_norm": 481.9990234375, "learning_rate": 6.659488607402265e-06, "loss": 36.4249, "step": 226540 }, { "epoch": 0.4576453334518437, "grad_norm": 16.79974937438965, "learning_rate": 6.659159324173823e-06, "loss": 12.9143, "step": 226550 }, { "epoch": 0.45766553408452754, "grad_norm": 148.8212127685547, "learning_rate": 6.658830032858841e-06, "loss": 11.3387, "step": 226560 }, { "epoch": 0.45768573471721136, "grad_norm": 356.48150634765625, "learning_rate": 6.658500733458922e-06, "loss": 22.0419, "step": 226570 }, { "epoch": 0.4577059353498952, "grad_norm": 496.0290832519531, "learning_rate": 6.658171425975673e-06, "loss": 18.5596, "step": 226580 }, { "epoch": 0.457726135982579, "grad_norm": 327.84423828125, "learning_rate": 6.657842110410695e-06, "loss": 26.5507, "step": 226590 }, { "epoch": 0.4577463366152628, "grad_norm": 229.62820434570312, "learning_rate": 6.657512786765599e-06, "loss": 7.1062, "step": 226600 }, { "epoch": 0.4577665372479466, "grad_norm": 198.07577514648438, "learning_rate": 6.657183455041984e-06, "loss": 14.0675, "step": 226610 }, { "epoch": 0.4577867378806304, "grad_norm": 285.07318115234375, "learning_rate": 6.656854115241458e-06, "loss": 14.2565, "step": 226620 }, { "epoch": 0.4578069385133142, "grad_norm": 287.5888977050781, "learning_rate": 6.656524767365629e-06, "loss": 17.726, "step": 226630 }, { "epoch": 0.45782713914599804, "grad_norm": 230.5944061279297, "learning_rate": 6.656195411416094e-06, "loss": 9.4399, "step": 226640 }, { "epoch": 0.45784733977868186, "grad_norm": 158.69908142089844, "learning_rate": 6.655866047394468e-06, "loss": 14.9457, "step": 226650 }, { "epoch": 0.4578675404113657, "grad_norm": 719.2948608398438, "learning_rate": 6.655536675302349e-06, "loss": 27.1728, "step": 226660 }, { "epoch": 0.4578877410440495, "grad_norm": 247.87844848632812, "learning_rate": 6.655207295141346e-06, "loss": 12.6691, "step": 226670 }, { "epoch": 0.4579079416767333, "grad_norm": 158.5869598388672, "learning_rate": 6.654877906913064e-06, "loss": 17.078, "step": 226680 }, { "epoch": 0.45792814230941714, "grad_norm": 377.8545837402344, "learning_rate": 6.654548510619108e-06, "loss": 24.9561, "step": 226690 }, { "epoch": 0.45794834294210096, "grad_norm": 185.11288452148438, "learning_rate": 6.654219106261082e-06, "loss": 21.7835, "step": 226700 }, { "epoch": 0.4579685435747848, "grad_norm": 73.58160400390625, "learning_rate": 6.6538896938405935e-06, "loss": 8.6921, "step": 226710 }, { "epoch": 0.4579887442074686, "grad_norm": 332.86737060546875, "learning_rate": 6.6535602733592465e-06, "loss": 23.5554, "step": 226720 }, { "epoch": 0.4580089448401524, "grad_norm": 443.82794189453125, "learning_rate": 6.653230844818648e-06, "loss": 15.9618, "step": 226730 }, { "epoch": 0.4580291454728362, "grad_norm": 135.87033081054688, "learning_rate": 6.6529014082204025e-06, "loss": 14.2608, "step": 226740 }, { "epoch": 0.45804934610552, "grad_norm": 197.55747985839844, "learning_rate": 6.652571963566116e-06, "loss": 12.8044, "step": 226750 }, { "epoch": 0.4580695467382038, "grad_norm": 475.3341064453125, "learning_rate": 6.652242510857395e-06, "loss": 25.1584, "step": 226760 }, { "epoch": 0.45808974737088765, "grad_norm": 386.4275207519531, "learning_rate": 6.651913050095842e-06, "loss": 13.2427, "step": 226770 }, { "epoch": 0.45810994800357147, "grad_norm": 334.7086181640625, "learning_rate": 6.651583581283068e-06, "loss": 21.5372, "step": 226780 }, { "epoch": 0.4581301486362553, "grad_norm": 63.42613220214844, "learning_rate": 6.651254104420674e-06, "loss": 14.5631, "step": 226790 }, { "epoch": 0.4581503492689391, "grad_norm": 167.09121704101562, "learning_rate": 6.6509246195102685e-06, "loss": 12.8191, "step": 226800 }, { "epoch": 0.4581705499016229, "grad_norm": 223.9907989501953, "learning_rate": 6.650595126553459e-06, "loss": 16.8672, "step": 226810 }, { "epoch": 0.45819075053430675, "grad_norm": 11.49698543548584, "learning_rate": 6.6502656255518435e-06, "loss": 25.5174, "step": 226820 }, { "epoch": 0.45821095116699057, "grad_norm": 192.18809509277344, "learning_rate": 6.649936116507039e-06, "loss": 24.7205, "step": 226830 }, { "epoch": 0.4582311517996744, "grad_norm": 372.0012512207031, "learning_rate": 6.649606599420643e-06, "loss": 33.7507, "step": 226840 }, { "epoch": 0.4582513524323582, "grad_norm": 105.03313446044922, "learning_rate": 6.649277074294265e-06, "loss": 23.9811, "step": 226850 }, { "epoch": 0.45827155306504197, "grad_norm": 205.1587371826172, "learning_rate": 6.648947541129511e-06, "loss": 26.871, "step": 226860 }, { "epoch": 0.4582917536977258, "grad_norm": 356.0289611816406, "learning_rate": 6.648617999927986e-06, "loss": 19.0391, "step": 226870 }, { "epoch": 0.4583119543304096, "grad_norm": 232.06100463867188, "learning_rate": 6.648288450691298e-06, "loss": 21.4458, "step": 226880 }, { "epoch": 0.45833215496309343, "grad_norm": 1016.0369873046875, "learning_rate": 6.647958893421051e-06, "loss": 22.9529, "step": 226890 }, { "epoch": 0.45835235559577725, "grad_norm": 283.07122802734375, "learning_rate": 6.647629328118852e-06, "loss": 20.6425, "step": 226900 }, { "epoch": 0.45837255622846107, "grad_norm": 363.2906799316406, "learning_rate": 6.647299754786308e-06, "loss": 18.6909, "step": 226910 }, { "epoch": 0.4583927568611449, "grad_norm": 600.4022827148438, "learning_rate": 6.646970173425026e-06, "loss": 29.6246, "step": 226920 }, { "epoch": 0.4584129574938287, "grad_norm": 191.6896514892578, "learning_rate": 6.646640584036609e-06, "loss": 25.1372, "step": 226930 }, { "epoch": 0.45843315812651253, "grad_norm": 583.3280029296875, "learning_rate": 6.6463109866226675e-06, "loss": 10.4451, "step": 226940 }, { "epoch": 0.45845335875919635, "grad_norm": 534.2842407226562, "learning_rate": 6.645981381184804e-06, "loss": 23.4692, "step": 226950 }, { "epoch": 0.45847355939188017, "grad_norm": 314.9391174316406, "learning_rate": 6.645651767724628e-06, "loss": 15.0556, "step": 226960 }, { "epoch": 0.458493760024564, "grad_norm": 195.89559936523438, "learning_rate": 6.645322146243744e-06, "loss": 16.1612, "step": 226970 }, { "epoch": 0.4585139606572478, "grad_norm": 182.68710327148438, "learning_rate": 6.6449925167437604e-06, "loss": 16.4006, "step": 226980 }, { "epoch": 0.4585341612899316, "grad_norm": 366.4389343261719, "learning_rate": 6.644662879226282e-06, "loss": 26.3614, "step": 226990 }, { "epoch": 0.4585543619226154, "grad_norm": 83.33714294433594, "learning_rate": 6.644333233692917e-06, "loss": 8.6848, "step": 227000 }, { "epoch": 0.4585745625552992, "grad_norm": 218.73532104492188, "learning_rate": 6.6440035801452705e-06, "loss": 23.6291, "step": 227010 }, { "epoch": 0.45859476318798303, "grad_norm": 388.358642578125, "learning_rate": 6.643673918584951e-06, "loss": 15.1297, "step": 227020 }, { "epoch": 0.45861496382066685, "grad_norm": 428.1208190917969, "learning_rate": 6.643344249013562e-06, "loss": 26.8207, "step": 227030 }, { "epoch": 0.4586351644533507, "grad_norm": 642.8238525390625, "learning_rate": 6.643014571432715e-06, "loss": 26.6385, "step": 227040 }, { "epoch": 0.4586553650860345, "grad_norm": 338.2110290527344, "learning_rate": 6.642684885844013e-06, "loss": 17.6366, "step": 227050 }, { "epoch": 0.4586755657187183, "grad_norm": 350.84722900390625, "learning_rate": 6.642355192249065e-06, "loss": 15.0428, "step": 227060 }, { "epoch": 0.45869576635140213, "grad_norm": 392.5416259765625, "learning_rate": 6.642025490649475e-06, "loss": 19.7691, "step": 227070 }, { "epoch": 0.45871596698408595, "grad_norm": 351.155029296875, "learning_rate": 6.6416957810468555e-06, "loss": 22.3445, "step": 227080 }, { "epoch": 0.4587361676167698, "grad_norm": 347.0136413574219, "learning_rate": 6.641366063442806e-06, "loss": 9.8688, "step": 227090 }, { "epoch": 0.4587563682494536, "grad_norm": 294.9590148925781, "learning_rate": 6.64103633783894e-06, "loss": 15.0368, "step": 227100 }, { "epoch": 0.4587765688821374, "grad_norm": 159.06092834472656, "learning_rate": 6.64070660423686e-06, "loss": 9.0753, "step": 227110 }, { "epoch": 0.4587967695148212, "grad_norm": 333.8312072753906, "learning_rate": 6.640376862638176e-06, "loss": 22.3037, "step": 227120 }, { "epoch": 0.458816970147505, "grad_norm": 299.9660339355469, "learning_rate": 6.640047113044493e-06, "loss": 18.6726, "step": 227130 }, { "epoch": 0.4588371707801888, "grad_norm": 218.05218505859375, "learning_rate": 6.63971735545742e-06, "loss": 26.9604, "step": 227140 }, { "epoch": 0.45885737141287264, "grad_norm": 169.38882446289062, "learning_rate": 6.6393875898785655e-06, "loss": 21.4799, "step": 227150 }, { "epoch": 0.45887757204555646, "grad_norm": 279.3795471191406, "learning_rate": 6.639057816309532e-06, "loss": 13.5468, "step": 227160 }, { "epoch": 0.4588977726782403, "grad_norm": 180.2625732421875, "learning_rate": 6.638728034751931e-06, "loss": 11.9784, "step": 227170 }, { "epoch": 0.4589179733109241, "grad_norm": 237.71791076660156, "learning_rate": 6.638398245207367e-06, "loss": 11.7803, "step": 227180 }, { "epoch": 0.4589381739436079, "grad_norm": 333.2401428222656, "learning_rate": 6.638068447677449e-06, "loss": 27.6538, "step": 227190 }, { "epoch": 0.45895837457629174, "grad_norm": 408.5473937988281, "learning_rate": 6.637738642163785e-06, "loss": 24.1387, "step": 227200 }, { "epoch": 0.45897857520897556, "grad_norm": 261.7376403808594, "learning_rate": 6.637408828667982e-06, "loss": 19.2426, "step": 227210 }, { "epoch": 0.4589987758416594, "grad_norm": 118.11548614501953, "learning_rate": 6.6370790071916456e-06, "loss": 16.0418, "step": 227220 }, { "epoch": 0.4590189764743432, "grad_norm": 325.09600830078125, "learning_rate": 6.6367491777363845e-06, "loss": 19.797, "step": 227230 }, { "epoch": 0.459039177107027, "grad_norm": 239.65774536132812, "learning_rate": 6.636419340303808e-06, "loss": 33.2, "step": 227240 }, { "epoch": 0.4590593777397108, "grad_norm": 486.3817138671875, "learning_rate": 6.63608949489552e-06, "loss": 21.7641, "step": 227250 }, { "epoch": 0.4590795783723946, "grad_norm": 35.438438415527344, "learning_rate": 6.635759641513132e-06, "loss": 21.6538, "step": 227260 }, { "epoch": 0.4590997790050784, "grad_norm": 346.75775146484375, "learning_rate": 6.635429780158248e-06, "loss": 8.6638, "step": 227270 }, { "epoch": 0.45911997963776224, "grad_norm": 437.05755615234375, "learning_rate": 6.63509991083248e-06, "loss": 15.6752, "step": 227280 }, { "epoch": 0.45914018027044606, "grad_norm": 327.460693359375, "learning_rate": 6.634770033537432e-06, "loss": 38.2078, "step": 227290 }, { "epoch": 0.4591603809031299, "grad_norm": 308.4761047363281, "learning_rate": 6.634440148274712e-06, "loss": 15.9745, "step": 227300 }, { "epoch": 0.4591805815358137, "grad_norm": 547.767333984375, "learning_rate": 6.634110255045931e-06, "loss": 14.2569, "step": 227310 }, { "epoch": 0.4592007821684975, "grad_norm": 248.87222290039062, "learning_rate": 6.633780353852695e-06, "loss": 28.5499, "step": 227320 }, { "epoch": 0.45922098280118134, "grad_norm": 681.9942016601562, "learning_rate": 6.6334504446966095e-06, "loss": 25.683, "step": 227330 }, { "epoch": 0.45924118343386516, "grad_norm": 213.78553771972656, "learning_rate": 6.633120527579286e-06, "loss": 14.3718, "step": 227340 }, { "epoch": 0.459261384066549, "grad_norm": 390.4425048828125, "learning_rate": 6.632790602502331e-06, "loss": 27.1658, "step": 227350 }, { "epoch": 0.4592815846992328, "grad_norm": 363.4864196777344, "learning_rate": 6.632460669467353e-06, "loss": 9.3133, "step": 227360 }, { "epoch": 0.4593017853319166, "grad_norm": 380.4106140136719, "learning_rate": 6.632130728475961e-06, "loss": 15.0501, "step": 227370 }, { "epoch": 0.4593219859646004, "grad_norm": 315.6197204589844, "learning_rate": 6.631800779529759e-06, "loss": 13.3533, "step": 227380 }, { "epoch": 0.4593421865972842, "grad_norm": 127.8883285522461, "learning_rate": 6.6314708226303596e-06, "loss": 10.7667, "step": 227390 }, { "epoch": 0.459362387229968, "grad_norm": 137.3162841796875, "learning_rate": 6.631140857779368e-06, "loss": 15.3496, "step": 227400 }, { "epoch": 0.45938258786265185, "grad_norm": 626.1858520507812, "learning_rate": 6.6308108849783936e-06, "loss": 16.3085, "step": 227410 }, { "epoch": 0.45940278849533567, "grad_norm": 753.2288818359375, "learning_rate": 6.630480904229047e-06, "loss": 22.7463, "step": 227420 }, { "epoch": 0.4594229891280195, "grad_norm": 604.959228515625, "learning_rate": 6.6301509155329315e-06, "loss": 18.8315, "step": 227430 }, { "epoch": 0.4594431897607033, "grad_norm": 243.20101928710938, "learning_rate": 6.629820918891661e-06, "loss": 21.633, "step": 227440 }, { "epoch": 0.4594633903933871, "grad_norm": 269.200927734375, "learning_rate": 6.629490914306839e-06, "loss": 16.5841, "step": 227450 }, { "epoch": 0.45948359102607095, "grad_norm": 270.2914123535156, "learning_rate": 6.629160901780076e-06, "loss": 28.3109, "step": 227460 }, { "epoch": 0.45950379165875477, "grad_norm": 211.80767822265625, "learning_rate": 6.62883088131298e-06, "loss": 14.9958, "step": 227470 }, { "epoch": 0.4595239922914386, "grad_norm": 350.62579345703125, "learning_rate": 6.6285008529071615e-06, "loss": 17.9251, "step": 227480 }, { "epoch": 0.4595441929241224, "grad_norm": 322.35968017578125, "learning_rate": 6.628170816564227e-06, "loss": 20.4497, "step": 227490 }, { "epoch": 0.45956439355680617, "grad_norm": 191.37425231933594, "learning_rate": 6.627840772285784e-06, "loss": 18.0551, "step": 227500 }, { "epoch": 0.45958459418949, "grad_norm": 131.91758728027344, "learning_rate": 6.627510720073443e-06, "loss": 20.0704, "step": 227510 }, { "epoch": 0.4596047948221738, "grad_norm": 1166.66259765625, "learning_rate": 6.627180659928812e-06, "loss": 19.8152, "step": 227520 }, { "epoch": 0.45962499545485763, "grad_norm": 329.0010681152344, "learning_rate": 6.626850591853502e-06, "loss": 32.4321, "step": 227530 }, { "epoch": 0.45964519608754145, "grad_norm": 338.1702575683594, "learning_rate": 6.626520515849117e-06, "loss": 26.5962, "step": 227540 }, { "epoch": 0.45966539672022527, "grad_norm": 284.8745422363281, "learning_rate": 6.62619043191727e-06, "loss": 27.5928, "step": 227550 }, { "epoch": 0.4596855973529091, "grad_norm": 126.03436279296875, "learning_rate": 6.625860340059567e-06, "loss": 7.972, "step": 227560 }, { "epoch": 0.4597057979855929, "grad_norm": 321.208740234375, "learning_rate": 6.6255302402776175e-06, "loss": 19.8323, "step": 227570 }, { "epoch": 0.45972599861827673, "grad_norm": 45.02515411376953, "learning_rate": 6.625200132573032e-06, "loss": 11.3566, "step": 227580 }, { "epoch": 0.45974619925096055, "grad_norm": 138.9471893310547, "learning_rate": 6.624870016947417e-06, "loss": 16.2488, "step": 227590 }, { "epoch": 0.45976639988364437, "grad_norm": 55.459293365478516, "learning_rate": 6.624539893402383e-06, "loss": 17.0183, "step": 227600 }, { "epoch": 0.4597866005163282, "grad_norm": 468.6944274902344, "learning_rate": 6.624209761939539e-06, "loss": 11.6947, "step": 227610 }, { "epoch": 0.459806801149012, "grad_norm": 167.68031311035156, "learning_rate": 6.623879622560493e-06, "loss": 16.2934, "step": 227620 }, { "epoch": 0.4598270017816958, "grad_norm": 299.453369140625, "learning_rate": 6.623549475266855e-06, "loss": 25.7723, "step": 227630 }, { "epoch": 0.4598472024143796, "grad_norm": 22.283979415893555, "learning_rate": 6.6232193200602335e-06, "loss": 14.1899, "step": 227640 }, { "epoch": 0.4598674030470634, "grad_norm": 138.76657104492188, "learning_rate": 6.622889156942239e-06, "loss": 7.8887, "step": 227650 }, { "epoch": 0.45988760367974724, "grad_norm": 255.18679809570312, "learning_rate": 6.622558985914478e-06, "loss": 48.1512, "step": 227660 }, { "epoch": 0.45990780431243106, "grad_norm": 220.19912719726562, "learning_rate": 6.622228806978562e-06, "loss": 24.0865, "step": 227670 }, { "epoch": 0.4599280049451149, "grad_norm": 177.35691833496094, "learning_rate": 6.6218986201361e-06, "loss": 11.2377, "step": 227680 }, { "epoch": 0.4599482055777987, "grad_norm": 273.243408203125, "learning_rate": 6.621568425388701e-06, "loss": 16.2629, "step": 227690 }, { "epoch": 0.4599684062104825, "grad_norm": 76.89684295654297, "learning_rate": 6.6212382227379726e-06, "loss": 25.8669, "step": 227700 }, { "epoch": 0.45998860684316634, "grad_norm": 344.5767822265625, "learning_rate": 6.620908012185528e-06, "loss": 12.1878, "step": 227710 }, { "epoch": 0.46000880747585016, "grad_norm": 211.1240692138672, "learning_rate": 6.6205777937329715e-06, "loss": 16.7005, "step": 227720 }, { "epoch": 0.460029008108534, "grad_norm": 180.15206909179688, "learning_rate": 6.620247567381918e-06, "loss": 34.1538, "step": 227730 }, { "epoch": 0.4600492087412178, "grad_norm": 348.8111877441406, "learning_rate": 6.619917333133973e-06, "loss": 30.5487, "step": 227740 }, { "epoch": 0.4600694093739016, "grad_norm": 141.19766235351562, "learning_rate": 6.619587090990748e-06, "loss": 15.1992, "step": 227750 }, { "epoch": 0.4600896100065854, "grad_norm": 432.17877197265625, "learning_rate": 6.619256840953852e-06, "loss": 30.8324, "step": 227760 }, { "epoch": 0.4601098106392692, "grad_norm": 279.0324401855469, "learning_rate": 6.618926583024894e-06, "loss": 23.2876, "step": 227770 }, { "epoch": 0.460130011271953, "grad_norm": 0.0, "learning_rate": 6.618596317205485e-06, "loss": 9.6582, "step": 227780 }, { "epoch": 0.46015021190463684, "grad_norm": 236.59719848632812, "learning_rate": 6.6182660434972325e-06, "loss": 24.8127, "step": 227790 }, { "epoch": 0.46017041253732066, "grad_norm": 829.5487670898438, "learning_rate": 6.617935761901748e-06, "loss": 31.8741, "step": 227800 }, { "epoch": 0.4601906131700045, "grad_norm": 109.32231903076172, "learning_rate": 6.61760547242064e-06, "loss": 37.5185, "step": 227810 }, { "epoch": 0.4602108138026883, "grad_norm": 265.7324523925781, "learning_rate": 6.617275175055522e-06, "loss": 22.6961, "step": 227820 }, { "epoch": 0.4602310144353721, "grad_norm": 154.08119201660156, "learning_rate": 6.616944869807999e-06, "loss": 46.0759, "step": 227830 }, { "epoch": 0.46025121506805594, "grad_norm": 181.0331268310547, "learning_rate": 6.616614556679684e-06, "loss": 23.7704, "step": 227840 }, { "epoch": 0.46027141570073976, "grad_norm": 184.7841033935547, "learning_rate": 6.616284235672184e-06, "loss": 10.1114, "step": 227850 }, { "epoch": 0.4602916163334236, "grad_norm": 364.95343017578125, "learning_rate": 6.6159539067871114e-06, "loss": 23.2904, "step": 227860 }, { "epoch": 0.4603118169661074, "grad_norm": 59.2686882019043, "learning_rate": 6.615623570026076e-06, "loss": 14.5997, "step": 227870 }, { "epoch": 0.4603320175987912, "grad_norm": 549.2871704101562, "learning_rate": 6.615293225390686e-06, "loss": 28.7566, "step": 227880 }, { "epoch": 0.460352218231475, "grad_norm": 114.10447692871094, "learning_rate": 6.6149628728825535e-06, "loss": 17.2111, "step": 227890 }, { "epoch": 0.4603724188641588, "grad_norm": 49.34844207763672, "learning_rate": 6.614632512503289e-06, "loss": 19.877, "step": 227900 }, { "epoch": 0.4603926194968426, "grad_norm": 870.67041015625, "learning_rate": 6.614302144254498e-06, "loss": 19.3938, "step": 227910 }, { "epoch": 0.46041282012952645, "grad_norm": 342.0787353515625, "learning_rate": 6.613971768137799e-06, "loss": 20.7181, "step": 227920 }, { "epoch": 0.46043302076221027, "grad_norm": 409.73443603515625, "learning_rate": 6.613641384154794e-06, "loss": 12.7583, "step": 227930 }, { "epoch": 0.4604532213948941, "grad_norm": 155.79214477539062, "learning_rate": 6.613310992307097e-06, "loss": 22.2163, "step": 227940 }, { "epoch": 0.4604734220275779, "grad_norm": 655.9906616210938, "learning_rate": 6.612980592596319e-06, "loss": 18.7296, "step": 227950 }, { "epoch": 0.4604936226602617, "grad_norm": 214.17333984375, "learning_rate": 6.612650185024068e-06, "loss": 19.972, "step": 227960 }, { "epoch": 0.46051382329294555, "grad_norm": 259.0454406738281, "learning_rate": 6.612319769591955e-06, "loss": 21.5223, "step": 227970 }, { "epoch": 0.46053402392562937, "grad_norm": 290.9821472167969, "learning_rate": 6.611989346301594e-06, "loss": 17.6434, "step": 227980 }, { "epoch": 0.4605542245583132, "grad_norm": 14.05604362487793, "learning_rate": 6.611658915154589e-06, "loss": 26.8113, "step": 227990 }, { "epoch": 0.460574425190997, "grad_norm": 286.3800964355469, "learning_rate": 6.611328476152557e-06, "loss": 20.8173, "step": 228000 }, { "epoch": 0.46059462582368077, "grad_norm": 424.48883056640625, "learning_rate": 6.610998029297103e-06, "loss": 16.3378, "step": 228010 }, { "epoch": 0.4606148264563646, "grad_norm": 366.1694641113281, "learning_rate": 6.610667574589841e-06, "loss": 27.9622, "step": 228020 }, { "epoch": 0.4606350270890484, "grad_norm": 218.50047302246094, "learning_rate": 6.610337112032381e-06, "loss": 23.0491, "step": 228030 }, { "epoch": 0.46065522772173223, "grad_norm": 22.16668128967285, "learning_rate": 6.610006641626332e-06, "loss": 10.3642, "step": 228040 }, { "epoch": 0.46067542835441605, "grad_norm": 32.618160247802734, "learning_rate": 6.6096761633733065e-06, "loss": 26.2385, "step": 228050 }, { "epoch": 0.46069562898709987, "grad_norm": 196.96116638183594, "learning_rate": 6.6093456772749155e-06, "loss": 14.9902, "step": 228060 }, { "epoch": 0.4607158296197837, "grad_norm": 234.50086975097656, "learning_rate": 6.609015183332767e-06, "loss": 15.5364, "step": 228070 }, { "epoch": 0.4607360302524675, "grad_norm": 225.96482849121094, "learning_rate": 6.608684681548475e-06, "loss": 28.2385, "step": 228080 }, { "epoch": 0.46075623088515133, "grad_norm": 230.64759826660156, "learning_rate": 6.608354171923649e-06, "loss": 54.7954, "step": 228090 }, { "epoch": 0.46077643151783515, "grad_norm": 258.3045654296875, "learning_rate": 6.6080236544599e-06, "loss": 31.4333, "step": 228100 }, { "epoch": 0.46079663215051897, "grad_norm": 217.06385803222656, "learning_rate": 6.6076931291588375e-06, "loss": 21.0817, "step": 228110 }, { "epoch": 0.4608168327832028, "grad_norm": 446.5308532714844, "learning_rate": 6.607362596022074e-06, "loss": 18.0173, "step": 228120 }, { "epoch": 0.4608370334158866, "grad_norm": 329.2787170410156, "learning_rate": 6.607032055051221e-06, "loss": 12.2737, "step": 228130 }, { "epoch": 0.4608572340485704, "grad_norm": 99.86527252197266, "learning_rate": 6.606701506247889e-06, "loss": 6.9131, "step": 228140 }, { "epoch": 0.4608774346812542, "grad_norm": 370.82061767578125, "learning_rate": 6.606370949613688e-06, "loss": 11.2179, "step": 228150 }, { "epoch": 0.460897635313938, "grad_norm": 371.99176025390625, "learning_rate": 6.60604038515023e-06, "loss": 23.2927, "step": 228160 }, { "epoch": 0.46091783594662183, "grad_norm": 325.6578063964844, "learning_rate": 6.605709812859126e-06, "loss": 13.6131, "step": 228170 }, { "epoch": 0.46093803657930565, "grad_norm": 378.3265075683594, "learning_rate": 6.605379232741986e-06, "loss": 11.8274, "step": 228180 }, { "epoch": 0.4609582372119895, "grad_norm": 210.8937530517578, "learning_rate": 6.605048644800425e-06, "loss": 10.8227, "step": 228190 }, { "epoch": 0.4609784378446733, "grad_norm": 430.89459228515625, "learning_rate": 6.604718049036047e-06, "loss": 8.5923, "step": 228200 }, { "epoch": 0.4609986384773571, "grad_norm": 127.435302734375, "learning_rate": 6.604387445450472e-06, "loss": 8.1539, "step": 228210 }, { "epoch": 0.46101883911004093, "grad_norm": 288.4720153808594, "learning_rate": 6.604056834045306e-06, "loss": 22.6267, "step": 228220 }, { "epoch": 0.46103903974272475, "grad_norm": 310.196044921875, "learning_rate": 6.603726214822161e-06, "loss": 26.4848, "step": 228230 }, { "epoch": 0.4610592403754086, "grad_norm": 432.36968994140625, "learning_rate": 6.6033955877826495e-06, "loss": 14.9829, "step": 228240 }, { "epoch": 0.4610794410080924, "grad_norm": 186.742431640625, "learning_rate": 6.603064952928382e-06, "loss": 32.4182, "step": 228250 }, { "epoch": 0.4610996416407762, "grad_norm": 611.9507446289062, "learning_rate": 6.6027343102609705e-06, "loss": 17.5714, "step": 228260 }, { "epoch": 0.46111984227346, "grad_norm": 276.9157409667969, "learning_rate": 6.602403659782026e-06, "loss": 19.98, "step": 228270 }, { "epoch": 0.4611400429061438, "grad_norm": 259.6354675292969, "learning_rate": 6.602073001493161e-06, "loss": 13.9635, "step": 228280 }, { "epoch": 0.4611602435388276, "grad_norm": 255.94044494628906, "learning_rate": 6.601742335395987e-06, "loss": 15.2189, "step": 228290 }, { "epoch": 0.46118044417151144, "grad_norm": 383.42047119140625, "learning_rate": 6.601411661492114e-06, "loss": 18.75, "step": 228300 }, { "epoch": 0.46120064480419526, "grad_norm": 111.80281829833984, "learning_rate": 6.601080979783155e-06, "loss": 11.0807, "step": 228310 }, { "epoch": 0.4612208454368791, "grad_norm": 258.4835510253906, "learning_rate": 6.600750290270722e-06, "loss": 32.2027, "step": 228320 }, { "epoch": 0.4612410460695629, "grad_norm": 139.2156219482422, "learning_rate": 6.600419592956427e-06, "loss": 28.694, "step": 228330 }, { "epoch": 0.4612612467022467, "grad_norm": 14.027462005615234, "learning_rate": 6.600088887841879e-06, "loss": 15.9947, "step": 228340 }, { "epoch": 0.46128144733493054, "grad_norm": 1821.9947509765625, "learning_rate": 6.599758174928692e-06, "loss": 32.0721, "step": 228350 }, { "epoch": 0.46130164796761436, "grad_norm": 251.80593872070312, "learning_rate": 6.599427454218479e-06, "loss": 21.9489, "step": 228360 }, { "epoch": 0.4613218486002982, "grad_norm": 665.418701171875, "learning_rate": 6.59909672571285e-06, "loss": 49.0444, "step": 228370 }, { "epoch": 0.461342049232982, "grad_norm": 96.88481903076172, "learning_rate": 6.598765989413419e-06, "loss": 10.3644, "step": 228380 }, { "epoch": 0.4613622498656658, "grad_norm": 143.07235717773438, "learning_rate": 6.598435245321794e-06, "loss": 35.6728, "step": 228390 }, { "epoch": 0.4613824504983496, "grad_norm": 972.9696044921875, "learning_rate": 6.59810449343959e-06, "loss": 30.8346, "step": 228400 }, { "epoch": 0.4614026511310334, "grad_norm": 266.4164733886719, "learning_rate": 6.597773733768419e-06, "loss": 9.0508, "step": 228410 }, { "epoch": 0.4614228517637172, "grad_norm": 271.0151062011719, "learning_rate": 6.597442966309893e-06, "loss": 17.3578, "step": 228420 }, { "epoch": 0.46144305239640104, "grad_norm": 369.71649169921875, "learning_rate": 6.5971121910656245e-06, "loss": 15.5552, "step": 228430 }, { "epoch": 0.46146325302908486, "grad_norm": 236.3412322998047, "learning_rate": 6.5967814080372224e-06, "loss": 23.7095, "step": 228440 }, { "epoch": 0.4614834536617687, "grad_norm": 344.6517028808594, "learning_rate": 6.596450617226303e-06, "loss": 26.6183, "step": 228450 }, { "epoch": 0.4615036542944525, "grad_norm": 213.60464477539062, "learning_rate": 6.596119818634478e-06, "loss": 20.7677, "step": 228460 }, { "epoch": 0.4615238549271363, "grad_norm": 309.8865661621094, "learning_rate": 6.595789012263356e-06, "loss": 13.0838, "step": 228470 }, { "epoch": 0.46154405555982014, "grad_norm": 484.257080078125, "learning_rate": 6.5954581981145536e-06, "loss": 29.0678, "step": 228480 }, { "epoch": 0.46156425619250396, "grad_norm": 128.2948455810547, "learning_rate": 6.5951273761896794e-06, "loss": 16.608, "step": 228490 }, { "epoch": 0.4615844568251878, "grad_norm": 580.3865966796875, "learning_rate": 6.594796546490351e-06, "loss": 24.4, "step": 228500 }, { "epoch": 0.4616046574578716, "grad_norm": 223.98312377929688, "learning_rate": 6.594465709018175e-06, "loss": 12.8977, "step": 228510 }, { "epoch": 0.4616248580905554, "grad_norm": 1056.8211669921875, "learning_rate": 6.594134863774768e-06, "loss": 25.7395, "step": 228520 }, { "epoch": 0.4616450587232392, "grad_norm": 58.5048942565918, "learning_rate": 6.593804010761742e-06, "loss": 24.7586, "step": 228530 }, { "epoch": 0.461665259355923, "grad_norm": 506.4383239746094, "learning_rate": 6.593473149980707e-06, "loss": 22.7633, "step": 228540 }, { "epoch": 0.46168545998860683, "grad_norm": 532.6744384765625, "learning_rate": 6.593142281433277e-06, "loss": 31.1451, "step": 228550 }, { "epoch": 0.46170566062129065, "grad_norm": 250.6393585205078, "learning_rate": 6.592811405121064e-06, "loss": 25.3368, "step": 228560 }, { "epoch": 0.46172586125397447, "grad_norm": 142.97218322753906, "learning_rate": 6.592480521045683e-06, "loss": 41.9469, "step": 228570 }, { "epoch": 0.4617460618866583, "grad_norm": 405.6157531738281, "learning_rate": 6.592149629208744e-06, "loss": 18.7361, "step": 228580 }, { "epoch": 0.4617662625193421, "grad_norm": 405.7395935058594, "learning_rate": 6.591818729611863e-06, "loss": 8.6353, "step": 228590 }, { "epoch": 0.46178646315202593, "grad_norm": 293.0142517089844, "learning_rate": 6.591487822256648e-06, "loss": 23.1782, "step": 228600 }, { "epoch": 0.46180666378470975, "grad_norm": 492.2015686035156, "learning_rate": 6.591156907144716e-06, "loss": 22.6764, "step": 228610 }, { "epoch": 0.46182686441739357, "grad_norm": 383.2519226074219, "learning_rate": 6.590825984277677e-06, "loss": 33.3584, "step": 228620 }, { "epoch": 0.4618470650500774, "grad_norm": 191.58526611328125, "learning_rate": 6.590495053657145e-06, "loss": 11.6464, "step": 228630 }, { "epoch": 0.4618672656827612, "grad_norm": 171.4994354248047, "learning_rate": 6.590164115284734e-06, "loss": 15.6365, "step": 228640 }, { "epoch": 0.46188746631544497, "grad_norm": 280.93719482421875, "learning_rate": 6.589833169162055e-06, "loss": 16.4522, "step": 228650 }, { "epoch": 0.4619076669481288, "grad_norm": 466.8980407714844, "learning_rate": 6.589502215290723e-06, "loss": 17.384, "step": 228660 }, { "epoch": 0.4619278675808126, "grad_norm": 0.0, "learning_rate": 6.5891712536723495e-06, "loss": 15.2802, "step": 228670 }, { "epoch": 0.46194806821349643, "grad_norm": 282.8539123535156, "learning_rate": 6.588840284308548e-06, "loss": 28.1101, "step": 228680 }, { "epoch": 0.46196826884618025, "grad_norm": 51.84686279296875, "learning_rate": 6.588509307200932e-06, "loss": 23.9104, "step": 228690 }, { "epoch": 0.46198846947886407, "grad_norm": 84.83384704589844, "learning_rate": 6.588178322351113e-06, "loss": 14.0188, "step": 228700 }, { "epoch": 0.4620086701115479, "grad_norm": 12.653707504272461, "learning_rate": 6.587847329760708e-06, "loss": 18.2757, "step": 228710 }, { "epoch": 0.4620288707442317, "grad_norm": 419.0038146972656, "learning_rate": 6.587516329431326e-06, "loss": 14.9025, "step": 228720 }, { "epoch": 0.46204907137691553, "grad_norm": 58.4532356262207, "learning_rate": 6.587185321364582e-06, "loss": 11.2697, "step": 228730 }, { "epoch": 0.46206927200959935, "grad_norm": 367.9628601074219, "learning_rate": 6.5868543055620895e-06, "loss": 16.6834, "step": 228740 }, { "epoch": 0.46208947264228317, "grad_norm": 0.8493233919143677, "learning_rate": 6.586523282025462e-06, "loss": 46.5447, "step": 228750 }, { "epoch": 0.462109673274967, "grad_norm": 54.18994903564453, "learning_rate": 6.586192250756312e-06, "loss": 29.8455, "step": 228760 }, { "epoch": 0.4621298739076508, "grad_norm": 193.18482971191406, "learning_rate": 6.585861211756253e-06, "loss": 17.422, "step": 228770 }, { "epoch": 0.4621500745403346, "grad_norm": 217.60592651367188, "learning_rate": 6.585530165026899e-06, "loss": 30.7533, "step": 228780 }, { "epoch": 0.4621702751730184, "grad_norm": 158.59298706054688, "learning_rate": 6.585199110569863e-06, "loss": 11.8878, "step": 228790 }, { "epoch": 0.4621904758057022, "grad_norm": 217.29208374023438, "learning_rate": 6.58486804838676e-06, "loss": 11.029, "step": 228800 }, { "epoch": 0.46221067643838604, "grad_norm": 287.4243469238281, "learning_rate": 6.5845369784792e-06, "loss": 30.4894, "step": 228810 }, { "epoch": 0.46223087707106986, "grad_norm": 192.53073120117188, "learning_rate": 6.584205900848801e-06, "loss": 22.7963, "step": 228820 }, { "epoch": 0.4622510777037537, "grad_norm": 343.77197265625, "learning_rate": 6.583874815497174e-06, "loss": 18.7724, "step": 228830 }, { "epoch": 0.4622712783364375, "grad_norm": 112.06795501708984, "learning_rate": 6.583543722425934e-06, "loss": 10.4384, "step": 228840 }, { "epoch": 0.4622914789691213, "grad_norm": 490.0068359375, "learning_rate": 6.583212621636693e-06, "loss": 19.1194, "step": 228850 }, { "epoch": 0.46231167960180514, "grad_norm": 59.021053314208984, "learning_rate": 6.582881513131065e-06, "loss": 15.3706, "step": 228860 }, { "epoch": 0.46233188023448896, "grad_norm": 261.73236083984375, "learning_rate": 6.5825503969106675e-06, "loss": 27.1906, "step": 228870 }, { "epoch": 0.4623520808671728, "grad_norm": 89.90087127685547, "learning_rate": 6.582219272977108e-06, "loss": 26.8524, "step": 228880 }, { "epoch": 0.4623722814998566, "grad_norm": 235.7649383544922, "learning_rate": 6.581888141332004e-06, "loss": 18.9953, "step": 228890 }, { "epoch": 0.4623924821325404, "grad_norm": 149.35389709472656, "learning_rate": 6.58155700197697e-06, "loss": 21.1833, "step": 228900 }, { "epoch": 0.4624126827652242, "grad_norm": 111.8674545288086, "learning_rate": 6.581225854913621e-06, "loss": 23.0765, "step": 228910 }, { "epoch": 0.462432883397908, "grad_norm": 294.1495361328125, "learning_rate": 6.580894700143565e-06, "loss": 19.7541, "step": 228920 }, { "epoch": 0.4624530840305918, "grad_norm": 127.71861267089844, "learning_rate": 6.580563537668423e-06, "loss": 14.4907, "step": 228930 }, { "epoch": 0.46247328466327564, "grad_norm": 317.0696105957031, "learning_rate": 6.580232367489805e-06, "loss": 20.543, "step": 228940 }, { "epoch": 0.46249348529595946, "grad_norm": 369.7459411621094, "learning_rate": 6.579901189609325e-06, "loss": 20.2995, "step": 228950 }, { "epoch": 0.4625136859286433, "grad_norm": 285.0638122558594, "learning_rate": 6.5795700040286014e-06, "loss": 17.2451, "step": 228960 }, { "epoch": 0.4625338865613271, "grad_norm": 406.1532287597656, "learning_rate": 6.579238810749241e-06, "loss": 18.4299, "step": 228970 }, { "epoch": 0.4625540871940109, "grad_norm": 368.961669921875, "learning_rate": 6.578907609772866e-06, "loss": 17.4676, "step": 228980 }, { "epoch": 0.46257428782669474, "grad_norm": 71.252197265625, "learning_rate": 6.578576401101084e-06, "loss": 15.2954, "step": 228990 }, { "epoch": 0.46259448845937856, "grad_norm": 165.4848175048828, "learning_rate": 6.578245184735513e-06, "loss": 12.3721, "step": 229000 }, { "epoch": 0.4626146890920624, "grad_norm": 149.20509338378906, "learning_rate": 6.577913960677766e-06, "loss": 11.0391, "step": 229010 }, { "epoch": 0.4626348897247462, "grad_norm": 518.880615234375, "learning_rate": 6.577582728929458e-06, "loss": 21.2859, "step": 229020 }, { "epoch": 0.46265509035743, "grad_norm": 273.3765869140625, "learning_rate": 6.5772514894922034e-06, "loss": 13.1233, "step": 229030 }, { "epoch": 0.4626752909901138, "grad_norm": 125.3149185180664, "learning_rate": 6.576920242367617e-06, "loss": 21.6498, "step": 229040 }, { "epoch": 0.4626954916227976, "grad_norm": 174.5244598388672, "learning_rate": 6.576588987557312e-06, "loss": 15.6477, "step": 229050 }, { "epoch": 0.4627156922554814, "grad_norm": 242.5099639892578, "learning_rate": 6.576257725062903e-06, "loss": 10.9161, "step": 229060 }, { "epoch": 0.46273589288816525, "grad_norm": 129.79151916503906, "learning_rate": 6.575926454886005e-06, "loss": 34.5484, "step": 229070 }, { "epoch": 0.46275609352084907, "grad_norm": 420.2569885253906, "learning_rate": 6.575595177028233e-06, "loss": 13.67, "step": 229080 }, { "epoch": 0.4627762941535329, "grad_norm": 312.1424560546875, "learning_rate": 6.575263891491203e-06, "loss": 15.4445, "step": 229090 }, { "epoch": 0.4627964947862167, "grad_norm": 160.95404052734375, "learning_rate": 6.574932598276524e-06, "loss": 20.5579, "step": 229100 }, { "epoch": 0.4628166954189005, "grad_norm": 212.55068969726562, "learning_rate": 6.574601297385817e-06, "loss": 13.7628, "step": 229110 }, { "epoch": 0.46283689605158435, "grad_norm": 161.1964111328125, "learning_rate": 6.574269988820694e-06, "loss": 13.6427, "step": 229120 }, { "epoch": 0.46285709668426817, "grad_norm": 526.7996826171875, "learning_rate": 6.573938672582769e-06, "loss": 10.9389, "step": 229130 }, { "epoch": 0.462877297316952, "grad_norm": 601.6796264648438, "learning_rate": 6.5736073486736606e-06, "loss": 15.0754, "step": 229140 }, { "epoch": 0.4628974979496358, "grad_norm": 604.4913940429688, "learning_rate": 6.573276017094977e-06, "loss": 20.3136, "step": 229150 }, { "epoch": 0.4629176985823196, "grad_norm": 266.5885009765625, "learning_rate": 6.5729446778483395e-06, "loss": 26.9341, "step": 229160 }, { "epoch": 0.4629378992150034, "grad_norm": 63.83548355102539, "learning_rate": 6.57261333093536e-06, "loss": 16.2437, "step": 229170 }, { "epoch": 0.4629580998476872, "grad_norm": 133.00621032714844, "learning_rate": 6.5722819763576525e-06, "loss": 18.0615, "step": 229180 }, { "epoch": 0.46297830048037103, "grad_norm": 313.0032958984375, "learning_rate": 6.571950614116835e-06, "loss": 16.5829, "step": 229190 }, { "epoch": 0.46299850111305485, "grad_norm": 336.828125, "learning_rate": 6.571619244214521e-06, "loss": 14.466, "step": 229200 }, { "epoch": 0.46301870174573867, "grad_norm": 116.6859130859375, "learning_rate": 6.571287866652325e-06, "loss": 20.9073, "step": 229210 }, { "epoch": 0.4630389023784225, "grad_norm": 285.19183349609375, "learning_rate": 6.570956481431862e-06, "loss": 20.1244, "step": 229220 }, { "epoch": 0.4630591030111063, "grad_norm": 135.9971160888672, "learning_rate": 6.570625088554747e-06, "loss": 19.9207, "step": 229230 }, { "epoch": 0.46307930364379013, "grad_norm": 173.5549774169922, "learning_rate": 6.570293688022597e-06, "loss": 19.8154, "step": 229240 }, { "epoch": 0.46309950427647395, "grad_norm": 419.2649841308594, "learning_rate": 6.569962279837025e-06, "loss": 13.8402, "step": 229250 }, { "epoch": 0.46311970490915777, "grad_norm": 323.45709228515625, "learning_rate": 6.5696308639996475e-06, "loss": 21.3098, "step": 229260 }, { "epoch": 0.4631399055418416, "grad_norm": 191.80218505859375, "learning_rate": 6.569299440512081e-06, "loss": 16.6252, "step": 229270 }, { "epoch": 0.4631601061745254, "grad_norm": 351.6878356933594, "learning_rate": 6.568968009375938e-06, "loss": 14.746, "step": 229280 }, { "epoch": 0.4631803068072092, "grad_norm": 197.4404296875, "learning_rate": 6.568636570592835e-06, "loss": 14.4256, "step": 229290 }, { "epoch": 0.463200507439893, "grad_norm": 181.59043884277344, "learning_rate": 6.5683051241643894e-06, "loss": 15.3062, "step": 229300 }, { "epoch": 0.4632207080725768, "grad_norm": 254.1195068359375, "learning_rate": 6.567973670092212e-06, "loss": 14.3766, "step": 229310 }, { "epoch": 0.46324090870526063, "grad_norm": 101.96195220947266, "learning_rate": 6.567642208377924e-06, "loss": 9.8903, "step": 229320 }, { "epoch": 0.46326110933794445, "grad_norm": 157.60874938964844, "learning_rate": 6.567310739023136e-06, "loss": 16.9365, "step": 229330 }, { "epoch": 0.4632813099706283, "grad_norm": 8.789402961730957, "learning_rate": 6.566979262029467e-06, "loss": 23.0709, "step": 229340 }, { "epoch": 0.4633015106033121, "grad_norm": 411.4132995605469, "learning_rate": 6.566647777398529e-06, "loss": 22.014, "step": 229350 }, { "epoch": 0.4633217112359959, "grad_norm": 607.08154296875, "learning_rate": 6.566316285131943e-06, "loss": 32.444, "step": 229360 }, { "epoch": 0.46334191186867973, "grad_norm": 201.14842224121094, "learning_rate": 6.5659847852313184e-06, "loss": 13.2254, "step": 229370 }, { "epoch": 0.46336211250136355, "grad_norm": 211.62078857421875, "learning_rate": 6.5656532776982765e-06, "loss": 26.1464, "step": 229380 }, { "epoch": 0.4633823131340474, "grad_norm": 123.09964752197266, "learning_rate": 6.565321762534428e-06, "loss": 12.8217, "step": 229390 }, { "epoch": 0.4634025137667312, "grad_norm": 415.0384826660156, "learning_rate": 6.5649902397413915e-06, "loss": 18.1678, "step": 229400 }, { "epoch": 0.463422714399415, "grad_norm": 644.9924926757812, "learning_rate": 6.564658709320783e-06, "loss": 22.9092, "step": 229410 }, { "epoch": 0.4634429150320988, "grad_norm": 208.94320678710938, "learning_rate": 6.564327171274217e-06, "loss": 24.0479, "step": 229420 }, { "epoch": 0.4634631156647826, "grad_norm": 268.2586669921875, "learning_rate": 6.563995625603312e-06, "loss": 17.7794, "step": 229430 }, { "epoch": 0.4634833162974664, "grad_norm": 427.7290954589844, "learning_rate": 6.56366407230968e-06, "loss": 21.7253, "step": 229440 }, { "epoch": 0.46350351693015024, "grad_norm": 354.1027526855469, "learning_rate": 6.5633325113949395e-06, "loss": 20.8164, "step": 229450 }, { "epoch": 0.46352371756283406, "grad_norm": 580.920166015625, "learning_rate": 6.5630009428607065e-06, "loss": 23.4428, "step": 229460 }, { "epoch": 0.4635439181955179, "grad_norm": 264.5554504394531, "learning_rate": 6.562669366708596e-06, "loss": 13.163, "step": 229470 }, { "epoch": 0.4635641188282017, "grad_norm": 407.0652770996094, "learning_rate": 6.562337782940224e-06, "loss": 26.8113, "step": 229480 }, { "epoch": 0.4635843194608855, "grad_norm": 508.1312255859375, "learning_rate": 6.562006191557209e-06, "loss": 17.7262, "step": 229490 }, { "epoch": 0.46360452009356934, "grad_norm": 98.75956726074219, "learning_rate": 6.561674592561164e-06, "loss": 19.0045, "step": 229500 }, { "epoch": 0.46362472072625316, "grad_norm": 209.20257568359375, "learning_rate": 6.561342985953706e-06, "loss": 12.5255, "step": 229510 }, { "epoch": 0.463644921358937, "grad_norm": 316.0845031738281, "learning_rate": 6.561011371736452e-06, "loss": 12.6563, "step": 229520 }, { "epoch": 0.4636651219916208, "grad_norm": 173.3015899658203, "learning_rate": 6.560679749911018e-06, "loss": 12.8082, "step": 229530 }, { "epoch": 0.4636853226243046, "grad_norm": 289.4515380859375, "learning_rate": 6.560348120479021e-06, "loss": 30.2138, "step": 229540 }, { "epoch": 0.4637055232569884, "grad_norm": 442.9195861816406, "learning_rate": 6.5600164834420754e-06, "loss": 16.7966, "step": 229550 }, { "epoch": 0.4637257238896722, "grad_norm": 281.88824462890625, "learning_rate": 6.559684838801798e-06, "loss": 37.6623, "step": 229560 }, { "epoch": 0.463745924522356, "grad_norm": 150.4138946533203, "learning_rate": 6.559353186559808e-06, "loss": 14.2239, "step": 229570 }, { "epoch": 0.46376612515503984, "grad_norm": 585.6231689453125, "learning_rate": 6.559021526717717e-06, "loss": 23.9042, "step": 229580 }, { "epoch": 0.46378632578772366, "grad_norm": 268.73333740234375, "learning_rate": 6.558689859277148e-06, "loss": 16.9914, "step": 229590 }, { "epoch": 0.4638065264204075, "grad_norm": 161.56468200683594, "learning_rate": 6.558358184239709e-06, "loss": 22.5795, "step": 229600 }, { "epoch": 0.4638267270530913, "grad_norm": 236.35780334472656, "learning_rate": 6.5580265016070245e-06, "loss": 35.0547, "step": 229610 }, { "epoch": 0.4638469276857751, "grad_norm": 591.3236694335938, "learning_rate": 6.557694811380707e-06, "loss": 22.3792, "step": 229620 }, { "epoch": 0.46386712831845894, "grad_norm": 132.39486694335938, "learning_rate": 6.5573631135623736e-06, "loss": 24.2313, "step": 229630 }, { "epoch": 0.46388732895114276, "grad_norm": 193.477783203125, "learning_rate": 6.557031408153642e-06, "loss": 18.4563, "step": 229640 }, { "epoch": 0.4639075295838266, "grad_norm": 356.71173095703125, "learning_rate": 6.556699695156128e-06, "loss": 17.6154, "step": 229650 }, { "epoch": 0.4639277302165104, "grad_norm": 305.1192932128906, "learning_rate": 6.556367974571448e-06, "loss": 15.1721, "step": 229660 }, { "epoch": 0.4639479308491942, "grad_norm": 262.60565185546875, "learning_rate": 6.556036246401218e-06, "loss": 16.7325, "step": 229670 }, { "epoch": 0.463968131481878, "grad_norm": 123.84944152832031, "learning_rate": 6.555704510647059e-06, "loss": 12.5663, "step": 229680 }, { "epoch": 0.4639883321145618, "grad_norm": 97.04841613769531, "learning_rate": 6.555372767310582e-06, "loss": 25.6989, "step": 229690 }, { "epoch": 0.46400853274724563, "grad_norm": 525.7053833007812, "learning_rate": 6.55504101639341e-06, "loss": 26.4193, "step": 229700 }, { "epoch": 0.46402873337992945, "grad_norm": 204.77194213867188, "learning_rate": 6.554709257897153e-06, "loss": 15.4039, "step": 229710 }, { "epoch": 0.46404893401261327, "grad_norm": 210.8658447265625, "learning_rate": 6.554377491823434e-06, "loss": 19.3939, "step": 229720 }, { "epoch": 0.4640691346452971, "grad_norm": 348.4362487792969, "learning_rate": 6.554045718173867e-06, "loss": 15.3209, "step": 229730 }, { "epoch": 0.4640893352779809, "grad_norm": 636.6931762695312, "learning_rate": 6.5537139369500705e-06, "loss": 32.1241, "step": 229740 }, { "epoch": 0.46410953591066473, "grad_norm": 163.26010131835938, "learning_rate": 6.55338214815366e-06, "loss": 21.2472, "step": 229750 }, { "epoch": 0.46412973654334855, "grad_norm": 320.88311767578125, "learning_rate": 6.553050351786252e-06, "loss": 21.5046, "step": 229760 }, { "epoch": 0.46414993717603237, "grad_norm": 466.7154235839844, "learning_rate": 6.552718547849467e-06, "loss": 12.9769, "step": 229770 }, { "epoch": 0.4641701378087162, "grad_norm": 487.47406005859375, "learning_rate": 6.55238673634492e-06, "loss": 23.4285, "step": 229780 }, { "epoch": 0.4641903384414, "grad_norm": 556.0003051757812, "learning_rate": 6.552054917274226e-06, "loss": 21.5784, "step": 229790 }, { "epoch": 0.46421053907408383, "grad_norm": 473.9961853027344, "learning_rate": 6.551723090639008e-06, "loss": 23.758, "step": 229800 }, { "epoch": 0.4642307397067676, "grad_norm": 247.04803466796875, "learning_rate": 6.551391256440877e-06, "loss": 30.7157, "step": 229810 }, { "epoch": 0.4642509403394514, "grad_norm": 271.2428283691406, "learning_rate": 6.551059414681455e-06, "loss": 12.4126, "step": 229820 }, { "epoch": 0.46427114097213523, "grad_norm": 621.4597778320312, "learning_rate": 6.550727565362357e-06, "loss": 23.0938, "step": 229830 }, { "epoch": 0.46429134160481905, "grad_norm": 175.3802490234375, "learning_rate": 6.5503957084852e-06, "loss": 19.7626, "step": 229840 }, { "epoch": 0.46431154223750287, "grad_norm": 245.82061767578125, "learning_rate": 6.550063844051603e-06, "loss": 15.1887, "step": 229850 }, { "epoch": 0.4643317428701867, "grad_norm": 286.3362731933594, "learning_rate": 6.549731972063183e-06, "loss": 14.7092, "step": 229860 }, { "epoch": 0.4643519435028705, "grad_norm": 394.1311950683594, "learning_rate": 6.549400092521557e-06, "loss": 15.8082, "step": 229870 }, { "epoch": 0.46437214413555433, "grad_norm": 1.5387331247329712, "learning_rate": 6.549068205428344e-06, "loss": 21.5328, "step": 229880 }, { "epoch": 0.46439234476823815, "grad_norm": 397.85614013671875, "learning_rate": 6.54873631078516e-06, "loss": 20.7495, "step": 229890 }, { "epoch": 0.46441254540092197, "grad_norm": 262.1589660644531, "learning_rate": 6.548404408593622e-06, "loss": 16.9991, "step": 229900 }, { "epoch": 0.4644327460336058, "grad_norm": 104.7777099609375, "learning_rate": 6.54807249885535e-06, "loss": 32.766, "step": 229910 }, { "epoch": 0.4644529466662896, "grad_norm": 200.12295532226562, "learning_rate": 6.547740581571959e-06, "loss": 19.3555, "step": 229920 }, { "epoch": 0.4644731472989734, "grad_norm": 314.0841979980469, "learning_rate": 6.547408656745069e-06, "loss": 10.8184, "step": 229930 }, { "epoch": 0.4644933479316572, "grad_norm": 193.37490844726562, "learning_rate": 6.547076724376296e-06, "loss": 15.5862, "step": 229940 }, { "epoch": 0.464513548564341, "grad_norm": 583.7041625976562, "learning_rate": 6.546744784467261e-06, "loss": 23.0763, "step": 229950 }, { "epoch": 0.46453374919702484, "grad_norm": 243.75930786132812, "learning_rate": 6.546412837019577e-06, "loss": 24.7986, "step": 229960 }, { "epoch": 0.46455394982970866, "grad_norm": 206.74386596679688, "learning_rate": 6.546080882034866e-06, "loss": 17.152, "step": 229970 }, { "epoch": 0.4645741504623925, "grad_norm": 1210.060791015625, "learning_rate": 6.545748919514743e-06, "loss": 22.2683, "step": 229980 }, { "epoch": 0.4645943510950763, "grad_norm": 412.109619140625, "learning_rate": 6.545416949460828e-06, "loss": 25.3992, "step": 229990 }, { "epoch": 0.4646145517277601, "grad_norm": 499.7328186035156, "learning_rate": 6.545084971874738e-06, "loss": 24.6814, "step": 230000 }, { "epoch": 0.46463475236044394, "grad_norm": 469.2377014160156, "learning_rate": 6.544752986758092e-06, "loss": 40.5023, "step": 230010 }, { "epoch": 0.46465495299312776, "grad_norm": 324.91412353515625, "learning_rate": 6.5444209941125056e-06, "loss": 10.235, "step": 230020 }, { "epoch": 0.4646751536258116, "grad_norm": 358.0535888671875, "learning_rate": 6.544088993939599e-06, "loss": 10.9769, "step": 230030 }, { "epoch": 0.4646953542584954, "grad_norm": 728.4425048828125, "learning_rate": 6.543756986240992e-06, "loss": 21.6491, "step": 230040 }, { "epoch": 0.4647155548911792, "grad_norm": 181.56260681152344, "learning_rate": 6.543424971018298e-06, "loss": 24.5431, "step": 230050 }, { "epoch": 0.464735755523863, "grad_norm": 160.94931030273438, "learning_rate": 6.54309294827314e-06, "loss": 7.997, "step": 230060 }, { "epoch": 0.4647559561565468, "grad_norm": 253.96148681640625, "learning_rate": 6.542760918007133e-06, "loss": 14.1033, "step": 230070 }, { "epoch": 0.4647761567892306, "grad_norm": 261.7500305175781, "learning_rate": 6.542428880221896e-06, "loss": 19.9664, "step": 230080 }, { "epoch": 0.46479635742191444, "grad_norm": 88.11426544189453, "learning_rate": 6.542096834919049e-06, "loss": 11.3409, "step": 230090 }, { "epoch": 0.46481655805459826, "grad_norm": 306.2686767578125, "learning_rate": 6.541764782100208e-06, "loss": 13.3094, "step": 230100 }, { "epoch": 0.4648367586872821, "grad_norm": 56.54940414428711, "learning_rate": 6.541432721766994e-06, "loss": 34.2315, "step": 230110 }, { "epoch": 0.4648569593199659, "grad_norm": 195.1072235107422, "learning_rate": 6.541100653921022e-06, "loss": 24.4819, "step": 230120 }, { "epoch": 0.4648771599526497, "grad_norm": 238.82073974609375, "learning_rate": 6.540768578563913e-06, "loss": 24.7072, "step": 230130 }, { "epoch": 0.46489736058533354, "grad_norm": 275.60479736328125, "learning_rate": 6.540436495697284e-06, "loss": 28.1565, "step": 230140 }, { "epoch": 0.46491756121801736, "grad_norm": 361.5238952636719, "learning_rate": 6.540104405322757e-06, "loss": 22.568, "step": 230150 }, { "epoch": 0.4649377618507012, "grad_norm": 69.8905258178711, "learning_rate": 6.5397723074419454e-06, "loss": 23.9554, "step": 230160 }, { "epoch": 0.464957962483385, "grad_norm": 409.1376953125, "learning_rate": 6.53944020205647e-06, "loss": 18.9286, "step": 230170 }, { "epoch": 0.4649781631160688, "grad_norm": 481.3592834472656, "learning_rate": 6.539108089167953e-06, "loss": 27.7272, "step": 230180 }, { "epoch": 0.4649983637487526, "grad_norm": 398.72869873046875, "learning_rate": 6.538775968778006e-06, "loss": 13.5932, "step": 230190 }, { "epoch": 0.4650185643814364, "grad_norm": 204.81980895996094, "learning_rate": 6.538443840888254e-06, "loss": 18.3698, "step": 230200 }, { "epoch": 0.4650387650141202, "grad_norm": 479.78558349609375, "learning_rate": 6.538111705500312e-06, "loss": 16.4132, "step": 230210 }, { "epoch": 0.46505896564680405, "grad_norm": 244.32862854003906, "learning_rate": 6.537779562615801e-06, "loss": 12.6441, "step": 230220 }, { "epoch": 0.46507916627948787, "grad_norm": 207.0391082763672, "learning_rate": 6.537447412236338e-06, "loss": 22.6567, "step": 230230 }, { "epoch": 0.4650993669121717, "grad_norm": 199.74310302734375, "learning_rate": 6.537115254363544e-06, "loss": 16.8381, "step": 230240 }, { "epoch": 0.4651195675448555, "grad_norm": 401.3957824707031, "learning_rate": 6.536783088999037e-06, "loss": 17.089, "step": 230250 }, { "epoch": 0.4651397681775393, "grad_norm": 313.5570068359375, "learning_rate": 6.536450916144435e-06, "loss": 24.45, "step": 230260 }, { "epoch": 0.46515996881022315, "grad_norm": 493.7795715332031, "learning_rate": 6.536118735801356e-06, "loss": 19.0283, "step": 230270 }, { "epoch": 0.46518016944290697, "grad_norm": 454.3205261230469, "learning_rate": 6.535786547971421e-06, "loss": 22.3295, "step": 230280 }, { "epoch": 0.4652003700755908, "grad_norm": 1595.4095458984375, "learning_rate": 6.53545435265625e-06, "loss": 19.7394, "step": 230290 }, { "epoch": 0.4652205707082746, "grad_norm": 311.3018493652344, "learning_rate": 6.53512214985746e-06, "loss": 21.2557, "step": 230300 }, { "epoch": 0.4652407713409584, "grad_norm": 282.8323669433594, "learning_rate": 6.534789939576672e-06, "loss": 12.4019, "step": 230310 }, { "epoch": 0.4652609719736422, "grad_norm": 429.9158020019531, "learning_rate": 6.534457721815502e-06, "loss": 13.2881, "step": 230320 }, { "epoch": 0.465281172606326, "grad_norm": 160.1783905029297, "learning_rate": 6.534125496575573e-06, "loss": 13.2339, "step": 230330 }, { "epoch": 0.46530137323900983, "grad_norm": 367.2994079589844, "learning_rate": 6.533793263858501e-06, "loss": 19.346, "step": 230340 }, { "epoch": 0.46532157387169365, "grad_norm": 252.804443359375, "learning_rate": 6.533461023665907e-06, "loss": 30.8204, "step": 230350 }, { "epoch": 0.46534177450437747, "grad_norm": 155.61073303222656, "learning_rate": 6.533128775999411e-06, "loss": 21.8377, "step": 230360 }, { "epoch": 0.4653619751370613, "grad_norm": 139.07647705078125, "learning_rate": 6.532796520860629e-06, "loss": 17.0235, "step": 230370 }, { "epoch": 0.4653821757697451, "grad_norm": 438.4337463378906, "learning_rate": 6.532464258251185e-06, "loss": 12.7165, "step": 230380 }, { "epoch": 0.46540237640242893, "grad_norm": 176.7720947265625, "learning_rate": 6.532131988172695e-06, "loss": 10.2699, "step": 230390 }, { "epoch": 0.46542257703511275, "grad_norm": 407.3740234375, "learning_rate": 6.53179971062678e-06, "loss": 18.4003, "step": 230400 }, { "epoch": 0.46544277766779657, "grad_norm": 422.8507080078125, "learning_rate": 6.531467425615059e-06, "loss": 16.7081, "step": 230410 }, { "epoch": 0.4654629783004804, "grad_norm": 156.13314819335938, "learning_rate": 6.531135133139152e-06, "loss": 21.5665, "step": 230420 }, { "epoch": 0.4654831789331642, "grad_norm": 459.4855651855469, "learning_rate": 6.530802833200677e-06, "loss": 22.2408, "step": 230430 }, { "epoch": 0.46550337956584803, "grad_norm": 160.27061462402344, "learning_rate": 6.530470525801254e-06, "loss": 21.8163, "step": 230440 }, { "epoch": 0.4655235801985318, "grad_norm": 160.52236938476562, "learning_rate": 6.530138210942505e-06, "loss": 9.8982, "step": 230450 }, { "epoch": 0.4655437808312156, "grad_norm": 66.28935241699219, "learning_rate": 6.529805888626046e-06, "loss": 15.1725, "step": 230460 }, { "epoch": 0.46556398146389943, "grad_norm": 282.7294921875, "learning_rate": 6.529473558853501e-06, "loss": 23.5235, "step": 230470 }, { "epoch": 0.46558418209658325, "grad_norm": 126.14422607421875, "learning_rate": 6.529141221626485e-06, "loss": 13.8058, "step": 230480 }, { "epoch": 0.4656043827292671, "grad_norm": 31.46508026123047, "learning_rate": 6.528808876946622e-06, "loss": 10.4054, "step": 230490 }, { "epoch": 0.4656245833619509, "grad_norm": 305.8723449707031, "learning_rate": 6.5284765248155295e-06, "loss": 18.5224, "step": 230500 }, { "epoch": 0.4656447839946347, "grad_norm": 146.7981414794922, "learning_rate": 6.5281441652348266e-06, "loss": 12.2956, "step": 230510 }, { "epoch": 0.46566498462731853, "grad_norm": 366.3955383300781, "learning_rate": 6.527811798206136e-06, "loss": 18.7638, "step": 230520 }, { "epoch": 0.46568518526000235, "grad_norm": 347.45574951171875, "learning_rate": 6.527479423731074e-06, "loss": 14.5131, "step": 230530 }, { "epoch": 0.4657053858926862, "grad_norm": 345.1633605957031, "learning_rate": 6.527147041811266e-06, "loss": 24.3563, "step": 230540 }, { "epoch": 0.46572558652537, "grad_norm": 322.75372314453125, "learning_rate": 6.526814652448325e-06, "loss": 17.6718, "step": 230550 }, { "epoch": 0.4657457871580538, "grad_norm": 365.07232666015625, "learning_rate": 6.526482255643877e-06, "loss": 21.9533, "step": 230560 }, { "epoch": 0.4657659877907376, "grad_norm": 74.94061279296875, "learning_rate": 6.526149851399538e-06, "loss": 14.6858, "step": 230570 }, { "epoch": 0.4657861884234214, "grad_norm": 325.8135986328125, "learning_rate": 6.525817439716932e-06, "loss": 14.1431, "step": 230580 }, { "epoch": 0.4658063890561052, "grad_norm": 248.1006317138672, "learning_rate": 6.525485020597675e-06, "loss": 11.5351, "step": 230590 }, { "epoch": 0.46582658968878904, "grad_norm": 126.90278625488281, "learning_rate": 6.525152594043389e-06, "loss": 14.825, "step": 230600 }, { "epoch": 0.46584679032147286, "grad_norm": 355.6916198730469, "learning_rate": 6.524820160055694e-06, "loss": 23.8398, "step": 230610 }, { "epoch": 0.4658669909541567, "grad_norm": 25.26082420349121, "learning_rate": 6.5244877186362095e-06, "loss": 20.4036, "step": 230620 }, { "epoch": 0.4658871915868405, "grad_norm": 361.58990478515625, "learning_rate": 6.52415526978656e-06, "loss": 31.1766, "step": 230630 }, { "epoch": 0.4659073922195243, "grad_norm": 263.5442199707031, "learning_rate": 6.523822813508359e-06, "loss": 18.7, "step": 230640 }, { "epoch": 0.46592759285220814, "grad_norm": 212.98709106445312, "learning_rate": 6.5234903498032345e-06, "loss": 14.8251, "step": 230650 }, { "epoch": 0.46594779348489196, "grad_norm": 158.9968719482422, "learning_rate": 6.523157878672799e-06, "loss": 18.5322, "step": 230660 }, { "epoch": 0.4659679941175758, "grad_norm": 258.6199951171875, "learning_rate": 6.522825400118679e-06, "loss": 25.419, "step": 230670 }, { "epoch": 0.4659881947502596, "grad_norm": 444.0636291503906, "learning_rate": 6.5224929141424906e-06, "loss": 22.5273, "step": 230680 }, { "epoch": 0.4660083953829434, "grad_norm": 420.22857666015625, "learning_rate": 6.522160420745857e-06, "loss": 23.3191, "step": 230690 }, { "epoch": 0.4660285960156272, "grad_norm": 281.88641357421875, "learning_rate": 6.5218279199304014e-06, "loss": 19.4499, "step": 230700 }, { "epoch": 0.466048796648311, "grad_norm": 55.26210021972656, "learning_rate": 6.521495411697738e-06, "loss": 16.8774, "step": 230710 }, { "epoch": 0.4660689972809948, "grad_norm": 120.89141845703125, "learning_rate": 6.521162896049491e-06, "loss": 18.0596, "step": 230720 }, { "epoch": 0.46608919791367864, "grad_norm": 268.268798828125, "learning_rate": 6.52083037298728e-06, "loss": 16.0604, "step": 230730 }, { "epoch": 0.46610939854636246, "grad_norm": 12.105450630187988, "learning_rate": 6.520497842512728e-06, "loss": 9.4249, "step": 230740 }, { "epoch": 0.4661295991790463, "grad_norm": 202.22096252441406, "learning_rate": 6.520165304627452e-06, "loss": 21.5408, "step": 230750 }, { "epoch": 0.4661497998117301, "grad_norm": 337.5409240722656, "learning_rate": 6.519832759333076e-06, "loss": 29.9711, "step": 230760 }, { "epoch": 0.4661700004444139, "grad_norm": 219.98300170898438, "learning_rate": 6.519500206631218e-06, "loss": 29.8635, "step": 230770 }, { "epoch": 0.46619020107709774, "grad_norm": 294.467529296875, "learning_rate": 6.519167646523501e-06, "loss": 10.48, "step": 230780 }, { "epoch": 0.46621040170978156, "grad_norm": 396.7747802734375, "learning_rate": 6.518835079011548e-06, "loss": 17.2731, "step": 230790 }, { "epoch": 0.4662306023424654, "grad_norm": 23.465089797973633, "learning_rate": 6.518502504096972e-06, "loss": 24.9495, "step": 230800 }, { "epoch": 0.4662508029751492, "grad_norm": 213.52825927734375, "learning_rate": 6.5181699217814025e-06, "loss": 15.8304, "step": 230810 }, { "epoch": 0.466271003607833, "grad_norm": 438.2044677734375, "learning_rate": 6.517837332066455e-06, "loss": 27.3878, "step": 230820 }, { "epoch": 0.4662912042405168, "grad_norm": 354.9538269042969, "learning_rate": 6.5175047349537535e-06, "loss": 25.539, "step": 230830 }, { "epoch": 0.4663114048732006, "grad_norm": 189.2494659423828, "learning_rate": 6.517172130444918e-06, "loss": 8.6384, "step": 230840 }, { "epoch": 0.46633160550588443, "grad_norm": 286.53314208984375, "learning_rate": 6.516839518541569e-06, "loss": 26.0485, "step": 230850 }, { "epoch": 0.46635180613856825, "grad_norm": 201.26194763183594, "learning_rate": 6.516506899245329e-06, "loss": 11.4761, "step": 230860 }, { "epoch": 0.46637200677125207, "grad_norm": 543.276611328125, "learning_rate": 6.516174272557817e-06, "loss": 23.4385, "step": 230870 }, { "epoch": 0.4663922074039359, "grad_norm": 289.6154479980469, "learning_rate": 6.515841638480656e-06, "loss": 12.4283, "step": 230880 }, { "epoch": 0.4664124080366197, "grad_norm": 501.4272155761719, "learning_rate": 6.515508997015467e-06, "loss": 23.2077, "step": 230890 }, { "epoch": 0.46643260866930353, "grad_norm": 247.7288818359375, "learning_rate": 6.5151763481638705e-06, "loss": 14.5278, "step": 230900 }, { "epoch": 0.46645280930198735, "grad_norm": 244.7344512939453, "learning_rate": 6.5148436919274884e-06, "loss": 34.8364, "step": 230910 }, { "epoch": 0.46647300993467117, "grad_norm": 256.1528015136719, "learning_rate": 6.514511028307943e-06, "loss": 13.0615, "step": 230920 }, { "epoch": 0.466493210567355, "grad_norm": 211.56285095214844, "learning_rate": 6.5141783573068525e-06, "loss": 14.1001, "step": 230930 }, { "epoch": 0.4665134112000388, "grad_norm": 181.11538696289062, "learning_rate": 6.513845678925842e-06, "loss": 31.9614, "step": 230940 }, { "epoch": 0.46653361183272263, "grad_norm": 404.848876953125, "learning_rate": 6.5135129931665305e-06, "loss": 19.0697, "step": 230950 }, { "epoch": 0.4665538124654064, "grad_norm": 0.0, "learning_rate": 6.5131803000305405e-06, "loss": 37.0636, "step": 230960 }, { "epoch": 0.4665740130980902, "grad_norm": 430.0387878417969, "learning_rate": 6.512847599519494e-06, "loss": 15.5509, "step": 230970 }, { "epoch": 0.46659421373077403, "grad_norm": 375.9198303222656, "learning_rate": 6.5125148916350086e-06, "loss": 16.8101, "step": 230980 }, { "epoch": 0.46661441436345785, "grad_norm": 232.88571166992188, "learning_rate": 6.512182176378713e-06, "loss": 28.4153, "step": 230990 }, { "epoch": 0.4666346149961417, "grad_norm": 537.2500610351562, "learning_rate": 6.5118494537522235e-06, "loss": 19.6985, "step": 231000 }, { "epoch": 0.4666548156288255, "grad_norm": 198.88963317871094, "learning_rate": 6.511516723757163e-06, "loss": 31.3372, "step": 231010 }, { "epoch": 0.4666750162615093, "grad_norm": 373.78369140625, "learning_rate": 6.511183986395153e-06, "loss": 18.4824, "step": 231020 }, { "epoch": 0.46669521689419313, "grad_norm": 357.8785400390625, "learning_rate": 6.510851241667816e-06, "loss": 12.138, "step": 231030 }, { "epoch": 0.46671541752687695, "grad_norm": 112.11738586425781, "learning_rate": 6.510518489576774e-06, "loss": 32.189, "step": 231040 }, { "epoch": 0.4667356181595608, "grad_norm": 388.7401428222656, "learning_rate": 6.510185730123646e-06, "loss": 14.1586, "step": 231050 }, { "epoch": 0.4667558187922446, "grad_norm": 262.6580810546875, "learning_rate": 6.509852963310057e-06, "loss": 18.1902, "step": 231060 }, { "epoch": 0.4667760194249284, "grad_norm": 97.827392578125, "learning_rate": 6.509520189137628e-06, "loss": 23.4473, "step": 231070 }, { "epoch": 0.4667962200576122, "grad_norm": 44.32521438598633, "learning_rate": 6.509187407607981e-06, "loss": 16.2384, "step": 231080 }, { "epoch": 0.466816420690296, "grad_norm": 216.4987335205078, "learning_rate": 6.508854618722735e-06, "loss": 19.6795, "step": 231090 }, { "epoch": 0.4668366213229798, "grad_norm": 842.8447265625, "learning_rate": 6.508521822483518e-06, "loss": 28.8489, "step": 231100 }, { "epoch": 0.46685682195566364, "grad_norm": 162.7846221923828, "learning_rate": 6.508189018891948e-06, "loss": 17.244, "step": 231110 }, { "epoch": 0.46687702258834746, "grad_norm": 107.03738403320312, "learning_rate": 6.507856207949647e-06, "loss": 19.2886, "step": 231120 }, { "epoch": 0.4668972232210313, "grad_norm": 477.902099609375, "learning_rate": 6.507523389658238e-06, "loss": 10.162, "step": 231130 }, { "epoch": 0.4669174238537151, "grad_norm": 164.06076049804688, "learning_rate": 6.507190564019341e-06, "loss": 12.0646, "step": 231140 }, { "epoch": 0.4669376244863989, "grad_norm": 303.1253356933594, "learning_rate": 6.506857731034582e-06, "loss": 24.0215, "step": 231150 }, { "epoch": 0.46695782511908274, "grad_norm": 121.19132232666016, "learning_rate": 6.506524890705581e-06, "loss": 19.1999, "step": 231160 }, { "epoch": 0.46697802575176656, "grad_norm": 494.703125, "learning_rate": 6.50619204303396e-06, "loss": 16.9967, "step": 231170 }, { "epoch": 0.4669982263844504, "grad_norm": 162.5146942138672, "learning_rate": 6.5058591880213414e-06, "loss": 16.1802, "step": 231180 }, { "epoch": 0.4670184270171342, "grad_norm": 27.544509887695312, "learning_rate": 6.505526325669348e-06, "loss": 12.7986, "step": 231190 }, { "epoch": 0.467038627649818, "grad_norm": 119.78087615966797, "learning_rate": 6.505193455979603e-06, "loss": 33.6713, "step": 231200 }, { "epoch": 0.4670588282825018, "grad_norm": 163.53895568847656, "learning_rate": 6.504860578953727e-06, "loss": 20.8786, "step": 231210 }, { "epoch": 0.4670790289151856, "grad_norm": 414.9251708984375, "learning_rate": 6.504527694593342e-06, "loss": 18.2309, "step": 231220 }, { "epoch": 0.4670992295478694, "grad_norm": 218.24325561523438, "learning_rate": 6.504194802900072e-06, "loss": 11.4057, "step": 231230 }, { "epoch": 0.46711943018055324, "grad_norm": 243.98182678222656, "learning_rate": 6.50386190387554e-06, "loss": 7.1942, "step": 231240 }, { "epoch": 0.46713963081323706, "grad_norm": 1529.618408203125, "learning_rate": 6.503528997521365e-06, "loss": 25.5486, "step": 231250 }, { "epoch": 0.4671598314459209, "grad_norm": 311.8275146484375, "learning_rate": 6.503196083839175e-06, "loss": 24.0154, "step": 231260 }, { "epoch": 0.4671800320786047, "grad_norm": 335.69110107421875, "learning_rate": 6.502863162830589e-06, "loss": 17.9562, "step": 231270 }, { "epoch": 0.4672002327112885, "grad_norm": 275.45458984375, "learning_rate": 6.502530234497229e-06, "loss": 13.9319, "step": 231280 }, { "epoch": 0.46722043334397234, "grad_norm": 71.5420913696289, "learning_rate": 6.50219729884072e-06, "loss": 17.6779, "step": 231290 }, { "epoch": 0.46724063397665616, "grad_norm": 187.33135986328125, "learning_rate": 6.501864355862682e-06, "loss": 15.9068, "step": 231300 }, { "epoch": 0.46726083460934, "grad_norm": 216.73764038085938, "learning_rate": 6.50153140556474e-06, "loss": 17.7058, "step": 231310 }, { "epoch": 0.4672810352420238, "grad_norm": 163.68251037597656, "learning_rate": 6.5011984479485165e-06, "loss": 22.7241, "step": 231320 }, { "epoch": 0.4673012358747076, "grad_norm": 207.92857360839844, "learning_rate": 6.500865483015634e-06, "loss": 17.4214, "step": 231330 }, { "epoch": 0.4673214365073914, "grad_norm": 163.6634063720703, "learning_rate": 6.5005325107677145e-06, "loss": 20.4387, "step": 231340 }, { "epoch": 0.4673416371400752, "grad_norm": 196.6903076171875, "learning_rate": 6.500199531206381e-06, "loss": 14.139, "step": 231350 }, { "epoch": 0.467361837772759, "grad_norm": 390.139404296875, "learning_rate": 6.4998665443332575e-06, "loss": 56.0058, "step": 231360 }, { "epoch": 0.46738203840544285, "grad_norm": 295.7789611816406, "learning_rate": 6.499533550149968e-06, "loss": 18.604, "step": 231370 }, { "epoch": 0.46740223903812667, "grad_norm": 335.8055114746094, "learning_rate": 6.499200548658132e-06, "loss": 12.0463, "step": 231380 }, { "epoch": 0.4674224396708105, "grad_norm": 289.2165222167969, "learning_rate": 6.498867539859375e-06, "loss": 30.4793, "step": 231390 }, { "epoch": 0.4674426403034943, "grad_norm": 223.60891723632812, "learning_rate": 6.49853452375532e-06, "loss": 10.0338, "step": 231400 }, { "epoch": 0.4674628409361781, "grad_norm": 1140.06201171875, "learning_rate": 6.498201500347587e-06, "loss": 33.4566, "step": 231410 }, { "epoch": 0.46748304156886195, "grad_norm": 313.09222412109375, "learning_rate": 6.497868469637805e-06, "loss": 29.0201, "step": 231420 }, { "epoch": 0.46750324220154577, "grad_norm": 370.9849548339844, "learning_rate": 6.497535431627591e-06, "loss": 29.379, "step": 231430 }, { "epoch": 0.4675234428342296, "grad_norm": 232.8731231689453, "learning_rate": 6.497202386318573e-06, "loss": 26.7835, "step": 231440 }, { "epoch": 0.4675436434669134, "grad_norm": 485.19134521484375, "learning_rate": 6.49686933371237e-06, "loss": 9.589, "step": 231450 }, { "epoch": 0.4675638440995972, "grad_norm": 489.53021240234375, "learning_rate": 6.496536273810609e-06, "loss": 22.8981, "step": 231460 }, { "epoch": 0.467584044732281, "grad_norm": 267.0397033691406, "learning_rate": 6.496203206614912e-06, "loss": 18.457, "step": 231470 }, { "epoch": 0.4676042453649648, "grad_norm": 539.4376831054688, "learning_rate": 6.495870132126901e-06, "loss": 21.804, "step": 231480 }, { "epoch": 0.46762444599764863, "grad_norm": 261.42230224609375, "learning_rate": 6.495537050348201e-06, "loss": 15.1745, "step": 231490 }, { "epoch": 0.46764464663033245, "grad_norm": 290.3336181640625, "learning_rate": 6.495203961280434e-06, "loss": 16.2859, "step": 231500 }, { "epoch": 0.46766484726301627, "grad_norm": 402.15325927734375, "learning_rate": 6.494870864925225e-06, "loss": 17.7755, "step": 231510 }, { "epoch": 0.4676850478957001, "grad_norm": 366.45611572265625, "learning_rate": 6.494537761284197e-06, "loss": 15.8572, "step": 231520 }, { "epoch": 0.4677052485283839, "grad_norm": 334.94207763671875, "learning_rate": 6.494204650358973e-06, "loss": 10.0986, "step": 231530 }, { "epoch": 0.46772544916106773, "grad_norm": 199.52394104003906, "learning_rate": 6.493871532151176e-06, "loss": 25.4366, "step": 231540 }, { "epoch": 0.46774564979375155, "grad_norm": 346.4984436035156, "learning_rate": 6.493538406662429e-06, "loss": 17.8955, "step": 231550 }, { "epoch": 0.46776585042643537, "grad_norm": 0.0, "learning_rate": 6.493205273894361e-06, "loss": 14.7174, "step": 231560 }, { "epoch": 0.4677860510591192, "grad_norm": 32.30203628540039, "learning_rate": 6.492872133848588e-06, "loss": 16.3432, "step": 231570 }, { "epoch": 0.467806251691803, "grad_norm": 178.59661865234375, "learning_rate": 6.49253898652674e-06, "loss": 20.7835, "step": 231580 }, { "epoch": 0.46782645232448683, "grad_norm": 460.5017395019531, "learning_rate": 6.492205831930435e-06, "loss": 18.547, "step": 231590 }, { "epoch": 0.4678466529571706, "grad_norm": 197.56094360351562, "learning_rate": 6.491872670061302e-06, "loss": 28.1465, "step": 231600 }, { "epoch": 0.4678668535898544, "grad_norm": 79.94232177734375, "learning_rate": 6.491539500920962e-06, "loss": 19.8416, "step": 231610 }, { "epoch": 0.46788705422253823, "grad_norm": 124.6836166381836, "learning_rate": 6.491206324511039e-06, "loss": 8.7927, "step": 231620 }, { "epoch": 0.46790725485522205, "grad_norm": 0.0, "learning_rate": 6.490873140833158e-06, "loss": 25.2921, "step": 231630 }, { "epoch": 0.4679274554879059, "grad_norm": 256.8302307128906, "learning_rate": 6.490539949888942e-06, "loss": 27.3825, "step": 231640 }, { "epoch": 0.4679476561205897, "grad_norm": 201.8746337890625, "learning_rate": 6.490206751680015e-06, "loss": 20.3824, "step": 231650 }, { "epoch": 0.4679678567532735, "grad_norm": 491.857421875, "learning_rate": 6.489873546208e-06, "loss": 23.3467, "step": 231660 }, { "epoch": 0.46798805738595733, "grad_norm": 783.8392944335938, "learning_rate": 6.489540333474522e-06, "loss": 31.6718, "step": 231670 }, { "epoch": 0.46800825801864115, "grad_norm": 359.529296875, "learning_rate": 6.4892071134812065e-06, "loss": 20.353, "step": 231680 }, { "epoch": 0.468028458651325, "grad_norm": 54.70817184448242, "learning_rate": 6.4888738862296765e-06, "loss": 13.5455, "step": 231690 }, { "epoch": 0.4680486592840088, "grad_norm": 7.688113212585449, "learning_rate": 6.4885406517215535e-06, "loss": 15.8294, "step": 231700 }, { "epoch": 0.4680688599166926, "grad_norm": 161.0897979736328, "learning_rate": 6.488207409958466e-06, "loss": 17.3581, "step": 231710 }, { "epoch": 0.4680890605493764, "grad_norm": 53.752140045166016, "learning_rate": 6.487874160942035e-06, "loss": 28.3807, "step": 231720 }, { "epoch": 0.4681092611820602, "grad_norm": 240.3619842529297, "learning_rate": 6.487540904673886e-06, "loss": 16.8003, "step": 231730 }, { "epoch": 0.468129461814744, "grad_norm": 228.44674682617188, "learning_rate": 6.4872076411556436e-06, "loss": 13.2251, "step": 231740 }, { "epoch": 0.46814966244742784, "grad_norm": 61.76347351074219, "learning_rate": 6.48687437038893e-06, "loss": 14.7659, "step": 231750 }, { "epoch": 0.46816986308011166, "grad_norm": 529.3142700195312, "learning_rate": 6.486541092375374e-06, "loss": 12.0001, "step": 231760 }, { "epoch": 0.4681900637127955, "grad_norm": 2.3512043952941895, "learning_rate": 6.486207807116593e-06, "loss": 19.4135, "step": 231770 }, { "epoch": 0.4682102643454793, "grad_norm": 879.0709838867188, "learning_rate": 6.485874514614219e-06, "loss": 25.2563, "step": 231780 }, { "epoch": 0.4682304649781631, "grad_norm": 90.51962280273438, "learning_rate": 6.4855412148698704e-06, "loss": 9.3471, "step": 231790 }, { "epoch": 0.46825066561084694, "grad_norm": 607.75732421875, "learning_rate": 6.485207907885175e-06, "loss": 28.4367, "step": 231800 }, { "epoch": 0.46827086624353076, "grad_norm": 245.41409301757812, "learning_rate": 6.484874593661757e-06, "loss": 20.4242, "step": 231810 }, { "epoch": 0.4682910668762146, "grad_norm": 109.6788101196289, "learning_rate": 6.484541272201239e-06, "loss": 26.7687, "step": 231820 }, { "epoch": 0.4683112675088984, "grad_norm": 308.90606689453125, "learning_rate": 6.484207943505249e-06, "loss": 18.7888, "step": 231830 }, { "epoch": 0.4683314681415822, "grad_norm": 318.3598327636719, "learning_rate": 6.483874607575407e-06, "loss": 18.1276, "step": 231840 }, { "epoch": 0.468351668774266, "grad_norm": 378.33038330078125, "learning_rate": 6.483541264413342e-06, "loss": 19.3503, "step": 231850 }, { "epoch": 0.4683718694069498, "grad_norm": 183.73098754882812, "learning_rate": 6.483207914020675e-06, "loss": 20.4318, "step": 231860 }, { "epoch": 0.4683920700396336, "grad_norm": 225.0318603515625, "learning_rate": 6.4828745563990345e-06, "loss": 25.9403, "step": 231870 }, { "epoch": 0.46841227067231744, "grad_norm": 269.74688720703125, "learning_rate": 6.482541191550042e-06, "loss": 27.3955, "step": 231880 }, { "epoch": 0.46843247130500126, "grad_norm": 151.0627899169922, "learning_rate": 6.482207819475324e-06, "loss": 11.7558, "step": 231890 }, { "epoch": 0.4684526719376851, "grad_norm": 233.68695068359375, "learning_rate": 6.481874440176506e-06, "loss": 11.4674, "step": 231900 }, { "epoch": 0.4684728725703689, "grad_norm": 190.1412353515625, "learning_rate": 6.481541053655209e-06, "loss": 33.7625, "step": 231910 }, { "epoch": 0.4684930732030527, "grad_norm": 627.3602905273438, "learning_rate": 6.481207659913062e-06, "loss": 25.7014, "step": 231920 }, { "epoch": 0.46851327383573654, "grad_norm": 241.83543395996094, "learning_rate": 6.4808742589516895e-06, "loss": 33.2503, "step": 231930 }, { "epoch": 0.46853347446842036, "grad_norm": 334.98663330078125, "learning_rate": 6.480540850772714e-06, "loss": 21.2077, "step": 231940 }, { "epoch": 0.4685536751011042, "grad_norm": 511.91937255859375, "learning_rate": 6.4802074353777625e-06, "loss": 12.9663, "step": 231950 }, { "epoch": 0.468573875733788, "grad_norm": 66.21238708496094, "learning_rate": 6.479874012768459e-06, "loss": 22.3053, "step": 231960 }, { "epoch": 0.4685940763664718, "grad_norm": 298.9091491699219, "learning_rate": 6.479540582946431e-06, "loss": 26.4403, "step": 231970 }, { "epoch": 0.4686142769991556, "grad_norm": 203.15594482421875, "learning_rate": 6.4792071459133e-06, "loss": 15.0163, "step": 231980 }, { "epoch": 0.4686344776318394, "grad_norm": 495.4629821777344, "learning_rate": 6.478873701670693e-06, "loss": 34.5208, "step": 231990 }, { "epoch": 0.46865467826452323, "grad_norm": 30.715930938720703, "learning_rate": 6.4785402502202345e-06, "loss": 20.0813, "step": 232000 }, { "epoch": 0.46867487889720705, "grad_norm": 319.82568359375, "learning_rate": 6.47820679156355e-06, "loss": 20.2321, "step": 232010 }, { "epoch": 0.46869507952989087, "grad_norm": 460.34625244140625, "learning_rate": 6.477873325702265e-06, "loss": 12.8652, "step": 232020 }, { "epoch": 0.4687152801625747, "grad_norm": 516.3229370117188, "learning_rate": 6.477539852638006e-06, "loss": 10.61, "step": 232030 }, { "epoch": 0.4687354807952585, "grad_norm": 203.33963012695312, "learning_rate": 6.477206372372396e-06, "loss": 11.4246, "step": 232040 }, { "epoch": 0.46875568142794233, "grad_norm": 817.948974609375, "learning_rate": 6.476872884907061e-06, "loss": 20.8217, "step": 232050 }, { "epoch": 0.46877588206062615, "grad_norm": 516.287353515625, "learning_rate": 6.476539390243627e-06, "loss": 42.7068, "step": 232060 }, { "epoch": 0.46879608269330997, "grad_norm": 299.7341613769531, "learning_rate": 6.47620588838372e-06, "loss": 14.0853, "step": 232070 }, { "epoch": 0.4688162833259938, "grad_norm": 101.63556671142578, "learning_rate": 6.475872379328964e-06, "loss": 22.2704, "step": 232080 }, { "epoch": 0.4688364839586776, "grad_norm": 49.69912338256836, "learning_rate": 6.475538863080985e-06, "loss": 33.3604, "step": 232090 }, { "epoch": 0.46885668459136143, "grad_norm": 106.36318969726562, "learning_rate": 6.4752053396414075e-06, "loss": 11.8562, "step": 232100 }, { "epoch": 0.4688768852240452, "grad_norm": 321.5381164550781, "learning_rate": 6.474871809011858e-06, "loss": 20.5615, "step": 232110 }, { "epoch": 0.468897085856729, "grad_norm": 208.30982971191406, "learning_rate": 6.474538271193963e-06, "loss": 24.761, "step": 232120 }, { "epoch": 0.46891728648941283, "grad_norm": 258.8935546875, "learning_rate": 6.4742047261893485e-06, "loss": 11.3584, "step": 232130 }, { "epoch": 0.46893748712209665, "grad_norm": 138.50404357910156, "learning_rate": 6.473871173999637e-06, "loss": 19.2843, "step": 232140 }, { "epoch": 0.4689576877547805, "grad_norm": 58.71372985839844, "learning_rate": 6.473537614626457e-06, "loss": 11.397, "step": 232150 }, { "epoch": 0.4689778883874643, "grad_norm": 8.039374351501465, "learning_rate": 6.473204048071433e-06, "loss": 23.7232, "step": 232160 }, { "epoch": 0.4689980890201481, "grad_norm": 148.84335327148438, "learning_rate": 6.472870474336192e-06, "loss": 19.0992, "step": 232170 }, { "epoch": 0.46901828965283193, "grad_norm": 438.9931945800781, "learning_rate": 6.472536893422359e-06, "loss": 29.1272, "step": 232180 }, { "epoch": 0.46903849028551575, "grad_norm": 30.119171142578125, "learning_rate": 6.472203305331559e-06, "loss": 9.7105, "step": 232190 }, { "epoch": 0.4690586909181996, "grad_norm": 274.2230224609375, "learning_rate": 6.471869710065418e-06, "loss": 20.0438, "step": 232200 }, { "epoch": 0.4690788915508834, "grad_norm": 278.66461181640625, "learning_rate": 6.471536107625565e-06, "loss": 24.8579, "step": 232210 }, { "epoch": 0.4690990921835672, "grad_norm": 434.7815246582031, "learning_rate": 6.4712024980136215e-06, "loss": 37.3357, "step": 232220 }, { "epoch": 0.46911929281625103, "grad_norm": 132.94955444335938, "learning_rate": 6.470868881231215e-06, "loss": 13.5838, "step": 232230 }, { "epoch": 0.4691394934489348, "grad_norm": 312.9062805175781, "learning_rate": 6.470535257279974e-06, "loss": 26.832, "step": 232240 }, { "epoch": 0.4691596940816186, "grad_norm": 77.25293731689453, "learning_rate": 6.47020162616152e-06, "loss": 16.7692, "step": 232250 }, { "epoch": 0.46917989471430244, "grad_norm": 280.4794616699219, "learning_rate": 6.469867987877484e-06, "loss": 19.135, "step": 232260 }, { "epoch": 0.46920009534698626, "grad_norm": 265.2571105957031, "learning_rate": 6.469534342429489e-06, "loss": 19.6037, "step": 232270 }, { "epoch": 0.4692202959796701, "grad_norm": 281.8870544433594, "learning_rate": 6.469200689819161e-06, "loss": 14.5735, "step": 232280 }, { "epoch": 0.4692404966123539, "grad_norm": 139.43447875976562, "learning_rate": 6.468867030048127e-06, "loss": 18.2882, "step": 232290 }, { "epoch": 0.4692606972450377, "grad_norm": 507.5799255371094, "learning_rate": 6.4685333631180145e-06, "loss": 30.7714, "step": 232300 }, { "epoch": 0.46928089787772154, "grad_norm": 402.88134765625, "learning_rate": 6.468199689030448e-06, "loss": 20.5358, "step": 232310 }, { "epoch": 0.46930109851040536, "grad_norm": 197.3392333984375, "learning_rate": 6.467866007787053e-06, "loss": 24.4778, "step": 232320 }, { "epoch": 0.4693212991430892, "grad_norm": 128.0878143310547, "learning_rate": 6.467532319389458e-06, "loss": 25.5539, "step": 232330 }, { "epoch": 0.469341499775773, "grad_norm": 394.4768371582031, "learning_rate": 6.467198623839288e-06, "loss": 16.4874, "step": 232340 }, { "epoch": 0.4693617004084568, "grad_norm": 378.4883117675781, "learning_rate": 6.466864921138171e-06, "loss": 27.0374, "step": 232350 }, { "epoch": 0.4693819010411406, "grad_norm": 190.90122985839844, "learning_rate": 6.4665312112877325e-06, "loss": 14.1923, "step": 232360 }, { "epoch": 0.4694021016738244, "grad_norm": 104.33226776123047, "learning_rate": 6.466197494289598e-06, "loss": 10.9908, "step": 232370 }, { "epoch": 0.4694223023065082, "grad_norm": 497.0327453613281, "learning_rate": 6.465863770145394e-06, "loss": 23.3687, "step": 232380 }, { "epoch": 0.46944250293919204, "grad_norm": 86.55400085449219, "learning_rate": 6.4655300388567485e-06, "loss": 14.5338, "step": 232390 }, { "epoch": 0.46946270357187586, "grad_norm": 588.1866455078125, "learning_rate": 6.465196300425287e-06, "loss": 28.5935, "step": 232400 }, { "epoch": 0.4694829042045597, "grad_norm": 521.6524658203125, "learning_rate": 6.464862554852638e-06, "loss": 30.0115, "step": 232410 }, { "epoch": 0.4695031048372435, "grad_norm": 422.8952331542969, "learning_rate": 6.464528802140425e-06, "loss": 17.1896, "step": 232420 }, { "epoch": 0.4695233054699273, "grad_norm": 308.5865478515625, "learning_rate": 6.464195042290277e-06, "loss": 17.1574, "step": 232430 }, { "epoch": 0.46954350610261114, "grad_norm": 101.93814086914062, "learning_rate": 6.463861275303819e-06, "loss": 14.3597, "step": 232440 }, { "epoch": 0.46956370673529496, "grad_norm": 1502.4388427734375, "learning_rate": 6.463527501182679e-06, "loss": 30.2708, "step": 232450 }, { "epoch": 0.4695839073679788, "grad_norm": 380.3918762207031, "learning_rate": 6.4631937199284845e-06, "loss": 11.8367, "step": 232460 }, { "epoch": 0.4696041080006626, "grad_norm": 0.9162931442260742, "learning_rate": 6.46285993154286e-06, "loss": 10.1734, "step": 232470 }, { "epoch": 0.4696243086333464, "grad_norm": 272.07745361328125, "learning_rate": 6.462526136027435e-06, "loss": 14.8943, "step": 232480 }, { "epoch": 0.4696445092660302, "grad_norm": 87.03410339355469, "learning_rate": 6.462192333383834e-06, "loss": 13.3373, "step": 232490 }, { "epoch": 0.469664709898714, "grad_norm": 306.4310302734375, "learning_rate": 6.461858523613684e-06, "loss": 18.3188, "step": 232500 }, { "epoch": 0.4696849105313978, "grad_norm": 81.42255401611328, "learning_rate": 6.461524706718615e-06, "loss": 13.0086, "step": 232510 }, { "epoch": 0.46970511116408165, "grad_norm": 9.509613037109375, "learning_rate": 6.4611908827002504e-06, "loss": 25.4998, "step": 232520 }, { "epoch": 0.46972531179676547, "grad_norm": 171.20574951171875, "learning_rate": 6.460857051560219e-06, "loss": 14.2148, "step": 232530 }, { "epoch": 0.4697455124294493, "grad_norm": 216.23812866210938, "learning_rate": 6.4605232133001474e-06, "loss": 16.1421, "step": 232540 }, { "epoch": 0.4697657130621331, "grad_norm": 263.3742980957031, "learning_rate": 6.460189367921663e-06, "loss": 16.7177, "step": 232550 }, { "epoch": 0.4697859136948169, "grad_norm": 181.4710235595703, "learning_rate": 6.459855515426392e-06, "loss": 28.0333, "step": 232560 }, { "epoch": 0.46980611432750075, "grad_norm": 0.0, "learning_rate": 6.459521655815962e-06, "loss": 17.1031, "step": 232570 }, { "epoch": 0.46982631496018457, "grad_norm": 133.1177520751953, "learning_rate": 6.459187789092003e-06, "loss": 17.8965, "step": 232580 }, { "epoch": 0.4698465155928684, "grad_norm": 564.846435546875, "learning_rate": 6.4588539152561384e-06, "loss": 27.002, "step": 232590 }, { "epoch": 0.4698667162255522, "grad_norm": 241.921142578125, "learning_rate": 6.458520034309995e-06, "loss": 43.2538, "step": 232600 }, { "epoch": 0.469886916858236, "grad_norm": 524.8583374023438, "learning_rate": 6.458186146255203e-06, "loss": 33.3565, "step": 232610 }, { "epoch": 0.4699071174909198, "grad_norm": 358.4457702636719, "learning_rate": 6.457852251093391e-06, "loss": 11.6178, "step": 232620 }, { "epoch": 0.4699273181236036, "grad_norm": 462.76861572265625, "learning_rate": 6.4575183488261795e-06, "loss": 36.2307, "step": 232630 }, { "epoch": 0.46994751875628743, "grad_norm": 196.3909912109375, "learning_rate": 6.457184439455204e-06, "loss": 17.4604, "step": 232640 }, { "epoch": 0.46996771938897125, "grad_norm": 363.1448974609375, "learning_rate": 6.456850522982086e-06, "loss": 15.0005, "step": 232650 }, { "epoch": 0.46998792002165507, "grad_norm": 371.06903076171875, "learning_rate": 6.456516599408457e-06, "loss": 18.1092, "step": 232660 }, { "epoch": 0.4700081206543389, "grad_norm": 13.545893669128418, "learning_rate": 6.456182668735941e-06, "loss": 20.0977, "step": 232670 }, { "epoch": 0.4700283212870227, "grad_norm": 122.81166076660156, "learning_rate": 6.455848730966168e-06, "loss": 31.1159, "step": 232680 }, { "epoch": 0.47004852191970653, "grad_norm": 344.5368347167969, "learning_rate": 6.455514786100766e-06, "loss": 13.1486, "step": 232690 }, { "epoch": 0.47006872255239035, "grad_norm": 828.336181640625, "learning_rate": 6.455180834141359e-06, "loss": 20.0466, "step": 232700 }, { "epoch": 0.47008892318507417, "grad_norm": 345.0340881347656, "learning_rate": 6.45484687508958e-06, "loss": 16.435, "step": 232710 }, { "epoch": 0.470109123817758, "grad_norm": 261.5340270996094, "learning_rate": 6.454512908947052e-06, "loss": 9.944, "step": 232720 }, { "epoch": 0.4701293244504418, "grad_norm": 193.43399047851562, "learning_rate": 6.454178935715405e-06, "loss": 15.1829, "step": 232730 }, { "epoch": 0.47014952508312563, "grad_norm": 310.5386657714844, "learning_rate": 6.453844955396265e-06, "loss": 12.8029, "step": 232740 }, { "epoch": 0.4701697257158094, "grad_norm": 110.00629425048828, "learning_rate": 6.453510967991263e-06, "loss": 21.1641, "step": 232750 }, { "epoch": 0.4701899263484932, "grad_norm": 352.7306823730469, "learning_rate": 6.453176973502024e-06, "loss": 30.9939, "step": 232760 }, { "epoch": 0.47021012698117703, "grad_norm": 435.7456970214844, "learning_rate": 6.4528429719301764e-06, "loss": 16.4996, "step": 232770 }, { "epoch": 0.47023032761386085, "grad_norm": 495.9881591796875, "learning_rate": 6.452508963277348e-06, "loss": 15.3967, "step": 232780 }, { "epoch": 0.4702505282465447, "grad_norm": 62.631587982177734, "learning_rate": 6.452174947545169e-06, "loss": 15.3167, "step": 232790 }, { "epoch": 0.4702707288792285, "grad_norm": 63.94768524169922, "learning_rate": 6.451840924735264e-06, "loss": 27.6729, "step": 232800 }, { "epoch": 0.4702909295119123, "grad_norm": 10.005905151367188, "learning_rate": 6.451506894849262e-06, "loss": 8.752, "step": 232810 }, { "epoch": 0.47031113014459613, "grad_norm": 70.2020263671875, "learning_rate": 6.451172857888792e-06, "loss": 16.6736, "step": 232820 }, { "epoch": 0.47033133077727995, "grad_norm": 0.0, "learning_rate": 6.450838813855482e-06, "loss": 16.6979, "step": 232830 }, { "epoch": 0.4703515314099638, "grad_norm": 201.6558074951172, "learning_rate": 6.450504762750959e-06, "loss": 25.8618, "step": 232840 }, { "epoch": 0.4703717320426476, "grad_norm": 271.12908935546875, "learning_rate": 6.4501707045768524e-06, "loss": 39.0924, "step": 232850 }, { "epoch": 0.4703919326753314, "grad_norm": 234.72320556640625, "learning_rate": 6.449836639334788e-06, "loss": 20.7432, "step": 232860 }, { "epoch": 0.47041213330801523, "grad_norm": 409.29156494140625, "learning_rate": 6.449502567026398e-06, "loss": 23.1249, "step": 232870 }, { "epoch": 0.470432333940699, "grad_norm": 54.60081481933594, "learning_rate": 6.449168487653305e-06, "loss": 19.7971, "step": 232880 }, { "epoch": 0.4704525345733828, "grad_norm": 212.0153045654297, "learning_rate": 6.448834401217143e-06, "loss": 11.9834, "step": 232890 }, { "epoch": 0.47047273520606664, "grad_norm": 527.7559814453125, "learning_rate": 6.448500307719537e-06, "loss": 20.0488, "step": 232900 }, { "epoch": 0.47049293583875046, "grad_norm": 207.31300354003906, "learning_rate": 6.448166207162119e-06, "loss": 14.6916, "step": 232910 }, { "epoch": 0.4705131364714343, "grad_norm": 331.0078125, "learning_rate": 6.447832099546512e-06, "loss": 14.4875, "step": 232920 }, { "epoch": 0.4705333371041181, "grad_norm": 317.8180847167969, "learning_rate": 6.4474979848743455e-06, "loss": 16.9049, "step": 232930 }, { "epoch": 0.4705535377368019, "grad_norm": 343.4740905761719, "learning_rate": 6.447163863147251e-06, "loss": 26.6887, "step": 232940 }, { "epoch": 0.47057373836948574, "grad_norm": 206.89491271972656, "learning_rate": 6.446829734366855e-06, "loss": 22.3566, "step": 232950 }, { "epoch": 0.47059393900216956, "grad_norm": 222.6850128173828, "learning_rate": 6.446495598534787e-06, "loss": 25.789, "step": 232960 }, { "epoch": 0.4706141396348534, "grad_norm": 532.7865600585938, "learning_rate": 6.446161455652674e-06, "loss": 18.8526, "step": 232970 }, { "epoch": 0.4706343402675372, "grad_norm": 283.31103515625, "learning_rate": 6.445827305722148e-06, "loss": 7.0304, "step": 232980 }, { "epoch": 0.470654540900221, "grad_norm": 280.9999694824219, "learning_rate": 6.445493148744832e-06, "loss": 17.7744, "step": 232990 }, { "epoch": 0.4706747415329048, "grad_norm": 99.53065490722656, "learning_rate": 6.445158984722358e-06, "loss": 15.4126, "step": 233000 }, { "epoch": 0.4706949421655886, "grad_norm": 423.7823486328125, "learning_rate": 6.444824813656356e-06, "loss": 24.8275, "step": 233010 }, { "epoch": 0.4707151427982724, "grad_norm": 351.2669372558594, "learning_rate": 6.444490635548451e-06, "loss": 17.4738, "step": 233020 }, { "epoch": 0.47073534343095624, "grad_norm": 569.9388427734375, "learning_rate": 6.444156450400276e-06, "loss": 12.149, "step": 233030 }, { "epoch": 0.47075554406364006, "grad_norm": 316.74322509765625, "learning_rate": 6.443822258213457e-06, "loss": 18.8063, "step": 233040 }, { "epoch": 0.4707757446963239, "grad_norm": 116.57533264160156, "learning_rate": 6.443488058989623e-06, "loss": 14.7191, "step": 233050 }, { "epoch": 0.4707959453290077, "grad_norm": 197.58639526367188, "learning_rate": 6.443153852730404e-06, "loss": 21.6546, "step": 233060 }, { "epoch": 0.4708161459616915, "grad_norm": 188.0845947265625, "learning_rate": 6.4428196394374275e-06, "loss": 8.5837, "step": 233070 }, { "epoch": 0.47083634659437534, "grad_norm": 19.88582992553711, "learning_rate": 6.442485419112322e-06, "loss": 23.3568, "step": 233080 }, { "epoch": 0.47085654722705916, "grad_norm": 282.4696960449219, "learning_rate": 6.44215119175672e-06, "loss": 31.951, "step": 233090 }, { "epoch": 0.470876747859743, "grad_norm": 106.4129409790039, "learning_rate": 6.441816957372247e-06, "loss": 20.5073, "step": 233100 }, { "epoch": 0.4708969484924268, "grad_norm": 205.73471069335938, "learning_rate": 6.441482715960532e-06, "loss": 13.5262, "step": 233110 }, { "epoch": 0.4709171491251106, "grad_norm": 405.51715087890625, "learning_rate": 6.441148467523206e-06, "loss": 24.2442, "step": 233120 }, { "epoch": 0.4709373497577944, "grad_norm": 33.54397201538086, "learning_rate": 6.440814212061897e-06, "loss": 59.6108, "step": 233130 }, { "epoch": 0.4709575503904782, "grad_norm": 267.5323181152344, "learning_rate": 6.440479949578234e-06, "loss": 17.0504, "step": 233140 }, { "epoch": 0.47097775102316203, "grad_norm": 306.4904479980469, "learning_rate": 6.440145680073847e-06, "loss": 14.0819, "step": 233150 }, { "epoch": 0.47099795165584585, "grad_norm": 191.40231323242188, "learning_rate": 6.4398114035503644e-06, "loss": 13.9212, "step": 233160 }, { "epoch": 0.47101815228852967, "grad_norm": 525.0487670898438, "learning_rate": 6.4394771200094156e-06, "loss": 20.1043, "step": 233170 }, { "epoch": 0.4710383529212135, "grad_norm": 245.93910217285156, "learning_rate": 6.439142829452629e-06, "loss": 18.6184, "step": 233180 }, { "epoch": 0.4710585535538973, "grad_norm": 202.55079650878906, "learning_rate": 6.438808531881637e-06, "loss": 26.6751, "step": 233190 }, { "epoch": 0.47107875418658113, "grad_norm": 230.95042419433594, "learning_rate": 6.438474227298065e-06, "loss": 15.7802, "step": 233200 }, { "epoch": 0.47109895481926495, "grad_norm": 310.6934814453125, "learning_rate": 6.438139915703544e-06, "loss": 20.046, "step": 233210 }, { "epoch": 0.47111915545194877, "grad_norm": 237.0054168701172, "learning_rate": 6.437805597099704e-06, "loss": 43.1146, "step": 233220 }, { "epoch": 0.4711393560846326, "grad_norm": 268.99847412109375, "learning_rate": 6.437471271488174e-06, "loss": 9.9996, "step": 233230 }, { "epoch": 0.4711595567173164, "grad_norm": 66.05647277832031, "learning_rate": 6.437136938870583e-06, "loss": 17.8076, "step": 233240 }, { "epoch": 0.47117975735000023, "grad_norm": 443.460693359375, "learning_rate": 6.4368025992485615e-06, "loss": 18.9197, "step": 233250 }, { "epoch": 0.471199957982684, "grad_norm": 0.0, "learning_rate": 6.436468252623738e-06, "loss": 7.3949, "step": 233260 }, { "epoch": 0.4712201586153678, "grad_norm": 635.0155029296875, "learning_rate": 6.436133898997742e-06, "loss": 34.0142, "step": 233270 }, { "epoch": 0.47124035924805163, "grad_norm": 315.0978698730469, "learning_rate": 6.4357995383722025e-06, "loss": 12.0306, "step": 233280 }, { "epoch": 0.47126055988073545, "grad_norm": 275.10089111328125, "learning_rate": 6.435465170748753e-06, "loss": 10.6176, "step": 233290 }, { "epoch": 0.4712807605134193, "grad_norm": 134.50225830078125, "learning_rate": 6.435130796129019e-06, "loss": 12.4452, "step": 233300 }, { "epoch": 0.4713009611461031, "grad_norm": 198.2256317138672, "learning_rate": 6.43479641451463e-06, "loss": 23.5337, "step": 233310 }, { "epoch": 0.4713211617787869, "grad_norm": 555.7753295898438, "learning_rate": 6.43446202590722e-06, "loss": 16.4897, "step": 233320 }, { "epoch": 0.47134136241147073, "grad_norm": 678.7932739257812, "learning_rate": 6.434127630308415e-06, "loss": 32.3314, "step": 233330 }, { "epoch": 0.47136156304415455, "grad_norm": 312.53338623046875, "learning_rate": 6.433793227719845e-06, "loss": 24.0308, "step": 233340 }, { "epoch": 0.4713817636768384, "grad_norm": 384.4660949707031, "learning_rate": 6.4334588181431424e-06, "loss": 20.8908, "step": 233350 }, { "epoch": 0.4714019643095222, "grad_norm": 413.91961669921875, "learning_rate": 6.433124401579936e-06, "loss": 11.3365, "step": 233360 }, { "epoch": 0.471422164942206, "grad_norm": 215.5688018798828, "learning_rate": 6.432789978031852e-06, "loss": 14.1623, "step": 233370 }, { "epoch": 0.47144236557488983, "grad_norm": 257.6873474121094, "learning_rate": 6.432455547500525e-06, "loss": 13.1157, "step": 233380 }, { "epoch": 0.4714625662075736, "grad_norm": 121.64051055908203, "learning_rate": 6.432121109987584e-06, "loss": 39.882, "step": 233390 }, { "epoch": 0.4714827668402574, "grad_norm": 354.84857177734375, "learning_rate": 6.431786665494657e-06, "loss": 12.3161, "step": 233400 }, { "epoch": 0.47150296747294124, "grad_norm": 0.0, "learning_rate": 6.431452214023377e-06, "loss": 11.9814, "step": 233410 }, { "epoch": 0.47152316810562506, "grad_norm": 31.00705337524414, "learning_rate": 6.431117755575371e-06, "loss": 15.9407, "step": 233420 }, { "epoch": 0.4715433687383089, "grad_norm": 208.39344787597656, "learning_rate": 6.430783290152272e-06, "loss": 22.1216, "step": 233430 }, { "epoch": 0.4715635693709927, "grad_norm": 365.3531494140625, "learning_rate": 6.430448817755708e-06, "loss": 14.0217, "step": 233440 }, { "epoch": 0.4715837700036765, "grad_norm": 103.14920043945312, "learning_rate": 6.43011433838731e-06, "loss": 8.8408, "step": 233450 }, { "epoch": 0.47160397063636034, "grad_norm": 321.92828369140625, "learning_rate": 6.429779852048709e-06, "loss": 15.0182, "step": 233460 }, { "epoch": 0.47162417126904416, "grad_norm": 179.32608032226562, "learning_rate": 6.429445358741533e-06, "loss": 17.683, "step": 233470 }, { "epoch": 0.471644371901728, "grad_norm": 0.45440244674682617, "learning_rate": 6.429110858467414e-06, "loss": 10.7271, "step": 233480 }, { "epoch": 0.4716645725344118, "grad_norm": 271.6562194824219, "learning_rate": 6.428776351227984e-06, "loss": 24.5991, "step": 233490 }, { "epoch": 0.4716847731670956, "grad_norm": 0.0, "learning_rate": 6.428441837024868e-06, "loss": 15.4384, "step": 233500 }, { "epoch": 0.47170497379977944, "grad_norm": 218.00396728515625, "learning_rate": 6.428107315859702e-06, "loss": 21.7423, "step": 233510 }, { "epoch": 0.4717251744324632, "grad_norm": 136.46002197265625, "learning_rate": 6.427772787734114e-06, "loss": 23.3291, "step": 233520 }, { "epoch": 0.471745375065147, "grad_norm": 209.2935028076172, "learning_rate": 6.4274382526497335e-06, "loss": 17.9011, "step": 233530 }, { "epoch": 0.47176557569783084, "grad_norm": 343.1158142089844, "learning_rate": 6.427103710608193e-06, "loss": 15.8593, "step": 233540 }, { "epoch": 0.47178577633051466, "grad_norm": 477.4010314941406, "learning_rate": 6.426769161611122e-06, "loss": 18.1705, "step": 233550 }, { "epoch": 0.4718059769631985, "grad_norm": 43.846797943115234, "learning_rate": 6.426434605660151e-06, "loss": 16.8933, "step": 233560 }, { "epoch": 0.4718261775958823, "grad_norm": 179.85980224609375, "learning_rate": 6.426100042756912e-06, "loss": 19.9745, "step": 233570 }, { "epoch": 0.4718463782285661, "grad_norm": 129.3119354248047, "learning_rate": 6.425765472903032e-06, "loss": 23.8931, "step": 233580 }, { "epoch": 0.47186657886124994, "grad_norm": 495.0324401855469, "learning_rate": 6.425430896100145e-06, "loss": 21.6816, "step": 233590 }, { "epoch": 0.47188677949393376, "grad_norm": 326.224609375, "learning_rate": 6.425096312349881e-06, "loss": 14.6968, "step": 233600 }, { "epoch": 0.4719069801266176, "grad_norm": 564.86181640625, "learning_rate": 6.424761721653871e-06, "loss": 19.8048, "step": 233610 }, { "epoch": 0.4719271807593014, "grad_norm": 428.2468566894531, "learning_rate": 6.4244271240137435e-06, "loss": 19.898, "step": 233620 }, { "epoch": 0.4719473813919852, "grad_norm": 622.1227416992188, "learning_rate": 6.424092519431132e-06, "loss": 28.0535, "step": 233630 }, { "epoch": 0.471967582024669, "grad_norm": 240.3117218017578, "learning_rate": 6.423757907907667e-06, "loss": 11.8669, "step": 233640 }, { "epoch": 0.4719877826573528, "grad_norm": 459.6439514160156, "learning_rate": 6.423423289444978e-06, "loss": 20.1466, "step": 233650 }, { "epoch": 0.4720079832900366, "grad_norm": 820.3690185546875, "learning_rate": 6.423088664044696e-06, "loss": 26.2377, "step": 233660 }, { "epoch": 0.47202818392272045, "grad_norm": 0.0, "learning_rate": 6.422754031708453e-06, "loss": 15.7828, "step": 233670 }, { "epoch": 0.47204838455540427, "grad_norm": 166.76437377929688, "learning_rate": 6.422419392437879e-06, "loss": 13.48, "step": 233680 }, { "epoch": 0.4720685851880881, "grad_norm": 14.039334297180176, "learning_rate": 6.422084746234605e-06, "loss": 15.2625, "step": 233690 }, { "epoch": 0.4720887858207719, "grad_norm": 80.41558074951172, "learning_rate": 6.421750093100264e-06, "loss": 12.9797, "step": 233700 }, { "epoch": 0.4721089864534557, "grad_norm": 227.63873291015625, "learning_rate": 6.421415433036484e-06, "loss": 26.2145, "step": 233710 }, { "epoch": 0.47212918708613955, "grad_norm": 298.2243957519531, "learning_rate": 6.421080766044898e-06, "loss": 28.7359, "step": 233720 }, { "epoch": 0.47214938771882337, "grad_norm": 410.8390197753906, "learning_rate": 6.420746092127138e-06, "loss": 28.5321, "step": 233730 }, { "epoch": 0.4721695883515072, "grad_norm": 321.17413330078125, "learning_rate": 6.420411411284831e-06, "loss": 26.1646, "step": 233740 }, { "epoch": 0.472189788984191, "grad_norm": 207.69944763183594, "learning_rate": 6.420076723519615e-06, "loss": 13.2815, "step": 233750 }, { "epoch": 0.4722099896168748, "grad_norm": 97.4810562133789, "learning_rate": 6.419742028833114e-06, "loss": 16.5357, "step": 233760 }, { "epoch": 0.4722301902495586, "grad_norm": 0.0, "learning_rate": 6.419407327226963e-06, "loss": 21.7141, "step": 233770 }, { "epoch": 0.4722503908822424, "grad_norm": 272.4669189453125, "learning_rate": 6.419072618702794e-06, "loss": 22.4282, "step": 233780 }, { "epoch": 0.47227059151492623, "grad_norm": 595.9771118164062, "learning_rate": 6.4187379032622355e-06, "loss": 28.3181, "step": 233790 }, { "epoch": 0.47229079214761005, "grad_norm": 335.84619140625, "learning_rate": 6.418403180906923e-06, "loss": 28.1524, "step": 233800 }, { "epoch": 0.47231099278029387, "grad_norm": 63.154170989990234, "learning_rate": 6.418068451638484e-06, "loss": 33.2298, "step": 233810 }, { "epoch": 0.4723311934129777, "grad_norm": 280.00250244140625, "learning_rate": 6.4177337154585514e-06, "loss": 9.8949, "step": 233820 }, { "epoch": 0.4723513940456615, "grad_norm": 262.990966796875, "learning_rate": 6.417398972368756e-06, "loss": 24.363, "step": 233830 }, { "epoch": 0.47237159467834533, "grad_norm": 248.55299377441406, "learning_rate": 6.41706422237073e-06, "loss": 18.3962, "step": 233840 }, { "epoch": 0.47239179531102915, "grad_norm": 31.376943588256836, "learning_rate": 6.416729465466106e-06, "loss": 46.3407, "step": 233850 }, { "epoch": 0.47241199594371297, "grad_norm": 343.4852294921875, "learning_rate": 6.416394701656514e-06, "loss": 17.4591, "step": 233860 }, { "epoch": 0.4724321965763968, "grad_norm": 59.138973236083984, "learning_rate": 6.416059930943586e-06, "loss": 21.4465, "step": 233870 }, { "epoch": 0.4724523972090806, "grad_norm": 416.2904052734375, "learning_rate": 6.415725153328953e-06, "loss": 30.9271, "step": 233880 }, { "epoch": 0.47247259784176443, "grad_norm": 505.4678649902344, "learning_rate": 6.41539036881425e-06, "loss": 20.0692, "step": 233890 }, { "epoch": 0.4724927984744482, "grad_norm": 304.62017822265625, "learning_rate": 6.415055577401101e-06, "loss": 21.5853, "step": 233900 }, { "epoch": 0.472512999107132, "grad_norm": 981.3473510742188, "learning_rate": 6.414720779091147e-06, "loss": 31.7016, "step": 233910 }, { "epoch": 0.47253319973981583, "grad_norm": 474.8854675292969, "learning_rate": 6.414385973886012e-06, "loss": 17.3203, "step": 233920 }, { "epoch": 0.47255340037249965, "grad_norm": 1594.6826171875, "learning_rate": 6.414051161787334e-06, "loss": 29.8812, "step": 233930 }, { "epoch": 0.4725736010051835, "grad_norm": 284.396728515625, "learning_rate": 6.4137163427967415e-06, "loss": 25.5088, "step": 233940 }, { "epoch": 0.4725938016378673, "grad_norm": 145.97975158691406, "learning_rate": 6.413381516915868e-06, "loss": 18.6164, "step": 233950 }, { "epoch": 0.4726140022705511, "grad_norm": 193.2185821533203, "learning_rate": 6.413046684146343e-06, "loss": 19.4244, "step": 233960 }, { "epoch": 0.47263420290323493, "grad_norm": 94.16001892089844, "learning_rate": 6.412711844489801e-06, "loss": 15.8366, "step": 233970 }, { "epoch": 0.47265440353591875, "grad_norm": 329.267822265625, "learning_rate": 6.4123769979478715e-06, "loss": 27.878, "step": 233980 }, { "epoch": 0.4726746041686026, "grad_norm": 271.2923583984375, "learning_rate": 6.412042144522188e-06, "loss": 14.4957, "step": 233990 }, { "epoch": 0.4726948048012864, "grad_norm": 251.30503845214844, "learning_rate": 6.411707284214384e-06, "loss": 21.2201, "step": 234000 }, { "epoch": 0.4727150054339702, "grad_norm": 215.10621643066406, "learning_rate": 6.411372417026087e-06, "loss": 29.2392, "step": 234010 }, { "epoch": 0.47273520606665403, "grad_norm": 580.5404663085938, "learning_rate": 6.411037542958935e-06, "loss": 31.5826, "step": 234020 }, { "epoch": 0.4727554066993378, "grad_norm": 78.26838684082031, "learning_rate": 6.410702662014554e-06, "loss": 19.5785, "step": 234030 }, { "epoch": 0.4727756073320216, "grad_norm": 122.64849853515625, "learning_rate": 6.410367774194583e-06, "loss": 16.3908, "step": 234040 }, { "epoch": 0.47279580796470544, "grad_norm": 316.43072509765625, "learning_rate": 6.410032879500647e-06, "loss": 17.8182, "step": 234050 }, { "epoch": 0.47281600859738926, "grad_norm": 280.8567810058594, "learning_rate": 6.409697977934384e-06, "loss": 29.816, "step": 234060 }, { "epoch": 0.4728362092300731, "grad_norm": 418.7429504394531, "learning_rate": 6.409363069497424e-06, "loss": 12.4488, "step": 234070 }, { "epoch": 0.4728564098627569, "grad_norm": 550.7597045898438, "learning_rate": 6.4090281541913975e-06, "loss": 20.1115, "step": 234080 }, { "epoch": 0.4728766104954407, "grad_norm": 390.1741943359375, "learning_rate": 6.408693232017942e-06, "loss": 17.4804, "step": 234090 }, { "epoch": 0.47289681112812454, "grad_norm": 197.442626953125, "learning_rate": 6.408358302978683e-06, "loss": 17.2115, "step": 234100 }, { "epoch": 0.47291701176080836, "grad_norm": 623.2523803710938, "learning_rate": 6.408023367075258e-06, "loss": 20.1691, "step": 234110 }, { "epoch": 0.4729372123934922, "grad_norm": 161.82009887695312, "learning_rate": 6.4076884243092975e-06, "loss": 12.1091, "step": 234120 }, { "epoch": 0.472957413026176, "grad_norm": 82.56803131103516, "learning_rate": 6.407353474682436e-06, "loss": 10.5493, "step": 234130 }, { "epoch": 0.4729776136588598, "grad_norm": 174.92942810058594, "learning_rate": 6.407018518196303e-06, "loss": 14.9878, "step": 234140 }, { "epoch": 0.4729978142915436, "grad_norm": 335.8045349121094, "learning_rate": 6.406683554852532e-06, "loss": 25.3881, "step": 234150 }, { "epoch": 0.4730180149242274, "grad_norm": 329.1597595214844, "learning_rate": 6.406348584652756e-06, "loss": 23.9278, "step": 234160 }, { "epoch": 0.4730382155569112, "grad_norm": 743.2049560546875, "learning_rate": 6.4060136075986076e-06, "loss": 18.2946, "step": 234170 }, { "epoch": 0.47305841618959504, "grad_norm": 289.3351135253906, "learning_rate": 6.405678623691721e-06, "loss": 17.198, "step": 234180 }, { "epoch": 0.47307861682227886, "grad_norm": 430.5278015136719, "learning_rate": 6.405343632933725e-06, "loss": 10.1047, "step": 234190 }, { "epoch": 0.4730988174549627, "grad_norm": 389.00506591796875, "learning_rate": 6.4050086353262565e-06, "loss": 12.7205, "step": 234200 }, { "epoch": 0.4731190180876465, "grad_norm": 693.2787475585938, "learning_rate": 6.404673630870946e-06, "loss": 14.5796, "step": 234210 }, { "epoch": 0.4731392187203303, "grad_norm": 197.1500244140625, "learning_rate": 6.404338619569425e-06, "loss": 33.6521, "step": 234220 }, { "epoch": 0.47315941935301414, "grad_norm": 203.13548278808594, "learning_rate": 6.40400360142333e-06, "loss": 9.3007, "step": 234230 }, { "epoch": 0.47317961998569796, "grad_norm": 343.16754150390625, "learning_rate": 6.403668576434289e-06, "loss": 16.9539, "step": 234240 }, { "epoch": 0.4731998206183818, "grad_norm": 196.41065979003906, "learning_rate": 6.40333354460394e-06, "loss": 13.5854, "step": 234250 }, { "epoch": 0.4732200212510656, "grad_norm": 135.87164306640625, "learning_rate": 6.402998505933913e-06, "loss": 20.194, "step": 234260 }, { "epoch": 0.4732402218837494, "grad_norm": 151.08270263671875, "learning_rate": 6.4026634604258404e-06, "loss": 15.4891, "step": 234270 }, { "epoch": 0.4732604225164332, "grad_norm": 322.8927001953125, "learning_rate": 6.402328408081358e-06, "loss": 17.6946, "step": 234280 }, { "epoch": 0.473280623149117, "grad_norm": 479.7793884277344, "learning_rate": 6.401993348902095e-06, "loss": 22.7317, "step": 234290 }, { "epoch": 0.47330082378180083, "grad_norm": 247.61485290527344, "learning_rate": 6.401658282889689e-06, "loss": 21.8644, "step": 234300 }, { "epoch": 0.47332102441448465, "grad_norm": 162.19313049316406, "learning_rate": 6.401323210045768e-06, "loss": 22.0185, "step": 234310 }, { "epoch": 0.47334122504716847, "grad_norm": 329.83819580078125, "learning_rate": 6.400988130371969e-06, "loss": 18.8262, "step": 234320 }, { "epoch": 0.4733614256798523, "grad_norm": 175.2557830810547, "learning_rate": 6.400653043869924e-06, "loss": 19.3593, "step": 234330 }, { "epoch": 0.4733816263125361, "grad_norm": 200.32290649414062, "learning_rate": 6.400317950541265e-06, "loss": 26.4853, "step": 234340 }, { "epoch": 0.47340182694521993, "grad_norm": 454.8965759277344, "learning_rate": 6.399982850387625e-06, "loss": 18.5347, "step": 234350 }, { "epoch": 0.47342202757790375, "grad_norm": 768.8986206054688, "learning_rate": 6.3996477434106405e-06, "loss": 17.057, "step": 234360 }, { "epoch": 0.47344222821058757, "grad_norm": 664.8223876953125, "learning_rate": 6.399312629611941e-06, "loss": 25.3557, "step": 234370 }, { "epoch": 0.4734624288432714, "grad_norm": 0.0, "learning_rate": 6.398977508993164e-06, "loss": 9.4998, "step": 234380 }, { "epoch": 0.4734826294759552, "grad_norm": 379.18896484375, "learning_rate": 6.3986423815559386e-06, "loss": 26.9141, "step": 234390 }, { "epoch": 0.47350283010863903, "grad_norm": 214.22914123535156, "learning_rate": 6.3983072473019e-06, "loss": 8.7726, "step": 234400 }, { "epoch": 0.4735230307413228, "grad_norm": 273.8097839355469, "learning_rate": 6.3979721062326815e-06, "loss": 25.5153, "step": 234410 }, { "epoch": 0.4735432313740066, "grad_norm": 350.2293701171875, "learning_rate": 6.397636958349918e-06, "loss": 12.3051, "step": 234420 }, { "epoch": 0.47356343200669043, "grad_norm": 589.4801635742188, "learning_rate": 6.397301803655239e-06, "loss": 18.2101, "step": 234430 }, { "epoch": 0.47358363263937425, "grad_norm": 82.08505249023438, "learning_rate": 6.396966642150282e-06, "loss": 20.5192, "step": 234440 }, { "epoch": 0.4736038332720581, "grad_norm": 208.64263916015625, "learning_rate": 6.396631473836677e-06, "loss": 19.2264, "step": 234450 }, { "epoch": 0.4736240339047419, "grad_norm": 176.13262939453125, "learning_rate": 6.396296298716061e-06, "loss": 15.0508, "step": 234460 }, { "epoch": 0.4736442345374257, "grad_norm": 475.84686279296875, "learning_rate": 6.3959611167900685e-06, "loss": 18.4282, "step": 234470 }, { "epoch": 0.47366443517010953, "grad_norm": 323.65771484375, "learning_rate": 6.395625928060328e-06, "loss": 13.9233, "step": 234480 }, { "epoch": 0.47368463580279335, "grad_norm": 160.9544677734375, "learning_rate": 6.395290732528476e-06, "loss": 11.9047, "step": 234490 }, { "epoch": 0.4737048364354772, "grad_norm": 111.54698944091797, "learning_rate": 6.3949555301961474e-06, "loss": 38.6767, "step": 234500 }, { "epoch": 0.473725037068161, "grad_norm": 84.8818130493164, "learning_rate": 6.3946203210649734e-06, "loss": 20.4927, "step": 234510 }, { "epoch": 0.4737452377008448, "grad_norm": 304.14691162109375, "learning_rate": 6.39428510513659e-06, "loss": 13.007, "step": 234520 }, { "epoch": 0.47376543833352863, "grad_norm": 271.711669921875, "learning_rate": 6.393949882412629e-06, "loss": 31.9114, "step": 234530 }, { "epoch": 0.4737856389662124, "grad_norm": 578.7874755859375, "learning_rate": 6.393614652894727e-06, "loss": 19.0842, "step": 234540 }, { "epoch": 0.4738058395988962, "grad_norm": 245.68370056152344, "learning_rate": 6.3932794165845156e-06, "loss": 17.9837, "step": 234550 }, { "epoch": 0.47382604023158004, "grad_norm": 254.62472534179688, "learning_rate": 6.39294417348363e-06, "loss": 16.3442, "step": 234560 }, { "epoch": 0.47384624086426386, "grad_norm": 0.0, "learning_rate": 6.392608923593703e-06, "loss": 12.7805, "step": 234570 }, { "epoch": 0.4738664414969477, "grad_norm": 125.13101959228516, "learning_rate": 6.392273666916369e-06, "loss": 14.7228, "step": 234580 }, { "epoch": 0.4738866421296315, "grad_norm": 154.1107635498047, "learning_rate": 6.391938403453262e-06, "loss": 18.3299, "step": 234590 }, { "epoch": 0.4739068427623153, "grad_norm": 241.9048614501953, "learning_rate": 6.391603133206015e-06, "loss": 15.3186, "step": 234600 }, { "epoch": 0.47392704339499914, "grad_norm": 246.7981719970703, "learning_rate": 6.391267856176263e-06, "loss": 22.2628, "step": 234610 }, { "epoch": 0.47394724402768296, "grad_norm": 382.1298828125, "learning_rate": 6.390932572365641e-06, "loss": 31.8203, "step": 234620 }, { "epoch": 0.4739674446603668, "grad_norm": 132.4844970703125, "learning_rate": 6.390597281775783e-06, "loss": 16.663, "step": 234630 }, { "epoch": 0.4739876452930506, "grad_norm": 510.72369384765625, "learning_rate": 6.390261984408322e-06, "loss": 25.6551, "step": 234640 }, { "epoch": 0.4740078459257344, "grad_norm": 381.7547607421875, "learning_rate": 6.389926680264893e-06, "loss": 15.4814, "step": 234650 }, { "epoch": 0.47402804655841824, "grad_norm": 190.66424560546875, "learning_rate": 6.389591369347129e-06, "loss": 30.8308, "step": 234660 }, { "epoch": 0.474048247191102, "grad_norm": 90.11611938476562, "learning_rate": 6.389256051656665e-06, "loss": 21.2223, "step": 234670 }, { "epoch": 0.4740684478237858, "grad_norm": 275.7165832519531, "learning_rate": 6.388920727195138e-06, "loss": 12.888, "step": 234680 }, { "epoch": 0.47408864845646964, "grad_norm": 398.0523376464844, "learning_rate": 6.3885853959641765e-06, "loss": 13.8856, "step": 234690 }, { "epoch": 0.47410884908915346, "grad_norm": 173.2904052734375, "learning_rate": 6.388250057965421e-06, "loss": 17.8274, "step": 234700 }, { "epoch": 0.4741290497218373, "grad_norm": 493.35650634765625, "learning_rate": 6.387914713200502e-06, "loss": 20.1284, "step": 234710 }, { "epoch": 0.4741492503545211, "grad_norm": 329.68206787109375, "learning_rate": 6.387579361671054e-06, "loss": 21.1115, "step": 234720 }, { "epoch": 0.4741694509872049, "grad_norm": 401.5271301269531, "learning_rate": 6.387244003378713e-06, "loss": 24.2276, "step": 234730 }, { "epoch": 0.47418965161988874, "grad_norm": 244.2340087890625, "learning_rate": 6.386908638325114e-06, "loss": 20.2608, "step": 234740 }, { "epoch": 0.47420985225257256, "grad_norm": 136.28036499023438, "learning_rate": 6.386573266511891e-06, "loss": 32.7023, "step": 234750 }, { "epoch": 0.4742300528852564, "grad_norm": 366.8553161621094, "learning_rate": 6.3862378879406765e-06, "loss": 35.7066, "step": 234760 }, { "epoch": 0.4742502535179402, "grad_norm": 293.64959716796875, "learning_rate": 6.385902502613106e-06, "loss": 19.0389, "step": 234770 }, { "epoch": 0.474270454150624, "grad_norm": 392.91961669921875, "learning_rate": 6.385567110530816e-06, "loss": 21.1811, "step": 234780 }, { "epoch": 0.4742906547833078, "grad_norm": 217.08970642089844, "learning_rate": 6.385231711695441e-06, "loss": 21.874, "step": 234790 }, { "epoch": 0.4743108554159916, "grad_norm": 259.0161437988281, "learning_rate": 6.384896306108612e-06, "loss": 17.5991, "step": 234800 }, { "epoch": 0.4743310560486754, "grad_norm": 370.8699951171875, "learning_rate": 6.384560893771968e-06, "loss": 17.659, "step": 234810 }, { "epoch": 0.47435125668135925, "grad_norm": 328.98028564453125, "learning_rate": 6.3842254746871424e-06, "loss": 28.2784, "step": 234820 }, { "epoch": 0.47437145731404307, "grad_norm": 197.22532653808594, "learning_rate": 6.3838900488557695e-06, "loss": 18.0027, "step": 234830 }, { "epoch": 0.4743916579467269, "grad_norm": 183.9948272705078, "learning_rate": 6.383554616279485e-06, "loss": 22.8533, "step": 234840 }, { "epoch": 0.4744118585794107, "grad_norm": 217.38934326171875, "learning_rate": 6.383219176959921e-06, "loss": 15.5232, "step": 234850 }, { "epoch": 0.4744320592120945, "grad_norm": 172.2759246826172, "learning_rate": 6.382883730898717e-06, "loss": 35.7853, "step": 234860 }, { "epoch": 0.47445225984477835, "grad_norm": 196.10256958007812, "learning_rate": 6.382548278097503e-06, "loss": 23.6232, "step": 234870 }, { "epoch": 0.47447246047746217, "grad_norm": 62.2056884765625, "learning_rate": 6.382212818557918e-06, "loss": 12.9647, "step": 234880 }, { "epoch": 0.474492661110146, "grad_norm": 328.51324462890625, "learning_rate": 6.381877352281594e-06, "loss": 19.237, "step": 234890 }, { "epoch": 0.4745128617428298, "grad_norm": 327.0961608886719, "learning_rate": 6.3815418792701686e-06, "loss": 18.3127, "step": 234900 }, { "epoch": 0.4745330623755136, "grad_norm": 30.34280776977539, "learning_rate": 6.381206399525276e-06, "loss": 11.2779, "step": 234910 }, { "epoch": 0.4745532630081974, "grad_norm": 235.65530395507812, "learning_rate": 6.38087091304855e-06, "loss": 17.5479, "step": 234920 }, { "epoch": 0.4745734636408812, "grad_norm": 220.25973510742188, "learning_rate": 6.380535419841627e-06, "loss": 10.188, "step": 234930 }, { "epoch": 0.47459366427356503, "grad_norm": 212.65013122558594, "learning_rate": 6.380199919906141e-06, "loss": 13.4763, "step": 234940 }, { "epoch": 0.47461386490624885, "grad_norm": 729.042724609375, "learning_rate": 6.3798644132437304e-06, "loss": 26.473, "step": 234950 }, { "epoch": 0.47463406553893267, "grad_norm": 121.70183563232422, "learning_rate": 6.379528899856025e-06, "loss": 39.3966, "step": 234960 }, { "epoch": 0.4746542661716165, "grad_norm": 242.33534240722656, "learning_rate": 6.3791933797446644e-06, "loss": 27.4354, "step": 234970 }, { "epoch": 0.4746744668043003, "grad_norm": 212.03599548339844, "learning_rate": 6.378857852911283e-06, "loss": 19.3012, "step": 234980 }, { "epoch": 0.47469466743698413, "grad_norm": 426.098388671875, "learning_rate": 6.378522319357515e-06, "loss": 12.6129, "step": 234990 }, { "epoch": 0.47471486806966795, "grad_norm": 296.6100769042969, "learning_rate": 6.378186779084996e-06, "loss": 21.5582, "step": 235000 }, { "epoch": 0.47473506870235177, "grad_norm": 107.235595703125, "learning_rate": 6.377851232095362e-06, "loss": 11.7075, "step": 235010 }, { "epoch": 0.4747552693350356, "grad_norm": 37.25078201293945, "learning_rate": 6.37751567839025e-06, "loss": 20.0048, "step": 235020 }, { "epoch": 0.4747754699677194, "grad_norm": 263.8815612792969, "learning_rate": 6.377180117971292e-06, "loss": 12.0575, "step": 235030 }, { "epoch": 0.47479567060040323, "grad_norm": 150.74844360351562, "learning_rate": 6.376844550840126e-06, "loss": 16.2347, "step": 235040 }, { "epoch": 0.474815871233087, "grad_norm": 84.83209228515625, "learning_rate": 6.376508976998385e-06, "loss": 18.8763, "step": 235050 }, { "epoch": 0.4748360718657708, "grad_norm": 416.88677978515625, "learning_rate": 6.3761733964477066e-06, "loss": 18.1112, "step": 235060 }, { "epoch": 0.47485627249845463, "grad_norm": 29.723648071289062, "learning_rate": 6.375837809189726e-06, "loss": 18.4924, "step": 235070 }, { "epoch": 0.47487647313113845, "grad_norm": 427.0194396972656, "learning_rate": 6.375502215226082e-06, "loss": 16.7098, "step": 235080 }, { "epoch": 0.4748966737638223, "grad_norm": 173.62960815429688, "learning_rate": 6.375166614558404e-06, "loss": 13.341, "step": 235090 }, { "epoch": 0.4749168743965061, "grad_norm": 663.5652465820312, "learning_rate": 6.374831007188331e-06, "loss": 24.2268, "step": 235100 }, { "epoch": 0.4749370750291899, "grad_norm": 447.9564514160156, "learning_rate": 6.374495393117499e-06, "loss": 29.1698, "step": 235110 }, { "epoch": 0.47495727566187373, "grad_norm": 336.2720031738281, "learning_rate": 6.374159772347541e-06, "loss": 15.7179, "step": 235120 }, { "epoch": 0.47497747629455755, "grad_norm": 214.29673767089844, "learning_rate": 6.373824144880099e-06, "loss": 10.7611, "step": 235130 }, { "epoch": 0.4749976769272414, "grad_norm": 312.23699951171875, "learning_rate": 6.3734885107168e-06, "loss": 11.2199, "step": 235140 }, { "epoch": 0.4750178775599252, "grad_norm": 394.6502380371094, "learning_rate": 6.373152869859288e-06, "loss": 22.8227, "step": 235150 }, { "epoch": 0.475038078192609, "grad_norm": 573.7748413085938, "learning_rate": 6.372817222309194e-06, "loss": 26.1345, "step": 235160 }, { "epoch": 0.47505827882529283, "grad_norm": 231.83102416992188, "learning_rate": 6.372481568068156e-06, "loss": 30.1279, "step": 235170 }, { "epoch": 0.4750784794579766, "grad_norm": 525.2330932617188, "learning_rate": 6.37214590713781e-06, "loss": 28.067, "step": 235180 }, { "epoch": 0.4750986800906604, "grad_norm": 91.91986846923828, "learning_rate": 6.37181023951979e-06, "loss": 13.2239, "step": 235190 }, { "epoch": 0.47511888072334424, "grad_norm": 260.6444091796875, "learning_rate": 6.371474565215734e-06, "loss": 20.834, "step": 235200 }, { "epoch": 0.47513908135602806, "grad_norm": 206.9452667236328, "learning_rate": 6.371138884227277e-06, "loss": 20.7235, "step": 235210 }, { "epoch": 0.4751592819887119, "grad_norm": 295.5717468261719, "learning_rate": 6.3708031965560545e-06, "loss": 33.8541, "step": 235220 }, { "epoch": 0.4751794826213957, "grad_norm": 284.14581298828125, "learning_rate": 6.370467502203704e-06, "loss": 23.0372, "step": 235230 }, { "epoch": 0.4751996832540795, "grad_norm": 341.304443359375, "learning_rate": 6.370131801171863e-06, "loss": 21.4309, "step": 235240 }, { "epoch": 0.47521988388676334, "grad_norm": 366.19830322265625, "learning_rate": 6.369796093462164e-06, "loss": 23.2081, "step": 235250 }, { "epoch": 0.47524008451944716, "grad_norm": 0.0, "learning_rate": 6.369460379076244e-06, "loss": 19.9143, "step": 235260 }, { "epoch": 0.475260285152131, "grad_norm": 359.9265441894531, "learning_rate": 6.369124658015742e-06, "loss": 10.9329, "step": 235270 }, { "epoch": 0.4752804857848148, "grad_norm": 140.76864624023438, "learning_rate": 6.368788930282292e-06, "loss": 22.1764, "step": 235280 }, { "epoch": 0.4753006864174986, "grad_norm": 29.63686752319336, "learning_rate": 6.368453195877531e-06, "loss": 21.5599, "step": 235290 }, { "epoch": 0.47532088705018244, "grad_norm": 251.17628479003906, "learning_rate": 6.368117454803093e-06, "loss": 14.9031, "step": 235300 }, { "epoch": 0.4753410876828662, "grad_norm": 251.57440185546875, "learning_rate": 6.36778170706062e-06, "loss": 19.8527, "step": 235310 }, { "epoch": 0.47536128831555, "grad_norm": 72.46074676513672, "learning_rate": 6.367445952651742e-06, "loss": 37.8891, "step": 235320 }, { "epoch": 0.47538148894823384, "grad_norm": 406.4862976074219, "learning_rate": 6.367110191578099e-06, "loss": 22.1484, "step": 235330 }, { "epoch": 0.47540168958091766, "grad_norm": 207.01092529296875, "learning_rate": 6.366774423841326e-06, "loss": 23.6507, "step": 235340 }, { "epoch": 0.4754218902136015, "grad_norm": 129.2235565185547, "learning_rate": 6.366438649443062e-06, "loss": 10.7286, "step": 235350 }, { "epoch": 0.4754420908462853, "grad_norm": 270.5228576660156, "learning_rate": 6.366102868384942e-06, "loss": 16.5354, "step": 235360 }, { "epoch": 0.4754622914789691, "grad_norm": 190.71234130859375, "learning_rate": 6.365767080668601e-06, "loss": 31.6069, "step": 235370 }, { "epoch": 0.47548249211165294, "grad_norm": 160.77285766601562, "learning_rate": 6.365431286295677e-06, "loss": 19.9506, "step": 235380 }, { "epoch": 0.47550269274433676, "grad_norm": 225.9369354248047, "learning_rate": 6.365095485267807e-06, "loss": 10.4406, "step": 235390 }, { "epoch": 0.4755228933770206, "grad_norm": 150.66807556152344, "learning_rate": 6.364759677586627e-06, "loss": 15.8744, "step": 235400 }, { "epoch": 0.4755430940097044, "grad_norm": 0.0, "learning_rate": 6.364423863253772e-06, "loss": 14.8064, "step": 235410 }, { "epoch": 0.4755632946423882, "grad_norm": 355.1036376953125, "learning_rate": 6.364088042270884e-06, "loss": 17.1053, "step": 235420 }, { "epoch": 0.475583495275072, "grad_norm": 305.307861328125, "learning_rate": 6.363752214639595e-06, "loss": 14.4421, "step": 235430 }, { "epoch": 0.4756036959077558, "grad_norm": 35.69308853149414, "learning_rate": 6.363416380361542e-06, "loss": 16.149, "step": 235440 }, { "epoch": 0.47562389654043963, "grad_norm": 107.80265045166016, "learning_rate": 6.363080539438364e-06, "loss": 15.6538, "step": 235450 }, { "epoch": 0.47564409717312345, "grad_norm": 75.21046447753906, "learning_rate": 6.3627446918716965e-06, "loss": 17.3816, "step": 235460 }, { "epoch": 0.47566429780580727, "grad_norm": 457.4656677246094, "learning_rate": 6.362408837663177e-06, "loss": 24.8396, "step": 235470 }, { "epoch": 0.4756844984384911, "grad_norm": 190.85426330566406, "learning_rate": 6.3620729768144415e-06, "loss": 23.1504, "step": 235480 }, { "epoch": 0.4757046990711749, "grad_norm": 1537.8323974609375, "learning_rate": 6.361737109327128e-06, "loss": 21.1508, "step": 235490 }, { "epoch": 0.47572489970385873, "grad_norm": 33.12578201293945, "learning_rate": 6.361401235202872e-06, "loss": 17.9178, "step": 235500 }, { "epoch": 0.47574510033654255, "grad_norm": 402.8983459472656, "learning_rate": 6.361065354443312e-06, "loss": 11.7846, "step": 235510 }, { "epoch": 0.47576530096922637, "grad_norm": 214.59417724609375, "learning_rate": 6.360729467050086e-06, "loss": 16.2214, "step": 235520 }, { "epoch": 0.4757855016019102, "grad_norm": 810.9219970703125, "learning_rate": 6.360393573024828e-06, "loss": 26.2084, "step": 235530 }, { "epoch": 0.475805702234594, "grad_norm": 156.94981384277344, "learning_rate": 6.360057672369177e-06, "loss": 20.3314, "step": 235540 }, { "epoch": 0.47582590286727783, "grad_norm": 538.5867309570312, "learning_rate": 6.35972176508477e-06, "loss": 17.3019, "step": 235550 }, { "epoch": 0.4758461034999616, "grad_norm": 205.78973388671875, "learning_rate": 6.3593858511732446e-06, "loss": 24.8573, "step": 235560 }, { "epoch": 0.4758663041326454, "grad_norm": 370.8227844238281, "learning_rate": 6.359049930636235e-06, "loss": 18.086, "step": 235570 }, { "epoch": 0.47588650476532923, "grad_norm": 356.544921875, "learning_rate": 6.3587140034753836e-06, "loss": 19.6638, "step": 235580 }, { "epoch": 0.47590670539801305, "grad_norm": 257.9295349121094, "learning_rate": 6.358378069692324e-06, "loss": 26.6233, "step": 235590 }, { "epoch": 0.4759269060306969, "grad_norm": 325.27587890625, "learning_rate": 6.358042129288694e-06, "loss": 24.2227, "step": 235600 }, { "epoch": 0.4759471066633807, "grad_norm": 153.114501953125, "learning_rate": 6.3577061822661326e-06, "loss": 15.0209, "step": 235610 }, { "epoch": 0.4759673072960645, "grad_norm": 317.26141357421875, "learning_rate": 6.357370228626274e-06, "loss": 11.8937, "step": 235620 }, { "epoch": 0.47598750792874833, "grad_norm": 121.63030242919922, "learning_rate": 6.3570342683707595e-06, "loss": 18.3873, "step": 235630 }, { "epoch": 0.47600770856143215, "grad_norm": 116.06968688964844, "learning_rate": 6.356698301501224e-06, "loss": 34.4066, "step": 235640 }, { "epoch": 0.476027909194116, "grad_norm": 226.4818115234375, "learning_rate": 6.356362328019305e-06, "loss": 15.9497, "step": 235650 }, { "epoch": 0.4760481098267998, "grad_norm": 389.2100524902344, "learning_rate": 6.35602634792664e-06, "loss": 28.8573, "step": 235660 }, { "epoch": 0.4760683104594836, "grad_norm": 341.6592102050781, "learning_rate": 6.355690361224869e-06, "loss": 19.37, "step": 235670 }, { "epoch": 0.47608851109216743, "grad_norm": 223.19386291503906, "learning_rate": 6.355354367915626e-06, "loss": 16.7744, "step": 235680 }, { "epoch": 0.4761087117248512, "grad_norm": 296.23797607421875, "learning_rate": 6.355018368000552e-06, "loss": 12.7533, "step": 235690 }, { "epoch": 0.476128912357535, "grad_norm": 318.57781982421875, "learning_rate": 6.35468236148128e-06, "loss": 19.6702, "step": 235700 }, { "epoch": 0.47614911299021884, "grad_norm": 254.6189422607422, "learning_rate": 6.354346348359452e-06, "loss": 16.4533, "step": 235710 }, { "epoch": 0.47616931362290266, "grad_norm": 497.0199279785156, "learning_rate": 6.354010328636705e-06, "loss": 17.6152, "step": 235720 }, { "epoch": 0.4761895142555865, "grad_norm": 333.6150207519531, "learning_rate": 6.3536743023146744e-06, "loss": 9.808, "step": 235730 }, { "epoch": 0.4762097148882703, "grad_norm": 367.8109130859375, "learning_rate": 6.353338269395e-06, "loss": 12.5075, "step": 235740 }, { "epoch": 0.4762299155209541, "grad_norm": 573.1396484375, "learning_rate": 6.353002229879318e-06, "loss": 32.6608, "step": 235750 }, { "epoch": 0.47625011615363794, "grad_norm": 196.98765563964844, "learning_rate": 6.352666183769269e-06, "loss": 11.7662, "step": 235760 }, { "epoch": 0.47627031678632176, "grad_norm": 381.2041015625, "learning_rate": 6.352330131066489e-06, "loss": 19.4288, "step": 235770 }, { "epoch": 0.4762905174190056, "grad_norm": 551.9927978515625, "learning_rate": 6.351994071772615e-06, "loss": 18.1019, "step": 235780 }, { "epoch": 0.4763107180516894, "grad_norm": 460.33660888671875, "learning_rate": 6.351658005889286e-06, "loss": 16.6254, "step": 235790 }, { "epoch": 0.4763309186843732, "grad_norm": 423.4263610839844, "learning_rate": 6.35132193341814e-06, "loss": 23.7044, "step": 235800 }, { "epoch": 0.47635111931705704, "grad_norm": 186.51332092285156, "learning_rate": 6.350985854360815e-06, "loss": 41.1957, "step": 235810 }, { "epoch": 0.4763713199497408, "grad_norm": 630.2817993164062, "learning_rate": 6.350649768718948e-06, "loss": 18.9317, "step": 235820 }, { "epoch": 0.4763915205824246, "grad_norm": 137.49729919433594, "learning_rate": 6.3503136764941786e-06, "loss": 13.4061, "step": 235830 }, { "epoch": 0.47641172121510844, "grad_norm": 90.8154296875, "learning_rate": 6.349977577688144e-06, "loss": 19.3965, "step": 235840 }, { "epoch": 0.47643192184779226, "grad_norm": 298.8126525878906, "learning_rate": 6.349641472302484e-06, "loss": 20.5768, "step": 235850 }, { "epoch": 0.4764521224804761, "grad_norm": 809.7374877929688, "learning_rate": 6.349305360338832e-06, "loss": 20.2111, "step": 235860 }, { "epoch": 0.4764723231131599, "grad_norm": 245.4927215576172, "learning_rate": 6.34896924179883e-06, "loss": 36.9723, "step": 235870 }, { "epoch": 0.4764925237458437, "grad_norm": 135.92031860351562, "learning_rate": 6.348633116684117e-06, "loss": 17.1117, "step": 235880 }, { "epoch": 0.47651272437852754, "grad_norm": 23.906089782714844, "learning_rate": 6.348296984996329e-06, "loss": 15.8026, "step": 235890 }, { "epoch": 0.47653292501121136, "grad_norm": 27.019487380981445, "learning_rate": 6.3479608467371055e-06, "loss": 20.9327, "step": 235900 }, { "epoch": 0.4765531256438952, "grad_norm": 188.26419067382812, "learning_rate": 6.3476247019080826e-06, "loss": 18.6093, "step": 235910 }, { "epoch": 0.476573326276579, "grad_norm": 257.5811767578125, "learning_rate": 6.347288550510903e-06, "loss": 25.4158, "step": 235920 }, { "epoch": 0.4765935269092628, "grad_norm": 101.38032531738281, "learning_rate": 6.346952392547201e-06, "loss": 13.6602, "step": 235930 }, { "epoch": 0.47661372754194664, "grad_norm": 176.78485107421875, "learning_rate": 6.3466162280186164e-06, "loss": 10.0363, "step": 235940 }, { "epoch": 0.4766339281746304, "grad_norm": 158.8994598388672, "learning_rate": 6.346280056926788e-06, "loss": 17.3074, "step": 235950 }, { "epoch": 0.4766541288073142, "grad_norm": 97.84807586669922, "learning_rate": 6.345943879273353e-06, "loss": 17.1773, "step": 235960 }, { "epoch": 0.47667432943999805, "grad_norm": 64.37788391113281, "learning_rate": 6.3456076950599525e-06, "loss": 11.6738, "step": 235970 }, { "epoch": 0.47669453007268187, "grad_norm": 454.9940490722656, "learning_rate": 6.345271504288222e-06, "loss": 18.2247, "step": 235980 }, { "epoch": 0.4767147307053657, "grad_norm": 365.52081298828125, "learning_rate": 6.344935306959801e-06, "loss": 13.251, "step": 235990 }, { "epoch": 0.4767349313380495, "grad_norm": 518.8076171875, "learning_rate": 6.344599103076329e-06, "loss": 39.1762, "step": 236000 }, { "epoch": 0.4767551319707333, "grad_norm": 499.1654968261719, "learning_rate": 6.3442628926394455e-06, "loss": 17.0053, "step": 236010 }, { "epoch": 0.47677533260341715, "grad_norm": 215.90786743164062, "learning_rate": 6.3439266756507846e-06, "loss": 13.6943, "step": 236020 }, { "epoch": 0.47679553323610097, "grad_norm": 255.23583984375, "learning_rate": 6.343590452111991e-06, "loss": 24.1777, "step": 236030 }, { "epoch": 0.4768157338687848, "grad_norm": 229.6059112548828, "learning_rate": 6.343254222024699e-06, "loss": 12.2585, "step": 236040 }, { "epoch": 0.4768359345014686, "grad_norm": 1.5683120489120483, "learning_rate": 6.3429179853905485e-06, "loss": 24.413, "step": 236050 }, { "epoch": 0.4768561351341524, "grad_norm": 1584.3658447265625, "learning_rate": 6.34258174221118e-06, "loss": 24.7889, "step": 236060 }, { "epoch": 0.4768763357668362, "grad_norm": 305.65155029296875, "learning_rate": 6.342245492488228e-06, "loss": 18.109, "step": 236070 }, { "epoch": 0.47689653639952, "grad_norm": 305.5334777832031, "learning_rate": 6.341909236223338e-06, "loss": 21.4977, "step": 236080 }, { "epoch": 0.47691673703220383, "grad_norm": 296.4954833984375, "learning_rate": 6.341572973418143e-06, "loss": 19.6292, "step": 236090 }, { "epoch": 0.47693693766488765, "grad_norm": 318.7249450683594, "learning_rate": 6.341236704074285e-06, "loss": 23.8701, "step": 236100 }, { "epoch": 0.47695713829757147, "grad_norm": 265.14178466796875, "learning_rate": 6.340900428193401e-06, "loss": 13.5102, "step": 236110 }, { "epoch": 0.4769773389302553, "grad_norm": 50.94116973876953, "learning_rate": 6.340564145777131e-06, "loss": 18.1815, "step": 236120 }, { "epoch": 0.4769975395629391, "grad_norm": 305.2159729003906, "learning_rate": 6.340227856827116e-06, "loss": 10.5089, "step": 236130 }, { "epoch": 0.47701774019562293, "grad_norm": 294.8662414550781, "learning_rate": 6.339891561344991e-06, "loss": 17.9212, "step": 236140 }, { "epoch": 0.47703794082830675, "grad_norm": 120.34435272216797, "learning_rate": 6.339555259332398e-06, "loss": 21.3548, "step": 236150 }, { "epoch": 0.47705814146099057, "grad_norm": 1216.7646484375, "learning_rate": 6.339218950790973e-06, "loss": 35.0689, "step": 236160 }, { "epoch": 0.4770783420936744, "grad_norm": 258.1134948730469, "learning_rate": 6.33888263572236e-06, "loss": 32.9308, "step": 236170 }, { "epoch": 0.4770985427263582, "grad_norm": 113.07320404052734, "learning_rate": 6.338546314128193e-06, "loss": 9.7325, "step": 236180 }, { "epoch": 0.47711874335904203, "grad_norm": 48.87547302246094, "learning_rate": 6.3382099860101154e-06, "loss": 14.4419, "step": 236190 }, { "epoch": 0.4771389439917258, "grad_norm": 104.06188201904297, "learning_rate": 6.337873651369764e-06, "loss": 20.3412, "step": 236200 }, { "epoch": 0.4771591446244096, "grad_norm": 166.86428833007812, "learning_rate": 6.337537310208779e-06, "loss": 14.87, "step": 236210 }, { "epoch": 0.47717934525709343, "grad_norm": 0.0, "learning_rate": 6.3372009625288e-06, "loss": 17.0728, "step": 236220 }, { "epoch": 0.47719954588977725, "grad_norm": 283.13409423828125, "learning_rate": 6.336864608331463e-06, "loss": 13.8897, "step": 236230 }, { "epoch": 0.4772197465224611, "grad_norm": 386.5068054199219, "learning_rate": 6.336528247618413e-06, "loss": 16.9124, "step": 236240 }, { "epoch": 0.4772399471551449, "grad_norm": 319.2092590332031, "learning_rate": 6.336191880391285e-06, "loss": 44.0371, "step": 236250 }, { "epoch": 0.4772601477878287, "grad_norm": 269.0662841796875, "learning_rate": 6.335855506651721e-06, "loss": 22.5028, "step": 236260 }, { "epoch": 0.47728034842051253, "grad_norm": 139.19857788085938, "learning_rate": 6.335519126401357e-06, "loss": 20.8103, "step": 236270 }, { "epoch": 0.47730054905319635, "grad_norm": 556.1686401367188, "learning_rate": 6.335182739641837e-06, "loss": 24.4479, "step": 236280 }, { "epoch": 0.4773207496858802, "grad_norm": 423.05450439453125, "learning_rate": 6.334846346374797e-06, "loss": 15.489, "step": 236290 }, { "epoch": 0.477340950318564, "grad_norm": 154.90843200683594, "learning_rate": 6.334509946601879e-06, "loss": 20.001, "step": 236300 }, { "epoch": 0.4773611509512478, "grad_norm": 488.1640319824219, "learning_rate": 6.334173540324721e-06, "loss": 18.5341, "step": 236310 }, { "epoch": 0.47738135158393163, "grad_norm": 96.7862777709961, "learning_rate": 6.3338371275449614e-06, "loss": 18.4805, "step": 236320 }, { "epoch": 0.4774015522166154, "grad_norm": 3.370549440383911, "learning_rate": 6.333500708264243e-06, "loss": 20.9637, "step": 236330 }, { "epoch": 0.4774217528492992, "grad_norm": 475.3977355957031, "learning_rate": 6.3331642824842034e-06, "loss": 16.4089, "step": 236340 }, { "epoch": 0.47744195348198304, "grad_norm": 27.723621368408203, "learning_rate": 6.3328278502064835e-06, "loss": 17.1259, "step": 236350 }, { "epoch": 0.47746215411466686, "grad_norm": 321.4651794433594, "learning_rate": 6.3324914114327206e-06, "loss": 16.3365, "step": 236360 }, { "epoch": 0.4774823547473507, "grad_norm": 210.84744262695312, "learning_rate": 6.332154966164558e-06, "loss": 16.7796, "step": 236370 }, { "epoch": 0.4775025553800345, "grad_norm": 580.4096069335938, "learning_rate": 6.3318185144036325e-06, "loss": 32.9515, "step": 236380 }, { "epoch": 0.4775227560127183, "grad_norm": 169.37925720214844, "learning_rate": 6.3314820561515854e-06, "loss": 18.0941, "step": 236390 }, { "epoch": 0.47754295664540214, "grad_norm": 116.287841796875, "learning_rate": 6.331145591410057e-06, "loss": 17.1535, "step": 236400 }, { "epoch": 0.47756315727808596, "grad_norm": 455.9611511230469, "learning_rate": 6.330809120180685e-06, "loss": 14.6805, "step": 236410 }, { "epoch": 0.4775833579107698, "grad_norm": 34.614898681640625, "learning_rate": 6.330472642465113e-06, "loss": 21.2383, "step": 236420 }, { "epoch": 0.4776035585434536, "grad_norm": 116.25528717041016, "learning_rate": 6.330136158264977e-06, "loss": 15.3283, "step": 236430 }, { "epoch": 0.4776237591761374, "grad_norm": 362.63623046875, "learning_rate": 6.329799667581918e-06, "loss": 27.6675, "step": 236440 }, { "epoch": 0.47764395980882124, "grad_norm": 172.20965576171875, "learning_rate": 6.329463170417578e-06, "loss": 24.4275, "step": 236450 }, { "epoch": 0.477664160441505, "grad_norm": 178.12179565429688, "learning_rate": 6.329126666773596e-06, "loss": 20.8398, "step": 236460 }, { "epoch": 0.4776843610741888, "grad_norm": 341.2515563964844, "learning_rate": 6.328790156651611e-06, "loss": 9.2667, "step": 236470 }, { "epoch": 0.47770456170687264, "grad_norm": 150.73692321777344, "learning_rate": 6.328453640053264e-06, "loss": 19.2982, "step": 236480 }, { "epoch": 0.47772476233955646, "grad_norm": 426.1101989746094, "learning_rate": 6.3281171169801944e-06, "loss": 19.6774, "step": 236490 }, { "epoch": 0.4777449629722403, "grad_norm": 250.0183563232422, "learning_rate": 6.327780587434045e-06, "loss": 18.7389, "step": 236500 }, { "epoch": 0.4777651636049241, "grad_norm": 196.15914916992188, "learning_rate": 6.3274440514164535e-06, "loss": 16.5052, "step": 236510 }, { "epoch": 0.4777853642376079, "grad_norm": 311.2823486328125, "learning_rate": 6.327107508929059e-06, "loss": 21.4141, "step": 236520 }, { "epoch": 0.47780556487029174, "grad_norm": 226.3784637451172, "learning_rate": 6.326770959973505e-06, "loss": 12.4134, "step": 236530 }, { "epoch": 0.47782576550297556, "grad_norm": 511.1131286621094, "learning_rate": 6.32643440455143e-06, "loss": 20.4776, "step": 236540 }, { "epoch": 0.4778459661356594, "grad_norm": 237.08192443847656, "learning_rate": 6.3260978426644735e-06, "loss": 18.9612, "step": 236550 }, { "epoch": 0.4778661667683432, "grad_norm": 650.193603515625, "learning_rate": 6.325761274314279e-06, "loss": 24.8263, "step": 236560 }, { "epoch": 0.477886367401027, "grad_norm": 342.3340759277344, "learning_rate": 6.325424699502482e-06, "loss": 13.3016, "step": 236570 }, { "epoch": 0.47790656803371084, "grad_norm": 208.63104248046875, "learning_rate": 6.3250881182307285e-06, "loss": 20.7493, "step": 236580 }, { "epoch": 0.4779267686663946, "grad_norm": 232.63938903808594, "learning_rate": 6.324751530500656e-06, "loss": 20.687, "step": 236590 }, { "epoch": 0.47794696929907843, "grad_norm": 441.63067626953125, "learning_rate": 6.324414936313904e-06, "loss": 11.7886, "step": 236600 }, { "epoch": 0.47796716993176225, "grad_norm": 334.9959716796875, "learning_rate": 6.324078335672115e-06, "loss": 18.2732, "step": 236610 }, { "epoch": 0.47798737056444607, "grad_norm": 162.98477172851562, "learning_rate": 6.323741728576928e-06, "loss": 15.9826, "step": 236620 }, { "epoch": 0.4780075711971299, "grad_norm": 111.12680053710938, "learning_rate": 6.323405115029986e-06, "loss": 13.3257, "step": 236630 }, { "epoch": 0.4780277718298137, "grad_norm": 409.9516906738281, "learning_rate": 6.323068495032927e-06, "loss": 26.4236, "step": 236640 }, { "epoch": 0.47804797246249753, "grad_norm": 475.63616943359375, "learning_rate": 6.3227318685873915e-06, "loss": 19.1454, "step": 236650 }, { "epoch": 0.47806817309518135, "grad_norm": 270.5030822753906, "learning_rate": 6.322395235695022e-06, "loss": 8.3406, "step": 236660 }, { "epoch": 0.47808837372786517, "grad_norm": 121.01858520507812, "learning_rate": 6.32205859635746e-06, "loss": 18.4687, "step": 236670 }, { "epoch": 0.478108574360549, "grad_norm": 356.7904357910156, "learning_rate": 6.3217219505763426e-06, "loss": 19.2996, "step": 236680 }, { "epoch": 0.4781287749932328, "grad_norm": 205.60110473632812, "learning_rate": 6.321385298353314e-06, "loss": 25.0605, "step": 236690 }, { "epoch": 0.47814897562591663, "grad_norm": 251.0154266357422, "learning_rate": 6.321048639690013e-06, "loss": 17.1245, "step": 236700 }, { "epoch": 0.4781691762586004, "grad_norm": 271.42022705078125, "learning_rate": 6.320711974588082e-06, "loss": 15.964, "step": 236710 }, { "epoch": 0.4781893768912842, "grad_norm": 146.44711303710938, "learning_rate": 6.32037530304916e-06, "loss": 13.6834, "step": 236720 }, { "epoch": 0.47820957752396803, "grad_norm": 255.75582885742188, "learning_rate": 6.320038625074889e-06, "loss": 22.2794, "step": 236730 }, { "epoch": 0.47822977815665185, "grad_norm": 391.43328857421875, "learning_rate": 6.319701940666911e-06, "loss": 13.0674, "step": 236740 }, { "epoch": 0.4782499787893357, "grad_norm": 206.7195281982422, "learning_rate": 6.3193652498268656e-06, "loss": 23.2094, "step": 236750 }, { "epoch": 0.4782701794220195, "grad_norm": 132.25978088378906, "learning_rate": 6.319028552556393e-06, "loss": 15.8099, "step": 236760 }, { "epoch": 0.4782903800547033, "grad_norm": 191.63287353515625, "learning_rate": 6.318691848857136e-06, "loss": 16.7658, "step": 236770 }, { "epoch": 0.47831058068738713, "grad_norm": 181.01145935058594, "learning_rate": 6.318355138730735e-06, "loss": 22.6965, "step": 236780 }, { "epoch": 0.47833078132007095, "grad_norm": 159.6313018798828, "learning_rate": 6.318018422178829e-06, "loss": 24.3358, "step": 236790 }, { "epoch": 0.4783509819527548, "grad_norm": 355.6816101074219, "learning_rate": 6.317681699203065e-06, "loss": 26.9809, "step": 236800 }, { "epoch": 0.4783711825854386, "grad_norm": 183.1710205078125, "learning_rate": 6.317344969805077e-06, "loss": 18.9887, "step": 236810 }, { "epoch": 0.4783913832181224, "grad_norm": 199.095703125, "learning_rate": 6.317008233986509e-06, "loss": 11.6995, "step": 236820 }, { "epoch": 0.47841158385080623, "grad_norm": 469.2431945800781, "learning_rate": 6.316671491749005e-06, "loss": 18.91, "step": 236830 }, { "epoch": 0.47843178448349, "grad_norm": 0.0, "learning_rate": 6.316334743094201e-06, "loss": 15.5652, "step": 236840 }, { "epoch": 0.4784519851161738, "grad_norm": 344.3533630371094, "learning_rate": 6.315997988023744e-06, "loss": 23.8523, "step": 236850 }, { "epoch": 0.47847218574885764, "grad_norm": 311.69244384765625, "learning_rate": 6.31566122653927e-06, "loss": 21.2879, "step": 236860 }, { "epoch": 0.47849238638154146, "grad_norm": 117.62499237060547, "learning_rate": 6.315324458642424e-06, "loss": 9.4636, "step": 236870 }, { "epoch": 0.4785125870142253, "grad_norm": 283.11968994140625, "learning_rate": 6.314987684334847e-06, "loss": 19.9048, "step": 236880 }, { "epoch": 0.4785327876469091, "grad_norm": 602.8812866210938, "learning_rate": 6.314650903618178e-06, "loss": 22.8279, "step": 236890 }, { "epoch": 0.4785529882795929, "grad_norm": 274.651123046875, "learning_rate": 6.314314116494061e-06, "loss": 12.0327, "step": 236900 }, { "epoch": 0.47857318891227674, "grad_norm": 436.17242431640625, "learning_rate": 6.313977322964136e-06, "loss": 45.7506, "step": 236910 }, { "epoch": 0.47859338954496056, "grad_norm": 306.90057373046875, "learning_rate": 6.313640523030045e-06, "loss": 18.1194, "step": 236920 }, { "epoch": 0.4786135901776444, "grad_norm": 192.51776123046875, "learning_rate": 6.313303716693428e-06, "loss": 18.3384, "step": 236930 }, { "epoch": 0.4786337908103282, "grad_norm": 335.6084899902344, "learning_rate": 6.3129669039559295e-06, "loss": 13.9986, "step": 236940 }, { "epoch": 0.478653991443012, "grad_norm": 343.7852478027344, "learning_rate": 6.312630084819189e-06, "loss": 15.2591, "step": 236950 }, { "epoch": 0.47867419207569584, "grad_norm": 138.6490936279297, "learning_rate": 6.3122932592848495e-06, "loss": 15.6689, "step": 236960 }, { "epoch": 0.4786943927083796, "grad_norm": 311.7670593261719, "learning_rate": 6.311956427354552e-06, "loss": 29.0427, "step": 236970 }, { "epoch": 0.4787145933410634, "grad_norm": 190.33934020996094, "learning_rate": 6.311619589029937e-06, "loss": 22.5214, "step": 236980 }, { "epoch": 0.47873479397374724, "grad_norm": 494.0514831542969, "learning_rate": 6.311282744312647e-06, "loss": 18.0027, "step": 236990 }, { "epoch": 0.47875499460643106, "grad_norm": 483.2235107421875, "learning_rate": 6.310945893204324e-06, "loss": 26.5425, "step": 237000 }, { "epoch": 0.4787751952391149, "grad_norm": 304.55511474609375, "learning_rate": 6.310609035706611e-06, "loss": 16.4072, "step": 237010 }, { "epoch": 0.4787953958717987, "grad_norm": 257.4307861328125, "learning_rate": 6.310272171821145e-06, "loss": 23.8795, "step": 237020 }, { "epoch": 0.4788155965044825, "grad_norm": 173.4064483642578, "learning_rate": 6.3099353015495766e-06, "loss": 13.6417, "step": 237030 }, { "epoch": 0.47883579713716634, "grad_norm": 355.9212951660156, "learning_rate": 6.309598424893539e-06, "loss": 13.9529, "step": 237040 }, { "epoch": 0.47885599776985016, "grad_norm": 480.4691467285156, "learning_rate": 6.309261541854679e-06, "loss": 23.6783, "step": 237050 }, { "epoch": 0.478876198402534, "grad_norm": 542.549560546875, "learning_rate": 6.308924652434636e-06, "loss": 18.0015, "step": 237060 }, { "epoch": 0.4788963990352178, "grad_norm": 341.0864562988281, "learning_rate": 6.308587756635054e-06, "loss": 21.1023, "step": 237070 }, { "epoch": 0.4789165996679016, "grad_norm": 194.2560577392578, "learning_rate": 6.308250854457572e-06, "loss": 17.2465, "step": 237080 }, { "epoch": 0.47893680030058544, "grad_norm": 16.311344146728516, "learning_rate": 6.307913945903836e-06, "loss": 17.484, "step": 237090 }, { "epoch": 0.4789570009332692, "grad_norm": 430.1542663574219, "learning_rate": 6.307577030975485e-06, "loss": 28.483, "step": 237100 }, { "epoch": 0.478977201565953, "grad_norm": 1239.21240234375, "learning_rate": 6.307240109674162e-06, "loss": 25.0201, "step": 237110 }, { "epoch": 0.47899740219863685, "grad_norm": 265.2594299316406, "learning_rate": 6.3069031820015116e-06, "loss": 46.4842, "step": 237120 }, { "epoch": 0.47901760283132067, "grad_norm": 272.4784851074219, "learning_rate": 6.306566247959169e-06, "loss": 27.5817, "step": 237130 }, { "epoch": 0.4790378034640045, "grad_norm": 496.7796630859375, "learning_rate": 6.3062293075487854e-06, "loss": 26.7724, "step": 237140 }, { "epoch": 0.4790580040966883, "grad_norm": 409.21978759765625, "learning_rate": 6.305892360771997e-06, "loss": 13.8993, "step": 237150 }, { "epoch": 0.4790782047293721, "grad_norm": 398.0301208496094, "learning_rate": 6.305555407630447e-06, "loss": 29.6757, "step": 237160 }, { "epoch": 0.47909840536205595, "grad_norm": 156.29627990722656, "learning_rate": 6.3052184481257795e-06, "loss": 23.9023, "step": 237170 }, { "epoch": 0.47911860599473977, "grad_norm": 412.8313293457031, "learning_rate": 6.304881482259634e-06, "loss": 27.6497, "step": 237180 }, { "epoch": 0.4791388066274236, "grad_norm": 256.4752197265625, "learning_rate": 6.304544510033656e-06, "loss": 17.3186, "step": 237190 }, { "epoch": 0.4791590072601074, "grad_norm": 602.1134643554688, "learning_rate": 6.304207531449486e-06, "loss": 19.2897, "step": 237200 }, { "epoch": 0.4791792078927912, "grad_norm": 112.14457702636719, "learning_rate": 6.303870546508766e-06, "loss": 21.6659, "step": 237210 }, { "epoch": 0.479199408525475, "grad_norm": 594.1576538085938, "learning_rate": 6.3035335552131395e-06, "loss": 14.8035, "step": 237220 }, { "epoch": 0.4792196091581588, "grad_norm": 281.8275451660156, "learning_rate": 6.303196557564249e-06, "loss": 12.7032, "step": 237230 }, { "epoch": 0.47923980979084263, "grad_norm": 406.2150573730469, "learning_rate": 6.302859553563736e-06, "loss": 27.1095, "step": 237240 }, { "epoch": 0.47926001042352645, "grad_norm": 215.25518798828125, "learning_rate": 6.3025225432132434e-06, "loss": 21.4422, "step": 237250 }, { "epoch": 0.47928021105621027, "grad_norm": 160.85308837890625, "learning_rate": 6.302185526514413e-06, "loss": 22.2449, "step": 237260 }, { "epoch": 0.4793004116888941, "grad_norm": 176.40301513671875, "learning_rate": 6.301848503468889e-06, "loss": 23.3832, "step": 237270 }, { "epoch": 0.4793206123215779, "grad_norm": 271.0252685546875, "learning_rate": 6.301511474078315e-06, "loss": 22.8307, "step": 237280 }, { "epoch": 0.47934081295426173, "grad_norm": 348.6308898925781, "learning_rate": 6.301174438344329e-06, "loss": 19.2356, "step": 237290 }, { "epoch": 0.47936101358694555, "grad_norm": 296.2402038574219, "learning_rate": 6.3008373962685785e-06, "loss": 13.37, "step": 237300 }, { "epoch": 0.47938121421962937, "grad_norm": 122.66092681884766, "learning_rate": 6.3005003478527036e-06, "loss": 17.9922, "step": 237310 }, { "epoch": 0.4794014148523132, "grad_norm": 331.4521484375, "learning_rate": 6.300163293098348e-06, "loss": 26.9626, "step": 237320 }, { "epoch": 0.479421615484997, "grad_norm": 408.6274719238281, "learning_rate": 6.2998262320071546e-06, "loss": 15.033, "step": 237330 }, { "epoch": 0.47944181611768083, "grad_norm": 320.4709167480469, "learning_rate": 6.299489164580765e-06, "loss": 18.1449, "step": 237340 }, { "epoch": 0.4794620167503646, "grad_norm": 495.5616149902344, "learning_rate": 6.2991520908208235e-06, "loss": 21.9259, "step": 237350 }, { "epoch": 0.4794822173830484, "grad_norm": 262.2938232421875, "learning_rate": 6.298815010728972e-06, "loss": 19.5552, "step": 237360 }, { "epoch": 0.47950241801573223, "grad_norm": 190.8236083984375, "learning_rate": 6.298477924306854e-06, "loss": 14.1381, "step": 237370 }, { "epoch": 0.47952261864841605, "grad_norm": 421.9083557128906, "learning_rate": 6.298140831556112e-06, "loss": 15.6622, "step": 237380 }, { "epoch": 0.4795428192810999, "grad_norm": 280.32421875, "learning_rate": 6.2978037324783894e-06, "loss": 20.2401, "step": 237390 }, { "epoch": 0.4795630199137837, "grad_norm": 178.085693359375, "learning_rate": 6.297466627075327e-06, "loss": 17.6997, "step": 237400 }, { "epoch": 0.4795832205464675, "grad_norm": 645.3538818359375, "learning_rate": 6.2971295153485725e-06, "loss": 19.7653, "step": 237410 }, { "epoch": 0.47960342117915133, "grad_norm": 418.77862548828125, "learning_rate": 6.296792397299764e-06, "loss": 19.2981, "step": 237420 }, { "epoch": 0.47962362181183515, "grad_norm": 244.39813232421875, "learning_rate": 6.296455272930546e-06, "loss": 34.6104, "step": 237430 }, { "epoch": 0.479643822444519, "grad_norm": 125.87007904052734, "learning_rate": 6.2961181422425645e-06, "loss": 15.7316, "step": 237440 }, { "epoch": 0.4796640230772028, "grad_norm": 208.21946716308594, "learning_rate": 6.295781005237458e-06, "loss": 14.8493, "step": 237450 }, { "epoch": 0.4796842237098866, "grad_norm": 235.3919219970703, "learning_rate": 6.295443861916875e-06, "loss": 20.2365, "step": 237460 }, { "epoch": 0.47970442434257043, "grad_norm": 394.7134704589844, "learning_rate": 6.2951067122824515e-06, "loss": 25.6358, "step": 237470 }, { "epoch": 0.4797246249752542, "grad_norm": 251.04212951660156, "learning_rate": 6.294769556335839e-06, "loss": 13.7231, "step": 237480 }, { "epoch": 0.479744825607938, "grad_norm": 159.57992553710938, "learning_rate": 6.294432394078675e-06, "loss": 12.877, "step": 237490 }, { "epoch": 0.47976502624062184, "grad_norm": 527.345947265625, "learning_rate": 6.294095225512604e-06, "loss": 17.5401, "step": 237500 }, { "epoch": 0.47978522687330566, "grad_norm": 468.4747009277344, "learning_rate": 6.293758050639272e-06, "loss": 15.775, "step": 237510 }, { "epoch": 0.4798054275059895, "grad_norm": 389.41204833984375, "learning_rate": 6.293420869460318e-06, "loss": 17.3373, "step": 237520 }, { "epoch": 0.4798256281386733, "grad_norm": 355.65887451171875, "learning_rate": 6.2930836819773874e-06, "loss": 21.2184, "step": 237530 }, { "epoch": 0.4798458287713571, "grad_norm": 252.16282653808594, "learning_rate": 6.292746488192125e-06, "loss": 15.7161, "step": 237540 }, { "epoch": 0.47986602940404094, "grad_norm": 93.31018829345703, "learning_rate": 6.292409288106173e-06, "loss": 13.1988, "step": 237550 }, { "epoch": 0.47988623003672476, "grad_norm": 298.2574768066406, "learning_rate": 6.292072081721173e-06, "loss": 19.3388, "step": 237560 }, { "epoch": 0.4799064306694086, "grad_norm": 299.4151306152344, "learning_rate": 6.291734869038773e-06, "loss": 15.3944, "step": 237570 }, { "epoch": 0.4799266313020924, "grad_norm": 303.3268127441406, "learning_rate": 6.291397650060613e-06, "loss": 20.4044, "step": 237580 }, { "epoch": 0.4799468319347762, "grad_norm": 47.49273681640625, "learning_rate": 6.291060424788336e-06, "loss": 18.5613, "step": 237590 }, { "epoch": 0.47996703256746004, "grad_norm": 189.1701202392578, "learning_rate": 6.290723193223589e-06, "loss": 20.3288, "step": 237600 }, { "epoch": 0.4799872332001438, "grad_norm": 162.44735717773438, "learning_rate": 6.290385955368012e-06, "loss": 26.7729, "step": 237610 }, { "epoch": 0.4800074338328276, "grad_norm": 99.44279479980469, "learning_rate": 6.2900487112232534e-06, "loss": 21.0821, "step": 237620 }, { "epoch": 0.48002763446551144, "grad_norm": 183.30523681640625, "learning_rate": 6.289711460790951e-06, "loss": 7.9118, "step": 237630 }, { "epoch": 0.48004783509819526, "grad_norm": 238.51319885253906, "learning_rate": 6.289374204072752e-06, "loss": 22.7658, "step": 237640 }, { "epoch": 0.4800680357308791, "grad_norm": 199.75091552734375, "learning_rate": 6.2890369410703e-06, "loss": 17.8913, "step": 237650 }, { "epoch": 0.4800882363635629, "grad_norm": 218.84429931640625, "learning_rate": 6.2886996717852374e-06, "loss": 26.0745, "step": 237660 }, { "epoch": 0.4801084369962467, "grad_norm": 7.753872394561768, "learning_rate": 6.28836239621921e-06, "loss": 14.8604, "step": 237670 }, { "epoch": 0.48012863762893054, "grad_norm": 276.09765625, "learning_rate": 6.288025114373862e-06, "loss": 17.3696, "step": 237680 }, { "epoch": 0.48014883826161436, "grad_norm": 156.5079803466797, "learning_rate": 6.287687826250832e-06, "loss": 11.4167, "step": 237690 }, { "epoch": 0.4801690388942982, "grad_norm": 285.6181335449219, "learning_rate": 6.28735053185177e-06, "loss": 24.8251, "step": 237700 }, { "epoch": 0.480189239526982, "grad_norm": 68.6441650390625, "learning_rate": 6.287013231178316e-06, "loss": 10.5719, "step": 237710 }, { "epoch": 0.4802094401596658, "grad_norm": 198.65232849121094, "learning_rate": 6.286675924232117e-06, "loss": 20.1337, "step": 237720 }, { "epoch": 0.48022964079234964, "grad_norm": 231.31915283203125, "learning_rate": 6.286338611014817e-06, "loss": 13.1563, "step": 237730 }, { "epoch": 0.4802498414250334, "grad_norm": 179.13177490234375, "learning_rate": 6.286001291528056e-06, "loss": 14.1416, "step": 237740 }, { "epoch": 0.48027004205771723, "grad_norm": 208.28871154785156, "learning_rate": 6.285663965773482e-06, "loss": 30.5398, "step": 237750 }, { "epoch": 0.48029024269040105, "grad_norm": 360.296875, "learning_rate": 6.285326633752737e-06, "loss": 18.027, "step": 237760 }, { "epoch": 0.48031044332308487, "grad_norm": 1.7526826858520508, "learning_rate": 6.284989295467466e-06, "loss": 15.175, "step": 237770 }, { "epoch": 0.4803306439557687, "grad_norm": 56.72864532470703, "learning_rate": 6.284651950919315e-06, "loss": 18.1247, "step": 237780 }, { "epoch": 0.4803508445884525, "grad_norm": 405.1344299316406, "learning_rate": 6.284314600109923e-06, "loss": 25.4464, "step": 237790 }, { "epoch": 0.48037104522113633, "grad_norm": 182.0778350830078, "learning_rate": 6.28397724304094e-06, "loss": 14.0938, "step": 237800 }, { "epoch": 0.48039124585382015, "grad_norm": 300.3033752441406, "learning_rate": 6.283639879714006e-06, "loss": 34.1256, "step": 237810 }, { "epoch": 0.48041144648650397, "grad_norm": 211.23976135253906, "learning_rate": 6.283302510130768e-06, "loss": 17.466, "step": 237820 }, { "epoch": 0.4804316471191878, "grad_norm": 454.8359680175781, "learning_rate": 6.282965134292869e-06, "loss": 13.922, "step": 237830 }, { "epoch": 0.4804518477518716, "grad_norm": 53.38147735595703, "learning_rate": 6.282627752201953e-06, "loss": 27.8579, "step": 237840 }, { "epoch": 0.48047204838455543, "grad_norm": 1.1840314865112305, "learning_rate": 6.2822903638596654e-06, "loss": 13.9646, "step": 237850 }, { "epoch": 0.4804922490172392, "grad_norm": 515.8729858398438, "learning_rate": 6.28195296926765e-06, "loss": 28.3716, "step": 237860 }, { "epoch": 0.480512449649923, "grad_norm": 232.6335906982422, "learning_rate": 6.281615568427551e-06, "loss": 12.9189, "step": 237870 }, { "epoch": 0.48053265028260683, "grad_norm": 294.8248596191406, "learning_rate": 6.281278161341013e-06, "loss": 15.9589, "step": 237880 }, { "epoch": 0.48055285091529065, "grad_norm": 12.92196273803711, "learning_rate": 6.280940748009682e-06, "loss": 22.4827, "step": 237890 }, { "epoch": 0.4805730515479745, "grad_norm": 326.8047180175781, "learning_rate": 6.280603328435199e-06, "loss": 19.2421, "step": 237900 }, { "epoch": 0.4805932521806583, "grad_norm": 65.30421447753906, "learning_rate": 6.2802659026192124e-06, "loss": 12.8592, "step": 237910 }, { "epoch": 0.4806134528133421, "grad_norm": 410.599853515625, "learning_rate": 6.279928470563365e-06, "loss": 19.5612, "step": 237920 }, { "epoch": 0.48063365344602593, "grad_norm": 146.52450561523438, "learning_rate": 6.2795910322693e-06, "loss": 27.0318, "step": 237930 }, { "epoch": 0.48065385407870975, "grad_norm": 164.02687072753906, "learning_rate": 6.279253587738664e-06, "loss": 18.1953, "step": 237940 }, { "epoch": 0.4806740547113936, "grad_norm": 574.2261962890625, "learning_rate": 6.278916136973102e-06, "loss": 22.0913, "step": 237950 }, { "epoch": 0.4806942553440774, "grad_norm": 324.0318603515625, "learning_rate": 6.278578679974259e-06, "loss": 19.4642, "step": 237960 }, { "epoch": 0.4807144559767612, "grad_norm": 435.7317199707031, "learning_rate": 6.278241216743777e-06, "loss": 16.8947, "step": 237970 }, { "epoch": 0.48073465660944503, "grad_norm": 274.3324279785156, "learning_rate": 6.277903747283302e-06, "loss": 18.7342, "step": 237980 }, { "epoch": 0.4807548572421288, "grad_norm": 589.2617797851562, "learning_rate": 6.277566271594478e-06, "loss": 16.037, "step": 237990 }, { "epoch": 0.4807750578748126, "grad_norm": 389.2999572753906, "learning_rate": 6.277228789678953e-06, "loss": 16.5541, "step": 238000 }, { "epoch": 0.48079525850749644, "grad_norm": 152.0805206298828, "learning_rate": 6.2768913015383696e-06, "loss": 18.6227, "step": 238010 }, { "epoch": 0.48081545914018026, "grad_norm": 136.62225341796875, "learning_rate": 6.276553807174373e-06, "loss": 15.7071, "step": 238020 }, { "epoch": 0.4808356597728641, "grad_norm": 436.652587890625, "learning_rate": 6.276216306588607e-06, "loss": 13.6809, "step": 238030 }, { "epoch": 0.4808558604055479, "grad_norm": 339.07708740234375, "learning_rate": 6.275878799782719e-06, "loss": 19.6821, "step": 238040 }, { "epoch": 0.4808760610382317, "grad_norm": 95.26113891601562, "learning_rate": 6.275541286758352e-06, "loss": 18.7971, "step": 238050 }, { "epoch": 0.48089626167091554, "grad_norm": 358.3951110839844, "learning_rate": 6.2752037675171495e-06, "loss": 27.9396, "step": 238060 }, { "epoch": 0.48091646230359936, "grad_norm": 172.3417205810547, "learning_rate": 6.274866242060761e-06, "loss": 18.4151, "step": 238070 }, { "epoch": 0.4809366629362832, "grad_norm": 117.96026611328125, "learning_rate": 6.274528710390826e-06, "loss": 14.379, "step": 238080 }, { "epoch": 0.480956863568967, "grad_norm": 55.03427505493164, "learning_rate": 6.274191172508996e-06, "loss": 21.7047, "step": 238090 }, { "epoch": 0.4809770642016508, "grad_norm": 155.01622009277344, "learning_rate": 6.273853628416911e-06, "loss": 10.3467, "step": 238100 }, { "epoch": 0.48099726483433464, "grad_norm": 300.2708740234375, "learning_rate": 6.273516078116218e-06, "loss": 15.2157, "step": 238110 }, { "epoch": 0.4810174654670184, "grad_norm": 233.45562744140625, "learning_rate": 6.273178521608564e-06, "loss": 29.3611, "step": 238120 }, { "epoch": 0.4810376660997022, "grad_norm": 283.8163757324219, "learning_rate": 6.272840958895591e-06, "loss": 13.1492, "step": 238130 }, { "epoch": 0.48105786673238604, "grad_norm": 229.71722412109375, "learning_rate": 6.272503389978945e-06, "loss": 28.4808, "step": 238140 }, { "epoch": 0.48107806736506986, "grad_norm": 422.572509765625, "learning_rate": 6.272165814860272e-06, "loss": 21.5361, "step": 238150 }, { "epoch": 0.4810982679977537, "grad_norm": 207.5037078857422, "learning_rate": 6.271828233541218e-06, "loss": 15.9621, "step": 238160 }, { "epoch": 0.4811184686304375, "grad_norm": 159.751953125, "learning_rate": 6.271490646023426e-06, "loss": 10.722, "step": 238170 }, { "epoch": 0.4811386692631213, "grad_norm": 266.2142333984375, "learning_rate": 6.271153052308544e-06, "loss": 18.931, "step": 238180 }, { "epoch": 0.48115886989580514, "grad_norm": 1333.4388427734375, "learning_rate": 6.2708154523982155e-06, "loss": 41.3614, "step": 238190 }, { "epoch": 0.48117907052848896, "grad_norm": 325.2752685546875, "learning_rate": 6.270477846294086e-06, "loss": 18.6002, "step": 238200 }, { "epoch": 0.4811992711611728, "grad_norm": 234.98843383789062, "learning_rate": 6.270140233997803e-06, "loss": 10.5987, "step": 238210 }, { "epoch": 0.4812194717938566, "grad_norm": 288.3065490722656, "learning_rate": 6.269802615511009e-06, "loss": 8.88, "step": 238220 }, { "epoch": 0.4812396724265404, "grad_norm": 32.72826385498047, "learning_rate": 6.269464990835353e-06, "loss": 10.7041, "step": 238230 }, { "epoch": 0.48125987305922424, "grad_norm": 295.62066650390625, "learning_rate": 6.2691273599724765e-06, "loss": 16.5308, "step": 238240 }, { "epoch": 0.481280073691908, "grad_norm": 455.1304931640625, "learning_rate": 6.268789722924029e-06, "loss": 20.1295, "step": 238250 }, { "epoch": 0.4813002743245918, "grad_norm": 77.39876556396484, "learning_rate": 6.268452079691654e-06, "loss": 17.6102, "step": 238260 }, { "epoch": 0.48132047495727565, "grad_norm": 228.83694458007812, "learning_rate": 6.268114430276996e-06, "loss": 19.2078, "step": 238270 }, { "epoch": 0.48134067558995947, "grad_norm": 443.73101806640625, "learning_rate": 6.267776774681703e-06, "loss": 25.1133, "step": 238280 }, { "epoch": 0.4813608762226433, "grad_norm": 629.93505859375, "learning_rate": 6.26743911290742e-06, "loss": 17.6677, "step": 238290 }, { "epoch": 0.4813810768553271, "grad_norm": 42.2877311706543, "learning_rate": 6.267101444955792e-06, "loss": 30.7113, "step": 238300 }, { "epoch": 0.4814012774880109, "grad_norm": 141.7714080810547, "learning_rate": 6.2667637708284655e-06, "loss": 11.6674, "step": 238310 }, { "epoch": 0.48142147812069475, "grad_norm": 118.15550994873047, "learning_rate": 6.266426090527087e-06, "loss": 5.9022, "step": 238320 }, { "epoch": 0.48144167875337857, "grad_norm": 220.01321411132812, "learning_rate": 6.2660884040533e-06, "loss": 51.2709, "step": 238330 }, { "epoch": 0.4814618793860624, "grad_norm": 147.910888671875, "learning_rate": 6.2657507114087525e-06, "loss": 14.7257, "step": 238340 }, { "epoch": 0.4814820800187462, "grad_norm": 333.10357666015625, "learning_rate": 6.265413012595088e-06, "loss": 25.3232, "step": 238350 }, { "epoch": 0.48150228065143, "grad_norm": 313.12994384765625, "learning_rate": 6.265075307613956e-06, "loss": 23.244, "step": 238360 }, { "epoch": 0.48152248128411385, "grad_norm": 234.84512329101562, "learning_rate": 6.264737596466999e-06, "loss": 33.0858, "step": 238370 }, { "epoch": 0.4815426819167976, "grad_norm": 204.4493865966797, "learning_rate": 6.264399879155865e-06, "loss": 22.1905, "step": 238380 }, { "epoch": 0.48156288254948143, "grad_norm": 371.0415954589844, "learning_rate": 6.2640621556822e-06, "loss": 16.234, "step": 238390 }, { "epoch": 0.48158308318216525, "grad_norm": 424.999755859375, "learning_rate": 6.2637244260476474e-06, "loss": 20.3911, "step": 238400 }, { "epoch": 0.48160328381484907, "grad_norm": 502.24102783203125, "learning_rate": 6.2633866902538564e-06, "loss": 13.4258, "step": 238410 }, { "epoch": 0.4816234844475329, "grad_norm": 528.8517456054688, "learning_rate": 6.263048948302471e-06, "loss": 23.9058, "step": 238420 }, { "epoch": 0.4816436850802167, "grad_norm": 502.44921875, "learning_rate": 6.26271120019514e-06, "loss": 21.111, "step": 238430 }, { "epoch": 0.48166388571290053, "grad_norm": 292.2132263183594, "learning_rate": 6.262373445933506e-06, "loss": 12.7818, "step": 238440 }, { "epoch": 0.48168408634558435, "grad_norm": 244.21803283691406, "learning_rate": 6.262035685519218e-06, "loss": 15.885, "step": 238450 }, { "epoch": 0.48170428697826817, "grad_norm": 289.2442626953125, "learning_rate": 6.261697918953922e-06, "loss": 21.2771, "step": 238460 }, { "epoch": 0.481724487610952, "grad_norm": 218.7513885498047, "learning_rate": 6.261360146239261e-06, "loss": 15.2726, "step": 238470 }, { "epoch": 0.4817446882436358, "grad_norm": 291.0226745605469, "learning_rate": 6.261022367376886e-06, "loss": 15.5038, "step": 238480 }, { "epoch": 0.48176488887631963, "grad_norm": 29.675317764282227, "learning_rate": 6.260684582368439e-06, "loss": 34.9261, "step": 238490 }, { "epoch": 0.4817850895090034, "grad_norm": 310.8671569824219, "learning_rate": 6.26034679121557e-06, "loss": 16.0056, "step": 238500 }, { "epoch": 0.4818052901416872, "grad_norm": 209.99737548828125, "learning_rate": 6.260008993919922e-06, "loss": 15.2852, "step": 238510 }, { "epoch": 0.48182549077437103, "grad_norm": 229.0928955078125, "learning_rate": 6.259671190483143e-06, "loss": 14.9968, "step": 238520 }, { "epoch": 0.48184569140705485, "grad_norm": 61.85886764526367, "learning_rate": 6.25933338090688e-06, "loss": 17.1232, "step": 238530 }, { "epoch": 0.4818658920397387, "grad_norm": 294.7627258300781, "learning_rate": 6.258995565192779e-06, "loss": 18.8456, "step": 238540 }, { "epoch": 0.4818860926724225, "grad_norm": 106.36480712890625, "learning_rate": 6.258657743342486e-06, "loss": 20.6748, "step": 238550 }, { "epoch": 0.4819062933051063, "grad_norm": 104.41059875488281, "learning_rate": 6.258319915357648e-06, "loss": 24.9217, "step": 238560 }, { "epoch": 0.48192649393779013, "grad_norm": 55.933311462402344, "learning_rate": 6.257982081239912e-06, "loss": 15.5358, "step": 238570 }, { "epoch": 0.48194669457047395, "grad_norm": 453.1619567871094, "learning_rate": 6.257644240990923e-06, "loss": 22.1787, "step": 238580 }, { "epoch": 0.4819668952031578, "grad_norm": 699.7005615234375, "learning_rate": 6.257306394612328e-06, "loss": 32.0662, "step": 238590 }, { "epoch": 0.4819870958358416, "grad_norm": 215.3056182861328, "learning_rate": 6.256968542105775e-06, "loss": 19.0193, "step": 238600 }, { "epoch": 0.4820072964685254, "grad_norm": 579.3931884765625, "learning_rate": 6.2566306834729095e-06, "loss": 29.8202, "step": 238610 }, { "epoch": 0.48202749710120923, "grad_norm": 97.75149536132812, "learning_rate": 6.256292818715378e-06, "loss": 27.2746, "step": 238620 }, { "epoch": 0.482047697733893, "grad_norm": 265.0908508300781, "learning_rate": 6.255954947834831e-06, "loss": 9.599, "step": 238630 }, { "epoch": 0.4820678983665768, "grad_norm": 457.22821044921875, "learning_rate": 6.255617070832908e-06, "loss": 17.7608, "step": 238640 }, { "epoch": 0.48208809899926064, "grad_norm": 434.2626953125, "learning_rate": 6.25527918771126e-06, "loss": 40.4497, "step": 238650 }, { "epoch": 0.48210829963194446, "grad_norm": 172.80477905273438, "learning_rate": 6.254941298471535e-06, "loss": 15.0037, "step": 238660 }, { "epoch": 0.4821285002646283, "grad_norm": 395.58038330078125, "learning_rate": 6.254603403115377e-06, "loss": 25.2874, "step": 238670 }, { "epoch": 0.4821487008973121, "grad_norm": 214.41043090820312, "learning_rate": 6.254265501644435e-06, "loss": 15.7716, "step": 238680 }, { "epoch": 0.4821689015299959, "grad_norm": 442.8664855957031, "learning_rate": 6.253927594060354e-06, "loss": 28.7348, "step": 238690 }, { "epoch": 0.48218910216267974, "grad_norm": 620.346923828125, "learning_rate": 6.2535896803647845e-06, "loss": 14.0092, "step": 238700 }, { "epoch": 0.48220930279536356, "grad_norm": 134.16954040527344, "learning_rate": 6.253251760559369e-06, "loss": 20.7357, "step": 238710 }, { "epoch": 0.4822295034280474, "grad_norm": 337.0098571777344, "learning_rate": 6.252913834645757e-06, "loss": 21.358, "step": 238720 }, { "epoch": 0.4822497040607312, "grad_norm": 308.21038818359375, "learning_rate": 6.252575902625595e-06, "loss": 16.302, "step": 238730 }, { "epoch": 0.482269904693415, "grad_norm": 245.0392608642578, "learning_rate": 6.25223796450053e-06, "loss": 10.3153, "step": 238740 }, { "epoch": 0.48229010532609884, "grad_norm": 96.62300109863281, "learning_rate": 6.251900020272208e-06, "loss": 17.9575, "step": 238750 }, { "epoch": 0.4823103059587826, "grad_norm": 618.3687744140625, "learning_rate": 6.2515620699422775e-06, "loss": 31.3345, "step": 238760 }, { "epoch": 0.4823305065914664, "grad_norm": 806.3795776367188, "learning_rate": 6.2512241135123856e-06, "loss": 21.2297, "step": 238770 }, { "epoch": 0.48235070722415024, "grad_norm": 246.47647094726562, "learning_rate": 6.250886150984179e-06, "loss": 17.017, "step": 238780 }, { "epoch": 0.48237090785683406, "grad_norm": 339.5592956542969, "learning_rate": 6.2505481823593065e-06, "loss": 20.0235, "step": 238790 }, { "epoch": 0.4823911084895179, "grad_norm": 105.3312759399414, "learning_rate": 6.250210207639411e-06, "loss": 16.8597, "step": 238800 }, { "epoch": 0.4824113091222017, "grad_norm": 393.3373718261719, "learning_rate": 6.249872226826145e-06, "loss": 12.4614, "step": 238810 }, { "epoch": 0.4824315097548855, "grad_norm": 265.3084716796875, "learning_rate": 6.249534239921154e-06, "loss": 21.8704, "step": 238820 }, { "epoch": 0.48245171038756934, "grad_norm": 387.1150207519531, "learning_rate": 6.24919624692608e-06, "loss": 21.5784, "step": 238830 }, { "epoch": 0.48247191102025316, "grad_norm": 584.3018188476562, "learning_rate": 6.2488582478425795e-06, "loss": 24.1182, "step": 238840 }, { "epoch": 0.482492111652937, "grad_norm": 297.7552185058594, "learning_rate": 6.2485202426722925e-06, "loss": 37.5912, "step": 238850 }, { "epoch": 0.4825123122856208, "grad_norm": 173.15826416015625, "learning_rate": 6.248182231416872e-06, "loss": 13.1343, "step": 238860 }, { "epoch": 0.4825325129183046, "grad_norm": 1192.698974609375, "learning_rate": 6.247844214077962e-06, "loss": 31.6678, "step": 238870 }, { "epoch": 0.48255271355098844, "grad_norm": 432.1405029296875, "learning_rate": 6.247506190657209e-06, "loss": 25.4201, "step": 238880 }, { "epoch": 0.4825729141836722, "grad_norm": 241.35391235351562, "learning_rate": 6.247168161156264e-06, "loss": 22.367, "step": 238890 }, { "epoch": 0.48259311481635603, "grad_norm": 264.42694091796875, "learning_rate": 6.24683012557677e-06, "loss": 12.5719, "step": 238900 }, { "epoch": 0.48261331544903985, "grad_norm": 378.6200256347656, "learning_rate": 6.2464920839203805e-06, "loss": 23.1163, "step": 238910 }, { "epoch": 0.48263351608172367, "grad_norm": 201.96800231933594, "learning_rate": 6.2461540361887386e-06, "loss": 10.3283, "step": 238920 }, { "epoch": 0.4826537167144075, "grad_norm": 279.50634765625, "learning_rate": 6.245815982383492e-06, "loss": 22.3017, "step": 238930 }, { "epoch": 0.4826739173470913, "grad_norm": 206.7711639404297, "learning_rate": 6.24547792250629e-06, "loss": 18.2322, "step": 238940 }, { "epoch": 0.48269411797977513, "grad_norm": 523.73974609375, "learning_rate": 6.24513985655878e-06, "loss": 28.1005, "step": 238950 }, { "epoch": 0.48271431861245895, "grad_norm": 250.10890197753906, "learning_rate": 6.244801784542609e-06, "loss": 23.5299, "step": 238960 }, { "epoch": 0.48273451924514277, "grad_norm": 185.93862915039062, "learning_rate": 6.244463706459426e-06, "loss": 16.2183, "step": 238970 }, { "epoch": 0.4827547198778266, "grad_norm": 114.59004211425781, "learning_rate": 6.244125622310877e-06, "loss": 19.783, "step": 238980 }, { "epoch": 0.4827749205105104, "grad_norm": 262.95245361328125, "learning_rate": 6.243787532098611e-06, "loss": 17.3343, "step": 238990 }, { "epoch": 0.48279512114319423, "grad_norm": 309.6873474121094, "learning_rate": 6.243449435824276e-06, "loss": 16.0641, "step": 239000 }, { "epoch": 0.48281532177587805, "grad_norm": 212.29251098632812, "learning_rate": 6.243111333489516e-06, "loss": 6.6235, "step": 239010 }, { "epoch": 0.4828355224085618, "grad_norm": 276.8680725097656, "learning_rate": 6.242773225095986e-06, "loss": 14.379, "step": 239020 }, { "epoch": 0.48285572304124563, "grad_norm": 1684.0933837890625, "learning_rate": 6.242435110645328e-06, "loss": 24.487, "step": 239030 }, { "epoch": 0.48287592367392945, "grad_norm": 170.98117065429688, "learning_rate": 6.242096990139192e-06, "loss": 29.464, "step": 239040 }, { "epoch": 0.4828961243066133, "grad_norm": 129.32748413085938, "learning_rate": 6.241758863579227e-06, "loss": 34.8316, "step": 239050 }, { "epoch": 0.4829163249392971, "grad_norm": 218.843017578125, "learning_rate": 6.241420730967079e-06, "loss": 19.2462, "step": 239060 }, { "epoch": 0.4829365255719809, "grad_norm": 489.1596374511719, "learning_rate": 6.241082592304398e-06, "loss": 17.829, "step": 239070 }, { "epoch": 0.48295672620466473, "grad_norm": 229.94891357421875, "learning_rate": 6.24074444759283e-06, "loss": 16.333, "step": 239080 }, { "epoch": 0.48297692683734855, "grad_norm": 453.3951721191406, "learning_rate": 6.240406296834024e-06, "loss": 21.5069, "step": 239090 }, { "epoch": 0.4829971274700324, "grad_norm": 241.62619018554688, "learning_rate": 6.240068140029628e-06, "loss": 20.3431, "step": 239100 }, { "epoch": 0.4830173281027162, "grad_norm": 363.08612060546875, "learning_rate": 6.2397299771812925e-06, "loss": 15.4892, "step": 239110 }, { "epoch": 0.4830375287354, "grad_norm": 357.63031005859375, "learning_rate": 6.23939180829066e-06, "loss": 18.1519, "step": 239120 }, { "epoch": 0.48305772936808383, "grad_norm": 234.8158416748047, "learning_rate": 6.239053633359384e-06, "loss": 10.7217, "step": 239130 }, { "epoch": 0.4830779300007676, "grad_norm": 390.8058166503906, "learning_rate": 6.2387154523891115e-06, "loss": 18.8427, "step": 239140 }, { "epoch": 0.4830981306334514, "grad_norm": 782.8630981445312, "learning_rate": 6.238377265381489e-06, "loss": 32.1159, "step": 239150 }, { "epoch": 0.48311833126613524, "grad_norm": 12.32452392578125, "learning_rate": 6.2380390723381666e-06, "loss": 10.9337, "step": 239160 }, { "epoch": 0.48313853189881906, "grad_norm": 319.5767517089844, "learning_rate": 6.23770087326079e-06, "loss": 14.3367, "step": 239170 }, { "epoch": 0.4831587325315029, "grad_norm": 401.93768310546875, "learning_rate": 6.237362668151013e-06, "loss": 19.1706, "step": 239180 }, { "epoch": 0.4831789331641867, "grad_norm": 37.264705657958984, "learning_rate": 6.237024457010478e-06, "loss": 21.2633, "step": 239190 }, { "epoch": 0.4831991337968705, "grad_norm": 135.05657958984375, "learning_rate": 6.236686239840836e-06, "loss": 19.7726, "step": 239200 }, { "epoch": 0.48321933442955434, "grad_norm": 215.82371520996094, "learning_rate": 6.236348016643735e-06, "loss": 18.2141, "step": 239210 }, { "epoch": 0.48323953506223816, "grad_norm": 223.0302734375, "learning_rate": 6.236009787420824e-06, "loss": 20.4693, "step": 239220 }, { "epoch": 0.483259735694922, "grad_norm": 74.56344604492188, "learning_rate": 6.235671552173752e-06, "loss": 17.1195, "step": 239230 }, { "epoch": 0.4832799363276058, "grad_norm": 241.774658203125, "learning_rate": 6.2353333109041655e-06, "loss": 14.8707, "step": 239240 }, { "epoch": 0.4833001369602896, "grad_norm": 504.5714416503906, "learning_rate": 6.234995063613716e-06, "loss": 15.468, "step": 239250 }, { "epoch": 0.48332033759297344, "grad_norm": 800.693359375, "learning_rate": 6.234656810304048e-06, "loss": 28.0521, "step": 239260 }, { "epoch": 0.4833405382256572, "grad_norm": 99.12686920166016, "learning_rate": 6.234318550976815e-06, "loss": 18.4294, "step": 239270 }, { "epoch": 0.483360738858341, "grad_norm": 115.95409393310547, "learning_rate": 6.233980285633661e-06, "loss": 21.402, "step": 239280 }, { "epoch": 0.48338093949102484, "grad_norm": 332.815673828125, "learning_rate": 6.233642014276238e-06, "loss": 18.2393, "step": 239290 }, { "epoch": 0.48340114012370866, "grad_norm": 94.37334442138672, "learning_rate": 6.233303736906193e-06, "loss": 22.5094, "step": 239300 }, { "epoch": 0.4834213407563925, "grad_norm": 313.26434326171875, "learning_rate": 6.232965453525175e-06, "loss": 20.3019, "step": 239310 }, { "epoch": 0.4834415413890763, "grad_norm": 136.8229522705078, "learning_rate": 6.2326271641348325e-06, "loss": 9.4145, "step": 239320 }, { "epoch": 0.4834617420217601, "grad_norm": 0.0, "learning_rate": 6.232288868736816e-06, "loss": 11.7528, "step": 239330 }, { "epoch": 0.48348194265444394, "grad_norm": 449.7021484375, "learning_rate": 6.231950567332773e-06, "loss": 20.896, "step": 239340 }, { "epoch": 0.48350214328712776, "grad_norm": 237.91893005371094, "learning_rate": 6.231612259924351e-06, "loss": 8.7707, "step": 239350 }, { "epoch": 0.4835223439198116, "grad_norm": 163.1540069580078, "learning_rate": 6.231273946513201e-06, "loss": 12.4073, "step": 239360 }, { "epoch": 0.4835425445524954, "grad_norm": 274.2789611816406, "learning_rate": 6.23093562710097e-06, "loss": 41.8237, "step": 239370 }, { "epoch": 0.4835627451851792, "grad_norm": 360.3392333984375, "learning_rate": 6.23059730168931e-06, "loss": 23.7447, "step": 239380 }, { "epoch": 0.48358294581786304, "grad_norm": 221.3330841064453, "learning_rate": 6.230258970279867e-06, "loss": 11.0327, "step": 239390 }, { "epoch": 0.4836031464505468, "grad_norm": 13.937956809997559, "learning_rate": 6.229920632874291e-06, "loss": 24.028, "step": 239400 }, { "epoch": 0.4836233470832306, "grad_norm": 169.00704956054688, "learning_rate": 6.229582289474231e-06, "loss": 25.8565, "step": 239410 }, { "epoch": 0.48364354771591445, "grad_norm": 361.52203369140625, "learning_rate": 6.229243940081336e-06, "loss": 26.4868, "step": 239420 }, { "epoch": 0.48366374834859827, "grad_norm": 184.466796875, "learning_rate": 6.228905584697254e-06, "loss": 22.0049, "step": 239430 }, { "epoch": 0.4836839489812821, "grad_norm": 349.5063171386719, "learning_rate": 6.228567223323637e-06, "loss": 17.0908, "step": 239440 }, { "epoch": 0.4837041496139659, "grad_norm": 379.5712585449219, "learning_rate": 6.228228855962133e-06, "loss": 17.3249, "step": 239450 }, { "epoch": 0.4837243502466497, "grad_norm": 288.5846252441406, "learning_rate": 6.227890482614388e-06, "loss": 29.3186, "step": 239460 }, { "epoch": 0.48374455087933355, "grad_norm": 1141.19384765625, "learning_rate": 6.227552103282056e-06, "loss": 37.8881, "step": 239470 }, { "epoch": 0.48376475151201737, "grad_norm": 117.52875518798828, "learning_rate": 6.227213717966782e-06, "loss": 18.2875, "step": 239480 }, { "epoch": 0.4837849521447012, "grad_norm": 125.18902587890625, "learning_rate": 6.226875326670218e-06, "loss": 10.9766, "step": 239490 }, { "epoch": 0.483805152777385, "grad_norm": 575.8245239257812, "learning_rate": 6.2265369293940135e-06, "loss": 21.3458, "step": 239500 }, { "epoch": 0.4838253534100688, "grad_norm": 375.1203918457031, "learning_rate": 6.226198526139815e-06, "loss": 11.2477, "step": 239510 }, { "epoch": 0.48384555404275265, "grad_norm": 524.1265869140625, "learning_rate": 6.225860116909276e-06, "loss": 17.991, "step": 239520 }, { "epoch": 0.4838657546754364, "grad_norm": 417.99365234375, "learning_rate": 6.225521701704042e-06, "loss": 19.5099, "step": 239530 }, { "epoch": 0.48388595530812023, "grad_norm": 521.28857421875, "learning_rate": 6.225183280525763e-06, "loss": 21.0014, "step": 239540 }, { "epoch": 0.48390615594080405, "grad_norm": 40.84321212768555, "learning_rate": 6.2248448533760895e-06, "loss": 15.9449, "step": 239550 }, { "epoch": 0.48392635657348787, "grad_norm": 0.0, "learning_rate": 6.224506420256673e-06, "loss": 18.0791, "step": 239560 }, { "epoch": 0.4839465572061717, "grad_norm": 259.818115234375, "learning_rate": 6.2241679811691595e-06, "loss": 15.7618, "step": 239570 }, { "epoch": 0.4839667578388555, "grad_norm": 0.0, "learning_rate": 6.223829536115198e-06, "loss": 11.4434, "step": 239580 }, { "epoch": 0.48398695847153933, "grad_norm": 237.3679656982422, "learning_rate": 6.223491085096441e-06, "loss": 20.5969, "step": 239590 }, { "epoch": 0.48400715910422315, "grad_norm": 358.13629150390625, "learning_rate": 6.223152628114537e-06, "loss": 15.2316, "step": 239600 }, { "epoch": 0.48402735973690697, "grad_norm": 571.1218872070312, "learning_rate": 6.222814165171136e-06, "loss": 28.4111, "step": 239610 }, { "epoch": 0.4840475603695908, "grad_norm": 334.9637756347656, "learning_rate": 6.222475696267885e-06, "loss": 14.2717, "step": 239620 }, { "epoch": 0.4840677610022746, "grad_norm": 173.05181884765625, "learning_rate": 6.222137221406439e-06, "loss": 15.1766, "step": 239630 }, { "epoch": 0.48408796163495843, "grad_norm": 489.7096862792969, "learning_rate": 6.221798740588442e-06, "loss": 32.1572, "step": 239640 }, { "epoch": 0.48410816226764225, "grad_norm": 143.32608032226562, "learning_rate": 6.221460253815546e-06, "loss": 15.4107, "step": 239650 }, { "epoch": 0.484128362900326, "grad_norm": 205.9344940185547, "learning_rate": 6.221121761089402e-06, "loss": 22.4066, "step": 239660 }, { "epoch": 0.48414856353300983, "grad_norm": 187.39468383789062, "learning_rate": 6.220783262411658e-06, "loss": 10.0653, "step": 239670 }, { "epoch": 0.48416876416569365, "grad_norm": 846.5540161132812, "learning_rate": 6.220444757783966e-06, "loss": 18.6988, "step": 239680 }, { "epoch": 0.4841889647983775, "grad_norm": 286.5925598144531, "learning_rate": 6.220106247207972e-06, "loss": 17.4961, "step": 239690 }, { "epoch": 0.4842091654310613, "grad_norm": 445.2710266113281, "learning_rate": 6.219767730685329e-06, "loss": 24.0908, "step": 239700 }, { "epoch": 0.4842293660637451, "grad_norm": 96.57325744628906, "learning_rate": 6.219429208217685e-06, "loss": 16.1158, "step": 239710 }, { "epoch": 0.48424956669642893, "grad_norm": 449.6394958496094, "learning_rate": 6.219090679806694e-06, "loss": 15.4541, "step": 239720 }, { "epoch": 0.48426976732911275, "grad_norm": 176.24099731445312, "learning_rate": 6.218752145453999e-06, "loss": 25.3755, "step": 239730 }, { "epoch": 0.4842899679617966, "grad_norm": 400.73919677734375, "learning_rate": 6.218413605161258e-06, "loss": 18.9906, "step": 239740 }, { "epoch": 0.4843101685944804, "grad_norm": 301.9634704589844, "learning_rate": 6.218075058930113e-06, "loss": 10.5097, "step": 239750 }, { "epoch": 0.4843303692271642, "grad_norm": 327.6512145996094, "learning_rate": 6.217736506762219e-06, "loss": 20.736, "step": 239760 }, { "epoch": 0.48435056985984803, "grad_norm": 458.45599365234375, "learning_rate": 6.217397948659228e-06, "loss": 30.0819, "step": 239770 }, { "epoch": 0.4843707704925318, "grad_norm": 239.54640197753906, "learning_rate": 6.217059384622782e-06, "loss": 12.9534, "step": 239780 }, { "epoch": 0.4843909711252156, "grad_norm": 304.87615966796875, "learning_rate": 6.21672081465454e-06, "loss": 19.5079, "step": 239790 }, { "epoch": 0.48441117175789944, "grad_norm": 113.4278564453125, "learning_rate": 6.216382238756147e-06, "loss": 15.3742, "step": 239800 }, { "epoch": 0.48443137239058326, "grad_norm": 384.94677734375, "learning_rate": 6.216043656929254e-06, "loss": 25.855, "step": 239810 }, { "epoch": 0.4844515730232671, "grad_norm": 385.619873046875, "learning_rate": 6.215705069175513e-06, "loss": 26.0259, "step": 239820 }, { "epoch": 0.4844717736559509, "grad_norm": 345.20758056640625, "learning_rate": 6.215366475496572e-06, "loss": 24.4869, "step": 239830 }, { "epoch": 0.4844919742886347, "grad_norm": 234.2626495361328, "learning_rate": 6.215027875894082e-06, "loss": 17.2853, "step": 239840 }, { "epoch": 0.48451217492131854, "grad_norm": 189.27108764648438, "learning_rate": 6.214689270369694e-06, "loss": 16.4821, "step": 239850 }, { "epoch": 0.48453237555400236, "grad_norm": 283.308837890625, "learning_rate": 6.214350658925058e-06, "loss": 11.0662, "step": 239860 }, { "epoch": 0.4845525761866862, "grad_norm": 269.8576354980469, "learning_rate": 6.2140120415618235e-06, "loss": 22.5737, "step": 239870 }, { "epoch": 0.48457277681937, "grad_norm": 221.6258087158203, "learning_rate": 6.213673418281643e-06, "loss": 19.489, "step": 239880 }, { "epoch": 0.4845929774520538, "grad_norm": 352.7137145996094, "learning_rate": 6.213334789086163e-06, "loss": 21.1031, "step": 239890 }, { "epoch": 0.48461317808473764, "grad_norm": 125.46376037597656, "learning_rate": 6.212996153977038e-06, "loss": 13.3581, "step": 239900 }, { "epoch": 0.4846333787174214, "grad_norm": 189.44529724121094, "learning_rate": 6.212657512955916e-06, "loss": 17.1885, "step": 239910 }, { "epoch": 0.4846535793501052, "grad_norm": 179.2014617919922, "learning_rate": 6.212318866024449e-06, "loss": 22.2273, "step": 239920 }, { "epoch": 0.48467377998278904, "grad_norm": 185.9795379638672, "learning_rate": 6.211980213184287e-06, "loss": 16.5336, "step": 239930 }, { "epoch": 0.48469398061547286, "grad_norm": 203.96990966796875, "learning_rate": 6.21164155443708e-06, "loss": 9.582, "step": 239940 }, { "epoch": 0.4847141812481567, "grad_norm": 92.93877410888672, "learning_rate": 6.21130288978448e-06, "loss": 20.3592, "step": 239950 }, { "epoch": 0.4847343818808405, "grad_norm": 405.9676208496094, "learning_rate": 6.210964219228135e-06, "loss": 27.4034, "step": 239960 }, { "epoch": 0.4847545825135243, "grad_norm": 108.10200500488281, "learning_rate": 6.2106255427697e-06, "loss": 30.0937, "step": 239970 }, { "epoch": 0.48477478314620814, "grad_norm": 124.19640350341797, "learning_rate": 6.21028686041082e-06, "loss": 15.3357, "step": 239980 }, { "epoch": 0.48479498377889196, "grad_norm": 87.72117614746094, "learning_rate": 6.20994817215315e-06, "loss": 9.0507, "step": 239990 }, { "epoch": 0.4848151844115758, "grad_norm": 138.3188018798828, "learning_rate": 6.209609477998339e-06, "loss": 13.2754, "step": 240000 }, { "epoch": 0.4848353850442596, "grad_norm": 55.41876983642578, "learning_rate": 6.209270777948038e-06, "loss": 9.5635, "step": 240010 }, { "epoch": 0.4848555856769434, "grad_norm": 848.6141357421875, "learning_rate": 6.208932072003899e-06, "loss": 23.6116, "step": 240020 }, { "epoch": 0.48487578630962724, "grad_norm": 67.02335357666016, "learning_rate": 6.208593360167571e-06, "loss": 16.8831, "step": 240030 }, { "epoch": 0.484895986942311, "grad_norm": 291.74249267578125, "learning_rate": 6.208254642440705e-06, "loss": 16.3752, "step": 240040 }, { "epoch": 0.48491618757499483, "grad_norm": 326.95452880859375, "learning_rate": 6.207915918824952e-06, "loss": 29.4724, "step": 240050 }, { "epoch": 0.48493638820767865, "grad_norm": 622.47021484375, "learning_rate": 6.207577189321965e-06, "loss": 20.6376, "step": 240060 }, { "epoch": 0.48495658884036247, "grad_norm": 332.9981689453125, "learning_rate": 6.2072384539333914e-06, "loss": 25.5091, "step": 240070 }, { "epoch": 0.4849767894730463, "grad_norm": 194.55902099609375, "learning_rate": 6.206899712660887e-06, "loss": 27.1973, "step": 240080 }, { "epoch": 0.4849969901057301, "grad_norm": 215.84474182128906, "learning_rate": 6.206560965506097e-06, "loss": 19.6215, "step": 240090 }, { "epoch": 0.48501719073841393, "grad_norm": 174.28416442871094, "learning_rate": 6.206222212470675e-06, "loss": 18.6864, "step": 240100 }, { "epoch": 0.48503739137109775, "grad_norm": 6.737982749938965, "learning_rate": 6.205883453556274e-06, "loss": 27.2004, "step": 240110 }, { "epoch": 0.48505759200378157, "grad_norm": 462.28106689453125, "learning_rate": 6.205544688764542e-06, "loss": 27.1536, "step": 240120 }, { "epoch": 0.4850777926364654, "grad_norm": 387.1270751953125, "learning_rate": 6.205205918097133e-06, "loss": 22.2337, "step": 240130 }, { "epoch": 0.4850979932691492, "grad_norm": 401.2827453613281, "learning_rate": 6.204867141555695e-06, "loss": 16.8462, "step": 240140 }, { "epoch": 0.48511819390183303, "grad_norm": 171.7418975830078, "learning_rate": 6.20452835914188e-06, "loss": 26.9797, "step": 240150 }, { "epoch": 0.48513839453451685, "grad_norm": 145.93038940429688, "learning_rate": 6.204189570857342e-06, "loss": 20.0885, "step": 240160 }, { "epoch": 0.4851585951672006, "grad_norm": 140.94021606445312, "learning_rate": 6.2038507767037295e-06, "loss": 17.2921, "step": 240170 }, { "epoch": 0.48517879579988443, "grad_norm": 414.5526428222656, "learning_rate": 6.2035119766826935e-06, "loss": 13.0241, "step": 240180 }, { "epoch": 0.48519899643256825, "grad_norm": 214.25506591796875, "learning_rate": 6.203173170795887e-06, "loss": 19.6998, "step": 240190 }, { "epoch": 0.4852191970652521, "grad_norm": 397.6249084472656, "learning_rate": 6.202834359044959e-06, "loss": 15.9248, "step": 240200 }, { "epoch": 0.4852393976979359, "grad_norm": 205.77560424804688, "learning_rate": 6.2024955414315634e-06, "loss": 12.4579, "step": 240210 }, { "epoch": 0.4852595983306197, "grad_norm": 138.2369384765625, "learning_rate": 6.202156717957351e-06, "loss": 24.9938, "step": 240220 }, { "epoch": 0.48527979896330353, "grad_norm": 302.459716796875, "learning_rate": 6.2018178886239695e-06, "loss": 21.8237, "step": 240230 }, { "epoch": 0.48529999959598735, "grad_norm": 207.39939880371094, "learning_rate": 6.201479053433077e-06, "loss": 16.2939, "step": 240240 }, { "epoch": 0.4853202002286712, "grad_norm": 35.47924041748047, "learning_rate": 6.2011402123863194e-06, "loss": 25.804, "step": 240250 }, { "epoch": 0.485340400861355, "grad_norm": 205.21534729003906, "learning_rate": 6.2008013654853505e-06, "loss": 19.0904, "step": 240260 }, { "epoch": 0.4853606014940388, "grad_norm": 189.2057647705078, "learning_rate": 6.200462512731821e-06, "loss": 14.5697, "step": 240270 }, { "epoch": 0.48538080212672263, "grad_norm": 154.23318481445312, "learning_rate": 6.200123654127383e-06, "loss": 18.901, "step": 240280 }, { "epoch": 0.4854010027594064, "grad_norm": 197.2017059326172, "learning_rate": 6.199784789673689e-06, "loss": 22.8531, "step": 240290 }, { "epoch": 0.4854212033920902, "grad_norm": 691.8828735351562, "learning_rate": 6.199445919372388e-06, "loss": 42.1932, "step": 240300 }, { "epoch": 0.48544140402477404, "grad_norm": 447.4335021972656, "learning_rate": 6.199107043225134e-06, "loss": 13.8719, "step": 240310 }, { "epoch": 0.48546160465745786, "grad_norm": 339.3453369140625, "learning_rate": 6.198768161233577e-06, "loss": 25.7103, "step": 240320 }, { "epoch": 0.4854818052901417, "grad_norm": 126.9661636352539, "learning_rate": 6.19842927339937e-06, "loss": 21.8783, "step": 240330 }, { "epoch": 0.4855020059228255, "grad_norm": 81.35147094726562, "learning_rate": 6.198090379724163e-06, "loss": 7.627, "step": 240340 }, { "epoch": 0.4855222065555093, "grad_norm": 97.25090026855469, "learning_rate": 6.1977514802096105e-06, "loss": 10.6596, "step": 240350 }, { "epoch": 0.48554240718819314, "grad_norm": 293.11083984375, "learning_rate": 6.197412574857361e-06, "loss": 18.8455, "step": 240360 }, { "epoch": 0.48556260782087696, "grad_norm": 253.9628448486328, "learning_rate": 6.197073663669069e-06, "loss": 15.7987, "step": 240370 }, { "epoch": 0.4855828084535608, "grad_norm": 316.69049072265625, "learning_rate": 6.196734746646384e-06, "loss": 27.4016, "step": 240380 }, { "epoch": 0.4856030090862446, "grad_norm": 146.34393310546875, "learning_rate": 6.19639582379096e-06, "loss": 7.056, "step": 240390 }, { "epoch": 0.4856232097189284, "grad_norm": 271.8760681152344, "learning_rate": 6.1960568951044475e-06, "loss": 19.45, "step": 240400 }, { "epoch": 0.48564341035161224, "grad_norm": 586.0911865234375, "learning_rate": 6.195717960588499e-06, "loss": 15.2366, "step": 240410 }, { "epoch": 0.485663610984296, "grad_norm": 311.353515625, "learning_rate": 6.195379020244765e-06, "loss": 16.7152, "step": 240420 }, { "epoch": 0.4856838116169798, "grad_norm": 523.8697509765625, "learning_rate": 6.195040074074899e-06, "loss": 29.0278, "step": 240430 }, { "epoch": 0.48570401224966364, "grad_norm": 168.13125610351562, "learning_rate": 6.1947011220805535e-06, "loss": 8.9189, "step": 240440 }, { "epoch": 0.48572421288234746, "grad_norm": 332.91680908203125, "learning_rate": 6.19436216426338e-06, "loss": 22.113, "step": 240450 }, { "epoch": 0.4857444135150313, "grad_norm": 982.1851806640625, "learning_rate": 6.194023200625029e-06, "loss": 12.5518, "step": 240460 }, { "epoch": 0.4857646141477151, "grad_norm": 290.2177429199219, "learning_rate": 6.193684231167154e-06, "loss": 19.4946, "step": 240470 }, { "epoch": 0.4857848147803989, "grad_norm": 408.1614990234375, "learning_rate": 6.193345255891407e-06, "loss": 22.1923, "step": 240480 }, { "epoch": 0.48580501541308274, "grad_norm": 255.12001037597656, "learning_rate": 6.19300627479944e-06, "loss": 15.5154, "step": 240490 }, { "epoch": 0.48582521604576656, "grad_norm": 94.53915405273438, "learning_rate": 6.192667287892905e-06, "loss": 24.7071, "step": 240500 }, { "epoch": 0.4858454166784504, "grad_norm": 201.9344024658203, "learning_rate": 6.192328295173455e-06, "loss": 17.0147, "step": 240510 }, { "epoch": 0.4858656173111342, "grad_norm": 117.32344818115234, "learning_rate": 6.191989296642741e-06, "loss": 44.5106, "step": 240520 }, { "epoch": 0.485885817943818, "grad_norm": 533.1652221679688, "learning_rate": 6.1916502923024145e-06, "loss": 26.9655, "step": 240530 }, { "epoch": 0.48590601857650184, "grad_norm": 343.40142822265625, "learning_rate": 6.191311282154131e-06, "loss": 26.4013, "step": 240540 }, { "epoch": 0.4859262192091856, "grad_norm": 389.5896301269531, "learning_rate": 6.1909722661995394e-06, "loss": 14.4454, "step": 240550 }, { "epoch": 0.4859464198418694, "grad_norm": 210.77767944335938, "learning_rate": 6.190633244440295e-06, "loss": 8.3043, "step": 240560 }, { "epoch": 0.48596662047455325, "grad_norm": 126.13627624511719, "learning_rate": 6.190294216878045e-06, "loss": 10.8684, "step": 240570 }, { "epoch": 0.48598682110723707, "grad_norm": 129.61141967773438, "learning_rate": 6.189955183514449e-06, "loss": 17.372, "step": 240580 }, { "epoch": 0.4860070217399209, "grad_norm": 419.82537841796875, "learning_rate": 6.1896161443511546e-06, "loss": 28.1424, "step": 240590 }, { "epoch": 0.4860272223726047, "grad_norm": 323.11602783203125, "learning_rate": 6.189277099389816e-06, "loss": 29.8531, "step": 240600 }, { "epoch": 0.4860474230052885, "grad_norm": 415.5751037597656, "learning_rate": 6.188938048632084e-06, "loss": 24.2712, "step": 240610 }, { "epoch": 0.48606762363797235, "grad_norm": 195.148193359375, "learning_rate": 6.188598992079613e-06, "loss": 15.4584, "step": 240620 }, { "epoch": 0.48608782427065617, "grad_norm": 182.6754913330078, "learning_rate": 6.188259929734054e-06, "loss": 27.7856, "step": 240630 }, { "epoch": 0.48610802490334, "grad_norm": 252.10833740234375, "learning_rate": 6.187920861597061e-06, "loss": 25.5658, "step": 240640 }, { "epoch": 0.4861282255360238, "grad_norm": 1046.8382568359375, "learning_rate": 6.187581787670285e-06, "loss": 24.8682, "step": 240650 }, { "epoch": 0.4861484261687076, "grad_norm": 266.3685302734375, "learning_rate": 6.18724270795538e-06, "loss": 8.5924, "step": 240660 }, { "epoch": 0.48616862680139145, "grad_norm": 904.7723999023438, "learning_rate": 6.186903622453998e-06, "loss": 28.5433, "step": 240670 }, { "epoch": 0.4861888274340752, "grad_norm": 16.73373794555664, "learning_rate": 6.18656453116779e-06, "loss": 18.3058, "step": 240680 }, { "epoch": 0.48620902806675903, "grad_norm": 160.66387939453125, "learning_rate": 6.186225434098413e-06, "loss": 34.7566, "step": 240690 }, { "epoch": 0.48622922869944285, "grad_norm": 174.0893096923828, "learning_rate": 6.185886331247516e-06, "loss": 10.6946, "step": 240700 }, { "epoch": 0.48624942933212667, "grad_norm": 0.06804550439119339, "learning_rate": 6.1855472226167525e-06, "loss": 16.4481, "step": 240710 }, { "epoch": 0.4862696299648105, "grad_norm": 88.97247314453125, "learning_rate": 6.185208108207776e-06, "loss": 8.1724, "step": 240720 }, { "epoch": 0.4862898305974943, "grad_norm": 444.6557922363281, "learning_rate": 6.184868988022238e-06, "loss": 30.6211, "step": 240730 }, { "epoch": 0.48631003123017813, "grad_norm": 880.5879516601562, "learning_rate": 6.184529862061794e-06, "loss": 16.8175, "step": 240740 }, { "epoch": 0.48633023186286195, "grad_norm": 325.5740661621094, "learning_rate": 6.184190730328095e-06, "loss": 14.7272, "step": 240750 }, { "epoch": 0.48635043249554577, "grad_norm": 383.2069396972656, "learning_rate": 6.1838515928227925e-06, "loss": 15.0852, "step": 240760 }, { "epoch": 0.4863706331282296, "grad_norm": 295.3523254394531, "learning_rate": 6.1835124495475415e-06, "loss": 14.9373, "step": 240770 }, { "epoch": 0.4863908337609134, "grad_norm": 275.3542175292969, "learning_rate": 6.183173300503995e-06, "loss": 13.4133, "step": 240780 }, { "epoch": 0.48641103439359723, "grad_norm": 416.6690368652344, "learning_rate": 6.182834145693805e-06, "loss": 33.8026, "step": 240790 }, { "epoch": 0.48643123502628105, "grad_norm": 116.412353515625, "learning_rate": 6.182494985118625e-06, "loss": 12.2782, "step": 240800 }, { "epoch": 0.4864514356589648, "grad_norm": 234.5225067138672, "learning_rate": 6.182155818780107e-06, "loss": 21.7785, "step": 240810 }, { "epoch": 0.48647163629164863, "grad_norm": 76.25149536132812, "learning_rate": 6.181816646679904e-06, "loss": 15.4367, "step": 240820 }, { "epoch": 0.48649183692433245, "grad_norm": 15.65932559967041, "learning_rate": 6.181477468819673e-06, "loss": 6.0221, "step": 240830 }, { "epoch": 0.4865120375570163, "grad_norm": 416.5755615234375, "learning_rate": 6.181138285201062e-06, "loss": 22.9479, "step": 240840 }, { "epoch": 0.4865322381897001, "grad_norm": 307.73016357421875, "learning_rate": 6.180799095825727e-06, "loss": 11.6389, "step": 240850 }, { "epoch": 0.4865524388223839, "grad_norm": 308.2898254394531, "learning_rate": 6.18045990069532e-06, "loss": 18.9749, "step": 240860 }, { "epoch": 0.48657263945506773, "grad_norm": 295.3081970214844, "learning_rate": 6.180120699811495e-06, "loss": 8.3608, "step": 240870 }, { "epoch": 0.48659284008775155, "grad_norm": 369.90972900390625, "learning_rate": 6.179781493175906e-06, "loss": 18.2219, "step": 240880 }, { "epoch": 0.4866130407204354, "grad_norm": 1055.2210693359375, "learning_rate": 6.179442280790202e-06, "loss": 23.1065, "step": 240890 }, { "epoch": 0.4866332413531192, "grad_norm": 261.70111083984375, "learning_rate": 6.179103062656042e-06, "loss": 25.9149, "step": 240900 }, { "epoch": 0.486653441985803, "grad_norm": 157.80194091796875, "learning_rate": 6.178763838775076e-06, "loss": 12.1859, "step": 240910 }, { "epoch": 0.48667364261848683, "grad_norm": 244.3269805908203, "learning_rate": 6.178424609148957e-06, "loss": 13.2525, "step": 240920 }, { "epoch": 0.4866938432511706, "grad_norm": 306.4751281738281, "learning_rate": 6.178085373779341e-06, "loss": 21.031, "step": 240930 }, { "epoch": 0.4867140438838544, "grad_norm": 262.1776428222656, "learning_rate": 6.17774613266788e-06, "loss": 12.882, "step": 240940 }, { "epoch": 0.48673424451653824, "grad_norm": 210.46412658691406, "learning_rate": 6.177406885816224e-06, "loss": 32.3747, "step": 240950 }, { "epoch": 0.48675444514922206, "grad_norm": 387.5442199707031, "learning_rate": 6.177067633226034e-06, "loss": 21.606, "step": 240960 }, { "epoch": 0.4867746457819059, "grad_norm": 155.57504272460938, "learning_rate": 6.1767283748989555e-06, "loss": 20.8606, "step": 240970 }, { "epoch": 0.4867948464145897, "grad_norm": 441.5086975097656, "learning_rate": 6.176389110836647e-06, "loss": 33.2801, "step": 240980 }, { "epoch": 0.4868150470472735, "grad_norm": 189.985107421875, "learning_rate": 6.176049841040762e-06, "loss": 20.6566, "step": 240990 }, { "epoch": 0.48683524767995734, "grad_norm": 278.9967346191406, "learning_rate": 6.17571056551295e-06, "loss": 14.2962, "step": 241000 }, { "epoch": 0.48685544831264116, "grad_norm": 461.4620361328125, "learning_rate": 6.1753712842548695e-06, "loss": 18.5959, "step": 241010 }, { "epoch": 0.486875648945325, "grad_norm": 345.50732421875, "learning_rate": 6.175031997268171e-06, "loss": 20.2937, "step": 241020 }, { "epoch": 0.4868958495780088, "grad_norm": 184.00990295410156, "learning_rate": 6.174692704554509e-06, "loss": 8.5737, "step": 241030 }, { "epoch": 0.4869160502106926, "grad_norm": 469.7868957519531, "learning_rate": 6.174353406115537e-06, "loss": 34.9519, "step": 241040 }, { "epoch": 0.48693625084337644, "grad_norm": 239.0948028564453, "learning_rate": 6.17401410195291e-06, "loss": 24.4598, "step": 241050 }, { "epoch": 0.4869564514760602, "grad_norm": 24.07925796508789, "learning_rate": 6.17367479206828e-06, "loss": 11.9661, "step": 241060 }, { "epoch": 0.486976652108744, "grad_norm": 218.5525360107422, "learning_rate": 6.173335476463303e-06, "loss": 46.3871, "step": 241070 }, { "epoch": 0.48699685274142784, "grad_norm": 224.46897888183594, "learning_rate": 6.172996155139629e-06, "loss": 14.5911, "step": 241080 }, { "epoch": 0.48701705337411166, "grad_norm": 54.57894515991211, "learning_rate": 6.172656828098914e-06, "loss": 21.5697, "step": 241090 }, { "epoch": 0.4870372540067955, "grad_norm": 58.68198013305664, "learning_rate": 6.172317495342812e-06, "loss": 41.5586, "step": 241100 }, { "epoch": 0.4870574546394793, "grad_norm": 199.4386444091797, "learning_rate": 6.171978156872978e-06, "loss": 18.9443, "step": 241110 }, { "epoch": 0.4870776552721631, "grad_norm": 30.671218872070312, "learning_rate": 6.171638812691065e-06, "loss": 31.352, "step": 241120 }, { "epoch": 0.48709785590484694, "grad_norm": 362.4325866699219, "learning_rate": 6.171299462798725e-06, "loss": 28.7295, "step": 241130 }, { "epoch": 0.48711805653753076, "grad_norm": 94.90692138671875, "learning_rate": 6.170960107197613e-06, "loss": 9.8534, "step": 241140 }, { "epoch": 0.4871382571702146, "grad_norm": 152.0763397216797, "learning_rate": 6.1706207458893855e-06, "loss": 18.505, "step": 241150 }, { "epoch": 0.4871584578028984, "grad_norm": 151.4791717529297, "learning_rate": 6.170281378875692e-06, "loss": 25.2215, "step": 241160 }, { "epoch": 0.4871786584355822, "grad_norm": 247.98098754882812, "learning_rate": 6.169942006158192e-06, "loss": 18.1403, "step": 241170 }, { "epoch": 0.48719885906826604, "grad_norm": 193.69239807128906, "learning_rate": 6.169602627738533e-06, "loss": 16.0238, "step": 241180 }, { "epoch": 0.4872190597009498, "grad_norm": 271.242919921875, "learning_rate": 6.169263243618375e-06, "loss": 11.2448, "step": 241190 }, { "epoch": 0.48723926033363363, "grad_norm": 234.13858032226562, "learning_rate": 6.168923853799369e-06, "loss": 24.7082, "step": 241200 }, { "epoch": 0.48725946096631745, "grad_norm": 434.4720458984375, "learning_rate": 6.16858445828317e-06, "loss": 33.5274, "step": 241210 }, { "epoch": 0.48727966159900127, "grad_norm": 249.12771606445312, "learning_rate": 6.168245057071434e-06, "loss": 20.4029, "step": 241220 }, { "epoch": 0.4872998622316851, "grad_norm": 220.45480346679688, "learning_rate": 6.167905650165811e-06, "loss": 20.1897, "step": 241230 }, { "epoch": 0.4873200628643689, "grad_norm": 289.7879638671875, "learning_rate": 6.167566237567957e-06, "loss": 27.8708, "step": 241240 }, { "epoch": 0.48734026349705273, "grad_norm": 230.7009735107422, "learning_rate": 6.1672268192795285e-06, "loss": 14.1613, "step": 241250 }, { "epoch": 0.48736046412973655, "grad_norm": 621.3936157226562, "learning_rate": 6.166887395302177e-06, "loss": 45.2133, "step": 241260 }, { "epoch": 0.48738066476242037, "grad_norm": 92.13549041748047, "learning_rate": 6.166547965637557e-06, "loss": 17.3116, "step": 241270 }, { "epoch": 0.4874008653951042, "grad_norm": 382.1002502441406, "learning_rate": 6.166208530287327e-06, "loss": 33.1482, "step": 241280 }, { "epoch": 0.487421066027788, "grad_norm": 454.0991516113281, "learning_rate": 6.165869089253134e-06, "loss": 17.6421, "step": 241290 }, { "epoch": 0.48744126666047183, "grad_norm": 338.6982116699219, "learning_rate": 6.16552964253664e-06, "loss": 11.7415, "step": 241300 }, { "epoch": 0.48746146729315565, "grad_norm": 322.78436279296875, "learning_rate": 6.165190190139494e-06, "loss": 12.0671, "step": 241310 }, { "epoch": 0.4874816679258394, "grad_norm": 28.025222778320312, "learning_rate": 6.164850732063352e-06, "loss": 15.7648, "step": 241320 }, { "epoch": 0.48750186855852323, "grad_norm": 276.6464538574219, "learning_rate": 6.164511268309871e-06, "loss": 12.2337, "step": 241330 }, { "epoch": 0.48752206919120705, "grad_norm": 394.0804443359375, "learning_rate": 6.1641717988807006e-06, "loss": 21.6622, "step": 241340 }, { "epoch": 0.4875422698238909, "grad_norm": 188.0593719482422, "learning_rate": 6.163832323777499e-06, "loss": 17.8226, "step": 241350 }, { "epoch": 0.4875624704565747, "grad_norm": 576.9692993164062, "learning_rate": 6.16349284300192e-06, "loss": 15.293, "step": 241360 }, { "epoch": 0.4875826710892585, "grad_norm": 219.73329162597656, "learning_rate": 6.1631533565556175e-06, "loss": 16.3905, "step": 241370 }, { "epoch": 0.48760287172194233, "grad_norm": 289.10614013671875, "learning_rate": 6.162813864440247e-06, "loss": 9.1417, "step": 241380 }, { "epoch": 0.48762307235462615, "grad_norm": 192.52801513671875, "learning_rate": 6.162474366657464e-06, "loss": 16.364, "step": 241390 }, { "epoch": 0.48764327298731, "grad_norm": 333.6809997558594, "learning_rate": 6.1621348632089205e-06, "loss": 21.6459, "step": 241400 }, { "epoch": 0.4876634736199938, "grad_norm": 298.90283203125, "learning_rate": 6.161795354096273e-06, "loss": 38.2838, "step": 241410 }, { "epoch": 0.4876836742526776, "grad_norm": 246.08741760253906, "learning_rate": 6.161455839321175e-06, "loss": 18.4806, "step": 241420 }, { "epoch": 0.48770387488536143, "grad_norm": 335.72760009765625, "learning_rate": 6.161116318885283e-06, "loss": 21.1521, "step": 241430 }, { "epoch": 0.48772407551804525, "grad_norm": 219.54647827148438, "learning_rate": 6.160776792790252e-06, "loss": 10.6644, "step": 241440 }, { "epoch": 0.487744276150729, "grad_norm": 370.33990478515625, "learning_rate": 6.1604372610377335e-06, "loss": 7.7246, "step": 241450 }, { "epoch": 0.48776447678341284, "grad_norm": 623.81884765625, "learning_rate": 6.160097723629387e-06, "loss": 12.8634, "step": 241460 }, { "epoch": 0.48778467741609666, "grad_norm": 278.91339111328125, "learning_rate": 6.159758180566863e-06, "loss": 13.8126, "step": 241470 }, { "epoch": 0.4878048780487805, "grad_norm": 235.04090881347656, "learning_rate": 6.159418631851818e-06, "loss": 21.7647, "step": 241480 }, { "epoch": 0.4878250786814643, "grad_norm": 391.0286865234375, "learning_rate": 6.159079077485909e-06, "loss": 20.2432, "step": 241490 }, { "epoch": 0.4878452793141481, "grad_norm": 18.111297607421875, "learning_rate": 6.158739517470786e-06, "loss": 11.1069, "step": 241500 }, { "epoch": 0.48786547994683194, "grad_norm": 416.43402099609375, "learning_rate": 6.158399951808111e-06, "loss": 13.0109, "step": 241510 }, { "epoch": 0.48788568057951576, "grad_norm": 179.04647827148438, "learning_rate": 6.158060380499533e-06, "loss": 13.8015, "step": 241520 }, { "epoch": 0.4879058812121996, "grad_norm": 289.6124267578125, "learning_rate": 6.1577208035467095e-06, "loss": 20.661, "step": 241530 }, { "epoch": 0.4879260818448834, "grad_norm": 601.45263671875, "learning_rate": 6.157381220951295e-06, "loss": 28.5808, "step": 241540 }, { "epoch": 0.4879462824775672, "grad_norm": 283.4373779296875, "learning_rate": 6.157041632714945e-06, "loss": 20.2555, "step": 241550 }, { "epoch": 0.48796648311025104, "grad_norm": 419.14752197265625, "learning_rate": 6.1567020388393155e-06, "loss": 59.8201, "step": 241560 }, { "epoch": 0.4879866837429348, "grad_norm": 172.04066467285156, "learning_rate": 6.156362439326059e-06, "loss": 17.5453, "step": 241570 }, { "epoch": 0.4880068843756186, "grad_norm": 405.05242919921875, "learning_rate": 6.156022834176832e-06, "loss": 17.7613, "step": 241580 }, { "epoch": 0.48802708500830244, "grad_norm": 95.43634796142578, "learning_rate": 6.155683223393291e-06, "loss": 12.9591, "step": 241590 }, { "epoch": 0.48804728564098626, "grad_norm": 304.0882263183594, "learning_rate": 6.155343606977091e-06, "loss": 20.0621, "step": 241600 }, { "epoch": 0.4880674862736701, "grad_norm": 132.29373168945312, "learning_rate": 6.155003984929883e-06, "loss": 12.3922, "step": 241610 }, { "epoch": 0.4880876869063539, "grad_norm": 246.58425903320312, "learning_rate": 6.15466435725333e-06, "loss": 10.8102, "step": 241620 }, { "epoch": 0.4881078875390377, "grad_norm": 24.403528213500977, "learning_rate": 6.154324723949079e-06, "loss": 12.1622, "step": 241630 }, { "epoch": 0.48812808817172154, "grad_norm": 94.25048065185547, "learning_rate": 6.153985085018792e-06, "loss": 24.9697, "step": 241640 }, { "epoch": 0.48814828880440536, "grad_norm": 420.8373107910156, "learning_rate": 6.15364544046412e-06, "loss": 36.5131, "step": 241650 }, { "epoch": 0.4881684894370892, "grad_norm": 117.90374755859375, "learning_rate": 6.153305790286721e-06, "loss": 21.9449, "step": 241660 }, { "epoch": 0.488188690069773, "grad_norm": 254.56349182128906, "learning_rate": 6.15296613448825e-06, "loss": 16.178, "step": 241670 }, { "epoch": 0.4882088907024568, "grad_norm": 131.3199005126953, "learning_rate": 6.152626473070361e-06, "loss": 21.2894, "step": 241680 }, { "epoch": 0.48822909133514064, "grad_norm": 630.3235473632812, "learning_rate": 6.152286806034711e-06, "loss": 28.2935, "step": 241690 }, { "epoch": 0.4882492919678244, "grad_norm": 303.0089111328125, "learning_rate": 6.151947133382954e-06, "loss": 23.0675, "step": 241700 }, { "epoch": 0.4882694926005082, "grad_norm": 305.8294677734375, "learning_rate": 6.151607455116746e-06, "loss": 18.4814, "step": 241710 }, { "epoch": 0.48828969323319205, "grad_norm": 303.9829406738281, "learning_rate": 6.1512677712377435e-06, "loss": 23.586, "step": 241720 }, { "epoch": 0.48830989386587587, "grad_norm": 381.2887268066406, "learning_rate": 6.150928081747603e-06, "loss": 30.2311, "step": 241730 }, { "epoch": 0.4883300944985597, "grad_norm": 549.41259765625, "learning_rate": 6.150588386647977e-06, "loss": 17.4367, "step": 241740 }, { "epoch": 0.4883502951312435, "grad_norm": 136.18472290039062, "learning_rate": 6.150248685940523e-06, "loss": 22.4723, "step": 241750 }, { "epoch": 0.4883704957639273, "grad_norm": 231.9735107421875, "learning_rate": 6.149908979626897e-06, "loss": 19.4173, "step": 241760 }, { "epoch": 0.48839069639661115, "grad_norm": 453.4429016113281, "learning_rate": 6.149569267708752e-06, "loss": 32.5, "step": 241770 }, { "epoch": 0.48841089702929497, "grad_norm": 233.57382202148438, "learning_rate": 6.149229550187748e-06, "loss": 25.0861, "step": 241780 }, { "epoch": 0.4884310976619788, "grad_norm": 0.0, "learning_rate": 6.148889827065538e-06, "loss": 19.796, "step": 241790 }, { "epoch": 0.4884512982946626, "grad_norm": 197.3252410888672, "learning_rate": 6.148550098343778e-06, "loss": 8.574, "step": 241800 }, { "epoch": 0.4884714989273464, "grad_norm": 166.8198699951172, "learning_rate": 6.148210364024125e-06, "loss": 14.2559, "step": 241810 }, { "epoch": 0.48849169956003025, "grad_norm": 188.31309509277344, "learning_rate": 6.147870624108233e-06, "loss": 10.7692, "step": 241820 }, { "epoch": 0.488511900192714, "grad_norm": 659.6917114257812, "learning_rate": 6.147530878597761e-06, "loss": 24.7973, "step": 241830 }, { "epoch": 0.48853210082539783, "grad_norm": 440.70806884765625, "learning_rate": 6.14719112749436e-06, "loss": 25.547, "step": 241840 }, { "epoch": 0.48855230145808165, "grad_norm": 6.579123020172119, "learning_rate": 6.146851370799689e-06, "loss": 26.7164, "step": 241850 }, { "epoch": 0.48857250209076547, "grad_norm": 230.09950256347656, "learning_rate": 6.146511608515404e-06, "loss": 12.5039, "step": 241860 }, { "epoch": 0.4885927027234493, "grad_norm": 187.87518310546875, "learning_rate": 6.146171840643161e-06, "loss": 24.4835, "step": 241870 }, { "epoch": 0.4886129033561331, "grad_norm": 126.58621215820312, "learning_rate": 6.145832067184614e-06, "loss": 19.0873, "step": 241880 }, { "epoch": 0.48863310398881693, "grad_norm": 222.16334533691406, "learning_rate": 6.145492288141422e-06, "loss": 14.8576, "step": 241890 }, { "epoch": 0.48865330462150075, "grad_norm": 216.0712432861328, "learning_rate": 6.145152503515239e-06, "loss": 11.6838, "step": 241900 }, { "epoch": 0.48867350525418457, "grad_norm": 229.30096435546875, "learning_rate": 6.144812713307721e-06, "loss": 19.2111, "step": 241910 }, { "epoch": 0.4886937058868684, "grad_norm": 118.03836822509766, "learning_rate": 6.144472917520526e-06, "loss": 7.803, "step": 241920 }, { "epoch": 0.4887139065195522, "grad_norm": 271.5470275878906, "learning_rate": 6.1441331161553065e-06, "loss": 7.987, "step": 241930 }, { "epoch": 0.48873410715223603, "grad_norm": 284.0252380371094, "learning_rate": 6.143793309213724e-06, "loss": 25.3061, "step": 241940 }, { "epoch": 0.48875430778491985, "grad_norm": 140.07044982910156, "learning_rate": 6.143453496697428e-06, "loss": 10.5412, "step": 241950 }, { "epoch": 0.4887745084176036, "grad_norm": 259.7754821777344, "learning_rate": 6.143113678608081e-06, "loss": 37.0058, "step": 241960 }, { "epoch": 0.48879470905028743, "grad_norm": 5.942337989807129, "learning_rate": 6.142773854947336e-06, "loss": 20.3017, "step": 241970 }, { "epoch": 0.48881490968297125, "grad_norm": 445.7484436035156, "learning_rate": 6.14243402571685e-06, "loss": 41.9422, "step": 241980 }, { "epoch": 0.4888351103156551, "grad_norm": 153.11683654785156, "learning_rate": 6.142094190918279e-06, "loss": 14.3078, "step": 241990 }, { "epoch": 0.4888553109483389, "grad_norm": 298.1707763671875, "learning_rate": 6.141754350553279e-06, "loss": 20.9585, "step": 242000 }, { "epoch": 0.4888755115810227, "grad_norm": 342.8335876464844, "learning_rate": 6.141414504623509e-06, "loss": 24.3723, "step": 242010 }, { "epoch": 0.48889571221370653, "grad_norm": 578.425537109375, "learning_rate": 6.14107465313062e-06, "loss": 18.5496, "step": 242020 }, { "epoch": 0.48891591284639035, "grad_norm": 273.84478759765625, "learning_rate": 6.140734796076273e-06, "loss": 14.2847, "step": 242030 }, { "epoch": 0.4889361134790742, "grad_norm": 226.500244140625, "learning_rate": 6.1403949334621215e-06, "loss": 11.9435, "step": 242040 }, { "epoch": 0.488956314111758, "grad_norm": 83.00065612792969, "learning_rate": 6.140055065289826e-06, "loss": 15.1001, "step": 242050 }, { "epoch": 0.4889765147444418, "grad_norm": 340.7629699707031, "learning_rate": 6.139715191561038e-06, "loss": 29.2302, "step": 242060 }, { "epoch": 0.48899671537712563, "grad_norm": 432.1591796875, "learning_rate": 6.139375312277418e-06, "loss": 17.3119, "step": 242070 }, { "epoch": 0.48901691600980945, "grad_norm": 375.1016845703125, "learning_rate": 6.1390354274406205e-06, "loss": 19.7062, "step": 242080 }, { "epoch": 0.4890371166424932, "grad_norm": 209.15945434570312, "learning_rate": 6.138695537052301e-06, "loss": 23.7006, "step": 242090 }, { "epoch": 0.48905731727517704, "grad_norm": 235.8109893798828, "learning_rate": 6.138355641114121e-06, "loss": 33.7467, "step": 242100 }, { "epoch": 0.48907751790786086, "grad_norm": 278.26116943359375, "learning_rate": 6.138015739627731e-06, "loss": 35.3396, "step": 242110 }, { "epoch": 0.4890977185405447, "grad_norm": 514.6587524414062, "learning_rate": 6.137675832594792e-06, "loss": 23.5382, "step": 242120 }, { "epoch": 0.4891179191732285, "grad_norm": 296.6807861328125, "learning_rate": 6.137335920016957e-06, "loss": 13.6984, "step": 242130 }, { "epoch": 0.4891381198059123, "grad_norm": 296.429443359375, "learning_rate": 6.136996001895885e-06, "loss": 27.4108, "step": 242140 }, { "epoch": 0.48915832043859614, "grad_norm": 183.99082946777344, "learning_rate": 6.136656078233233e-06, "loss": 21.278, "step": 242150 }, { "epoch": 0.48917852107127996, "grad_norm": 444.6952819824219, "learning_rate": 6.136316149030657e-06, "loss": 14.8864, "step": 242160 }, { "epoch": 0.4891987217039638, "grad_norm": 393.6529846191406, "learning_rate": 6.135976214289814e-06, "loss": 28.6546, "step": 242170 }, { "epoch": 0.4892189223366476, "grad_norm": 278.4736022949219, "learning_rate": 6.135636274012361e-06, "loss": 12.5016, "step": 242180 }, { "epoch": 0.4892391229693314, "grad_norm": 220.98976135253906, "learning_rate": 6.135296328199954e-06, "loss": 28.9824, "step": 242190 }, { "epoch": 0.48925932360201524, "grad_norm": 597.6094360351562, "learning_rate": 6.134956376854251e-06, "loss": 29.8922, "step": 242200 }, { "epoch": 0.489279524234699, "grad_norm": 156.0749053955078, "learning_rate": 6.134616419976908e-06, "loss": 25.4309, "step": 242210 }, { "epoch": 0.4892997248673828, "grad_norm": 471.7602233886719, "learning_rate": 6.134276457569581e-06, "loss": 32.2873, "step": 242220 }, { "epoch": 0.48931992550006664, "grad_norm": 291.9942321777344, "learning_rate": 6.133936489633929e-06, "loss": 13.3793, "step": 242230 }, { "epoch": 0.48934012613275046, "grad_norm": 300.6629943847656, "learning_rate": 6.133596516171609e-06, "loss": 32.2786, "step": 242240 }, { "epoch": 0.4893603267654343, "grad_norm": 104.70243072509766, "learning_rate": 6.133256537184276e-06, "loss": 17.0309, "step": 242250 }, { "epoch": 0.4893805273981181, "grad_norm": 98.21446990966797, "learning_rate": 6.132916552673588e-06, "loss": 26.8518, "step": 242260 }, { "epoch": 0.4894007280308019, "grad_norm": 57.05887222290039, "learning_rate": 6.132576562641203e-06, "loss": 18.5414, "step": 242270 }, { "epoch": 0.48942092866348574, "grad_norm": 152.68832397460938, "learning_rate": 6.132236567088777e-06, "loss": 18.317, "step": 242280 }, { "epoch": 0.48944112929616956, "grad_norm": 256.2601623535156, "learning_rate": 6.131896566017967e-06, "loss": 15.1646, "step": 242290 }, { "epoch": 0.4894613299288534, "grad_norm": 634.299072265625, "learning_rate": 6.13155655943043e-06, "loss": 17.101, "step": 242300 }, { "epoch": 0.4894815305615372, "grad_norm": 170.2461700439453, "learning_rate": 6.131216547327824e-06, "loss": 10.8872, "step": 242310 }, { "epoch": 0.489501731194221, "grad_norm": 301.2615966796875, "learning_rate": 6.130876529711806e-06, "loss": 28.0402, "step": 242320 }, { "epoch": 0.48952193182690484, "grad_norm": 123.71393585205078, "learning_rate": 6.130536506584032e-06, "loss": 32.7707, "step": 242330 }, { "epoch": 0.4895421324595886, "grad_norm": 181.0061798095703, "learning_rate": 6.130196477946162e-06, "loss": 22.2975, "step": 242340 }, { "epoch": 0.48956233309227243, "grad_norm": 157.36105346679688, "learning_rate": 6.12985644379985e-06, "loss": 19.4352, "step": 242350 }, { "epoch": 0.48958253372495625, "grad_norm": 134.73414611816406, "learning_rate": 6.1295164041467545e-06, "loss": 16.3137, "step": 242360 }, { "epoch": 0.48960273435764007, "grad_norm": 97.66944122314453, "learning_rate": 6.129176358988535e-06, "loss": 18.0613, "step": 242370 }, { "epoch": 0.4896229349903239, "grad_norm": 51.17716598510742, "learning_rate": 6.128836308326844e-06, "loss": 15.2035, "step": 242380 }, { "epoch": 0.4896431356230077, "grad_norm": 160.8468475341797, "learning_rate": 6.128496252163344e-06, "loss": 18.4649, "step": 242390 }, { "epoch": 0.48966333625569153, "grad_norm": 229.8904571533203, "learning_rate": 6.128156190499688e-06, "loss": 13.837, "step": 242400 }, { "epoch": 0.48968353688837535, "grad_norm": 677.5126342773438, "learning_rate": 6.127816123337538e-06, "loss": 18.9408, "step": 242410 }, { "epoch": 0.48970373752105917, "grad_norm": 46.152076721191406, "learning_rate": 6.127476050678548e-06, "loss": 19.9223, "step": 242420 }, { "epoch": 0.489723938153743, "grad_norm": 129.7207489013672, "learning_rate": 6.127135972524376e-06, "loss": 12.8867, "step": 242430 }, { "epoch": 0.4897441387864268, "grad_norm": 230.86570739746094, "learning_rate": 6.126795888876681e-06, "loss": 23.5519, "step": 242440 }, { "epoch": 0.48976433941911063, "grad_norm": 0.6836954355239868, "learning_rate": 6.1264557997371185e-06, "loss": 12.4184, "step": 242450 }, { "epoch": 0.48978454005179445, "grad_norm": 24.879638671875, "learning_rate": 6.126115705107347e-06, "loss": 10.9503, "step": 242460 }, { "epoch": 0.4898047406844782, "grad_norm": 727.1319580078125, "learning_rate": 6.125775604989025e-06, "loss": 21.253, "step": 242470 }, { "epoch": 0.48982494131716203, "grad_norm": 33.15021896362305, "learning_rate": 6.125435499383808e-06, "loss": 19.6368, "step": 242480 }, { "epoch": 0.48984514194984585, "grad_norm": 332.9187316894531, "learning_rate": 6.125095388293356e-06, "loss": 18.4089, "step": 242490 }, { "epoch": 0.4898653425825297, "grad_norm": 144.8144989013672, "learning_rate": 6.124755271719326e-06, "loss": 7.8591, "step": 242500 }, { "epoch": 0.4898855432152135, "grad_norm": 107.74507141113281, "learning_rate": 6.124415149663374e-06, "loss": 14.5488, "step": 242510 }, { "epoch": 0.4899057438478973, "grad_norm": 444.78631591796875, "learning_rate": 6.12407502212716e-06, "loss": 21.3377, "step": 242520 }, { "epoch": 0.48992594448058113, "grad_norm": 90.56747436523438, "learning_rate": 6.12373488911234e-06, "loss": 9.4026, "step": 242530 }, { "epoch": 0.48994614511326495, "grad_norm": 1657.8057861328125, "learning_rate": 6.123394750620571e-06, "loss": 22.5459, "step": 242540 }, { "epoch": 0.4899663457459488, "grad_norm": 229.11322021484375, "learning_rate": 6.123054606653515e-06, "loss": 27.2788, "step": 242550 }, { "epoch": 0.4899865463786326, "grad_norm": 241.08921813964844, "learning_rate": 6.122714457212825e-06, "loss": 22.0021, "step": 242560 }, { "epoch": 0.4900067470113164, "grad_norm": 284.32080078125, "learning_rate": 6.122374302300162e-06, "loss": 45.0825, "step": 242570 }, { "epoch": 0.49002694764400023, "grad_norm": 113.47421264648438, "learning_rate": 6.122034141917183e-06, "loss": 26.8106, "step": 242580 }, { "epoch": 0.49004714827668405, "grad_norm": 256.8528747558594, "learning_rate": 6.121693976065545e-06, "loss": 23.0936, "step": 242590 }, { "epoch": 0.4900673489093678, "grad_norm": 177.10475158691406, "learning_rate": 6.121353804746907e-06, "loss": 10.8521, "step": 242600 }, { "epoch": 0.49008754954205164, "grad_norm": 379.10467529296875, "learning_rate": 6.121013627962925e-06, "loss": 23.5378, "step": 242610 }, { "epoch": 0.49010775017473546, "grad_norm": 1130.833984375, "learning_rate": 6.1206734457152615e-06, "loss": 25.6009, "step": 242620 }, { "epoch": 0.4901279508074193, "grad_norm": 117.39187622070312, "learning_rate": 6.12033325800557e-06, "loss": 18.5154, "step": 242630 }, { "epoch": 0.4901481514401031, "grad_norm": 119.1123046875, "learning_rate": 6.119993064835509e-06, "loss": 12.1352, "step": 242640 }, { "epoch": 0.4901683520727869, "grad_norm": 329.0062255859375, "learning_rate": 6.119652866206739e-06, "loss": 19.2961, "step": 242650 }, { "epoch": 0.49018855270547074, "grad_norm": 253.15765380859375, "learning_rate": 6.119312662120916e-06, "loss": 13.3247, "step": 242660 }, { "epoch": 0.49020875333815456, "grad_norm": 162.27049255371094, "learning_rate": 6.118972452579699e-06, "loss": 14.0635, "step": 242670 }, { "epoch": 0.4902289539708384, "grad_norm": 4013.584716796875, "learning_rate": 6.118632237584748e-06, "loss": 34.202, "step": 242680 }, { "epoch": 0.4902491546035222, "grad_norm": 986.8603515625, "learning_rate": 6.118292017137716e-06, "loss": 22.6275, "step": 242690 }, { "epoch": 0.490269355236206, "grad_norm": 368.8105163574219, "learning_rate": 6.117951791240265e-06, "loss": 24.9354, "step": 242700 }, { "epoch": 0.49028955586888984, "grad_norm": 17.780118942260742, "learning_rate": 6.117611559894054e-06, "loss": 22.2102, "step": 242710 }, { "epoch": 0.49030975650157366, "grad_norm": 287.93780517578125, "learning_rate": 6.117271323100739e-06, "loss": 17.9528, "step": 242720 }, { "epoch": 0.4903299571342574, "grad_norm": 228.43865966796875, "learning_rate": 6.116931080861979e-06, "loss": 25.9587, "step": 242730 }, { "epoch": 0.49035015776694124, "grad_norm": 142.51480102539062, "learning_rate": 6.116590833179432e-06, "loss": 31.3503, "step": 242740 }, { "epoch": 0.49037035839962506, "grad_norm": 46.60099792480469, "learning_rate": 6.116250580054758e-06, "loss": 16.0719, "step": 242750 }, { "epoch": 0.4903905590323089, "grad_norm": 104.00080108642578, "learning_rate": 6.115910321489613e-06, "loss": 19.3221, "step": 242760 }, { "epoch": 0.4904107596649927, "grad_norm": 287.47283935546875, "learning_rate": 6.115570057485656e-06, "loss": 12.3041, "step": 242770 }, { "epoch": 0.4904309602976765, "grad_norm": 279.30157470703125, "learning_rate": 6.1152297880445476e-06, "loss": 13.9901, "step": 242780 }, { "epoch": 0.49045116093036034, "grad_norm": 396.964111328125, "learning_rate": 6.114889513167943e-06, "loss": 19.6223, "step": 242790 }, { "epoch": 0.49047136156304416, "grad_norm": 289.7734680175781, "learning_rate": 6.114549232857503e-06, "loss": 15.3955, "step": 242800 }, { "epoch": 0.490491562195728, "grad_norm": 139.36715698242188, "learning_rate": 6.114208947114883e-06, "loss": 22.571, "step": 242810 }, { "epoch": 0.4905117628284118, "grad_norm": 334.52520751953125, "learning_rate": 6.113868655941747e-06, "loss": 25.427, "step": 242820 }, { "epoch": 0.4905319634610956, "grad_norm": 25.151891708374023, "learning_rate": 6.1135283593397475e-06, "loss": 21.3894, "step": 242830 }, { "epoch": 0.49055216409377944, "grad_norm": 287.98333740234375, "learning_rate": 6.113188057310548e-06, "loss": 15.4018, "step": 242840 }, { "epoch": 0.4905723647264632, "grad_norm": 309.8084716796875, "learning_rate": 6.112847749855804e-06, "loss": 19.5364, "step": 242850 }, { "epoch": 0.490592565359147, "grad_norm": 244.8582305908203, "learning_rate": 6.112507436977175e-06, "loss": 19.3008, "step": 242860 }, { "epoch": 0.49061276599183085, "grad_norm": 9.765552520751953, "learning_rate": 6.112167118676321e-06, "loss": 15.8752, "step": 242870 }, { "epoch": 0.49063296662451467, "grad_norm": 397.9643859863281, "learning_rate": 6.111826794954896e-06, "loss": 24.3402, "step": 242880 }, { "epoch": 0.4906531672571985, "grad_norm": 207.5511016845703, "learning_rate": 6.1114864658145655e-06, "loss": 25.5289, "step": 242890 }, { "epoch": 0.4906733678898823, "grad_norm": 352.0382080078125, "learning_rate": 6.111146131256983e-06, "loss": 15.6208, "step": 242900 }, { "epoch": 0.4906935685225661, "grad_norm": 119.31907653808594, "learning_rate": 6.110805791283809e-06, "loss": 14.7348, "step": 242910 }, { "epoch": 0.49071376915524995, "grad_norm": 213.3852081298828, "learning_rate": 6.110465445896703e-06, "loss": 24.4838, "step": 242920 }, { "epoch": 0.49073396978793377, "grad_norm": 183.25038146972656, "learning_rate": 6.110125095097323e-06, "loss": 23.2666, "step": 242930 }, { "epoch": 0.4907541704206176, "grad_norm": 273.6027526855469, "learning_rate": 6.109784738887327e-06, "loss": 13.5112, "step": 242940 }, { "epoch": 0.4907743710533014, "grad_norm": 343.40325927734375, "learning_rate": 6.109444377268376e-06, "loss": 30.195, "step": 242950 }, { "epoch": 0.4907945716859852, "grad_norm": 380.3698425292969, "learning_rate": 6.109104010242127e-06, "loss": 27.6632, "step": 242960 }, { "epoch": 0.49081477231866905, "grad_norm": 423.4729309082031, "learning_rate": 6.10876363781024e-06, "loss": 25.8584, "step": 242970 }, { "epoch": 0.4908349729513528, "grad_norm": 133.94935607910156, "learning_rate": 6.108423259974375e-06, "loss": 14.4079, "step": 242980 }, { "epoch": 0.49085517358403663, "grad_norm": 238.78903198242188, "learning_rate": 6.108082876736185e-06, "loss": 17.1159, "step": 242990 }, { "epoch": 0.49087537421672045, "grad_norm": 378.2549133300781, "learning_rate": 6.107742488097338e-06, "loss": 26.0111, "step": 243000 }, { "epoch": 0.49089557484940427, "grad_norm": 226.86862182617188, "learning_rate": 6.107402094059485e-06, "loss": 13.7213, "step": 243010 }, { "epoch": 0.4909157754820881, "grad_norm": 40.531982421875, "learning_rate": 6.107061694624291e-06, "loss": 9.6031, "step": 243020 }, { "epoch": 0.4909359761147719, "grad_norm": 22.913829803466797, "learning_rate": 6.1067212897934115e-06, "loss": 29.3085, "step": 243030 }, { "epoch": 0.49095617674745573, "grad_norm": 454.431640625, "learning_rate": 6.106380879568507e-06, "loss": 26.1063, "step": 243040 }, { "epoch": 0.49097637738013955, "grad_norm": 626.4265747070312, "learning_rate": 6.106040463951237e-06, "loss": 25.2556, "step": 243050 }, { "epoch": 0.49099657801282337, "grad_norm": 160.4250030517578, "learning_rate": 6.105700042943258e-06, "loss": 27.6776, "step": 243060 }, { "epoch": 0.4910167786455072, "grad_norm": 132.11080932617188, "learning_rate": 6.105359616546232e-06, "loss": 14.2216, "step": 243070 }, { "epoch": 0.491036979278191, "grad_norm": 147.54464721679688, "learning_rate": 6.105019184761818e-06, "loss": 17.8467, "step": 243080 }, { "epoch": 0.49105717991087483, "grad_norm": 4.994355201721191, "learning_rate": 6.104678747591674e-06, "loss": 13.0077, "step": 243090 }, { "epoch": 0.49107738054355865, "grad_norm": 316.6197814941406, "learning_rate": 6.10433830503746e-06, "loss": 10.9273, "step": 243100 }, { "epoch": 0.4910975811762424, "grad_norm": 145.91244506835938, "learning_rate": 6.1039978571008355e-06, "loss": 15.5496, "step": 243110 }, { "epoch": 0.49111778180892623, "grad_norm": 358.31292724609375, "learning_rate": 6.103657403783458e-06, "loss": 17.7109, "step": 243120 }, { "epoch": 0.49113798244161005, "grad_norm": 300.9735107421875, "learning_rate": 6.103316945086989e-06, "loss": 14.2449, "step": 243130 }, { "epoch": 0.4911581830742939, "grad_norm": 392.0238037109375, "learning_rate": 6.102976481013086e-06, "loss": 30.5095, "step": 243140 }, { "epoch": 0.4911783837069777, "grad_norm": 283.1286315917969, "learning_rate": 6.102636011563411e-06, "loss": 25.8498, "step": 243150 }, { "epoch": 0.4911985843396615, "grad_norm": 123.38845825195312, "learning_rate": 6.102295536739622e-06, "loss": 13.2358, "step": 243160 }, { "epoch": 0.49121878497234533, "grad_norm": 158.7946014404297, "learning_rate": 6.101955056543376e-06, "loss": 30.0643, "step": 243170 }, { "epoch": 0.49123898560502915, "grad_norm": 394.4836120605469, "learning_rate": 6.101614570976336e-06, "loss": 23.252, "step": 243180 }, { "epoch": 0.491259186237713, "grad_norm": 21.765392303466797, "learning_rate": 6.101274080040161e-06, "loss": 15.9049, "step": 243190 }, { "epoch": 0.4912793868703968, "grad_norm": 494.5970764160156, "learning_rate": 6.100933583736508e-06, "loss": 12.8447, "step": 243200 }, { "epoch": 0.4912995875030806, "grad_norm": 62.86954116821289, "learning_rate": 6.10059308206704e-06, "loss": 27.5089, "step": 243210 }, { "epoch": 0.49131978813576443, "grad_norm": 370.6246032714844, "learning_rate": 6.100252575033413e-06, "loss": 26.7894, "step": 243220 }, { "epoch": 0.49133998876844825, "grad_norm": 258.462646484375, "learning_rate": 6.0999120626372895e-06, "loss": 29.8748, "step": 243230 }, { "epoch": 0.491360189401132, "grad_norm": 448.0193786621094, "learning_rate": 6.099571544880328e-06, "loss": 29.1227, "step": 243240 }, { "epoch": 0.49138039003381584, "grad_norm": 602.3392944335938, "learning_rate": 6.099231021764188e-06, "loss": 20.071, "step": 243250 }, { "epoch": 0.49140059066649966, "grad_norm": 326.4935607910156, "learning_rate": 6.098890493290529e-06, "loss": 16.0857, "step": 243260 }, { "epoch": 0.4914207912991835, "grad_norm": 481.64947509765625, "learning_rate": 6.0985499594610136e-06, "loss": 14.8561, "step": 243270 }, { "epoch": 0.4914409919318673, "grad_norm": 22.849878311157227, "learning_rate": 6.098209420277294e-06, "loss": 12.4237, "step": 243280 }, { "epoch": 0.4914611925645511, "grad_norm": 394.6885986328125, "learning_rate": 6.097868875741039e-06, "loss": 25.8219, "step": 243290 }, { "epoch": 0.49148139319723494, "grad_norm": 92.82158660888672, "learning_rate": 6.097528325853903e-06, "loss": 9.2917, "step": 243300 }, { "epoch": 0.49150159382991876, "grad_norm": 455.3236389160156, "learning_rate": 6.0971877706175465e-06, "loss": 14.6483, "step": 243310 }, { "epoch": 0.4915217944626026, "grad_norm": 185.91822814941406, "learning_rate": 6.09684721003363e-06, "loss": 16.1912, "step": 243320 }, { "epoch": 0.4915419950952864, "grad_norm": 148.9674530029297, "learning_rate": 6.096506644103813e-06, "loss": 10.3156, "step": 243330 }, { "epoch": 0.4915621957279702, "grad_norm": 104.12155151367188, "learning_rate": 6.096166072829757e-06, "loss": 11.0579, "step": 243340 }, { "epoch": 0.49158239636065404, "grad_norm": 357.9919738769531, "learning_rate": 6.095825496213119e-06, "loss": 13.1166, "step": 243350 }, { "epoch": 0.4916025969933378, "grad_norm": 311.8551025390625, "learning_rate": 6.095484914255561e-06, "loss": 25.4991, "step": 243360 }, { "epoch": 0.4916227976260216, "grad_norm": 660.0418701171875, "learning_rate": 6.0951443269587426e-06, "loss": 21.5258, "step": 243370 }, { "epoch": 0.49164299825870544, "grad_norm": 179.60174560546875, "learning_rate": 6.094803734324324e-06, "loss": 9.3044, "step": 243380 }, { "epoch": 0.49166319889138926, "grad_norm": 127.3393325805664, "learning_rate": 6.094463136353964e-06, "loss": 19.7053, "step": 243390 }, { "epoch": 0.4916833995240731, "grad_norm": 340.3312072753906, "learning_rate": 6.094122533049324e-06, "loss": 31.5879, "step": 243400 }, { "epoch": 0.4917036001567569, "grad_norm": 4.36138916015625, "learning_rate": 6.093781924412063e-06, "loss": 12.4941, "step": 243410 }, { "epoch": 0.4917238007894407, "grad_norm": 27.208877563476562, "learning_rate": 6.093441310443842e-06, "loss": 9.8569, "step": 243420 }, { "epoch": 0.49174400142212454, "grad_norm": 481.04437255859375, "learning_rate": 6.093100691146321e-06, "loss": 24.2707, "step": 243430 }, { "epoch": 0.49176420205480836, "grad_norm": 393.0268249511719, "learning_rate": 6.0927600665211575e-06, "loss": 23.7284, "step": 243440 }, { "epoch": 0.4917844026874922, "grad_norm": 139.5255889892578, "learning_rate": 6.092419436570016e-06, "loss": 9.5754, "step": 243450 }, { "epoch": 0.491804603320176, "grad_norm": 195.53273010253906, "learning_rate": 6.092078801294554e-06, "loss": 9.7685, "step": 243460 }, { "epoch": 0.4918248039528598, "grad_norm": 406.2366027832031, "learning_rate": 6.091738160696433e-06, "loss": 25.8861, "step": 243470 }, { "epoch": 0.49184500458554364, "grad_norm": 331.0402526855469, "learning_rate": 6.091397514777313e-06, "loss": 20.0857, "step": 243480 }, { "epoch": 0.4918652052182274, "grad_norm": 172.41392517089844, "learning_rate": 6.091056863538851e-06, "loss": 15.3087, "step": 243490 }, { "epoch": 0.49188540585091123, "grad_norm": 90.92706298828125, "learning_rate": 6.090716206982714e-06, "loss": 21.8831, "step": 243500 }, { "epoch": 0.49190560648359505, "grad_norm": 947.2507934570312, "learning_rate": 6.090375545110556e-06, "loss": 24.0457, "step": 243510 }, { "epoch": 0.49192580711627887, "grad_norm": 404.31024169921875, "learning_rate": 6.090034877924041e-06, "loss": 21.4566, "step": 243520 }, { "epoch": 0.4919460077489627, "grad_norm": 22.187524795532227, "learning_rate": 6.089694205424827e-06, "loss": 13.2223, "step": 243530 }, { "epoch": 0.4919662083816465, "grad_norm": 360.9028625488281, "learning_rate": 6.089353527614577e-06, "loss": 13.771, "step": 243540 }, { "epoch": 0.49198640901433033, "grad_norm": 385.9383850097656, "learning_rate": 6.08901284449495e-06, "loss": 17.856, "step": 243550 }, { "epoch": 0.49200660964701415, "grad_norm": 192.99761962890625, "learning_rate": 6.088672156067607e-06, "loss": 18.8634, "step": 243560 }, { "epoch": 0.49202681027969797, "grad_norm": 212.12274169921875, "learning_rate": 6.088331462334206e-06, "loss": 17.0532, "step": 243570 }, { "epoch": 0.4920470109123818, "grad_norm": 203.83892822265625, "learning_rate": 6.0879907632964095e-06, "loss": 21.4517, "step": 243580 }, { "epoch": 0.4920672115450656, "grad_norm": 170.85891723632812, "learning_rate": 6.087650058955879e-06, "loss": 17.4443, "step": 243590 }, { "epoch": 0.49208741217774943, "grad_norm": 245.99258422851562, "learning_rate": 6.087309349314275e-06, "loss": 17.6228, "step": 243600 }, { "epoch": 0.49210761281043325, "grad_norm": 483.9646911621094, "learning_rate": 6.086968634373256e-06, "loss": 37.3121, "step": 243610 }, { "epoch": 0.492127813443117, "grad_norm": 11.397486686706543, "learning_rate": 6.086627914134482e-06, "loss": 32.4802, "step": 243620 }, { "epoch": 0.49214801407580083, "grad_norm": 246.52471923828125, "learning_rate": 6.086287188599617e-06, "loss": 10.2483, "step": 243630 }, { "epoch": 0.49216821470848465, "grad_norm": 78.76648712158203, "learning_rate": 6.085946457770321e-06, "loss": 20.1452, "step": 243640 }, { "epoch": 0.4921884153411685, "grad_norm": 399.710693359375, "learning_rate": 6.085605721648253e-06, "loss": 19.8085, "step": 243650 }, { "epoch": 0.4922086159738523, "grad_norm": 328.56976318359375, "learning_rate": 6.085264980235075e-06, "loss": 19.4936, "step": 243660 }, { "epoch": 0.4922288166065361, "grad_norm": 341.4751892089844, "learning_rate": 6.084924233532444e-06, "loss": 12.9038, "step": 243670 }, { "epoch": 0.49224901723921993, "grad_norm": 356.3509826660156, "learning_rate": 6.084583481542028e-06, "loss": 9.079, "step": 243680 }, { "epoch": 0.49226921787190375, "grad_norm": 209.0740509033203, "learning_rate": 6.084242724265481e-06, "loss": 26.9748, "step": 243690 }, { "epoch": 0.4922894185045876, "grad_norm": 555.4505615234375, "learning_rate": 6.083901961704467e-06, "loss": 22.2173, "step": 243700 }, { "epoch": 0.4923096191372714, "grad_norm": 197.5386505126953, "learning_rate": 6.083561193860646e-06, "loss": 17.4759, "step": 243710 }, { "epoch": 0.4923298197699552, "grad_norm": 241.0132293701172, "learning_rate": 6.083220420735681e-06, "loss": 20.8579, "step": 243720 }, { "epoch": 0.49235002040263903, "grad_norm": 278.906005859375, "learning_rate": 6.08287964233123e-06, "loss": 14.1124, "step": 243730 }, { "epoch": 0.49237022103532285, "grad_norm": 11.665201187133789, "learning_rate": 6.082538858648954e-06, "loss": 19.0067, "step": 243740 }, { "epoch": 0.4923904216680066, "grad_norm": 164.0560760498047, "learning_rate": 6.0821980696905145e-06, "loss": 20.472, "step": 243750 }, { "epoch": 0.49241062230069044, "grad_norm": 409.1033935546875, "learning_rate": 6.081857275457574e-06, "loss": 20.619, "step": 243760 }, { "epoch": 0.49243082293337426, "grad_norm": 307.7234802246094, "learning_rate": 6.081516475951793e-06, "loss": 19.9875, "step": 243770 }, { "epoch": 0.4924510235660581, "grad_norm": 203.3827667236328, "learning_rate": 6.081175671174831e-06, "loss": 27.982, "step": 243780 }, { "epoch": 0.4924712241987419, "grad_norm": 161.2895965576172, "learning_rate": 6.0808348611283505e-06, "loss": 16.2151, "step": 243790 }, { "epoch": 0.4924914248314257, "grad_norm": 279.3691711425781, "learning_rate": 6.080494045814011e-06, "loss": 13.6769, "step": 243800 }, { "epoch": 0.49251162546410954, "grad_norm": 1016.8030395507812, "learning_rate": 6.080153225233475e-06, "loss": 17.9353, "step": 243810 }, { "epoch": 0.49253182609679336, "grad_norm": 79.41496276855469, "learning_rate": 6.079812399388404e-06, "loss": 9.4191, "step": 243820 }, { "epoch": 0.4925520267294772, "grad_norm": 520.326171875, "learning_rate": 6.079471568280456e-06, "loss": 20.7793, "step": 243830 }, { "epoch": 0.492572227362161, "grad_norm": 312.5320129394531, "learning_rate": 6.079130731911298e-06, "loss": 18.6115, "step": 243840 }, { "epoch": 0.4925924279948448, "grad_norm": 113.22270965576172, "learning_rate": 6.078789890282585e-06, "loss": 30.0833, "step": 243850 }, { "epoch": 0.49261262862752864, "grad_norm": 23.268213272094727, "learning_rate": 6.078449043395982e-06, "loss": 24.3871, "step": 243860 }, { "epoch": 0.49263282926021246, "grad_norm": 230.93312072753906, "learning_rate": 6.078108191253148e-06, "loss": 20.967, "step": 243870 }, { "epoch": 0.4926530298928962, "grad_norm": 205.9449462890625, "learning_rate": 6.077767333855748e-06, "loss": 14.1816, "step": 243880 }, { "epoch": 0.49267323052558004, "grad_norm": 117.29251098632812, "learning_rate": 6.077426471205439e-06, "loss": 17.1309, "step": 243890 }, { "epoch": 0.49269343115826386, "grad_norm": 239.22384643554688, "learning_rate": 6.077085603303883e-06, "loss": 22.231, "step": 243900 }, { "epoch": 0.4927136317909477, "grad_norm": 153.31817626953125, "learning_rate": 6.076744730152744e-06, "loss": 21.5334, "step": 243910 }, { "epoch": 0.4927338324236315, "grad_norm": 465.18255615234375, "learning_rate": 6.07640385175368e-06, "loss": 30.3252, "step": 243920 }, { "epoch": 0.4927540330563153, "grad_norm": 224.5220489501953, "learning_rate": 6.076062968108357e-06, "loss": 15.9072, "step": 243930 }, { "epoch": 0.49277423368899914, "grad_norm": 120.92229461669922, "learning_rate": 6.0757220792184314e-06, "loss": 21.1259, "step": 243940 }, { "epoch": 0.49279443432168296, "grad_norm": 195.13400268554688, "learning_rate": 6.075381185085568e-06, "loss": 15.1976, "step": 243950 }, { "epoch": 0.4928146349543668, "grad_norm": 17.923185348510742, "learning_rate": 6.075040285711427e-06, "loss": 19.0412, "step": 243960 }, { "epoch": 0.4928348355870506, "grad_norm": 287.99749755859375, "learning_rate": 6.074699381097669e-06, "loss": 17.6029, "step": 243970 }, { "epoch": 0.4928550362197344, "grad_norm": 296.1041564941406, "learning_rate": 6.074358471245957e-06, "loss": 10.0058, "step": 243980 }, { "epoch": 0.49287523685241824, "grad_norm": 594.8506469726562, "learning_rate": 6.074017556157952e-06, "loss": 32.912, "step": 243990 }, { "epoch": 0.492895437485102, "grad_norm": 109.69888305664062, "learning_rate": 6.073676635835317e-06, "loss": 18.6824, "step": 244000 }, { "epoch": 0.4929156381177858, "grad_norm": 182.17965698242188, "learning_rate": 6.073335710279711e-06, "loss": 19.0831, "step": 244010 }, { "epoch": 0.49293583875046965, "grad_norm": 1281.683349609375, "learning_rate": 6.072994779492798e-06, "loss": 26.8335, "step": 244020 }, { "epoch": 0.49295603938315347, "grad_norm": 287.4867248535156, "learning_rate": 6.072653843476237e-06, "loss": 24.5749, "step": 244030 }, { "epoch": 0.4929762400158373, "grad_norm": 409.3431396484375, "learning_rate": 6.072312902231692e-06, "loss": 17.0714, "step": 244040 }, { "epoch": 0.4929964406485211, "grad_norm": 208.3171844482422, "learning_rate": 6.071971955760823e-06, "loss": 28.9649, "step": 244050 }, { "epoch": 0.4930166412812049, "grad_norm": 650.1362915039062, "learning_rate": 6.071631004065296e-06, "loss": 19.94, "step": 244060 }, { "epoch": 0.49303684191388875, "grad_norm": 318.77557373046875, "learning_rate": 6.071290047146767e-06, "loss": 24.3591, "step": 244070 }, { "epoch": 0.49305704254657257, "grad_norm": 208.2752227783203, "learning_rate": 6.0709490850069e-06, "loss": 16.3146, "step": 244080 }, { "epoch": 0.4930772431792564, "grad_norm": 227.67562866210938, "learning_rate": 6.070608117647359e-06, "loss": 13.5313, "step": 244090 }, { "epoch": 0.4930974438119402, "grad_norm": 294.8225402832031, "learning_rate": 6.0702671450698005e-06, "loss": 16.92, "step": 244100 }, { "epoch": 0.493117644444624, "grad_norm": 357.53350830078125, "learning_rate": 6.069926167275893e-06, "loss": 15.8317, "step": 244110 }, { "epoch": 0.49313784507730785, "grad_norm": 424.4247131347656, "learning_rate": 6.069585184267292e-06, "loss": 20.8461, "step": 244120 }, { "epoch": 0.4931580457099916, "grad_norm": 169.8274688720703, "learning_rate": 6.069244196045666e-06, "loss": 11.8737, "step": 244130 }, { "epoch": 0.49317824634267543, "grad_norm": 431.5495300292969, "learning_rate": 6.068903202612672e-06, "loss": 23.2694, "step": 244140 }, { "epoch": 0.49319844697535925, "grad_norm": 416.96063232421875, "learning_rate": 6.068562203969972e-06, "loss": 18.458, "step": 244150 }, { "epoch": 0.49321864760804307, "grad_norm": 515.156982421875, "learning_rate": 6.068221200119232e-06, "loss": 12.8532, "step": 244160 }, { "epoch": 0.4932388482407269, "grad_norm": 449.691650390625, "learning_rate": 6.06788019106211e-06, "loss": 15.7347, "step": 244170 }, { "epoch": 0.4932590488734107, "grad_norm": 429.8311767578125, "learning_rate": 6.067539176800269e-06, "loss": 13.1296, "step": 244180 }, { "epoch": 0.49327924950609453, "grad_norm": 330.0807800292969, "learning_rate": 6.067198157335372e-06, "loss": 13.5122, "step": 244190 }, { "epoch": 0.49329945013877835, "grad_norm": 898.197998046875, "learning_rate": 6.066857132669081e-06, "loss": 25.3527, "step": 244200 }, { "epoch": 0.49331965077146217, "grad_norm": 100.76026153564453, "learning_rate": 6.066516102803057e-06, "loss": 21.9148, "step": 244210 }, { "epoch": 0.493339851404146, "grad_norm": 39.67820358276367, "learning_rate": 6.066175067738964e-06, "loss": 13.9674, "step": 244220 }, { "epoch": 0.4933600520368298, "grad_norm": 222.9066619873047, "learning_rate": 6.065834027478462e-06, "loss": 22.3811, "step": 244230 }, { "epoch": 0.49338025266951363, "grad_norm": 337.6153869628906, "learning_rate": 6.0654929820232146e-06, "loss": 13.3997, "step": 244240 }, { "epoch": 0.49340045330219745, "grad_norm": 222.28948974609375, "learning_rate": 6.065151931374884e-06, "loss": 10.6958, "step": 244250 }, { "epoch": 0.4934206539348812, "grad_norm": 259.8713684082031, "learning_rate": 6.0648108755351305e-06, "loss": 21.1764, "step": 244260 }, { "epoch": 0.49344085456756503, "grad_norm": 184.92410278320312, "learning_rate": 6.06446981450562e-06, "loss": 17.43, "step": 244270 }, { "epoch": 0.49346105520024885, "grad_norm": 682.4724731445312, "learning_rate": 6.0641287482880105e-06, "loss": 26.1328, "step": 244280 }, { "epoch": 0.4934812558329327, "grad_norm": 156.3910369873047, "learning_rate": 6.0637876768839696e-06, "loss": 18.6075, "step": 244290 }, { "epoch": 0.4935014564656165, "grad_norm": 634.3438110351562, "learning_rate": 6.0634466002951545e-06, "loss": 35.0273, "step": 244300 }, { "epoch": 0.4935216570983003, "grad_norm": 59.063880920410156, "learning_rate": 6.06310551852323e-06, "loss": 21.3219, "step": 244310 }, { "epoch": 0.49354185773098413, "grad_norm": 349.0487060546875, "learning_rate": 6.0627644315698575e-06, "loss": 21.5352, "step": 244320 }, { "epoch": 0.49356205836366795, "grad_norm": 118.60848999023438, "learning_rate": 6.062423339436701e-06, "loss": 26.1926, "step": 244330 }, { "epoch": 0.4935822589963518, "grad_norm": 172.1380157470703, "learning_rate": 6.062082242125422e-06, "loss": 14.3361, "step": 244340 }, { "epoch": 0.4936024596290356, "grad_norm": 0.0, "learning_rate": 6.061741139637682e-06, "loss": 17.9107, "step": 244350 }, { "epoch": 0.4936226602617194, "grad_norm": 959.0023803710938, "learning_rate": 6.061400031975147e-06, "loss": 20.7472, "step": 244360 }, { "epoch": 0.49364286089440323, "grad_norm": 245.41539001464844, "learning_rate": 6.061058919139474e-06, "loss": 16.6736, "step": 244370 }, { "epoch": 0.49366306152708705, "grad_norm": 325.2384338378906, "learning_rate": 6.060717801132329e-06, "loss": 24.3751, "step": 244380 }, { "epoch": 0.4936832621597708, "grad_norm": 245.00494384765625, "learning_rate": 6.060376677955375e-06, "loss": 10.433, "step": 244390 }, { "epoch": 0.49370346279245464, "grad_norm": 304.51129150390625, "learning_rate": 6.060035549610275e-06, "loss": 32.5843, "step": 244400 }, { "epoch": 0.49372366342513846, "grad_norm": 444.15252685546875, "learning_rate": 6.0596944160986885e-06, "loss": 23.0259, "step": 244410 }, { "epoch": 0.4937438640578223, "grad_norm": 355.3755798339844, "learning_rate": 6.0593532774222796e-06, "loss": 17.3964, "step": 244420 }, { "epoch": 0.4937640646905061, "grad_norm": 310.92913818359375, "learning_rate": 6.059012133582713e-06, "loss": 28.6486, "step": 244430 }, { "epoch": 0.4937842653231899, "grad_norm": 507.0361022949219, "learning_rate": 6.058670984581647e-06, "loss": 18.3852, "step": 244440 }, { "epoch": 0.49380446595587374, "grad_norm": 242.7211151123047, "learning_rate": 6.058329830420749e-06, "loss": 21.0649, "step": 244450 }, { "epoch": 0.49382466658855756, "grad_norm": 329.3995666503906, "learning_rate": 6.057988671101679e-06, "loss": 15.5041, "step": 244460 }, { "epoch": 0.4938448672212414, "grad_norm": 582.8204956054688, "learning_rate": 6.057647506626101e-06, "loss": 22.8234, "step": 244470 }, { "epoch": 0.4938650678539252, "grad_norm": 162.57894897460938, "learning_rate": 6.057306336995677e-06, "loss": 8.4585, "step": 244480 }, { "epoch": 0.493885268486609, "grad_norm": 272.8739013671875, "learning_rate": 6.056965162212072e-06, "loss": 25.1741, "step": 244490 }, { "epoch": 0.49390546911929284, "grad_norm": 36.329872131347656, "learning_rate": 6.056623982276945e-06, "loss": 13.8372, "step": 244500 }, { "epoch": 0.49392566975197666, "grad_norm": 427.57025146484375, "learning_rate": 6.05628279719196e-06, "loss": 28.4299, "step": 244510 }, { "epoch": 0.4939458703846604, "grad_norm": 237.22508239746094, "learning_rate": 6.0559416069587814e-06, "loss": 11.372, "step": 244520 }, { "epoch": 0.49396607101734424, "grad_norm": 270.677734375, "learning_rate": 6.055600411579072e-06, "loss": 14.6331, "step": 244530 }, { "epoch": 0.49398627165002806, "grad_norm": 0.0, "learning_rate": 6.055259211054496e-06, "loss": 23.0497, "step": 244540 }, { "epoch": 0.4940064722827119, "grad_norm": 202.64125061035156, "learning_rate": 6.0549180053867114e-06, "loss": 10.9263, "step": 244550 }, { "epoch": 0.4940266729153957, "grad_norm": 2.338362693786621, "learning_rate": 6.054576794577387e-06, "loss": 14.5765, "step": 244560 }, { "epoch": 0.4940468735480795, "grad_norm": 238.6088409423828, "learning_rate": 6.054235578628181e-06, "loss": 23.908, "step": 244570 }, { "epoch": 0.49406707418076334, "grad_norm": 304.35076904296875, "learning_rate": 6.053894357540761e-06, "loss": 9.6385, "step": 244580 }, { "epoch": 0.49408727481344716, "grad_norm": 418.65032958984375, "learning_rate": 6.053553131316785e-06, "loss": 19.8783, "step": 244590 }, { "epoch": 0.494107475446131, "grad_norm": 330.6903991699219, "learning_rate": 6.0532118999579206e-06, "loss": 21.676, "step": 244600 }, { "epoch": 0.4941276760788148, "grad_norm": 316.10943603515625, "learning_rate": 6.052870663465829e-06, "loss": 19.6024, "step": 244610 }, { "epoch": 0.4941478767114986, "grad_norm": 64.31596374511719, "learning_rate": 6.0525294218421735e-06, "loss": 12.5756, "step": 244620 }, { "epoch": 0.49416807734418244, "grad_norm": 346.3873291015625, "learning_rate": 6.052188175088617e-06, "loss": 24.2205, "step": 244630 }, { "epoch": 0.4941882779768662, "grad_norm": 117.12847137451172, "learning_rate": 6.051846923206824e-06, "loss": 9.5047, "step": 244640 }, { "epoch": 0.49420847860955003, "grad_norm": 313.15887451171875, "learning_rate": 6.051505666198454e-06, "loss": 17.5844, "step": 244650 }, { "epoch": 0.49422867924223385, "grad_norm": 167.126708984375, "learning_rate": 6.051164404065175e-06, "loss": 14.2004, "step": 244660 }, { "epoch": 0.49424887987491767, "grad_norm": 194.36598205566406, "learning_rate": 6.050823136808649e-06, "loss": 17.9698, "step": 244670 }, { "epoch": 0.4942690805076015, "grad_norm": 220.38336181640625, "learning_rate": 6.050481864430536e-06, "loss": 21.7677, "step": 244680 }, { "epoch": 0.4942892811402853, "grad_norm": 302.5650329589844, "learning_rate": 6.050140586932504e-06, "loss": 20.5469, "step": 244690 }, { "epoch": 0.49430948177296913, "grad_norm": 197.69094848632812, "learning_rate": 6.049799304316214e-06, "loss": 18.6935, "step": 244700 }, { "epoch": 0.49432968240565295, "grad_norm": 241.56390380859375, "learning_rate": 6.0494580165833275e-06, "loss": 12.1381, "step": 244710 }, { "epoch": 0.49434988303833677, "grad_norm": 104.44047546386719, "learning_rate": 6.049116723735512e-06, "loss": 19.6696, "step": 244720 }, { "epoch": 0.4943700836710206, "grad_norm": 19.153400421142578, "learning_rate": 6.048775425774426e-06, "loss": 18.5233, "step": 244730 }, { "epoch": 0.4943902843037044, "grad_norm": 260.1300964355469, "learning_rate": 6.048434122701738e-06, "loss": 21.6535, "step": 244740 }, { "epoch": 0.49441048493638823, "grad_norm": 86.92571258544922, "learning_rate": 6.048092814519109e-06, "loss": 13.8988, "step": 244750 }, { "epoch": 0.49443068556907205, "grad_norm": 113.34544372558594, "learning_rate": 6.047751501228203e-06, "loss": 8.8183, "step": 244760 }, { "epoch": 0.4944508862017558, "grad_norm": 218.61387634277344, "learning_rate": 6.047410182830684e-06, "loss": 18.0609, "step": 244770 }, { "epoch": 0.49447108683443963, "grad_norm": 476.4884948730469, "learning_rate": 6.047068859328213e-06, "loss": 23.7205, "step": 244780 }, { "epoch": 0.49449128746712345, "grad_norm": 230.36752319335938, "learning_rate": 6.046727530722456e-06, "loss": 18.8846, "step": 244790 }, { "epoch": 0.4945114880998073, "grad_norm": 314.3381042480469, "learning_rate": 6.046386197015076e-06, "loss": 36.129, "step": 244800 }, { "epoch": 0.4945316887324911, "grad_norm": 214.6229705810547, "learning_rate": 6.046044858207737e-06, "loss": 17.5281, "step": 244810 }, { "epoch": 0.4945518893651749, "grad_norm": 518.5840454101562, "learning_rate": 6.045703514302101e-06, "loss": 30.453, "step": 244820 }, { "epoch": 0.49457208999785873, "grad_norm": 0.0, "learning_rate": 6.045362165299835e-06, "loss": 13.8386, "step": 244830 }, { "epoch": 0.49459229063054255, "grad_norm": 466.2996520996094, "learning_rate": 6.0450208112026e-06, "loss": 23.7469, "step": 244840 }, { "epoch": 0.4946124912632264, "grad_norm": 72.21483612060547, "learning_rate": 6.044679452012059e-06, "loss": 18.9533, "step": 244850 }, { "epoch": 0.4946326918959102, "grad_norm": 958.6412353515625, "learning_rate": 6.044338087729878e-06, "loss": 40.1104, "step": 244860 }, { "epoch": 0.494652892528594, "grad_norm": 502.6129150390625, "learning_rate": 6.04399671835772e-06, "loss": 24.7627, "step": 244870 }, { "epoch": 0.49467309316127783, "grad_norm": 270.39532470703125, "learning_rate": 6.043655343897249e-06, "loss": 15.065, "step": 244880 }, { "epoch": 0.49469329379396165, "grad_norm": 29.861547470092773, "learning_rate": 6.043313964350126e-06, "loss": 12.1136, "step": 244890 }, { "epoch": 0.4947134944266454, "grad_norm": 241.96859741210938, "learning_rate": 6.04297257971802e-06, "loss": 19.0696, "step": 244900 }, { "epoch": 0.49473369505932924, "grad_norm": 49.75981903076172, "learning_rate": 6.0426311900025905e-06, "loss": 26.9656, "step": 244910 }, { "epoch": 0.49475389569201306, "grad_norm": 223.46434020996094, "learning_rate": 6.042289795205504e-06, "loss": 23.619, "step": 244920 }, { "epoch": 0.4947740963246969, "grad_norm": 441.3772888183594, "learning_rate": 6.041948395328423e-06, "loss": 14.3708, "step": 244930 }, { "epoch": 0.4947942969573807, "grad_norm": 253.46014404296875, "learning_rate": 6.041606990373012e-06, "loss": 15.9217, "step": 244940 }, { "epoch": 0.4948144975900645, "grad_norm": 225.38803100585938, "learning_rate": 6.041265580340935e-06, "loss": 19.3127, "step": 244950 }, { "epoch": 0.49483469822274834, "grad_norm": 0.0, "learning_rate": 6.040924165233856e-06, "loss": 13.5155, "step": 244960 }, { "epoch": 0.49485489885543216, "grad_norm": 410.49761962890625, "learning_rate": 6.040582745053438e-06, "loss": 19.4774, "step": 244970 }, { "epoch": 0.494875099488116, "grad_norm": 436.8222351074219, "learning_rate": 6.040241319801346e-06, "loss": 18.5624, "step": 244980 }, { "epoch": 0.4948953001207998, "grad_norm": 292.4451904296875, "learning_rate": 6.039899889479246e-06, "loss": 17.5294, "step": 244990 }, { "epoch": 0.4949155007534836, "grad_norm": 116.83655548095703, "learning_rate": 6.039558454088796e-06, "loss": 7.9498, "step": 245000 }, { "epoch": 0.49493570138616744, "grad_norm": 70.72248840332031, "learning_rate": 6.039217013631668e-06, "loss": 12.4493, "step": 245010 }, { "epoch": 0.49495590201885126, "grad_norm": 58.64997482299805, "learning_rate": 6.0388755681095216e-06, "loss": 20.0312, "step": 245020 }, { "epoch": 0.494976102651535, "grad_norm": 74.66365814208984, "learning_rate": 6.038534117524021e-06, "loss": 11.5502, "step": 245030 }, { "epoch": 0.49499630328421884, "grad_norm": 451.2154541015625, "learning_rate": 6.038192661876832e-06, "loss": 24.3529, "step": 245040 }, { "epoch": 0.49501650391690266, "grad_norm": 205.95565795898438, "learning_rate": 6.0378512011696155e-06, "loss": 10.9897, "step": 245050 }, { "epoch": 0.4950367045495865, "grad_norm": 365.2042236328125, "learning_rate": 6.03750973540404e-06, "loss": 15.2563, "step": 245060 }, { "epoch": 0.4950569051822703, "grad_norm": 145.86997985839844, "learning_rate": 6.037168264581767e-06, "loss": 20.2445, "step": 245070 }, { "epoch": 0.4950771058149541, "grad_norm": 77.67876434326172, "learning_rate": 6.036826788704463e-06, "loss": 12.411, "step": 245080 }, { "epoch": 0.49509730644763794, "grad_norm": 316.9991149902344, "learning_rate": 6.03648530777379e-06, "loss": 25.0226, "step": 245090 }, { "epoch": 0.49511750708032176, "grad_norm": 140.66482543945312, "learning_rate": 6.036143821791413e-06, "loss": 9.9153, "step": 245100 }, { "epoch": 0.4951377077130056, "grad_norm": 146.4595489501953, "learning_rate": 6.035802330758997e-06, "loss": 16.2253, "step": 245110 }, { "epoch": 0.4951579083456894, "grad_norm": 156.4415740966797, "learning_rate": 6.0354608346782075e-06, "loss": 32.41, "step": 245120 }, { "epoch": 0.4951781089783732, "grad_norm": 246.86753845214844, "learning_rate": 6.035119333550705e-06, "loss": 13.7969, "step": 245130 }, { "epoch": 0.49519830961105704, "grad_norm": 586.6036376953125, "learning_rate": 6.034777827378157e-06, "loss": 30.6134, "step": 245140 }, { "epoch": 0.49521851024374086, "grad_norm": 13.210935592651367, "learning_rate": 6.03443631616223e-06, "loss": 11.771, "step": 245150 }, { "epoch": 0.4952387108764246, "grad_norm": 389.6850891113281, "learning_rate": 6.034094799904583e-06, "loss": 25.157, "step": 245160 }, { "epoch": 0.49525891150910845, "grad_norm": 237.7878875732422, "learning_rate": 6.0337532786068846e-06, "loss": 31.0168, "step": 245170 }, { "epoch": 0.49527911214179227, "grad_norm": 427.48297119140625, "learning_rate": 6.033411752270798e-06, "loss": 17.7299, "step": 245180 }, { "epoch": 0.4952993127744761, "grad_norm": 286.3634948730469, "learning_rate": 6.033070220897988e-06, "loss": 26.3863, "step": 245190 }, { "epoch": 0.4953195134071599, "grad_norm": 642.2401123046875, "learning_rate": 6.032728684490118e-06, "loss": 27.2663, "step": 245200 }, { "epoch": 0.4953397140398437, "grad_norm": 496.3467102050781, "learning_rate": 6.032387143048853e-06, "loss": 22.4379, "step": 245210 }, { "epoch": 0.49535991467252755, "grad_norm": 558.6436157226562, "learning_rate": 6.032045596575862e-06, "loss": 24.5645, "step": 245220 }, { "epoch": 0.49538011530521137, "grad_norm": 244.09234619140625, "learning_rate": 6.031704045072803e-06, "loss": 11.2061, "step": 245230 }, { "epoch": 0.4954003159378952, "grad_norm": 547.108154296875, "learning_rate": 6.031362488541344e-06, "loss": 22.2405, "step": 245240 }, { "epoch": 0.495420516570579, "grad_norm": 153.71588134765625, "learning_rate": 6.031020926983149e-06, "loss": 13.6541, "step": 245250 }, { "epoch": 0.4954407172032628, "grad_norm": 37.5488166809082, "learning_rate": 6.030679360399883e-06, "loss": 19.5655, "step": 245260 }, { "epoch": 0.49546091783594665, "grad_norm": 40.69220733642578, "learning_rate": 6.030337788793212e-06, "loss": 17.1022, "step": 245270 }, { "epoch": 0.4954811184686304, "grad_norm": 39.55512619018555, "learning_rate": 6.029996212164799e-06, "loss": 12.0888, "step": 245280 }, { "epoch": 0.49550131910131423, "grad_norm": 844.1260375976562, "learning_rate": 6.029654630516308e-06, "loss": 25.5098, "step": 245290 }, { "epoch": 0.49552151973399805, "grad_norm": 310.4819030761719, "learning_rate": 6.029313043849407e-06, "loss": 25.4461, "step": 245300 }, { "epoch": 0.49554172036668187, "grad_norm": 246.79713439941406, "learning_rate": 6.02897145216576e-06, "loss": 18.9139, "step": 245310 }, { "epoch": 0.4955619209993657, "grad_norm": 74.28938293457031, "learning_rate": 6.0286298554670275e-06, "loss": 20.5199, "step": 245320 }, { "epoch": 0.4955821216320495, "grad_norm": 698.9652099609375, "learning_rate": 6.028288253754882e-06, "loss": 28.1589, "step": 245330 }, { "epoch": 0.49560232226473333, "grad_norm": 821.1625366210938, "learning_rate": 6.02794664703098e-06, "loss": 22.9419, "step": 245340 }, { "epoch": 0.49562252289741715, "grad_norm": 448.0999450683594, "learning_rate": 6.027605035296994e-06, "loss": 28.8267, "step": 245350 }, { "epoch": 0.49564272353010097, "grad_norm": 246.79574584960938, "learning_rate": 6.027263418554585e-06, "loss": 16.7485, "step": 245360 }, { "epoch": 0.4956629241627848, "grad_norm": 256.899169921875, "learning_rate": 6.026921796805417e-06, "loss": 25.642, "step": 245370 }, { "epoch": 0.4956831247954686, "grad_norm": 244.4949188232422, "learning_rate": 6.026580170051158e-06, "loss": 15.8482, "step": 245380 }, { "epoch": 0.49570332542815243, "grad_norm": 425.5538330078125, "learning_rate": 6.026238538293472e-06, "loss": 20.2088, "step": 245390 }, { "epoch": 0.49572352606083625, "grad_norm": 246.77471923828125, "learning_rate": 6.025896901534023e-06, "loss": 17.7487, "step": 245400 }, { "epoch": 0.49574372669352, "grad_norm": 240.14308166503906, "learning_rate": 6.025555259774478e-06, "loss": 23.832, "step": 245410 }, { "epoch": 0.49576392732620383, "grad_norm": 438.70880126953125, "learning_rate": 6.025213613016501e-06, "loss": 20.5724, "step": 245420 }, { "epoch": 0.49578412795888765, "grad_norm": 4.423550605773926, "learning_rate": 6.024871961261756e-06, "loss": 18.2726, "step": 245430 }, { "epoch": 0.4958043285915715, "grad_norm": 654.311279296875, "learning_rate": 6.024530304511911e-06, "loss": 23.9032, "step": 245440 }, { "epoch": 0.4958245292242553, "grad_norm": 262.79168701171875, "learning_rate": 6.024188642768628e-06, "loss": 18.4867, "step": 245450 }, { "epoch": 0.4958447298569391, "grad_norm": 214.47071838378906, "learning_rate": 6.023846976033574e-06, "loss": 11.4786, "step": 245460 }, { "epoch": 0.49586493048962293, "grad_norm": 259.78802490234375, "learning_rate": 6.0235053043084155e-06, "loss": 39.3809, "step": 245470 }, { "epoch": 0.49588513112230675, "grad_norm": 117.46600341796875, "learning_rate": 6.023163627594813e-06, "loss": 23.1067, "step": 245480 }, { "epoch": 0.4959053317549906, "grad_norm": 439.9520263671875, "learning_rate": 6.022821945894439e-06, "loss": 17.5653, "step": 245490 }, { "epoch": 0.4959255323876744, "grad_norm": 233.7616424560547, "learning_rate": 6.022480259208951e-06, "loss": 26.6165, "step": 245500 }, { "epoch": 0.4959457330203582, "grad_norm": 76.08538055419922, "learning_rate": 6.022138567540023e-06, "loss": 16.3839, "step": 245510 }, { "epoch": 0.49596593365304203, "grad_norm": 427.0955810546875, "learning_rate": 6.021796870889311e-06, "loss": 17.8166, "step": 245520 }, { "epoch": 0.49598613428572585, "grad_norm": 650.9671630859375, "learning_rate": 6.0214551692584875e-06, "loss": 32.6244, "step": 245530 }, { "epoch": 0.4960063349184096, "grad_norm": 233.66143798828125, "learning_rate": 6.021113462649215e-06, "loss": 14.3767, "step": 245540 }, { "epoch": 0.49602653555109344, "grad_norm": 241.6490020751953, "learning_rate": 6.020771751063159e-06, "loss": 19.8944, "step": 245550 }, { "epoch": 0.49604673618377726, "grad_norm": 492.3786926269531, "learning_rate": 6.020430034501986e-06, "loss": 25.1231, "step": 245560 }, { "epoch": 0.4960669368164611, "grad_norm": 208.30296325683594, "learning_rate": 6.02008831296736e-06, "loss": 16.7638, "step": 245570 }, { "epoch": 0.4960871374491449, "grad_norm": 418.12261962890625, "learning_rate": 6.019746586460947e-06, "loss": 18.8418, "step": 245580 }, { "epoch": 0.4961073380818287, "grad_norm": 98.33612823486328, "learning_rate": 6.019404854984413e-06, "loss": 17.8366, "step": 245590 }, { "epoch": 0.49612753871451254, "grad_norm": 149.95623779296875, "learning_rate": 6.019063118539425e-06, "loss": 11.0903, "step": 245600 }, { "epoch": 0.49614773934719636, "grad_norm": 288.5825500488281, "learning_rate": 6.018721377127644e-06, "loss": 25.7879, "step": 245610 }, { "epoch": 0.4961679399798802, "grad_norm": 261.2408142089844, "learning_rate": 6.018379630750741e-06, "loss": 14.7808, "step": 245620 }, { "epoch": 0.496188140612564, "grad_norm": 206.1098175048828, "learning_rate": 6.018037879410379e-06, "loss": 19.1463, "step": 245630 }, { "epoch": 0.4962083412452478, "grad_norm": 738.7783813476562, "learning_rate": 6.017696123108223e-06, "loss": 25.1224, "step": 245640 }, { "epoch": 0.49622854187793164, "grad_norm": 296.3504638671875, "learning_rate": 6.01735436184594e-06, "loss": 33.9275, "step": 245650 }, { "epoch": 0.49624874251061546, "grad_norm": 305.50482177734375, "learning_rate": 6.0170125956251935e-06, "loss": 13.4514, "step": 245660 }, { "epoch": 0.4962689431432992, "grad_norm": 242.17489624023438, "learning_rate": 6.016670824447653e-06, "loss": 16.2384, "step": 245670 }, { "epoch": 0.49628914377598304, "grad_norm": 210.87673950195312, "learning_rate": 6.0163290483149826e-06, "loss": 13.7175, "step": 245680 }, { "epoch": 0.49630934440866686, "grad_norm": 255.59169006347656, "learning_rate": 6.0159872672288464e-06, "loss": 16.2586, "step": 245690 }, { "epoch": 0.4963295450413507, "grad_norm": 62.464027404785156, "learning_rate": 6.015645481190912e-06, "loss": 12.6034, "step": 245700 }, { "epoch": 0.4963497456740345, "grad_norm": 397.4351806640625, "learning_rate": 6.0153036902028435e-06, "loss": 23.6827, "step": 245710 }, { "epoch": 0.4963699463067183, "grad_norm": 247.48492431640625, "learning_rate": 6.01496189426631e-06, "loss": 26.0792, "step": 245720 }, { "epoch": 0.49639014693940214, "grad_norm": 327.3603210449219, "learning_rate": 6.014620093382975e-06, "loss": 22.6065, "step": 245730 }, { "epoch": 0.49641034757208596, "grad_norm": 322.8639221191406, "learning_rate": 6.014278287554503e-06, "loss": 23.882, "step": 245740 }, { "epoch": 0.4964305482047698, "grad_norm": 325.9333801269531, "learning_rate": 6.013936476782563e-06, "loss": 19.9993, "step": 245750 }, { "epoch": 0.4964507488374536, "grad_norm": 229.17236328125, "learning_rate": 6.01359466106882e-06, "loss": 16.1936, "step": 245760 }, { "epoch": 0.4964709494701374, "grad_norm": 234.7612762451172, "learning_rate": 6.013252840414938e-06, "loss": 15.0988, "step": 245770 }, { "epoch": 0.49649115010282124, "grad_norm": 291.35101318359375, "learning_rate": 6.012911014822586e-06, "loss": 18.609, "step": 245780 }, { "epoch": 0.49651135073550506, "grad_norm": 490.72369384765625, "learning_rate": 6.012569184293427e-06, "loss": 18.4986, "step": 245790 }, { "epoch": 0.49653155136818883, "grad_norm": 41.716285705566406, "learning_rate": 6.01222734882913e-06, "loss": 17.8499, "step": 245800 }, { "epoch": 0.49655175200087265, "grad_norm": 137.29727172851562, "learning_rate": 6.0118855084313595e-06, "loss": 15.2594, "step": 245810 }, { "epoch": 0.49657195263355647, "grad_norm": 443.74713134765625, "learning_rate": 6.011543663101781e-06, "loss": 22.6023, "step": 245820 }, { "epoch": 0.4965921532662403, "grad_norm": 224.1483612060547, "learning_rate": 6.011201812842062e-06, "loss": 10.8209, "step": 245830 }, { "epoch": 0.4966123538989241, "grad_norm": 566.3534545898438, "learning_rate": 6.010859957653869e-06, "loss": 27.9259, "step": 245840 }, { "epoch": 0.49663255453160793, "grad_norm": 119.19084930419922, "learning_rate": 6.010518097538866e-06, "loss": 10.3284, "step": 245850 }, { "epoch": 0.49665275516429175, "grad_norm": 299.3779296875, "learning_rate": 6.010176232498719e-06, "loss": 19.7367, "step": 245860 }, { "epoch": 0.49667295579697557, "grad_norm": 283.1239318847656, "learning_rate": 6.009834362535097e-06, "loss": 15.9272, "step": 245870 }, { "epoch": 0.4966931564296594, "grad_norm": 450.26708984375, "learning_rate": 6.009492487649666e-06, "loss": 17.9796, "step": 245880 }, { "epoch": 0.4967133570623432, "grad_norm": 561.6602783203125, "learning_rate": 6.00915060784409e-06, "loss": 23.7331, "step": 245890 }, { "epoch": 0.49673355769502703, "grad_norm": 409.9729919433594, "learning_rate": 6.008808723120035e-06, "loss": 22.2393, "step": 245900 }, { "epoch": 0.49675375832771085, "grad_norm": 409.2427978515625, "learning_rate": 6.0084668334791695e-06, "loss": 14.047, "step": 245910 }, { "epoch": 0.4967739589603946, "grad_norm": 226.38072204589844, "learning_rate": 6.0081249389231615e-06, "loss": 15.7916, "step": 245920 }, { "epoch": 0.49679415959307843, "grad_norm": 202.22935485839844, "learning_rate": 6.00778303945367e-06, "loss": 11.9994, "step": 245930 }, { "epoch": 0.49681436022576225, "grad_norm": 444.7662048339844, "learning_rate": 6.007441135072371e-06, "loss": 22.9448, "step": 245940 }, { "epoch": 0.4968345608584461, "grad_norm": 20.31171417236328, "learning_rate": 6.007099225780922e-06, "loss": 24.2718, "step": 245950 }, { "epoch": 0.4968547614911299, "grad_norm": 278.421630859375, "learning_rate": 6.0067573115809965e-06, "loss": 21.4329, "step": 245960 }, { "epoch": 0.4968749621238137, "grad_norm": 200.64927673339844, "learning_rate": 6.006415392474256e-06, "loss": 20.0715, "step": 245970 }, { "epoch": 0.49689516275649753, "grad_norm": 485.5809631347656, "learning_rate": 6.00607346846237e-06, "loss": 28.4952, "step": 245980 }, { "epoch": 0.49691536338918135, "grad_norm": 66.97520446777344, "learning_rate": 6.005731539547004e-06, "loss": 22.7163, "step": 245990 }, { "epoch": 0.4969355640218652, "grad_norm": 68.0572509765625, "learning_rate": 6.005389605729824e-06, "loss": 13.3643, "step": 246000 }, { "epoch": 0.496955764654549, "grad_norm": 2298.267333984375, "learning_rate": 6.005047667012498e-06, "loss": 33.7005, "step": 246010 }, { "epoch": 0.4969759652872328, "grad_norm": 542.5613403320312, "learning_rate": 6.00470572339669e-06, "loss": 35.651, "step": 246020 }, { "epoch": 0.49699616591991663, "grad_norm": 325.0408020019531, "learning_rate": 6.004363774884069e-06, "loss": 13.6686, "step": 246030 }, { "epoch": 0.49701636655260045, "grad_norm": 341.2667236328125, "learning_rate": 6.0040218214763e-06, "loss": 19.0296, "step": 246040 }, { "epoch": 0.4970365671852842, "grad_norm": 219.3208770751953, "learning_rate": 6.003679863175052e-06, "loss": 12.727, "step": 246050 }, { "epoch": 0.49705676781796804, "grad_norm": 171.37416076660156, "learning_rate": 6.003337899981989e-06, "loss": 13.4412, "step": 246060 }, { "epoch": 0.49707696845065186, "grad_norm": 340.29180908203125, "learning_rate": 6.002995931898779e-06, "loss": 19.3153, "step": 246070 }, { "epoch": 0.4970971690833357, "grad_norm": 342.27227783203125, "learning_rate": 6.00265395892709e-06, "loss": 19.6893, "step": 246080 }, { "epoch": 0.4971173697160195, "grad_norm": 351.29742431640625, "learning_rate": 6.002311981068584e-06, "loss": 26.4404, "step": 246090 }, { "epoch": 0.4971375703487033, "grad_norm": 238.55567932128906, "learning_rate": 6.001969998324932e-06, "loss": 32.2122, "step": 246100 }, { "epoch": 0.49715777098138714, "grad_norm": 415.1384582519531, "learning_rate": 6.0016280106978e-06, "loss": 31.2196, "step": 246110 }, { "epoch": 0.49717797161407096, "grad_norm": 229.41343688964844, "learning_rate": 6.001286018188856e-06, "loss": 11.4703, "step": 246120 }, { "epoch": 0.4971981722467548, "grad_norm": 194.33383178710938, "learning_rate": 6.000944020799764e-06, "loss": 8.2334, "step": 246130 }, { "epoch": 0.4972183728794386, "grad_norm": 237.9521484375, "learning_rate": 6.000602018532193e-06, "loss": 23.7834, "step": 246140 }, { "epoch": 0.4972385735121224, "grad_norm": 33.80694580078125, "learning_rate": 6.000260011387809e-06, "loss": 18.4462, "step": 246150 }, { "epoch": 0.49725877414480624, "grad_norm": 174.9094696044922, "learning_rate": 5.999917999368278e-06, "loss": 9.1379, "step": 246160 }, { "epoch": 0.49727897477749006, "grad_norm": 293.1462707519531, "learning_rate": 5.999575982475269e-06, "loss": 24.6933, "step": 246170 }, { "epoch": 0.4972991754101738, "grad_norm": 228.96177673339844, "learning_rate": 5.999233960710447e-06, "loss": 17.5733, "step": 246180 }, { "epoch": 0.49731937604285764, "grad_norm": 320.4145812988281, "learning_rate": 5.99889193407548e-06, "loss": 10.7683, "step": 246190 }, { "epoch": 0.49733957667554146, "grad_norm": 368.358154296875, "learning_rate": 5.9985499025720354e-06, "loss": 24.6113, "step": 246200 }, { "epoch": 0.4973597773082253, "grad_norm": 919.4326782226562, "learning_rate": 5.998207866201781e-06, "loss": 55.0273, "step": 246210 }, { "epoch": 0.4973799779409091, "grad_norm": 577.2208251953125, "learning_rate": 5.99786582496638e-06, "loss": 23.3036, "step": 246220 }, { "epoch": 0.4974001785735929, "grad_norm": 294.1979064941406, "learning_rate": 5.9975237788675034e-06, "loss": 16.9634, "step": 246230 }, { "epoch": 0.49742037920627674, "grad_norm": 671.3495483398438, "learning_rate": 5.997181727906816e-06, "loss": 24.9423, "step": 246240 }, { "epoch": 0.49744057983896056, "grad_norm": 4.277970790863037, "learning_rate": 5.996839672085986e-06, "loss": 16.4941, "step": 246250 }, { "epoch": 0.4974607804716444, "grad_norm": 333.8573303222656, "learning_rate": 5.996497611406682e-06, "loss": 19.7933, "step": 246260 }, { "epoch": 0.4974809811043282, "grad_norm": 18.93907928466797, "learning_rate": 5.996155545870566e-06, "loss": 8.5453, "step": 246270 }, { "epoch": 0.497501181737012, "grad_norm": 243.36508178710938, "learning_rate": 5.995813475479313e-06, "loss": 9.0727, "step": 246280 }, { "epoch": 0.49752138236969584, "grad_norm": 238.2925567626953, "learning_rate": 5.995471400234584e-06, "loss": 13.722, "step": 246290 }, { "epoch": 0.49754158300237966, "grad_norm": 275.70751953125, "learning_rate": 5.995129320138047e-06, "loss": 9.8004, "step": 246300 }, { "epoch": 0.4975617836350634, "grad_norm": 307.3035583496094, "learning_rate": 5.994787235191372e-06, "loss": 13.5653, "step": 246310 }, { "epoch": 0.49758198426774725, "grad_norm": 211.58807373046875, "learning_rate": 5.994445145396223e-06, "loss": 22.2202, "step": 246320 }, { "epoch": 0.49760218490043107, "grad_norm": 335.21307373046875, "learning_rate": 5.994103050754271e-06, "loss": 21.0919, "step": 246330 }, { "epoch": 0.4976223855331149, "grad_norm": 215.21095275878906, "learning_rate": 5.99376095126718e-06, "loss": 25.6598, "step": 246340 }, { "epoch": 0.4976425861657987, "grad_norm": 504.8789978027344, "learning_rate": 5.993418846936619e-06, "loss": 34.1157, "step": 246350 }, { "epoch": 0.4976627867984825, "grad_norm": 466.95404052734375, "learning_rate": 5.993076737764254e-06, "loss": 27.43, "step": 246360 }, { "epoch": 0.49768298743116635, "grad_norm": 199.25454711914062, "learning_rate": 5.9927346237517554e-06, "loss": 17.0097, "step": 246370 }, { "epoch": 0.49770318806385017, "grad_norm": 207.2054443359375, "learning_rate": 5.992392504900786e-06, "loss": 11.8152, "step": 246380 }, { "epoch": 0.497723388696534, "grad_norm": 276.7508239746094, "learning_rate": 5.9920503812130196e-06, "loss": 18.9127, "step": 246390 }, { "epoch": 0.4977435893292178, "grad_norm": 175.2879638671875, "learning_rate": 5.991708252690117e-06, "loss": 22.2516, "step": 246400 }, { "epoch": 0.4977637899619016, "grad_norm": 587.28759765625, "learning_rate": 5.991366119333749e-06, "loss": 18.4652, "step": 246410 }, { "epoch": 0.49778399059458545, "grad_norm": 144.5283966064453, "learning_rate": 5.991023981145585e-06, "loss": 17.5111, "step": 246420 }, { "epoch": 0.4978041912272692, "grad_norm": 256.3717041015625, "learning_rate": 5.990681838127287e-06, "loss": 8.8905, "step": 246430 }, { "epoch": 0.49782439185995303, "grad_norm": 150.2742156982422, "learning_rate": 5.990339690280528e-06, "loss": 15.4399, "step": 246440 }, { "epoch": 0.49784459249263685, "grad_norm": 634.4785766601562, "learning_rate": 5.989997537606973e-06, "loss": 29.1268, "step": 246450 }, { "epoch": 0.49786479312532067, "grad_norm": 202.6644287109375, "learning_rate": 5.9896553801082906e-06, "loss": 13.9071, "step": 246460 }, { "epoch": 0.4978849937580045, "grad_norm": 136.6668243408203, "learning_rate": 5.989313217786146e-06, "loss": 21.2524, "step": 246470 }, { "epoch": 0.4979051943906883, "grad_norm": 325.2998962402344, "learning_rate": 5.988971050642211e-06, "loss": 24.7016, "step": 246480 }, { "epoch": 0.49792539502337213, "grad_norm": 131.2258758544922, "learning_rate": 5.98862887867815e-06, "loss": 19.8185, "step": 246490 }, { "epoch": 0.49794559565605595, "grad_norm": 231.00108337402344, "learning_rate": 5.988286701895631e-06, "loss": 27.7663, "step": 246500 }, { "epoch": 0.49796579628873977, "grad_norm": 198.72366333007812, "learning_rate": 5.987944520296324e-06, "loss": 16.1621, "step": 246510 }, { "epoch": 0.4979859969214236, "grad_norm": 159.7663116455078, "learning_rate": 5.987602333881894e-06, "loss": 8.0435, "step": 246520 }, { "epoch": 0.4980061975541074, "grad_norm": 62.52103805541992, "learning_rate": 5.987260142654013e-06, "loss": 16.3318, "step": 246530 }, { "epoch": 0.49802639818679123, "grad_norm": 275.44427490234375, "learning_rate": 5.986917946614341e-06, "loss": 11.3839, "step": 246540 }, { "epoch": 0.49804659881947505, "grad_norm": 423.0904541015625, "learning_rate": 5.986575745764553e-06, "loss": 22.6925, "step": 246550 }, { "epoch": 0.4980667994521588, "grad_norm": 240.21925354003906, "learning_rate": 5.986233540106315e-06, "loss": 22.1136, "step": 246560 }, { "epoch": 0.49808700008484263, "grad_norm": 335.5609436035156, "learning_rate": 5.985891329641294e-06, "loss": 23.3, "step": 246570 }, { "epoch": 0.49810720071752645, "grad_norm": 222.31324768066406, "learning_rate": 5.985549114371158e-06, "loss": 26.827, "step": 246580 }, { "epoch": 0.4981274013502103, "grad_norm": 358.239501953125, "learning_rate": 5.985206894297575e-06, "loss": 25.9379, "step": 246590 }, { "epoch": 0.4981476019828941, "grad_norm": 325.1926574707031, "learning_rate": 5.984864669422214e-06, "loss": 19.8624, "step": 246600 }, { "epoch": 0.4981678026155779, "grad_norm": 165.61190795898438, "learning_rate": 5.9845224397467415e-06, "loss": 19.0777, "step": 246610 }, { "epoch": 0.49818800324826173, "grad_norm": 2519.3388671875, "learning_rate": 5.984180205272826e-06, "loss": 39.0098, "step": 246620 }, { "epoch": 0.49820820388094555, "grad_norm": 66.86842346191406, "learning_rate": 5.9838379660021354e-06, "loss": 21.0587, "step": 246630 }, { "epoch": 0.4982284045136294, "grad_norm": 247.3855438232422, "learning_rate": 5.983495721936337e-06, "loss": 18.0975, "step": 246640 }, { "epoch": 0.4982486051463132, "grad_norm": 137.12356567382812, "learning_rate": 5.9831534730771e-06, "loss": 17.2381, "step": 246650 }, { "epoch": 0.498268805778997, "grad_norm": 33.68268585205078, "learning_rate": 5.982811219426095e-06, "loss": 6.6659, "step": 246660 }, { "epoch": 0.49828900641168083, "grad_norm": 20.357545852661133, "learning_rate": 5.982468960984984e-06, "loss": 23.4405, "step": 246670 }, { "epoch": 0.49830920704436465, "grad_norm": 573.2885131835938, "learning_rate": 5.9821266977554395e-06, "loss": 24.9594, "step": 246680 }, { "epoch": 0.4983294076770484, "grad_norm": 124.83633422851562, "learning_rate": 5.981784429739129e-06, "loss": 8.7071, "step": 246690 }, { "epoch": 0.49834960830973224, "grad_norm": 837.1769409179688, "learning_rate": 5.98144215693772e-06, "loss": 19.1126, "step": 246700 }, { "epoch": 0.49836980894241606, "grad_norm": 90.0428466796875, "learning_rate": 5.981099879352882e-06, "loss": 9.7829, "step": 246710 }, { "epoch": 0.4983900095750999, "grad_norm": 23.162240982055664, "learning_rate": 5.9807575969862796e-06, "loss": 11.9972, "step": 246720 }, { "epoch": 0.4984102102077837, "grad_norm": 224.2132110595703, "learning_rate": 5.980415309839586e-06, "loss": 20.0065, "step": 246730 }, { "epoch": 0.4984304108404675, "grad_norm": 210.44651794433594, "learning_rate": 5.9800730179144665e-06, "loss": 19.7137, "step": 246740 }, { "epoch": 0.49845061147315134, "grad_norm": 225.2881317138672, "learning_rate": 5.979730721212589e-06, "loss": 21.1851, "step": 246750 }, { "epoch": 0.49847081210583516, "grad_norm": 306.91015625, "learning_rate": 5.979388419735625e-06, "loss": 10.9148, "step": 246760 }, { "epoch": 0.498491012738519, "grad_norm": 493.2308654785156, "learning_rate": 5.979046113485237e-06, "loss": 13.4437, "step": 246770 }, { "epoch": 0.4985112133712028, "grad_norm": 220.34347534179688, "learning_rate": 5.978703802463101e-06, "loss": 22.1775, "step": 246780 }, { "epoch": 0.4985314140038866, "grad_norm": 244.67672729492188, "learning_rate": 5.9783614866708785e-06, "loss": 27.3463, "step": 246790 }, { "epoch": 0.49855161463657044, "grad_norm": 323.7417907714844, "learning_rate": 5.978019166110242e-06, "loss": 15.7285, "step": 246800 }, { "epoch": 0.49857181526925426, "grad_norm": 236.16128540039062, "learning_rate": 5.977676840782858e-06, "loss": 28.8174, "step": 246810 }, { "epoch": 0.498592015901938, "grad_norm": 216.7679443359375, "learning_rate": 5.977334510690397e-06, "loss": 21.824, "step": 246820 }, { "epoch": 0.49861221653462184, "grad_norm": 569.1253662109375, "learning_rate": 5.9769921758345254e-06, "loss": 23.1958, "step": 246830 }, { "epoch": 0.49863241716730566, "grad_norm": 463.51251220703125, "learning_rate": 5.976649836216912e-06, "loss": 15.0059, "step": 246840 }, { "epoch": 0.4986526177999895, "grad_norm": 349.98590087890625, "learning_rate": 5.976307491839226e-06, "loss": 30.4672, "step": 246850 }, { "epoch": 0.4986728184326733, "grad_norm": 338.9327087402344, "learning_rate": 5.975965142703135e-06, "loss": 36.7504, "step": 246860 }, { "epoch": 0.4986930190653571, "grad_norm": 253.1051788330078, "learning_rate": 5.97562278881031e-06, "loss": 15.2247, "step": 246870 }, { "epoch": 0.49871321969804094, "grad_norm": 116.48023986816406, "learning_rate": 5.975280430162416e-06, "loss": 17.8706, "step": 246880 }, { "epoch": 0.49873342033072476, "grad_norm": 302.9654541015625, "learning_rate": 5.974938066761124e-06, "loss": 15.8658, "step": 246890 }, { "epoch": 0.4987536209634086, "grad_norm": 431.087158203125, "learning_rate": 5.974595698608103e-06, "loss": 10.0084, "step": 246900 }, { "epoch": 0.4987738215960924, "grad_norm": 489.2413330078125, "learning_rate": 5.974253325705021e-06, "loss": 13.86, "step": 246910 }, { "epoch": 0.4987940222287762, "grad_norm": 286.79229736328125, "learning_rate": 5.973910948053545e-06, "loss": 18.3927, "step": 246920 }, { "epoch": 0.49881422286146004, "grad_norm": 452.56781005859375, "learning_rate": 5.973568565655345e-06, "loss": 31.5918, "step": 246930 }, { "epoch": 0.49883442349414386, "grad_norm": 90.1636962890625, "learning_rate": 5.973226178512093e-06, "loss": 11.3274, "step": 246940 }, { "epoch": 0.49885462412682763, "grad_norm": 190.4019317626953, "learning_rate": 5.972883786625452e-06, "loss": 18.9497, "step": 246950 }, { "epoch": 0.49887482475951145, "grad_norm": 206.96209716796875, "learning_rate": 5.972541389997093e-06, "loss": 23.0352, "step": 246960 }, { "epoch": 0.49889502539219527, "grad_norm": 286.6165771484375, "learning_rate": 5.972198988628686e-06, "loss": 17.0558, "step": 246970 }, { "epoch": 0.4989152260248791, "grad_norm": 177.94139099121094, "learning_rate": 5.9718565825219e-06, "loss": 13.9749, "step": 246980 }, { "epoch": 0.4989354266575629, "grad_norm": 328.6958923339844, "learning_rate": 5.971514171678401e-06, "loss": 13.6004, "step": 246990 }, { "epoch": 0.49895562729024673, "grad_norm": 466.6527404785156, "learning_rate": 5.97117175609986e-06, "loss": 20.9236, "step": 247000 }, { "epoch": 0.49897582792293055, "grad_norm": 377.2223205566406, "learning_rate": 5.970829335787946e-06, "loss": 12.8332, "step": 247010 }, { "epoch": 0.49899602855561437, "grad_norm": 489.24420166015625, "learning_rate": 5.9704869107443285e-06, "loss": 17.2657, "step": 247020 }, { "epoch": 0.4990162291882982, "grad_norm": 387.49920654296875, "learning_rate": 5.970144480970676e-06, "loss": 19.5889, "step": 247030 }, { "epoch": 0.499036429820982, "grad_norm": 361.7131652832031, "learning_rate": 5.969802046468655e-06, "loss": 27.4951, "step": 247040 }, { "epoch": 0.49905663045366583, "grad_norm": 127.88951873779297, "learning_rate": 5.969459607239938e-06, "loss": 8.6094, "step": 247050 }, { "epoch": 0.49907683108634965, "grad_norm": 579.5786743164062, "learning_rate": 5.969117163286191e-06, "loss": 42.4008, "step": 247060 }, { "epoch": 0.4990970317190334, "grad_norm": 295.8156433105469, "learning_rate": 5.968774714609086e-06, "loss": 28.6521, "step": 247070 }, { "epoch": 0.49911723235171723, "grad_norm": 231.35372924804688, "learning_rate": 5.96843226121029e-06, "loss": 12.7821, "step": 247080 }, { "epoch": 0.49913743298440105, "grad_norm": 272.6651306152344, "learning_rate": 5.968089803091471e-06, "loss": 45.1962, "step": 247090 }, { "epoch": 0.4991576336170849, "grad_norm": 36.426246643066406, "learning_rate": 5.967747340254303e-06, "loss": 17.4923, "step": 247100 }, { "epoch": 0.4991778342497687, "grad_norm": 6.658004283905029, "learning_rate": 5.967404872700449e-06, "loss": 20.6126, "step": 247110 }, { "epoch": 0.4991980348824525, "grad_norm": 232.3402099609375, "learning_rate": 5.967062400431583e-06, "loss": 19.928, "step": 247120 }, { "epoch": 0.49921823551513633, "grad_norm": 313.8846740722656, "learning_rate": 5.96671992344937e-06, "loss": 15.9184, "step": 247130 }, { "epoch": 0.49923843614782015, "grad_norm": 289.90716552734375, "learning_rate": 5.966377441755482e-06, "loss": 26.1218, "step": 247140 }, { "epoch": 0.499258636780504, "grad_norm": 211.78848266601562, "learning_rate": 5.966034955351588e-06, "loss": 19.9388, "step": 247150 }, { "epoch": 0.4992788374131878, "grad_norm": 223.04122924804688, "learning_rate": 5.965692464239358e-06, "loss": 22.9657, "step": 247160 }, { "epoch": 0.4992990380458716, "grad_norm": 1.8688573837280273, "learning_rate": 5.965349968420458e-06, "loss": 15.4811, "step": 247170 }, { "epoch": 0.49931923867855543, "grad_norm": 352.9197082519531, "learning_rate": 5.965007467896561e-06, "loss": 26.5372, "step": 247180 }, { "epoch": 0.49933943931123925, "grad_norm": 260.9129333496094, "learning_rate": 5.9646649626693335e-06, "loss": 11.9573, "step": 247190 }, { "epoch": 0.499359639943923, "grad_norm": 219.34231567382812, "learning_rate": 5.964322452740445e-06, "loss": 29.1539, "step": 247200 }, { "epoch": 0.49937984057660684, "grad_norm": 552.6179809570312, "learning_rate": 5.963979938111569e-06, "loss": 27.9101, "step": 247210 }, { "epoch": 0.49940004120929066, "grad_norm": 244.97413635253906, "learning_rate": 5.9636374187843686e-06, "loss": 26.4623, "step": 247220 }, { "epoch": 0.4994202418419745, "grad_norm": 657.6040649414062, "learning_rate": 5.963294894760518e-06, "loss": 20.8818, "step": 247230 }, { "epoch": 0.4994404424746583, "grad_norm": 295.69146728515625, "learning_rate": 5.962952366041685e-06, "loss": 14.0411, "step": 247240 }, { "epoch": 0.4994606431073421, "grad_norm": 148.9575958251953, "learning_rate": 5.962609832629538e-06, "loss": 13.6524, "step": 247250 }, { "epoch": 0.49948084374002594, "grad_norm": 6.49528694152832, "learning_rate": 5.962267294525747e-06, "loss": 12.1808, "step": 247260 }, { "epoch": 0.49950104437270976, "grad_norm": 305.91302490234375, "learning_rate": 5.961924751731985e-06, "loss": 28.3174, "step": 247270 }, { "epoch": 0.4995212450053936, "grad_norm": 388.1927795410156, "learning_rate": 5.961582204249915e-06, "loss": 28.4636, "step": 247280 }, { "epoch": 0.4995414456380774, "grad_norm": 280.144775390625, "learning_rate": 5.961239652081211e-06, "loss": 12.8092, "step": 247290 }, { "epoch": 0.4995616462707612, "grad_norm": 345.2969055175781, "learning_rate": 5.960897095227541e-06, "loss": 14.2124, "step": 247300 }, { "epoch": 0.49958184690344504, "grad_norm": 198.8424072265625, "learning_rate": 5.960554533690576e-06, "loss": 25.8451, "step": 247310 }, { "epoch": 0.49960204753612886, "grad_norm": 0.0, "learning_rate": 5.9602119674719846e-06, "loss": 27.4999, "step": 247320 }, { "epoch": 0.4996222481688126, "grad_norm": 152.44216918945312, "learning_rate": 5.959869396573435e-06, "loss": 12.2723, "step": 247330 }, { "epoch": 0.49964244880149644, "grad_norm": 234.5079803466797, "learning_rate": 5.959526820996602e-06, "loss": 17.8958, "step": 247340 }, { "epoch": 0.49966264943418026, "grad_norm": 503.7818298339844, "learning_rate": 5.959184240743149e-06, "loss": 21.64, "step": 247350 }, { "epoch": 0.4996828500668641, "grad_norm": 425.6216735839844, "learning_rate": 5.958841655814749e-06, "loss": 8.4155, "step": 247360 }, { "epoch": 0.4997030506995479, "grad_norm": 267.8376770019531, "learning_rate": 5.958499066213071e-06, "loss": 16.5845, "step": 247370 }, { "epoch": 0.4997232513322317, "grad_norm": 62.99100112915039, "learning_rate": 5.958156471939783e-06, "loss": 17.0105, "step": 247380 }, { "epoch": 0.49974345196491554, "grad_norm": 122.78208923339844, "learning_rate": 5.95781387299656e-06, "loss": 23.9143, "step": 247390 }, { "epoch": 0.49976365259759936, "grad_norm": 160.46665954589844, "learning_rate": 5.957471269385065e-06, "loss": 14.4954, "step": 247400 }, { "epoch": 0.4997838532302832, "grad_norm": 259.816162109375, "learning_rate": 5.957128661106973e-06, "loss": 31.2869, "step": 247410 }, { "epoch": 0.499804053862967, "grad_norm": 576.0125122070312, "learning_rate": 5.956786048163951e-06, "loss": 34.439, "step": 247420 }, { "epoch": 0.4998242544956508, "grad_norm": 504.95111083984375, "learning_rate": 5.9564434305576726e-06, "loss": 10.1218, "step": 247430 }, { "epoch": 0.49984445512833464, "grad_norm": 394.4610290527344, "learning_rate": 5.956100808289802e-06, "loss": 9.0431, "step": 247440 }, { "epoch": 0.49986465576101846, "grad_norm": 275.21820068359375, "learning_rate": 5.955758181362012e-06, "loss": 23.4996, "step": 247450 }, { "epoch": 0.4998848563937022, "grad_norm": 215.76683044433594, "learning_rate": 5.955415549775975e-06, "loss": 34.6607, "step": 247460 }, { "epoch": 0.49990505702638605, "grad_norm": 145.25111389160156, "learning_rate": 5.955072913533357e-06, "loss": 28.1554, "step": 247470 }, { "epoch": 0.49992525765906987, "grad_norm": 43.84568405151367, "learning_rate": 5.954730272635829e-06, "loss": 19.5966, "step": 247480 }, { "epoch": 0.4999454582917537, "grad_norm": 61.8556022644043, "learning_rate": 5.954387627085061e-06, "loss": 16.4235, "step": 247490 }, { "epoch": 0.4999656589244375, "grad_norm": 8.952932357788086, "learning_rate": 5.954044976882725e-06, "loss": 11.5383, "step": 247500 }, { "epoch": 0.4999858595571213, "grad_norm": 512.7568969726562, "learning_rate": 5.953702322030489e-06, "loss": 14.0087, "step": 247510 }, { "epoch": 0.5000060601898051, "grad_norm": 218.57373046875, "learning_rate": 5.9533596625300224e-06, "loss": 13.0934, "step": 247520 }, { "epoch": 0.5000262608224889, "grad_norm": 11.412131309509277, "learning_rate": 5.9530169983829974e-06, "loss": 16.343, "step": 247530 }, { "epoch": 0.5000464614551727, "grad_norm": 178.72964477539062, "learning_rate": 5.952674329591083e-06, "loss": 14.2263, "step": 247540 }, { "epoch": 0.5000666620878566, "grad_norm": 212.1409454345703, "learning_rate": 5.952331656155951e-06, "loss": 17.8006, "step": 247550 }, { "epoch": 0.5000868627205404, "grad_norm": 557.198974609375, "learning_rate": 5.951988978079268e-06, "loss": 17.0789, "step": 247560 }, { "epoch": 0.5001070633532242, "grad_norm": 605.3966674804688, "learning_rate": 5.951646295362706e-06, "loss": 21.2611, "step": 247570 }, { "epoch": 0.500127263985908, "grad_norm": 384.77655029296875, "learning_rate": 5.951303608007936e-06, "loss": 17.6327, "step": 247580 }, { "epoch": 0.5001474646185918, "grad_norm": 30.739229202270508, "learning_rate": 5.950960916016629e-06, "loss": 22.9097, "step": 247590 }, { "epoch": 0.5001676652512757, "grad_norm": 304.4714660644531, "learning_rate": 5.950618219390451e-06, "loss": 20.69, "step": 247600 }, { "epoch": 0.5001878658839595, "grad_norm": 181.74517822265625, "learning_rate": 5.9502755181310774e-06, "loss": 11.245, "step": 247610 }, { "epoch": 0.5002080665166433, "grad_norm": 479.09259033203125, "learning_rate": 5.949932812240176e-06, "loss": 14.2979, "step": 247620 }, { "epoch": 0.5002282671493271, "grad_norm": 3.902376890182495, "learning_rate": 5.949590101719416e-06, "loss": 17.172, "step": 247630 }, { "epoch": 0.5002484677820109, "grad_norm": 48.07268524169922, "learning_rate": 5.949247386570471e-06, "loss": 24.8091, "step": 247640 }, { "epoch": 0.5002686684146948, "grad_norm": 194.4252166748047, "learning_rate": 5.948904666795007e-06, "loss": 19.7359, "step": 247650 }, { "epoch": 0.5002888690473786, "grad_norm": 209.01576232910156, "learning_rate": 5.948561942394698e-06, "loss": 12.5281, "step": 247660 }, { "epoch": 0.5003090696800624, "grad_norm": 255.49163818359375, "learning_rate": 5.948219213371212e-06, "loss": 19.3858, "step": 247670 }, { "epoch": 0.5003292703127462, "grad_norm": 148.7952880859375, "learning_rate": 5.9478764797262225e-06, "loss": 14.3529, "step": 247680 }, { "epoch": 0.50034947094543, "grad_norm": 274.9064025878906, "learning_rate": 5.947533741461398e-06, "loss": 24.4826, "step": 247690 }, { "epoch": 0.5003696715781139, "grad_norm": 8.688118934631348, "learning_rate": 5.947190998578407e-06, "loss": 18.6259, "step": 247700 }, { "epoch": 0.5003898722107977, "grad_norm": 766.4295654296875, "learning_rate": 5.946848251078924e-06, "loss": 27.4652, "step": 247710 }, { "epoch": 0.5004100728434815, "grad_norm": 292.67645263671875, "learning_rate": 5.946505498964616e-06, "loss": 10.1977, "step": 247720 }, { "epoch": 0.5004302734761653, "grad_norm": 441.5558776855469, "learning_rate": 5.9461627422371545e-06, "loss": 19.2838, "step": 247730 }, { "epoch": 0.5004504741088491, "grad_norm": 312.2102966308594, "learning_rate": 5.945819980898212e-06, "loss": 24.7428, "step": 247740 }, { "epoch": 0.500470674741533, "grad_norm": 168.518798828125, "learning_rate": 5.945477214949457e-06, "loss": 16.514, "step": 247750 }, { "epoch": 0.5004908753742168, "grad_norm": 405.2102355957031, "learning_rate": 5.945134444392561e-06, "loss": 12.8612, "step": 247760 }, { "epoch": 0.5005110760069006, "grad_norm": 171.03099060058594, "learning_rate": 5.944791669229195e-06, "loss": 34.533, "step": 247770 }, { "epoch": 0.5005312766395843, "grad_norm": 38.24711608886719, "learning_rate": 5.944448889461027e-06, "loss": 17.9258, "step": 247780 }, { "epoch": 0.5005514772722681, "grad_norm": 223.00119018554688, "learning_rate": 5.9441061050897304e-06, "loss": 18.9464, "step": 247790 }, { "epoch": 0.5005716779049519, "grad_norm": 62.386959075927734, "learning_rate": 5.943763316116977e-06, "loss": 13.0859, "step": 247800 }, { "epoch": 0.5005918785376358, "grad_norm": 182.7643585205078, "learning_rate": 5.943420522544433e-06, "loss": 12.349, "step": 247810 }, { "epoch": 0.5006120791703196, "grad_norm": 307.2306213378906, "learning_rate": 5.9430777243737744e-06, "loss": 19.8827, "step": 247820 }, { "epoch": 0.5006322798030034, "grad_norm": 284.0505065917969, "learning_rate": 5.942734921606667e-06, "loss": 19.3649, "step": 247830 }, { "epoch": 0.5006524804356872, "grad_norm": 400.2987060546875, "learning_rate": 5.942392114244786e-06, "loss": 10.912, "step": 247840 }, { "epoch": 0.500672681068371, "grad_norm": 36.175445556640625, "learning_rate": 5.942049302289798e-06, "loss": 20.0841, "step": 247850 }, { "epoch": 0.5006928817010549, "grad_norm": 151.8917236328125, "learning_rate": 5.941706485743377e-06, "loss": 17.1747, "step": 247860 }, { "epoch": 0.5007130823337387, "grad_norm": 132.78646850585938, "learning_rate": 5.941363664607193e-06, "loss": 24.2429, "step": 247870 }, { "epoch": 0.5007332829664225, "grad_norm": 391.81610107421875, "learning_rate": 5.9410208388829174e-06, "loss": 17.8475, "step": 247880 }, { "epoch": 0.5007534835991063, "grad_norm": 342.81793212890625, "learning_rate": 5.9406780085722194e-06, "loss": 22.9874, "step": 247890 }, { "epoch": 0.5007736842317901, "grad_norm": 309.4046325683594, "learning_rate": 5.94033517367677e-06, "loss": 24.0724, "step": 247900 }, { "epoch": 0.500793884864474, "grad_norm": 479.4703674316406, "learning_rate": 5.939992334198242e-06, "loss": 19.2286, "step": 247910 }, { "epoch": 0.5008140854971578, "grad_norm": 92.2585220336914, "learning_rate": 5.939649490138305e-06, "loss": 18.1426, "step": 247920 }, { "epoch": 0.5008342861298416, "grad_norm": 385.2606506347656, "learning_rate": 5.939306641498632e-06, "loss": 12.4646, "step": 247930 }, { "epoch": 0.5008544867625254, "grad_norm": 349.48388671875, "learning_rate": 5.938963788280889e-06, "loss": 22.8863, "step": 247940 }, { "epoch": 0.5008746873952092, "grad_norm": 299.532958984375, "learning_rate": 5.938620930486754e-06, "loss": 22.6468, "step": 247950 }, { "epoch": 0.5008948880278931, "grad_norm": 490.596435546875, "learning_rate": 5.9382780681178935e-06, "loss": 19.5698, "step": 247960 }, { "epoch": 0.5009150886605769, "grad_norm": 244.21652221679688, "learning_rate": 5.9379352011759775e-06, "loss": 14.2951, "step": 247970 }, { "epoch": 0.5009352892932607, "grad_norm": 320.7451171875, "learning_rate": 5.9375923296626815e-06, "loss": 16.7675, "step": 247980 }, { "epoch": 0.5009554899259445, "grad_norm": 374.98370361328125, "learning_rate": 5.937249453579672e-06, "loss": 20.05, "step": 247990 }, { "epoch": 0.5009756905586283, "grad_norm": 403.4307861328125, "learning_rate": 5.936906572928625e-06, "loss": 14.388, "step": 248000 }, { "epoch": 0.5009958911913122, "grad_norm": 483.18048095703125, "learning_rate": 5.936563687711206e-06, "loss": 25.9618, "step": 248010 }, { "epoch": 0.501016091823996, "grad_norm": 625.214599609375, "learning_rate": 5.936220797929091e-06, "loss": 16.488, "step": 248020 }, { "epoch": 0.5010362924566797, "grad_norm": 312.840087890625, "learning_rate": 5.935877903583949e-06, "loss": 14.5212, "step": 248030 }, { "epoch": 0.5010564930893635, "grad_norm": 167.76040649414062, "learning_rate": 5.9355350046774515e-06, "loss": 7.5285, "step": 248040 }, { "epoch": 0.5010766937220473, "grad_norm": 532.289794921875, "learning_rate": 5.93519210121127e-06, "loss": 39.8879, "step": 248050 }, { "epoch": 0.5010968943547311, "grad_norm": 142.57928466796875, "learning_rate": 5.934849193187075e-06, "loss": 26.7096, "step": 248060 }, { "epoch": 0.501117094987415, "grad_norm": 261.7906799316406, "learning_rate": 5.93450628060654e-06, "loss": 15.4258, "step": 248070 }, { "epoch": 0.5011372956200988, "grad_norm": 300.1258850097656, "learning_rate": 5.934163363471333e-06, "loss": 15.5625, "step": 248080 }, { "epoch": 0.5011574962527826, "grad_norm": 232.407958984375, "learning_rate": 5.933820441783129e-06, "loss": 15.9671, "step": 248090 }, { "epoch": 0.5011776968854664, "grad_norm": 241.35769653320312, "learning_rate": 5.933477515543595e-06, "loss": 23.7474, "step": 248100 }, { "epoch": 0.5011978975181502, "grad_norm": 341.2308044433594, "learning_rate": 5.933134584754407e-06, "loss": 14.3403, "step": 248110 }, { "epoch": 0.5012180981508341, "grad_norm": 560.5354614257812, "learning_rate": 5.932791649417233e-06, "loss": 31.0036, "step": 248120 }, { "epoch": 0.5012382987835179, "grad_norm": 1234.332763671875, "learning_rate": 5.932448709533746e-06, "loss": 19.9431, "step": 248130 }, { "epoch": 0.5012584994162017, "grad_norm": 596.7092895507812, "learning_rate": 5.932105765105618e-06, "loss": 14.4233, "step": 248140 }, { "epoch": 0.5012787000488855, "grad_norm": 213.35089111328125, "learning_rate": 5.931762816134517e-06, "loss": 23.7825, "step": 248150 }, { "epoch": 0.5012989006815693, "grad_norm": 534.4385986328125, "learning_rate": 5.9314198626221185e-06, "loss": 16.2537, "step": 248160 }, { "epoch": 0.5013191013142532, "grad_norm": 332.01348876953125, "learning_rate": 5.931076904570094e-06, "loss": 18.1505, "step": 248170 }, { "epoch": 0.501339301946937, "grad_norm": 809.219482421875, "learning_rate": 5.930733941980111e-06, "loss": 24.3149, "step": 248180 }, { "epoch": 0.5013595025796208, "grad_norm": 378.70751953125, "learning_rate": 5.9303909748538444e-06, "loss": 19.7797, "step": 248190 }, { "epoch": 0.5013797032123046, "grad_norm": 165.17245483398438, "learning_rate": 5.930048003192965e-06, "loss": 24.3711, "step": 248200 }, { "epoch": 0.5013999038449884, "grad_norm": 270.6784362792969, "learning_rate": 5.929705026999145e-06, "loss": 14.0679, "step": 248210 }, { "epoch": 0.5014201044776723, "grad_norm": 428.8962707519531, "learning_rate": 5.929362046274057e-06, "loss": 16.1023, "step": 248220 }, { "epoch": 0.5014403051103561, "grad_norm": 310.4158630371094, "learning_rate": 5.929019061019369e-06, "loss": 15.7882, "step": 248230 }, { "epoch": 0.5014605057430399, "grad_norm": 450.4839172363281, "learning_rate": 5.928676071236756e-06, "loss": 22.4304, "step": 248240 }, { "epoch": 0.5014807063757237, "grad_norm": 151.94586181640625, "learning_rate": 5.928333076927888e-06, "loss": 16.7479, "step": 248250 }, { "epoch": 0.5015009070084075, "grad_norm": 318.79425048828125, "learning_rate": 5.927990078094435e-06, "loss": 16.7382, "step": 248260 }, { "epoch": 0.5015211076410914, "grad_norm": 314.4445495605469, "learning_rate": 5.927647074738074e-06, "loss": 16.1737, "step": 248270 }, { "epoch": 0.5015413082737752, "grad_norm": 509.84295654296875, "learning_rate": 5.927304066860471e-06, "loss": 21.4982, "step": 248280 }, { "epoch": 0.5015615089064589, "grad_norm": 337.4242248535156, "learning_rate": 5.926961054463303e-06, "loss": 22.3866, "step": 248290 }, { "epoch": 0.5015817095391427, "grad_norm": 223.4461212158203, "learning_rate": 5.926618037548237e-06, "loss": 14.2089, "step": 248300 }, { "epoch": 0.5016019101718265, "grad_norm": 254.37152099609375, "learning_rate": 5.926275016116949e-06, "loss": 18.3578, "step": 248310 }, { "epoch": 0.5016221108045104, "grad_norm": 163.58828735351562, "learning_rate": 5.925931990171109e-06, "loss": 19.516, "step": 248320 }, { "epoch": 0.5016423114371942, "grad_norm": 208.44894409179688, "learning_rate": 5.925588959712387e-06, "loss": 12.3089, "step": 248330 }, { "epoch": 0.501662512069878, "grad_norm": 416.5862121582031, "learning_rate": 5.925245924742458e-06, "loss": 31.7982, "step": 248340 }, { "epoch": 0.5016827127025618, "grad_norm": 452.9902038574219, "learning_rate": 5.924902885262992e-06, "loss": 18.9272, "step": 248350 }, { "epoch": 0.5017029133352456, "grad_norm": 117.52326202392578, "learning_rate": 5.924559841275661e-06, "loss": 42.689, "step": 248360 }, { "epoch": 0.5017231139679295, "grad_norm": 420.08087158203125, "learning_rate": 5.924216792782138e-06, "loss": 27.4245, "step": 248370 }, { "epoch": 0.5017433146006133, "grad_norm": 117.96812438964844, "learning_rate": 5.9238737397840966e-06, "loss": 11.1128, "step": 248380 }, { "epoch": 0.5017635152332971, "grad_norm": 121.48545837402344, "learning_rate": 5.923530682283204e-06, "loss": 20.1141, "step": 248390 }, { "epoch": 0.5017837158659809, "grad_norm": 431.2785949707031, "learning_rate": 5.923187620281135e-06, "loss": 32.5881, "step": 248400 }, { "epoch": 0.5018039164986647, "grad_norm": 196.30465698242188, "learning_rate": 5.922844553779563e-06, "loss": 20.357, "step": 248410 }, { "epoch": 0.5018241171313486, "grad_norm": 452.1803283691406, "learning_rate": 5.922501482780156e-06, "loss": 18.563, "step": 248420 }, { "epoch": 0.5018443177640324, "grad_norm": 219.0723114013672, "learning_rate": 5.9221584072845914e-06, "loss": 16.0067, "step": 248430 }, { "epoch": 0.5018645183967162, "grad_norm": 302.8324890136719, "learning_rate": 5.921815327294537e-06, "loss": 12.6082, "step": 248440 }, { "epoch": 0.5018847190294, "grad_norm": 276.16925048828125, "learning_rate": 5.9214722428116675e-06, "loss": 21.0685, "step": 248450 }, { "epoch": 0.5019049196620838, "grad_norm": 37.458740234375, "learning_rate": 5.921129153837654e-06, "loss": 18.6846, "step": 248460 }, { "epoch": 0.5019251202947677, "grad_norm": 211.03123474121094, "learning_rate": 5.9207860603741674e-06, "loss": 12.9043, "step": 248470 }, { "epoch": 0.5019453209274515, "grad_norm": 127.6812973022461, "learning_rate": 5.920442962422883e-06, "loss": 17.2812, "step": 248480 }, { "epoch": 0.5019655215601353, "grad_norm": 394.218505859375, "learning_rate": 5.920099859985469e-06, "loss": 12.7512, "step": 248490 }, { "epoch": 0.5019857221928191, "grad_norm": 429.7511291503906, "learning_rate": 5.919756753063601e-06, "loss": 18.0552, "step": 248500 }, { "epoch": 0.5020059228255029, "grad_norm": 338.23345947265625, "learning_rate": 5.919413641658951e-06, "loss": 16.0969, "step": 248510 }, { "epoch": 0.5020261234581868, "grad_norm": 389.58135986328125, "learning_rate": 5.91907052577319e-06, "loss": 32.5659, "step": 248520 }, { "epoch": 0.5020463240908706, "grad_norm": 644.7842407226562, "learning_rate": 5.9187274054079895e-06, "loss": 24.2401, "step": 248530 }, { "epoch": 0.5020665247235544, "grad_norm": 149.84742736816406, "learning_rate": 5.918384280565025e-06, "loss": 10.2397, "step": 248540 }, { "epoch": 0.5020867253562381, "grad_norm": 251.0876922607422, "learning_rate": 5.9180411512459655e-06, "loss": 19.9561, "step": 248550 }, { "epoch": 0.5021069259889219, "grad_norm": 109.50819396972656, "learning_rate": 5.917698017452484e-06, "loss": 17.6135, "step": 248560 }, { "epoch": 0.5021271266216057, "grad_norm": 22.225725173950195, "learning_rate": 5.9173548791862556e-06, "loss": 18.1013, "step": 248570 }, { "epoch": 0.5021473272542896, "grad_norm": 258.21490478515625, "learning_rate": 5.91701173644895e-06, "loss": 18.1763, "step": 248580 }, { "epoch": 0.5021675278869734, "grad_norm": 819.31884765625, "learning_rate": 5.916668589242241e-06, "loss": 28.4688, "step": 248590 }, { "epoch": 0.5021877285196572, "grad_norm": 534.130615234375, "learning_rate": 5.9163254375677995e-06, "loss": 30.2906, "step": 248600 }, { "epoch": 0.502207929152341, "grad_norm": 606.6949462890625, "learning_rate": 5.9159822814272995e-06, "loss": 20.6568, "step": 248610 }, { "epoch": 0.5022281297850248, "grad_norm": 129.61837768554688, "learning_rate": 5.915639120822413e-06, "loss": 28.2994, "step": 248620 }, { "epoch": 0.5022483304177087, "grad_norm": 141.55914306640625, "learning_rate": 5.915295955754812e-06, "loss": 8.3557, "step": 248630 }, { "epoch": 0.5022685310503925, "grad_norm": 8.555486679077148, "learning_rate": 5.914952786226169e-06, "loss": 29.9748, "step": 248640 }, { "epoch": 0.5022887316830763, "grad_norm": 531.2232055664062, "learning_rate": 5.914609612238159e-06, "loss": 14.5861, "step": 248650 }, { "epoch": 0.5023089323157601, "grad_norm": 279.55902099609375, "learning_rate": 5.914266433792452e-06, "loss": 31.8295, "step": 248660 }, { "epoch": 0.502329132948444, "grad_norm": 250.94915771484375, "learning_rate": 5.913923250890721e-06, "loss": 16.5447, "step": 248670 }, { "epoch": 0.5023493335811278, "grad_norm": 214.37149047851562, "learning_rate": 5.9135800635346385e-06, "loss": 16.9422, "step": 248680 }, { "epoch": 0.5023695342138116, "grad_norm": 331.63458251953125, "learning_rate": 5.913236871725877e-06, "loss": 23.8737, "step": 248690 }, { "epoch": 0.5023897348464954, "grad_norm": 487.180419921875, "learning_rate": 5.912893675466112e-06, "loss": 24.6903, "step": 248700 }, { "epoch": 0.5024099354791792, "grad_norm": 97.34019470214844, "learning_rate": 5.912550474757011e-06, "loss": 21.3134, "step": 248710 }, { "epoch": 0.502430136111863, "grad_norm": 109.35627746582031, "learning_rate": 5.912207269600252e-06, "loss": 34.7743, "step": 248720 }, { "epoch": 0.5024503367445469, "grad_norm": 163.42938232421875, "learning_rate": 5.911864059997504e-06, "loss": 13.1647, "step": 248730 }, { "epoch": 0.5024705373772307, "grad_norm": 1452.29345703125, "learning_rate": 5.911520845950442e-06, "loss": 23.1758, "step": 248740 }, { "epoch": 0.5024907380099145, "grad_norm": 323.7252502441406, "learning_rate": 5.911177627460739e-06, "loss": 18.1021, "step": 248750 }, { "epoch": 0.5025109386425983, "grad_norm": 294.9367370605469, "learning_rate": 5.910834404530064e-06, "loss": 22.9579, "step": 248760 }, { "epoch": 0.5025311392752821, "grad_norm": 291.49951171875, "learning_rate": 5.910491177160094e-06, "loss": 19.2065, "step": 248770 }, { "epoch": 0.502551339907966, "grad_norm": 986.3696899414062, "learning_rate": 5.910147945352501e-06, "loss": 28.506, "step": 248780 }, { "epoch": 0.5025715405406498, "grad_norm": 245.19931030273438, "learning_rate": 5.909804709108957e-06, "loss": 40.5798, "step": 248790 }, { "epoch": 0.5025917411733335, "grad_norm": 289.53399658203125, "learning_rate": 5.909461468431135e-06, "loss": 24.5793, "step": 248800 }, { "epoch": 0.5026119418060173, "grad_norm": 321.2113952636719, "learning_rate": 5.9091182233207075e-06, "loss": 22.5678, "step": 248810 }, { "epoch": 0.5026321424387011, "grad_norm": 254.4793243408203, "learning_rate": 5.90877497377935e-06, "loss": 30.8201, "step": 248820 }, { "epoch": 0.502652343071385, "grad_norm": 676.5030517578125, "learning_rate": 5.908431719808731e-06, "loss": 22.8239, "step": 248830 }, { "epoch": 0.5026725437040688, "grad_norm": 4.302178859710693, "learning_rate": 5.908088461410529e-06, "loss": 46.3537, "step": 248840 }, { "epoch": 0.5026927443367526, "grad_norm": 182.84593200683594, "learning_rate": 5.907745198586411e-06, "loss": 28.2463, "step": 248850 }, { "epoch": 0.5027129449694364, "grad_norm": 220.3950653076172, "learning_rate": 5.907401931338055e-06, "loss": 12.999, "step": 248860 }, { "epoch": 0.5027331456021202, "grad_norm": 240.21942138671875, "learning_rate": 5.90705865966713e-06, "loss": 29.6477, "step": 248870 }, { "epoch": 0.502753346234804, "grad_norm": 190.1591796875, "learning_rate": 5.9067153835753125e-06, "loss": 8.8411, "step": 248880 }, { "epoch": 0.5027735468674879, "grad_norm": 282.7282409667969, "learning_rate": 5.906372103064274e-06, "loss": 9.85, "step": 248890 }, { "epoch": 0.5027937475001717, "grad_norm": 379.7806396484375, "learning_rate": 5.906028818135687e-06, "loss": 21.808, "step": 248900 }, { "epoch": 0.5028139481328555, "grad_norm": 202.81578063964844, "learning_rate": 5.905685528791226e-06, "loss": 6.5798, "step": 248910 }, { "epoch": 0.5028341487655393, "grad_norm": 309.8311767578125, "learning_rate": 5.905342235032564e-06, "loss": 34.7844, "step": 248920 }, { "epoch": 0.5028543493982232, "grad_norm": 397.4351806640625, "learning_rate": 5.904998936861375e-06, "loss": 21.5871, "step": 248930 }, { "epoch": 0.502874550030907, "grad_norm": 292.7225646972656, "learning_rate": 5.904655634279328e-06, "loss": 24.3189, "step": 248940 }, { "epoch": 0.5028947506635908, "grad_norm": 1.754620909690857, "learning_rate": 5.904312327288101e-06, "loss": 21.5858, "step": 248950 }, { "epoch": 0.5029149512962746, "grad_norm": 390.2062683105469, "learning_rate": 5.903969015889365e-06, "loss": 13.3124, "step": 248960 }, { "epoch": 0.5029351519289584, "grad_norm": 443.1992492675781, "learning_rate": 5.903625700084794e-06, "loss": 16.1735, "step": 248970 }, { "epoch": 0.5029553525616423, "grad_norm": 99.09663391113281, "learning_rate": 5.9032823798760595e-06, "loss": 23.8032, "step": 248980 }, { "epoch": 0.5029755531943261, "grad_norm": 670.3941650390625, "learning_rate": 5.902939055264838e-06, "loss": 24.9736, "step": 248990 }, { "epoch": 0.5029957538270099, "grad_norm": 118.83430480957031, "learning_rate": 5.902595726252801e-06, "loss": 12.406, "step": 249000 }, { "epoch": 0.5030159544596937, "grad_norm": 208.8263702392578, "learning_rate": 5.902252392841621e-06, "loss": 20.5289, "step": 249010 }, { "epoch": 0.5030361550923775, "grad_norm": 370.722900390625, "learning_rate": 5.901909055032974e-06, "loss": 27.8139, "step": 249020 }, { "epoch": 0.5030563557250614, "grad_norm": 311.405029296875, "learning_rate": 5.901565712828528e-06, "loss": 15.541, "step": 249030 }, { "epoch": 0.5030765563577452, "grad_norm": 229.3588104248047, "learning_rate": 5.901222366229964e-06, "loss": 15.7926, "step": 249040 }, { "epoch": 0.503096756990429, "grad_norm": 202.77944946289062, "learning_rate": 5.900879015238948e-06, "loss": 14.431, "step": 249050 }, { "epoch": 0.5031169576231127, "grad_norm": 362.0393981933594, "learning_rate": 5.90053565985716e-06, "loss": 19.7511, "step": 249060 }, { "epoch": 0.5031371582557965, "grad_norm": 345.81640625, "learning_rate": 5.900192300086268e-06, "loss": 18.5604, "step": 249070 }, { "epoch": 0.5031573588884803, "grad_norm": 121.5953598022461, "learning_rate": 5.89984893592795e-06, "loss": 17.3146, "step": 249080 }, { "epoch": 0.5031775595211642, "grad_norm": 171.31080627441406, "learning_rate": 5.899505567383877e-06, "loss": 30.8338, "step": 249090 }, { "epoch": 0.503197760153848, "grad_norm": 142.41867065429688, "learning_rate": 5.8991621944557224e-06, "loss": 25.522, "step": 249100 }, { "epoch": 0.5032179607865318, "grad_norm": 83.28276824951172, "learning_rate": 5.8988188171451596e-06, "loss": 14.9517, "step": 249110 }, { "epoch": 0.5032381614192156, "grad_norm": 249.1461944580078, "learning_rate": 5.898475435453863e-06, "loss": 10.2464, "step": 249120 }, { "epoch": 0.5032583620518994, "grad_norm": 196.42015075683594, "learning_rate": 5.898132049383507e-06, "loss": 11.3615, "step": 249130 }, { "epoch": 0.5032785626845833, "grad_norm": 6.596041202545166, "learning_rate": 5.897788658935764e-06, "loss": 27.7936, "step": 249140 }, { "epoch": 0.5032987633172671, "grad_norm": 173.4099578857422, "learning_rate": 5.897445264112309e-06, "loss": 11.3804, "step": 249150 }, { "epoch": 0.5033189639499509, "grad_norm": 1186.976806640625, "learning_rate": 5.897101864914814e-06, "loss": 32.4666, "step": 249160 }, { "epoch": 0.5033391645826347, "grad_norm": 145.69381713867188, "learning_rate": 5.8967584613449525e-06, "loss": 10.5505, "step": 249170 }, { "epoch": 0.5033593652153185, "grad_norm": 148.46951293945312, "learning_rate": 5.896415053404399e-06, "loss": 16.1156, "step": 249180 }, { "epoch": 0.5033795658480024, "grad_norm": 190.38870239257812, "learning_rate": 5.896071641094827e-06, "loss": 16.4071, "step": 249190 }, { "epoch": 0.5033997664806862, "grad_norm": 338.08795166015625, "learning_rate": 5.8957282244179125e-06, "loss": 15.2041, "step": 249200 }, { "epoch": 0.50341996711337, "grad_norm": 149.76405334472656, "learning_rate": 5.895384803375325e-06, "loss": 23.0716, "step": 249210 }, { "epoch": 0.5034401677460538, "grad_norm": 18.71114730834961, "learning_rate": 5.895041377968742e-06, "loss": 20.6328, "step": 249220 }, { "epoch": 0.5034603683787376, "grad_norm": 166.65562438964844, "learning_rate": 5.894697948199836e-06, "loss": 9.1399, "step": 249230 }, { "epoch": 0.5034805690114215, "grad_norm": 57.174415588378906, "learning_rate": 5.89435451407028e-06, "loss": 10.5528, "step": 249240 }, { "epoch": 0.5035007696441053, "grad_norm": 224.65306091308594, "learning_rate": 5.8940110755817484e-06, "loss": 32.1402, "step": 249250 }, { "epoch": 0.5035209702767891, "grad_norm": 293.225341796875, "learning_rate": 5.893667632735915e-06, "loss": 12.5607, "step": 249260 }, { "epoch": 0.5035411709094729, "grad_norm": 37.300628662109375, "learning_rate": 5.893324185534456e-06, "loss": 6.9092, "step": 249270 }, { "epoch": 0.5035613715421567, "grad_norm": 357.302734375, "learning_rate": 5.892980733979041e-06, "loss": 19.4794, "step": 249280 }, { "epoch": 0.5035815721748406, "grad_norm": 336.6480407714844, "learning_rate": 5.892637278071347e-06, "loss": 14.1608, "step": 249290 }, { "epoch": 0.5036017728075244, "grad_norm": 378.0352478027344, "learning_rate": 5.892293817813048e-06, "loss": 19.5589, "step": 249300 }, { "epoch": 0.5036219734402081, "grad_norm": 487.54669189453125, "learning_rate": 5.891950353205817e-06, "loss": 22.9218, "step": 249310 }, { "epoch": 0.5036421740728919, "grad_norm": 121.94599914550781, "learning_rate": 5.891606884251326e-06, "loss": 10.7458, "step": 249320 }, { "epoch": 0.5036623747055757, "grad_norm": 487.99627685546875, "learning_rate": 5.8912634109512534e-06, "loss": 17.6269, "step": 249330 }, { "epoch": 0.5036825753382596, "grad_norm": 256.71527099609375, "learning_rate": 5.89091993330727e-06, "loss": 19.225, "step": 249340 }, { "epoch": 0.5037027759709434, "grad_norm": 319.05303955078125, "learning_rate": 5.89057645132105e-06, "loss": 13.4374, "step": 249350 }, { "epoch": 0.5037229766036272, "grad_norm": 254.13360595703125, "learning_rate": 5.8902329649942715e-06, "loss": 45.2772, "step": 249360 }, { "epoch": 0.503743177236311, "grad_norm": 84.680908203125, "learning_rate": 5.889889474328603e-06, "loss": 17.0626, "step": 249370 }, { "epoch": 0.5037633778689948, "grad_norm": 246.99557495117188, "learning_rate": 5.889545979325722e-06, "loss": 21.7544, "step": 249380 }, { "epoch": 0.5037835785016787, "grad_norm": 177.57138061523438, "learning_rate": 5.889202479987301e-06, "loss": 29.7375, "step": 249390 }, { "epoch": 0.5038037791343625, "grad_norm": 303.8663330078125, "learning_rate": 5.8888589763150165e-06, "loss": 19.7737, "step": 249400 }, { "epoch": 0.5038239797670463, "grad_norm": 73.7767333984375, "learning_rate": 5.8885154683105395e-06, "loss": 13.5495, "step": 249410 }, { "epoch": 0.5038441803997301, "grad_norm": 352.6000671386719, "learning_rate": 5.8881719559755454e-06, "loss": 16.124, "step": 249420 }, { "epoch": 0.5038643810324139, "grad_norm": 229.4619598388672, "learning_rate": 5.887828439311712e-06, "loss": 19.5907, "step": 249430 }, { "epoch": 0.5038845816650978, "grad_norm": 286.4491882324219, "learning_rate": 5.887484918320708e-06, "loss": 11.8365, "step": 249440 }, { "epoch": 0.5039047822977816, "grad_norm": 435.76080322265625, "learning_rate": 5.887141393004211e-06, "loss": 35.7272, "step": 249450 }, { "epoch": 0.5039249829304654, "grad_norm": 210.64137268066406, "learning_rate": 5.8867978633638935e-06, "loss": 10.797, "step": 249460 }, { "epoch": 0.5039451835631492, "grad_norm": 358.1566467285156, "learning_rate": 5.886454329401432e-06, "loss": 29.2828, "step": 249470 }, { "epoch": 0.503965384195833, "grad_norm": 307.8323669433594, "learning_rate": 5.8861107911184975e-06, "loss": 8.9984, "step": 249480 }, { "epoch": 0.5039855848285169, "grad_norm": 223.8067626953125, "learning_rate": 5.885767248516769e-06, "loss": 33.9285, "step": 249490 }, { "epoch": 0.5040057854612007, "grad_norm": 580.9506225585938, "learning_rate": 5.885423701597918e-06, "loss": 32.4289, "step": 249500 }, { "epoch": 0.5040259860938845, "grad_norm": 544.0473022460938, "learning_rate": 5.885080150363618e-06, "loss": 24.8667, "step": 249510 }, { "epoch": 0.5040461867265683, "grad_norm": 8.921372413635254, "learning_rate": 5.884736594815545e-06, "loss": 24.6336, "step": 249520 }, { "epoch": 0.5040663873592521, "grad_norm": 222.3411407470703, "learning_rate": 5.884393034955373e-06, "loss": 14.9024, "step": 249530 }, { "epoch": 0.504086587991936, "grad_norm": 44.301204681396484, "learning_rate": 5.8840494707847786e-06, "loss": 11.4704, "step": 249540 }, { "epoch": 0.5041067886246198, "grad_norm": 31.032039642333984, "learning_rate": 5.883705902305432e-06, "loss": 16.9202, "step": 249550 }, { "epoch": 0.5041269892573036, "grad_norm": 474.194091796875, "learning_rate": 5.8833623295190104e-06, "loss": 21.4345, "step": 249560 }, { "epoch": 0.5041471898899873, "grad_norm": 251.54615783691406, "learning_rate": 5.883018752427189e-06, "loss": 13.6385, "step": 249570 }, { "epoch": 0.5041673905226711, "grad_norm": 169.93524169921875, "learning_rate": 5.8826751710316395e-06, "loss": 14.5065, "step": 249580 }, { "epoch": 0.5041875911553549, "grad_norm": 838.94873046875, "learning_rate": 5.882331585334039e-06, "loss": 37.4746, "step": 249590 }, { "epoch": 0.5042077917880388, "grad_norm": 51.74504089355469, "learning_rate": 5.881987995336062e-06, "loss": 13.9399, "step": 249600 }, { "epoch": 0.5042279924207226, "grad_norm": 192.66714477539062, "learning_rate": 5.881644401039382e-06, "loss": 11.6571, "step": 249610 }, { "epoch": 0.5042481930534064, "grad_norm": 415.587158203125, "learning_rate": 5.881300802445675e-06, "loss": 14.427, "step": 249620 }, { "epoch": 0.5042683936860902, "grad_norm": 309.9430847167969, "learning_rate": 5.880957199556615e-06, "loss": 15.9198, "step": 249630 }, { "epoch": 0.504288594318774, "grad_norm": 108.81411743164062, "learning_rate": 5.880613592373874e-06, "loss": 9.4821, "step": 249640 }, { "epoch": 0.5043087949514579, "grad_norm": 152.8829345703125, "learning_rate": 5.880269980899132e-06, "loss": 11.9277, "step": 249650 }, { "epoch": 0.5043289955841417, "grad_norm": 237.3328094482422, "learning_rate": 5.879926365134059e-06, "loss": 14.0024, "step": 249660 }, { "epoch": 0.5043491962168255, "grad_norm": 0.9531915783882141, "learning_rate": 5.879582745080333e-06, "loss": 27.2982, "step": 249670 }, { "epoch": 0.5043693968495093, "grad_norm": 261.075439453125, "learning_rate": 5.879239120739626e-06, "loss": 17.5437, "step": 249680 }, { "epoch": 0.5043895974821931, "grad_norm": 191.22848510742188, "learning_rate": 5.878895492113614e-06, "loss": 17.3497, "step": 249690 }, { "epoch": 0.504409798114877, "grad_norm": 691.9635620117188, "learning_rate": 5.878551859203974e-06, "loss": 21.632, "step": 249700 }, { "epoch": 0.5044299987475608, "grad_norm": 16.918500900268555, "learning_rate": 5.878208222012377e-06, "loss": 15.4163, "step": 249710 }, { "epoch": 0.5044501993802446, "grad_norm": 494.54876708984375, "learning_rate": 5.8778645805405e-06, "loss": 26.2904, "step": 249720 }, { "epoch": 0.5044704000129284, "grad_norm": 244.71900939941406, "learning_rate": 5.8775209347900174e-06, "loss": 15.5968, "step": 249730 }, { "epoch": 0.5044906006456122, "grad_norm": 250.85565185546875, "learning_rate": 5.877177284762605e-06, "loss": 26.1053, "step": 249740 }, { "epoch": 0.5045108012782961, "grad_norm": 191.349365234375, "learning_rate": 5.876833630459936e-06, "loss": 10.1093, "step": 249750 }, { "epoch": 0.5045310019109799, "grad_norm": 173.04483032226562, "learning_rate": 5.876489971883688e-06, "loss": 18.5176, "step": 249760 }, { "epoch": 0.5045512025436637, "grad_norm": 179.32565307617188, "learning_rate": 5.876146309035532e-06, "loss": 12.4875, "step": 249770 }, { "epoch": 0.5045714031763475, "grad_norm": 78.02918243408203, "learning_rate": 5.8758026419171455e-06, "loss": 24.6649, "step": 249780 }, { "epoch": 0.5045916038090313, "grad_norm": 160.8350067138672, "learning_rate": 5.875458970530204e-06, "loss": 22.3923, "step": 249790 }, { "epoch": 0.5046118044417152, "grad_norm": 8.855094909667969, "learning_rate": 5.8751152948763815e-06, "loss": 12.9, "step": 249800 }, { "epoch": 0.504632005074399, "grad_norm": 163.60888671875, "learning_rate": 5.874771614957353e-06, "loss": 12.6746, "step": 249810 }, { "epoch": 0.5046522057070827, "grad_norm": 389.1127014160156, "learning_rate": 5.874427930774792e-06, "loss": 22.7989, "step": 249820 }, { "epoch": 0.5046724063397665, "grad_norm": 582.890380859375, "learning_rate": 5.874084242330378e-06, "loss": 23.5356, "step": 249830 }, { "epoch": 0.5046926069724503, "grad_norm": 179.73886108398438, "learning_rate": 5.873740549625783e-06, "loss": 13.0105, "step": 249840 }, { "epoch": 0.5047128076051342, "grad_norm": 348.8857421875, "learning_rate": 5.873396852662682e-06, "loss": 20.6109, "step": 249850 }, { "epoch": 0.504733008237818, "grad_norm": 179.30072021484375, "learning_rate": 5.873053151442749e-06, "loss": 26.5666, "step": 249860 }, { "epoch": 0.5047532088705018, "grad_norm": 369.7748107910156, "learning_rate": 5.872709445967662e-06, "loss": 22.1963, "step": 249870 }, { "epoch": 0.5047734095031856, "grad_norm": 164.04733276367188, "learning_rate": 5.872365736239097e-06, "loss": 7.1079, "step": 249880 }, { "epoch": 0.5047936101358694, "grad_norm": 343.9015197753906, "learning_rate": 5.872022022258726e-06, "loss": 26.1716, "step": 249890 }, { "epoch": 0.5048138107685533, "grad_norm": 288.1936340332031, "learning_rate": 5.871678304028224e-06, "loss": 13.3287, "step": 249900 }, { "epoch": 0.5048340114012371, "grad_norm": 178.91348266601562, "learning_rate": 5.8713345815492695e-06, "loss": 10.9531, "step": 249910 }, { "epoch": 0.5048542120339209, "grad_norm": 165.40403747558594, "learning_rate": 5.8709908548235365e-06, "loss": 12.9498, "step": 249920 }, { "epoch": 0.5048744126666047, "grad_norm": 257.7190246582031, "learning_rate": 5.870647123852696e-06, "loss": 16.8928, "step": 249930 }, { "epoch": 0.5048946132992885, "grad_norm": 236.89059448242188, "learning_rate": 5.870303388638431e-06, "loss": 14.7917, "step": 249940 }, { "epoch": 0.5049148139319724, "grad_norm": 156.11349487304688, "learning_rate": 5.86995964918241e-06, "loss": 17.2748, "step": 249950 }, { "epoch": 0.5049350145646562, "grad_norm": 342.63134765625, "learning_rate": 5.869615905486313e-06, "loss": 17.1683, "step": 249960 }, { "epoch": 0.50495521519734, "grad_norm": 86.48170471191406, "learning_rate": 5.869272157551814e-06, "loss": 18.3557, "step": 249970 }, { "epoch": 0.5049754158300238, "grad_norm": 352.78057861328125, "learning_rate": 5.868928405380585e-06, "loss": 19.3473, "step": 249980 }, { "epoch": 0.5049956164627076, "grad_norm": 382.2410583496094, "learning_rate": 5.868584648974308e-06, "loss": 13.5074, "step": 249990 }, { "epoch": 0.5050158170953915, "grad_norm": 7.6236138343811035, "learning_rate": 5.8682408883346535e-06, "loss": 18.291, "step": 250000 }, { "epoch": 0.5050360177280753, "grad_norm": 1853.5093994140625, "learning_rate": 5.8678971234632965e-06, "loss": 32.3856, "step": 250010 }, { "epoch": 0.5050562183607591, "grad_norm": 318.5978698730469, "learning_rate": 5.8675533543619155e-06, "loss": 9.8527, "step": 250020 }, { "epoch": 0.5050764189934429, "grad_norm": 630.1383666992188, "learning_rate": 5.867209581032184e-06, "loss": 40.2825, "step": 250030 }, { "epoch": 0.5050966196261267, "grad_norm": 292.3376159667969, "learning_rate": 5.8668658034757795e-06, "loss": 16.3755, "step": 250040 }, { "epoch": 0.5051168202588106, "grad_norm": 482.740478515625, "learning_rate": 5.866522021694376e-06, "loss": 20.5611, "step": 250050 }, { "epoch": 0.5051370208914944, "grad_norm": 205.50979614257812, "learning_rate": 5.866178235689648e-06, "loss": 17.5235, "step": 250060 }, { "epoch": 0.5051572215241782, "grad_norm": 285.65057373046875, "learning_rate": 5.865834445463273e-06, "loss": 15.8877, "step": 250070 }, { "epoch": 0.5051774221568619, "grad_norm": 29.13956069946289, "learning_rate": 5.865490651016927e-06, "loss": 20.7125, "step": 250080 }, { "epoch": 0.5051976227895457, "grad_norm": 175.9360809326172, "learning_rate": 5.865146852352283e-06, "loss": 18.7574, "step": 250090 }, { "epoch": 0.5052178234222295, "grad_norm": 536.719482421875, "learning_rate": 5.8648030494710195e-06, "loss": 16.4514, "step": 250100 }, { "epoch": 0.5052380240549134, "grad_norm": 875.5555419921875, "learning_rate": 5.864459242374809e-06, "loss": 27.1379, "step": 250110 }, { "epoch": 0.5052582246875972, "grad_norm": 581.74609375, "learning_rate": 5.86411543106533e-06, "loss": 17.6065, "step": 250120 }, { "epoch": 0.505278425320281, "grad_norm": 82.6348876953125, "learning_rate": 5.863771615544258e-06, "loss": 10.1244, "step": 250130 }, { "epoch": 0.5052986259529648, "grad_norm": 231.6234588623047, "learning_rate": 5.863427795813266e-06, "loss": 24.5023, "step": 250140 }, { "epoch": 0.5053188265856486, "grad_norm": 368.0700378417969, "learning_rate": 5.863083971874034e-06, "loss": 17.7294, "step": 250150 }, { "epoch": 0.5053390272183325, "grad_norm": 393.5963134765625, "learning_rate": 5.8627401437282334e-06, "loss": 26.2472, "step": 250160 }, { "epoch": 0.5053592278510163, "grad_norm": 185.78802490234375, "learning_rate": 5.862396311377543e-06, "loss": 13.2243, "step": 250170 }, { "epoch": 0.5053794284837001, "grad_norm": 0.0, "learning_rate": 5.862052474823637e-06, "loss": 20.5273, "step": 250180 }, { "epoch": 0.5053996291163839, "grad_norm": 184.67222595214844, "learning_rate": 5.861708634068193e-06, "loss": 15.9827, "step": 250190 }, { "epoch": 0.5054198297490677, "grad_norm": 329.6484069824219, "learning_rate": 5.8613647891128845e-06, "loss": 14.5813, "step": 250200 }, { "epoch": 0.5054400303817516, "grad_norm": 38.245479583740234, "learning_rate": 5.861020939959389e-06, "loss": 13.5024, "step": 250210 }, { "epoch": 0.5054602310144354, "grad_norm": 447.1597900390625, "learning_rate": 5.860677086609381e-06, "loss": 14.9755, "step": 250220 }, { "epoch": 0.5054804316471192, "grad_norm": 204.7637176513672, "learning_rate": 5.860333229064539e-06, "loss": 12.5741, "step": 250230 }, { "epoch": 0.505500632279803, "grad_norm": 341.10003662109375, "learning_rate": 5.859989367326535e-06, "loss": 15.4351, "step": 250240 }, { "epoch": 0.5055208329124868, "grad_norm": 490.8084716796875, "learning_rate": 5.859645501397048e-06, "loss": 12.9879, "step": 250250 }, { "epoch": 0.5055410335451707, "grad_norm": 274.2659606933594, "learning_rate": 5.859301631277754e-06, "loss": 26.0466, "step": 250260 }, { "epoch": 0.5055612341778545, "grad_norm": 140.74876403808594, "learning_rate": 5.858957756970326e-06, "loss": 21.6979, "step": 250270 }, { "epoch": 0.5055814348105383, "grad_norm": 232.7562713623047, "learning_rate": 5.858613878476445e-06, "loss": 15.0475, "step": 250280 }, { "epoch": 0.5056016354432221, "grad_norm": 192.58958435058594, "learning_rate": 5.858269995797781e-06, "loss": 11.7238, "step": 250290 }, { "epoch": 0.5056218360759059, "grad_norm": 402.9053039550781, "learning_rate": 5.857926108936015e-06, "loss": 35.6107, "step": 250300 }, { "epoch": 0.5056420367085898, "grad_norm": 209.80303955078125, "learning_rate": 5.8575822178928225e-06, "loss": 20.4901, "step": 250310 }, { "epoch": 0.5056622373412736, "grad_norm": 417.4851379394531, "learning_rate": 5.857238322669875e-06, "loss": 21.0044, "step": 250320 }, { "epoch": 0.5056824379739574, "grad_norm": 324.1552734375, "learning_rate": 5.8568944232688554e-06, "loss": 15.9117, "step": 250330 }, { "epoch": 0.5057026386066411, "grad_norm": 223.3287353515625, "learning_rate": 5.856550519691433e-06, "loss": 23.4425, "step": 250340 }, { "epoch": 0.5057228392393249, "grad_norm": 917.8973999023438, "learning_rate": 5.856206611939289e-06, "loss": 33.3792, "step": 250350 }, { "epoch": 0.5057430398720087, "grad_norm": 422.9471435546875, "learning_rate": 5.855862700014096e-06, "loss": 12.0189, "step": 250360 }, { "epoch": 0.5057632405046926, "grad_norm": 99.64682006835938, "learning_rate": 5.855518783917535e-06, "loss": 10.6516, "step": 250370 }, { "epoch": 0.5057834411373764, "grad_norm": 166.2522735595703, "learning_rate": 5.855174863651279e-06, "loss": 21.7713, "step": 250380 }, { "epoch": 0.5058036417700602, "grad_norm": 0.0, "learning_rate": 5.854830939217002e-06, "loss": 14.7901, "step": 250390 }, { "epoch": 0.505823842402744, "grad_norm": 545.0646362304688, "learning_rate": 5.854487010616384e-06, "loss": 25.391, "step": 250400 }, { "epoch": 0.5058440430354278, "grad_norm": 257.313232421875, "learning_rate": 5.8541430778511e-06, "loss": 18.5086, "step": 250410 }, { "epoch": 0.5058642436681117, "grad_norm": 3.576425552368164, "learning_rate": 5.853799140922827e-06, "loss": 11.6986, "step": 250420 }, { "epoch": 0.5058844443007955, "grad_norm": 351.53436279296875, "learning_rate": 5.853455199833238e-06, "loss": 16.4068, "step": 250430 }, { "epoch": 0.5059046449334793, "grad_norm": 149.59275817871094, "learning_rate": 5.853111254584014e-06, "loss": 20.5959, "step": 250440 }, { "epoch": 0.5059248455661631, "grad_norm": 307.71148681640625, "learning_rate": 5.852767305176829e-06, "loss": 15.426, "step": 250450 }, { "epoch": 0.505945046198847, "grad_norm": 327.72222900390625, "learning_rate": 5.852423351613359e-06, "loss": 22.3823, "step": 250460 }, { "epoch": 0.5059652468315308, "grad_norm": 95.00647735595703, "learning_rate": 5.852079393895281e-06, "loss": 13.5204, "step": 250470 }, { "epoch": 0.5059854474642146, "grad_norm": 423.1249694824219, "learning_rate": 5.85173543202427e-06, "loss": 22.79, "step": 250480 }, { "epoch": 0.5060056480968984, "grad_norm": 3.532135248184204, "learning_rate": 5.851391466002008e-06, "loss": 21.6637, "step": 250490 }, { "epoch": 0.5060258487295822, "grad_norm": 249.99795532226562, "learning_rate": 5.851047495830163e-06, "loss": 23.0233, "step": 250500 }, { "epoch": 0.506046049362266, "grad_norm": 391.09161376953125, "learning_rate": 5.850703521510418e-06, "loss": 28.2018, "step": 250510 }, { "epoch": 0.5060662499949499, "grad_norm": 290.35113525390625, "learning_rate": 5.850359543044446e-06, "loss": 12.5036, "step": 250520 }, { "epoch": 0.5060864506276337, "grad_norm": 333.9122619628906, "learning_rate": 5.850015560433926e-06, "loss": 20.2489, "step": 250530 }, { "epoch": 0.5061066512603175, "grad_norm": 235.20802307128906, "learning_rate": 5.849671573680532e-06, "loss": 9.9937, "step": 250540 }, { "epoch": 0.5061268518930013, "grad_norm": 259.4632873535156, "learning_rate": 5.849327582785943e-06, "loss": 12.8588, "step": 250550 }, { "epoch": 0.5061470525256851, "grad_norm": 179.0106964111328, "learning_rate": 5.848983587751833e-06, "loss": 26.1671, "step": 250560 }, { "epoch": 0.506167253158369, "grad_norm": 347.5480651855469, "learning_rate": 5.848639588579881e-06, "loss": 19.3995, "step": 250570 }, { "epoch": 0.5061874537910528, "grad_norm": 242.48777770996094, "learning_rate": 5.848295585271764e-06, "loss": 21.9617, "step": 250580 }, { "epoch": 0.5062076544237365, "grad_norm": 271.5162048339844, "learning_rate": 5.847951577829153e-06, "loss": 13.8296, "step": 250590 }, { "epoch": 0.5062278550564203, "grad_norm": 216.37660217285156, "learning_rate": 5.847607566253732e-06, "loss": 20.1218, "step": 250600 }, { "epoch": 0.5062480556891041, "grad_norm": 293.5303955078125, "learning_rate": 5.847263550547174e-06, "loss": 10.1037, "step": 250610 }, { "epoch": 0.506268256321788, "grad_norm": 277.8330078125, "learning_rate": 5.8469195307111555e-06, "loss": 19.9254, "step": 250620 }, { "epoch": 0.5062884569544718, "grad_norm": 250.25103759765625, "learning_rate": 5.846575506747355e-06, "loss": 18.2864, "step": 250630 }, { "epoch": 0.5063086575871556, "grad_norm": 92.2562026977539, "learning_rate": 5.846231478657447e-06, "loss": 12.5958, "step": 250640 }, { "epoch": 0.5063288582198394, "grad_norm": 139.2681884765625, "learning_rate": 5.8458874464431115e-06, "loss": 15.9558, "step": 250650 }, { "epoch": 0.5063490588525232, "grad_norm": 265.2603759765625, "learning_rate": 5.845543410106021e-06, "loss": 27.521, "step": 250660 }, { "epoch": 0.5063692594852071, "grad_norm": 305.817138671875, "learning_rate": 5.845199369647856e-06, "loss": 20.0973, "step": 250670 }, { "epoch": 0.5063894601178909, "grad_norm": 244.6973114013672, "learning_rate": 5.84485532507029e-06, "loss": 10.5939, "step": 250680 }, { "epoch": 0.5064096607505747, "grad_norm": 241.13597106933594, "learning_rate": 5.844511276375003e-06, "loss": 17.8674, "step": 250690 }, { "epoch": 0.5064298613832585, "grad_norm": 411.2794189453125, "learning_rate": 5.844167223563669e-06, "loss": 15.3619, "step": 250700 }, { "epoch": 0.5064500620159423, "grad_norm": 247.901611328125, "learning_rate": 5.8438231666379685e-06, "loss": 11.3977, "step": 250710 }, { "epoch": 0.5064702626486262, "grad_norm": 143.85256958007812, "learning_rate": 5.843479105599576e-06, "loss": 20.9519, "step": 250720 }, { "epoch": 0.50649046328131, "grad_norm": 22.255630493164062, "learning_rate": 5.843135040450168e-06, "loss": 16.7048, "step": 250730 }, { "epoch": 0.5065106639139938, "grad_norm": 118.3250961303711, "learning_rate": 5.842790971191422e-06, "loss": 18.7568, "step": 250740 }, { "epoch": 0.5065308645466776, "grad_norm": 277.5435791015625, "learning_rate": 5.842446897825014e-06, "loss": 18.4252, "step": 250750 }, { "epoch": 0.5065510651793614, "grad_norm": 352.3453063964844, "learning_rate": 5.842102820352623e-06, "loss": 13.6083, "step": 250760 }, { "epoch": 0.5065712658120453, "grad_norm": 181.1653289794922, "learning_rate": 5.841758738775923e-06, "loss": 13.4495, "step": 250770 }, { "epoch": 0.5065914664447291, "grad_norm": 282.04034423828125, "learning_rate": 5.841414653096597e-06, "loss": 23.5024, "step": 250780 }, { "epoch": 0.5066116670774129, "grad_norm": 263.9112548828125, "learning_rate": 5.841070563316316e-06, "loss": 20.0642, "step": 250790 }, { "epoch": 0.5066318677100967, "grad_norm": 2.108936309814453, "learning_rate": 5.840726469436758e-06, "loss": 19.6314, "step": 250800 }, { "epoch": 0.5066520683427805, "grad_norm": 200.79859924316406, "learning_rate": 5.840382371459603e-06, "loss": 10.5214, "step": 250810 }, { "epoch": 0.5066722689754644, "grad_norm": 187.30540466308594, "learning_rate": 5.8400382693865255e-06, "loss": 20.1009, "step": 250820 }, { "epoch": 0.5066924696081482, "grad_norm": 95.83682250976562, "learning_rate": 5.839694163219203e-06, "loss": 12.9878, "step": 250830 }, { "epoch": 0.506712670240832, "grad_norm": 570.0504760742188, "learning_rate": 5.839350052959313e-06, "loss": 24.0431, "step": 250840 }, { "epoch": 0.5067328708735157, "grad_norm": 55.68435287475586, "learning_rate": 5.839005938608533e-06, "loss": 12.3211, "step": 250850 }, { "epoch": 0.5067530715061995, "grad_norm": 274.0301513671875, "learning_rate": 5.838661820168539e-06, "loss": 19.1913, "step": 250860 }, { "epoch": 0.5067732721388833, "grad_norm": 174.39328002929688, "learning_rate": 5.838317697641011e-06, "loss": 12.3867, "step": 250870 }, { "epoch": 0.5067934727715672, "grad_norm": 318.1181945800781, "learning_rate": 5.837973571027621e-06, "loss": 28.3988, "step": 250880 }, { "epoch": 0.506813673404251, "grad_norm": 172.2155303955078, "learning_rate": 5.837629440330053e-06, "loss": 16.2557, "step": 250890 }, { "epoch": 0.5068338740369348, "grad_norm": 148.06532287597656, "learning_rate": 5.837285305549978e-06, "loss": 18.4491, "step": 250900 }, { "epoch": 0.5068540746696186, "grad_norm": 415.7568359375, "learning_rate": 5.836941166689077e-06, "loss": 25.7715, "step": 250910 }, { "epoch": 0.5068742753023024, "grad_norm": 144.23583984375, "learning_rate": 5.836597023749028e-06, "loss": 6.5707, "step": 250920 }, { "epoch": 0.5068944759349863, "grad_norm": 178.6160125732422, "learning_rate": 5.836252876731503e-06, "loss": 17.9103, "step": 250930 }, { "epoch": 0.5069146765676701, "grad_norm": 339.21038818359375, "learning_rate": 5.835908725638186e-06, "loss": 26.5561, "step": 250940 }, { "epoch": 0.5069348772003539, "grad_norm": 186.62161254882812, "learning_rate": 5.83556457047075e-06, "loss": 22.3863, "step": 250950 }, { "epoch": 0.5069550778330377, "grad_norm": 25.5028018951416, "learning_rate": 5.835220411230873e-06, "loss": 22.0245, "step": 250960 }, { "epoch": 0.5069752784657215, "grad_norm": 434.65399169921875, "learning_rate": 5.834876247920233e-06, "loss": 12.4999, "step": 250970 }, { "epoch": 0.5069954790984054, "grad_norm": 283.5364685058594, "learning_rate": 5.83453208054051e-06, "loss": 17.221, "step": 250980 }, { "epoch": 0.5070156797310892, "grad_norm": 336.2869873046875, "learning_rate": 5.834187909093376e-06, "loss": 20.7049, "step": 250990 }, { "epoch": 0.507035880363773, "grad_norm": 150.5238494873047, "learning_rate": 5.8338437335805124e-06, "loss": 13.6199, "step": 251000 }, { "epoch": 0.5070560809964568, "grad_norm": 143.8397216796875, "learning_rate": 5.833499554003596e-06, "loss": 15.0091, "step": 251010 }, { "epoch": 0.5070762816291406, "grad_norm": 181.44920349121094, "learning_rate": 5.833155370364302e-06, "loss": 13.0005, "step": 251020 }, { "epoch": 0.5070964822618245, "grad_norm": 373.6978759765625, "learning_rate": 5.832811182664312e-06, "loss": 12.217, "step": 251030 }, { "epoch": 0.5071166828945083, "grad_norm": 501.3938293457031, "learning_rate": 5.832466990905299e-06, "loss": 15.6599, "step": 251040 }, { "epoch": 0.5071368835271921, "grad_norm": 458.93182373046875, "learning_rate": 5.8321227950889455e-06, "loss": 19.6598, "step": 251050 }, { "epoch": 0.5071570841598759, "grad_norm": 235.57904052734375, "learning_rate": 5.8317785952169245e-06, "loss": 15.9404, "step": 251060 }, { "epoch": 0.5071772847925597, "grad_norm": 218.45022583007812, "learning_rate": 5.8314343912909165e-06, "loss": 18.5912, "step": 251070 }, { "epoch": 0.5071974854252436, "grad_norm": 210.4546661376953, "learning_rate": 5.831090183312599e-06, "loss": 8.6518, "step": 251080 }, { "epoch": 0.5072176860579274, "grad_norm": 156.12413024902344, "learning_rate": 5.830745971283646e-06, "loss": 29.3029, "step": 251090 }, { "epoch": 0.5072378866906111, "grad_norm": 667.841796875, "learning_rate": 5.83040175520574e-06, "loss": 24.5204, "step": 251100 }, { "epoch": 0.5072580873232949, "grad_norm": 492.423828125, "learning_rate": 5.8300575350805555e-06, "loss": 17.5975, "step": 251110 }, { "epoch": 0.5072782879559787, "grad_norm": 366.7998352050781, "learning_rate": 5.8297133109097715e-06, "loss": 21.4545, "step": 251120 }, { "epoch": 0.5072984885886626, "grad_norm": 552.1708984375, "learning_rate": 5.829369082695066e-06, "loss": 30.1371, "step": 251130 }, { "epoch": 0.5073186892213464, "grad_norm": 437.21240234375, "learning_rate": 5.8290248504381165e-06, "loss": 16.9496, "step": 251140 }, { "epoch": 0.5073388898540302, "grad_norm": 222.89244079589844, "learning_rate": 5.828680614140599e-06, "loss": 16.5723, "step": 251150 }, { "epoch": 0.507359090486714, "grad_norm": 97.03450775146484, "learning_rate": 5.8283363738041945e-06, "loss": 21.9588, "step": 251160 }, { "epoch": 0.5073792911193978, "grad_norm": 254.39234924316406, "learning_rate": 5.827992129430578e-06, "loss": 27.0806, "step": 251170 }, { "epoch": 0.5073994917520817, "grad_norm": 372.87969970703125, "learning_rate": 5.827647881021428e-06, "loss": 7.3879, "step": 251180 }, { "epoch": 0.5074196923847655, "grad_norm": 369.1946105957031, "learning_rate": 5.827303628578424e-06, "loss": 14.1785, "step": 251190 }, { "epoch": 0.5074398930174493, "grad_norm": 74.46592712402344, "learning_rate": 5.826959372103239e-06, "loss": 21.9841, "step": 251200 }, { "epoch": 0.5074600936501331, "grad_norm": 229.06613159179688, "learning_rate": 5.826615111597558e-06, "loss": 17.2332, "step": 251210 }, { "epoch": 0.5074802942828169, "grad_norm": 184.9846649169922, "learning_rate": 5.826270847063053e-06, "loss": 25.9342, "step": 251220 }, { "epoch": 0.5075004949155008, "grad_norm": 546.110595703125, "learning_rate": 5.8259265785014054e-06, "loss": 28.6547, "step": 251230 }, { "epoch": 0.5075206955481846, "grad_norm": 179.9212188720703, "learning_rate": 5.82558230591429e-06, "loss": 18.6759, "step": 251240 }, { "epoch": 0.5075408961808684, "grad_norm": 348.24444580078125, "learning_rate": 5.825238029303388e-06, "loss": 21.5477, "step": 251250 }, { "epoch": 0.5075610968135522, "grad_norm": 326.4447937011719, "learning_rate": 5.824893748670377e-06, "loss": 16.1654, "step": 251260 }, { "epoch": 0.507581297446236, "grad_norm": 287.4835510253906, "learning_rate": 5.824549464016933e-06, "loss": 25.1001, "step": 251270 }, { "epoch": 0.5076014980789199, "grad_norm": 2.0921356678009033, "learning_rate": 5.824205175344735e-06, "loss": 22.9859, "step": 251280 }, { "epoch": 0.5076216987116037, "grad_norm": 213.4917449951172, "learning_rate": 5.82386088265546e-06, "loss": 27.8893, "step": 251290 }, { "epoch": 0.5076418993442875, "grad_norm": 365.5149841308594, "learning_rate": 5.823516585950787e-06, "loss": 24.626, "step": 251300 }, { "epoch": 0.5076620999769713, "grad_norm": 390.4840087890625, "learning_rate": 5.823172285232394e-06, "loss": 18.145, "step": 251310 }, { "epoch": 0.5076823006096551, "grad_norm": 567.1654052734375, "learning_rate": 5.822827980501962e-06, "loss": 19.8141, "step": 251320 }, { "epoch": 0.507702501242339, "grad_norm": 280.54766845703125, "learning_rate": 5.822483671761164e-06, "loss": 12.6622, "step": 251330 }, { "epoch": 0.5077227018750228, "grad_norm": 267.04656982421875, "learning_rate": 5.82213935901168e-06, "loss": 12.5432, "step": 251340 }, { "epoch": 0.5077429025077066, "grad_norm": 279.6389465332031, "learning_rate": 5.821795042255189e-06, "loss": 21.4058, "step": 251350 }, { "epoch": 0.5077631031403903, "grad_norm": 158.29808044433594, "learning_rate": 5.8214507214933666e-06, "loss": 17.4758, "step": 251360 }, { "epoch": 0.5077833037730741, "grad_norm": 249.4699249267578, "learning_rate": 5.821106396727897e-06, "loss": 11.4371, "step": 251370 }, { "epoch": 0.5078035044057579, "grad_norm": 334.7232360839844, "learning_rate": 5.820762067960451e-06, "loss": 24.7948, "step": 251380 }, { "epoch": 0.5078237050384418, "grad_norm": 286.2801513671875, "learning_rate": 5.820417735192712e-06, "loss": 28.0708, "step": 251390 }, { "epoch": 0.5078439056711256, "grad_norm": 146.02745056152344, "learning_rate": 5.8200733984263556e-06, "loss": 23.2296, "step": 251400 }, { "epoch": 0.5078641063038094, "grad_norm": 268.02978515625, "learning_rate": 5.819729057663062e-06, "loss": 24.2651, "step": 251410 }, { "epoch": 0.5078843069364932, "grad_norm": 260.49273681640625, "learning_rate": 5.819384712904508e-06, "loss": 17.0353, "step": 251420 }, { "epoch": 0.507904507569177, "grad_norm": 164.01153564453125, "learning_rate": 5.819040364152372e-06, "loss": 25.774, "step": 251430 }, { "epoch": 0.5079247082018609, "grad_norm": 347.1624755859375, "learning_rate": 5.8186960114083325e-06, "loss": 14.6928, "step": 251440 }, { "epoch": 0.5079449088345447, "grad_norm": 149.4549102783203, "learning_rate": 5.818351654674067e-06, "loss": 14.3268, "step": 251450 }, { "epoch": 0.5079651094672285, "grad_norm": 76.93974304199219, "learning_rate": 5.818007293951255e-06, "loss": 29.9636, "step": 251460 }, { "epoch": 0.5079853100999123, "grad_norm": 0.0, "learning_rate": 5.817662929241576e-06, "loss": 24.1277, "step": 251470 }, { "epoch": 0.5080055107325961, "grad_norm": 478.0287170410156, "learning_rate": 5.817318560546708e-06, "loss": 37.8519, "step": 251480 }, { "epoch": 0.50802571136528, "grad_norm": 296.1255798339844, "learning_rate": 5.8169741878683265e-06, "loss": 13.1375, "step": 251490 }, { "epoch": 0.5080459119979638, "grad_norm": 2.960360050201416, "learning_rate": 5.816629811208112e-06, "loss": 16.4742, "step": 251500 }, { "epoch": 0.5080661126306476, "grad_norm": 78.4245834350586, "learning_rate": 5.816285430567743e-06, "loss": 18.9215, "step": 251510 }, { "epoch": 0.5080863132633314, "grad_norm": 139.4978790283203, "learning_rate": 5.815941045948898e-06, "loss": 23.0018, "step": 251520 }, { "epoch": 0.5081065138960152, "grad_norm": 491.62841796875, "learning_rate": 5.815596657353257e-06, "loss": 11.4281, "step": 251530 }, { "epoch": 0.5081267145286991, "grad_norm": 53.78145217895508, "learning_rate": 5.815252264782493e-06, "loss": 19.9552, "step": 251540 }, { "epoch": 0.5081469151613829, "grad_norm": 414.7879638671875, "learning_rate": 5.814907868238291e-06, "loss": 49.6362, "step": 251550 }, { "epoch": 0.5081671157940667, "grad_norm": 105.06440734863281, "learning_rate": 5.814563467722328e-06, "loss": 17.4304, "step": 251560 }, { "epoch": 0.5081873164267505, "grad_norm": 340.1830139160156, "learning_rate": 5.8142190632362785e-06, "loss": 30.1838, "step": 251570 }, { "epoch": 0.5082075170594343, "grad_norm": 406.2680358886719, "learning_rate": 5.813874654781825e-06, "loss": 18.0728, "step": 251580 }, { "epoch": 0.5082277176921182, "grad_norm": 274.26019287109375, "learning_rate": 5.813530242360647e-06, "loss": 12.4804, "step": 251590 }, { "epoch": 0.508247918324802, "grad_norm": 217.36582946777344, "learning_rate": 5.813185825974419e-06, "loss": 19.3986, "step": 251600 }, { "epoch": 0.5082681189574858, "grad_norm": 15.314507484436035, "learning_rate": 5.812841405624823e-06, "loss": 12.2681, "step": 251610 }, { "epoch": 0.5082883195901695, "grad_norm": 617.7559204101562, "learning_rate": 5.812496981313536e-06, "loss": 23.1795, "step": 251620 }, { "epoch": 0.5083085202228533, "grad_norm": 359.1141662597656, "learning_rate": 5.8121525530422375e-06, "loss": 22.951, "step": 251630 }, { "epoch": 0.5083287208555372, "grad_norm": 332.9979248046875, "learning_rate": 5.811808120812607e-06, "loss": 14.8693, "step": 251640 }, { "epoch": 0.508348921488221, "grad_norm": 178.17027282714844, "learning_rate": 5.811463684626319e-06, "loss": 15.059, "step": 251650 }, { "epoch": 0.5083691221209048, "grad_norm": 250.81533813476562, "learning_rate": 5.8111192444850586e-06, "loss": 17.4671, "step": 251660 }, { "epoch": 0.5083893227535886, "grad_norm": 537.250732421875, "learning_rate": 5.8107748003905e-06, "loss": 27.7206, "step": 251670 }, { "epoch": 0.5084095233862724, "grad_norm": 112.71495056152344, "learning_rate": 5.810430352344324e-06, "loss": 22.3311, "step": 251680 }, { "epoch": 0.5084297240189563, "grad_norm": 112.36412811279297, "learning_rate": 5.810085900348209e-06, "loss": 25.0356, "step": 251690 }, { "epoch": 0.5084499246516401, "grad_norm": 399.82135009765625, "learning_rate": 5.809741444403831e-06, "loss": 14.3363, "step": 251700 }, { "epoch": 0.5084701252843239, "grad_norm": 265.08221435546875, "learning_rate": 5.809396984512875e-06, "loss": 27.0822, "step": 251710 }, { "epoch": 0.5084903259170077, "grad_norm": 323.2629699707031, "learning_rate": 5.8090525206770145e-06, "loss": 16.1273, "step": 251720 }, { "epoch": 0.5085105265496915, "grad_norm": 157.2101287841797, "learning_rate": 5.808708052897931e-06, "loss": 16.8315, "step": 251730 }, { "epoch": 0.5085307271823754, "grad_norm": 82.2426986694336, "learning_rate": 5.808363581177301e-06, "loss": 18.6318, "step": 251740 }, { "epoch": 0.5085509278150592, "grad_norm": 61.487178802490234, "learning_rate": 5.8080191055168064e-06, "loss": 22.7295, "step": 251750 }, { "epoch": 0.508571128447743, "grad_norm": 167.49876403808594, "learning_rate": 5.807674625918125e-06, "loss": 14.2461, "step": 251760 }, { "epoch": 0.5085913290804268, "grad_norm": 233.8783416748047, "learning_rate": 5.807330142382934e-06, "loss": 15.2106, "step": 251770 }, { "epoch": 0.5086115297131106, "grad_norm": 260.9427185058594, "learning_rate": 5.806985654912915e-06, "loss": 24.6996, "step": 251780 }, { "epoch": 0.5086317303457945, "grad_norm": 60.19903564453125, "learning_rate": 5.806641163509744e-06, "loss": 13.068, "step": 251790 }, { "epoch": 0.5086519309784783, "grad_norm": 124.89225006103516, "learning_rate": 5.8062966681751046e-06, "loss": 9.8265, "step": 251800 }, { "epoch": 0.5086721316111621, "grad_norm": 168.75892639160156, "learning_rate": 5.805952168910669e-06, "loss": 18.7126, "step": 251810 }, { "epoch": 0.5086923322438459, "grad_norm": 142.0146026611328, "learning_rate": 5.805607665718124e-06, "loss": 21.4081, "step": 251820 }, { "epoch": 0.5087125328765297, "grad_norm": 183.4420623779297, "learning_rate": 5.805263158599143e-06, "loss": 11.9147, "step": 251830 }, { "epoch": 0.5087327335092136, "grad_norm": 301.62091064453125, "learning_rate": 5.804918647555408e-06, "loss": 19.1385, "step": 251840 }, { "epoch": 0.5087529341418974, "grad_norm": 608.1326904296875, "learning_rate": 5.8045741325885965e-06, "loss": 19.0388, "step": 251850 }, { "epoch": 0.5087731347745812, "grad_norm": 591.2579956054688, "learning_rate": 5.804229613700389e-06, "loss": 32.412, "step": 251860 }, { "epoch": 0.5087933354072649, "grad_norm": 84.89599609375, "learning_rate": 5.803885090892464e-06, "loss": 22.123, "step": 251870 }, { "epoch": 0.5088135360399487, "grad_norm": 236.92630004882812, "learning_rate": 5.8035405641665e-06, "loss": 11.7402, "step": 251880 }, { "epoch": 0.5088337366726325, "grad_norm": 605.5479736328125, "learning_rate": 5.803196033524176e-06, "loss": 21.6659, "step": 251890 }, { "epoch": 0.5088539373053164, "grad_norm": 0.0, "learning_rate": 5.802851498967173e-06, "loss": 16.8744, "step": 251900 }, { "epoch": 0.5088741379380002, "grad_norm": 787.18017578125, "learning_rate": 5.802506960497168e-06, "loss": 34.5116, "step": 251910 }, { "epoch": 0.508894338570684, "grad_norm": 131.16815185546875, "learning_rate": 5.802162418115842e-06, "loss": 18.5461, "step": 251920 }, { "epoch": 0.5089145392033678, "grad_norm": 379.4789123535156, "learning_rate": 5.801817871824876e-06, "loss": 16.4024, "step": 251930 }, { "epoch": 0.5089347398360516, "grad_norm": 497.5932312011719, "learning_rate": 5.801473321625944e-06, "loss": 7.3512, "step": 251940 }, { "epoch": 0.5089549404687355, "grad_norm": 449.67022705078125, "learning_rate": 5.80112876752073e-06, "loss": 19.6445, "step": 251950 }, { "epoch": 0.5089751411014193, "grad_norm": 410.890380859375, "learning_rate": 5.80078420951091e-06, "loss": 30.5156, "step": 251960 }, { "epoch": 0.5089953417341031, "grad_norm": 845.1141967773438, "learning_rate": 5.800439647598165e-06, "loss": 23.8294, "step": 251970 }, { "epoch": 0.5090155423667869, "grad_norm": 184.1002655029297, "learning_rate": 5.800095081784176e-06, "loss": 23.0964, "step": 251980 }, { "epoch": 0.5090357429994707, "grad_norm": 267.9649963378906, "learning_rate": 5.799750512070618e-06, "loss": 14.4452, "step": 251990 }, { "epoch": 0.5090559436321546, "grad_norm": 399.79718017578125, "learning_rate": 5.799405938459175e-06, "loss": 29.1811, "step": 252000 }, { "epoch": 0.5090761442648384, "grad_norm": 397.86322021484375, "learning_rate": 5.7990613609515235e-06, "loss": 18.8354, "step": 252010 }, { "epoch": 0.5090963448975222, "grad_norm": 175.89810180664062, "learning_rate": 5.798716779549344e-06, "loss": 31.0632, "step": 252020 }, { "epoch": 0.509116545530206, "grad_norm": 181.9753875732422, "learning_rate": 5.798372194254317e-06, "loss": 25.5333, "step": 252030 }, { "epoch": 0.5091367461628898, "grad_norm": 76.48158264160156, "learning_rate": 5.7980276050681195e-06, "loss": 18.1311, "step": 252040 }, { "epoch": 0.5091569467955737, "grad_norm": 348.2669372558594, "learning_rate": 5.797683011992432e-06, "loss": 11.1366, "step": 252050 }, { "epoch": 0.5091771474282575, "grad_norm": 132.4213104248047, "learning_rate": 5.797338415028934e-06, "loss": 26.2916, "step": 252060 }, { "epoch": 0.5091973480609413, "grad_norm": 27.393604278564453, "learning_rate": 5.796993814179307e-06, "loss": 22.2131, "step": 252070 }, { "epoch": 0.5092175486936251, "grad_norm": 5.225275993347168, "learning_rate": 5.796649209445227e-06, "loss": 14.6101, "step": 252080 }, { "epoch": 0.5092377493263089, "grad_norm": 504.0828552246094, "learning_rate": 5.7963046008283775e-06, "loss": 16.751, "step": 252090 }, { "epoch": 0.5092579499589928, "grad_norm": 301.7989501953125, "learning_rate": 5.795959988330434e-06, "loss": 22.4975, "step": 252100 }, { "epoch": 0.5092781505916766, "grad_norm": 382.95654296875, "learning_rate": 5.795615371953078e-06, "loss": 23.4574, "step": 252110 }, { "epoch": 0.5092983512243604, "grad_norm": 29.749422073364258, "learning_rate": 5.795270751697991e-06, "loss": 10.1176, "step": 252120 }, { "epoch": 0.5093185518570441, "grad_norm": 865.3911743164062, "learning_rate": 5.794926127566849e-06, "loss": 17.3412, "step": 252130 }, { "epoch": 0.5093387524897279, "grad_norm": 257.5775146484375, "learning_rate": 5.794581499561335e-06, "loss": 20.838, "step": 252140 }, { "epoch": 0.5093589531224118, "grad_norm": 315.5860290527344, "learning_rate": 5.794236867683125e-06, "loss": 15.2562, "step": 252150 }, { "epoch": 0.5093791537550956, "grad_norm": 296.7806091308594, "learning_rate": 5.793892231933903e-06, "loss": 15.704, "step": 252160 }, { "epoch": 0.5093993543877794, "grad_norm": 732.0755004882812, "learning_rate": 5.793547592315345e-06, "loss": 44.4594, "step": 252170 }, { "epoch": 0.5094195550204632, "grad_norm": 1590.49462890625, "learning_rate": 5.793202948829133e-06, "loss": 27.5048, "step": 252180 }, { "epoch": 0.509439755653147, "grad_norm": 381.807861328125, "learning_rate": 5.792858301476946e-06, "loss": 22.4006, "step": 252190 }, { "epoch": 0.5094599562858309, "grad_norm": 373.92926025390625, "learning_rate": 5.792513650260465e-06, "loss": 19.6564, "step": 252200 }, { "epoch": 0.5094801569185147, "grad_norm": 658.6917724609375, "learning_rate": 5.792168995181366e-06, "loss": 32.3448, "step": 252210 }, { "epoch": 0.5095003575511985, "grad_norm": 225.7948760986328, "learning_rate": 5.791824336241334e-06, "loss": 8.5778, "step": 252220 }, { "epoch": 0.5095205581838823, "grad_norm": 161.5819091796875, "learning_rate": 5.791479673442044e-06, "loss": 32.925, "step": 252230 }, { "epoch": 0.5095407588165661, "grad_norm": 247.9228973388672, "learning_rate": 5.791135006785179e-06, "loss": 11.4713, "step": 252240 }, { "epoch": 0.50956095944925, "grad_norm": 446.11541748046875, "learning_rate": 5.7907903362724195e-06, "loss": 23.019, "step": 252250 }, { "epoch": 0.5095811600819338, "grad_norm": 289.3345947265625, "learning_rate": 5.790445661905441e-06, "loss": 20.131, "step": 252260 }, { "epoch": 0.5096013607146176, "grad_norm": 119.7979507446289, "learning_rate": 5.790100983685928e-06, "loss": 14.9991, "step": 252270 }, { "epoch": 0.5096215613473014, "grad_norm": 435.15557861328125, "learning_rate": 5.789756301615558e-06, "loss": 15.8132, "step": 252280 }, { "epoch": 0.5096417619799852, "grad_norm": 418.84735107421875, "learning_rate": 5.7894116156960115e-06, "loss": 15.9891, "step": 252290 }, { "epoch": 0.509661962612669, "grad_norm": 291.987060546875, "learning_rate": 5.78906692592897e-06, "loss": 19.6317, "step": 252300 }, { "epoch": 0.5096821632453529, "grad_norm": 228.9920196533203, "learning_rate": 5.788722232316109e-06, "loss": 20.5863, "step": 252310 }, { "epoch": 0.5097023638780367, "grad_norm": 175.90408325195312, "learning_rate": 5.7883775348591146e-06, "loss": 16.7253, "step": 252320 }, { "epoch": 0.5097225645107205, "grad_norm": 398.8650207519531, "learning_rate": 5.788032833559661e-06, "loss": 11.1492, "step": 252330 }, { "epoch": 0.5097427651434043, "grad_norm": 427.85882568359375, "learning_rate": 5.787688128419433e-06, "loss": 16.6079, "step": 252340 }, { "epoch": 0.5097629657760882, "grad_norm": 167.2185516357422, "learning_rate": 5.787343419440108e-06, "loss": 13.8572, "step": 252350 }, { "epoch": 0.509783166408772, "grad_norm": 310.205810546875, "learning_rate": 5.786998706623365e-06, "loss": 23.7526, "step": 252360 }, { "epoch": 0.5098033670414558, "grad_norm": 271.3121337890625, "learning_rate": 5.786653989970889e-06, "loss": 11.5874, "step": 252370 }, { "epoch": 0.5098235676741395, "grad_norm": 339.94000244140625, "learning_rate": 5.786309269484355e-06, "loss": 21.5094, "step": 252380 }, { "epoch": 0.5098437683068233, "grad_norm": 434.64111328125, "learning_rate": 5.785964545165446e-06, "loss": 13.3356, "step": 252390 }, { "epoch": 0.5098639689395071, "grad_norm": 231.44146728515625, "learning_rate": 5.78561981701584e-06, "loss": 13.9666, "step": 252400 }, { "epoch": 0.509884169572191, "grad_norm": 251.97938537597656, "learning_rate": 5.785275085037218e-06, "loss": 21.2492, "step": 252410 }, { "epoch": 0.5099043702048748, "grad_norm": 483.2889709472656, "learning_rate": 5.7849303492312605e-06, "loss": 27.6086, "step": 252420 }, { "epoch": 0.5099245708375586, "grad_norm": 351.4258117675781, "learning_rate": 5.784585609599649e-06, "loss": 17.5145, "step": 252430 }, { "epoch": 0.5099447714702424, "grad_norm": 142.99046325683594, "learning_rate": 5.784240866144062e-06, "loss": 11.4723, "step": 252440 }, { "epoch": 0.5099649721029262, "grad_norm": 270.4617919921875, "learning_rate": 5.783896118866179e-06, "loss": 16.0982, "step": 252450 }, { "epoch": 0.5099851727356101, "grad_norm": 582.95166015625, "learning_rate": 5.783551367767683e-06, "loss": 24.2623, "step": 252460 }, { "epoch": 0.5100053733682939, "grad_norm": 0.0, "learning_rate": 5.783206612850251e-06, "loss": 13.5704, "step": 252470 }, { "epoch": 0.5100255740009777, "grad_norm": 109.71514892578125, "learning_rate": 5.782861854115567e-06, "loss": 17.1513, "step": 252480 }, { "epoch": 0.5100457746336615, "grad_norm": 111.66004943847656, "learning_rate": 5.782517091565308e-06, "loss": 17.606, "step": 252490 }, { "epoch": 0.5100659752663453, "grad_norm": 587.1206665039062, "learning_rate": 5.782172325201155e-06, "loss": 27.0883, "step": 252500 }, { "epoch": 0.5100861758990292, "grad_norm": 180.19126892089844, "learning_rate": 5.78182755502479e-06, "loss": 13.306, "step": 252510 }, { "epoch": 0.510106376531713, "grad_norm": 355.01422119140625, "learning_rate": 5.781482781037892e-06, "loss": 13.4748, "step": 252520 }, { "epoch": 0.5101265771643968, "grad_norm": 255.37890625, "learning_rate": 5.781138003242141e-06, "loss": 24.2507, "step": 252530 }, { "epoch": 0.5101467777970806, "grad_norm": 317.1413879394531, "learning_rate": 5.780793221639219e-06, "loss": 12.2112, "step": 252540 }, { "epoch": 0.5101669784297644, "grad_norm": 368.3630065917969, "learning_rate": 5.780448436230805e-06, "loss": 10.5696, "step": 252550 }, { "epoch": 0.5101871790624483, "grad_norm": 716.6698608398438, "learning_rate": 5.7801036470185815e-06, "loss": 37.78, "step": 252560 }, { "epoch": 0.5102073796951321, "grad_norm": 162.9932403564453, "learning_rate": 5.779758854004226e-06, "loss": 21.4951, "step": 252570 }, { "epoch": 0.5102275803278159, "grad_norm": 447.0302734375, "learning_rate": 5.77941405718942e-06, "loss": 16.4113, "step": 252580 }, { "epoch": 0.5102477809604997, "grad_norm": 356.1291809082031, "learning_rate": 5.779069256575846e-06, "loss": 34.7396, "step": 252590 }, { "epoch": 0.5102679815931835, "grad_norm": 171.63882446289062, "learning_rate": 5.778724452165181e-06, "loss": 17.7828, "step": 252600 }, { "epoch": 0.5102881822258674, "grad_norm": 493.62213134765625, "learning_rate": 5.7783796439591085e-06, "loss": 21.0078, "step": 252610 }, { "epoch": 0.5103083828585512, "grad_norm": 36.40460968017578, "learning_rate": 5.778034831959308e-06, "loss": 21.2271, "step": 252620 }, { "epoch": 0.510328583491235, "grad_norm": 357.38934326171875, "learning_rate": 5.77769001616746e-06, "loss": 34.0295, "step": 252630 }, { "epoch": 0.5103487841239187, "grad_norm": 153.60853576660156, "learning_rate": 5.777345196585247e-06, "loss": 24.96, "step": 252640 }, { "epoch": 0.5103689847566025, "grad_norm": 334.1118469238281, "learning_rate": 5.777000373214345e-06, "loss": 16.8896, "step": 252650 }, { "epoch": 0.5103891853892863, "grad_norm": 167.14508056640625, "learning_rate": 5.776655546056439e-06, "loss": 15.3726, "step": 252660 }, { "epoch": 0.5104093860219702, "grad_norm": 276.1703796386719, "learning_rate": 5.776310715113207e-06, "loss": 24.1061, "step": 252670 }, { "epoch": 0.510429586654654, "grad_norm": 534.8427734375, "learning_rate": 5.77596588038633e-06, "loss": 27.3373, "step": 252680 }, { "epoch": 0.5104497872873378, "grad_norm": 156.19293212890625, "learning_rate": 5.775621041877491e-06, "loss": 29.1591, "step": 252690 }, { "epoch": 0.5104699879200216, "grad_norm": 160.4987030029297, "learning_rate": 5.77527619958837e-06, "loss": 13.9708, "step": 252700 }, { "epoch": 0.5104901885527054, "grad_norm": 274.3448791503906, "learning_rate": 5.774931353520645e-06, "loss": 13.4434, "step": 252710 }, { "epoch": 0.5105103891853893, "grad_norm": 74.02042388916016, "learning_rate": 5.774586503676e-06, "loss": 13.4707, "step": 252720 }, { "epoch": 0.5105305898180731, "grad_norm": 303.2458801269531, "learning_rate": 5.774241650056114e-06, "loss": 27.6218, "step": 252730 }, { "epoch": 0.5105507904507569, "grad_norm": 194.05435180664062, "learning_rate": 5.773896792662666e-06, "loss": 22.4797, "step": 252740 }, { "epoch": 0.5105709910834407, "grad_norm": 502.05670166015625, "learning_rate": 5.773551931497342e-06, "loss": 22.4124, "step": 252750 }, { "epoch": 0.5105911917161245, "grad_norm": 432.9278564453125, "learning_rate": 5.773207066561817e-06, "loss": 17.6544, "step": 252760 }, { "epoch": 0.5106113923488084, "grad_norm": 897.4593505859375, "learning_rate": 5.772862197857776e-06, "loss": 23.6653, "step": 252770 }, { "epoch": 0.5106315929814922, "grad_norm": 214.57623291015625, "learning_rate": 5.772517325386898e-06, "loss": 7.53, "step": 252780 }, { "epoch": 0.510651793614176, "grad_norm": 317.4881286621094, "learning_rate": 5.772172449150865e-06, "loss": 15.7831, "step": 252790 }, { "epoch": 0.5106719942468598, "grad_norm": 193.80563354492188, "learning_rate": 5.771827569151357e-06, "loss": 16.8907, "step": 252800 }, { "epoch": 0.5106921948795436, "grad_norm": 362.6708068847656, "learning_rate": 5.771482685390053e-06, "loss": 19.6065, "step": 252810 }, { "epoch": 0.5107123955122275, "grad_norm": 208.90313720703125, "learning_rate": 5.7711377978686385e-06, "loss": 23.3499, "step": 252820 }, { "epoch": 0.5107325961449113, "grad_norm": 285.38824462890625, "learning_rate": 5.770792906588791e-06, "loss": 20.5698, "step": 252830 }, { "epoch": 0.5107527967775951, "grad_norm": 166.09811401367188, "learning_rate": 5.770448011552192e-06, "loss": 12.1161, "step": 252840 }, { "epoch": 0.5107729974102789, "grad_norm": 161.23663330078125, "learning_rate": 5.770103112760523e-06, "loss": 18.3146, "step": 252850 }, { "epoch": 0.5107931980429627, "grad_norm": 291.48529052734375, "learning_rate": 5.769758210215466e-06, "loss": 14.2107, "step": 252860 }, { "epoch": 0.5108133986756466, "grad_norm": 210.37660217285156, "learning_rate": 5.7694133039186986e-06, "loss": 18.561, "step": 252870 }, { "epoch": 0.5108335993083304, "grad_norm": 114.86100769042969, "learning_rate": 5.7690683938719065e-06, "loss": 15.0994, "step": 252880 }, { "epoch": 0.5108537999410141, "grad_norm": 162.49594116210938, "learning_rate": 5.7687234800767666e-06, "loss": 10.9558, "step": 252890 }, { "epoch": 0.5108740005736979, "grad_norm": 627.1798095703125, "learning_rate": 5.768378562534962e-06, "loss": 15.6655, "step": 252900 }, { "epoch": 0.5108942012063817, "grad_norm": 0.0, "learning_rate": 5.768033641248174e-06, "loss": 7.1724, "step": 252910 }, { "epoch": 0.5109144018390656, "grad_norm": 702.9738159179688, "learning_rate": 5.767688716218083e-06, "loss": 14.4627, "step": 252920 }, { "epoch": 0.5109346024717494, "grad_norm": 418.4978332519531, "learning_rate": 5.76734378744637e-06, "loss": 18.6617, "step": 252930 }, { "epoch": 0.5109548031044332, "grad_norm": 71.7748031616211, "learning_rate": 5.766998854934716e-06, "loss": 20.8422, "step": 252940 }, { "epoch": 0.510975003737117, "grad_norm": 149.43885803222656, "learning_rate": 5.766653918684803e-06, "loss": 17.9515, "step": 252950 }, { "epoch": 0.5109952043698008, "grad_norm": 154.11795043945312, "learning_rate": 5.766308978698313e-06, "loss": 16.0548, "step": 252960 }, { "epoch": 0.5110154050024847, "grad_norm": 79.58231353759766, "learning_rate": 5.765964034976924e-06, "loss": 17.9642, "step": 252970 }, { "epoch": 0.5110356056351685, "grad_norm": 179.7070770263672, "learning_rate": 5.765619087522322e-06, "loss": 13.9668, "step": 252980 }, { "epoch": 0.5110558062678523, "grad_norm": 695.0833740234375, "learning_rate": 5.765274136336183e-06, "loss": 24.2756, "step": 252990 }, { "epoch": 0.5110760069005361, "grad_norm": 472.7565002441406, "learning_rate": 5.764929181420191e-06, "loss": 15.1442, "step": 253000 }, { "epoch": 0.5110962075332199, "grad_norm": 140.40733337402344, "learning_rate": 5.7645842227760274e-06, "loss": 23.0324, "step": 253010 }, { "epoch": 0.5111164081659038, "grad_norm": 195.3859100341797, "learning_rate": 5.764239260405373e-06, "loss": 25.962, "step": 253020 }, { "epoch": 0.5111366087985876, "grad_norm": 146.00059509277344, "learning_rate": 5.763894294309909e-06, "loss": 12.9741, "step": 253030 }, { "epoch": 0.5111568094312714, "grad_norm": 245.92684936523438, "learning_rate": 5.763549324491317e-06, "loss": 18.737, "step": 253040 }, { "epoch": 0.5111770100639552, "grad_norm": 843.7227783203125, "learning_rate": 5.763204350951278e-06, "loss": 14.9897, "step": 253050 }, { "epoch": 0.511197210696639, "grad_norm": 149.62619018554688, "learning_rate": 5.762859373691473e-06, "loss": 21.7094, "step": 253060 }, { "epoch": 0.5112174113293229, "grad_norm": 113.38229370117188, "learning_rate": 5.7625143927135854e-06, "loss": 16.6067, "step": 253070 }, { "epoch": 0.5112376119620067, "grad_norm": 529.1049194335938, "learning_rate": 5.762169408019293e-06, "loss": 30.0604, "step": 253080 }, { "epoch": 0.5112578125946905, "grad_norm": 325.5870666503906, "learning_rate": 5.761824419610282e-06, "loss": 10.7373, "step": 253090 }, { "epoch": 0.5112780132273743, "grad_norm": 115.72893524169922, "learning_rate": 5.761479427488229e-06, "loss": 11.6328, "step": 253100 }, { "epoch": 0.5112982138600581, "grad_norm": 531.6671752929688, "learning_rate": 5.761134431654819e-06, "loss": 29.2204, "step": 253110 }, { "epoch": 0.511318414492742, "grad_norm": 306.19708251953125, "learning_rate": 5.760789432111731e-06, "loss": 19.4985, "step": 253120 }, { "epoch": 0.5113386151254258, "grad_norm": 293.4243469238281, "learning_rate": 5.760444428860648e-06, "loss": 18.2363, "step": 253130 }, { "epoch": 0.5113588157581096, "grad_norm": 241.1337432861328, "learning_rate": 5.760099421903253e-06, "loss": 20.1086, "step": 253140 }, { "epoch": 0.5113790163907933, "grad_norm": 254.03060913085938, "learning_rate": 5.7597544112412225e-06, "loss": 16.0217, "step": 253150 }, { "epoch": 0.5113992170234771, "grad_norm": 0.0, "learning_rate": 5.759409396876242e-06, "loss": 12.7892, "step": 253160 }, { "epoch": 0.511419417656161, "grad_norm": 328.09381103515625, "learning_rate": 5.759064378809993e-06, "loss": 15.1151, "step": 253170 }, { "epoch": 0.5114396182888448, "grad_norm": 117.01596069335938, "learning_rate": 5.758719357044157e-06, "loss": 25.2146, "step": 253180 }, { "epoch": 0.5114598189215286, "grad_norm": 392.1856384277344, "learning_rate": 5.758374331580412e-06, "loss": 21.865, "step": 253190 }, { "epoch": 0.5114800195542124, "grad_norm": 285.51995849609375, "learning_rate": 5.7580293024204455e-06, "loss": 10.8515, "step": 253200 }, { "epoch": 0.5115002201868962, "grad_norm": 179.7209930419922, "learning_rate": 5.7576842695659344e-06, "loss": 8.0926, "step": 253210 }, { "epoch": 0.51152042081958, "grad_norm": 416.4002990722656, "learning_rate": 5.757339233018563e-06, "loss": 21.5843, "step": 253220 }, { "epoch": 0.5115406214522639, "grad_norm": 255.66989135742188, "learning_rate": 5.756994192780011e-06, "loss": 17.6996, "step": 253230 }, { "epoch": 0.5115608220849477, "grad_norm": 216.25595092773438, "learning_rate": 5.756649148851962e-06, "loss": 23.715, "step": 253240 }, { "epoch": 0.5115810227176315, "grad_norm": 332.0377197265625, "learning_rate": 5.7563041012360975e-06, "loss": 21.368, "step": 253250 }, { "epoch": 0.5116012233503153, "grad_norm": 45.30392837524414, "learning_rate": 5.7559590499340965e-06, "loss": 19.7508, "step": 253260 }, { "epoch": 0.5116214239829991, "grad_norm": 78.61064147949219, "learning_rate": 5.7556139949476445e-06, "loss": 14.6541, "step": 253270 }, { "epoch": 0.511641624615683, "grad_norm": 324.4110107421875, "learning_rate": 5.755268936278421e-06, "loss": 26.2716, "step": 253280 }, { "epoch": 0.5116618252483668, "grad_norm": 22.560739517211914, "learning_rate": 5.754923873928108e-06, "loss": 20.7158, "step": 253290 }, { "epoch": 0.5116820258810506, "grad_norm": 161.76954650878906, "learning_rate": 5.7545788078983875e-06, "loss": 15.1186, "step": 253300 }, { "epoch": 0.5117022265137344, "grad_norm": 305.7482604980469, "learning_rate": 5.754233738190942e-06, "loss": 24.0888, "step": 253310 }, { "epoch": 0.5117224271464182, "grad_norm": 170.38169860839844, "learning_rate": 5.753888664807452e-06, "loss": 21.4562, "step": 253320 }, { "epoch": 0.5117426277791021, "grad_norm": 213.614990234375, "learning_rate": 5.753543587749601e-06, "loss": 31.8469, "step": 253330 }, { "epoch": 0.5117628284117859, "grad_norm": 627.0361938476562, "learning_rate": 5.753198507019068e-06, "loss": 44.3443, "step": 253340 }, { "epoch": 0.5117830290444697, "grad_norm": 363.5509033203125, "learning_rate": 5.752853422617539e-06, "loss": 17.5948, "step": 253350 }, { "epoch": 0.5118032296771535, "grad_norm": 307.6011962890625, "learning_rate": 5.752508334546695e-06, "loss": 16.7781, "step": 253360 }, { "epoch": 0.5118234303098373, "grad_norm": 133.14004516601562, "learning_rate": 5.7521632428082135e-06, "loss": 14.2057, "step": 253370 }, { "epoch": 0.5118436309425212, "grad_norm": 329.9244079589844, "learning_rate": 5.75181814740378e-06, "loss": 31.7192, "step": 253380 }, { "epoch": 0.511863831575205, "grad_norm": 466.1136474609375, "learning_rate": 5.751473048335078e-06, "loss": 13.3888, "step": 253390 }, { "epoch": 0.5118840322078888, "grad_norm": 119.69248962402344, "learning_rate": 5.751127945603786e-06, "loss": 4.6802, "step": 253400 }, { "epoch": 0.5119042328405725, "grad_norm": 460.5903015136719, "learning_rate": 5.750782839211588e-06, "loss": 19.4533, "step": 253410 }, { "epoch": 0.5119244334732563, "grad_norm": 344.0168151855469, "learning_rate": 5.750437729160165e-06, "loss": 14.6625, "step": 253420 }, { "epoch": 0.5119446341059402, "grad_norm": 216.51513671875, "learning_rate": 5.7500926154512e-06, "loss": 10.2293, "step": 253430 }, { "epoch": 0.511964834738624, "grad_norm": 259.9365234375, "learning_rate": 5.749747498086374e-06, "loss": 12.2768, "step": 253440 }, { "epoch": 0.5119850353713078, "grad_norm": 1282.296142578125, "learning_rate": 5.7494023770673705e-06, "loss": 23.5844, "step": 253450 }, { "epoch": 0.5120052360039916, "grad_norm": 709.0100708007812, "learning_rate": 5.74905725239587e-06, "loss": 23.4172, "step": 253460 }, { "epoch": 0.5120254366366754, "grad_norm": 196.07252502441406, "learning_rate": 5.748712124073556e-06, "loss": 12.1961, "step": 253470 }, { "epoch": 0.5120456372693593, "grad_norm": 259.0510559082031, "learning_rate": 5.74836699210211e-06, "loss": 17.8123, "step": 253480 }, { "epoch": 0.5120658379020431, "grad_norm": 105.40782928466797, "learning_rate": 5.748021856483212e-06, "loss": 12.7684, "step": 253490 }, { "epoch": 0.5120860385347269, "grad_norm": 322.8953552246094, "learning_rate": 5.747676717218549e-06, "loss": 15.4256, "step": 253500 }, { "epoch": 0.5121062391674107, "grad_norm": 324.45068359375, "learning_rate": 5.747331574309798e-06, "loss": 17.9697, "step": 253510 }, { "epoch": 0.5121264398000945, "grad_norm": 738.5859985351562, "learning_rate": 5.746986427758645e-06, "loss": 27.7131, "step": 253520 }, { "epoch": 0.5121466404327784, "grad_norm": 26.287155151367188, "learning_rate": 5.74664127756677e-06, "loss": 23.1173, "step": 253530 }, { "epoch": 0.5121668410654622, "grad_norm": 175.9619140625, "learning_rate": 5.746296123735857e-06, "loss": 28.9954, "step": 253540 }, { "epoch": 0.512187041698146, "grad_norm": 452.6490783691406, "learning_rate": 5.745950966267586e-06, "loss": 15.6788, "step": 253550 }, { "epoch": 0.5122072423308298, "grad_norm": 303.5792541503906, "learning_rate": 5.745605805163641e-06, "loss": 19.4856, "step": 253560 }, { "epoch": 0.5122274429635136, "grad_norm": 169.6615447998047, "learning_rate": 5.745260640425704e-06, "loss": 15.309, "step": 253570 }, { "epoch": 0.5122476435961975, "grad_norm": 119.26287078857422, "learning_rate": 5.744915472055457e-06, "loss": 13.1599, "step": 253580 }, { "epoch": 0.5122678442288813, "grad_norm": 488.5266418457031, "learning_rate": 5.744570300054583e-06, "loss": 24.8486, "step": 253590 }, { "epoch": 0.5122880448615651, "grad_norm": 487.51153564453125, "learning_rate": 5.744225124424762e-06, "loss": 39.7231, "step": 253600 }, { "epoch": 0.5123082454942489, "grad_norm": 379.5707092285156, "learning_rate": 5.743879945167678e-06, "loss": 22.9738, "step": 253610 }, { "epoch": 0.5123284461269327, "grad_norm": 329.4046325683594, "learning_rate": 5.7435347622850146e-06, "loss": 19.2722, "step": 253620 }, { "epoch": 0.5123486467596166, "grad_norm": 48.80684280395508, "learning_rate": 5.743189575778452e-06, "loss": 10.8372, "step": 253630 }, { "epoch": 0.5123688473923004, "grad_norm": 208.9192657470703, "learning_rate": 5.742844385649674e-06, "loss": 19.2507, "step": 253640 }, { "epoch": 0.5123890480249842, "grad_norm": 626.241943359375, "learning_rate": 5.742499191900364e-06, "loss": 20.6766, "step": 253650 }, { "epoch": 0.5124092486576679, "grad_norm": 191.5937042236328, "learning_rate": 5.7421539945322006e-06, "loss": 16.9434, "step": 253660 }, { "epoch": 0.5124294492903517, "grad_norm": 130.2559051513672, "learning_rate": 5.7418087935468706e-06, "loss": 15.9138, "step": 253670 }, { "epoch": 0.5124496499230355, "grad_norm": 293.62078857421875, "learning_rate": 5.741463588946053e-06, "loss": 12.1295, "step": 253680 }, { "epoch": 0.5124698505557194, "grad_norm": 94.0160903930664, "learning_rate": 5.741118380731432e-06, "loss": 16.8452, "step": 253690 }, { "epoch": 0.5124900511884032, "grad_norm": 362.78118896484375, "learning_rate": 5.740773168904691e-06, "loss": 16.5253, "step": 253700 }, { "epoch": 0.512510251821087, "grad_norm": 135.69134521484375, "learning_rate": 5.74042795346751e-06, "loss": 14.3394, "step": 253710 }, { "epoch": 0.5125304524537708, "grad_norm": 405.4832763671875, "learning_rate": 5.740082734421574e-06, "loss": 20.4736, "step": 253720 }, { "epoch": 0.5125506530864546, "grad_norm": 144.99844360351562, "learning_rate": 5.7397375117685635e-06, "loss": 15.929, "step": 253730 }, { "epoch": 0.5125708537191385, "grad_norm": 140.96217346191406, "learning_rate": 5.739392285510162e-06, "loss": 28.4868, "step": 253740 }, { "epoch": 0.5125910543518223, "grad_norm": 121.79772186279297, "learning_rate": 5.7390470556480545e-06, "loss": 21.5304, "step": 253750 }, { "epoch": 0.5126112549845061, "grad_norm": 244.34906005859375, "learning_rate": 5.7387018221839195e-06, "loss": 14.0509, "step": 253760 }, { "epoch": 0.5126314556171899, "grad_norm": 295.27374267578125, "learning_rate": 5.738356585119441e-06, "loss": 14.8742, "step": 253770 }, { "epoch": 0.5126516562498737, "grad_norm": 254.30955505371094, "learning_rate": 5.738011344456302e-06, "loss": 20.5418, "step": 253780 }, { "epoch": 0.5126718568825576, "grad_norm": 143.29164123535156, "learning_rate": 5.737666100196188e-06, "loss": 9.2871, "step": 253790 }, { "epoch": 0.5126920575152414, "grad_norm": 33.50775146484375, "learning_rate": 5.737320852340776e-06, "loss": 19.2799, "step": 253800 }, { "epoch": 0.5127122581479252, "grad_norm": 470.3171081542969, "learning_rate": 5.736975600891752e-06, "loss": 25.0348, "step": 253810 }, { "epoch": 0.512732458780609, "grad_norm": 220.83616638183594, "learning_rate": 5.7366303458507986e-06, "loss": 17.0437, "step": 253820 }, { "epoch": 0.5127526594132928, "grad_norm": 84.5722427368164, "learning_rate": 5.736285087219599e-06, "loss": 16.1016, "step": 253830 }, { "epoch": 0.5127728600459767, "grad_norm": 649.6181030273438, "learning_rate": 5.7359398249998335e-06, "loss": 16.5428, "step": 253840 }, { "epoch": 0.5127930606786605, "grad_norm": 36.09796905517578, "learning_rate": 5.735594559193187e-06, "loss": 30.4755, "step": 253850 }, { "epoch": 0.5128132613113443, "grad_norm": 415.18408203125, "learning_rate": 5.735249289801343e-06, "loss": 19.0643, "step": 253860 }, { "epoch": 0.5128334619440281, "grad_norm": 345.36920166015625, "learning_rate": 5.734904016825982e-06, "loss": 14.3597, "step": 253870 }, { "epoch": 0.5128536625767119, "grad_norm": 409.6925048828125, "learning_rate": 5.73455874026879e-06, "loss": 25.3695, "step": 253880 }, { "epoch": 0.5128738632093958, "grad_norm": 114.00918579101562, "learning_rate": 5.7342134601314445e-06, "loss": 20.4491, "step": 253890 }, { "epoch": 0.5128940638420796, "grad_norm": 271.3489685058594, "learning_rate": 5.733868176415633e-06, "loss": 25.9104, "step": 253900 }, { "epoch": 0.5129142644747634, "grad_norm": 249.5345916748047, "learning_rate": 5.733522889123038e-06, "loss": 13.8485, "step": 253910 }, { "epoch": 0.5129344651074471, "grad_norm": 320.58160400390625, "learning_rate": 5.733177598255341e-06, "loss": 34.8867, "step": 253920 }, { "epoch": 0.5129546657401309, "grad_norm": 250.7678985595703, "learning_rate": 5.732832303814225e-06, "loss": 13.328, "step": 253930 }, { "epoch": 0.5129748663728148, "grad_norm": 304.14556884765625, "learning_rate": 5.7324870058013736e-06, "loss": 23.8671, "step": 253940 }, { "epoch": 0.5129950670054986, "grad_norm": 291.4685974121094, "learning_rate": 5.732141704218469e-06, "loss": 24.2693, "step": 253950 }, { "epoch": 0.5130152676381824, "grad_norm": 335.6932373046875, "learning_rate": 5.731796399067194e-06, "loss": 17.6598, "step": 253960 }, { "epoch": 0.5130354682708662, "grad_norm": 141.06265258789062, "learning_rate": 5.731451090349234e-06, "loss": 18.9711, "step": 253970 }, { "epoch": 0.51305566890355, "grad_norm": 231.74606323242188, "learning_rate": 5.731105778066268e-06, "loss": 19.8735, "step": 253980 }, { "epoch": 0.5130758695362339, "grad_norm": 0.0, "learning_rate": 5.730760462219983e-06, "loss": 14.4515, "step": 253990 }, { "epoch": 0.5130960701689177, "grad_norm": 377.10833740234375, "learning_rate": 5.730415142812059e-06, "loss": 20.1133, "step": 254000 }, { "epoch": 0.5131162708016015, "grad_norm": 435.09429931640625, "learning_rate": 5.73006981984418e-06, "loss": 20.6712, "step": 254010 }, { "epoch": 0.5131364714342853, "grad_norm": 129.9061279296875, "learning_rate": 5.7297244933180306e-06, "loss": 11.3453, "step": 254020 }, { "epoch": 0.5131566720669691, "grad_norm": 576.8067016601562, "learning_rate": 5.72937916323529e-06, "loss": 18.581, "step": 254030 }, { "epoch": 0.513176872699653, "grad_norm": 497.6711730957031, "learning_rate": 5.729033829597646e-06, "loss": 23.8428, "step": 254040 }, { "epoch": 0.5131970733323368, "grad_norm": 76.8863525390625, "learning_rate": 5.728688492406778e-06, "loss": 11.1123, "step": 254050 }, { "epoch": 0.5132172739650206, "grad_norm": 305.623779296875, "learning_rate": 5.728343151664371e-06, "loss": 12.1834, "step": 254060 }, { "epoch": 0.5132374745977044, "grad_norm": 195.4307403564453, "learning_rate": 5.727997807372109e-06, "loss": 22.4545, "step": 254070 }, { "epoch": 0.5132576752303882, "grad_norm": 339.5502624511719, "learning_rate": 5.727652459531674e-06, "loss": 39.6915, "step": 254080 }, { "epoch": 0.513277875863072, "grad_norm": 169.36756896972656, "learning_rate": 5.727307108144749e-06, "loss": 16.1117, "step": 254090 }, { "epoch": 0.5132980764957559, "grad_norm": 232.59197998046875, "learning_rate": 5.726961753213016e-06, "loss": 20.1019, "step": 254100 }, { "epoch": 0.5133182771284397, "grad_norm": 213.02000427246094, "learning_rate": 5.726616394738161e-06, "loss": 16.5275, "step": 254110 }, { "epoch": 0.5133384777611235, "grad_norm": 221.1517791748047, "learning_rate": 5.726271032721864e-06, "loss": 11.9406, "step": 254120 }, { "epoch": 0.5133586783938073, "grad_norm": 466.3408203125, "learning_rate": 5.725925667165812e-06, "loss": 13.5652, "step": 254130 }, { "epoch": 0.5133788790264912, "grad_norm": 415.5694885253906, "learning_rate": 5.725580298071685e-06, "loss": 16.5602, "step": 254140 }, { "epoch": 0.513399079659175, "grad_norm": 9.30356502532959, "learning_rate": 5.725234925441169e-06, "loss": 20.2409, "step": 254150 }, { "epoch": 0.5134192802918588, "grad_norm": 513.2581787109375, "learning_rate": 5.724889549275945e-06, "loss": 22.0298, "step": 254160 }, { "epoch": 0.5134394809245425, "grad_norm": 520.2511596679688, "learning_rate": 5.724544169577697e-06, "loss": 20.2418, "step": 254170 }, { "epoch": 0.5134596815572263, "grad_norm": 140.28872680664062, "learning_rate": 5.72419878634811e-06, "loss": 15.9108, "step": 254180 }, { "epoch": 0.5134798821899101, "grad_norm": 521.94287109375, "learning_rate": 5.7238533995888645e-06, "loss": 20.5288, "step": 254190 }, { "epoch": 0.513500082822594, "grad_norm": 605.1796264648438, "learning_rate": 5.723508009301646e-06, "loss": 24.8135, "step": 254200 }, { "epoch": 0.5135202834552778, "grad_norm": 203.43077087402344, "learning_rate": 5.723162615488137e-06, "loss": 17.9746, "step": 254210 }, { "epoch": 0.5135404840879616, "grad_norm": 200.22918701171875, "learning_rate": 5.722817218150021e-06, "loss": 13.287, "step": 254220 }, { "epoch": 0.5135606847206454, "grad_norm": 167.10012817382812, "learning_rate": 5.722471817288982e-06, "loss": 13.3409, "step": 254230 }, { "epoch": 0.5135808853533292, "grad_norm": 581.9638061523438, "learning_rate": 5.722126412906703e-06, "loss": 18.7702, "step": 254240 }, { "epoch": 0.5136010859860131, "grad_norm": 248.90768432617188, "learning_rate": 5.721781005004866e-06, "loss": 23.3996, "step": 254250 }, { "epoch": 0.5136212866186969, "grad_norm": 289.7374572753906, "learning_rate": 5.721435593585158e-06, "loss": 23.7401, "step": 254260 }, { "epoch": 0.5136414872513807, "grad_norm": 580.4411010742188, "learning_rate": 5.72109017864926e-06, "loss": 23.1348, "step": 254270 }, { "epoch": 0.5136616878840645, "grad_norm": 72.83583068847656, "learning_rate": 5.720744760198855e-06, "loss": 26.9432, "step": 254280 }, { "epoch": 0.5136818885167483, "grad_norm": 492.5545959472656, "learning_rate": 5.720399338235628e-06, "loss": 22.7609, "step": 254290 }, { "epoch": 0.5137020891494322, "grad_norm": 257.7398681640625, "learning_rate": 5.720053912761261e-06, "loss": 26.6541, "step": 254300 }, { "epoch": 0.513722289782116, "grad_norm": 436.4644775390625, "learning_rate": 5.719708483777441e-06, "loss": 17.638, "step": 254310 }, { "epoch": 0.5137424904147998, "grad_norm": 355.8861083984375, "learning_rate": 5.719363051285847e-06, "loss": 28.8596, "step": 254320 }, { "epoch": 0.5137626910474836, "grad_norm": 362.0155029296875, "learning_rate": 5.719017615288165e-06, "loss": 21.4778, "step": 254330 }, { "epoch": 0.5137828916801674, "grad_norm": 174.845458984375, "learning_rate": 5.718672175786078e-06, "loss": 11.7757, "step": 254340 }, { "epoch": 0.5138030923128513, "grad_norm": 25.57386016845703, "learning_rate": 5.718326732781271e-06, "loss": 16.5323, "step": 254350 }, { "epoch": 0.5138232929455351, "grad_norm": 331.4518737792969, "learning_rate": 5.7179812862754265e-06, "loss": 18.7476, "step": 254360 }, { "epoch": 0.5138434935782189, "grad_norm": 325.46112060546875, "learning_rate": 5.717635836270228e-06, "loss": 17.7785, "step": 254370 }, { "epoch": 0.5138636942109027, "grad_norm": 12.930551528930664, "learning_rate": 5.71729038276736e-06, "loss": 17.8313, "step": 254380 }, { "epoch": 0.5138838948435865, "grad_norm": 99.90182495117188, "learning_rate": 5.716944925768505e-06, "loss": 16.5693, "step": 254390 }, { "epoch": 0.5139040954762704, "grad_norm": 208.16395568847656, "learning_rate": 5.716599465275347e-06, "loss": 32.694, "step": 254400 }, { "epoch": 0.5139242961089542, "grad_norm": 181.3454132080078, "learning_rate": 5.716254001289571e-06, "loss": 22.0217, "step": 254410 }, { "epoch": 0.513944496741638, "grad_norm": 117.0111083984375, "learning_rate": 5.7159085338128595e-06, "loss": 9.333, "step": 254420 }, { "epoch": 0.5139646973743217, "grad_norm": 155.2075653076172, "learning_rate": 5.7155630628468974e-06, "loss": 14.9246, "step": 254430 }, { "epoch": 0.5139848980070055, "grad_norm": 328.562255859375, "learning_rate": 5.715217588393367e-06, "loss": 25.3705, "step": 254440 }, { "epoch": 0.5140050986396894, "grad_norm": 448.53582763671875, "learning_rate": 5.714872110453952e-06, "loss": 22.038, "step": 254450 }, { "epoch": 0.5140252992723732, "grad_norm": 237.5946807861328, "learning_rate": 5.714526629030338e-06, "loss": 9.4407, "step": 254460 }, { "epoch": 0.514045499905057, "grad_norm": 288.1895446777344, "learning_rate": 5.714181144124209e-06, "loss": 13.5729, "step": 254470 }, { "epoch": 0.5140657005377408, "grad_norm": 545.0930786132812, "learning_rate": 5.7138356557372444e-06, "loss": 23.2966, "step": 254480 }, { "epoch": 0.5140859011704246, "grad_norm": 281.0247802734375, "learning_rate": 5.713490163871135e-06, "loss": 24.4417, "step": 254490 }, { "epoch": 0.5141061018031085, "grad_norm": 197.87193298339844, "learning_rate": 5.7131446685275595e-06, "loss": 33.3609, "step": 254500 }, { "epoch": 0.5141263024357923, "grad_norm": 351.1820068359375, "learning_rate": 5.712799169708203e-06, "loss": 18.8949, "step": 254510 }, { "epoch": 0.5141465030684761, "grad_norm": 349.86004638671875, "learning_rate": 5.71245366741475e-06, "loss": 20.8694, "step": 254520 }, { "epoch": 0.5141667037011599, "grad_norm": 312.7584533691406, "learning_rate": 5.712108161648885e-06, "loss": 30.5473, "step": 254530 }, { "epoch": 0.5141869043338437, "grad_norm": 148.776123046875, "learning_rate": 5.7117626524122905e-06, "loss": 16.9213, "step": 254540 }, { "epoch": 0.5142071049665276, "grad_norm": 200.54832458496094, "learning_rate": 5.711417139706651e-06, "loss": 16.2851, "step": 254550 }, { "epoch": 0.5142273055992114, "grad_norm": 249.05418395996094, "learning_rate": 5.711071623533651e-06, "loss": 26.024, "step": 254560 }, { "epoch": 0.5142475062318952, "grad_norm": 454.7738037109375, "learning_rate": 5.710726103894974e-06, "loss": 16.3285, "step": 254570 }, { "epoch": 0.514267706864579, "grad_norm": 193.38250732421875, "learning_rate": 5.710380580792305e-06, "loss": 11.7249, "step": 254580 }, { "epoch": 0.5142879074972628, "grad_norm": 701.7462768554688, "learning_rate": 5.710035054227326e-06, "loss": 25.9114, "step": 254590 }, { "epoch": 0.5143081081299467, "grad_norm": 415.61199951171875, "learning_rate": 5.709689524201723e-06, "loss": 6.6423, "step": 254600 }, { "epoch": 0.5143283087626305, "grad_norm": 331.1233825683594, "learning_rate": 5.709343990717179e-06, "loss": 12.3624, "step": 254610 }, { "epoch": 0.5143485093953143, "grad_norm": 871.7745361328125, "learning_rate": 5.708998453775378e-06, "loss": 34.4316, "step": 254620 }, { "epoch": 0.5143687100279981, "grad_norm": 215.926513671875, "learning_rate": 5.708652913378005e-06, "loss": 17.2928, "step": 254630 }, { "epoch": 0.5143889106606819, "grad_norm": 59.783119201660156, "learning_rate": 5.7083073695267435e-06, "loss": 16.1324, "step": 254640 }, { "epoch": 0.5144091112933658, "grad_norm": 276.78912353515625, "learning_rate": 5.707961822223279e-06, "loss": 18.4161, "step": 254650 }, { "epoch": 0.5144293119260496, "grad_norm": 230.135498046875, "learning_rate": 5.707616271469293e-06, "loss": 30.0935, "step": 254660 }, { "epoch": 0.5144495125587334, "grad_norm": 157.81597900390625, "learning_rate": 5.707270717266471e-06, "loss": 26.2185, "step": 254670 }, { "epoch": 0.5144697131914172, "grad_norm": 222.95639038085938, "learning_rate": 5.7069251596164975e-06, "loss": 22.3256, "step": 254680 }, { "epoch": 0.5144899138241009, "grad_norm": 349.41485595703125, "learning_rate": 5.706579598521058e-06, "loss": 11.8729, "step": 254690 }, { "epoch": 0.5145101144567847, "grad_norm": 193.88668823242188, "learning_rate": 5.706234033981835e-06, "loss": 17.6806, "step": 254700 }, { "epoch": 0.5145303150894686, "grad_norm": 517.8558349609375, "learning_rate": 5.705888466000511e-06, "loss": 25.9548, "step": 254710 }, { "epoch": 0.5145505157221524, "grad_norm": 156.00283813476562, "learning_rate": 5.705542894578773e-06, "loss": 20.4509, "step": 254720 }, { "epoch": 0.5145707163548362, "grad_norm": 88.97614288330078, "learning_rate": 5.705197319718304e-06, "loss": 38.7474, "step": 254730 }, { "epoch": 0.51459091698752, "grad_norm": 117.96489715576172, "learning_rate": 5.704851741420792e-06, "loss": 6.5741, "step": 254740 }, { "epoch": 0.5146111176202038, "grad_norm": 445.5604248046875, "learning_rate": 5.704506159687914e-06, "loss": 50.0862, "step": 254750 }, { "epoch": 0.5146313182528877, "grad_norm": 410.66802978515625, "learning_rate": 5.7041605745213605e-06, "loss": 42.2145, "step": 254760 }, { "epoch": 0.5146515188855715, "grad_norm": 258.73394775390625, "learning_rate": 5.703814985922813e-06, "loss": 17.1258, "step": 254770 }, { "epoch": 0.5146717195182553, "grad_norm": 338.84698486328125, "learning_rate": 5.703469393893957e-06, "loss": 17.4703, "step": 254780 }, { "epoch": 0.5146919201509391, "grad_norm": 71.55464935302734, "learning_rate": 5.7031237984364776e-06, "loss": 16.5107, "step": 254790 }, { "epoch": 0.5147121207836229, "grad_norm": 641.846435546875, "learning_rate": 5.702778199552055e-06, "loss": 17.7602, "step": 254800 }, { "epoch": 0.5147323214163068, "grad_norm": 173.63238525390625, "learning_rate": 5.7024325972423795e-06, "loss": 14.7053, "step": 254810 }, { "epoch": 0.5147525220489906, "grad_norm": 133.73562622070312, "learning_rate": 5.702086991509133e-06, "loss": 16.6784, "step": 254820 }, { "epoch": 0.5147727226816744, "grad_norm": 282.59814453125, "learning_rate": 5.701741382353998e-06, "loss": 15.5768, "step": 254830 }, { "epoch": 0.5147929233143582, "grad_norm": 133.83265686035156, "learning_rate": 5.70139576977866e-06, "loss": 16.5699, "step": 254840 }, { "epoch": 0.514813123947042, "grad_norm": 204.97201538085938, "learning_rate": 5.701050153784806e-06, "loss": 11.2832, "step": 254850 }, { "epoch": 0.5148333245797259, "grad_norm": 512.0682983398438, "learning_rate": 5.7007045343741176e-06, "loss": 29.9389, "step": 254860 }, { "epoch": 0.5148535252124097, "grad_norm": 43.07607650756836, "learning_rate": 5.70035891154828e-06, "loss": 24.434, "step": 254870 }, { "epoch": 0.5148737258450935, "grad_norm": 317.4356994628906, "learning_rate": 5.700013285308979e-06, "loss": 35.8571, "step": 254880 }, { "epoch": 0.5148939264777773, "grad_norm": 258.88409423828125, "learning_rate": 5.699667655657898e-06, "loss": 16.8953, "step": 254890 }, { "epoch": 0.5149141271104611, "grad_norm": 533.7759399414062, "learning_rate": 5.6993220225967214e-06, "loss": 21.6991, "step": 254900 }, { "epoch": 0.514934327743145, "grad_norm": 476.18182373046875, "learning_rate": 5.698976386127133e-06, "loss": 17.2035, "step": 254910 }, { "epoch": 0.5149545283758288, "grad_norm": 286.865234375, "learning_rate": 5.69863074625082e-06, "loss": 11.046, "step": 254920 }, { "epoch": 0.5149747290085126, "grad_norm": 356.1601257324219, "learning_rate": 5.6982851029694645e-06, "loss": 20.764, "step": 254930 }, { "epoch": 0.5149949296411963, "grad_norm": 332.8426208496094, "learning_rate": 5.697939456284753e-06, "loss": 19.1825, "step": 254940 }, { "epoch": 0.5150151302738801, "grad_norm": 8.152519226074219, "learning_rate": 5.697593806198369e-06, "loss": 26.6766, "step": 254950 }, { "epoch": 0.515035330906564, "grad_norm": 494.39154052734375, "learning_rate": 5.697248152711997e-06, "loss": 32.2604, "step": 254960 }, { "epoch": 0.5150555315392478, "grad_norm": 266.9371032714844, "learning_rate": 5.696902495827323e-06, "loss": 20.3483, "step": 254970 }, { "epoch": 0.5150757321719316, "grad_norm": 557.727783203125, "learning_rate": 5.69655683554603e-06, "loss": 22.5659, "step": 254980 }, { "epoch": 0.5150959328046154, "grad_norm": 448.79296875, "learning_rate": 5.6962111718698035e-06, "loss": 14.4727, "step": 254990 }, { "epoch": 0.5151161334372992, "grad_norm": 205.77252197265625, "learning_rate": 5.695865504800328e-06, "loss": 15.8414, "step": 255000 }, { "epoch": 0.515136334069983, "grad_norm": 60.22119903564453, "learning_rate": 5.695519834339288e-06, "loss": 22.7486, "step": 255010 }, { "epoch": 0.5151565347026669, "grad_norm": 355.2154846191406, "learning_rate": 5.695174160488369e-06, "loss": 13.6999, "step": 255020 }, { "epoch": 0.5151767353353507, "grad_norm": 306.7707214355469, "learning_rate": 5.694828483249257e-06, "loss": 20.0998, "step": 255030 }, { "epoch": 0.5151969359680345, "grad_norm": 70.03717803955078, "learning_rate": 5.694482802623634e-06, "loss": 10.8407, "step": 255040 }, { "epoch": 0.5152171366007183, "grad_norm": 440.99383544921875, "learning_rate": 5.694137118613185e-06, "loss": 21.6594, "step": 255050 }, { "epoch": 0.5152373372334021, "grad_norm": 22.99594497680664, "learning_rate": 5.693791431219599e-06, "loss": 13.9544, "step": 255060 }, { "epoch": 0.515257537866086, "grad_norm": 129.08335876464844, "learning_rate": 5.693445740444554e-06, "loss": 22.8678, "step": 255070 }, { "epoch": 0.5152777384987698, "grad_norm": 226.2176971435547, "learning_rate": 5.693100046289741e-06, "loss": 25.4926, "step": 255080 }, { "epoch": 0.5152979391314536, "grad_norm": 240.5113067626953, "learning_rate": 5.692754348756841e-06, "loss": 10.7279, "step": 255090 }, { "epoch": 0.5153181397641374, "grad_norm": 320.40740966796875, "learning_rate": 5.692408647847542e-06, "loss": 29.1683, "step": 255100 }, { "epoch": 0.5153383403968212, "grad_norm": 229.44317626953125, "learning_rate": 5.692062943563525e-06, "loss": 17.2986, "step": 255110 }, { "epoch": 0.5153585410295051, "grad_norm": 96.58769226074219, "learning_rate": 5.691717235906479e-06, "loss": 18.9004, "step": 255120 }, { "epoch": 0.5153787416621889, "grad_norm": 275.635498046875, "learning_rate": 5.691371524878087e-06, "loss": 16.5435, "step": 255130 }, { "epoch": 0.5153989422948727, "grad_norm": 190.42567443847656, "learning_rate": 5.6910258104800335e-06, "loss": 13.3475, "step": 255140 }, { "epoch": 0.5154191429275565, "grad_norm": 133.02833557128906, "learning_rate": 5.690680092714004e-06, "loss": 14.603, "step": 255150 }, { "epoch": 0.5154393435602403, "grad_norm": 361.9234924316406, "learning_rate": 5.690334371581683e-06, "loss": 23.1671, "step": 255160 }, { "epoch": 0.5154595441929242, "grad_norm": 337.7878112792969, "learning_rate": 5.689988647084756e-06, "loss": 30.4633, "step": 255170 }, { "epoch": 0.515479744825608, "grad_norm": 535.760986328125, "learning_rate": 5.6896429192249085e-06, "loss": 21.1913, "step": 255180 }, { "epoch": 0.5154999454582918, "grad_norm": 202.32672119140625, "learning_rate": 5.689297188003826e-06, "loss": 17.2422, "step": 255190 }, { "epoch": 0.5155201460909755, "grad_norm": 272.14306640625, "learning_rate": 5.68895145342319e-06, "loss": 16.7045, "step": 255200 }, { "epoch": 0.5155403467236593, "grad_norm": 335.41400146484375, "learning_rate": 5.688605715484691e-06, "loss": 13.0355, "step": 255210 }, { "epoch": 0.5155605473563432, "grad_norm": 325.00823974609375, "learning_rate": 5.68825997419001e-06, "loss": 12.9682, "step": 255220 }, { "epoch": 0.515580747989027, "grad_norm": 173.79905700683594, "learning_rate": 5.687914229540833e-06, "loss": 18.6543, "step": 255230 }, { "epoch": 0.5156009486217108, "grad_norm": 174.67311096191406, "learning_rate": 5.6875684815388475e-06, "loss": 23.1727, "step": 255240 }, { "epoch": 0.5156211492543946, "grad_norm": 369.1164855957031, "learning_rate": 5.687222730185733e-06, "loss": 19.5644, "step": 255250 }, { "epoch": 0.5156413498870784, "grad_norm": 72.65565490722656, "learning_rate": 5.686876975483182e-06, "loss": 16.0903, "step": 255260 }, { "epoch": 0.5156615505197623, "grad_norm": 321.65802001953125, "learning_rate": 5.686531217432873e-06, "loss": 28.2503, "step": 255270 }, { "epoch": 0.5156817511524461, "grad_norm": 139.31939697265625, "learning_rate": 5.686185456036496e-06, "loss": 7.7288, "step": 255280 }, { "epoch": 0.5157019517851299, "grad_norm": 143.13449096679688, "learning_rate": 5.685839691295734e-06, "loss": 13.0701, "step": 255290 }, { "epoch": 0.5157221524178137, "grad_norm": 342.24566650390625, "learning_rate": 5.685493923212273e-06, "loss": 25.9651, "step": 255300 }, { "epoch": 0.5157423530504975, "grad_norm": 353.04498291015625, "learning_rate": 5.685148151787796e-06, "loss": 12.2886, "step": 255310 }, { "epoch": 0.5157625536831814, "grad_norm": 292.7852783203125, "learning_rate": 5.684802377023991e-06, "loss": 18.7031, "step": 255320 }, { "epoch": 0.5157827543158652, "grad_norm": 722.6323852539062, "learning_rate": 5.684456598922542e-06, "loss": 19.4056, "step": 255330 }, { "epoch": 0.515802954948549, "grad_norm": 317.84442138671875, "learning_rate": 5.684110817485135e-06, "loss": 19.4486, "step": 255340 }, { "epoch": 0.5158231555812328, "grad_norm": 146.2397918701172, "learning_rate": 5.683765032713455e-06, "loss": 17.7238, "step": 255350 }, { "epoch": 0.5158433562139166, "grad_norm": 317.1430358886719, "learning_rate": 5.683419244609185e-06, "loss": 18.3853, "step": 255360 }, { "epoch": 0.5158635568466005, "grad_norm": 27.422441482543945, "learning_rate": 5.683073453174016e-06, "loss": 18.9533, "step": 255370 }, { "epoch": 0.5158837574792843, "grad_norm": 203.72201538085938, "learning_rate": 5.682727658409628e-06, "loss": 9.3473, "step": 255380 }, { "epoch": 0.5159039581119681, "grad_norm": 205.38072204589844, "learning_rate": 5.682381860317708e-06, "loss": 16.4411, "step": 255390 }, { "epoch": 0.5159241587446519, "grad_norm": 127.793212890625, "learning_rate": 5.682036058899942e-06, "loss": 17.5183, "step": 255400 }, { "epoch": 0.5159443593773357, "grad_norm": 431.080810546875, "learning_rate": 5.681690254158015e-06, "loss": 20.5413, "step": 255410 }, { "epoch": 0.5159645600100196, "grad_norm": 381.1543884277344, "learning_rate": 5.681344446093613e-06, "loss": 32.509, "step": 255420 }, { "epoch": 0.5159847606427034, "grad_norm": 77.7982177734375, "learning_rate": 5.680998634708419e-06, "loss": 23.3852, "step": 255430 }, { "epoch": 0.5160049612753872, "grad_norm": 324.9399719238281, "learning_rate": 5.6806528200041226e-06, "loss": 14.2961, "step": 255440 }, { "epoch": 0.5160251619080709, "grad_norm": 146.41201782226562, "learning_rate": 5.680307001982405e-06, "loss": 7.5408, "step": 255450 }, { "epoch": 0.5160453625407547, "grad_norm": 162.37672424316406, "learning_rate": 5.679961180644954e-06, "loss": 15.022, "step": 255460 }, { "epoch": 0.5160655631734385, "grad_norm": 339.5776672363281, "learning_rate": 5.679615355993455e-06, "loss": 14.5593, "step": 255470 }, { "epoch": 0.5160857638061224, "grad_norm": 208.3057098388672, "learning_rate": 5.679269528029593e-06, "loss": 20.3211, "step": 255480 }, { "epoch": 0.5161059644388062, "grad_norm": 440.0595397949219, "learning_rate": 5.678923696755054e-06, "loss": 22.0649, "step": 255490 }, { "epoch": 0.51612616507149, "grad_norm": 173.05210876464844, "learning_rate": 5.678577862171523e-06, "loss": 7.572, "step": 255500 }, { "epoch": 0.5161463657041738, "grad_norm": 119.2420654296875, "learning_rate": 5.678232024280687e-06, "loss": 27.6226, "step": 255510 }, { "epoch": 0.5161665663368576, "grad_norm": 238.20542907714844, "learning_rate": 5.677886183084227e-06, "loss": 15.5409, "step": 255520 }, { "epoch": 0.5161867669695415, "grad_norm": 276.6334533691406, "learning_rate": 5.677540338583836e-06, "loss": 16.1484, "step": 255530 }, { "epoch": 0.5162069676022253, "grad_norm": 264.7768859863281, "learning_rate": 5.677194490781192e-06, "loss": 22.04, "step": 255540 }, { "epoch": 0.5162271682349091, "grad_norm": 283.718994140625, "learning_rate": 5.676848639677987e-06, "loss": 28.1484, "step": 255550 }, { "epoch": 0.5162473688675929, "grad_norm": 0.0, "learning_rate": 5.6765027852759015e-06, "loss": 18.1477, "step": 255560 }, { "epoch": 0.5162675695002767, "grad_norm": 74.58321380615234, "learning_rate": 5.6761569275766246e-06, "loss": 12.324, "step": 255570 }, { "epoch": 0.5162877701329606, "grad_norm": 383.9439697265625, "learning_rate": 5.675811066581842e-06, "loss": 10.8873, "step": 255580 }, { "epoch": 0.5163079707656444, "grad_norm": 387.9299621582031, "learning_rate": 5.675465202293238e-06, "loss": 33.7816, "step": 255590 }, { "epoch": 0.5163281713983282, "grad_norm": 271.5996398925781, "learning_rate": 5.675119334712496e-06, "loss": 10.9214, "step": 255600 }, { "epoch": 0.516348372031012, "grad_norm": 659.542236328125, "learning_rate": 5.674773463841306e-06, "loss": 21.9678, "step": 255610 }, { "epoch": 0.5163685726636958, "grad_norm": 158.9383087158203, "learning_rate": 5.674427589681353e-06, "loss": 32.3553, "step": 255620 }, { "epoch": 0.5163887732963797, "grad_norm": 5.777574062347412, "learning_rate": 5.674081712234319e-06, "loss": 21.778, "step": 255630 }, { "epoch": 0.5164089739290635, "grad_norm": 481.7895202636719, "learning_rate": 5.6737358315018954e-06, "loss": 13.8249, "step": 255640 }, { "epoch": 0.5164291745617473, "grad_norm": 70.40879821777344, "learning_rate": 5.673389947485763e-06, "loss": 22.7174, "step": 255650 }, { "epoch": 0.5164493751944311, "grad_norm": 191.72650146484375, "learning_rate": 5.673044060187612e-06, "loss": 12.6066, "step": 255660 }, { "epoch": 0.516469575827115, "grad_norm": 380.042236328125, "learning_rate": 5.672698169609125e-06, "loss": 17.4248, "step": 255670 }, { "epoch": 0.5164897764597988, "grad_norm": 0.0, "learning_rate": 5.672352275751986e-06, "loss": 10.5637, "step": 255680 }, { "epoch": 0.5165099770924826, "grad_norm": 99.30463409423828, "learning_rate": 5.672006378617887e-06, "loss": 12.5247, "step": 255690 }, { "epoch": 0.5165301777251664, "grad_norm": 247.3821563720703, "learning_rate": 5.671660478208508e-06, "loss": 15.9123, "step": 255700 }, { "epoch": 0.5165503783578501, "grad_norm": 14.239204406738281, "learning_rate": 5.671314574525539e-06, "loss": 33.6204, "step": 255710 }, { "epoch": 0.5165705789905339, "grad_norm": 103.40309143066406, "learning_rate": 5.670968667570663e-06, "loss": 20.3274, "step": 255720 }, { "epoch": 0.5165907796232178, "grad_norm": 420.71649169921875, "learning_rate": 5.670622757345567e-06, "loss": 37.7399, "step": 255730 }, { "epoch": 0.5166109802559016, "grad_norm": 795.6857299804688, "learning_rate": 5.670276843851939e-06, "loss": 22.3497, "step": 255740 }, { "epoch": 0.5166311808885854, "grad_norm": 244.82371520996094, "learning_rate": 5.6699309270914615e-06, "loss": 13.4531, "step": 255750 }, { "epoch": 0.5166513815212692, "grad_norm": 251.3655242919922, "learning_rate": 5.669585007065822e-06, "loss": 33.3393, "step": 255760 }, { "epoch": 0.516671582153953, "grad_norm": 277.57147216796875, "learning_rate": 5.669239083776705e-06, "loss": 12.9531, "step": 255770 }, { "epoch": 0.5166917827866369, "grad_norm": 385.08782958984375, "learning_rate": 5.6688931572258e-06, "loss": 42.9166, "step": 255780 }, { "epoch": 0.5167119834193207, "grad_norm": 198.43801879882812, "learning_rate": 5.66854722741479e-06, "loss": 12.2479, "step": 255790 }, { "epoch": 0.5167321840520045, "grad_norm": 151.24290466308594, "learning_rate": 5.668201294345363e-06, "loss": 12.6463, "step": 255800 }, { "epoch": 0.5167523846846883, "grad_norm": 988.4615478515625, "learning_rate": 5.667855358019203e-06, "loss": 34.6549, "step": 255810 }, { "epoch": 0.5167725853173721, "grad_norm": 437.3974609375, "learning_rate": 5.667509418437996e-06, "loss": 15.555, "step": 255820 }, { "epoch": 0.516792785950056, "grad_norm": 0.0, "learning_rate": 5.66716347560343e-06, "loss": 12.3805, "step": 255830 }, { "epoch": 0.5168129865827398, "grad_norm": 465.49005126953125, "learning_rate": 5.66681752951719e-06, "loss": 12.1102, "step": 255840 }, { "epoch": 0.5168331872154236, "grad_norm": 466.6305847167969, "learning_rate": 5.666471580180963e-06, "loss": 18.417, "step": 255850 }, { "epoch": 0.5168533878481074, "grad_norm": 155.4015655517578, "learning_rate": 5.666125627596433e-06, "loss": 26.7957, "step": 255860 }, { "epoch": 0.5168735884807912, "grad_norm": 17.22089195251465, "learning_rate": 5.665779671765289e-06, "loss": 14.5508, "step": 255870 }, { "epoch": 0.516893789113475, "grad_norm": 474.7943115234375, "learning_rate": 5.665433712689214e-06, "loss": 23.6869, "step": 255880 }, { "epoch": 0.5169139897461589, "grad_norm": 81.22158813476562, "learning_rate": 5.665087750369898e-06, "loss": 14.4761, "step": 255890 }, { "epoch": 0.5169341903788427, "grad_norm": 232.2131805419922, "learning_rate": 5.6647417848090225e-06, "loss": 16.3062, "step": 255900 }, { "epoch": 0.5169543910115265, "grad_norm": 295.6409912109375, "learning_rate": 5.664395816008277e-06, "loss": 17.0638, "step": 255910 }, { "epoch": 0.5169745916442103, "grad_norm": 348.55419921875, "learning_rate": 5.664049843969348e-06, "loss": 15.3879, "step": 255920 }, { "epoch": 0.5169947922768942, "grad_norm": 526.3408203125, "learning_rate": 5.66370386869392e-06, "loss": 25.5016, "step": 255930 }, { "epoch": 0.517014992909578, "grad_norm": 374.92840576171875, "learning_rate": 5.663357890183679e-06, "loss": 17.2983, "step": 255940 }, { "epoch": 0.5170351935422618, "grad_norm": 313.7781982421875, "learning_rate": 5.6630119084403125e-06, "loss": 28.3423, "step": 255950 }, { "epoch": 0.5170553941749455, "grad_norm": 629.3341064453125, "learning_rate": 5.662665923465508e-06, "loss": 35.9001, "step": 255960 }, { "epoch": 0.5170755948076293, "grad_norm": 1285.2620849609375, "learning_rate": 5.662319935260947e-06, "loss": 35.8605, "step": 255970 }, { "epoch": 0.5170957954403131, "grad_norm": 193.32420349121094, "learning_rate": 5.661973943828321e-06, "loss": 18.9697, "step": 255980 }, { "epoch": 0.517115996072997, "grad_norm": 382.97723388671875, "learning_rate": 5.661627949169315e-06, "loss": 35.9798, "step": 255990 }, { "epoch": 0.5171361967056808, "grad_norm": 172.29153442382812, "learning_rate": 5.661281951285613e-06, "loss": 11.0411, "step": 256000 }, { "epoch": 0.5171563973383646, "grad_norm": 291.1146240234375, "learning_rate": 5.660935950178904e-06, "loss": 20.8456, "step": 256010 }, { "epoch": 0.5171765979710484, "grad_norm": 109.14506530761719, "learning_rate": 5.660589945850872e-06, "loss": 11.1765, "step": 256020 }, { "epoch": 0.5171967986037322, "grad_norm": 380.21539306640625, "learning_rate": 5.660243938303206e-06, "loss": 20.9902, "step": 256030 }, { "epoch": 0.5172169992364161, "grad_norm": 172.48512268066406, "learning_rate": 5.659897927537591e-06, "loss": 13.9882, "step": 256040 }, { "epoch": 0.5172371998690999, "grad_norm": 242.27835083007812, "learning_rate": 5.659551913555713e-06, "loss": 17.0546, "step": 256050 }, { "epoch": 0.5172574005017837, "grad_norm": 214.94818115234375, "learning_rate": 5.659205896359259e-06, "loss": 20.0751, "step": 256060 }, { "epoch": 0.5172776011344675, "grad_norm": 418.91021728515625, "learning_rate": 5.658859875949916e-06, "loss": 20.0402, "step": 256070 }, { "epoch": 0.5172978017671513, "grad_norm": 387.1694030761719, "learning_rate": 5.65851385232937e-06, "loss": 25.7717, "step": 256080 }, { "epoch": 0.5173180023998352, "grad_norm": 244.55508422851562, "learning_rate": 5.658167825499306e-06, "loss": 15.9047, "step": 256090 }, { "epoch": 0.517338203032519, "grad_norm": 339.7708740234375, "learning_rate": 5.657821795461413e-06, "loss": 12.493, "step": 256100 }, { "epoch": 0.5173584036652028, "grad_norm": 133.5510711669922, "learning_rate": 5.657475762217376e-06, "loss": 24.3762, "step": 256110 }, { "epoch": 0.5173786042978866, "grad_norm": 194.23983764648438, "learning_rate": 5.657129725768883e-06, "loss": 18.8254, "step": 256120 }, { "epoch": 0.5173988049305704, "grad_norm": 8.623376846313477, "learning_rate": 5.656783686117617e-06, "loss": 11.0092, "step": 256130 }, { "epoch": 0.5174190055632543, "grad_norm": 343.3054504394531, "learning_rate": 5.656437643265269e-06, "loss": 29.6143, "step": 256140 }, { "epoch": 0.5174392061959381, "grad_norm": 362.1727294921875, "learning_rate": 5.656091597213523e-06, "loss": 19.7388, "step": 256150 }, { "epoch": 0.5174594068286219, "grad_norm": 299.4844055175781, "learning_rate": 5.655745547964067e-06, "loss": 14.9962, "step": 256160 }, { "epoch": 0.5174796074613057, "grad_norm": 295.1239929199219, "learning_rate": 5.6553994955185846e-06, "loss": 29.1138, "step": 256170 }, { "epoch": 0.5174998080939895, "grad_norm": 391.3958435058594, "learning_rate": 5.655053439878766e-06, "loss": 28.8944, "step": 256180 }, { "epoch": 0.5175200087266734, "grad_norm": 339.3231201171875, "learning_rate": 5.654707381046296e-06, "loss": 18.1103, "step": 256190 }, { "epoch": 0.5175402093593572, "grad_norm": 549.136962890625, "learning_rate": 5.654361319022862e-06, "loss": 16.591, "step": 256200 }, { "epoch": 0.517560409992041, "grad_norm": 388.88336181640625, "learning_rate": 5.65401525381015e-06, "loss": 31.0608, "step": 256210 }, { "epoch": 0.5175806106247247, "grad_norm": 0.0, "learning_rate": 5.653669185409847e-06, "loss": 8.557, "step": 256220 }, { "epoch": 0.5176008112574085, "grad_norm": 270.6822509765625, "learning_rate": 5.653323113823639e-06, "loss": 19.6732, "step": 256230 }, { "epoch": 0.5176210118900924, "grad_norm": 30.694931030273438, "learning_rate": 5.652977039053213e-06, "loss": 22.9929, "step": 256240 }, { "epoch": 0.5176412125227762, "grad_norm": 288.6028137207031, "learning_rate": 5.65263096110026e-06, "loss": 17.3105, "step": 256250 }, { "epoch": 0.51766141315546, "grad_norm": 445.9811706542969, "learning_rate": 5.652284879966459e-06, "loss": 16.5481, "step": 256260 }, { "epoch": 0.5176816137881438, "grad_norm": 435.2417297363281, "learning_rate": 5.651938795653501e-06, "loss": 18.3291, "step": 256270 }, { "epoch": 0.5177018144208276, "grad_norm": 141.33279418945312, "learning_rate": 5.651592708163074e-06, "loss": 10.7005, "step": 256280 }, { "epoch": 0.5177220150535115, "grad_norm": 184.3224639892578, "learning_rate": 5.651246617496861e-06, "loss": 15.984, "step": 256290 }, { "epoch": 0.5177422156861953, "grad_norm": 366.53448486328125, "learning_rate": 5.650900523656553e-06, "loss": 14.8555, "step": 256300 }, { "epoch": 0.5177624163188791, "grad_norm": 213.7771759033203, "learning_rate": 5.6505544266438325e-06, "loss": 19.0495, "step": 256310 }, { "epoch": 0.5177826169515629, "grad_norm": 0.0, "learning_rate": 5.650208326460392e-06, "loss": 18.5562, "step": 256320 }, { "epoch": 0.5178028175842467, "grad_norm": 180.3216552734375, "learning_rate": 5.649862223107913e-06, "loss": 18.6471, "step": 256330 }, { "epoch": 0.5178230182169306, "grad_norm": 276.0146484375, "learning_rate": 5.6495161165880826e-06, "loss": 13.6393, "step": 256340 }, { "epoch": 0.5178432188496144, "grad_norm": 338.8985595703125, "learning_rate": 5.649170006902592e-06, "loss": 18.875, "step": 256350 }, { "epoch": 0.5178634194822982, "grad_norm": 441.9057922363281, "learning_rate": 5.6488238940531256e-06, "loss": 16.3307, "step": 256360 }, { "epoch": 0.517883620114982, "grad_norm": 203.91314697265625, "learning_rate": 5.648477778041369e-06, "loss": 16.6386, "step": 256370 }, { "epoch": 0.5179038207476658, "grad_norm": 148.0711669921875, "learning_rate": 5.6481316588690105e-06, "loss": 23.4606, "step": 256380 }, { "epoch": 0.5179240213803497, "grad_norm": 184.4454803466797, "learning_rate": 5.647785536537737e-06, "loss": 14.7997, "step": 256390 }, { "epoch": 0.5179442220130335, "grad_norm": 331.6366271972656, "learning_rate": 5.647439411049235e-06, "loss": 19.85, "step": 256400 }, { "epoch": 0.5179644226457173, "grad_norm": 371.5244445800781, "learning_rate": 5.647093282405194e-06, "loss": 27.193, "step": 256410 }, { "epoch": 0.5179846232784011, "grad_norm": 0.0, "learning_rate": 5.646747150607297e-06, "loss": 14.1701, "step": 256420 }, { "epoch": 0.5180048239110849, "grad_norm": 302.7779846191406, "learning_rate": 5.646401015657232e-06, "loss": 14.5225, "step": 256430 }, { "epoch": 0.5180250245437688, "grad_norm": 323.5245666503906, "learning_rate": 5.646054877556688e-06, "loss": 16.7588, "step": 256440 }, { "epoch": 0.5180452251764526, "grad_norm": 15.878558158874512, "learning_rate": 5.6457087363073505e-06, "loss": 21.7029, "step": 256450 }, { "epoch": 0.5180654258091364, "grad_norm": 495.80340576171875, "learning_rate": 5.645362591910908e-06, "loss": 20.2609, "step": 256460 }, { "epoch": 0.5180856264418202, "grad_norm": 422.1306457519531, "learning_rate": 5.645016444369045e-06, "loss": 18.9295, "step": 256470 }, { "epoch": 0.5181058270745039, "grad_norm": 390.0953674316406, "learning_rate": 5.644670293683451e-06, "loss": 13.9057, "step": 256480 }, { "epoch": 0.5181260277071877, "grad_norm": 106.3196792602539, "learning_rate": 5.6443241398558115e-06, "loss": 16.9325, "step": 256490 }, { "epoch": 0.5181462283398716, "grad_norm": 226.3938446044922, "learning_rate": 5.643977982887815e-06, "loss": 21.0977, "step": 256500 }, { "epoch": 0.5181664289725554, "grad_norm": 403.7469482421875, "learning_rate": 5.643631822781147e-06, "loss": 18.2843, "step": 256510 }, { "epoch": 0.5181866296052392, "grad_norm": 403.3016357421875, "learning_rate": 5.643285659537496e-06, "loss": 31.8133, "step": 256520 }, { "epoch": 0.518206830237923, "grad_norm": 111.47080993652344, "learning_rate": 5.64293949315855e-06, "loss": 73.9809, "step": 256530 }, { "epoch": 0.5182270308706068, "grad_norm": 228.7227325439453, "learning_rate": 5.642593323645993e-06, "loss": 13.304, "step": 256540 }, { "epoch": 0.5182472315032907, "grad_norm": 118.56266021728516, "learning_rate": 5.642247151001515e-06, "loss": 17.8374, "step": 256550 }, { "epoch": 0.5182674321359745, "grad_norm": 298.9107971191406, "learning_rate": 5.6419009752268015e-06, "loss": 14.9139, "step": 256560 }, { "epoch": 0.5182876327686583, "grad_norm": 460.5557861328125, "learning_rate": 5.641554796323543e-06, "loss": 16.8317, "step": 256570 }, { "epoch": 0.5183078334013421, "grad_norm": 318.7388610839844, "learning_rate": 5.641208614293421e-06, "loss": 9.3387, "step": 256580 }, { "epoch": 0.5183280340340259, "grad_norm": 235.020751953125, "learning_rate": 5.640862429138128e-06, "loss": 27.3261, "step": 256590 }, { "epoch": 0.5183482346667098, "grad_norm": 456.4938659667969, "learning_rate": 5.640516240859348e-06, "loss": 25.1892, "step": 256600 }, { "epoch": 0.5183684352993936, "grad_norm": 532.82470703125, "learning_rate": 5.64017004945877e-06, "loss": 26.0581, "step": 256610 }, { "epoch": 0.5183886359320774, "grad_norm": 303.11920166015625, "learning_rate": 5.639823854938082e-06, "loss": 24.833, "step": 256620 }, { "epoch": 0.5184088365647612, "grad_norm": 135.87252807617188, "learning_rate": 5.639477657298968e-06, "loss": 9.0454, "step": 256630 }, { "epoch": 0.518429037197445, "grad_norm": 218.00918579101562, "learning_rate": 5.639131456543119e-06, "loss": 15.072, "step": 256640 }, { "epoch": 0.5184492378301289, "grad_norm": 188.7112274169922, "learning_rate": 5.63878525267222e-06, "loss": 15.0758, "step": 256650 }, { "epoch": 0.5184694384628127, "grad_norm": 471.3847351074219, "learning_rate": 5.63843904568796e-06, "loss": 10.3643, "step": 256660 }, { "epoch": 0.5184896390954965, "grad_norm": 311.802001953125, "learning_rate": 5.638092835592024e-06, "loss": 18.6844, "step": 256670 }, { "epoch": 0.5185098397281803, "grad_norm": 257.2303161621094, "learning_rate": 5.637746622386102e-06, "loss": 19.5888, "step": 256680 }, { "epoch": 0.5185300403608641, "grad_norm": 131.4880828857422, "learning_rate": 5.637400406071881e-06, "loss": 11.9214, "step": 256690 }, { "epoch": 0.518550240993548, "grad_norm": 410.9390869140625, "learning_rate": 5.6370541866510476e-06, "loss": 20.8174, "step": 256700 }, { "epoch": 0.5185704416262318, "grad_norm": 0.0, "learning_rate": 5.6367079641252874e-06, "loss": 15.2384, "step": 256710 }, { "epoch": 0.5185906422589156, "grad_norm": 38.63165283203125, "learning_rate": 5.636361738496291e-06, "loss": 8.4046, "step": 256720 }, { "epoch": 0.5186108428915993, "grad_norm": 228.00552368164062, "learning_rate": 5.636015509765747e-06, "loss": 18.4551, "step": 256730 }, { "epoch": 0.5186310435242831, "grad_norm": 445.5107421875, "learning_rate": 5.6356692779353365e-06, "loss": 19.254, "step": 256740 }, { "epoch": 0.518651244156967, "grad_norm": 116.44206237792969, "learning_rate": 5.635323043006753e-06, "loss": 13.4935, "step": 256750 }, { "epoch": 0.5186714447896508, "grad_norm": 118.76641082763672, "learning_rate": 5.634976804981682e-06, "loss": 23.0766, "step": 256760 }, { "epoch": 0.5186916454223346, "grad_norm": 1311.7935791015625, "learning_rate": 5.634630563861811e-06, "loss": 41.0806, "step": 256770 }, { "epoch": 0.5187118460550184, "grad_norm": 506.02685546875, "learning_rate": 5.634284319648827e-06, "loss": 21.5712, "step": 256780 }, { "epoch": 0.5187320466877022, "grad_norm": 388.1326599121094, "learning_rate": 5.633938072344419e-06, "loss": 32.2906, "step": 256790 }, { "epoch": 0.518752247320386, "grad_norm": 272.7906494140625, "learning_rate": 5.633591821950274e-06, "loss": 27.798, "step": 256800 }, { "epoch": 0.5187724479530699, "grad_norm": 380.826416015625, "learning_rate": 5.633245568468079e-06, "loss": 24.3334, "step": 256810 }, { "epoch": 0.5187926485857537, "grad_norm": 328.3963317871094, "learning_rate": 5.6328993118995215e-06, "loss": 28.7394, "step": 256820 }, { "epoch": 0.5188128492184375, "grad_norm": 371.1546325683594, "learning_rate": 5.632553052246289e-06, "loss": 12.4148, "step": 256830 }, { "epoch": 0.5188330498511213, "grad_norm": 375.4221496582031, "learning_rate": 5.6322067895100705e-06, "loss": 15.243, "step": 256840 }, { "epoch": 0.5188532504838052, "grad_norm": 331.8681640625, "learning_rate": 5.631860523692553e-06, "loss": 16.0171, "step": 256850 }, { "epoch": 0.518873451116489, "grad_norm": 164.707275390625, "learning_rate": 5.631514254795424e-06, "loss": 17.2397, "step": 256860 }, { "epoch": 0.5188936517491728, "grad_norm": 425.901611328125, "learning_rate": 5.6311679828203706e-06, "loss": 20.0239, "step": 256870 }, { "epoch": 0.5189138523818566, "grad_norm": 627.7958374023438, "learning_rate": 5.630821707769081e-06, "loss": 17.5421, "step": 256880 }, { "epoch": 0.5189340530145404, "grad_norm": 19.891115188598633, "learning_rate": 5.630475429643244e-06, "loss": 21.682, "step": 256890 }, { "epoch": 0.5189542536472243, "grad_norm": 462.4209899902344, "learning_rate": 5.630129148444543e-06, "loss": 11.568, "step": 256900 }, { "epoch": 0.5189744542799081, "grad_norm": 170.34751892089844, "learning_rate": 5.629782864174672e-06, "loss": 7.7655, "step": 256910 }, { "epoch": 0.5189946549125919, "grad_norm": 341.96710205078125, "learning_rate": 5.629436576835315e-06, "loss": 19.018, "step": 256920 }, { "epoch": 0.5190148555452757, "grad_norm": 141.2561492919922, "learning_rate": 5.6290902864281605e-06, "loss": 25.1433, "step": 256930 }, { "epoch": 0.5190350561779595, "grad_norm": 274.15167236328125, "learning_rate": 5.628743992954896e-06, "loss": 16.0611, "step": 256940 }, { "epoch": 0.5190552568106434, "grad_norm": 94.52300262451172, "learning_rate": 5.62839769641721e-06, "loss": 21.9992, "step": 256950 }, { "epoch": 0.5190754574433272, "grad_norm": 164.6126251220703, "learning_rate": 5.6280513968167895e-06, "loss": 24.2955, "step": 256960 }, { "epoch": 0.519095658076011, "grad_norm": 304.1648864746094, "learning_rate": 5.627705094155322e-06, "loss": 23.2529, "step": 256970 }, { "epoch": 0.5191158587086948, "grad_norm": 672.8325805664062, "learning_rate": 5.627358788434497e-06, "loss": 22.1995, "step": 256980 }, { "epoch": 0.5191360593413785, "grad_norm": 428.90692138671875, "learning_rate": 5.627012479656001e-06, "loss": 30.9261, "step": 256990 }, { "epoch": 0.5191562599740623, "grad_norm": 390.6862487792969, "learning_rate": 5.626666167821522e-06, "loss": 22.227, "step": 257000 }, { "epoch": 0.5191764606067462, "grad_norm": 215.3688507080078, "learning_rate": 5.626319852932748e-06, "loss": 47.5204, "step": 257010 }, { "epoch": 0.51919666123943, "grad_norm": 265.74224853515625, "learning_rate": 5.625973534991368e-06, "loss": 22.1039, "step": 257020 }, { "epoch": 0.5192168618721138, "grad_norm": 229.567138671875, "learning_rate": 5.625627213999067e-06, "loss": 15.7737, "step": 257030 }, { "epoch": 0.5192370625047976, "grad_norm": 352.03753662109375, "learning_rate": 5.6252808899575375e-06, "loss": 13.5905, "step": 257040 }, { "epoch": 0.5192572631374814, "grad_norm": 52.979530334472656, "learning_rate": 5.624934562868463e-06, "loss": 20.5222, "step": 257050 }, { "epoch": 0.5192774637701653, "grad_norm": 406.5325622558594, "learning_rate": 5.624588232733533e-06, "loss": 16.6158, "step": 257060 }, { "epoch": 0.5192976644028491, "grad_norm": 169.905029296875, "learning_rate": 5.624241899554437e-06, "loss": 16.8367, "step": 257070 }, { "epoch": 0.5193178650355329, "grad_norm": 94.73262023925781, "learning_rate": 5.62389556333286e-06, "loss": 21.7165, "step": 257080 }, { "epoch": 0.5193380656682167, "grad_norm": 182.6281280517578, "learning_rate": 5.623549224070494e-06, "loss": 20.7275, "step": 257090 }, { "epoch": 0.5193582663009005, "grad_norm": 119.61746215820312, "learning_rate": 5.623202881769023e-06, "loss": 14.2551, "step": 257100 }, { "epoch": 0.5193784669335844, "grad_norm": 248.99929809570312, "learning_rate": 5.622856536430137e-06, "loss": 12.149, "step": 257110 }, { "epoch": 0.5193986675662682, "grad_norm": 531.6729736328125, "learning_rate": 5.622510188055523e-06, "loss": 27.2915, "step": 257120 }, { "epoch": 0.519418868198952, "grad_norm": 711.6083374023438, "learning_rate": 5.622163836646871e-06, "loss": 27.9329, "step": 257130 }, { "epoch": 0.5194390688316358, "grad_norm": 174.59408569335938, "learning_rate": 5.621817482205868e-06, "loss": 33.7191, "step": 257140 }, { "epoch": 0.5194592694643196, "grad_norm": 350.5267028808594, "learning_rate": 5.6214711247342015e-06, "loss": 16.6153, "step": 257150 }, { "epoch": 0.5194794700970035, "grad_norm": 116.79344177246094, "learning_rate": 5.621124764233561e-06, "loss": 28.2193, "step": 257160 }, { "epoch": 0.5194996707296873, "grad_norm": 20.90986442565918, "learning_rate": 5.620778400705632e-06, "loss": 17.2737, "step": 257170 }, { "epoch": 0.5195198713623711, "grad_norm": 247.1576690673828, "learning_rate": 5.620432034152107e-06, "loss": 7.3802, "step": 257180 }, { "epoch": 0.5195400719950549, "grad_norm": 371.4339294433594, "learning_rate": 5.620085664574668e-06, "loss": 18.3785, "step": 257190 }, { "epoch": 0.5195602726277387, "grad_norm": 549.288818359375, "learning_rate": 5.6197392919750095e-06, "loss": 23.0343, "step": 257200 }, { "epoch": 0.5195804732604226, "grad_norm": 397.2499084472656, "learning_rate": 5.619392916354815e-06, "loss": 22.1925, "step": 257210 }, { "epoch": 0.5196006738931064, "grad_norm": 298.8348388671875, "learning_rate": 5.619046537715776e-06, "loss": 20.6462, "step": 257220 }, { "epoch": 0.5196208745257902, "grad_norm": 124.66400146484375, "learning_rate": 5.61870015605958e-06, "loss": 26.9006, "step": 257230 }, { "epoch": 0.5196410751584739, "grad_norm": 366.28033447265625, "learning_rate": 5.618353771387912e-06, "loss": 40.624, "step": 257240 }, { "epoch": 0.5196612757911577, "grad_norm": 192.580078125, "learning_rate": 5.618007383702464e-06, "loss": 17.7506, "step": 257250 }, { "epoch": 0.5196814764238415, "grad_norm": 236.51666259765625, "learning_rate": 5.617660993004923e-06, "loss": 21.6476, "step": 257260 }, { "epoch": 0.5197016770565254, "grad_norm": 310.40875244140625, "learning_rate": 5.617314599296977e-06, "loss": 9.9978, "step": 257270 }, { "epoch": 0.5197218776892092, "grad_norm": 355.7420959472656, "learning_rate": 5.616968202580315e-06, "loss": 12.309, "step": 257280 }, { "epoch": 0.519742078321893, "grad_norm": 225.9285888671875, "learning_rate": 5.6166218028566246e-06, "loss": 16.9414, "step": 257290 }, { "epoch": 0.5197622789545768, "grad_norm": 161.36917114257812, "learning_rate": 5.616275400127594e-06, "loss": 11.9393, "step": 257300 }, { "epoch": 0.5197824795872606, "grad_norm": 174.21070861816406, "learning_rate": 5.615928994394913e-06, "loss": 15.4289, "step": 257310 }, { "epoch": 0.5198026802199445, "grad_norm": 278.45819091796875, "learning_rate": 5.615582585660266e-06, "loss": 22.2893, "step": 257320 }, { "epoch": 0.5198228808526283, "grad_norm": 377.08905029296875, "learning_rate": 5.615236173925347e-06, "loss": 26.4147, "step": 257330 }, { "epoch": 0.5198430814853121, "grad_norm": 279.3738708496094, "learning_rate": 5.61488975919184e-06, "loss": 21.7555, "step": 257340 }, { "epoch": 0.5198632821179959, "grad_norm": 195.02406311035156, "learning_rate": 5.6145433414614345e-06, "loss": 12.6662, "step": 257350 }, { "epoch": 0.5198834827506797, "grad_norm": 181.31800842285156, "learning_rate": 5.614196920735822e-06, "loss": 12.9057, "step": 257360 }, { "epoch": 0.5199036833833636, "grad_norm": 406.9359436035156, "learning_rate": 5.613850497016687e-06, "loss": 11.6466, "step": 257370 }, { "epoch": 0.5199238840160474, "grad_norm": 220.52139282226562, "learning_rate": 5.613504070305717e-06, "loss": 12.0979, "step": 257380 }, { "epoch": 0.5199440846487312, "grad_norm": 130.13363647460938, "learning_rate": 5.613157640604605e-06, "loss": 22.7876, "step": 257390 }, { "epoch": 0.519964285281415, "grad_norm": 485.1378479003906, "learning_rate": 5.612811207915034e-06, "loss": 18.9151, "step": 257400 }, { "epoch": 0.5199844859140988, "grad_norm": 0.0, "learning_rate": 5.6124647722386996e-06, "loss": 28.9663, "step": 257410 }, { "epoch": 0.5200046865467827, "grad_norm": 209.18368530273438, "learning_rate": 5.612118333577283e-06, "loss": 15.6152, "step": 257420 }, { "epoch": 0.5200248871794665, "grad_norm": 150.05416870117188, "learning_rate": 5.611771891932477e-06, "loss": 9.968, "step": 257430 }, { "epoch": 0.5200450878121503, "grad_norm": 184.6409454345703, "learning_rate": 5.611425447305969e-06, "loss": 21.5416, "step": 257440 }, { "epoch": 0.5200652884448341, "grad_norm": 93.94269561767578, "learning_rate": 5.611078999699448e-06, "loss": 16.1917, "step": 257450 }, { "epoch": 0.520085489077518, "grad_norm": 490.60748291015625, "learning_rate": 5.6107325491146024e-06, "loss": 23.7816, "step": 257460 }, { "epoch": 0.5201056897102018, "grad_norm": 565.3958740234375, "learning_rate": 5.61038609555312e-06, "loss": 24.993, "step": 257470 }, { "epoch": 0.5201258903428856, "grad_norm": 168.8619842529297, "learning_rate": 5.610039639016689e-06, "loss": 19.668, "step": 257480 }, { "epoch": 0.5201460909755694, "grad_norm": 0.0, "learning_rate": 5.609693179506999e-06, "loss": 18.2079, "step": 257490 }, { "epoch": 0.5201662916082531, "grad_norm": 68.84059143066406, "learning_rate": 5.609346717025738e-06, "loss": 21.0824, "step": 257500 }, { "epoch": 0.5201864922409369, "grad_norm": 223.14898681640625, "learning_rate": 5.609000251574596e-06, "loss": 16.0317, "step": 257510 }, { "epoch": 0.5202066928736208, "grad_norm": 179.80238342285156, "learning_rate": 5.60865378315526e-06, "loss": 22.3056, "step": 257520 }, { "epoch": 0.5202268935063046, "grad_norm": 291.3398132324219, "learning_rate": 5.6083073117694186e-06, "loss": 17.1269, "step": 257530 }, { "epoch": 0.5202470941389884, "grad_norm": 446.208984375, "learning_rate": 5.607960837418763e-06, "loss": 19.1245, "step": 257540 }, { "epoch": 0.5202672947716722, "grad_norm": 300.15203857421875, "learning_rate": 5.6076143601049795e-06, "loss": 11.1601, "step": 257550 }, { "epoch": 0.520287495404356, "grad_norm": 218.55335998535156, "learning_rate": 5.607267879829757e-06, "loss": 13.9812, "step": 257560 }, { "epoch": 0.5203076960370399, "grad_norm": 5.113644599914551, "learning_rate": 5.606921396594785e-06, "loss": 22.6028, "step": 257570 }, { "epoch": 0.5203278966697237, "grad_norm": 71.10938262939453, "learning_rate": 5.60657491040175e-06, "loss": 49.389, "step": 257580 }, { "epoch": 0.5203480973024075, "grad_norm": 324.4174499511719, "learning_rate": 5.606228421252344e-06, "loss": 22.7894, "step": 257590 }, { "epoch": 0.5203682979350913, "grad_norm": 534.28173828125, "learning_rate": 5.605881929148254e-06, "loss": 18.3112, "step": 257600 }, { "epoch": 0.5203884985677751, "grad_norm": 286.17205810546875, "learning_rate": 5.605535434091168e-06, "loss": 18.3809, "step": 257610 }, { "epoch": 0.520408699200459, "grad_norm": 108.36328887939453, "learning_rate": 5.605188936082776e-06, "loss": 14.0884, "step": 257620 }, { "epoch": 0.5204288998331428, "grad_norm": 338.8451843261719, "learning_rate": 5.604842435124769e-06, "loss": 17.7856, "step": 257630 }, { "epoch": 0.5204491004658266, "grad_norm": 295.896240234375, "learning_rate": 5.604495931218831e-06, "loss": 15.9285, "step": 257640 }, { "epoch": 0.5204693010985104, "grad_norm": 583.6438598632812, "learning_rate": 5.604149424366653e-06, "loss": 23.1871, "step": 257650 }, { "epoch": 0.5204895017311942, "grad_norm": 300.85406494140625, "learning_rate": 5.603802914569924e-06, "loss": 13.4596, "step": 257660 }, { "epoch": 0.5205097023638781, "grad_norm": 158.25392150878906, "learning_rate": 5.603456401830333e-06, "loss": 19.3366, "step": 257670 }, { "epoch": 0.5205299029965619, "grad_norm": 896.9717407226562, "learning_rate": 5.60310988614957e-06, "loss": 17.9347, "step": 257680 }, { "epoch": 0.5205501036292457, "grad_norm": 481.591064453125, "learning_rate": 5.60276336752932e-06, "loss": 21.3935, "step": 257690 }, { "epoch": 0.5205703042619295, "grad_norm": 338.22735595703125, "learning_rate": 5.6024168459712765e-06, "loss": 14.1011, "step": 257700 }, { "epoch": 0.5205905048946133, "grad_norm": 110.32855224609375, "learning_rate": 5.602070321477126e-06, "loss": 22.3468, "step": 257710 }, { "epoch": 0.5206107055272972, "grad_norm": 208.44537353515625, "learning_rate": 5.601723794048558e-06, "loss": 12.9933, "step": 257720 }, { "epoch": 0.520630906159981, "grad_norm": 70.29765319824219, "learning_rate": 5.601377263687262e-06, "loss": 13.7772, "step": 257730 }, { "epoch": 0.5206511067926648, "grad_norm": 175.18167114257812, "learning_rate": 5.601030730394923e-06, "loss": 28.2203, "step": 257740 }, { "epoch": 0.5206713074253486, "grad_norm": 327.8354797363281, "learning_rate": 5.600684194173236e-06, "loss": 13.6084, "step": 257750 }, { "epoch": 0.5206915080580323, "grad_norm": 618.5657348632812, "learning_rate": 5.600337655023887e-06, "loss": 39.9682, "step": 257760 }, { "epoch": 0.5207117086907161, "grad_norm": 306.13623046875, "learning_rate": 5.599991112948564e-06, "loss": 15.2657, "step": 257770 }, { "epoch": 0.5207319093234, "grad_norm": 578.1565551757812, "learning_rate": 5.5996445679489566e-06, "loss": 21.5061, "step": 257780 }, { "epoch": 0.5207521099560838, "grad_norm": 9.871747970581055, "learning_rate": 5.599298020026757e-06, "loss": 23.592, "step": 257790 }, { "epoch": 0.5207723105887676, "grad_norm": 191.72775268554688, "learning_rate": 5.598951469183649e-06, "loss": 25.0291, "step": 257800 }, { "epoch": 0.5207925112214514, "grad_norm": 185.57876586914062, "learning_rate": 5.598604915421324e-06, "loss": 11.8952, "step": 257810 }, { "epoch": 0.5208127118541352, "grad_norm": 206.27557373046875, "learning_rate": 5.598258358741472e-06, "loss": 14.4719, "step": 257820 }, { "epoch": 0.5208329124868191, "grad_norm": 329.6721496582031, "learning_rate": 5.597911799145781e-06, "loss": 17.8993, "step": 257830 }, { "epoch": 0.5208531131195029, "grad_norm": 703.06103515625, "learning_rate": 5.597565236635942e-06, "loss": 30.255, "step": 257840 }, { "epoch": 0.5208733137521867, "grad_norm": 275.4310302734375, "learning_rate": 5.59721867121364e-06, "loss": 21.346, "step": 257850 }, { "epoch": 0.5208935143848705, "grad_norm": 239.7360076904297, "learning_rate": 5.596872102880568e-06, "loss": 16.6683, "step": 257860 }, { "epoch": 0.5209137150175543, "grad_norm": 167.33457946777344, "learning_rate": 5.596525531638415e-06, "loss": 16.4424, "step": 257870 }, { "epoch": 0.5209339156502382, "grad_norm": 184.79196166992188, "learning_rate": 5.596178957488867e-06, "loss": 26.0154, "step": 257880 }, { "epoch": 0.520954116282922, "grad_norm": 93.61726379394531, "learning_rate": 5.595832380433616e-06, "loss": 9.9589, "step": 257890 }, { "epoch": 0.5209743169156058, "grad_norm": 41.655155181884766, "learning_rate": 5.59548580047435e-06, "loss": 18.4512, "step": 257900 }, { "epoch": 0.5209945175482896, "grad_norm": 306.0016174316406, "learning_rate": 5.595139217612758e-06, "loss": 12.2167, "step": 257910 }, { "epoch": 0.5210147181809734, "grad_norm": 233.15017700195312, "learning_rate": 5.59479263185053e-06, "loss": 15.9284, "step": 257920 }, { "epoch": 0.5210349188136573, "grad_norm": 223.04916381835938, "learning_rate": 5.594446043189355e-06, "loss": 12.4537, "step": 257930 }, { "epoch": 0.5210551194463411, "grad_norm": 360.2543029785156, "learning_rate": 5.594099451630921e-06, "loss": 17.1084, "step": 257940 }, { "epoch": 0.5210753200790249, "grad_norm": 598.1145629882812, "learning_rate": 5.593752857176921e-06, "loss": 22.486, "step": 257950 }, { "epoch": 0.5210955207117087, "grad_norm": 354.2784729003906, "learning_rate": 5.593406259829038e-06, "loss": 15.3867, "step": 257960 }, { "epoch": 0.5211157213443925, "grad_norm": 112.25636291503906, "learning_rate": 5.593059659588968e-06, "loss": 18.4534, "step": 257970 }, { "epoch": 0.5211359219770764, "grad_norm": 219.10601806640625, "learning_rate": 5.592713056458395e-06, "loss": 18.1768, "step": 257980 }, { "epoch": 0.5211561226097602, "grad_norm": 229.28787231445312, "learning_rate": 5.592366450439012e-06, "loss": 23.8682, "step": 257990 }, { "epoch": 0.521176323242444, "grad_norm": 206.49269104003906, "learning_rate": 5.592019841532507e-06, "loss": 14.9094, "step": 258000 }, { "epoch": 0.5211965238751277, "grad_norm": 395.5790100097656, "learning_rate": 5.591673229740566e-06, "loss": 23.8957, "step": 258010 }, { "epoch": 0.5212167245078115, "grad_norm": 323.26678466796875, "learning_rate": 5.591326615064885e-06, "loss": 17.0221, "step": 258020 }, { "epoch": 0.5212369251404954, "grad_norm": 1685.9793701171875, "learning_rate": 5.590979997507146e-06, "loss": 36.8575, "step": 258030 }, { "epoch": 0.5212571257731792, "grad_norm": 459.6819763183594, "learning_rate": 5.590633377069046e-06, "loss": 18.3521, "step": 258040 }, { "epoch": 0.521277326405863, "grad_norm": 431.72271728515625, "learning_rate": 5.590286753752269e-06, "loss": 18.8781, "step": 258050 }, { "epoch": 0.5212975270385468, "grad_norm": 24.403339385986328, "learning_rate": 5.5899401275585064e-06, "loss": 18.0905, "step": 258060 }, { "epoch": 0.5213177276712306, "grad_norm": 10.3805570602417, "learning_rate": 5.5895934984894476e-06, "loss": 17.6349, "step": 258070 }, { "epoch": 0.5213379283039145, "grad_norm": 467.8337097167969, "learning_rate": 5.58924686654678e-06, "loss": 22.9216, "step": 258080 }, { "epoch": 0.5213581289365983, "grad_norm": 146.01658630371094, "learning_rate": 5.588900231732196e-06, "loss": 9.1328, "step": 258090 }, { "epoch": 0.5213783295692821, "grad_norm": 228.30433654785156, "learning_rate": 5.588553594047382e-06, "loss": 10.849, "step": 258100 }, { "epoch": 0.5213985302019659, "grad_norm": 383.1952819824219, "learning_rate": 5.5882069534940305e-06, "loss": 22.9148, "step": 258110 }, { "epoch": 0.5214187308346497, "grad_norm": 63.40639877319336, "learning_rate": 5.58786031007383e-06, "loss": 10.1885, "step": 258120 }, { "epoch": 0.5214389314673336, "grad_norm": 197.54344177246094, "learning_rate": 5.5875136637884695e-06, "loss": 13.8133, "step": 258130 }, { "epoch": 0.5214591321000174, "grad_norm": 219.2477264404297, "learning_rate": 5.587167014639638e-06, "loss": 10.1996, "step": 258140 }, { "epoch": 0.5214793327327012, "grad_norm": 128.92933654785156, "learning_rate": 5.5868203626290266e-06, "loss": 21.9891, "step": 258150 }, { "epoch": 0.521499533365385, "grad_norm": 204.79119873046875, "learning_rate": 5.586473707758322e-06, "loss": 17.8645, "step": 258160 }, { "epoch": 0.5215197339980688, "grad_norm": 239.46432495117188, "learning_rate": 5.586127050029218e-06, "loss": 19.2068, "step": 258170 }, { "epoch": 0.5215399346307527, "grad_norm": 22.775691986083984, "learning_rate": 5.585780389443401e-06, "loss": 26.7691, "step": 258180 }, { "epoch": 0.5215601352634365, "grad_norm": 283.7246398925781, "learning_rate": 5.58543372600256e-06, "loss": 15.148, "step": 258190 }, { "epoch": 0.5215803358961203, "grad_norm": 428.1148986816406, "learning_rate": 5.585087059708389e-06, "loss": 25.4262, "step": 258200 }, { "epoch": 0.5216005365288041, "grad_norm": 133.83209228515625, "learning_rate": 5.584740390562572e-06, "loss": 8.4814, "step": 258210 }, { "epoch": 0.5216207371614879, "grad_norm": 243.32249450683594, "learning_rate": 5.584393718566802e-06, "loss": 15.0486, "step": 258220 }, { "epoch": 0.5216409377941718, "grad_norm": 453.9349365234375, "learning_rate": 5.584047043722768e-06, "loss": 25.0605, "step": 258230 }, { "epoch": 0.5216611384268556, "grad_norm": 431.14642333984375, "learning_rate": 5.5837003660321596e-06, "loss": 22.8897, "step": 258240 }, { "epoch": 0.5216813390595394, "grad_norm": 19.204057693481445, "learning_rate": 5.5833536854966665e-06, "loss": 10.4483, "step": 258250 }, { "epoch": 0.5217015396922232, "grad_norm": 281.699951171875, "learning_rate": 5.5830070021179785e-06, "loss": 11.6095, "step": 258260 }, { "epoch": 0.5217217403249069, "grad_norm": 601.601318359375, "learning_rate": 5.582660315897785e-06, "loss": 33.8348, "step": 258270 }, { "epoch": 0.5217419409575907, "grad_norm": 119.75731658935547, "learning_rate": 5.582313626837776e-06, "loss": 26.966, "step": 258280 }, { "epoch": 0.5217621415902746, "grad_norm": 267.8027648925781, "learning_rate": 5.58196693493964e-06, "loss": 14.4029, "step": 258290 }, { "epoch": 0.5217823422229584, "grad_norm": 229.92770385742188, "learning_rate": 5.581620240205068e-06, "loss": 19.896, "step": 258300 }, { "epoch": 0.5218025428556422, "grad_norm": 104.79776763916016, "learning_rate": 5.58127354263575e-06, "loss": 15.3973, "step": 258310 }, { "epoch": 0.521822743488326, "grad_norm": 382.8450622558594, "learning_rate": 5.580926842233375e-06, "loss": 15.4507, "step": 258320 }, { "epoch": 0.5218429441210098, "grad_norm": 246.13720703125, "learning_rate": 5.580580138999633e-06, "loss": 22.7874, "step": 258330 }, { "epoch": 0.5218631447536937, "grad_norm": 310.72943115234375, "learning_rate": 5.580233432936215e-06, "loss": 17.8477, "step": 258340 }, { "epoch": 0.5218833453863775, "grad_norm": 17.691062927246094, "learning_rate": 5.5798867240448075e-06, "loss": 13.0689, "step": 258350 }, { "epoch": 0.5219035460190613, "grad_norm": 260.335205078125, "learning_rate": 5.579540012327103e-06, "loss": 16.5898, "step": 258360 }, { "epoch": 0.5219237466517451, "grad_norm": 626.1919555664062, "learning_rate": 5.579193297784792e-06, "loss": 15.4365, "step": 258370 }, { "epoch": 0.5219439472844289, "grad_norm": 26.44988441467285, "learning_rate": 5.578846580419562e-06, "loss": 15.4676, "step": 258380 }, { "epoch": 0.5219641479171128, "grad_norm": 174.64190673828125, "learning_rate": 5.578499860233104e-06, "loss": 23.8875, "step": 258390 }, { "epoch": 0.5219843485497966, "grad_norm": 304.61273193359375, "learning_rate": 5.578153137227109e-06, "loss": 9.2644, "step": 258400 }, { "epoch": 0.5220045491824804, "grad_norm": 151.9954071044922, "learning_rate": 5.577806411403265e-06, "loss": 5.3574, "step": 258410 }, { "epoch": 0.5220247498151642, "grad_norm": 131.85997009277344, "learning_rate": 5.577459682763262e-06, "loss": 14.5437, "step": 258420 }, { "epoch": 0.522044950447848, "grad_norm": 521.451171875, "learning_rate": 5.577112951308792e-06, "loss": 30.0304, "step": 258430 }, { "epoch": 0.5220651510805319, "grad_norm": 248.85638427734375, "learning_rate": 5.576766217041541e-06, "loss": 15.9374, "step": 258440 }, { "epoch": 0.5220853517132157, "grad_norm": 513.6180419921875, "learning_rate": 5.576419479963204e-06, "loss": 22.3538, "step": 258450 }, { "epoch": 0.5221055523458995, "grad_norm": 314.9533386230469, "learning_rate": 5.576072740075467e-06, "loss": 14.6264, "step": 258460 }, { "epoch": 0.5221257529785833, "grad_norm": 251.08273315429688, "learning_rate": 5.575725997380023e-06, "loss": 23.772, "step": 258470 }, { "epoch": 0.5221459536112671, "grad_norm": 313.2798767089844, "learning_rate": 5.575379251878558e-06, "loss": 24.2623, "step": 258480 }, { "epoch": 0.522166154243951, "grad_norm": 226.98533630371094, "learning_rate": 5.575032503572765e-06, "loss": 25.2634, "step": 258490 }, { "epoch": 0.5221863548766348, "grad_norm": 30.95145034790039, "learning_rate": 5.5746857524643335e-06, "loss": 8.902, "step": 258500 }, { "epoch": 0.5222065555093186, "grad_norm": 383.521240234375, "learning_rate": 5.5743389985549535e-06, "loss": 20.8218, "step": 258510 }, { "epoch": 0.5222267561420023, "grad_norm": 39.35731887817383, "learning_rate": 5.573992241846315e-06, "loss": 10.7238, "step": 258520 }, { "epoch": 0.5222469567746861, "grad_norm": 448.3585205078125, "learning_rate": 5.573645482340107e-06, "loss": 17.9701, "step": 258530 }, { "epoch": 0.52226715740737, "grad_norm": 325.30145263671875, "learning_rate": 5.573298720038022e-06, "loss": 8.8879, "step": 258540 }, { "epoch": 0.5222873580400538, "grad_norm": 147.61260986328125, "learning_rate": 5.572951954941748e-06, "loss": 18.6196, "step": 258550 }, { "epoch": 0.5223075586727376, "grad_norm": 244.13461303710938, "learning_rate": 5.572605187052975e-06, "loss": 10.8128, "step": 258560 }, { "epoch": 0.5223277593054214, "grad_norm": 124.64093017578125, "learning_rate": 5.572258416373394e-06, "loss": 11.3584, "step": 258570 }, { "epoch": 0.5223479599381052, "grad_norm": 262.4112243652344, "learning_rate": 5.571911642904696e-06, "loss": 19.2767, "step": 258580 }, { "epoch": 0.522368160570789, "grad_norm": 126.02333068847656, "learning_rate": 5.571564866648569e-06, "loss": 12.8612, "step": 258590 }, { "epoch": 0.5223883612034729, "grad_norm": 120.83259582519531, "learning_rate": 5.5712180876067045e-06, "loss": 15.3122, "step": 258600 }, { "epoch": 0.5224085618361567, "grad_norm": 421.7219543457031, "learning_rate": 5.570871305780793e-06, "loss": 21.0374, "step": 258610 }, { "epoch": 0.5224287624688405, "grad_norm": 134.56985473632812, "learning_rate": 5.570524521172523e-06, "loss": 14.632, "step": 258620 }, { "epoch": 0.5224489631015243, "grad_norm": 237.55068969726562, "learning_rate": 5.570177733783586e-06, "loss": 20.6917, "step": 258630 }, { "epoch": 0.5224691637342082, "grad_norm": 219.10423278808594, "learning_rate": 5.56983094361567e-06, "loss": 8.4288, "step": 258640 }, { "epoch": 0.522489364366892, "grad_norm": 290.8097229003906, "learning_rate": 5.56948415067047e-06, "loss": 33.5521, "step": 258650 }, { "epoch": 0.5225095649995758, "grad_norm": 139.68862915039062, "learning_rate": 5.569137354949672e-06, "loss": 23.9473, "step": 258660 }, { "epoch": 0.5225297656322596, "grad_norm": 300.38153076171875, "learning_rate": 5.568790556454967e-06, "loss": 9.7815, "step": 258670 }, { "epoch": 0.5225499662649434, "grad_norm": 222.15321350097656, "learning_rate": 5.568443755188048e-06, "loss": 4.8133, "step": 258680 }, { "epoch": 0.5225701668976273, "grad_norm": 492.2003173828125, "learning_rate": 5.568096951150601e-06, "loss": 18.6229, "step": 258690 }, { "epoch": 0.5225903675303111, "grad_norm": 287.6752014160156, "learning_rate": 5.567750144344318e-06, "loss": 15.7764, "step": 258700 }, { "epoch": 0.5226105681629949, "grad_norm": 179.54063415527344, "learning_rate": 5.567403334770891e-06, "loss": 12.819, "step": 258710 }, { "epoch": 0.5226307687956787, "grad_norm": 108.2694320678711, "learning_rate": 5.567056522432008e-06, "loss": 15.1331, "step": 258720 }, { "epoch": 0.5226509694283625, "grad_norm": 648.162841796875, "learning_rate": 5.5667097073293605e-06, "loss": 30.4294, "step": 258730 }, { "epoch": 0.5226711700610464, "grad_norm": 7.009792327880859, "learning_rate": 5.56636288946464e-06, "loss": 28.7157, "step": 258740 }, { "epoch": 0.5226913706937302, "grad_norm": 397.51806640625, "learning_rate": 5.566016068839535e-06, "loss": 20.2085, "step": 258750 }, { "epoch": 0.522711571326414, "grad_norm": 588.4298095703125, "learning_rate": 5.565669245455735e-06, "loss": 14.4779, "step": 258760 }, { "epoch": 0.5227317719590978, "grad_norm": 287.2068176269531, "learning_rate": 5.565322419314933e-06, "loss": 21.1831, "step": 258770 }, { "epoch": 0.5227519725917815, "grad_norm": 375.7474365234375, "learning_rate": 5.564975590418816e-06, "loss": 36.9643, "step": 258780 }, { "epoch": 0.5227721732244653, "grad_norm": 457.03533935546875, "learning_rate": 5.564628758769079e-06, "loss": 30.9986, "step": 258790 }, { "epoch": 0.5227923738571492, "grad_norm": 442.6890869140625, "learning_rate": 5.5642819243674085e-06, "loss": 21.5062, "step": 258800 }, { "epoch": 0.522812574489833, "grad_norm": 540.66552734375, "learning_rate": 5.563935087215497e-06, "loss": 20.3579, "step": 258810 }, { "epoch": 0.5228327751225168, "grad_norm": 273.9793395996094, "learning_rate": 5.563588247315035e-06, "loss": 22.634, "step": 258820 }, { "epoch": 0.5228529757552006, "grad_norm": 48.061866760253906, "learning_rate": 5.563241404667711e-06, "loss": 11.5423, "step": 258830 }, { "epoch": 0.5228731763878844, "grad_norm": 162.49380493164062, "learning_rate": 5.562894559275216e-06, "loss": 21.3753, "step": 258840 }, { "epoch": 0.5228933770205683, "grad_norm": 494.9394836425781, "learning_rate": 5.562547711139243e-06, "loss": 16.9446, "step": 258850 }, { "epoch": 0.5229135776532521, "grad_norm": 146.72763061523438, "learning_rate": 5.562200860261481e-06, "loss": 14.9497, "step": 258860 }, { "epoch": 0.5229337782859359, "grad_norm": 230.59164428710938, "learning_rate": 5.5618540066436174e-06, "loss": 6.7164, "step": 258870 }, { "epoch": 0.5229539789186197, "grad_norm": 443.7369079589844, "learning_rate": 5.561507150287347e-06, "loss": 22.095, "step": 258880 }, { "epoch": 0.5229741795513035, "grad_norm": 378.72515869140625, "learning_rate": 5.56116029119436e-06, "loss": 28.066, "step": 258890 }, { "epoch": 0.5229943801839874, "grad_norm": 183.24334716796875, "learning_rate": 5.560813429366345e-06, "loss": 20.3704, "step": 258900 }, { "epoch": 0.5230145808166712, "grad_norm": 824.0833740234375, "learning_rate": 5.560466564804993e-06, "loss": 19.7306, "step": 258910 }, { "epoch": 0.523034781449355, "grad_norm": 188.2755584716797, "learning_rate": 5.560119697511995e-06, "loss": 9.9228, "step": 258920 }, { "epoch": 0.5230549820820388, "grad_norm": 71.25138092041016, "learning_rate": 5.559772827489042e-06, "loss": 23.7861, "step": 258930 }, { "epoch": 0.5230751827147226, "grad_norm": 7.063268184661865, "learning_rate": 5.559425954737824e-06, "loss": 13.3775, "step": 258940 }, { "epoch": 0.5230953833474065, "grad_norm": 216.2054443359375, "learning_rate": 5.559079079260032e-06, "loss": 15.2912, "step": 258950 }, { "epoch": 0.5231155839800903, "grad_norm": 465.7315368652344, "learning_rate": 5.558732201057355e-06, "loss": 13.8085, "step": 258960 }, { "epoch": 0.5231357846127741, "grad_norm": 200.19886779785156, "learning_rate": 5.558385320131487e-06, "loss": 16.1144, "step": 258970 }, { "epoch": 0.5231559852454579, "grad_norm": 153.0007781982422, "learning_rate": 5.558038436484116e-06, "loss": 19.8735, "step": 258980 }, { "epoch": 0.5231761858781417, "grad_norm": 326.7191467285156, "learning_rate": 5.5576915501169314e-06, "loss": 16.1709, "step": 258990 }, { "epoch": 0.5231963865108256, "grad_norm": 145.03091430664062, "learning_rate": 5.557344661031628e-06, "loss": 9.9621, "step": 259000 }, { "epoch": 0.5232165871435094, "grad_norm": 235.9960174560547, "learning_rate": 5.556997769229893e-06, "loss": 26.1863, "step": 259010 }, { "epoch": 0.5232367877761932, "grad_norm": 193.3775634765625, "learning_rate": 5.556650874713421e-06, "loss": 34.9024, "step": 259020 }, { "epoch": 0.5232569884088769, "grad_norm": 146.57818603515625, "learning_rate": 5.556303977483898e-06, "loss": 12.4339, "step": 259030 }, { "epoch": 0.5232771890415607, "grad_norm": 201.85423278808594, "learning_rate": 5.555957077543016e-06, "loss": 20.4893, "step": 259040 }, { "epoch": 0.5232973896742446, "grad_norm": 400.77886962890625, "learning_rate": 5.555610174892468e-06, "loss": 17.7073, "step": 259050 }, { "epoch": 0.5233175903069284, "grad_norm": 157.2223358154297, "learning_rate": 5.555263269533945e-06, "loss": 19.5089, "step": 259060 }, { "epoch": 0.5233377909396122, "grad_norm": 173.49676513671875, "learning_rate": 5.554916361469133e-06, "loss": 13.6633, "step": 259070 }, { "epoch": 0.523357991572296, "grad_norm": 239.4381103515625, "learning_rate": 5.554569450699727e-06, "loss": 12.9794, "step": 259080 }, { "epoch": 0.5233781922049798, "grad_norm": 323.8832702636719, "learning_rate": 5.554222537227417e-06, "loss": 25.8988, "step": 259090 }, { "epoch": 0.5233983928376637, "grad_norm": 574.66552734375, "learning_rate": 5.553875621053893e-06, "loss": 23.6245, "step": 259100 }, { "epoch": 0.5234185934703475, "grad_norm": 35.68470764160156, "learning_rate": 5.553528702180848e-06, "loss": 10.4686, "step": 259110 }, { "epoch": 0.5234387941030313, "grad_norm": 256.20672607421875, "learning_rate": 5.55318178060997e-06, "loss": 21.1911, "step": 259120 }, { "epoch": 0.5234589947357151, "grad_norm": 105.2943344116211, "learning_rate": 5.5528348563429524e-06, "loss": 19.1858, "step": 259130 }, { "epoch": 0.5234791953683989, "grad_norm": 63.11040115356445, "learning_rate": 5.552487929381484e-06, "loss": 14.1296, "step": 259140 }, { "epoch": 0.5234993960010828, "grad_norm": 187.67665100097656, "learning_rate": 5.552140999727256e-06, "loss": 14.5744, "step": 259150 }, { "epoch": 0.5235195966337666, "grad_norm": 26.45557975769043, "learning_rate": 5.551794067381959e-06, "loss": 26.2525, "step": 259160 }, { "epoch": 0.5235397972664504, "grad_norm": 419.6767272949219, "learning_rate": 5.551447132347286e-06, "loss": 18.1766, "step": 259170 }, { "epoch": 0.5235599978991342, "grad_norm": 152.57034301757812, "learning_rate": 5.551100194624925e-06, "loss": 19.302, "step": 259180 }, { "epoch": 0.523580198531818, "grad_norm": 64.94535064697266, "learning_rate": 5.5507532542165706e-06, "loss": 21.4099, "step": 259190 }, { "epoch": 0.5236003991645019, "grad_norm": 146.96109008789062, "learning_rate": 5.5504063111239116e-06, "loss": 16.0693, "step": 259200 }, { "epoch": 0.5236205997971857, "grad_norm": 189.69027709960938, "learning_rate": 5.550059365348638e-06, "loss": 14.8267, "step": 259210 }, { "epoch": 0.5236408004298695, "grad_norm": 317.4049072265625, "learning_rate": 5.549712416892442e-06, "loss": 9.34, "step": 259220 }, { "epoch": 0.5236610010625533, "grad_norm": 617.5248413085938, "learning_rate": 5.549365465757013e-06, "loss": 36.3037, "step": 259230 }, { "epoch": 0.5236812016952371, "grad_norm": 271.62249755859375, "learning_rate": 5.549018511944046e-06, "loss": 21.3517, "step": 259240 }, { "epoch": 0.523701402327921, "grad_norm": 246.02162170410156, "learning_rate": 5.548671555455226e-06, "loss": 46.1965, "step": 259250 }, { "epoch": 0.5237216029606048, "grad_norm": 193.2731170654297, "learning_rate": 5.548324596292251e-06, "loss": 34.6661, "step": 259260 }, { "epoch": 0.5237418035932886, "grad_norm": 306.77423095703125, "learning_rate": 5.547977634456806e-06, "loss": 24.1056, "step": 259270 }, { "epoch": 0.5237620042259724, "grad_norm": 279.6055908203125, "learning_rate": 5.547630669950585e-06, "loss": 23.1889, "step": 259280 }, { "epoch": 0.5237822048586561, "grad_norm": 318.9342956542969, "learning_rate": 5.547283702775279e-06, "loss": 20.7575, "step": 259290 }, { "epoch": 0.5238024054913399, "grad_norm": 44.0008544921875, "learning_rate": 5.546936732932578e-06, "loss": 8.5509, "step": 259300 }, { "epoch": 0.5238226061240238, "grad_norm": 803.6051025390625, "learning_rate": 5.546589760424175e-06, "loss": 15.9815, "step": 259310 }, { "epoch": 0.5238428067567076, "grad_norm": 194.72097778320312, "learning_rate": 5.5462427852517585e-06, "loss": 25.2729, "step": 259320 }, { "epoch": 0.5238630073893914, "grad_norm": 264.0461120605469, "learning_rate": 5.545895807417021e-06, "loss": 11.9179, "step": 259330 }, { "epoch": 0.5238832080220752, "grad_norm": 487.90301513671875, "learning_rate": 5.545548826921653e-06, "loss": 19.2199, "step": 259340 }, { "epoch": 0.523903408654759, "grad_norm": 221.6381072998047, "learning_rate": 5.545201843767348e-06, "loss": 19.203, "step": 259350 }, { "epoch": 0.5239236092874429, "grad_norm": 199.18655395507812, "learning_rate": 5.544854857955795e-06, "loss": 18.7355, "step": 259360 }, { "epoch": 0.5239438099201267, "grad_norm": 154.98268127441406, "learning_rate": 5.544507869488684e-06, "loss": 14.3678, "step": 259370 }, { "epoch": 0.5239640105528105, "grad_norm": 165.74281311035156, "learning_rate": 5.544160878367709e-06, "loss": 24.8527, "step": 259380 }, { "epoch": 0.5239842111854943, "grad_norm": 325.2137145996094, "learning_rate": 5.543813884594559e-06, "loss": 16.5163, "step": 259390 }, { "epoch": 0.5240044118181781, "grad_norm": 412.4348449707031, "learning_rate": 5.543466888170927e-06, "loss": 27.4612, "step": 259400 }, { "epoch": 0.524024612450862, "grad_norm": 111.1220474243164, "learning_rate": 5.5431198890985014e-06, "loss": 5.9065, "step": 259410 }, { "epoch": 0.5240448130835458, "grad_norm": 587.433349609375, "learning_rate": 5.542772887378978e-06, "loss": 19.0629, "step": 259420 }, { "epoch": 0.5240650137162296, "grad_norm": 289.19561767578125, "learning_rate": 5.5424258830140434e-06, "loss": 17.2366, "step": 259430 }, { "epoch": 0.5240852143489134, "grad_norm": 266.0487060546875, "learning_rate": 5.542078876005391e-06, "loss": 46.3919, "step": 259440 }, { "epoch": 0.5241054149815972, "grad_norm": 215.0181121826172, "learning_rate": 5.541731866354713e-06, "loss": 9.9413, "step": 259450 }, { "epoch": 0.5241256156142811, "grad_norm": 156.77053833007812, "learning_rate": 5.5413848540637e-06, "loss": 19.7301, "step": 259460 }, { "epoch": 0.5241458162469649, "grad_norm": 275.2035217285156, "learning_rate": 5.541037839134041e-06, "loss": 25.1224, "step": 259470 }, { "epoch": 0.5241660168796487, "grad_norm": 219.26095581054688, "learning_rate": 5.5406908215674306e-06, "loss": 13.8287, "step": 259480 }, { "epoch": 0.5241862175123325, "grad_norm": 146.04112243652344, "learning_rate": 5.5403438013655575e-06, "loss": 15.2955, "step": 259490 }, { "epoch": 0.5242064181450163, "grad_norm": 220.6439666748047, "learning_rate": 5.539996778530114e-06, "loss": 26.7504, "step": 259500 }, { "epoch": 0.5242266187777002, "grad_norm": 562.5277709960938, "learning_rate": 5.539649753062795e-06, "loss": 24.6788, "step": 259510 }, { "epoch": 0.524246819410384, "grad_norm": 334.96044921875, "learning_rate": 5.5393027249652844e-06, "loss": 26.8602, "step": 259520 }, { "epoch": 0.5242670200430678, "grad_norm": 311.2909240722656, "learning_rate": 5.5389556942392794e-06, "loss": 18.3157, "step": 259530 }, { "epoch": 0.5242872206757516, "grad_norm": 171.2403564453125, "learning_rate": 5.538608660886471e-06, "loss": 14.9653, "step": 259540 }, { "epoch": 0.5243074213084353, "grad_norm": 12.023273468017578, "learning_rate": 5.5382616249085476e-06, "loss": 19.617, "step": 259550 }, { "epoch": 0.5243276219411191, "grad_norm": 125.25318145751953, "learning_rate": 5.537914586307204e-06, "loss": 19.8828, "step": 259560 }, { "epoch": 0.524347822573803, "grad_norm": 315.923828125, "learning_rate": 5.537567545084127e-06, "loss": 20.4686, "step": 259570 }, { "epoch": 0.5243680232064868, "grad_norm": 143.9354248046875, "learning_rate": 5.537220501241014e-06, "loss": 12.8849, "step": 259580 }, { "epoch": 0.5243882238391706, "grad_norm": 215.9612579345703, "learning_rate": 5.536873454779552e-06, "loss": 18.5672, "step": 259590 }, { "epoch": 0.5244084244718544, "grad_norm": 462.2752990722656, "learning_rate": 5.536526405701433e-06, "loss": 14.9686, "step": 259600 }, { "epoch": 0.5244286251045382, "grad_norm": 229.48280334472656, "learning_rate": 5.536179354008351e-06, "loss": 5.8511, "step": 259610 }, { "epoch": 0.5244488257372221, "grad_norm": 18.511125564575195, "learning_rate": 5.5358322997019955e-06, "loss": 8.9462, "step": 259620 }, { "epoch": 0.5244690263699059, "grad_norm": 322.1188049316406, "learning_rate": 5.535485242784059e-06, "loss": 8.8003, "step": 259630 }, { "epoch": 0.5244892270025897, "grad_norm": 189.4031524658203, "learning_rate": 5.5351381832562316e-06, "loss": 22.3319, "step": 259640 }, { "epoch": 0.5245094276352735, "grad_norm": 20.326679229736328, "learning_rate": 5.534791121120205e-06, "loss": 21.7279, "step": 259650 }, { "epoch": 0.5245296282679573, "grad_norm": 8.89669132232666, "learning_rate": 5.534444056377671e-06, "loss": 34.2057, "step": 259660 }, { "epoch": 0.5245498289006412, "grad_norm": 220.15574645996094, "learning_rate": 5.534096989030324e-06, "loss": 19.3922, "step": 259670 }, { "epoch": 0.524570029533325, "grad_norm": 244.3562469482422, "learning_rate": 5.53374991907985e-06, "loss": 20.6864, "step": 259680 }, { "epoch": 0.5245902301660088, "grad_norm": 650.185546875, "learning_rate": 5.533402846527947e-06, "loss": 24.2801, "step": 259690 }, { "epoch": 0.5246104307986926, "grad_norm": 288.3074951171875, "learning_rate": 5.5330557713763e-06, "loss": 11.3768, "step": 259700 }, { "epoch": 0.5246306314313764, "grad_norm": 242.17684936523438, "learning_rate": 5.532708693626605e-06, "loss": 9.9874, "step": 259710 }, { "epoch": 0.5246508320640603, "grad_norm": 385.7685241699219, "learning_rate": 5.5323616132805536e-06, "loss": 16.1425, "step": 259720 }, { "epoch": 0.5246710326967441, "grad_norm": 94.9322509765625, "learning_rate": 5.532014530339834e-06, "loss": 36.6531, "step": 259730 }, { "epoch": 0.5246912333294279, "grad_norm": 383.5684814453125, "learning_rate": 5.531667444806142e-06, "loss": 17.1207, "step": 259740 }, { "epoch": 0.5247114339621117, "grad_norm": 341.15679931640625, "learning_rate": 5.5313203566811666e-06, "loss": 11.6147, "step": 259750 }, { "epoch": 0.5247316345947955, "grad_norm": 51.64564895629883, "learning_rate": 5.5309732659666e-06, "loss": 13.8328, "step": 259760 }, { "epoch": 0.5247518352274794, "grad_norm": 135.75830078125, "learning_rate": 5.530626172664135e-06, "loss": 15.8425, "step": 259770 }, { "epoch": 0.5247720358601632, "grad_norm": 123.22210693359375, "learning_rate": 5.530279076775461e-06, "loss": 21.0087, "step": 259780 }, { "epoch": 0.524792236492847, "grad_norm": 184.75772094726562, "learning_rate": 5.529931978302272e-06, "loss": 22.3714, "step": 259790 }, { "epoch": 0.5248124371255307, "grad_norm": 249.99517822265625, "learning_rate": 5.52958487724626e-06, "loss": 25.6398, "step": 259800 }, { "epoch": 0.5248326377582145, "grad_norm": 635.0875244140625, "learning_rate": 5.529237773609114e-06, "loss": 23.4563, "step": 259810 }, { "epoch": 0.5248528383908984, "grad_norm": 141.38330078125, "learning_rate": 5.528890667392527e-06, "loss": 45.8893, "step": 259820 }, { "epoch": 0.5248730390235822, "grad_norm": 406.303955078125, "learning_rate": 5.528543558598193e-06, "loss": 13.9894, "step": 259830 }, { "epoch": 0.524893239656266, "grad_norm": 71.26693725585938, "learning_rate": 5.528196447227798e-06, "loss": 15.8075, "step": 259840 }, { "epoch": 0.5249134402889498, "grad_norm": 117.57123565673828, "learning_rate": 5.527849333283042e-06, "loss": 23.8734, "step": 259850 }, { "epoch": 0.5249336409216336, "grad_norm": 428.620361328125, "learning_rate": 5.527502216765609e-06, "loss": 17.5253, "step": 259860 }, { "epoch": 0.5249538415543175, "grad_norm": 99.49028015136719, "learning_rate": 5.527155097677196e-06, "loss": 15.6156, "step": 259870 }, { "epoch": 0.5249740421870013, "grad_norm": 388.1944580078125, "learning_rate": 5.526807976019492e-06, "loss": 17.0942, "step": 259880 }, { "epoch": 0.5249942428196851, "grad_norm": 467.234619140625, "learning_rate": 5.526460851794191e-06, "loss": 19.0759, "step": 259890 }, { "epoch": 0.5250144434523689, "grad_norm": 58.666969299316406, "learning_rate": 5.526113725002984e-06, "loss": 12.6998, "step": 259900 }, { "epoch": 0.5250346440850527, "grad_norm": 140.40518188476562, "learning_rate": 5.525766595647561e-06, "loss": 14.2474, "step": 259910 }, { "epoch": 0.5250548447177366, "grad_norm": 20.88547706604004, "learning_rate": 5.525419463729615e-06, "loss": 13.4322, "step": 259920 }, { "epoch": 0.5250750453504204, "grad_norm": 653.6849975585938, "learning_rate": 5.525072329250839e-06, "loss": 12.2083, "step": 259930 }, { "epoch": 0.5250952459831042, "grad_norm": 671.771728515625, "learning_rate": 5.524725192212924e-06, "loss": 28.857, "step": 259940 }, { "epoch": 0.525115446615788, "grad_norm": 334.3973388671875, "learning_rate": 5.524378052617563e-06, "loss": 17.3562, "step": 259950 }, { "epoch": 0.5251356472484718, "grad_norm": 7.760514736175537, "learning_rate": 5.524030910466447e-06, "loss": 18.6441, "step": 259960 }, { "epoch": 0.5251558478811557, "grad_norm": 314.5237121582031, "learning_rate": 5.523683765761266e-06, "loss": 18.802, "step": 259970 }, { "epoch": 0.5251760485138395, "grad_norm": 304.3259582519531, "learning_rate": 5.523336618503715e-06, "loss": 21.3644, "step": 259980 }, { "epoch": 0.5251962491465233, "grad_norm": 221.23440551757812, "learning_rate": 5.522989468695487e-06, "loss": 24.9545, "step": 259990 }, { "epoch": 0.5252164497792071, "grad_norm": 267.3448486328125, "learning_rate": 5.522642316338268e-06, "loss": 13.7019, "step": 260000 }, { "epoch": 0.5252366504118909, "grad_norm": 617.1513671875, "learning_rate": 5.5222951614337564e-06, "loss": 25.0066, "step": 260010 }, { "epoch": 0.5252568510445748, "grad_norm": 203.33140563964844, "learning_rate": 5.521948003983639e-06, "loss": 12.1865, "step": 260020 }, { "epoch": 0.5252770516772586, "grad_norm": 761.9230346679688, "learning_rate": 5.521600843989613e-06, "loss": 21.656, "step": 260030 }, { "epoch": 0.5252972523099424, "grad_norm": 83.050537109375, "learning_rate": 5.521253681453366e-06, "loss": 20.3212, "step": 260040 }, { "epoch": 0.5253174529426262, "grad_norm": 277.6653747558594, "learning_rate": 5.520906516376592e-06, "loss": 9.0875, "step": 260050 }, { "epoch": 0.5253376535753099, "grad_norm": 117.86801147460938, "learning_rate": 5.520559348760984e-06, "loss": 22.3141, "step": 260060 }, { "epoch": 0.5253578542079937, "grad_norm": 30.09228515625, "learning_rate": 5.520212178608231e-06, "loss": 7.2699, "step": 260070 }, { "epoch": 0.5253780548406776, "grad_norm": 48.2441291809082, "learning_rate": 5.519865005920029e-06, "loss": 9.1876, "step": 260080 }, { "epoch": 0.5253982554733614, "grad_norm": 228.25572204589844, "learning_rate": 5.519517830698067e-06, "loss": 8.1684, "step": 260090 }, { "epoch": 0.5254184561060452, "grad_norm": 396.6614990234375, "learning_rate": 5.519170652944037e-06, "loss": 34.1241, "step": 260100 }, { "epoch": 0.525438656738729, "grad_norm": 465.3713684082031, "learning_rate": 5.518823472659634e-06, "loss": 20.6011, "step": 260110 }, { "epoch": 0.5254588573714128, "grad_norm": 297.3875427246094, "learning_rate": 5.518476289846548e-06, "loss": 15.8737, "step": 260120 }, { "epoch": 0.5254790580040967, "grad_norm": 363.44073486328125, "learning_rate": 5.518129104506471e-06, "loss": 18.7655, "step": 260130 }, { "epoch": 0.5254992586367805, "grad_norm": 275.0386962890625, "learning_rate": 5.5177819166410955e-06, "loss": 16.0229, "step": 260140 }, { "epoch": 0.5255194592694643, "grad_norm": 244.6927032470703, "learning_rate": 5.517434726252113e-06, "loss": 10.5713, "step": 260150 }, { "epoch": 0.5255396599021481, "grad_norm": 60.386268615722656, "learning_rate": 5.5170875333412176e-06, "loss": 20.076, "step": 260160 }, { "epoch": 0.525559860534832, "grad_norm": 105.25911712646484, "learning_rate": 5.516740337910101e-06, "loss": 9.4213, "step": 260170 }, { "epoch": 0.5255800611675158, "grad_norm": 292.5667724609375, "learning_rate": 5.516393139960452e-06, "loss": 19.9574, "step": 260180 }, { "epoch": 0.5256002618001996, "grad_norm": 563.6917114257812, "learning_rate": 5.516045939493968e-06, "loss": 22.946, "step": 260190 }, { "epoch": 0.5256204624328834, "grad_norm": 619.274169921875, "learning_rate": 5.515698736512337e-06, "loss": 29.9974, "step": 260200 }, { "epoch": 0.5256406630655672, "grad_norm": 209.61618041992188, "learning_rate": 5.515351531017254e-06, "loss": 24.0471, "step": 260210 }, { "epoch": 0.525660863698251, "grad_norm": 468.81939697265625, "learning_rate": 5.51500432301041e-06, "loss": 32.248, "step": 260220 }, { "epoch": 0.5256810643309349, "grad_norm": 202.190185546875, "learning_rate": 5.514657112493497e-06, "loss": 19.3793, "step": 260230 }, { "epoch": 0.5257012649636187, "grad_norm": 170.1098175048828, "learning_rate": 5.514309899468209e-06, "loss": 20.8298, "step": 260240 }, { "epoch": 0.5257214655963025, "grad_norm": 434.9039001464844, "learning_rate": 5.513962683936235e-06, "loss": 16.4601, "step": 260250 }, { "epoch": 0.5257416662289863, "grad_norm": 335.7632141113281, "learning_rate": 5.51361546589927e-06, "loss": 25.1906, "step": 260260 }, { "epoch": 0.5257618668616701, "grad_norm": 392.336181640625, "learning_rate": 5.513268245359005e-06, "loss": 14.5847, "step": 260270 }, { "epoch": 0.525782067494354, "grad_norm": 225.10400390625, "learning_rate": 5.512921022317135e-06, "loss": 27.6648, "step": 260280 }, { "epoch": 0.5258022681270378, "grad_norm": 293.0561218261719, "learning_rate": 5.512573796775347e-06, "loss": 24.6085, "step": 260290 }, { "epoch": 0.5258224687597216, "grad_norm": 141.6270294189453, "learning_rate": 5.512226568735338e-06, "loss": 21.7468, "step": 260300 }, { "epoch": 0.5258426693924053, "grad_norm": 294.03857421875, "learning_rate": 5.5118793381987985e-06, "loss": 22.3551, "step": 260310 }, { "epoch": 0.5258628700250891, "grad_norm": 200.8387908935547, "learning_rate": 5.511532105167422e-06, "loss": 13.7207, "step": 260320 }, { "epoch": 0.525883070657773, "grad_norm": 374.4164123535156, "learning_rate": 5.5111848696429005e-06, "loss": 7.1407, "step": 260330 }, { "epoch": 0.5259032712904568, "grad_norm": 212.82931518554688, "learning_rate": 5.510837631626923e-06, "loss": 5.3801, "step": 260340 }, { "epoch": 0.5259234719231406, "grad_norm": 449.7841796875, "learning_rate": 5.510490391121188e-06, "loss": 17.3518, "step": 260350 }, { "epoch": 0.5259436725558244, "grad_norm": 188.1182098388672, "learning_rate": 5.510143148127384e-06, "loss": 13.9168, "step": 260360 }, { "epoch": 0.5259638731885082, "grad_norm": 173.06532287597656, "learning_rate": 5.509795902647203e-06, "loss": 22.3141, "step": 260370 }, { "epoch": 0.525984073821192, "grad_norm": 574.2514038085938, "learning_rate": 5.509448654682339e-06, "loss": 22.9346, "step": 260380 }, { "epoch": 0.5260042744538759, "grad_norm": 201.17144775390625, "learning_rate": 5.509101404234485e-06, "loss": 10.4287, "step": 260390 }, { "epoch": 0.5260244750865597, "grad_norm": 212.7655029296875, "learning_rate": 5.508754151305332e-06, "loss": 23.4351, "step": 260400 }, { "epoch": 0.5260446757192435, "grad_norm": 437.1439514160156, "learning_rate": 5.508406895896573e-06, "loss": 21.1035, "step": 260410 }, { "epoch": 0.5260648763519273, "grad_norm": 171.0532989501953, "learning_rate": 5.5080596380099e-06, "loss": 14.8553, "step": 260420 }, { "epoch": 0.5260850769846112, "grad_norm": 266.5224304199219, "learning_rate": 5.507712377647006e-06, "loss": 10.0713, "step": 260430 }, { "epoch": 0.526105277617295, "grad_norm": 215.9308319091797, "learning_rate": 5.507365114809585e-06, "loss": 27.6167, "step": 260440 }, { "epoch": 0.5261254782499788, "grad_norm": 619.4093627929688, "learning_rate": 5.507017849499326e-06, "loss": 13.0589, "step": 260450 }, { "epoch": 0.5261456788826626, "grad_norm": 352.3145446777344, "learning_rate": 5.506670581717925e-06, "loss": 17.0362, "step": 260460 }, { "epoch": 0.5261658795153464, "grad_norm": 274.9704284667969, "learning_rate": 5.506323311467071e-06, "loss": 6.3565, "step": 260470 }, { "epoch": 0.5261860801480303, "grad_norm": 275.3585510253906, "learning_rate": 5.5059760387484595e-06, "loss": 13.316, "step": 260480 }, { "epoch": 0.5262062807807141, "grad_norm": 313.4962158203125, "learning_rate": 5.505628763563783e-06, "loss": 21.6234, "step": 260490 }, { "epoch": 0.5262264814133979, "grad_norm": 205.90879821777344, "learning_rate": 5.505281485914732e-06, "loss": 26.9509, "step": 260500 }, { "epoch": 0.5262466820460817, "grad_norm": 675.347412109375, "learning_rate": 5.504934205803002e-06, "loss": 22.1585, "step": 260510 }, { "epoch": 0.5262668826787655, "grad_norm": 334.2474365234375, "learning_rate": 5.504586923230283e-06, "loss": 13.8581, "step": 260520 }, { "epoch": 0.5262870833114494, "grad_norm": 252.559814453125, "learning_rate": 5.504239638198267e-06, "loss": 19.4571, "step": 260530 }, { "epoch": 0.5263072839441332, "grad_norm": 126.58157348632812, "learning_rate": 5.503892350708651e-06, "loss": 9.5694, "step": 260540 }, { "epoch": 0.526327484576817, "grad_norm": 208.34593200683594, "learning_rate": 5.503545060763123e-06, "loss": 33.266, "step": 260550 }, { "epoch": 0.5263476852095008, "grad_norm": 307.50457763671875, "learning_rate": 5.503197768363378e-06, "loss": 14.3512, "step": 260560 }, { "epoch": 0.5263678858421845, "grad_norm": 115.52758026123047, "learning_rate": 5.502850473511108e-06, "loss": 11.9319, "step": 260570 }, { "epoch": 0.5263880864748683, "grad_norm": 256.9719543457031, "learning_rate": 5.502503176208006e-06, "loss": 23.7465, "step": 260580 }, { "epoch": 0.5264082871075522, "grad_norm": 118.66407012939453, "learning_rate": 5.502155876455764e-06, "loss": 23.2442, "step": 260590 }, { "epoch": 0.526428487740236, "grad_norm": 309.2132873535156, "learning_rate": 5.5018085742560745e-06, "loss": 15.7358, "step": 260600 }, { "epoch": 0.5264486883729198, "grad_norm": 160.14321899414062, "learning_rate": 5.501461269610632e-06, "loss": 12.2317, "step": 260610 }, { "epoch": 0.5264688890056036, "grad_norm": 293.69293212890625, "learning_rate": 5.501113962521129e-06, "loss": 20.3768, "step": 260620 }, { "epoch": 0.5264890896382874, "grad_norm": 0.0, "learning_rate": 5.5007666529892545e-06, "loss": 23.3833, "step": 260630 }, { "epoch": 0.5265092902709713, "grad_norm": 182.0484161376953, "learning_rate": 5.500419341016707e-06, "loss": 13.8733, "step": 260640 }, { "epoch": 0.5265294909036551, "grad_norm": 183.57191467285156, "learning_rate": 5.500072026605175e-06, "loss": 14.5075, "step": 260650 }, { "epoch": 0.5265496915363389, "grad_norm": 258.9145202636719, "learning_rate": 5.499724709756352e-06, "loss": 12.5102, "step": 260660 }, { "epoch": 0.5265698921690227, "grad_norm": 449.4128112792969, "learning_rate": 5.499377390471933e-06, "loss": 30.071, "step": 260670 }, { "epoch": 0.5265900928017065, "grad_norm": 257.4154968261719, "learning_rate": 5.4990300687536065e-06, "loss": 11.5213, "step": 260680 }, { "epoch": 0.5266102934343904, "grad_norm": 41.63719177246094, "learning_rate": 5.498682744603071e-06, "loss": 13.2055, "step": 260690 }, { "epoch": 0.5266304940670742, "grad_norm": 310.0564880371094, "learning_rate": 5.498335418022015e-06, "loss": 13.1009, "step": 260700 }, { "epoch": 0.526650694699758, "grad_norm": 141.1959228515625, "learning_rate": 5.497988089012132e-06, "loss": 15.4368, "step": 260710 }, { "epoch": 0.5266708953324418, "grad_norm": 474.60589599609375, "learning_rate": 5.497640757575116e-06, "loss": 24.093, "step": 260720 }, { "epoch": 0.5266910959651256, "grad_norm": 265.9149475097656, "learning_rate": 5.497293423712661e-06, "loss": 23.8957, "step": 260730 }, { "epoch": 0.5267112965978095, "grad_norm": 186.1737823486328, "learning_rate": 5.4969460874264555e-06, "loss": 9.5942, "step": 260740 }, { "epoch": 0.5267314972304933, "grad_norm": 487.4860534667969, "learning_rate": 5.496598748718196e-06, "loss": 15.7591, "step": 260750 }, { "epoch": 0.5267516978631771, "grad_norm": 384.3796081542969, "learning_rate": 5.4962514075895746e-06, "loss": 19.1418, "step": 260760 }, { "epoch": 0.5267718984958609, "grad_norm": 326.15771484375, "learning_rate": 5.4959040640422836e-06, "loss": 20.8909, "step": 260770 }, { "epoch": 0.5267920991285447, "grad_norm": 329.18975830078125, "learning_rate": 5.495556718078017e-06, "loss": 23.665, "step": 260780 }, { "epoch": 0.5268122997612286, "grad_norm": 335.560302734375, "learning_rate": 5.495209369698466e-06, "loss": 16.2899, "step": 260790 }, { "epoch": 0.5268325003939124, "grad_norm": 339.13177490234375, "learning_rate": 5.4948620189053255e-06, "loss": 16.6005, "step": 260800 }, { "epoch": 0.5268527010265962, "grad_norm": 239.74522399902344, "learning_rate": 5.494514665700288e-06, "loss": 12.2394, "step": 260810 }, { "epoch": 0.52687290165928, "grad_norm": 754.085205078125, "learning_rate": 5.494167310085045e-06, "loss": 27.9257, "step": 260820 }, { "epoch": 0.5268931022919637, "grad_norm": 357.1209411621094, "learning_rate": 5.49381995206129e-06, "loss": 53.4285, "step": 260830 }, { "epoch": 0.5269133029246476, "grad_norm": 149.31304931640625, "learning_rate": 5.493472591630717e-06, "loss": 16.3919, "step": 260840 }, { "epoch": 0.5269335035573314, "grad_norm": 224.47265625, "learning_rate": 5.49312522879502e-06, "loss": 25.8907, "step": 260850 }, { "epoch": 0.5269537041900152, "grad_norm": 184.21197509765625, "learning_rate": 5.492777863555889e-06, "loss": 21.0767, "step": 260860 }, { "epoch": 0.526973904822699, "grad_norm": 653.7296142578125, "learning_rate": 5.492430495915018e-06, "loss": 18.8739, "step": 260870 }, { "epoch": 0.5269941054553828, "grad_norm": 484.0205078125, "learning_rate": 5.4920831258741016e-06, "loss": 20.2987, "step": 260880 }, { "epoch": 0.5270143060880667, "grad_norm": 132.02854919433594, "learning_rate": 5.491735753434832e-06, "loss": 15.4971, "step": 260890 }, { "epoch": 0.5270345067207505, "grad_norm": 369.4942626953125, "learning_rate": 5.491388378598899e-06, "loss": 12.1979, "step": 260900 }, { "epoch": 0.5270547073534343, "grad_norm": 278.73492431640625, "learning_rate": 5.4910410013680015e-06, "loss": 28.2986, "step": 260910 }, { "epoch": 0.5270749079861181, "grad_norm": 358.2839660644531, "learning_rate": 5.490693621743829e-06, "loss": 22.6286, "step": 260920 }, { "epoch": 0.5270951086188019, "grad_norm": 623.6057739257812, "learning_rate": 5.490346239728076e-06, "loss": 30.6603, "step": 260930 }, { "epoch": 0.5271153092514858, "grad_norm": 240.54376220703125, "learning_rate": 5.489998855322435e-06, "loss": 10.4583, "step": 260940 }, { "epoch": 0.5271355098841696, "grad_norm": 335.4035949707031, "learning_rate": 5.489651468528596e-06, "loss": 16.8745, "step": 260950 }, { "epoch": 0.5271557105168534, "grad_norm": 282.6549072265625, "learning_rate": 5.489304079348259e-06, "loss": 17.3337, "step": 260960 }, { "epoch": 0.5271759111495372, "grad_norm": 1038.04150390625, "learning_rate": 5.488956687783111e-06, "loss": 33.9547, "step": 260970 }, { "epoch": 0.527196111782221, "grad_norm": 2289.103271484375, "learning_rate": 5.4886092938348475e-06, "loss": 22.5374, "step": 260980 }, { "epoch": 0.5272163124149049, "grad_norm": 185.87796020507812, "learning_rate": 5.488261897505163e-06, "loss": 12.3492, "step": 260990 }, { "epoch": 0.5272365130475887, "grad_norm": 149.62278747558594, "learning_rate": 5.487914498795748e-06, "loss": 23.4671, "step": 261000 }, { "epoch": 0.5272567136802725, "grad_norm": 162.9595184326172, "learning_rate": 5.487567097708298e-06, "loss": 8.0455, "step": 261010 }, { "epoch": 0.5272769143129563, "grad_norm": 392.5347900390625, "learning_rate": 5.487219694244505e-06, "loss": 24.5853, "step": 261020 }, { "epoch": 0.5272971149456401, "grad_norm": 153.32264709472656, "learning_rate": 5.48687228840606e-06, "loss": 15.0642, "step": 261030 }, { "epoch": 0.527317315578324, "grad_norm": 789.3274536132812, "learning_rate": 5.48652488019466e-06, "loss": 40.9118, "step": 261040 }, { "epoch": 0.5273375162110078, "grad_norm": 139.1239013671875, "learning_rate": 5.486177469611999e-06, "loss": 22.2183, "step": 261050 }, { "epoch": 0.5273577168436916, "grad_norm": 49.93955993652344, "learning_rate": 5.485830056659763e-06, "loss": 16.9315, "step": 261060 }, { "epoch": 0.5273779174763754, "grad_norm": 100.06725311279297, "learning_rate": 5.4854826413396546e-06, "loss": 10.1681, "step": 261070 }, { "epoch": 0.5273981181090591, "grad_norm": 615.5888671875, "learning_rate": 5.485135223653362e-06, "loss": 24.4698, "step": 261080 }, { "epoch": 0.5274183187417429, "grad_norm": 291.5068359375, "learning_rate": 5.484787803602577e-06, "loss": 17.7381, "step": 261090 }, { "epoch": 0.5274385193744268, "grad_norm": 386.04803466796875, "learning_rate": 5.484440381188997e-06, "loss": 20.8227, "step": 261100 }, { "epoch": 0.5274587200071106, "grad_norm": 81.23558044433594, "learning_rate": 5.484092956414312e-06, "loss": 15.1964, "step": 261110 }, { "epoch": 0.5274789206397944, "grad_norm": 468.2884521484375, "learning_rate": 5.483745529280219e-06, "loss": 19.6629, "step": 261120 }, { "epoch": 0.5274991212724782, "grad_norm": 306.8998107910156, "learning_rate": 5.4833980997884054e-06, "loss": 13.4641, "step": 261130 }, { "epoch": 0.527519321905162, "grad_norm": 190.36932373046875, "learning_rate": 5.483050667940571e-06, "loss": 12.7973, "step": 261140 }, { "epoch": 0.5275395225378459, "grad_norm": 511.7398986816406, "learning_rate": 5.482703233738405e-06, "loss": 31.4582, "step": 261150 }, { "epoch": 0.5275597231705297, "grad_norm": 224.6817169189453, "learning_rate": 5.482355797183602e-06, "loss": 32.9045, "step": 261160 }, { "epoch": 0.5275799238032135, "grad_norm": 497.6763000488281, "learning_rate": 5.482008358277855e-06, "loss": 22.2459, "step": 261170 }, { "epoch": 0.5276001244358973, "grad_norm": 416.8973693847656, "learning_rate": 5.48166091702286e-06, "loss": 19.7433, "step": 261180 }, { "epoch": 0.5276203250685811, "grad_norm": 315.3449401855469, "learning_rate": 5.481313473420306e-06, "loss": 29.3417, "step": 261190 }, { "epoch": 0.527640525701265, "grad_norm": 569.2698974609375, "learning_rate": 5.480966027471889e-06, "loss": 18.4298, "step": 261200 }, { "epoch": 0.5276607263339488, "grad_norm": 394.5986022949219, "learning_rate": 5.480618579179301e-06, "loss": 30.4034, "step": 261210 }, { "epoch": 0.5276809269666326, "grad_norm": 268.4843444824219, "learning_rate": 5.4802711285442375e-06, "loss": 13.321, "step": 261220 }, { "epoch": 0.5277011275993164, "grad_norm": 337.9642028808594, "learning_rate": 5.4799236755683916e-06, "loss": 28.016, "step": 261230 }, { "epoch": 0.5277213282320002, "grad_norm": 223.8070831298828, "learning_rate": 5.479576220253453e-06, "loss": 20.2617, "step": 261240 }, { "epoch": 0.5277415288646841, "grad_norm": 367.6058654785156, "learning_rate": 5.4792287626011206e-06, "loss": 26.7933, "step": 261250 }, { "epoch": 0.5277617294973679, "grad_norm": 389.27496337890625, "learning_rate": 5.478881302613085e-06, "loss": 12.5764, "step": 261260 }, { "epoch": 0.5277819301300517, "grad_norm": 296.9028015136719, "learning_rate": 5.478533840291039e-06, "loss": 22.1718, "step": 261270 }, { "epoch": 0.5278021307627355, "grad_norm": 291.1451110839844, "learning_rate": 5.478186375636678e-06, "loss": 22.8295, "step": 261280 }, { "epoch": 0.5278223313954193, "grad_norm": 592.3668212890625, "learning_rate": 5.477838908651694e-06, "loss": 23.7733, "step": 261290 }, { "epoch": 0.5278425320281032, "grad_norm": 425.3497314453125, "learning_rate": 5.477491439337782e-06, "loss": 15.8703, "step": 261300 }, { "epoch": 0.527862732660787, "grad_norm": 107.24759674072266, "learning_rate": 5.477143967696634e-06, "loss": 11.121, "step": 261310 }, { "epoch": 0.5278829332934708, "grad_norm": 311.99432373046875, "learning_rate": 5.476796493729943e-06, "loss": 24.1567, "step": 261320 }, { "epoch": 0.5279031339261546, "grad_norm": 195.9283447265625, "learning_rate": 5.476449017439406e-06, "loss": 24.3931, "step": 261330 }, { "epoch": 0.5279233345588383, "grad_norm": 253.81068420410156, "learning_rate": 5.476101538826714e-06, "loss": 18.5882, "step": 261340 }, { "epoch": 0.5279435351915222, "grad_norm": 378.4322204589844, "learning_rate": 5.4757540578935595e-06, "loss": 19.8119, "step": 261350 }, { "epoch": 0.527963735824206, "grad_norm": 364.89776611328125, "learning_rate": 5.475406574641637e-06, "loss": 31.7021, "step": 261360 }, { "epoch": 0.5279839364568898, "grad_norm": 212.2139129638672, "learning_rate": 5.475059089072642e-06, "loss": 26.2784, "step": 261370 }, { "epoch": 0.5280041370895736, "grad_norm": 347.1594543457031, "learning_rate": 5.474711601188266e-06, "loss": 18.3404, "step": 261380 }, { "epoch": 0.5280243377222574, "grad_norm": 31.82312774658203, "learning_rate": 5.4743641109902045e-06, "loss": 17.4314, "step": 261390 }, { "epoch": 0.5280445383549413, "grad_norm": 470.8443603515625, "learning_rate": 5.474016618480147e-06, "loss": 12.2554, "step": 261400 }, { "epoch": 0.5280647389876251, "grad_norm": 272.6741943359375, "learning_rate": 5.473669123659793e-06, "loss": 27.4181, "step": 261410 }, { "epoch": 0.5280849396203089, "grad_norm": 293.17816162109375, "learning_rate": 5.4733216265308305e-06, "loss": 8.2727, "step": 261420 }, { "epoch": 0.5281051402529927, "grad_norm": 215.61517333984375, "learning_rate": 5.472974127094957e-06, "loss": 16.4116, "step": 261430 }, { "epoch": 0.5281253408856765, "grad_norm": 32.00222396850586, "learning_rate": 5.472626625353865e-06, "loss": 27.2694, "step": 261440 }, { "epoch": 0.5281455415183604, "grad_norm": 175.89833068847656, "learning_rate": 5.472279121309248e-06, "loss": 13.2481, "step": 261450 }, { "epoch": 0.5281657421510442, "grad_norm": 222.24864196777344, "learning_rate": 5.471931614962802e-06, "loss": 18.3643, "step": 261460 }, { "epoch": 0.528185942783728, "grad_norm": 189.31253051757812, "learning_rate": 5.471584106316216e-06, "loss": 12.2317, "step": 261470 }, { "epoch": 0.5282061434164118, "grad_norm": 281.2232971191406, "learning_rate": 5.471236595371187e-06, "loss": 15.3736, "step": 261480 }, { "epoch": 0.5282263440490956, "grad_norm": 150.04127502441406, "learning_rate": 5.470889082129407e-06, "loss": 26.7016, "step": 261490 }, { "epoch": 0.5282465446817795, "grad_norm": 171.63723754882812, "learning_rate": 5.470541566592573e-06, "loss": 8.8887, "step": 261500 }, { "epoch": 0.5282667453144633, "grad_norm": 332.7658386230469, "learning_rate": 5.470194048762374e-06, "loss": 13.2933, "step": 261510 }, { "epoch": 0.5282869459471471, "grad_norm": 138.92823791503906, "learning_rate": 5.469846528640508e-06, "loss": 7.2022, "step": 261520 }, { "epoch": 0.5283071465798309, "grad_norm": 393.7483825683594, "learning_rate": 5.469499006228666e-06, "loss": 15.0655, "step": 261530 }, { "epoch": 0.5283273472125147, "grad_norm": 764.9679565429688, "learning_rate": 5.469151481528543e-06, "loss": 16.6626, "step": 261540 }, { "epoch": 0.5283475478451986, "grad_norm": 786.9132690429688, "learning_rate": 5.468803954541834e-06, "loss": 36.1441, "step": 261550 }, { "epoch": 0.5283677484778824, "grad_norm": 479.46099853515625, "learning_rate": 5.468456425270229e-06, "loss": 38.6748, "step": 261560 }, { "epoch": 0.5283879491105662, "grad_norm": 215.56765747070312, "learning_rate": 5.468108893715426e-06, "loss": 20.7494, "step": 261570 }, { "epoch": 0.52840814974325, "grad_norm": 529.1991577148438, "learning_rate": 5.467761359879116e-06, "loss": 17.4953, "step": 261580 }, { "epoch": 0.5284283503759337, "grad_norm": 307.8897705078125, "learning_rate": 5.467413823762994e-06, "loss": 15.1405, "step": 261590 }, { "epoch": 0.5284485510086175, "grad_norm": 169.0829315185547, "learning_rate": 5.467066285368754e-06, "loss": 23.7261, "step": 261600 }, { "epoch": 0.5284687516413014, "grad_norm": 63.61439514160156, "learning_rate": 5.466718744698089e-06, "loss": 12.3982, "step": 261610 }, { "epoch": 0.5284889522739852, "grad_norm": 388.80352783203125, "learning_rate": 5.4663712017526946e-06, "loss": 25.1444, "step": 261620 }, { "epoch": 0.528509152906669, "grad_norm": 239.2300567626953, "learning_rate": 5.466023656534263e-06, "loss": 26.8509, "step": 261630 }, { "epoch": 0.5285293535393528, "grad_norm": 274.713623046875, "learning_rate": 5.4656761090444875e-06, "loss": 10.6086, "step": 261640 }, { "epoch": 0.5285495541720366, "grad_norm": 201.89999389648438, "learning_rate": 5.465328559285064e-06, "loss": 17.0904, "step": 261650 }, { "epoch": 0.5285697548047205, "grad_norm": 144.6829833984375, "learning_rate": 5.464981007257686e-06, "loss": 12.767, "step": 261660 }, { "epoch": 0.5285899554374043, "grad_norm": 317.0479431152344, "learning_rate": 5.4646334529640454e-06, "loss": 19.4758, "step": 261670 }, { "epoch": 0.5286101560700881, "grad_norm": 493.3648681640625, "learning_rate": 5.46428589640584e-06, "loss": 41.8456, "step": 261680 }, { "epoch": 0.5286303567027719, "grad_norm": 204.2173309326172, "learning_rate": 5.46393833758476e-06, "loss": 16.5158, "step": 261690 }, { "epoch": 0.5286505573354557, "grad_norm": 177.1187744140625, "learning_rate": 5.463590776502501e-06, "loss": 11.5184, "step": 261700 }, { "epoch": 0.5286707579681396, "grad_norm": 361.23150634765625, "learning_rate": 5.463243213160758e-06, "loss": 23.1856, "step": 261710 }, { "epoch": 0.5286909586008234, "grad_norm": 171.1581573486328, "learning_rate": 5.462895647561222e-06, "loss": 12.1291, "step": 261720 }, { "epoch": 0.5287111592335072, "grad_norm": 200.46287536621094, "learning_rate": 5.46254807970559e-06, "loss": 20.357, "step": 261730 }, { "epoch": 0.528731359866191, "grad_norm": 379.4353942871094, "learning_rate": 5.462200509595553e-06, "loss": 14.1759, "step": 261740 }, { "epoch": 0.5287515604988748, "grad_norm": 338.06365966796875, "learning_rate": 5.461852937232809e-06, "loss": 21.3981, "step": 261750 }, { "epoch": 0.5287717611315587, "grad_norm": 311.8997497558594, "learning_rate": 5.461505362619048e-06, "loss": 14.3849, "step": 261760 }, { "epoch": 0.5287919617642425, "grad_norm": 390.7314453125, "learning_rate": 5.4611577857559676e-06, "loss": 23.2989, "step": 261770 }, { "epoch": 0.5288121623969263, "grad_norm": 205.8661651611328, "learning_rate": 5.460810206645258e-06, "loss": 11.5159, "step": 261780 }, { "epoch": 0.5288323630296101, "grad_norm": 483.4912109375, "learning_rate": 5.460462625288617e-06, "loss": 22.4613, "step": 261790 }, { "epoch": 0.5288525636622939, "grad_norm": 652.6370849609375, "learning_rate": 5.460115041687737e-06, "loss": 19.4657, "step": 261800 }, { "epoch": 0.5288727642949778, "grad_norm": 5.648259162902832, "learning_rate": 5.4597674558443114e-06, "loss": 28.5796, "step": 261810 }, { "epoch": 0.5288929649276616, "grad_norm": 183.24977111816406, "learning_rate": 5.459419867760034e-06, "loss": 21.6729, "step": 261820 }, { "epoch": 0.5289131655603454, "grad_norm": 0.08587420731782913, "learning_rate": 5.4590722774366015e-06, "loss": 21.9042, "step": 261830 }, { "epoch": 0.5289333661930292, "grad_norm": 481.13323974609375, "learning_rate": 5.458724684875707e-06, "loss": 23.3072, "step": 261840 }, { "epoch": 0.5289535668257129, "grad_norm": 441.4974060058594, "learning_rate": 5.4583770900790415e-06, "loss": 20.1869, "step": 261850 }, { "epoch": 0.5289737674583967, "grad_norm": 259.1266784667969, "learning_rate": 5.458029493048303e-06, "loss": 12.5706, "step": 261860 }, { "epoch": 0.5289939680910806, "grad_norm": 315.01617431640625, "learning_rate": 5.457681893785185e-06, "loss": 17.2688, "step": 261870 }, { "epoch": 0.5290141687237644, "grad_norm": 214.92893981933594, "learning_rate": 5.45733429229138e-06, "loss": 13.9789, "step": 261880 }, { "epoch": 0.5290343693564482, "grad_norm": 122.87836456298828, "learning_rate": 5.456986688568584e-06, "loss": 22.5008, "step": 261890 }, { "epoch": 0.529054569989132, "grad_norm": 472.9090881347656, "learning_rate": 5.456639082618489e-06, "loss": 15.3659, "step": 261900 }, { "epoch": 0.5290747706218158, "grad_norm": 140.0076141357422, "learning_rate": 5.456291474442792e-06, "loss": 27.1757, "step": 261910 }, { "epoch": 0.5290949712544997, "grad_norm": 197.6658172607422, "learning_rate": 5.455943864043185e-06, "loss": 13.3154, "step": 261920 }, { "epoch": 0.5291151718871835, "grad_norm": 155.11343383789062, "learning_rate": 5.4555962514213624e-06, "loss": 29.7515, "step": 261930 }, { "epoch": 0.5291353725198673, "grad_norm": 179.83419799804688, "learning_rate": 5.4552486365790196e-06, "loss": 15.2664, "step": 261940 }, { "epoch": 0.5291555731525511, "grad_norm": 191.88998413085938, "learning_rate": 5.454901019517851e-06, "loss": 14.3281, "step": 261950 }, { "epoch": 0.529175773785235, "grad_norm": 230.7174072265625, "learning_rate": 5.454553400239548e-06, "loss": 10.3403, "step": 261960 }, { "epoch": 0.5291959744179188, "grad_norm": 273.0036315917969, "learning_rate": 5.454205778745808e-06, "loss": 10.0111, "step": 261970 }, { "epoch": 0.5292161750506026, "grad_norm": 167.47923278808594, "learning_rate": 5.453858155038324e-06, "loss": 14.7587, "step": 261980 }, { "epoch": 0.5292363756832864, "grad_norm": 336.5835876464844, "learning_rate": 5.45351052911879e-06, "loss": 23.0351, "step": 261990 }, { "epoch": 0.5292565763159702, "grad_norm": 187.7889404296875, "learning_rate": 5.453162900988902e-06, "loss": 24.6869, "step": 262000 }, { "epoch": 0.529276776948654, "grad_norm": 742.1465454101562, "learning_rate": 5.452815270650351e-06, "loss": 25.7629, "step": 262010 }, { "epoch": 0.5292969775813379, "grad_norm": 199.34866333007812, "learning_rate": 5.452467638104834e-06, "loss": 12.3809, "step": 262020 }, { "epoch": 0.5293171782140217, "grad_norm": 257.7416687011719, "learning_rate": 5.452120003354046e-06, "loss": 15.4352, "step": 262030 }, { "epoch": 0.5293373788467055, "grad_norm": 411.6751403808594, "learning_rate": 5.451772366399678e-06, "loss": 30.2565, "step": 262040 }, { "epoch": 0.5293575794793893, "grad_norm": 232.1109161376953, "learning_rate": 5.451424727243428e-06, "loss": 17.5583, "step": 262050 }, { "epoch": 0.5293777801120731, "grad_norm": 245.89053344726562, "learning_rate": 5.451077085886987e-06, "loss": 30.0109, "step": 262060 }, { "epoch": 0.529397980744757, "grad_norm": 195.16000366210938, "learning_rate": 5.450729442332052e-06, "loss": 25.8023, "step": 262070 }, { "epoch": 0.5294181813774408, "grad_norm": 222.051513671875, "learning_rate": 5.450381796580317e-06, "loss": 9.7835, "step": 262080 }, { "epoch": 0.5294383820101246, "grad_norm": 320.1292724609375, "learning_rate": 5.450034148633474e-06, "loss": 18.239, "step": 262090 }, { "epoch": 0.5294585826428083, "grad_norm": 226.79957580566406, "learning_rate": 5.449686498493219e-06, "loss": 18.5575, "step": 262100 }, { "epoch": 0.5294787832754921, "grad_norm": 251.6649627685547, "learning_rate": 5.449338846161248e-06, "loss": 13.9559, "step": 262110 }, { "epoch": 0.529498983908176, "grad_norm": 136.6876220703125, "learning_rate": 5.448991191639254e-06, "loss": 11.3143, "step": 262120 }, { "epoch": 0.5295191845408598, "grad_norm": 190.9188690185547, "learning_rate": 5.448643534928931e-06, "loss": 26.1147, "step": 262130 }, { "epoch": 0.5295393851735436, "grad_norm": 307.3971252441406, "learning_rate": 5.448295876031974e-06, "loss": 23.8905, "step": 262140 }, { "epoch": 0.5295595858062274, "grad_norm": 181.84950256347656, "learning_rate": 5.447948214950078e-06, "loss": 13.8564, "step": 262150 }, { "epoch": 0.5295797864389112, "grad_norm": 392.8540344238281, "learning_rate": 5.447600551684936e-06, "loss": 20.0235, "step": 262160 }, { "epoch": 0.5295999870715951, "grad_norm": 150.71595764160156, "learning_rate": 5.4472528862382415e-06, "loss": 28.4011, "step": 262170 }, { "epoch": 0.5296201877042789, "grad_norm": 421.3125915527344, "learning_rate": 5.446905218611694e-06, "loss": 20.4373, "step": 262180 }, { "epoch": 0.5296403883369627, "grad_norm": 94.99024200439453, "learning_rate": 5.4465575488069795e-06, "loss": 21.0108, "step": 262190 }, { "epoch": 0.5296605889696465, "grad_norm": 239.6387481689453, "learning_rate": 5.446209876825803e-06, "loss": 12.1321, "step": 262200 }, { "epoch": 0.5296807896023303, "grad_norm": 867.63427734375, "learning_rate": 5.445862202669851e-06, "loss": 25.4227, "step": 262210 }, { "epoch": 0.5297009902350142, "grad_norm": 304.4093017578125, "learning_rate": 5.445514526340822e-06, "loss": 13.3484, "step": 262220 }, { "epoch": 0.529721190867698, "grad_norm": 251.5116729736328, "learning_rate": 5.445166847840409e-06, "loss": 26.7382, "step": 262230 }, { "epoch": 0.5297413915003818, "grad_norm": 284.4418029785156, "learning_rate": 5.444819167170306e-06, "loss": 25.8798, "step": 262240 }, { "epoch": 0.5297615921330656, "grad_norm": 357.8984069824219, "learning_rate": 5.4444714843322085e-06, "loss": 12.8961, "step": 262250 }, { "epoch": 0.5297817927657494, "grad_norm": 178.8774871826172, "learning_rate": 5.444123799327811e-06, "loss": 18.5384, "step": 262260 }, { "epoch": 0.5298019933984333, "grad_norm": 450.9541931152344, "learning_rate": 5.443776112158808e-06, "loss": 23.5815, "step": 262270 }, { "epoch": 0.5298221940311171, "grad_norm": 19.972837448120117, "learning_rate": 5.443428422826893e-06, "loss": 27.8018, "step": 262280 }, { "epoch": 0.5298423946638009, "grad_norm": 407.6582946777344, "learning_rate": 5.443080731333764e-06, "loss": 28.4671, "step": 262290 }, { "epoch": 0.5298625952964847, "grad_norm": 161.39418029785156, "learning_rate": 5.442733037681112e-06, "loss": 12.2089, "step": 262300 }, { "epoch": 0.5298827959291685, "grad_norm": 112.4506607055664, "learning_rate": 5.442385341870633e-06, "loss": 21.5823, "step": 262310 }, { "epoch": 0.5299029965618524, "grad_norm": 167.3827667236328, "learning_rate": 5.442037643904022e-06, "loss": 8.8553, "step": 262320 }, { "epoch": 0.5299231971945362, "grad_norm": 280.62725830078125, "learning_rate": 5.4416899437829705e-06, "loss": 16.9141, "step": 262330 }, { "epoch": 0.52994339782722, "grad_norm": 430.4232177734375, "learning_rate": 5.441342241509179e-06, "loss": 26.6508, "step": 262340 }, { "epoch": 0.5299635984599038, "grad_norm": 169.89300537109375, "learning_rate": 5.440994537084337e-06, "loss": 15.411, "step": 262350 }, { "epoch": 0.5299837990925875, "grad_norm": 521.3826293945312, "learning_rate": 5.440646830510142e-06, "loss": 19.6971, "step": 262360 }, { "epoch": 0.5300039997252713, "grad_norm": 337.44146728515625, "learning_rate": 5.440299121788289e-06, "loss": 11.3851, "step": 262370 }, { "epoch": 0.5300242003579552, "grad_norm": 149.23318481445312, "learning_rate": 5.439951410920469e-06, "loss": 19.3097, "step": 262380 }, { "epoch": 0.530044400990639, "grad_norm": 365.7867736816406, "learning_rate": 5.439603697908381e-06, "loss": 28.9668, "step": 262390 }, { "epoch": 0.5300646016233228, "grad_norm": 261.7338562011719, "learning_rate": 5.439255982753717e-06, "loss": 12.5401, "step": 262400 }, { "epoch": 0.5300848022560066, "grad_norm": 372.1734313964844, "learning_rate": 5.438908265458172e-06, "loss": 26.4622, "step": 262410 }, { "epoch": 0.5301050028886904, "grad_norm": 118.73900604248047, "learning_rate": 5.438560546023442e-06, "loss": 14.3926, "step": 262420 }, { "epoch": 0.5301252035213743, "grad_norm": 0.0, "learning_rate": 5.438212824451221e-06, "loss": 15.8441, "step": 262430 }, { "epoch": 0.5301454041540581, "grad_norm": 369.8706970214844, "learning_rate": 5.437865100743205e-06, "loss": 30.8815, "step": 262440 }, { "epoch": 0.5301656047867419, "grad_norm": 189.74916076660156, "learning_rate": 5.437517374901087e-06, "loss": 26.5488, "step": 262450 }, { "epoch": 0.5301858054194257, "grad_norm": 319.7353515625, "learning_rate": 5.437169646926561e-06, "loss": 18.5132, "step": 262460 }, { "epoch": 0.5302060060521095, "grad_norm": 169.16693115234375, "learning_rate": 5.436821916821325e-06, "loss": 16.4562, "step": 262470 }, { "epoch": 0.5302262066847934, "grad_norm": 271.3753662109375, "learning_rate": 5.436474184587071e-06, "loss": 24.2771, "step": 262480 }, { "epoch": 0.5302464073174772, "grad_norm": 107.48536682128906, "learning_rate": 5.436126450225495e-06, "loss": 13.9678, "step": 262490 }, { "epoch": 0.530266607950161, "grad_norm": 581.6234130859375, "learning_rate": 5.435778713738292e-06, "loss": 19.1018, "step": 262500 }, { "epoch": 0.5302868085828448, "grad_norm": 144.06785583496094, "learning_rate": 5.435430975127155e-06, "loss": 10.6002, "step": 262510 }, { "epoch": 0.5303070092155286, "grad_norm": 344.51702880859375, "learning_rate": 5.435083234393782e-06, "loss": 10.0487, "step": 262520 }, { "epoch": 0.5303272098482125, "grad_norm": 664.1560668945312, "learning_rate": 5.434735491539866e-06, "loss": 28.8169, "step": 262530 }, { "epoch": 0.5303474104808963, "grad_norm": 175.690185546875, "learning_rate": 5.4343877465671e-06, "loss": 15.3365, "step": 262540 }, { "epoch": 0.5303676111135801, "grad_norm": 268.404541015625, "learning_rate": 5.434039999477182e-06, "loss": 21.4138, "step": 262550 }, { "epoch": 0.5303878117462639, "grad_norm": 339.0246887207031, "learning_rate": 5.433692250271806e-06, "loss": 25.7461, "step": 262560 }, { "epoch": 0.5304080123789477, "grad_norm": 437.43487548828125, "learning_rate": 5.433344498952666e-06, "loss": 16.9633, "step": 262570 }, { "epoch": 0.5304282130116316, "grad_norm": 321.1932067871094, "learning_rate": 5.432996745521458e-06, "loss": 12.4156, "step": 262580 }, { "epoch": 0.5304484136443154, "grad_norm": 311.9891662597656, "learning_rate": 5.4326489899798765e-06, "loss": 24.5929, "step": 262590 }, { "epoch": 0.5304686142769992, "grad_norm": 367.7344665527344, "learning_rate": 5.432301232329615e-06, "loss": 17.8372, "step": 262600 }, { "epoch": 0.530488814909683, "grad_norm": 40.47079849243164, "learning_rate": 5.431953472572372e-06, "loss": 16.3462, "step": 262610 }, { "epoch": 0.5305090155423667, "grad_norm": 217.53282165527344, "learning_rate": 5.431605710709838e-06, "loss": 12.8156, "step": 262620 }, { "epoch": 0.5305292161750506, "grad_norm": 506.8860778808594, "learning_rate": 5.431257946743711e-06, "loss": 12.9236, "step": 262630 }, { "epoch": 0.5305494168077344, "grad_norm": 531.5035400390625, "learning_rate": 5.430910180675685e-06, "loss": 16.7499, "step": 262640 }, { "epoch": 0.5305696174404182, "grad_norm": 550.3029174804688, "learning_rate": 5.430562412507454e-06, "loss": 22.645, "step": 262650 }, { "epoch": 0.530589818073102, "grad_norm": 202.5038604736328, "learning_rate": 5.430214642240716e-06, "loss": 17.8504, "step": 262660 }, { "epoch": 0.5306100187057858, "grad_norm": 423.6011657714844, "learning_rate": 5.429866869877163e-06, "loss": 18.2852, "step": 262670 }, { "epoch": 0.5306302193384697, "grad_norm": 409.5459899902344, "learning_rate": 5.429519095418492e-06, "loss": 13.5062, "step": 262680 }, { "epoch": 0.5306504199711535, "grad_norm": 58.00149917602539, "learning_rate": 5.429171318866395e-06, "loss": 29.6676, "step": 262690 }, { "epoch": 0.5306706206038373, "grad_norm": 388.8432312011719, "learning_rate": 5.42882354022257e-06, "loss": 15.7359, "step": 262700 }, { "epoch": 0.5306908212365211, "grad_norm": 324.7066345214844, "learning_rate": 5.428475759488711e-06, "loss": 32.4137, "step": 262710 }, { "epoch": 0.5307110218692049, "grad_norm": 153.56211853027344, "learning_rate": 5.428127976666513e-06, "loss": 14.091, "step": 262720 }, { "epoch": 0.5307312225018888, "grad_norm": 207.03982543945312, "learning_rate": 5.4277801917576724e-06, "loss": 23.14, "step": 262730 }, { "epoch": 0.5307514231345726, "grad_norm": 230.38368225097656, "learning_rate": 5.427432404763882e-06, "loss": 17.4549, "step": 262740 }, { "epoch": 0.5307716237672564, "grad_norm": 443.5598449707031, "learning_rate": 5.4270846156868386e-06, "loss": 24.3965, "step": 262750 }, { "epoch": 0.5307918243999402, "grad_norm": 94.85746765136719, "learning_rate": 5.426736824528236e-06, "loss": 10.6878, "step": 262760 }, { "epoch": 0.530812025032624, "grad_norm": 308.9444580078125, "learning_rate": 5.426389031289771e-06, "loss": 17.1823, "step": 262770 }, { "epoch": 0.5308322256653079, "grad_norm": 280.55108642578125, "learning_rate": 5.426041235973134e-06, "loss": 10.7612, "step": 262780 }, { "epoch": 0.5308524262979917, "grad_norm": 88.14566802978516, "learning_rate": 5.4256934385800275e-06, "loss": 20.3365, "step": 262790 }, { "epoch": 0.5308726269306755, "grad_norm": 414.4723815917969, "learning_rate": 5.425345639112141e-06, "loss": 16.4833, "step": 262800 }, { "epoch": 0.5308928275633593, "grad_norm": 143.44973754882812, "learning_rate": 5.424997837571172e-06, "loss": 19.5026, "step": 262810 }, { "epoch": 0.5309130281960431, "grad_norm": 238.3634796142578, "learning_rate": 5.4246500339588144e-06, "loss": 16.2526, "step": 262820 }, { "epoch": 0.530933228828727, "grad_norm": 246.7687225341797, "learning_rate": 5.4243022282767645e-06, "loss": 15.6987, "step": 262830 }, { "epoch": 0.5309534294614108, "grad_norm": 285.44775390625, "learning_rate": 5.4239544205267185e-06, "loss": 26.6096, "step": 262840 }, { "epoch": 0.5309736300940946, "grad_norm": 342.4588623046875, "learning_rate": 5.423606610710368e-06, "loss": 23.8388, "step": 262850 }, { "epoch": 0.5309938307267784, "grad_norm": 195.04515075683594, "learning_rate": 5.4232587988294105e-06, "loss": 23.341, "step": 262860 }, { "epoch": 0.5310140313594621, "grad_norm": 452.6519470214844, "learning_rate": 5.422910984885542e-06, "loss": 27.0958, "step": 262870 }, { "epoch": 0.5310342319921459, "grad_norm": 271.3914794921875, "learning_rate": 5.422563168880456e-06, "loss": 17.8749, "step": 262880 }, { "epoch": 0.5310544326248298, "grad_norm": 232.12046813964844, "learning_rate": 5.422215350815848e-06, "loss": 18.4699, "step": 262890 }, { "epoch": 0.5310746332575136, "grad_norm": 346.1502685546875, "learning_rate": 5.4218675306934145e-06, "loss": 15.1135, "step": 262900 }, { "epoch": 0.5310948338901974, "grad_norm": 325.3199157714844, "learning_rate": 5.42151970851485e-06, "loss": 24.631, "step": 262910 }, { "epoch": 0.5311150345228812, "grad_norm": 288.1345520019531, "learning_rate": 5.4211718842818485e-06, "loss": 12.1923, "step": 262920 }, { "epoch": 0.531135235155565, "grad_norm": 413.1113586425781, "learning_rate": 5.420824057996107e-06, "loss": 11.4076, "step": 262930 }, { "epoch": 0.5311554357882489, "grad_norm": 242.6788330078125, "learning_rate": 5.420476229659319e-06, "loss": 18.0094, "step": 262940 }, { "epoch": 0.5311756364209327, "grad_norm": 910.0072021484375, "learning_rate": 5.420128399273183e-06, "loss": 26.6705, "step": 262950 }, { "epoch": 0.5311958370536165, "grad_norm": 103.54688262939453, "learning_rate": 5.419780566839389e-06, "loss": 10.3844, "step": 262960 }, { "epoch": 0.5312160376863003, "grad_norm": 302.441162109375, "learning_rate": 5.419432732359637e-06, "loss": 11.539, "step": 262970 }, { "epoch": 0.5312362383189841, "grad_norm": 300.02264404296875, "learning_rate": 5.419084895835621e-06, "loss": 14.5013, "step": 262980 }, { "epoch": 0.531256438951668, "grad_norm": 399.79925537109375, "learning_rate": 5.418737057269037e-06, "loss": 26.1269, "step": 262990 }, { "epoch": 0.5312766395843518, "grad_norm": 564.0137329101562, "learning_rate": 5.41838921666158e-06, "loss": 24.9417, "step": 263000 }, { "epoch": 0.5312968402170356, "grad_norm": 389.2217712402344, "learning_rate": 5.418041374014942e-06, "loss": 25.4369, "step": 263010 }, { "epoch": 0.5313170408497194, "grad_norm": 404.6322937011719, "learning_rate": 5.417693529330822e-06, "loss": 20.1571, "step": 263020 }, { "epoch": 0.5313372414824032, "grad_norm": 237.67808532714844, "learning_rate": 5.417345682610914e-06, "loss": 18.2023, "step": 263030 }, { "epoch": 0.5313574421150871, "grad_norm": 259.6977233886719, "learning_rate": 5.416997833856914e-06, "loss": 30.1146, "step": 263040 }, { "epoch": 0.5313776427477709, "grad_norm": 261.0898132324219, "learning_rate": 5.416649983070518e-06, "loss": 13.0921, "step": 263050 }, { "epoch": 0.5313978433804547, "grad_norm": 77.90154266357422, "learning_rate": 5.4163021302534204e-06, "loss": 26.0175, "step": 263060 }, { "epoch": 0.5314180440131385, "grad_norm": 247.47628784179688, "learning_rate": 5.415954275407316e-06, "loss": 13.5337, "step": 263070 }, { "epoch": 0.5314382446458223, "grad_norm": 62.39498519897461, "learning_rate": 5.415606418533901e-06, "loss": 22.12, "step": 263080 }, { "epoch": 0.5314584452785062, "grad_norm": 56.369693756103516, "learning_rate": 5.4152585596348704e-06, "loss": 27.1883, "step": 263090 }, { "epoch": 0.53147864591119, "grad_norm": 331.546875, "learning_rate": 5.41491069871192e-06, "loss": 11.0016, "step": 263100 }, { "epoch": 0.5314988465438738, "grad_norm": 64.83863067626953, "learning_rate": 5.414562835766747e-06, "loss": 29.7927, "step": 263110 }, { "epoch": 0.5315190471765576, "grad_norm": 189.7975311279297, "learning_rate": 5.414214970801041e-06, "loss": 24.9707, "step": 263120 }, { "epoch": 0.5315392478092413, "grad_norm": 665.7513427734375, "learning_rate": 5.413867103816506e-06, "loss": 22.0005, "step": 263130 }, { "epoch": 0.5315594484419252, "grad_norm": 142.40969848632812, "learning_rate": 5.413519234814831e-06, "loss": 14.2571, "step": 263140 }, { "epoch": 0.531579649074609, "grad_norm": 232.74392700195312, "learning_rate": 5.413171363797713e-06, "loss": 13.8206, "step": 263150 }, { "epoch": 0.5315998497072928, "grad_norm": 969.3775634765625, "learning_rate": 5.412823490766849e-06, "loss": 25.0153, "step": 263160 }, { "epoch": 0.5316200503399766, "grad_norm": 124.52013397216797, "learning_rate": 5.412475615723931e-06, "loss": 17.4934, "step": 263170 }, { "epoch": 0.5316402509726604, "grad_norm": 205.3308563232422, "learning_rate": 5.41212773867066e-06, "loss": 29.4362, "step": 263180 }, { "epoch": 0.5316604516053443, "grad_norm": 265.3148193359375, "learning_rate": 5.4117798596087265e-06, "loss": 28.5772, "step": 263190 }, { "epoch": 0.5316806522380281, "grad_norm": 116.08619689941406, "learning_rate": 5.411431978539829e-06, "loss": 23.513, "step": 263200 }, { "epoch": 0.5317008528707119, "grad_norm": 302.97479248046875, "learning_rate": 5.411084095465661e-06, "loss": 23.3435, "step": 263210 }, { "epoch": 0.5317210535033957, "grad_norm": 456.6468811035156, "learning_rate": 5.41073621038792e-06, "loss": 18.8336, "step": 263220 }, { "epoch": 0.5317412541360795, "grad_norm": 278.3449401855469, "learning_rate": 5.410388323308299e-06, "loss": 21.5821, "step": 263230 }, { "epoch": 0.5317614547687634, "grad_norm": 102.11883544921875, "learning_rate": 5.410040434228496e-06, "loss": 11.3419, "step": 263240 }, { "epoch": 0.5317816554014472, "grad_norm": 8.891239166259766, "learning_rate": 5.409692543150206e-06, "loss": 13.87, "step": 263250 }, { "epoch": 0.531801856034131, "grad_norm": 341.0653381347656, "learning_rate": 5.409344650075123e-06, "loss": 21.926, "step": 263260 }, { "epoch": 0.5318220566668148, "grad_norm": 389.8683166503906, "learning_rate": 5.4089967550049445e-06, "loss": 14.9354, "step": 263270 }, { "epoch": 0.5318422572994986, "grad_norm": 325.59124755859375, "learning_rate": 5.408648857941365e-06, "loss": 15.1104, "step": 263280 }, { "epoch": 0.5318624579321825, "grad_norm": 175.74728393554688, "learning_rate": 5.408300958886083e-06, "loss": 22.3513, "step": 263290 }, { "epoch": 0.5318826585648663, "grad_norm": 300.7822265625, "learning_rate": 5.4079530578407895e-06, "loss": 16.1064, "step": 263300 }, { "epoch": 0.5319028591975501, "grad_norm": 228.97988891601562, "learning_rate": 5.407605154807182e-06, "loss": 11.835, "step": 263310 }, { "epoch": 0.5319230598302339, "grad_norm": 360.714599609375, "learning_rate": 5.4072572497869556e-06, "loss": 13.9692, "step": 263320 }, { "epoch": 0.5319432604629177, "grad_norm": 221.85888671875, "learning_rate": 5.406909342781809e-06, "loss": 25.2263, "step": 263330 }, { "epoch": 0.5319634610956016, "grad_norm": 191.1533203125, "learning_rate": 5.406561433793435e-06, "loss": 22.1888, "step": 263340 }, { "epoch": 0.5319836617282854, "grad_norm": 468.9291076660156, "learning_rate": 5.406213522823529e-06, "loss": 17.8024, "step": 263350 }, { "epoch": 0.5320038623609692, "grad_norm": 527.8284301757812, "learning_rate": 5.4058656098737885e-06, "loss": 10.475, "step": 263360 }, { "epoch": 0.532024062993653, "grad_norm": 73.70933532714844, "learning_rate": 5.405517694945907e-06, "loss": 26.1249, "step": 263370 }, { "epoch": 0.5320442636263367, "grad_norm": 340.512451171875, "learning_rate": 5.405169778041583e-06, "loss": 11.1223, "step": 263380 }, { "epoch": 0.5320644642590205, "grad_norm": 208.6023712158203, "learning_rate": 5.404821859162509e-06, "loss": 20.2412, "step": 263390 }, { "epoch": 0.5320846648917044, "grad_norm": 458.7839050292969, "learning_rate": 5.404473938310384e-06, "loss": 18.5332, "step": 263400 }, { "epoch": 0.5321048655243882, "grad_norm": 7.952865123748779, "learning_rate": 5.404126015486901e-06, "loss": 19.1363, "step": 263410 }, { "epoch": 0.532125066157072, "grad_norm": 243.2850799560547, "learning_rate": 5.403778090693758e-06, "loss": 21.2192, "step": 263420 }, { "epoch": 0.5321452667897558, "grad_norm": 171.4149627685547, "learning_rate": 5.403430163932648e-06, "loss": 29.5435, "step": 263430 }, { "epoch": 0.5321654674224396, "grad_norm": 138.6367950439453, "learning_rate": 5.403082235205269e-06, "loss": 13.5605, "step": 263440 }, { "epoch": 0.5321856680551235, "grad_norm": 0.0, "learning_rate": 5.402734304513316e-06, "loss": 18.1841, "step": 263450 }, { "epoch": 0.5322058686878073, "grad_norm": 85.37193298339844, "learning_rate": 5.402386371858486e-06, "loss": 13.4827, "step": 263460 }, { "epoch": 0.5322260693204911, "grad_norm": 137.23928833007812, "learning_rate": 5.402038437242471e-06, "loss": 22.3025, "step": 263470 }, { "epoch": 0.5322462699531749, "grad_norm": 250.8084259033203, "learning_rate": 5.401690500666972e-06, "loss": 10.1915, "step": 263480 }, { "epoch": 0.5322664705858587, "grad_norm": 19.831430435180664, "learning_rate": 5.401342562133682e-06, "loss": 15.9802, "step": 263490 }, { "epoch": 0.5322866712185426, "grad_norm": 103.7763671875, "learning_rate": 5.400994621644294e-06, "loss": 9.8262, "step": 263500 }, { "epoch": 0.5323068718512264, "grad_norm": 613.1729736328125, "learning_rate": 5.4006466792005105e-06, "loss": 22.4816, "step": 263510 }, { "epoch": 0.5323270724839102, "grad_norm": 158.41030883789062, "learning_rate": 5.400298734804023e-06, "loss": 19.7701, "step": 263520 }, { "epoch": 0.532347273116594, "grad_norm": 219.40818786621094, "learning_rate": 5.399950788456526e-06, "loss": 28.1359, "step": 263530 }, { "epoch": 0.5323674737492778, "grad_norm": 361.7120056152344, "learning_rate": 5.39960284015972e-06, "loss": 21.0942, "step": 263540 }, { "epoch": 0.5323876743819617, "grad_norm": 138.42750549316406, "learning_rate": 5.399254889915296e-06, "loss": 23.6721, "step": 263550 }, { "epoch": 0.5324078750146455, "grad_norm": 208.31907653808594, "learning_rate": 5.398906937724954e-06, "loss": 12.8471, "step": 263560 }, { "epoch": 0.5324280756473293, "grad_norm": 120.91117095947266, "learning_rate": 5.398558983590385e-06, "loss": 19.6015, "step": 263570 }, { "epoch": 0.5324482762800131, "grad_norm": 98.02713012695312, "learning_rate": 5.398211027513291e-06, "loss": 25.9277, "step": 263580 }, { "epoch": 0.5324684769126969, "grad_norm": 110.3919448852539, "learning_rate": 5.397863069495364e-06, "loss": 12.1987, "step": 263590 }, { "epoch": 0.5324886775453808, "grad_norm": 399.8277282714844, "learning_rate": 5.3975151095383e-06, "loss": 34.1183, "step": 263600 }, { "epoch": 0.5325088781780646, "grad_norm": 256.8631591796875, "learning_rate": 5.397167147643796e-06, "loss": 14.9968, "step": 263610 }, { "epoch": 0.5325290788107484, "grad_norm": 400.1690979003906, "learning_rate": 5.396819183813547e-06, "loss": 21.0692, "step": 263620 }, { "epoch": 0.5325492794434322, "grad_norm": 315.1650085449219, "learning_rate": 5.396471218049249e-06, "loss": 8.7566, "step": 263630 }, { "epoch": 0.5325694800761159, "grad_norm": 237.98956298828125, "learning_rate": 5.3961232503526e-06, "loss": 15.5124, "step": 263640 }, { "epoch": 0.5325896807087998, "grad_norm": 91.02640533447266, "learning_rate": 5.3957752807252925e-06, "loss": 14.854, "step": 263650 }, { "epoch": 0.5326098813414836, "grad_norm": 426.781005859375, "learning_rate": 5.3954273091690245e-06, "loss": 18.5214, "step": 263660 }, { "epoch": 0.5326300819741674, "grad_norm": 412.5982666015625, "learning_rate": 5.395079335685494e-06, "loss": 19.5292, "step": 263670 }, { "epoch": 0.5326502826068512, "grad_norm": 364.3861999511719, "learning_rate": 5.394731360276393e-06, "loss": 23.1509, "step": 263680 }, { "epoch": 0.532670483239535, "grad_norm": 247.47454833984375, "learning_rate": 5.394383382943419e-06, "loss": 13.3635, "step": 263690 }, { "epoch": 0.5326906838722189, "grad_norm": 335.28350830078125, "learning_rate": 5.394035403688268e-06, "loss": 16.4375, "step": 263700 }, { "epoch": 0.5327108845049027, "grad_norm": 118.2470474243164, "learning_rate": 5.393687422512637e-06, "loss": 16.1685, "step": 263710 }, { "epoch": 0.5327310851375865, "grad_norm": 75.08528137207031, "learning_rate": 5.393339439418222e-06, "loss": 23.8001, "step": 263720 }, { "epoch": 0.5327512857702703, "grad_norm": 478.8049011230469, "learning_rate": 5.392991454406716e-06, "loss": 15.9767, "step": 263730 }, { "epoch": 0.5327714864029541, "grad_norm": 577.3836059570312, "learning_rate": 5.39264346747982e-06, "loss": 19.3501, "step": 263740 }, { "epoch": 0.532791687035638, "grad_norm": 202.38394165039062, "learning_rate": 5.392295478639226e-06, "loss": 26.4529, "step": 263750 }, { "epoch": 0.5328118876683218, "grad_norm": 210.9169464111328, "learning_rate": 5.391947487886631e-06, "loss": 15.4854, "step": 263760 }, { "epoch": 0.5328320883010056, "grad_norm": 568.0921630859375, "learning_rate": 5.391599495223732e-06, "loss": 20.008, "step": 263770 }, { "epoch": 0.5328522889336894, "grad_norm": 316.9487609863281, "learning_rate": 5.391251500652224e-06, "loss": 15.0324, "step": 263780 }, { "epoch": 0.5328724895663732, "grad_norm": 461.7204284667969, "learning_rate": 5.390903504173805e-06, "loss": 10.5577, "step": 263790 }, { "epoch": 0.532892690199057, "grad_norm": 306.4527282714844, "learning_rate": 5.390555505790168e-06, "loss": 19.424, "step": 263800 }, { "epoch": 0.5329128908317409, "grad_norm": 342.4505310058594, "learning_rate": 5.390207505503012e-06, "loss": 14.6268, "step": 263810 }, { "epoch": 0.5329330914644247, "grad_norm": 305.6884460449219, "learning_rate": 5.389859503314031e-06, "loss": 10.2735, "step": 263820 }, { "epoch": 0.5329532920971085, "grad_norm": 272.5692443847656, "learning_rate": 5.389511499224925e-06, "loss": 25.1513, "step": 263830 }, { "epoch": 0.5329734927297923, "grad_norm": 413.098388671875, "learning_rate": 5.389163493237382e-06, "loss": 14.5763, "step": 263840 }, { "epoch": 0.5329936933624762, "grad_norm": 724.011962890625, "learning_rate": 5.388815485353109e-06, "loss": 38.6707, "step": 263850 }, { "epoch": 0.53301389399516, "grad_norm": 481.6500244140625, "learning_rate": 5.388467475573792e-06, "loss": 24.4034, "step": 263860 }, { "epoch": 0.5330340946278438, "grad_norm": 229.81214904785156, "learning_rate": 5.388119463901134e-06, "loss": 15.2215, "step": 263870 }, { "epoch": 0.5330542952605276, "grad_norm": 496.77691650390625, "learning_rate": 5.3877714503368285e-06, "loss": 12.4151, "step": 263880 }, { "epoch": 0.5330744958932114, "grad_norm": 791.6639404296875, "learning_rate": 5.387423434882571e-06, "loss": 28.4707, "step": 263890 }, { "epoch": 0.5330946965258951, "grad_norm": 594.6643676757812, "learning_rate": 5.3870754175400595e-06, "loss": 25.6743, "step": 263900 }, { "epoch": 0.533114897158579, "grad_norm": 249.26043701171875, "learning_rate": 5.386727398310989e-06, "loss": 27.8745, "step": 263910 }, { "epoch": 0.5331350977912628, "grad_norm": 623.6708984375, "learning_rate": 5.386379377197056e-06, "loss": 19.5054, "step": 263920 }, { "epoch": 0.5331552984239466, "grad_norm": 300.93798828125, "learning_rate": 5.386031354199956e-06, "loss": 19.2355, "step": 263930 }, { "epoch": 0.5331754990566304, "grad_norm": 173.6282196044922, "learning_rate": 5.385683329321387e-06, "loss": 23.157, "step": 263940 }, { "epoch": 0.5331956996893142, "grad_norm": 108.27371215820312, "learning_rate": 5.385335302563046e-06, "loss": 13.6654, "step": 263950 }, { "epoch": 0.5332159003219981, "grad_norm": 550.3907470703125, "learning_rate": 5.384987273926625e-06, "loss": 10.9481, "step": 263960 }, { "epoch": 0.5332361009546819, "grad_norm": 553.7071533203125, "learning_rate": 5.384639243413824e-06, "loss": 29.884, "step": 263970 }, { "epoch": 0.5332563015873657, "grad_norm": 368.5887451171875, "learning_rate": 5.384291211026337e-06, "loss": 17.9961, "step": 263980 }, { "epoch": 0.5332765022200495, "grad_norm": 333.175048828125, "learning_rate": 5.383943176765862e-06, "loss": 15.0957, "step": 263990 }, { "epoch": 0.5332967028527333, "grad_norm": 0.0, "learning_rate": 5.383595140634093e-06, "loss": 13.1446, "step": 264000 }, { "epoch": 0.5333169034854172, "grad_norm": 145.10325622558594, "learning_rate": 5.383247102632731e-06, "loss": 17.9334, "step": 264010 }, { "epoch": 0.533337104118101, "grad_norm": 488.7673645019531, "learning_rate": 5.3828990627634655e-06, "loss": 26.2413, "step": 264020 }, { "epoch": 0.5333573047507848, "grad_norm": 363.0857849121094, "learning_rate": 5.382551021027999e-06, "loss": 18.5862, "step": 264030 }, { "epoch": 0.5333775053834686, "grad_norm": 535.13330078125, "learning_rate": 5.382202977428025e-06, "loss": 21.4522, "step": 264040 }, { "epoch": 0.5333977060161524, "grad_norm": 60.024845123291016, "learning_rate": 5.381854931965238e-06, "loss": 17.8584, "step": 264050 }, { "epoch": 0.5334179066488363, "grad_norm": 236.71824645996094, "learning_rate": 5.381506884641339e-06, "loss": 12.9926, "step": 264060 }, { "epoch": 0.5334381072815201, "grad_norm": 129.0694580078125, "learning_rate": 5.381158835458019e-06, "loss": 17.4945, "step": 264070 }, { "epoch": 0.5334583079142039, "grad_norm": 373.35205078125, "learning_rate": 5.380810784416979e-06, "loss": 18.2615, "step": 264080 }, { "epoch": 0.5334785085468877, "grad_norm": 146.16390991210938, "learning_rate": 5.380462731519912e-06, "loss": 10.0562, "step": 264090 }, { "epoch": 0.5334987091795715, "grad_norm": 22.387361526489258, "learning_rate": 5.380114676768516e-06, "loss": 13.7306, "step": 264100 }, { "epoch": 0.5335189098122554, "grad_norm": 365.4973449707031, "learning_rate": 5.379766620164488e-06, "loss": 16.3674, "step": 264110 }, { "epoch": 0.5335391104449392, "grad_norm": 113.9658432006836, "learning_rate": 5.379418561709524e-06, "loss": 21.8753, "step": 264120 }, { "epoch": 0.533559311077623, "grad_norm": 484.54840087890625, "learning_rate": 5.37907050140532e-06, "loss": 16.9003, "step": 264130 }, { "epoch": 0.5335795117103068, "grad_norm": 214.47889709472656, "learning_rate": 5.378722439253571e-06, "loss": 15.8034, "step": 264140 }, { "epoch": 0.5335997123429905, "grad_norm": 414.7723083496094, "learning_rate": 5.378374375255977e-06, "loss": 23.9821, "step": 264150 }, { "epoch": 0.5336199129756743, "grad_norm": 223.8611297607422, "learning_rate": 5.378026309414229e-06, "loss": 13.7965, "step": 264160 }, { "epoch": 0.5336401136083582, "grad_norm": 478.074951171875, "learning_rate": 5.377678241730029e-06, "loss": 29.4383, "step": 264170 }, { "epoch": 0.533660314241042, "grad_norm": 577.9468383789062, "learning_rate": 5.377330172205068e-06, "loss": 24.6842, "step": 264180 }, { "epoch": 0.5336805148737258, "grad_norm": 138.7062225341797, "learning_rate": 5.37698210084105e-06, "loss": 15.9763, "step": 264190 }, { "epoch": 0.5337007155064096, "grad_norm": 649.977294921875, "learning_rate": 5.376634027639664e-06, "loss": 31.1183, "step": 264200 }, { "epoch": 0.5337209161390934, "grad_norm": 368.82855224609375, "learning_rate": 5.37628595260261e-06, "loss": 25.3188, "step": 264210 }, { "epoch": 0.5337411167717773, "grad_norm": 0.0, "learning_rate": 5.375937875731585e-06, "loss": 13.0238, "step": 264220 }, { "epoch": 0.5337613174044611, "grad_norm": 238.15341186523438, "learning_rate": 5.375589797028282e-06, "loss": 6.7665, "step": 264230 }, { "epoch": 0.5337815180371449, "grad_norm": 107.66752624511719, "learning_rate": 5.375241716494403e-06, "loss": 14.8686, "step": 264240 }, { "epoch": 0.5338017186698287, "grad_norm": 478.7092590332031, "learning_rate": 5.3748936341316395e-06, "loss": 14.085, "step": 264250 }, { "epoch": 0.5338219193025125, "grad_norm": 342.9481506347656, "learning_rate": 5.37454554994169e-06, "loss": 28.6526, "step": 264260 }, { "epoch": 0.5338421199351964, "grad_norm": 535.4735717773438, "learning_rate": 5.374197463926251e-06, "loss": 18.958, "step": 264270 }, { "epoch": 0.5338623205678802, "grad_norm": 227.9623260498047, "learning_rate": 5.37384937608702e-06, "loss": 15.2431, "step": 264280 }, { "epoch": 0.533882521200564, "grad_norm": 277.1716613769531, "learning_rate": 5.373501286425691e-06, "loss": 18.214, "step": 264290 }, { "epoch": 0.5339027218332478, "grad_norm": 575.446044921875, "learning_rate": 5.373153194943962e-06, "loss": 24.1122, "step": 264300 }, { "epoch": 0.5339229224659316, "grad_norm": 299.71942138671875, "learning_rate": 5.37280510164353e-06, "loss": 32.0509, "step": 264310 }, { "epoch": 0.5339431230986155, "grad_norm": 292.47906494140625, "learning_rate": 5.37245700652609e-06, "loss": 17.7643, "step": 264320 }, { "epoch": 0.5339633237312993, "grad_norm": 263.2667541503906, "learning_rate": 5.372108909593343e-06, "loss": 24.1843, "step": 264330 }, { "epoch": 0.5339835243639831, "grad_norm": 331.5159912109375, "learning_rate": 5.371760810846979e-06, "loss": 12.6568, "step": 264340 }, { "epoch": 0.5340037249966669, "grad_norm": 147.39437866210938, "learning_rate": 5.371412710288701e-06, "loss": 18.421, "step": 264350 }, { "epoch": 0.5340239256293507, "grad_norm": 50.00069808959961, "learning_rate": 5.3710646079202e-06, "loss": 15.1252, "step": 264360 }, { "epoch": 0.5340441262620346, "grad_norm": 376.8497314453125, "learning_rate": 5.370716503743175e-06, "loss": 36.5195, "step": 264370 }, { "epoch": 0.5340643268947184, "grad_norm": 631.4561767578125, "learning_rate": 5.370368397759324e-06, "loss": 42.4036, "step": 264380 }, { "epoch": 0.5340845275274022, "grad_norm": 218.19155883789062, "learning_rate": 5.370020289970341e-06, "loss": 17.6334, "step": 264390 }, { "epoch": 0.534104728160086, "grad_norm": 232.99266052246094, "learning_rate": 5.3696721803779265e-06, "loss": 24.6563, "step": 264400 }, { "epoch": 0.5341249287927697, "grad_norm": 173.87649536132812, "learning_rate": 5.369324068983772e-06, "loss": 14.2541, "step": 264410 }, { "epoch": 0.5341451294254536, "grad_norm": 139.04937744140625, "learning_rate": 5.368975955789577e-06, "loss": 17.2455, "step": 264420 }, { "epoch": 0.5341653300581374, "grad_norm": 349.06036376953125, "learning_rate": 5.368627840797039e-06, "loss": 18.3038, "step": 264430 }, { "epoch": 0.5341855306908212, "grad_norm": 85.2361831665039, "learning_rate": 5.368279724007854e-06, "loss": 17.5193, "step": 264440 }, { "epoch": 0.534205731323505, "grad_norm": 330.210205078125, "learning_rate": 5.3679316054237165e-06, "loss": 39.6962, "step": 264450 }, { "epoch": 0.5342259319561888, "grad_norm": 0.8538121581077576, "learning_rate": 5.367583485046327e-06, "loss": 26.3067, "step": 264460 }, { "epoch": 0.5342461325888727, "grad_norm": 166.63723754882812, "learning_rate": 5.367235362877379e-06, "loss": 9.5802, "step": 264470 }, { "epoch": 0.5342663332215565, "grad_norm": 30.568470001220703, "learning_rate": 5.366887238918571e-06, "loss": 15.9285, "step": 264480 }, { "epoch": 0.5342865338542403, "grad_norm": 124.72406768798828, "learning_rate": 5.3665391131716e-06, "loss": 17.1008, "step": 264490 }, { "epoch": 0.5343067344869241, "grad_norm": 582.5173950195312, "learning_rate": 5.366190985638159e-06, "loss": 42.4989, "step": 264500 }, { "epoch": 0.5343269351196079, "grad_norm": 103.28765869140625, "learning_rate": 5.36584285631995e-06, "loss": 11.0606, "step": 264510 }, { "epoch": 0.5343471357522918, "grad_norm": 40.87249755859375, "learning_rate": 5.365494725218667e-06, "loss": 16.6394, "step": 264520 }, { "epoch": 0.5343673363849756, "grad_norm": 253.8387908935547, "learning_rate": 5.3651465923360045e-06, "loss": 34.3461, "step": 264530 }, { "epoch": 0.5343875370176594, "grad_norm": 198.5197296142578, "learning_rate": 5.3647984576736645e-06, "loss": 15.2418, "step": 264540 }, { "epoch": 0.5344077376503432, "grad_norm": 238.29190063476562, "learning_rate": 5.3644503212333395e-06, "loss": 12.0259, "step": 264550 }, { "epoch": 0.534427938283027, "grad_norm": 129.78463745117188, "learning_rate": 5.36410218301673e-06, "loss": 23.003, "step": 264560 }, { "epoch": 0.5344481389157109, "grad_norm": 200.9611053466797, "learning_rate": 5.363754043025528e-06, "loss": 16.7223, "step": 264570 }, { "epoch": 0.5344683395483947, "grad_norm": 168.8322296142578, "learning_rate": 5.3634059012614345e-06, "loss": 6.7707, "step": 264580 }, { "epoch": 0.5344885401810785, "grad_norm": 9.641016006469727, "learning_rate": 5.363057757726145e-06, "loss": 13.7314, "step": 264590 }, { "epoch": 0.5345087408137623, "grad_norm": 177.96578979492188, "learning_rate": 5.362709612421355e-06, "loss": 22.2625, "step": 264600 }, { "epoch": 0.5345289414464461, "grad_norm": 202.83717346191406, "learning_rate": 5.362361465348762e-06, "loss": 13.0853, "step": 264610 }, { "epoch": 0.53454914207913, "grad_norm": 0.0, "learning_rate": 5.3620133165100656e-06, "loss": 22.667, "step": 264620 }, { "epoch": 0.5345693427118138, "grad_norm": 286.75604248046875, "learning_rate": 5.3616651659069576e-06, "loss": 27.3815, "step": 264630 }, { "epoch": 0.5345895433444976, "grad_norm": 6.989019870758057, "learning_rate": 5.3613170135411384e-06, "loss": 13.7708, "step": 264640 }, { "epoch": 0.5346097439771814, "grad_norm": 378.3674621582031, "learning_rate": 5.360968859414305e-06, "loss": 30.5771, "step": 264650 }, { "epoch": 0.5346299446098651, "grad_norm": 157.88551330566406, "learning_rate": 5.36062070352815e-06, "loss": 15.4614, "step": 264660 }, { "epoch": 0.534650145242549, "grad_norm": 169.51339721679688, "learning_rate": 5.360272545884376e-06, "loss": 18.3121, "step": 264670 }, { "epoch": 0.5346703458752328, "grad_norm": 261.600830078125, "learning_rate": 5.359924386484676e-06, "loss": 32.7511, "step": 264680 }, { "epoch": 0.5346905465079166, "grad_norm": 270.50079345703125, "learning_rate": 5.35957622533075e-06, "loss": 26.2048, "step": 264690 }, { "epoch": 0.5347107471406004, "grad_norm": 118.46422576904297, "learning_rate": 5.359228062424292e-06, "loss": 14.295, "step": 264700 }, { "epoch": 0.5347309477732842, "grad_norm": 35.038509368896484, "learning_rate": 5.358879897767e-06, "loss": 17.393, "step": 264710 }, { "epoch": 0.534751148405968, "grad_norm": 317.6742248535156, "learning_rate": 5.358531731360571e-06, "loss": 11.1578, "step": 264720 }, { "epoch": 0.5347713490386519, "grad_norm": 351.72216796875, "learning_rate": 5.358183563206703e-06, "loss": 17.5697, "step": 264730 }, { "epoch": 0.5347915496713357, "grad_norm": 273.04461669921875, "learning_rate": 5.357835393307089e-06, "loss": 13.5962, "step": 264740 }, { "epoch": 0.5348117503040195, "grad_norm": 367.6149597167969, "learning_rate": 5.35748722166343e-06, "loss": 41.6056, "step": 264750 }, { "epoch": 0.5348319509367033, "grad_norm": 313.6199951171875, "learning_rate": 5.357139048277422e-06, "loss": 13.6125, "step": 264760 }, { "epoch": 0.5348521515693871, "grad_norm": 166.67822265625, "learning_rate": 5.356790873150761e-06, "loss": 13.9214, "step": 264770 }, { "epoch": 0.534872352202071, "grad_norm": 94.70053100585938, "learning_rate": 5.356442696285146e-06, "loss": 10.0104, "step": 264780 }, { "epoch": 0.5348925528347548, "grad_norm": 253.89602661132812, "learning_rate": 5.3560945176822695e-06, "loss": 26.2876, "step": 264790 }, { "epoch": 0.5349127534674386, "grad_norm": 261.6278381347656, "learning_rate": 5.355746337343835e-06, "loss": 11.159, "step": 264800 }, { "epoch": 0.5349329541001224, "grad_norm": 91.49895477294922, "learning_rate": 5.355398155271535e-06, "loss": 10.4126, "step": 264810 }, { "epoch": 0.5349531547328062, "grad_norm": 188.7960662841797, "learning_rate": 5.355049971467066e-06, "loss": 12.1037, "step": 264820 }, { "epoch": 0.5349733553654901, "grad_norm": 94.56314849853516, "learning_rate": 5.354701785932129e-06, "loss": 17.0345, "step": 264830 }, { "epoch": 0.5349935559981739, "grad_norm": 355.7270812988281, "learning_rate": 5.354353598668416e-06, "loss": 23.8866, "step": 264840 }, { "epoch": 0.5350137566308577, "grad_norm": 208.4805908203125, "learning_rate": 5.354005409677628e-06, "loss": 32.6055, "step": 264850 }, { "epoch": 0.5350339572635415, "grad_norm": 187.07611083984375, "learning_rate": 5.35365721896146e-06, "loss": 13.2461, "step": 264860 }, { "epoch": 0.5350541578962253, "grad_norm": 289.86810302734375, "learning_rate": 5.353309026521609e-06, "loss": 22.2967, "step": 264870 }, { "epoch": 0.5350743585289092, "grad_norm": 406.1969299316406, "learning_rate": 5.3529608323597735e-06, "loss": 38.5972, "step": 264880 }, { "epoch": 0.535094559161593, "grad_norm": 424.3179931640625, "learning_rate": 5.352612636477651e-06, "loss": 24.7948, "step": 264890 }, { "epoch": 0.5351147597942768, "grad_norm": 207.13906860351562, "learning_rate": 5.352264438876935e-06, "loss": 25.5875, "step": 264900 }, { "epoch": 0.5351349604269606, "grad_norm": 307.5575866699219, "learning_rate": 5.351916239559326e-06, "loss": 7.746, "step": 264910 }, { "epoch": 0.5351551610596443, "grad_norm": 505.5068664550781, "learning_rate": 5.35156803852652e-06, "loss": 29.495, "step": 264920 }, { "epoch": 0.5351753616923282, "grad_norm": 464.21014404296875, "learning_rate": 5.351219835780213e-06, "loss": 16.351, "step": 264930 }, { "epoch": 0.535195562325012, "grad_norm": 27.638912200927734, "learning_rate": 5.3508716313221054e-06, "loss": 10.7804, "step": 264940 }, { "epoch": 0.5352157629576958, "grad_norm": 536.1787719726562, "learning_rate": 5.3505234251538885e-06, "loss": 26.4258, "step": 264950 }, { "epoch": 0.5352359635903796, "grad_norm": 136.93023681640625, "learning_rate": 5.3501752172772655e-06, "loss": 10.1787, "step": 264960 }, { "epoch": 0.5352561642230634, "grad_norm": 162.08984375, "learning_rate": 5.34982700769393e-06, "loss": 9.718, "step": 264970 }, { "epoch": 0.5352763648557473, "grad_norm": 300.3377990722656, "learning_rate": 5.3494787964055805e-06, "loss": 25.7311, "step": 264980 }, { "epoch": 0.5352965654884311, "grad_norm": 369.05523681640625, "learning_rate": 5.349130583413915e-06, "loss": 10.4849, "step": 264990 }, { "epoch": 0.5353167661211149, "grad_norm": 351.6495666503906, "learning_rate": 5.348782368720627e-06, "loss": 12.5732, "step": 265000 }, { "epoch": 0.5353369667537987, "grad_norm": 359.4354553222656, "learning_rate": 5.348434152327418e-06, "loss": 15.4023, "step": 265010 }, { "epoch": 0.5353571673864825, "grad_norm": 501.5163879394531, "learning_rate": 5.348085934235981e-06, "loss": 37.0669, "step": 265020 }, { "epoch": 0.5353773680191664, "grad_norm": 498.0756530761719, "learning_rate": 5.347737714448017e-06, "loss": 24.6193, "step": 265030 }, { "epoch": 0.5353975686518502, "grad_norm": 59.556739807128906, "learning_rate": 5.347389492965221e-06, "loss": 9.7148, "step": 265040 }, { "epoch": 0.535417769284534, "grad_norm": 267.8094482421875, "learning_rate": 5.347041269789293e-06, "loss": 21.3662, "step": 265050 }, { "epoch": 0.5354379699172178, "grad_norm": 340.4969787597656, "learning_rate": 5.346693044921925e-06, "loss": 18.9238, "step": 265060 }, { "epoch": 0.5354581705499016, "grad_norm": 634.2813110351562, "learning_rate": 5.3463448183648185e-06, "loss": 13.9175, "step": 265070 }, { "epoch": 0.5354783711825855, "grad_norm": 205.21766662597656, "learning_rate": 5.345996590119668e-06, "loss": 20.3336, "step": 265080 }, { "epoch": 0.5354985718152693, "grad_norm": 385.4761962890625, "learning_rate": 5.345648360188173e-06, "loss": 19.0683, "step": 265090 }, { "epoch": 0.5355187724479531, "grad_norm": 328.7206726074219, "learning_rate": 5.345300128572031e-06, "loss": 17.2363, "step": 265100 }, { "epoch": 0.5355389730806369, "grad_norm": 338.6438293457031, "learning_rate": 5.344951895272935e-06, "loss": 12.5276, "step": 265110 }, { "epoch": 0.5355591737133207, "grad_norm": 245.680908203125, "learning_rate": 5.344603660292588e-06, "loss": 10.4279, "step": 265120 }, { "epoch": 0.5355793743460046, "grad_norm": 530.47021484375, "learning_rate": 5.344255423632684e-06, "loss": 26.1957, "step": 265130 }, { "epoch": 0.5355995749786884, "grad_norm": 363.26910400390625, "learning_rate": 5.34390718529492e-06, "loss": 22.712, "step": 265140 }, { "epoch": 0.5356197756113722, "grad_norm": 137.65362548828125, "learning_rate": 5.343558945280994e-06, "loss": 6.5239, "step": 265150 }, { "epoch": 0.535639976244056, "grad_norm": 211.0373992919922, "learning_rate": 5.343210703592604e-06, "loss": 18.9273, "step": 265160 }, { "epoch": 0.5356601768767397, "grad_norm": 509.081787109375, "learning_rate": 5.342862460231448e-06, "loss": 18.4285, "step": 265170 }, { "epoch": 0.5356803775094235, "grad_norm": 316.6115417480469, "learning_rate": 5.34251421519922e-06, "loss": 33.7435, "step": 265180 }, { "epoch": 0.5357005781421074, "grad_norm": 361.59735107421875, "learning_rate": 5.3421659684976205e-06, "loss": 16.6484, "step": 265190 }, { "epoch": 0.5357207787747912, "grad_norm": 116.63799285888672, "learning_rate": 5.341817720128344e-06, "loss": 13.9785, "step": 265200 }, { "epoch": 0.535740979407475, "grad_norm": 646.306884765625, "learning_rate": 5.341469470093091e-06, "loss": 20.2344, "step": 265210 }, { "epoch": 0.5357611800401588, "grad_norm": 321.973876953125, "learning_rate": 5.341121218393555e-06, "loss": 13.8647, "step": 265220 }, { "epoch": 0.5357813806728426, "grad_norm": 134.1586456298828, "learning_rate": 5.340772965031439e-06, "loss": 11.6803, "step": 265230 }, { "epoch": 0.5358015813055265, "grad_norm": 12.555655479431152, "learning_rate": 5.340424710008434e-06, "loss": 15.1889, "step": 265240 }, { "epoch": 0.5358217819382103, "grad_norm": 142.04478454589844, "learning_rate": 5.3400764533262415e-06, "loss": 23.4509, "step": 265250 }, { "epoch": 0.5358419825708941, "grad_norm": 439.29052734375, "learning_rate": 5.339728194986559e-06, "loss": 16.4961, "step": 265260 }, { "epoch": 0.5358621832035779, "grad_norm": 408.9999694824219, "learning_rate": 5.339379934991079e-06, "loss": 22.8554, "step": 265270 }, { "epoch": 0.5358823838362617, "grad_norm": 461.274658203125, "learning_rate": 5.339031673341505e-06, "loss": 19.5465, "step": 265280 }, { "epoch": 0.5359025844689456, "grad_norm": 184.9110565185547, "learning_rate": 5.338683410039529e-06, "loss": 21.9049, "step": 265290 }, { "epoch": 0.5359227851016294, "grad_norm": 111.10748291015625, "learning_rate": 5.338335145086855e-06, "loss": 17.3286, "step": 265300 }, { "epoch": 0.5359429857343132, "grad_norm": 298.2159423828125, "learning_rate": 5.337986878485174e-06, "loss": 18.6927, "step": 265310 }, { "epoch": 0.535963186366997, "grad_norm": 438.24200439453125, "learning_rate": 5.337638610236186e-06, "loss": 12.7319, "step": 265320 }, { "epoch": 0.5359833869996808, "grad_norm": 352.2384338378906, "learning_rate": 5.3372903403415896e-06, "loss": 12.0339, "step": 265330 }, { "epoch": 0.5360035876323647, "grad_norm": 80.49260711669922, "learning_rate": 5.336942068803081e-06, "loss": 12.9118, "step": 265340 }, { "epoch": 0.5360237882650485, "grad_norm": 288.27813720703125, "learning_rate": 5.336593795622357e-06, "loss": 16.5863, "step": 265350 }, { "epoch": 0.5360439888977323, "grad_norm": 305.5639953613281, "learning_rate": 5.336245520801115e-06, "loss": 13.3339, "step": 265360 }, { "epoch": 0.5360641895304161, "grad_norm": 416.6864013671875, "learning_rate": 5.335897244341054e-06, "loss": 32.084, "step": 265370 }, { "epoch": 0.5360843901630999, "grad_norm": 393.9622802734375, "learning_rate": 5.33554896624387e-06, "loss": 17.2752, "step": 265380 }, { "epoch": 0.5361045907957838, "grad_norm": 156.0537872314453, "learning_rate": 5.335200686511262e-06, "loss": 10.7354, "step": 265390 }, { "epoch": 0.5361247914284676, "grad_norm": 36.114227294921875, "learning_rate": 5.334852405144926e-06, "loss": 13.2335, "step": 265400 }, { "epoch": 0.5361449920611514, "grad_norm": 514.9798583984375, "learning_rate": 5.3345041221465586e-06, "loss": 19.0041, "step": 265410 }, { "epoch": 0.5361651926938352, "grad_norm": 405.4417419433594, "learning_rate": 5.33415583751786e-06, "loss": 27.8262, "step": 265420 }, { "epoch": 0.5361853933265189, "grad_norm": 227.79820251464844, "learning_rate": 5.333807551260526e-06, "loss": 21.0099, "step": 265430 }, { "epoch": 0.5362055939592028, "grad_norm": 272.60760498046875, "learning_rate": 5.333459263376256e-06, "loss": 10.2631, "step": 265440 }, { "epoch": 0.5362257945918866, "grad_norm": 218.28001403808594, "learning_rate": 5.333110973866743e-06, "loss": 23.1848, "step": 265450 }, { "epoch": 0.5362459952245704, "grad_norm": 659.5874633789062, "learning_rate": 5.3327626827336906e-06, "loss": 20.4389, "step": 265460 }, { "epoch": 0.5362661958572542, "grad_norm": 11.413723945617676, "learning_rate": 5.332414389978792e-06, "loss": 12.149, "step": 265470 }, { "epoch": 0.536286396489938, "grad_norm": 334.3695373535156, "learning_rate": 5.332066095603745e-06, "loss": 25.7819, "step": 265480 }, { "epoch": 0.5363065971226219, "grad_norm": 115.32044982910156, "learning_rate": 5.33171779961025e-06, "loss": 13.4088, "step": 265490 }, { "epoch": 0.5363267977553057, "grad_norm": 240.41583251953125, "learning_rate": 5.3313695020000026e-06, "loss": 32.3069, "step": 265500 }, { "epoch": 0.5363469983879895, "grad_norm": 0.0, "learning_rate": 5.331021202774699e-06, "loss": 11.6313, "step": 265510 }, { "epoch": 0.5363671990206733, "grad_norm": 398.2794494628906, "learning_rate": 5.330672901936038e-06, "loss": 16.2008, "step": 265520 }, { "epoch": 0.5363873996533571, "grad_norm": 270.7489318847656, "learning_rate": 5.330324599485718e-06, "loss": 13.1072, "step": 265530 }, { "epoch": 0.536407600286041, "grad_norm": 423.32672119140625, "learning_rate": 5.329976295425437e-06, "loss": 7.5603, "step": 265540 }, { "epoch": 0.5364278009187248, "grad_norm": 12.48072338104248, "learning_rate": 5.32962798975689e-06, "loss": 9.2084, "step": 265550 }, { "epoch": 0.5364480015514086, "grad_norm": 223.8633270263672, "learning_rate": 5.329279682481776e-06, "loss": 12.9988, "step": 265560 }, { "epoch": 0.5364682021840924, "grad_norm": 164.66598510742188, "learning_rate": 5.328931373601794e-06, "loss": 17.2131, "step": 265570 }, { "epoch": 0.5364884028167762, "grad_norm": 206.40711975097656, "learning_rate": 5.328583063118641e-06, "loss": 21.9114, "step": 265580 }, { "epoch": 0.53650860344946, "grad_norm": 240.35484313964844, "learning_rate": 5.328234751034011e-06, "loss": 18.5981, "step": 265590 }, { "epoch": 0.5365288040821439, "grad_norm": 752.9829711914062, "learning_rate": 5.327886437349609e-06, "loss": 18.7914, "step": 265600 }, { "epoch": 0.5365490047148277, "grad_norm": 801.22265625, "learning_rate": 5.327538122067124e-06, "loss": 18.1934, "step": 265610 }, { "epoch": 0.5365692053475115, "grad_norm": 344.9349670410156, "learning_rate": 5.327189805188261e-06, "loss": 27.427, "step": 265620 }, { "epoch": 0.5365894059801953, "grad_norm": 252.720703125, "learning_rate": 5.326841486714713e-06, "loss": 21.4064, "step": 265630 }, { "epoch": 0.5366096066128792, "grad_norm": 467.5227966308594, "learning_rate": 5.326493166648179e-06, "loss": 26.5973, "step": 265640 }, { "epoch": 0.536629807245563, "grad_norm": 92.7689208984375, "learning_rate": 5.326144844990357e-06, "loss": 27.4589, "step": 265650 }, { "epoch": 0.5366500078782468, "grad_norm": 592.754150390625, "learning_rate": 5.3257965217429465e-06, "loss": 15.7651, "step": 265660 }, { "epoch": 0.5366702085109306, "grad_norm": 473.3447265625, "learning_rate": 5.325448196907642e-06, "loss": 20.9828, "step": 265670 }, { "epoch": 0.5366904091436144, "grad_norm": 17.7713680267334, "learning_rate": 5.3250998704861425e-06, "loss": 17.1283, "step": 265680 }, { "epoch": 0.5367106097762981, "grad_norm": 286.2649230957031, "learning_rate": 5.324751542480144e-06, "loss": 11.0243, "step": 265690 }, { "epoch": 0.536730810408982, "grad_norm": 107.76763916015625, "learning_rate": 5.3244032128913485e-06, "loss": 15.7168, "step": 265700 }, { "epoch": 0.5367510110416658, "grad_norm": 77.8515396118164, "learning_rate": 5.32405488172145e-06, "loss": 23.9465, "step": 265710 }, { "epoch": 0.5367712116743496, "grad_norm": 189.95460510253906, "learning_rate": 5.3237065489721465e-06, "loss": 18.9312, "step": 265720 }, { "epoch": 0.5367914123070334, "grad_norm": 327.388671875, "learning_rate": 5.3233582146451375e-06, "loss": 38.5575, "step": 265730 }, { "epoch": 0.5368116129397172, "grad_norm": 482.5219421386719, "learning_rate": 5.323009878742119e-06, "loss": 18.8585, "step": 265740 }, { "epoch": 0.5368318135724011, "grad_norm": 241.76324462890625, "learning_rate": 5.322661541264791e-06, "loss": 28.704, "step": 265750 }, { "epoch": 0.5368520142050849, "grad_norm": 548.6708984375, "learning_rate": 5.322313202214848e-06, "loss": 28.9363, "step": 265760 }, { "epoch": 0.5368722148377687, "grad_norm": 145.52320861816406, "learning_rate": 5.32196486159399e-06, "loss": 12.1706, "step": 265770 }, { "epoch": 0.5368924154704525, "grad_norm": 223.5120391845703, "learning_rate": 5.321616519403916e-06, "loss": 19.3248, "step": 265780 }, { "epoch": 0.5369126161031363, "grad_norm": 3.3384008407592773, "learning_rate": 5.3212681756463205e-06, "loss": 19.2859, "step": 265790 }, { "epoch": 0.5369328167358202, "grad_norm": 101.12583923339844, "learning_rate": 5.320919830322903e-06, "loss": 12.3743, "step": 265800 }, { "epoch": 0.536953017368504, "grad_norm": 151.59274291992188, "learning_rate": 5.320571483435362e-06, "loss": 17.015, "step": 265810 }, { "epoch": 0.5369732180011878, "grad_norm": 229.44415283203125, "learning_rate": 5.320223134985393e-06, "loss": 16.8586, "step": 265820 }, { "epoch": 0.5369934186338716, "grad_norm": 186.42259216308594, "learning_rate": 5.319874784974696e-06, "loss": 14.0089, "step": 265830 }, { "epoch": 0.5370136192665554, "grad_norm": 361.0640869140625, "learning_rate": 5.319526433404969e-06, "loss": 11.1294, "step": 265840 }, { "epoch": 0.5370338198992393, "grad_norm": 112.2176284790039, "learning_rate": 5.319178080277908e-06, "loss": 15.7867, "step": 265850 }, { "epoch": 0.5370540205319231, "grad_norm": 98.31188201904297, "learning_rate": 5.318829725595212e-06, "loss": 5.3418, "step": 265860 }, { "epoch": 0.5370742211646069, "grad_norm": 161.57559204101562, "learning_rate": 5.318481369358579e-06, "loss": 11.1526, "step": 265870 }, { "epoch": 0.5370944217972907, "grad_norm": 344.71893310546875, "learning_rate": 5.318133011569704e-06, "loss": 20.7998, "step": 265880 }, { "epoch": 0.5371146224299745, "grad_norm": 549.8175048828125, "learning_rate": 5.31778465223029e-06, "loss": 24.6235, "step": 265890 }, { "epoch": 0.5371348230626584, "grad_norm": 183.3669891357422, "learning_rate": 5.317436291342031e-06, "loss": 21.7207, "step": 265900 }, { "epoch": 0.5371550236953422, "grad_norm": 382.5271911621094, "learning_rate": 5.3170879289066265e-06, "loss": 23.4014, "step": 265910 }, { "epoch": 0.537175224328026, "grad_norm": 412.10003662109375, "learning_rate": 5.316739564925773e-06, "loss": 14.7331, "step": 265920 }, { "epoch": 0.5371954249607098, "grad_norm": 270.9097900390625, "learning_rate": 5.3163911994011705e-06, "loss": 53.0518, "step": 265930 }, { "epoch": 0.5372156255933935, "grad_norm": 44.17118835449219, "learning_rate": 5.316042832334516e-06, "loss": 10.6286, "step": 265940 }, { "epoch": 0.5372358262260774, "grad_norm": 456.9898986816406, "learning_rate": 5.315694463727506e-06, "loss": 16.6818, "step": 265950 }, { "epoch": 0.5372560268587612, "grad_norm": 567.5477294921875, "learning_rate": 5.3153460935818405e-06, "loss": 22.7101, "step": 265960 }, { "epoch": 0.537276227491445, "grad_norm": 87.14817810058594, "learning_rate": 5.314997721899214e-06, "loss": 20.0483, "step": 265970 }, { "epoch": 0.5372964281241288, "grad_norm": 350.6251525878906, "learning_rate": 5.3146493486813285e-06, "loss": 35.3954, "step": 265980 }, { "epoch": 0.5373166287568126, "grad_norm": 496.5779113769531, "learning_rate": 5.31430097392988e-06, "loss": 24.065, "step": 265990 }, { "epoch": 0.5373368293894965, "grad_norm": 138.64553833007812, "learning_rate": 5.3139525976465675e-06, "loss": 16.9822, "step": 266000 }, { "epoch": 0.5373570300221803, "grad_norm": 262.75311279296875, "learning_rate": 5.313604219833087e-06, "loss": 19.2101, "step": 266010 }, { "epoch": 0.5373772306548641, "grad_norm": 440.6421203613281, "learning_rate": 5.313255840491138e-06, "loss": 23.8345, "step": 266020 }, { "epoch": 0.5373974312875479, "grad_norm": 911.2402954101562, "learning_rate": 5.312907459622418e-06, "loss": 37.1924, "step": 266030 }, { "epoch": 0.5374176319202317, "grad_norm": 25.9619140625, "learning_rate": 5.3125590772286255e-06, "loss": 14.8119, "step": 266040 }, { "epoch": 0.5374378325529156, "grad_norm": 369.4199523925781, "learning_rate": 5.312210693311458e-06, "loss": 28.8928, "step": 266050 }, { "epoch": 0.5374580331855994, "grad_norm": 335.83154296875, "learning_rate": 5.311862307872611e-06, "loss": 24.341, "step": 266060 }, { "epoch": 0.5374782338182832, "grad_norm": 371.5845031738281, "learning_rate": 5.311513920913789e-06, "loss": 14.61, "step": 266070 }, { "epoch": 0.537498434450967, "grad_norm": 282.25152587890625, "learning_rate": 5.311165532436683e-06, "loss": 26.4066, "step": 266080 }, { "epoch": 0.5375186350836508, "grad_norm": 166.4915008544922, "learning_rate": 5.310817142442995e-06, "loss": 8.6248, "step": 266090 }, { "epoch": 0.5375388357163347, "grad_norm": 243.84902954101562, "learning_rate": 5.310468750934421e-06, "loss": 12.8543, "step": 266100 }, { "epoch": 0.5375590363490185, "grad_norm": 126.71887969970703, "learning_rate": 5.310120357912661e-06, "loss": 15.3639, "step": 266110 }, { "epoch": 0.5375792369817023, "grad_norm": 84.07275390625, "learning_rate": 5.309771963379412e-06, "loss": 18.9335, "step": 266120 }, { "epoch": 0.5375994376143861, "grad_norm": 575.5396118164062, "learning_rate": 5.309423567336371e-06, "loss": 21.143, "step": 266130 }, { "epoch": 0.5376196382470699, "grad_norm": 1744.4501953125, "learning_rate": 5.309075169785238e-06, "loss": 51.2284, "step": 266140 }, { "epoch": 0.5376398388797538, "grad_norm": 143.42788696289062, "learning_rate": 5.30872677072771e-06, "loss": 13.8134, "step": 266150 }, { "epoch": 0.5376600395124376, "grad_norm": 443.0891418457031, "learning_rate": 5.308378370165486e-06, "loss": 35.2179, "step": 266160 }, { "epoch": 0.5376802401451214, "grad_norm": 411.3719177246094, "learning_rate": 5.308029968100261e-06, "loss": 14.5203, "step": 266170 }, { "epoch": 0.5377004407778052, "grad_norm": 344.4471740722656, "learning_rate": 5.307681564533736e-06, "loss": 22.2609, "step": 266180 }, { "epoch": 0.537720641410489, "grad_norm": 273.4365234375, "learning_rate": 5.307333159467609e-06, "loss": 15.0273, "step": 266190 }, { "epoch": 0.5377408420431727, "grad_norm": 198.35134887695312, "learning_rate": 5.306984752903578e-06, "loss": 20.4413, "step": 266200 }, { "epoch": 0.5377610426758566, "grad_norm": 414.45849609375, "learning_rate": 5.3066363448433414e-06, "loss": 8.647, "step": 266210 }, { "epoch": 0.5377812433085404, "grad_norm": 250.86529541015625, "learning_rate": 5.306287935288593e-06, "loss": 24.9011, "step": 266220 }, { "epoch": 0.5378014439412242, "grad_norm": 269.31243896484375, "learning_rate": 5.305939524241037e-06, "loss": 20.3833, "step": 266230 }, { "epoch": 0.537821644573908, "grad_norm": 359.54119873046875, "learning_rate": 5.305591111702368e-06, "loss": 35.4665, "step": 266240 }, { "epoch": 0.5378418452065918, "grad_norm": 307.4914245605469, "learning_rate": 5.3052426976742855e-06, "loss": 14.9064, "step": 266250 }, { "epoch": 0.5378620458392757, "grad_norm": 2.002549886703491, "learning_rate": 5.304894282158486e-06, "loss": 14.1185, "step": 266260 }, { "epoch": 0.5378822464719595, "grad_norm": 73.3692398071289, "learning_rate": 5.304545865156669e-06, "loss": 23.6419, "step": 266270 }, { "epoch": 0.5379024471046433, "grad_norm": 137.2616729736328, "learning_rate": 5.3041974466705335e-06, "loss": 23.7897, "step": 266280 }, { "epoch": 0.5379226477373271, "grad_norm": 325.8764953613281, "learning_rate": 5.303849026701776e-06, "loss": 13.8303, "step": 266290 }, { "epoch": 0.5379428483700109, "grad_norm": 0.0, "learning_rate": 5.303500605252095e-06, "loss": 12.2661, "step": 266300 }, { "epoch": 0.5379630490026948, "grad_norm": 58.7404670715332, "learning_rate": 5.303152182323189e-06, "loss": 13.6312, "step": 266310 }, { "epoch": 0.5379832496353786, "grad_norm": 716.2264404296875, "learning_rate": 5.302803757916757e-06, "loss": 21.0367, "step": 266320 }, { "epoch": 0.5380034502680624, "grad_norm": 142.30709838867188, "learning_rate": 5.302455332034494e-06, "loss": 18.3105, "step": 266330 }, { "epoch": 0.5380236509007462, "grad_norm": 356.6372375488281, "learning_rate": 5.3021069046781025e-06, "loss": 18.3981, "step": 266340 }, { "epoch": 0.53804385153343, "grad_norm": 328.2477722167969, "learning_rate": 5.301758475849278e-06, "loss": 10.8442, "step": 266350 }, { "epoch": 0.5380640521661139, "grad_norm": 260.845703125, "learning_rate": 5.301410045549719e-06, "loss": 21.5401, "step": 266360 }, { "epoch": 0.5380842527987977, "grad_norm": 673.6998291015625, "learning_rate": 5.301061613781123e-06, "loss": 16.5076, "step": 266370 }, { "epoch": 0.5381044534314815, "grad_norm": 275.2965393066406, "learning_rate": 5.300713180545189e-06, "loss": 17.4159, "step": 266380 }, { "epoch": 0.5381246540641653, "grad_norm": 191.94093322753906, "learning_rate": 5.300364745843618e-06, "loss": 19.4287, "step": 266390 }, { "epoch": 0.5381448546968491, "grad_norm": 262.52838134765625, "learning_rate": 5.300016309678104e-06, "loss": 25.2159, "step": 266400 }, { "epoch": 0.538165055329533, "grad_norm": 73.41099548339844, "learning_rate": 5.299667872050348e-06, "loss": 34.5716, "step": 266410 }, { "epoch": 0.5381852559622168, "grad_norm": 459.25042724609375, "learning_rate": 5.299319432962046e-06, "loss": 20.3145, "step": 266420 }, { "epoch": 0.5382054565949006, "grad_norm": 291.1921691894531, "learning_rate": 5.298970992414897e-06, "loss": 16.7811, "step": 266430 }, { "epoch": 0.5382256572275844, "grad_norm": 261.6145935058594, "learning_rate": 5.2986225504106e-06, "loss": 12.4638, "step": 266440 }, { "epoch": 0.5382458578602681, "grad_norm": 320.27227783203125, "learning_rate": 5.298274106950855e-06, "loss": 14.7486, "step": 266450 }, { "epoch": 0.538266058492952, "grad_norm": 282.1473083496094, "learning_rate": 5.297925662037356e-06, "loss": 5.9319, "step": 266460 }, { "epoch": 0.5382862591256358, "grad_norm": 542.2208862304688, "learning_rate": 5.297577215671803e-06, "loss": 17.882, "step": 266470 }, { "epoch": 0.5383064597583196, "grad_norm": 202.61529541015625, "learning_rate": 5.297228767855898e-06, "loss": 16.5175, "step": 266480 }, { "epoch": 0.5383266603910034, "grad_norm": 68.44759368896484, "learning_rate": 5.296880318591331e-06, "loss": 28.9251, "step": 266490 }, { "epoch": 0.5383468610236872, "grad_norm": 206.13137817382812, "learning_rate": 5.296531867879809e-06, "loss": 13.3807, "step": 266500 }, { "epoch": 0.538367061656371, "grad_norm": 326.76641845703125, "learning_rate": 5.296183415723024e-06, "loss": 22.5667, "step": 266510 }, { "epoch": 0.5383872622890549, "grad_norm": 314.538818359375, "learning_rate": 5.29583496212268e-06, "loss": 19.1757, "step": 266520 }, { "epoch": 0.5384074629217387, "grad_norm": 222.81280517578125, "learning_rate": 5.2954865070804705e-06, "loss": 10.8099, "step": 266530 }, { "epoch": 0.5384276635544225, "grad_norm": 208.36935424804688, "learning_rate": 5.295138050598097e-06, "loss": 19.4201, "step": 266540 }, { "epoch": 0.5384478641871063, "grad_norm": 425.13189697265625, "learning_rate": 5.294789592677255e-06, "loss": 13.1474, "step": 266550 }, { "epoch": 0.5384680648197901, "grad_norm": 80.73239135742188, "learning_rate": 5.2944411333196445e-06, "loss": 13.5023, "step": 266560 }, { "epoch": 0.538488265452474, "grad_norm": 46.128963470458984, "learning_rate": 5.294092672526963e-06, "loss": 17.6277, "step": 266570 }, { "epoch": 0.5385084660851578, "grad_norm": 762.5989379882812, "learning_rate": 5.293744210300911e-06, "loss": 14.5085, "step": 266580 }, { "epoch": 0.5385286667178416, "grad_norm": 0.0, "learning_rate": 5.293395746643184e-06, "loss": 14.8363, "step": 266590 }, { "epoch": 0.5385488673505254, "grad_norm": 456.4728698730469, "learning_rate": 5.293047281555482e-06, "loss": 32.3451, "step": 266600 }, { "epoch": 0.5385690679832092, "grad_norm": 239.32101440429688, "learning_rate": 5.292698815039505e-06, "loss": 13.3809, "step": 266610 }, { "epoch": 0.5385892686158931, "grad_norm": 0.0, "learning_rate": 5.292350347096949e-06, "loss": 12.7726, "step": 266620 }, { "epoch": 0.5386094692485769, "grad_norm": 93.49626922607422, "learning_rate": 5.29200187772951e-06, "loss": 12.8177, "step": 266630 }, { "epoch": 0.5386296698812607, "grad_norm": 265.6518859863281, "learning_rate": 5.291653406938892e-06, "loss": 23.9154, "step": 266640 }, { "epoch": 0.5386498705139445, "grad_norm": 188.6708221435547, "learning_rate": 5.291304934726789e-06, "loss": 18.4039, "step": 266650 }, { "epoch": 0.5386700711466283, "grad_norm": 281.8116149902344, "learning_rate": 5.290956461094903e-06, "loss": 25.6349, "step": 266660 }, { "epoch": 0.5386902717793122, "grad_norm": 341.8420104980469, "learning_rate": 5.290607986044928e-06, "loss": 36.5975, "step": 266670 }, { "epoch": 0.538710472411996, "grad_norm": 541.3814086914062, "learning_rate": 5.290259509578567e-06, "loss": 20.6315, "step": 266680 }, { "epoch": 0.5387306730446798, "grad_norm": 202.99334716796875, "learning_rate": 5.289911031697515e-06, "loss": 13.1513, "step": 266690 }, { "epoch": 0.5387508736773636, "grad_norm": 120.10304260253906, "learning_rate": 5.289562552403472e-06, "loss": 14.1495, "step": 266700 }, { "epoch": 0.5387710743100473, "grad_norm": 484.4587707519531, "learning_rate": 5.289214071698138e-06, "loss": 10.9418, "step": 266710 }, { "epoch": 0.5387912749427312, "grad_norm": 519.1748046875, "learning_rate": 5.2888655895832075e-06, "loss": 27.3148, "step": 266720 }, { "epoch": 0.538811475575415, "grad_norm": 279.8232116699219, "learning_rate": 5.288517106060383e-06, "loss": 12.295, "step": 266730 }, { "epoch": 0.5388316762080988, "grad_norm": 394.7387390136719, "learning_rate": 5.288168621131359e-06, "loss": 19.0407, "step": 266740 }, { "epoch": 0.5388518768407826, "grad_norm": 84.26372528076172, "learning_rate": 5.287820134797837e-06, "loss": 13.3643, "step": 266750 }, { "epoch": 0.5388720774734664, "grad_norm": 170.74542236328125, "learning_rate": 5.287471647061515e-06, "loss": 18.576, "step": 266760 }, { "epoch": 0.5388922781061503, "grad_norm": 295.0085144042969, "learning_rate": 5.2871231579240916e-06, "loss": 14.716, "step": 266770 }, { "epoch": 0.5389124787388341, "grad_norm": 141.25515747070312, "learning_rate": 5.286774667387262e-06, "loss": 19.425, "step": 266780 }, { "epoch": 0.5389326793715179, "grad_norm": 477.3775939941406, "learning_rate": 5.28642617545273e-06, "loss": 13.0481, "step": 266790 }, { "epoch": 0.5389528800042017, "grad_norm": 87.62956237792969, "learning_rate": 5.2860776821221915e-06, "loss": 8.3978, "step": 266800 }, { "epoch": 0.5389730806368855, "grad_norm": 1015.1585693359375, "learning_rate": 5.285729187397344e-06, "loss": 24.9619, "step": 266810 }, { "epoch": 0.5389932812695694, "grad_norm": 4654.4306640625, "learning_rate": 5.285380691279889e-06, "loss": 32.1401, "step": 266820 }, { "epoch": 0.5390134819022532, "grad_norm": 245.5521240234375, "learning_rate": 5.2850321937715195e-06, "loss": 8.3038, "step": 266830 }, { "epoch": 0.539033682534937, "grad_norm": 48.32658767700195, "learning_rate": 5.284683694873941e-06, "loss": 16.2974, "step": 266840 }, { "epoch": 0.5390538831676208, "grad_norm": 416.8536682128906, "learning_rate": 5.284335194588848e-06, "loss": 28.7398, "step": 266850 }, { "epoch": 0.5390740838003046, "grad_norm": 536.3135375976562, "learning_rate": 5.28398669291794e-06, "loss": 29.1615, "step": 266860 }, { "epoch": 0.5390942844329885, "grad_norm": 323.451416015625, "learning_rate": 5.283638189862914e-06, "loss": 24.7409, "step": 266870 }, { "epoch": 0.5391144850656723, "grad_norm": 57.782623291015625, "learning_rate": 5.28328968542547e-06, "loss": 17.2445, "step": 266880 }, { "epoch": 0.5391346856983561, "grad_norm": 6.788858413696289, "learning_rate": 5.2829411796073085e-06, "loss": 18.6137, "step": 266890 }, { "epoch": 0.5391548863310399, "grad_norm": 59.27122497558594, "learning_rate": 5.282592672410124e-06, "loss": 13.2535, "step": 266900 }, { "epoch": 0.5391750869637237, "grad_norm": 141.57579040527344, "learning_rate": 5.282244163835617e-06, "loss": 11.4164, "step": 266910 }, { "epoch": 0.5391952875964076, "grad_norm": 27.804533004760742, "learning_rate": 5.281895653885486e-06, "loss": 30.5752, "step": 266920 }, { "epoch": 0.5392154882290914, "grad_norm": 525.464111328125, "learning_rate": 5.2815471425614315e-06, "loss": 24.4736, "step": 266930 }, { "epoch": 0.5392356888617752, "grad_norm": 56.19463348388672, "learning_rate": 5.281198629865149e-06, "loss": 24.7397, "step": 266940 }, { "epoch": 0.539255889494459, "grad_norm": 103.63961029052734, "learning_rate": 5.28085011579834e-06, "loss": 23.0777, "step": 266950 }, { "epoch": 0.5392760901271428, "grad_norm": 344.5896301269531, "learning_rate": 5.2805016003627e-06, "loss": 19.4375, "step": 266960 }, { "epoch": 0.5392962907598265, "grad_norm": 209.49497985839844, "learning_rate": 5.2801530835599295e-06, "loss": 18.5068, "step": 266970 }, { "epoch": 0.5393164913925104, "grad_norm": 122.97942352294922, "learning_rate": 5.2798045653917275e-06, "loss": 9.4342, "step": 266980 }, { "epoch": 0.5393366920251942, "grad_norm": 50.867759704589844, "learning_rate": 5.27945604585979e-06, "loss": 18.2503, "step": 266990 }, { "epoch": 0.539356892657878, "grad_norm": 170.2630157470703, "learning_rate": 5.27910752496582e-06, "loss": 13.3572, "step": 267000 }, { "epoch": 0.5393770932905618, "grad_norm": 465.73956298828125, "learning_rate": 5.278759002711513e-06, "loss": 25.1813, "step": 267010 }, { "epoch": 0.5393972939232456, "grad_norm": 296.2099304199219, "learning_rate": 5.278410479098568e-06, "loss": 26.858, "step": 267020 }, { "epoch": 0.5394174945559295, "grad_norm": 455.5629577636719, "learning_rate": 5.278061954128684e-06, "loss": 24.3683, "step": 267030 }, { "epoch": 0.5394376951886133, "grad_norm": 312.91741943359375, "learning_rate": 5.27771342780356e-06, "loss": 20.9359, "step": 267040 }, { "epoch": 0.5394578958212971, "grad_norm": 244.6012725830078, "learning_rate": 5.2773649001248965e-06, "loss": 18.638, "step": 267050 }, { "epoch": 0.5394780964539809, "grad_norm": 340.9270324707031, "learning_rate": 5.277016371094388e-06, "loss": 13.2528, "step": 267060 }, { "epoch": 0.5394982970866647, "grad_norm": 661.1080322265625, "learning_rate": 5.276667840713735e-06, "loss": 18.8909, "step": 267070 }, { "epoch": 0.5395184977193486, "grad_norm": 204.47894287109375, "learning_rate": 5.276319308984637e-06, "loss": 38.2901, "step": 267080 }, { "epoch": 0.5395386983520324, "grad_norm": 261.35986328125, "learning_rate": 5.275970775908793e-06, "loss": 15.8021, "step": 267090 }, { "epoch": 0.5395588989847162, "grad_norm": 305.7586669921875, "learning_rate": 5.275622241487899e-06, "loss": 17.7005, "step": 267100 }, { "epoch": 0.5395790996174, "grad_norm": 175.3005828857422, "learning_rate": 5.275273705723657e-06, "loss": 7.8846, "step": 267110 }, { "epoch": 0.5395993002500838, "grad_norm": 399.4211730957031, "learning_rate": 5.274925168617763e-06, "loss": 31.5131, "step": 267120 }, { "epoch": 0.5396195008827677, "grad_norm": 589.6577758789062, "learning_rate": 5.274576630171919e-06, "loss": 17.9268, "step": 267130 }, { "epoch": 0.5396397015154515, "grad_norm": 270.5091247558594, "learning_rate": 5.274228090387821e-06, "loss": 29.4483, "step": 267140 }, { "epoch": 0.5396599021481353, "grad_norm": 141.29510498046875, "learning_rate": 5.273879549267168e-06, "loss": 21.7281, "step": 267150 }, { "epoch": 0.5396801027808191, "grad_norm": 917.8594360351562, "learning_rate": 5.2735310068116605e-06, "loss": 32.6155, "step": 267160 }, { "epoch": 0.539700303413503, "grad_norm": 295.8758850097656, "learning_rate": 5.2731824630229955e-06, "loss": 28.6357, "step": 267170 }, { "epoch": 0.5397205040461868, "grad_norm": 330.1458740234375, "learning_rate": 5.272833917902872e-06, "loss": 21.3396, "step": 267180 }, { "epoch": 0.5397407046788706, "grad_norm": 526.3543701171875, "learning_rate": 5.27248537145299e-06, "loss": 18.4907, "step": 267190 }, { "epoch": 0.5397609053115544, "grad_norm": 414.3830261230469, "learning_rate": 5.272136823675046e-06, "loss": 33.1002, "step": 267200 }, { "epoch": 0.5397811059442382, "grad_norm": 281.3467102050781, "learning_rate": 5.271788274570741e-06, "loss": 9.882, "step": 267210 }, { "epoch": 0.5398013065769219, "grad_norm": 335.8013916015625, "learning_rate": 5.2714397241417736e-06, "loss": 14.886, "step": 267220 }, { "epoch": 0.5398215072096058, "grad_norm": 356.0302734375, "learning_rate": 5.271091172389841e-06, "loss": 25.5109, "step": 267230 }, { "epoch": 0.5398417078422896, "grad_norm": 300.1552429199219, "learning_rate": 5.270742619316642e-06, "loss": 23.2901, "step": 267240 }, { "epoch": 0.5398619084749734, "grad_norm": 108.5082015991211, "learning_rate": 5.270394064923878e-06, "loss": 19.1096, "step": 267250 }, { "epoch": 0.5398821091076572, "grad_norm": 193.50857543945312, "learning_rate": 5.270045509213244e-06, "loss": 18.6972, "step": 267260 }, { "epoch": 0.539902309740341, "grad_norm": 15.0328369140625, "learning_rate": 5.2696969521864435e-06, "loss": 23.9483, "step": 267270 }, { "epoch": 0.5399225103730249, "grad_norm": 274.4201354980469, "learning_rate": 5.2693483938451705e-06, "loss": 11.9568, "step": 267280 }, { "epoch": 0.5399427110057087, "grad_norm": 335.3061828613281, "learning_rate": 5.268999834191128e-06, "loss": 19.0985, "step": 267290 }, { "epoch": 0.5399629116383925, "grad_norm": 205.67137145996094, "learning_rate": 5.268651273226011e-06, "loss": 16.3288, "step": 267300 }, { "epoch": 0.5399831122710763, "grad_norm": 247.44305419921875, "learning_rate": 5.268302710951522e-06, "loss": 15.5502, "step": 267310 }, { "epoch": 0.5400033129037601, "grad_norm": 238.6695098876953, "learning_rate": 5.267954147369359e-06, "loss": 41.1315, "step": 267320 }, { "epoch": 0.540023513536444, "grad_norm": 171.28121948242188, "learning_rate": 5.267605582481216e-06, "loss": 20.1757, "step": 267330 }, { "epoch": 0.5400437141691278, "grad_norm": 223.7276153564453, "learning_rate": 5.2672570162887996e-06, "loss": 31.113, "step": 267340 }, { "epoch": 0.5400639148018116, "grad_norm": 285.41748046875, "learning_rate": 5.2669084487938025e-06, "loss": 14.6894, "step": 267350 }, { "epoch": 0.5400841154344954, "grad_norm": 385.9441833496094, "learning_rate": 5.266559879997928e-06, "loss": 27.9722, "step": 267360 }, { "epoch": 0.5401043160671792, "grad_norm": 373.7374572753906, "learning_rate": 5.266211309902871e-06, "loss": 12.8376, "step": 267370 }, { "epoch": 0.540124516699863, "grad_norm": 229.84140014648438, "learning_rate": 5.265862738510335e-06, "loss": 25.3829, "step": 267380 }, { "epoch": 0.5401447173325469, "grad_norm": 188.85060119628906, "learning_rate": 5.265514165822014e-06, "loss": 21.62, "step": 267390 }, { "epoch": 0.5401649179652307, "grad_norm": 853.23095703125, "learning_rate": 5.26516559183961e-06, "loss": 21.5823, "step": 267400 }, { "epoch": 0.5401851185979145, "grad_norm": 210.9942169189453, "learning_rate": 5.26481701656482e-06, "loss": 14.9183, "step": 267410 }, { "epoch": 0.5402053192305983, "grad_norm": 598.7924194335938, "learning_rate": 5.264468439999345e-06, "loss": 15.2315, "step": 267420 }, { "epoch": 0.5402255198632822, "grad_norm": 286.0208435058594, "learning_rate": 5.2641198621448845e-06, "loss": 23.3046, "step": 267430 }, { "epoch": 0.540245720495966, "grad_norm": 658.3983764648438, "learning_rate": 5.263771283003133e-06, "loss": 31.1154, "step": 267440 }, { "epoch": 0.5402659211286498, "grad_norm": 65.66263580322266, "learning_rate": 5.263422702575793e-06, "loss": 15.8578, "step": 267450 }, { "epoch": 0.5402861217613336, "grad_norm": 160.2906494140625, "learning_rate": 5.263074120864564e-06, "loss": 15.4203, "step": 267460 }, { "epoch": 0.5403063223940174, "grad_norm": 252.45521545410156, "learning_rate": 5.2627255378711414e-06, "loss": 17.8949, "step": 267470 }, { "epoch": 0.5403265230267011, "grad_norm": 251.3747100830078, "learning_rate": 5.262376953597228e-06, "loss": 10.3396, "step": 267480 }, { "epoch": 0.540346723659385, "grad_norm": 192.13729858398438, "learning_rate": 5.26202836804452e-06, "loss": 10.8089, "step": 267490 }, { "epoch": 0.5403669242920688, "grad_norm": 163.13058471679688, "learning_rate": 5.2616797812147205e-06, "loss": 26.603, "step": 267500 }, { "epoch": 0.5403871249247526, "grad_norm": 179.414794921875, "learning_rate": 5.261331193109524e-06, "loss": 12.995, "step": 267510 }, { "epoch": 0.5404073255574364, "grad_norm": 236.43939208984375, "learning_rate": 5.260982603730629e-06, "loss": 29.8317, "step": 267520 }, { "epoch": 0.5404275261901202, "grad_norm": 361.2092590332031, "learning_rate": 5.260634013079738e-06, "loss": 19.5141, "step": 267530 }, { "epoch": 0.5404477268228041, "grad_norm": 199.43821716308594, "learning_rate": 5.260285421158548e-06, "loss": 14.3407, "step": 267540 }, { "epoch": 0.5404679274554879, "grad_norm": 372.7821960449219, "learning_rate": 5.259936827968758e-06, "loss": 13.2983, "step": 267550 }, { "epoch": 0.5404881280881717, "grad_norm": 399.16766357421875, "learning_rate": 5.25958823351207e-06, "loss": 13.1112, "step": 267560 }, { "epoch": 0.5405083287208555, "grad_norm": 532.2105102539062, "learning_rate": 5.259239637790178e-06, "loss": 14.9261, "step": 267570 }, { "epoch": 0.5405285293535393, "grad_norm": 92.04874420166016, "learning_rate": 5.258891040804783e-06, "loss": 14.6243, "step": 267580 }, { "epoch": 0.5405487299862232, "grad_norm": 7896.95166015625, "learning_rate": 5.258542442557586e-06, "loss": 42.4149, "step": 267590 }, { "epoch": 0.540568930618907, "grad_norm": 95.56755065917969, "learning_rate": 5.258193843050283e-06, "loss": 23.626, "step": 267600 }, { "epoch": 0.5405891312515908, "grad_norm": 93.27716827392578, "learning_rate": 5.257845242284576e-06, "loss": 22.7209, "step": 267610 }, { "epoch": 0.5406093318842746, "grad_norm": 0.0, "learning_rate": 5.2574966402621615e-06, "loss": 23.1014, "step": 267620 }, { "epoch": 0.5406295325169584, "grad_norm": 397.4338073730469, "learning_rate": 5.25714803698474e-06, "loss": 21.9656, "step": 267630 }, { "epoch": 0.5406497331496423, "grad_norm": 264.91717529296875, "learning_rate": 5.25679943245401e-06, "loss": 17.9729, "step": 267640 }, { "epoch": 0.5406699337823261, "grad_norm": 260.9686584472656, "learning_rate": 5.256450826671671e-06, "loss": 16.0078, "step": 267650 }, { "epoch": 0.5406901344150099, "grad_norm": 437.55670166015625, "learning_rate": 5.256102219639423e-06, "loss": 20.9831, "step": 267660 }, { "epoch": 0.5407103350476937, "grad_norm": 499.3785400390625, "learning_rate": 5.2557536113589625e-06, "loss": 20.0379, "step": 267670 }, { "epoch": 0.5407305356803775, "grad_norm": 228.1459197998047, "learning_rate": 5.25540500183199e-06, "loss": 22.8546, "step": 267680 }, { "epoch": 0.5407507363130614, "grad_norm": 125.53427124023438, "learning_rate": 5.2550563910602035e-06, "loss": 13.1229, "step": 267690 }, { "epoch": 0.5407709369457452, "grad_norm": 203.1827392578125, "learning_rate": 5.254707779045305e-06, "loss": 18.5024, "step": 267700 }, { "epoch": 0.540791137578429, "grad_norm": 524.6204223632812, "learning_rate": 5.25435916578899e-06, "loss": 23.1371, "step": 267710 }, { "epoch": 0.5408113382111128, "grad_norm": 148.8800506591797, "learning_rate": 5.254010551292961e-06, "loss": 24.3182, "step": 267720 }, { "epoch": 0.5408315388437965, "grad_norm": 165.79576110839844, "learning_rate": 5.253661935558914e-06, "loss": 15.3915, "step": 267730 }, { "epoch": 0.5408517394764804, "grad_norm": 210.0788116455078, "learning_rate": 5.25331331858855e-06, "loss": 13.02, "step": 267740 }, { "epoch": 0.5408719401091642, "grad_norm": 204.95155334472656, "learning_rate": 5.252964700383567e-06, "loss": 14.2187, "step": 267750 }, { "epoch": 0.540892140741848, "grad_norm": 207.04425048828125, "learning_rate": 5.252616080945665e-06, "loss": 13.4826, "step": 267760 }, { "epoch": 0.5409123413745318, "grad_norm": 237.76992797851562, "learning_rate": 5.252267460276544e-06, "loss": 27.9926, "step": 267770 }, { "epoch": 0.5409325420072156, "grad_norm": 217.3527374267578, "learning_rate": 5.2519188383779e-06, "loss": 11.729, "step": 267780 }, { "epoch": 0.5409527426398995, "grad_norm": 292.14178466796875, "learning_rate": 5.251570215251436e-06, "loss": 20.8645, "step": 267790 }, { "epoch": 0.5409729432725833, "grad_norm": 58.043792724609375, "learning_rate": 5.251221590898848e-06, "loss": 19.9796, "step": 267800 }, { "epoch": 0.5409931439052671, "grad_norm": 279.92041015625, "learning_rate": 5.250872965321837e-06, "loss": 18.1242, "step": 267810 }, { "epoch": 0.5410133445379509, "grad_norm": 203.3126220703125, "learning_rate": 5.250524338522102e-06, "loss": 13.7004, "step": 267820 }, { "epoch": 0.5410335451706347, "grad_norm": 60.907291412353516, "learning_rate": 5.250175710501342e-06, "loss": 9.5043, "step": 267830 }, { "epoch": 0.5410537458033186, "grad_norm": 236.52088928222656, "learning_rate": 5.249827081261255e-06, "loss": 14.3663, "step": 267840 }, { "epoch": 0.5410739464360024, "grad_norm": 288.33514404296875, "learning_rate": 5.249478450803541e-06, "loss": 21.0068, "step": 267850 }, { "epoch": 0.5410941470686862, "grad_norm": 103.84046936035156, "learning_rate": 5.2491298191298986e-06, "loss": 19.496, "step": 267860 }, { "epoch": 0.54111434770137, "grad_norm": 171.8682098388672, "learning_rate": 5.248781186242029e-06, "loss": 12.0498, "step": 267870 }, { "epoch": 0.5411345483340538, "grad_norm": 281.7113952636719, "learning_rate": 5.2484325521416315e-06, "loss": 12.5595, "step": 267880 }, { "epoch": 0.5411547489667377, "grad_norm": 273.86907958984375, "learning_rate": 5.2480839168304e-06, "loss": 13.783, "step": 267890 }, { "epoch": 0.5411749495994215, "grad_norm": 690.4273681640625, "learning_rate": 5.247735280310041e-06, "loss": 16.2333, "step": 267900 }, { "epoch": 0.5411951502321053, "grad_norm": 553.8798217773438, "learning_rate": 5.247386642582248e-06, "loss": 18.8728, "step": 267910 }, { "epoch": 0.5412153508647891, "grad_norm": 443.9059753417969, "learning_rate": 5.2470380036487245e-06, "loss": 15.551, "step": 267920 }, { "epoch": 0.5412355514974729, "grad_norm": 70.2289810180664, "learning_rate": 5.246689363511167e-06, "loss": 23.9603, "step": 267930 }, { "epoch": 0.5412557521301568, "grad_norm": 397.4411315917969, "learning_rate": 5.2463407221712745e-06, "loss": 18.4745, "step": 267940 }, { "epoch": 0.5412759527628406, "grad_norm": 227.87106323242188, "learning_rate": 5.245992079630748e-06, "loss": 18.347, "step": 267950 }, { "epoch": 0.5412961533955244, "grad_norm": 393.6068420410156, "learning_rate": 5.2456434358912865e-06, "loss": 25.0059, "step": 267960 }, { "epoch": 0.5413163540282082, "grad_norm": 115.77591705322266, "learning_rate": 5.245294790954587e-06, "loss": 15.4794, "step": 267970 }, { "epoch": 0.541336554660892, "grad_norm": 84.56694030761719, "learning_rate": 5.244946144822351e-06, "loss": 11.1435, "step": 267980 }, { "epoch": 0.5413567552935757, "grad_norm": 479.2171936035156, "learning_rate": 5.24459749749628e-06, "loss": 16.8489, "step": 267990 }, { "epoch": 0.5413769559262596, "grad_norm": 193.6396484375, "learning_rate": 5.244248848978067e-06, "loss": 12.1415, "step": 268000 }, { "epoch": 0.5413971565589434, "grad_norm": 257.1857604980469, "learning_rate": 5.243900199269416e-06, "loss": 19.6243, "step": 268010 }, { "epoch": 0.5414173571916272, "grad_norm": 345.4239196777344, "learning_rate": 5.2435515483720246e-06, "loss": 19.7345, "step": 268020 }, { "epoch": 0.541437557824311, "grad_norm": 329.3836975097656, "learning_rate": 5.243202896287593e-06, "loss": 22.5997, "step": 268030 }, { "epoch": 0.5414577584569948, "grad_norm": 40.20481872558594, "learning_rate": 5.242854243017821e-06, "loss": 14.0404, "step": 268040 }, { "epoch": 0.5414779590896787, "grad_norm": 281.3708801269531, "learning_rate": 5.242505588564404e-06, "loss": 8.0399, "step": 268050 }, { "epoch": 0.5414981597223625, "grad_norm": 146.38233947753906, "learning_rate": 5.2421569329290465e-06, "loss": 9.9334, "step": 268060 }, { "epoch": 0.5415183603550463, "grad_norm": 101.25006103515625, "learning_rate": 5.241808276113445e-06, "loss": 20.3142, "step": 268070 }, { "epoch": 0.5415385609877301, "grad_norm": 244.2449188232422, "learning_rate": 5.241459618119299e-06, "loss": 23.9262, "step": 268080 }, { "epoch": 0.5415587616204139, "grad_norm": 338.0676574707031, "learning_rate": 5.241110958948307e-06, "loss": 15.6208, "step": 268090 }, { "epoch": 0.5415789622530978, "grad_norm": 0.0, "learning_rate": 5.240762298602171e-06, "loss": 10.5078, "step": 268100 }, { "epoch": 0.5415991628857816, "grad_norm": 220.89410400390625, "learning_rate": 5.240413637082588e-06, "loss": 22.7966, "step": 268110 }, { "epoch": 0.5416193635184654, "grad_norm": 262.5289611816406, "learning_rate": 5.240064974391259e-06, "loss": 23.6718, "step": 268120 }, { "epoch": 0.5416395641511492, "grad_norm": 241.12599182128906, "learning_rate": 5.239716310529882e-06, "loss": 18.0999, "step": 268130 }, { "epoch": 0.541659764783833, "grad_norm": 96.27742767333984, "learning_rate": 5.2393676455001565e-06, "loss": 31.3215, "step": 268140 }, { "epoch": 0.5416799654165169, "grad_norm": 96.0700454711914, "learning_rate": 5.239018979303784e-06, "loss": 35.1938, "step": 268150 }, { "epoch": 0.5417001660492007, "grad_norm": 611.3063354492188, "learning_rate": 5.238670311942459e-06, "loss": 15.8382, "step": 268160 }, { "epoch": 0.5417203666818845, "grad_norm": 238.66102600097656, "learning_rate": 5.2383216434178856e-06, "loss": 11.2855, "step": 268170 }, { "epoch": 0.5417405673145683, "grad_norm": 179.9120635986328, "learning_rate": 5.237972973731761e-06, "loss": 32.8968, "step": 268180 }, { "epoch": 0.5417607679472521, "grad_norm": 223.6497344970703, "learning_rate": 5.237624302885785e-06, "loss": 30.3425, "step": 268190 }, { "epoch": 0.541780968579936, "grad_norm": 119.02932739257812, "learning_rate": 5.237275630881658e-06, "loss": 14.2816, "step": 268200 }, { "epoch": 0.5418011692126198, "grad_norm": 131.75732421875, "learning_rate": 5.236926957721075e-06, "loss": 20.7634, "step": 268210 }, { "epoch": 0.5418213698453036, "grad_norm": 531.0144653320312, "learning_rate": 5.236578283405742e-06, "loss": 15.6526, "step": 268220 }, { "epoch": 0.5418415704779874, "grad_norm": 177.13221740722656, "learning_rate": 5.236229607937354e-06, "loss": 18.8871, "step": 268230 }, { "epoch": 0.5418617711106711, "grad_norm": 410.2221984863281, "learning_rate": 5.235880931317612e-06, "loss": 29.7928, "step": 268240 }, { "epoch": 0.541881971743355, "grad_norm": 196.44664001464844, "learning_rate": 5.235532253548213e-06, "loss": 20.0427, "step": 268250 }, { "epoch": 0.5419021723760388, "grad_norm": 445.3470458984375, "learning_rate": 5.235183574630861e-06, "loss": 16.543, "step": 268260 }, { "epoch": 0.5419223730087226, "grad_norm": 304.2573547363281, "learning_rate": 5.234834894567252e-06, "loss": 31.9388, "step": 268270 }, { "epoch": 0.5419425736414064, "grad_norm": 215.46038818359375, "learning_rate": 5.2344862133590855e-06, "loss": 14.8553, "step": 268280 }, { "epoch": 0.5419627742740902, "grad_norm": 302.2164611816406, "learning_rate": 5.234137531008062e-06, "loss": 13.5896, "step": 268290 }, { "epoch": 0.541982974906774, "grad_norm": 32.41757583618164, "learning_rate": 5.233788847515882e-06, "loss": 24.9908, "step": 268300 }, { "epoch": 0.5420031755394579, "grad_norm": 349.8468322753906, "learning_rate": 5.233440162884241e-06, "loss": 21.9324, "step": 268310 }, { "epoch": 0.5420233761721417, "grad_norm": 239.48072814941406, "learning_rate": 5.233091477114842e-06, "loss": 30.0262, "step": 268320 }, { "epoch": 0.5420435768048255, "grad_norm": 200.2691650390625, "learning_rate": 5.232742790209384e-06, "loss": 15.8788, "step": 268330 }, { "epoch": 0.5420637774375093, "grad_norm": 315.0242919921875, "learning_rate": 5.232394102169566e-06, "loss": 27.8753, "step": 268340 }, { "epoch": 0.5420839780701932, "grad_norm": 194.0788116455078, "learning_rate": 5.2320454129970866e-06, "loss": 14.7699, "step": 268350 }, { "epoch": 0.542104178702877, "grad_norm": 322.10748291015625, "learning_rate": 5.2316967226936454e-06, "loss": 18.2376, "step": 268360 }, { "epoch": 0.5421243793355608, "grad_norm": 202.24703979492188, "learning_rate": 5.231348031260943e-06, "loss": 12.3663, "step": 268370 }, { "epoch": 0.5421445799682446, "grad_norm": 93.21638488769531, "learning_rate": 5.2309993387006795e-06, "loss": 14.127, "step": 268380 }, { "epoch": 0.5421647806009284, "grad_norm": 417.3992004394531, "learning_rate": 5.230650645014551e-06, "loss": 15.8671, "step": 268390 }, { "epoch": 0.5421849812336123, "grad_norm": 365.2443542480469, "learning_rate": 5.230301950204261e-06, "loss": 22.1182, "step": 268400 }, { "epoch": 0.5422051818662961, "grad_norm": 431.7119445800781, "learning_rate": 5.229953254271507e-06, "loss": 23.3247, "step": 268410 }, { "epoch": 0.5422253824989799, "grad_norm": 226.32403564453125, "learning_rate": 5.229604557217988e-06, "loss": 10.098, "step": 268420 }, { "epoch": 0.5422455831316637, "grad_norm": 472.58001708984375, "learning_rate": 5.229255859045405e-06, "loss": 15.029, "step": 268430 }, { "epoch": 0.5422657837643475, "grad_norm": 393.46875, "learning_rate": 5.228907159755457e-06, "loss": 22.603, "step": 268440 }, { "epoch": 0.5422859843970314, "grad_norm": 220.483154296875, "learning_rate": 5.228558459349844e-06, "loss": 16.9208, "step": 268450 }, { "epoch": 0.5423061850297152, "grad_norm": 202.55198669433594, "learning_rate": 5.2282097578302624e-06, "loss": 19.8516, "step": 268460 }, { "epoch": 0.542326385662399, "grad_norm": 5.920296669006348, "learning_rate": 5.227861055198415e-06, "loss": 15.4662, "step": 268470 }, { "epoch": 0.5423465862950828, "grad_norm": 491.9482727050781, "learning_rate": 5.227512351456001e-06, "loss": 13.0008, "step": 268480 }, { "epoch": 0.5423667869277666, "grad_norm": 191.73680114746094, "learning_rate": 5.227163646604721e-06, "loss": 21.251, "step": 268490 }, { "epoch": 0.5423869875604503, "grad_norm": 411.56744384765625, "learning_rate": 5.226814940646268e-06, "loss": 22.14, "step": 268500 }, { "epoch": 0.5424071881931342, "grad_norm": 110.27123260498047, "learning_rate": 5.226466233582351e-06, "loss": 15.7443, "step": 268510 }, { "epoch": 0.542427388825818, "grad_norm": 189.28973388671875, "learning_rate": 5.226117525414663e-06, "loss": 27.6841, "step": 268520 }, { "epoch": 0.5424475894585018, "grad_norm": 272.6982116699219, "learning_rate": 5.225768816144907e-06, "loss": 10.7648, "step": 268530 }, { "epoch": 0.5424677900911856, "grad_norm": 282.3614196777344, "learning_rate": 5.225420105774781e-06, "loss": 17.3344, "step": 268540 }, { "epoch": 0.5424879907238694, "grad_norm": 230.4717254638672, "learning_rate": 5.2250713943059826e-06, "loss": 18.7078, "step": 268550 }, { "epoch": 0.5425081913565533, "grad_norm": 273.7574462890625, "learning_rate": 5.224722681740217e-06, "loss": 20.1452, "step": 268560 }, { "epoch": 0.5425283919892371, "grad_norm": 189.96983337402344, "learning_rate": 5.224373968079177e-06, "loss": 11.7968, "step": 268570 }, { "epoch": 0.5425485926219209, "grad_norm": 192.35438537597656, "learning_rate": 5.224025253324567e-06, "loss": 16.7766, "step": 268580 }, { "epoch": 0.5425687932546047, "grad_norm": 48.1296272277832, "learning_rate": 5.223676537478085e-06, "loss": 15.2845, "step": 268590 }, { "epoch": 0.5425889938872885, "grad_norm": 297.42962646484375, "learning_rate": 5.223327820541432e-06, "loss": 12.5965, "step": 268600 }, { "epoch": 0.5426091945199724, "grad_norm": 292.1661071777344, "learning_rate": 5.222979102516304e-06, "loss": 12.6612, "step": 268610 }, { "epoch": 0.5426293951526562, "grad_norm": 6.409508228302002, "learning_rate": 5.2226303834044036e-06, "loss": 11.1812, "step": 268620 }, { "epoch": 0.54264959578534, "grad_norm": 383.8204345703125, "learning_rate": 5.22228166320743e-06, "loss": 16.9637, "step": 268630 }, { "epoch": 0.5426697964180238, "grad_norm": 347.5732727050781, "learning_rate": 5.2219329419270825e-06, "loss": 7.9665, "step": 268640 }, { "epoch": 0.5426899970507076, "grad_norm": 385.9485168457031, "learning_rate": 5.221584219565061e-06, "loss": 19.5967, "step": 268650 }, { "epoch": 0.5427101976833915, "grad_norm": 222.83494567871094, "learning_rate": 5.221235496123064e-06, "loss": 19.0795, "step": 268660 }, { "epoch": 0.5427303983160753, "grad_norm": 461.10968017578125, "learning_rate": 5.220886771602793e-06, "loss": 9.3798, "step": 268670 }, { "epoch": 0.5427505989487591, "grad_norm": 7.26524019241333, "learning_rate": 5.2205380460059466e-06, "loss": 14.5801, "step": 268680 }, { "epoch": 0.5427707995814429, "grad_norm": 475.81396484375, "learning_rate": 5.2201893193342234e-06, "loss": 15.327, "step": 268690 }, { "epoch": 0.5427910002141267, "grad_norm": 151.8145751953125, "learning_rate": 5.219840591589325e-06, "loss": 15.777, "step": 268700 }, { "epoch": 0.5428112008468106, "grad_norm": 186.99859619140625, "learning_rate": 5.21949186277295e-06, "loss": 17.0748, "step": 268710 }, { "epoch": 0.5428314014794944, "grad_norm": 632.101318359375, "learning_rate": 5.219143132886799e-06, "loss": 28.8947, "step": 268720 }, { "epoch": 0.5428516021121782, "grad_norm": 666.052978515625, "learning_rate": 5.218794401932571e-06, "loss": 19.8697, "step": 268730 }, { "epoch": 0.542871802744862, "grad_norm": 243.41046142578125, "learning_rate": 5.218445669911964e-06, "loss": 10.9564, "step": 268740 }, { "epoch": 0.5428920033775458, "grad_norm": 326.09637451171875, "learning_rate": 5.218096936826681e-06, "loss": 22.1626, "step": 268750 }, { "epoch": 0.5429122040102295, "grad_norm": 307.6583251953125, "learning_rate": 5.21774820267842e-06, "loss": 17.9308, "step": 268760 }, { "epoch": 0.5429324046429134, "grad_norm": 201.6348114013672, "learning_rate": 5.2173994674688786e-06, "loss": 12.1483, "step": 268770 }, { "epoch": 0.5429526052755972, "grad_norm": 156.23472595214844, "learning_rate": 5.2170507311997605e-06, "loss": 26.8963, "step": 268780 }, { "epoch": 0.542972805908281, "grad_norm": 189.2065887451172, "learning_rate": 5.216701993872763e-06, "loss": 26.8422, "step": 268790 }, { "epoch": 0.5429930065409648, "grad_norm": 57.75368118286133, "learning_rate": 5.216353255489586e-06, "loss": 18.9127, "step": 268800 }, { "epoch": 0.5430132071736486, "grad_norm": 17.785980224609375, "learning_rate": 5.21600451605193e-06, "loss": 9.0461, "step": 268810 }, { "epoch": 0.5430334078063325, "grad_norm": 453.6943359375, "learning_rate": 5.215655775561493e-06, "loss": 18.6802, "step": 268820 }, { "epoch": 0.5430536084390163, "grad_norm": 237.1214141845703, "learning_rate": 5.215307034019977e-06, "loss": 12.6284, "step": 268830 }, { "epoch": 0.5430738090717001, "grad_norm": 195.33412170410156, "learning_rate": 5.214958291429079e-06, "loss": 16.8194, "step": 268840 }, { "epoch": 0.5430940097043839, "grad_norm": 110.08348846435547, "learning_rate": 5.214609547790504e-06, "loss": 20.7934, "step": 268850 }, { "epoch": 0.5431142103370677, "grad_norm": 245.46722412109375, "learning_rate": 5.214260803105945e-06, "loss": 26.7591, "step": 268860 }, { "epoch": 0.5431344109697516, "grad_norm": 261.34869384765625, "learning_rate": 5.213912057377105e-06, "loss": 14.6341, "step": 268870 }, { "epoch": 0.5431546116024354, "grad_norm": 274.8428955078125, "learning_rate": 5.213563310605686e-06, "loss": 11.0714, "step": 268880 }, { "epoch": 0.5431748122351192, "grad_norm": 481.3251037597656, "learning_rate": 5.213214562793383e-06, "loss": 24.7712, "step": 268890 }, { "epoch": 0.543195012867803, "grad_norm": 661.2894897460938, "learning_rate": 5.212865813941899e-06, "loss": 23.176, "step": 268900 }, { "epoch": 0.5432152135004868, "grad_norm": 246.08981323242188, "learning_rate": 5.2125170640529325e-06, "loss": 17.759, "step": 268910 }, { "epoch": 0.5432354141331707, "grad_norm": 11.962994575500488, "learning_rate": 5.212168313128183e-06, "loss": 13.2762, "step": 268920 }, { "epoch": 0.5432556147658545, "grad_norm": 264.292724609375, "learning_rate": 5.2118195611693515e-06, "loss": 17.6461, "step": 268930 }, { "epoch": 0.5432758153985383, "grad_norm": 346.6041259765625, "learning_rate": 5.211470808178137e-06, "loss": 16.208, "step": 268940 }, { "epoch": 0.5432960160312221, "grad_norm": 278.4539489746094, "learning_rate": 5.21112205415624e-06, "loss": 12.7197, "step": 268950 }, { "epoch": 0.543316216663906, "grad_norm": 263.31585693359375, "learning_rate": 5.210773299105358e-06, "loss": 19.4758, "step": 268960 }, { "epoch": 0.5433364172965898, "grad_norm": 533.1524047851562, "learning_rate": 5.210424543027195e-06, "loss": 30.3477, "step": 268970 }, { "epoch": 0.5433566179292736, "grad_norm": 71.55894470214844, "learning_rate": 5.210075785923446e-06, "loss": 11.8082, "step": 268980 }, { "epoch": 0.5433768185619574, "grad_norm": 535.6356811523438, "learning_rate": 5.209727027795816e-06, "loss": 23.1419, "step": 268990 }, { "epoch": 0.5433970191946412, "grad_norm": 275.212890625, "learning_rate": 5.209378268645998e-06, "loss": 28.7076, "step": 269000 }, { "epoch": 0.5434172198273249, "grad_norm": 123.33200073242188, "learning_rate": 5.209029508475699e-06, "loss": 18.4708, "step": 269010 }, { "epoch": 0.5434374204600088, "grad_norm": 397.0018615722656, "learning_rate": 5.208680747286614e-06, "loss": 14.7248, "step": 269020 }, { "epoch": 0.5434576210926926, "grad_norm": 274.04705810546875, "learning_rate": 5.2083319850804445e-06, "loss": 16.6912, "step": 269030 }, { "epoch": 0.5434778217253764, "grad_norm": 323.5614929199219, "learning_rate": 5.20798322185889e-06, "loss": 13.7128, "step": 269040 }, { "epoch": 0.5434980223580602, "grad_norm": 213.0019989013672, "learning_rate": 5.207634457623652e-06, "loss": 15.3503, "step": 269050 }, { "epoch": 0.543518222990744, "grad_norm": 176.44032287597656, "learning_rate": 5.207285692376427e-06, "loss": 14.0214, "step": 269060 }, { "epoch": 0.5435384236234279, "grad_norm": 500.7525939941406, "learning_rate": 5.206936926118917e-06, "loss": 32.6432, "step": 269070 }, { "epoch": 0.5435586242561117, "grad_norm": 214.0053253173828, "learning_rate": 5.206588158852822e-06, "loss": 13.2494, "step": 269080 }, { "epoch": 0.5435788248887955, "grad_norm": 295.88214111328125, "learning_rate": 5.206239390579842e-06, "loss": 9.0975, "step": 269090 }, { "epoch": 0.5435990255214793, "grad_norm": 111.06599426269531, "learning_rate": 5.205890621301676e-06, "loss": 11.8761, "step": 269100 }, { "epoch": 0.5436192261541631, "grad_norm": 101.27349090576172, "learning_rate": 5.205541851020022e-06, "loss": 11.203, "step": 269110 }, { "epoch": 0.543639426786847, "grad_norm": 196.61756896972656, "learning_rate": 5.205193079736584e-06, "loss": 19.4616, "step": 269120 }, { "epoch": 0.5436596274195308, "grad_norm": 659.3675537109375, "learning_rate": 5.204844307453059e-06, "loss": 22.2231, "step": 269130 }, { "epoch": 0.5436798280522146, "grad_norm": 536.2849731445312, "learning_rate": 5.204495534171148e-06, "loss": 16.3983, "step": 269140 }, { "epoch": 0.5437000286848984, "grad_norm": 109.75298309326172, "learning_rate": 5.204146759892551e-06, "loss": 21.8586, "step": 269150 }, { "epoch": 0.5437202293175822, "grad_norm": 486.2676086425781, "learning_rate": 5.2037979846189655e-06, "loss": 12.2087, "step": 269160 }, { "epoch": 0.5437404299502661, "grad_norm": 0.0, "learning_rate": 5.203449208352096e-06, "loss": 7.0972, "step": 269170 }, { "epoch": 0.5437606305829499, "grad_norm": 393.2877502441406, "learning_rate": 5.203100431093638e-06, "loss": 22.7023, "step": 269180 }, { "epoch": 0.5437808312156337, "grad_norm": 75.94367980957031, "learning_rate": 5.202751652845294e-06, "loss": 20.5391, "step": 269190 }, { "epoch": 0.5438010318483175, "grad_norm": 376.81195068359375, "learning_rate": 5.202402873608763e-06, "loss": 37.202, "step": 269200 }, { "epoch": 0.5438212324810013, "grad_norm": 336.64892578125, "learning_rate": 5.2020540933857455e-06, "loss": 28.3833, "step": 269210 }, { "epoch": 0.5438414331136852, "grad_norm": 224.55393981933594, "learning_rate": 5.201705312177939e-06, "loss": 11.8015, "step": 269220 }, { "epoch": 0.543861633746369, "grad_norm": 0.0, "learning_rate": 5.2013565299870475e-06, "loss": 13.7136, "step": 269230 }, { "epoch": 0.5438818343790528, "grad_norm": 39.89509201049805, "learning_rate": 5.201007746814767e-06, "loss": 17.5044, "step": 269240 }, { "epoch": 0.5439020350117366, "grad_norm": 441.6989440917969, "learning_rate": 5.200658962662799e-06, "loss": 8.8909, "step": 269250 }, { "epoch": 0.5439222356444204, "grad_norm": 275.45367431640625, "learning_rate": 5.2003101775328455e-06, "loss": 23.0557, "step": 269260 }, { "epoch": 0.5439424362771041, "grad_norm": 216.29795837402344, "learning_rate": 5.199961391426601e-06, "loss": 19.6615, "step": 269270 }, { "epoch": 0.543962636909788, "grad_norm": 228.86402893066406, "learning_rate": 5.199612604345773e-06, "loss": 12.0054, "step": 269280 }, { "epoch": 0.5439828375424718, "grad_norm": 244.080322265625, "learning_rate": 5.199263816292055e-06, "loss": 14.2562, "step": 269290 }, { "epoch": 0.5440030381751556, "grad_norm": 1036.6641845703125, "learning_rate": 5.19891502726715e-06, "loss": 32.4281, "step": 269300 }, { "epoch": 0.5440232388078394, "grad_norm": 237.253662109375, "learning_rate": 5.198566237272757e-06, "loss": 17.4395, "step": 269310 }, { "epoch": 0.5440434394405232, "grad_norm": 168.9481201171875, "learning_rate": 5.198217446310576e-06, "loss": 9.3896, "step": 269320 }, { "epoch": 0.5440636400732071, "grad_norm": 0.0, "learning_rate": 5.197868654382307e-06, "loss": 30.5356, "step": 269330 }, { "epoch": 0.5440838407058909, "grad_norm": 101.39205932617188, "learning_rate": 5.197519861489652e-06, "loss": 17.2009, "step": 269340 }, { "epoch": 0.5441040413385747, "grad_norm": 139.87548828125, "learning_rate": 5.197171067634307e-06, "loss": 20.2663, "step": 269350 }, { "epoch": 0.5441242419712585, "grad_norm": 925.1375122070312, "learning_rate": 5.196822272817975e-06, "loss": 35.9398, "step": 269360 }, { "epoch": 0.5441444426039423, "grad_norm": 266.6156311035156, "learning_rate": 5.196473477042355e-06, "loss": 25.803, "step": 269370 }, { "epoch": 0.5441646432366262, "grad_norm": 272.2398986816406, "learning_rate": 5.196124680309148e-06, "loss": 18.8466, "step": 269380 }, { "epoch": 0.54418484386931, "grad_norm": 211.54344177246094, "learning_rate": 5.1957758826200525e-06, "loss": 26.7716, "step": 269390 }, { "epoch": 0.5442050445019938, "grad_norm": 246.0782012939453, "learning_rate": 5.195427083976768e-06, "loss": 10.7839, "step": 269400 }, { "epoch": 0.5442252451346776, "grad_norm": 947.101806640625, "learning_rate": 5.195078284380996e-06, "loss": 35.3106, "step": 269410 }, { "epoch": 0.5442454457673614, "grad_norm": 311.4070129394531, "learning_rate": 5.194729483834438e-06, "loss": 23.1523, "step": 269420 }, { "epoch": 0.5442656464000453, "grad_norm": 121.906005859375, "learning_rate": 5.19438068233879e-06, "loss": 19.8072, "step": 269430 }, { "epoch": 0.5442858470327291, "grad_norm": 445.5409240722656, "learning_rate": 5.194031879895756e-06, "loss": 19.9083, "step": 269440 }, { "epoch": 0.5443060476654129, "grad_norm": 510.6283874511719, "learning_rate": 5.193683076507031e-06, "loss": 17.8356, "step": 269450 }, { "epoch": 0.5443262482980967, "grad_norm": 344.52581787109375, "learning_rate": 5.193334272174321e-06, "loss": 27.3816, "step": 269460 }, { "epoch": 0.5443464489307805, "grad_norm": 256.2313537597656, "learning_rate": 5.192985466899323e-06, "loss": 22.314, "step": 269470 }, { "epoch": 0.5443666495634644, "grad_norm": 155.7440643310547, "learning_rate": 5.1926366606837365e-06, "loss": 23.2286, "step": 269480 }, { "epoch": 0.5443868501961482, "grad_norm": 294.2278747558594, "learning_rate": 5.192287853529263e-06, "loss": 21.6733, "step": 269490 }, { "epoch": 0.544407050828832, "grad_norm": 127.89922332763672, "learning_rate": 5.1919390454376e-06, "loss": 22.9835, "step": 269500 }, { "epoch": 0.5444272514615158, "grad_norm": 63.10268020629883, "learning_rate": 5.191590236410451e-06, "loss": 21.8007, "step": 269510 }, { "epoch": 0.5444474520941995, "grad_norm": 280.4844665527344, "learning_rate": 5.191241426449513e-06, "loss": 17.9428, "step": 269520 }, { "epoch": 0.5444676527268834, "grad_norm": 517.4681396484375, "learning_rate": 5.1908926155564885e-06, "loss": 24.0983, "step": 269530 }, { "epoch": 0.5444878533595672, "grad_norm": 34.144874572753906, "learning_rate": 5.190543803733077e-06, "loss": 16.4101, "step": 269540 }, { "epoch": 0.544508053992251, "grad_norm": 235.7289581298828, "learning_rate": 5.190194990980979e-06, "loss": 16.8961, "step": 269550 }, { "epoch": 0.5445282546249348, "grad_norm": 437.2834167480469, "learning_rate": 5.189846177301892e-06, "loss": 26.5875, "step": 269560 }, { "epoch": 0.5445484552576186, "grad_norm": 157.26792907714844, "learning_rate": 5.189497362697518e-06, "loss": 13.3907, "step": 269570 }, { "epoch": 0.5445686558903025, "grad_norm": 482.2087097167969, "learning_rate": 5.189148547169558e-06, "loss": 24.7718, "step": 269580 }, { "epoch": 0.5445888565229863, "grad_norm": 0.0, "learning_rate": 5.188799730719708e-06, "loss": 26.068, "step": 269590 }, { "epoch": 0.5446090571556701, "grad_norm": 380.67120361328125, "learning_rate": 5.188450913349674e-06, "loss": 16.3004, "step": 269600 }, { "epoch": 0.5446292577883539, "grad_norm": 325.1511535644531, "learning_rate": 5.188102095061151e-06, "loss": 21.1398, "step": 269610 }, { "epoch": 0.5446494584210377, "grad_norm": 125.48682403564453, "learning_rate": 5.187753275855843e-06, "loss": 22.2096, "step": 269620 }, { "epoch": 0.5446696590537216, "grad_norm": 108.07566833496094, "learning_rate": 5.187404455735448e-06, "loss": 25.2026, "step": 269630 }, { "epoch": 0.5446898596864054, "grad_norm": 414.02215576171875, "learning_rate": 5.187055634701664e-06, "loss": 20.0864, "step": 269640 }, { "epoch": 0.5447100603190892, "grad_norm": 235.529296875, "learning_rate": 5.186706812756197e-06, "loss": 21.2647, "step": 269650 }, { "epoch": 0.544730260951773, "grad_norm": 230.7997589111328, "learning_rate": 5.1863579899007424e-06, "loss": 14.5944, "step": 269660 }, { "epoch": 0.5447504615844568, "grad_norm": 148.622802734375, "learning_rate": 5.186009166137e-06, "loss": 12.9752, "step": 269670 }, { "epoch": 0.5447706622171407, "grad_norm": 204.6719970703125, "learning_rate": 5.185660341466673e-06, "loss": 14.0465, "step": 269680 }, { "epoch": 0.5447908628498245, "grad_norm": 145.7555389404297, "learning_rate": 5.1853115158914595e-06, "loss": 8.9826, "step": 269690 }, { "epoch": 0.5448110634825083, "grad_norm": 12.56952953338623, "learning_rate": 5.18496268941306e-06, "loss": 9.6931, "step": 269700 }, { "epoch": 0.5448312641151921, "grad_norm": 285.7779846191406, "learning_rate": 5.1846138620331766e-06, "loss": 11.2871, "step": 269710 }, { "epoch": 0.5448514647478759, "grad_norm": 137.81903076171875, "learning_rate": 5.184265033753506e-06, "loss": 17.599, "step": 269720 }, { "epoch": 0.5448716653805598, "grad_norm": 341.4209899902344, "learning_rate": 5.18391620457575e-06, "loss": 26.9351, "step": 269730 }, { "epoch": 0.5448918660132436, "grad_norm": 156.1699981689453, "learning_rate": 5.183567374501608e-06, "loss": 20.0084, "step": 269740 }, { "epoch": 0.5449120666459274, "grad_norm": 162.0833740234375, "learning_rate": 5.183218543532782e-06, "loss": 10.9998, "step": 269750 }, { "epoch": 0.5449322672786112, "grad_norm": 170.84814453125, "learning_rate": 5.182869711670971e-06, "loss": 15.8497, "step": 269760 }, { "epoch": 0.544952467911295, "grad_norm": 700.93994140625, "learning_rate": 5.182520878917874e-06, "loss": 23.326, "step": 269770 }, { "epoch": 0.5449726685439787, "grad_norm": 529.3929443359375, "learning_rate": 5.1821720452751945e-06, "loss": 26.4393, "step": 269780 }, { "epoch": 0.5449928691766626, "grad_norm": 163.01763916015625, "learning_rate": 5.181823210744629e-06, "loss": 24.84, "step": 269790 }, { "epoch": 0.5450130698093464, "grad_norm": 215.44879150390625, "learning_rate": 5.18147437532788e-06, "loss": 15.4361, "step": 269800 }, { "epoch": 0.5450332704420302, "grad_norm": 178.82211303710938, "learning_rate": 5.181125539026646e-06, "loss": 13.0318, "step": 269810 }, { "epoch": 0.545053471074714, "grad_norm": 237.50674438476562, "learning_rate": 5.180776701842629e-06, "loss": 6.0251, "step": 269820 }, { "epoch": 0.5450736717073978, "grad_norm": 293.6346740722656, "learning_rate": 5.180427863777528e-06, "loss": 22.4456, "step": 269830 }, { "epoch": 0.5450938723400817, "grad_norm": 485.82659912109375, "learning_rate": 5.180079024833043e-06, "loss": 14.6317, "step": 269840 }, { "epoch": 0.5451140729727655, "grad_norm": 298.3912353515625, "learning_rate": 5.179730185010875e-06, "loss": 25.1705, "step": 269850 }, { "epoch": 0.5451342736054493, "grad_norm": 0.0, "learning_rate": 5.179381344312724e-06, "loss": 15.4196, "step": 269860 }, { "epoch": 0.5451544742381331, "grad_norm": 82.89555358886719, "learning_rate": 5.179032502740291e-06, "loss": 17.164, "step": 269870 }, { "epoch": 0.5451746748708169, "grad_norm": 269.6479187011719, "learning_rate": 5.178683660295273e-06, "loss": 24.7683, "step": 269880 }, { "epoch": 0.5451948755035008, "grad_norm": 397.26019287109375, "learning_rate": 5.178334816979374e-06, "loss": 21.0009, "step": 269890 }, { "epoch": 0.5452150761361846, "grad_norm": 138.2456817626953, "learning_rate": 5.177985972794293e-06, "loss": 21.8346, "step": 269900 }, { "epoch": 0.5452352767688684, "grad_norm": 97.94824981689453, "learning_rate": 5.177637127741729e-06, "loss": 21.0576, "step": 269910 }, { "epoch": 0.5452554774015522, "grad_norm": 1100.6279296875, "learning_rate": 5.177288281823385e-06, "loss": 36.568, "step": 269920 }, { "epoch": 0.545275678034236, "grad_norm": 732.3435668945312, "learning_rate": 5.176939435040958e-06, "loss": 32.4692, "step": 269930 }, { "epoch": 0.5452958786669199, "grad_norm": 153.51048278808594, "learning_rate": 5.17659058739615e-06, "loss": 24.7838, "step": 269940 }, { "epoch": 0.5453160792996037, "grad_norm": 472.3096618652344, "learning_rate": 5.17624173889066e-06, "loss": 18.7374, "step": 269950 }, { "epoch": 0.5453362799322875, "grad_norm": 83.25111389160156, "learning_rate": 5.175892889526189e-06, "loss": 13.8195, "step": 269960 }, { "epoch": 0.5453564805649713, "grad_norm": 161.97459411621094, "learning_rate": 5.175544039304439e-06, "loss": 18.131, "step": 269970 }, { "epoch": 0.5453766811976551, "grad_norm": 156.24270629882812, "learning_rate": 5.175195188227108e-06, "loss": 27.2489, "step": 269980 }, { "epoch": 0.545396881830339, "grad_norm": 299.0740661621094, "learning_rate": 5.174846336295897e-06, "loss": 23.0492, "step": 269990 }, { "epoch": 0.5454170824630228, "grad_norm": 357.5528869628906, "learning_rate": 5.174497483512506e-06, "loss": 13.2172, "step": 270000 }, { "epoch": 0.5454372830957066, "grad_norm": 541.0161743164062, "learning_rate": 5.174148629878635e-06, "loss": 30.7729, "step": 270010 }, { "epoch": 0.5454574837283904, "grad_norm": 201.42611694335938, "learning_rate": 5.1737997753959846e-06, "loss": 32.4419, "step": 270020 }, { "epoch": 0.5454776843610742, "grad_norm": 389.321533203125, "learning_rate": 5.173450920066256e-06, "loss": 19.5183, "step": 270030 }, { "epoch": 0.545497884993758, "grad_norm": 294.25445556640625, "learning_rate": 5.173102063891148e-06, "loss": 13.7414, "step": 270040 }, { "epoch": 0.5455180856264418, "grad_norm": 407.4480285644531, "learning_rate": 5.172753206872363e-06, "loss": 26.5853, "step": 270050 }, { "epoch": 0.5455382862591256, "grad_norm": 108.55326843261719, "learning_rate": 5.172404349011599e-06, "loss": 22.1039, "step": 270060 }, { "epoch": 0.5455584868918094, "grad_norm": 174.7959747314453, "learning_rate": 5.172055490310555e-06, "loss": 15.7386, "step": 270070 }, { "epoch": 0.5455786875244932, "grad_norm": 315.6850891113281, "learning_rate": 5.171706630770935e-06, "loss": 12.124, "step": 270080 }, { "epoch": 0.545598888157177, "grad_norm": 260.8179626464844, "learning_rate": 5.171357770394439e-06, "loss": 14.2606, "step": 270090 }, { "epoch": 0.5456190887898609, "grad_norm": 168.2576904296875, "learning_rate": 5.171008909182765e-06, "loss": 13.9521, "step": 270100 }, { "epoch": 0.5456392894225447, "grad_norm": 134.7886199951172, "learning_rate": 5.170660047137613e-06, "loss": 25.5104, "step": 270110 }, { "epoch": 0.5456594900552285, "grad_norm": 1376.333251953125, "learning_rate": 5.1703111842606864e-06, "loss": 17.2188, "step": 270120 }, { "epoch": 0.5456796906879123, "grad_norm": 158.46463012695312, "learning_rate": 5.169962320553683e-06, "loss": 16.7041, "step": 270130 }, { "epoch": 0.5456998913205962, "grad_norm": 0.0, "learning_rate": 5.1696134560183045e-06, "loss": 13.0031, "step": 270140 }, { "epoch": 0.54572009195328, "grad_norm": 257.8311462402344, "learning_rate": 5.169264590656249e-06, "loss": 17.2134, "step": 270150 }, { "epoch": 0.5457402925859638, "grad_norm": 498.65521240234375, "learning_rate": 5.16891572446922e-06, "loss": 21.8609, "step": 270160 }, { "epoch": 0.5457604932186476, "grad_norm": 46.80562210083008, "learning_rate": 5.168566857458917e-06, "loss": 11.3795, "step": 270170 }, { "epoch": 0.5457806938513314, "grad_norm": 365.07025146484375, "learning_rate": 5.168217989627037e-06, "loss": 17.6917, "step": 270180 }, { "epoch": 0.5458008944840153, "grad_norm": 545.3442993164062, "learning_rate": 5.1678691209752855e-06, "loss": 15.6325, "step": 270190 }, { "epoch": 0.5458210951166991, "grad_norm": 153.54275512695312, "learning_rate": 5.167520251505358e-06, "loss": 9.875, "step": 270200 }, { "epoch": 0.5458412957493829, "grad_norm": 425.96368408203125, "learning_rate": 5.1671713812189585e-06, "loss": 13.9371, "step": 270210 }, { "epoch": 0.5458614963820667, "grad_norm": 413.4054870605469, "learning_rate": 5.166822510117785e-06, "loss": 29.5842, "step": 270220 }, { "epoch": 0.5458816970147505, "grad_norm": 403.2741394042969, "learning_rate": 5.166473638203539e-06, "loss": 16.2073, "step": 270230 }, { "epoch": 0.5459018976474344, "grad_norm": 497.6345520019531, "learning_rate": 5.166124765477923e-06, "loss": 29.8323, "step": 270240 }, { "epoch": 0.5459220982801182, "grad_norm": 211.64317321777344, "learning_rate": 5.165775891942631e-06, "loss": 23.3378, "step": 270250 }, { "epoch": 0.545942298912802, "grad_norm": 319.8124694824219, "learning_rate": 5.165427017599371e-06, "loss": 20.9507, "step": 270260 }, { "epoch": 0.5459624995454858, "grad_norm": 372.1168212890625, "learning_rate": 5.1650781424498385e-06, "loss": 18.5941, "step": 270270 }, { "epoch": 0.5459827001781696, "grad_norm": 374.2793273925781, "learning_rate": 5.164729266495735e-06, "loss": 62.3474, "step": 270280 }, { "epoch": 0.5460029008108533, "grad_norm": 303.29705810546875, "learning_rate": 5.16438038973876e-06, "loss": 37.7127, "step": 270290 }, { "epoch": 0.5460231014435372, "grad_norm": 627.7276000976562, "learning_rate": 5.164031512180616e-06, "loss": 23.2928, "step": 270300 }, { "epoch": 0.546043302076221, "grad_norm": 156.0523681640625, "learning_rate": 5.163682633823003e-06, "loss": 14.4894, "step": 270310 }, { "epoch": 0.5460635027089048, "grad_norm": 433.52667236328125, "learning_rate": 5.16333375466762e-06, "loss": 22.0488, "step": 270320 }, { "epoch": 0.5460837033415886, "grad_norm": 22.975749969482422, "learning_rate": 5.162984874716168e-06, "loss": 29.059, "step": 270330 }, { "epoch": 0.5461039039742724, "grad_norm": 262.2184753417969, "learning_rate": 5.162635993970347e-06, "loss": 15.4433, "step": 270340 }, { "epoch": 0.5461241046069563, "grad_norm": 295.52313232421875, "learning_rate": 5.162287112431858e-06, "loss": 9.4425, "step": 270350 }, { "epoch": 0.5461443052396401, "grad_norm": 198.91925048828125, "learning_rate": 5.1619382301024025e-06, "loss": 17.4622, "step": 270360 }, { "epoch": 0.5461645058723239, "grad_norm": 250.1685028076172, "learning_rate": 5.16158934698368e-06, "loss": 16.1389, "step": 270370 }, { "epoch": 0.5461847065050077, "grad_norm": 109.9392318725586, "learning_rate": 5.161240463077387e-06, "loss": 25.3945, "step": 270380 }, { "epoch": 0.5462049071376915, "grad_norm": 307.5675354003906, "learning_rate": 5.160891578385232e-06, "loss": 13.4696, "step": 270390 }, { "epoch": 0.5462251077703754, "grad_norm": 331.61773681640625, "learning_rate": 5.160542692908909e-06, "loss": 21.0226, "step": 270400 }, { "epoch": 0.5462453084030592, "grad_norm": 256.344482421875, "learning_rate": 5.16019380665012e-06, "loss": 8.1739, "step": 270410 }, { "epoch": 0.546265509035743, "grad_norm": 102.5074234008789, "learning_rate": 5.159844919610566e-06, "loss": 25.1007, "step": 270420 }, { "epoch": 0.5462857096684268, "grad_norm": 533.8493041992188, "learning_rate": 5.159496031791947e-06, "loss": 35.0648, "step": 270430 }, { "epoch": 0.5463059103011106, "grad_norm": 198.41688537597656, "learning_rate": 5.159147143195965e-06, "loss": 13.7595, "step": 270440 }, { "epoch": 0.5463261109337945, "grad_norm": 109.09851837158203, "learning_rate": 5.158798253824319e-06, "loss": 14.1425, "step": 270450 }, { "epoch": 0.5463463115664783, "grad_norm": 0.0, "learning_rate": 5.158449363678708e-06, "loss": 24.8806, "step": 270460 }, { "epoch": 0.5463665121991621, "grad_norm": 288.85125732421875, "learning_rate": 5.1581004727608345e-06, "loss": 7.2604, "step": 270470 }, { "epoch": 0.5463867128318459, "grad_norm": 166.3119354248047, "learning_rate": 5.1577515810724e-06, "loss": 17.891, "step": 270480 }, { "epoch": 0.5464069134645297, "grad_norm": 122.14558410644531, "learning_rate": 5.1574026886151005e-06, "loss": 30.0876, "step": 270490 }, { "epoch": 0.5464271140972136, "grad_norm": 281.2025451660156, "learning_rate": 5.157053795390642e-06, "loss": 19.3947, "step": 270500 }, { "epoch": 0.5464473147298974, "grad_norm": 232.93797302246094, "learning_rate": 5.156704901400722e-06, "loss": 23.4486, "step": 270510 }, { "epoch": 0.5464675153625812, "grad_norm": 202.2167510986328, "learning_rate": 5.156356006647041e-06, "loss": 9.1284, "step": 270520 }, { "epoch": 0.546487715995265, "grad_norm": 359.35064697265625, "learning_rate": 5.156007111131301e-06, "loss": 13.5995, "step": 270530 }, { "epoch": 0.5465079166279488, "grad_norm": 294.4804992675781, "learning_rate": 5.155658214855197e-06, "loss": 12.2895, "step": 270540 }, { "epoch": 0.5465281172606326, "grad_norm": 569.6202392578125, "learning_rate": 5.155309317820438e-06, "loss": 14.9601, "step": 270550 }, { "epoch": 0.5465483178933164, "grad_norm": 324.67425537109375, "learning_rate": 5.154960420028718e-06, "loss": 16.3005, "step": 270560 }, { "epoch": 0.5465685185260002, "grad_norm": 311.3360900878906, "learning_rate": 5.154611521481742e-06, "loss": 17.0016, "step": 270570 }, { "epoch": 0.546588719158684, "grad_norm": 453.6484680175781, "learning_rate": 5.154262622181205e-06, "loss": 26.4558, "step": 270580 }, { "epoch": 0.5466089197913678, "grad_norm": 182.40086364746094, "learning_rate": 5.153913722128813e-06, "loss": 38.8882, "step": 270590 }, { "epoch": 0.5466291204240517, "grad_norm": 242.5882110595703, "learning_rate": 5.153564821326265e-06, "loss": 14.6667, "step": 270600 }, { "epoch": 0.5466493210567355, "grad_norm": 147.14776611328125, "learning_rate": 5.153215919775259e-06, "loss": 10.961, "step": 270610 }, { "epoch": 0.5466695216894193, "grad_norm": 160.1554718017578, "learning_rate": 5.1528670174774965e-06, "loss": 19.6478, "step": 270620 }, { "epoch": 0.5466897223221031, "grad_norm": 571.3995361328125, "learning_rate": 5.15251811443468e-06, "loss": 23.322, "step": 270630 }, { "epoch": 0.5467099229547869, "grad_norm": 409.4534606933594, "learning_rate": 5.152169210648509e-06, "loss": 19.238, "step": 270640 }, { "epoch": 0.5467301235874708, "grad_norm": 322.7481384277344, "learning_rate": 5.151820306120682e-06, "loss": 15.1364, "step": 270650 }, { "epoch": 0.5467503242201546, "grad_norm": 208.50643920898438, "learning_rate": 5.151471400852903e-06, "loss": 36.9993, "step": 270660 }, { "epoch": 0.5467705248528384, "grad_norm": 430.0145568847656, "learning_rate": 5.15112249484687e-06, "loss": 16.6775, "step": 270670 }, { "epoch": 0.5467907254855222, "grad_norm": 526.3101806640625, "learning_rate": 5.150773588104284e-06, "loss": 14.6583, "step": 270680 }, { "epoch": 0.546810926118206, "grad_norm": 175.59234619140625, "learning_rate": 5.150424680626846e-06, "loss": 19.8974, "step": 270690 }, { "epoch": 0.5468311267508899, "grad_norm": 194.67276000976562, "learning_rate": 5.150075772416256e-06, "loss": 20.5921, "step": 270700 }, { "epoch": 0.5468513273835737, "grad_norm": 99.42737579345703, "learning_rate": 5.149726863474217e-06, "loss": 12.0149, "step": 270710 }, { "epoch": 0.5468715280162575, "grad_norm": 245.2981414794922, "learning_rate": 5.149377953802426e-06, "loss": 17.0415, "step": 270720 }, { "epoch": 0.5468917286489413, "grad_norm": 355.744384765625, "learning_rate": 5.149029043402584e-06, "loss": 13.9784, "step": 270730 }, { "epoch": 0.5469119292816251, "grad_norm": 194.05551147460938, "learning_rate": 5.1486801322763935e-06, "loss": 8.4306, "step": 270740 }, { "epoch": 0.546932129914309, "grad_norm": 375.8598937988281, "learning_rate": 5.148331220425554e-06, "loss": 24.6891, "step": 270750 }, { "epoch": 0.5469523305469928, "grad_norm": 183.24119567871094, "learning_rate": 5.147982307851766e-06, "loss": 22.667, "step": 270760 }, { "epoch": 0.5469725311796766, "grad_norm": 221.62709045410156, "learning_rate": 5.147633394556731e-06, "loss": 17.9793, "step": 270770 }, { "epoch": 0.5469927318123604, "grad_norm": 545.1946411132812, "learning_rate": 5.147284480542149e-06, "loss": 25.9145, "step": 270780 }, { "epoch": 0.5470129324450442, "grad_norm": 267.5995788574219, "learning_rate": 5.1469355658097186e-06, "loss": 35.4548, "step": 270790 }, { "epoch": 0.5470331330777279, "grad_norm": 178.6726531982422, "learning_rate": 5.146586650361143e-06, "loss": 11.7341, "step": 270800 }, { "epoch": 0.5470533337104118, "grad_norm": 358.9482421875, "learning_rate": 5.146237734198121e-06, "loss": 24.0874, "step": 270810 }, { "epoch": 0.5470735343430956, "grad_norm": 83.60447692871094, "learning_rate": 5.145888817322355e-06, "loss": 5.1886, "step": 270820 }, { "epoch": 0.5470937349757794, "grad_norm": 316.01483154296875, "learning_rate": 5.145539899735543e-06, "loss": 13.9433, "step": 270830 }, { "epoch": 0.5471139356084632, "grad_norm": 259.2259216308594, "learning_rate": 5.1451909814393895e-06, "loss": 10.7172, "step": 270840 }, { "epoch": 0.547134136241147, "grad_norm": 16.119518280029297, "learning_rate": 5.144842062435591e-06, "loss": 18.6913, "step": 270850 }, { "epoch": 0.5471543368738309, "grad_norm": 93.94830322265625, "learning_rate": 5.144493142725851e-06, "loss": 21.156, "step": 270860 }, { "epoch": 0.5471745375065147, "grad_norm": 5.307872772216797, "learning_rate": 5.144144222311868e-06, "loss": 10.4802, "step": 270870 }, { "epoch": 0.5471947381391985, "grad_norm": 356.4959411621094, "learning_rate": 5.143795301195343e-06, "loss": 17.4769, "step": 270880 }, { "epoch": 0.5472149387718823, "grad_norm": 827.1213989257812, "learning_rate": 5.1434463793779795e-06, "loss": 14.072, "step": 270890 }, { "epoch": 0.5472351394045661, "grad_norm": 257.4329833984375, "learning_rate": 5.143097456861474e-06, "loss": 39.2777, "step": 270900 }, { "epoch": 0.54725534003725, "grad_norm": 1138.601806640625, "learning_rate": 5.14274853364753e-06, "loss": 26.6571, "step": 270910 }, { "epoch": 0.5472755406699338, "grad_norm": 192.43634033203125, "learning_rate": 5.142399609737846e-06, "loss": 12.1532, "step": 270920 }, { "epoch": 0.5472957413026176, "grad_norm": 318.43035888671875, "learning_rate": 5.142050685134124e-06, "loss": 8.2208, "step": 270930 }, { "epoch": 0.5473159419353014, "grad_norm": 264.47515869140625, "learning_rate": 5.141701759838065e-06, "loss": 30.3229, "step": 270940 }, { "epoch": 0.5473361425679852, "grad_norm": 0.0, "learning_rate": 5.141352833851367e-06, "loss": 19.0486, "step": 270950 }, { "epoch": 0.5473563432006691, "grad_norm": 53.55279541015625, "learning_rate": 5.141003907175733e-06, "loss": 14.0271, "step": 270960 }, { "epoch": 0.5473765438333529, "grad_norm": 428.15264892578125, "learning_rate": 5.140654979812864e-06, "loss": 35.0668, "step": 270970 }, { "epoch": 0.5473967444660367, "grad_norm": 273.74859619140625, "learning_rate": 5.140306051764459e-06, "loss": 15.9427, "step": 270980 }, { "epoch": 0.5474169450987205, "grad_norm": 319.54254150390625, "learning_rate": 5.139957123032217e-06, "loss": 23.0084, "step": 270990 }, { "epoch": 0.5474371457314043, "grad_norm": 202.77340698242188, "learning_rate": 5.139608193617846e-06, "loss": 13.9689, "step": 271000 }, { "epoch": 0.5474573463640882, "grad_norm": 114.99956512451172, "learning_rate": 5.139259263523038e-06, "loss": 11.3557, "step": 271010 }, { "epoch": 0.547477546996772, "grad_norm": 564.5018310546875, "learning_rate": 5.138910332749499e-06, "loss": 17.1529, "step": 271020 }, { "epoch": 0.5474977476294558, "grad_norm": 480.91888427734375, "learning_rate": 5.138561401298926e-06, "loss": 26.4232, "step": 271030 }, { "epoch": 0.5475179482621396, "grad_norm": 100.10216522216797, "learning_rate": 5.138212469173022e-06, "loss": 22.615, "step": 271040 }, { "epoch": 0.5475381488948234, "grad_norm": 227.4015655517578, "learning_rate": 5.1378635363734884e-06, "loss": 9.8633, "step": 271050 }, { "epoch": 0.5475583495275071, "grad_norm": 156.35894775390625, "learning_rate": 5.137514602902024e-06, "loss": 14.9207, "step": 271060 }, { "epoch": 0.547578550160191, "grad_norm": 109.6087646484375, "learning_rate": 5.13716566876033e-06, "loss": 25.6192, "step": 271070 }, { "epoch": 0.5475987507928748, "grad_norm": 96.10096740722656, "learning_rate": 5.136816733950108e-06, "loss": 19.335, "step": 271080 }, { "epoch": 0.5476189514255586, "grad_norm": 298.60089111328125, "learning_rate": 5.136467798473057e-06, "loss": 11.5343, "step": 271090 }, { "epoch": 0.5476391520582424, "grad_norm": 460.7716369628906, "learning_rate": 5.136118862330876e-06, "loss": 31.7471, "step": 271100 }, { "epoch": 0.5476593526909262, "grad_norm": 142.26687622070312, "learning_rate": 5.135769925525272e-06, "loss": 15.8231, "step": 271110 }, { "epoch": 0.5476795533236101, "grad_norm": 540.8987426757812, "learning_rate": 5.135420988057941e-06, "loss": 15.7245, "step": 271120 }, { "epoch": 0.5476997539562939, "grad_norm": 250.5954132080078, "learning_rate": 5.135072049930584e-06, "loss": 10.8472, "step": 271130 }, { "epoch": 0.5477199545889777, "grad_norm": 226.5298309326172, "learning_rate": 5.1347231111449034e-06, "loss": 16.835, "step": 271140 }, { "epoch": 0.5477401552216615, "grad_norm": 155.68560791015625, "learning_rate": 5.134374171702596e-06, "loss": 20.8249, "step": 271150 }, { "epoch": 0.5477603558543453, "grad_norm": 408.2979736328125, "learning_rate": 5.1340252316053686e-06, "loss": 14.9628, "step": 271160 }, { "epoch": 0.5477805564870292, "grad_norm": 172.22940063476562, "learning_rate": 5.133676290854915e-06, "loss": 26.6068, "step": 271170 }, { "epoch": 0.547800757119713, "grad_norm": 171.62872314453125, "learning_rate": 5.133327349452941e-06, "loss": 16.0277, "step": 271180 }, { "epoch": 0.5478209577523968, "grad_norm": 166.55117797851562, "learning_rate": 5.1329784074011454e-06, "loss": 20.5653, "step": 271190 }, { "epoch": 0.5478411583850806, "grad_norm": 340.9192199707031, "learning_rate": 5.13262946470123e-06, "loss": 19.9538, "step": 271200 }, { "epoch": 0.5478613590177644, "grad_norm": 258.0434265136719, "learning_rate": 5.132280521354896e-06, "loss": 30.4681, "step": 271210 }, { "epoch": 0.5478815596504483, "grad_norm": 143.42657470703125, "learning_rate": 5.13193157736384e-06, "loss": 8.7193, "step": 271220 }, { "epoch": 0.5479017602831321, "grad_norm": 410.7271423339844, "learning_rate": 5.131582632729766e-06, "loss": 17.4593, "step": 271230 }, { "epoch": 0.5479219609158159, "grad_norm": 312.06396484375, "learning_rate": 5.131233687454375e-06, "loss": 19.9669, "step": 271240 }, { "epoch": 0.5479421615484997, "grad_norm": 586.4053344726562, "learning_rate": 5.130884741539367e-06, "loss": 26.1685, "step": 271250 }, { "epoch": 0.5479623621811835, "grad_norm": 399.3693542480469, "learning_rate": 5.130535794986441e-06, "loss": 16.5427, "step": 271260 }, { "epoch": 0.5479825628138674, "grad_norm": 266.9270935058594, "learning_rate": 5.130186847797302e-06, "loss": 20.8838, "step": 271270 }, { "epoch": 0.5480027634465512, "grad_norm": 18.30973243713379, "learning_rate": 5.1298378999736465e-06, "loss": 10.8668, "step": 271280 }, { "epoch": 0.548022964079235, "grad_norm": 276.9812927246094, "learning_rate": 5.129488951517176e-06, "loss": 13.8818, "step": 271290 }, { "epoch": 0.5480431647119188, "grad_norm": 351.8283996582031, "learning_rate": 5.1291400024295946e-06, "loss": 12.6193, "step": 271300 }, { "epoch": 0.5480633653446025, "grad_norm": 330.1753234863281, "learning_rate": 5.128791052712597e-06, "loss": 29.6322, "step": 271310 }, { "epoch": 0.5480835659772864, "grad_norm": 241.70396423339844, "learning_rate": 5.128442102367891e-06, "loss": 16.8222, "step": 271320 }, { "epoch": 0.5481037666099702, "grad_norm": 81.72697448730469, "learning_rate": 5.128093151397172e-06, "loss": 12.2886, "step": 271330 }, { "epoch": 0.548123967242654, "grad_norm": 35.9173583984375, "learning_rate": 5.127744199802143e-06, "loss": 15.896, "step": 271340 }, { "epoch": 0.5481441678753378, "grad_norm": 200.15069580078125, "learning_rate": 5.127395247584503e-06, "loss": 15.9935, "step": 271350 }, { "epoch": 0.5481643685080216, "grad_norm": 161.22962951660156, "learning_rate": 5.127046294745955e-06, "loss": 12.5159, "step": 271360 }, { "epoch": 0.5481845691407055, "grad_norm": 250.8546142578125, "learning_rate": 5.126697341288198e-06, "loss": 17.3136, "step": 271370 }, { "epoch": 0.5482047697733893, "grad_norm": 237.55563354492188, "learning_rate": 5.126348387212935e-06, "loss": 21.7371, "step": 271380 }, { "epoch": 0.5482249704060731, "grad_norm": 9.324607849121094, "learning_rate": 5.125999432521864e-06, "loss": 14.0435, "step": 271390 }, { "epoch": 0.5482451710387569, "grad_norm": 101.00953674316406, "learning_rate": 5.1256504772166885e-06, "loss": 16.5123, "step": 271400 }, { "epoch": 0.5482653716714407, "grad_norm": 579.4340209960938, "learning_rate": 5.125301521299107e-06, "loss": 22.66, "step": 271410 }, { "epoch": 0.5482855723041246, "grad_norm": 401.2892150878906, "learning_rate": 5.12495256477082e-06, "loss": 17.8897, "step": 271420 }, { "epoch": 0.5483057729368084, "grad_norm": 333.0728454589844, "learning_rate": 5.124603607633532e-06, "loss": 26.5342, "step": 271430 }, { "epoch": 0.5483259735694922, "grad_norm": 513.0225219726562, "learning_rate": 5.124254649888938e-06, "loss": 22.1942, "step": 271440 }, { "epoch": 0.548346174202176, "grad_norm": 257.2457580566406, "learning_rate": 5.123905691538744e-06, "loss": 18.9608, "step": 271450 }, { "epoch": 0.5483663748348598, "grad_norm": 123.71678161621094, "learning_rate": 5.123556732584648e-06, "loss": 7.1973, "step": 271460 }, { "epoch": 0.5483865754675437, "grad_norm": 241.67001342773438, "learning_rate": 5.1232077730283515e-06, "loss": 30.372, "step": 271470 }, { "epoch": 0.5484067761002275, "grad_norm": 299.0604248046875, "learning_rate": 5.122858812871555e-06, "loss": 28.5222, "step": 271480 }, { "epoch": 0.5484269767329113, "grad_norm": 360.4393615722656, "learning_rate": 5.122509852115959e-06, "loss": 19.9251, "step": 271490 }, { "epoch": 0.5484471773655951, "grad_norm": 184.80104064941406, "learning_rate": 5.1221608907632665e-06, "loss": 11.0392, "step": 271500 }, { "epoch": 0.5484673779982789, "grad_norm": 161.20309448242188, "learning_rate": 5.121811928815176e-06, "loss": 18.7417, "step": 271510 }, { "epoch": 0.5484875786309628, "grad_norm": 138.88101196289062, "learning_rate": 5.121462966273388e-06, "loss": 10.7668, "step": 271520 }, { "epoch": 0.5485077792636466, "grad_norm": 0.0, "learning_rate": 5.121114003139605e-06, "loss": 28.2698, "step": 271530 }, { "epoch": 0.5485279798963304, "grad_norm": 166.53492736816406, "learning_rate": 5.120765039415528e-06, "loss": 25.7398, "step": 271540 }, { "epoch": 0.5485481805290142, "grad_norm": 257.6219482421875, "learning_rate": 5.120416075102855e-06, "loss": 14.2301, "step": 271550 }, { "epoch": 0.548568381161698, "grad_norm": 97.03495788574219, "learning_rate": 5.120067110203289e-06, "loss": 25.3023, "step": 271560 }, { "epoch": 0.5485885817943817, "grad_norm": 43.82855224609375, "learning_rate": 5.119718144718532e-06, "loss": 19.3315, "step": 271570 }, { "epoch": 0.5486087824270656, "grad_norm": 267.83892822265625, "learning_rate": 5.119369178650282e-06, "loss": 27.4988, "step": 271580 }, { "epoch": 0.5486289830597494, "grad_norm": 234.90887451171875, "learning_rate": 5.119020212000242e-06, "loss": 31.5029, "step": 271590 }, { "epoch": 0.5486491836924332, "grad_norm": 255.16880798339844, "learning_rate": 5.118671244770111e-06, "loss": 17.9404, "step": 271600 }, { "epoch": 0.548669384325117, "grad_norm": 188.7762451171875, "learning_rate": 5.118322276961591e-06, "loss": 9.6989, "step": 271610 }, { "epoch": 0.5486895849578008, "grad_norm": 92.162353515625, "learning_rate": 5.117973308576383e-06, "loss": 38.6206, "step": 271620 }, { "epoch": 0.5487097855904847, "grad_norm": 277.41070556640625, "learning_rate": 5.117624339616186e-06, "loss": 17.7081, "step": 271630 }, { "epoch": 0.5487299862231685, "grad_norm": 37.092681884765625, "learning_rate": 5.117275370082704e-06, "loss": 20.6392, "step": 271640 }, { "epoch": 0.5487501868558523, "grad_norm": 73.63348388671875, "learning_rate": 5.116926399977634e-06, "loss": 8.1502, "step": 271650 }, { "epoch": 0.5487703874885361, "grad_norm": 406.3538818359375, "learning_rate": 5.116577429302682e-06, "loss": 8.991, "step": 271660 }, { "epoch": 0.54879058812122, "grad_norm": 437.7600402832031, "learning_rate": 5.116228458059544e-06, "loss": 27.1598, "step": 271670 }, { "epoch": 0.5488107887539038, "grad_norm": 170.48220825195312, "learning_rate": 5.115879486249922e-06, "loss": 11.9321, "step": 271680 }, { "epoch": 0.5488309893865876, "grad_norm": 441.21240234375, "learning_rate": 5.115530513875517e-06, "loss": 13.696, "step": 271690 }, { "epoch": 0.5488511900192714, "grad_norm": 243.97061157226562, "learning_rate": 5.115181540938032e-06, "loss": 18.8238, "step": 271700 }, { "epoch": 0.5488713906519552, "grad_norm": 696.1702880859375, "learning_rate": 5.114832567439164e-06, "loss": 26.8331, "step": 271710 }, { "epoch": 0.548891591284639, "grad_norm": 591.5228881835938, "learning_rate": 5.114483593380619e-06, "loss": 27.8214, "step": 271720 }, { "epoch": 0.5489117919173229, "grad_norm": 146.0382080078125, "learning_rate": 5.114134618764093e-06, "loss": 13.9703, "step": 271730 }, { "epoch": 0.5489319925500067, "grad_norm": 254.78317260742188, "learning_rate": 5.113785643591289e-06, "loss": 12.1541, "step": 271740 }, { "epoch": 0.5489521931826905, "grad_norm": 146.23268127441406, "learning_rate": 5.113436667863908e-06, "loss": 21.1687, "step": 271750 }, { "epoch": 0.5489723938153743, "grad_norm": 153.45394897460938, "learning_rate": 5.1130876915836495e-06, "loss": 6.7421, "step": 271760 }, { "epoch": 0.5489925944480581, "grad_norm": 0.0, "learning_rate": 5.112738714752216e-06, "loss": 18.0899, "step": 271770 }, { "epoch": 0.549012795080742, "grad_norm": 144.56471252441406, "learning_rate": 5.112389737371307e-06, "loss": 15.8088, "step": 271780 }, { "epoch": 0.5490329957134258, "grad_norm": 447.4934387207031, "learning_rate": 5.112040759442624e-06, "loss": 16.1742, "step": 271790 }, { "epoch": 0.5490531963461096, "grad_norm": 18.567604064941406, "learning_rate": 5.111691780967869e-06, "loss": 10.3861, "step": 271800 }, { "epoch": 0.5490733969787934, "grad_norm": 186.19027709960938, "learning_rate": 5.111342801948741e-06, "loss": 11.6573, "step": 271810 }, { "epoch": 0.5490935976114772, "grad_norm": 283.5546569824219, "learning_rate": 5.110993822386943e-06, "loss": 29.5875, "step": 271820 }, { "epoch": 0.549113798244161, "grad_norm": 398.1877136230469, "learning_rate": 5.110644842284173e-06, "loss": 20.0432, "step": 271830 }, { "epoch": 0.5491339988768448, "grad_norm": 290.8800354003906, "learning_rate": 5.110295861642134e-06, "loss": 18.8825, "step": 271840 }, { "epoch": 0.5491541995095286, "grad_norm": 278.43914794921875, "learning_rate": 5.109946880462526e-06, "loss": 12.0163, "step": 271850 }, { "epoch": 0.5491744001422124, "grad_norm": 21.501798629760742, "learning_rate": 5.109597898747052e-06, "loss": 18.6667, "step": 271860 }, { "epoch": 0.5491946007748962, "grad_norm": 308.8944091796875, "learning_rate": 5.109248916497408e-06, "loss": 14.3364, "step": 271870 }, { "epoch": 0.54921480140758, "grad_norm": 248.5011444091797, "learning_rate": 5.1088999337153015e-06, "loss": 12.8829, "step": 271880 }, { "epoch": 0.5492350020402639, "grad_norm": 525.3104248046875, "learning_rate": 5.108550950402427e-06, "loss": 16.2217, "step": 271890 }, { "epoch": 0.5492552026729477, "grad_norm": 174.15567016601562, "learning_rate": 5.1082019665604895e-06, "loss": 17.1288, "step": 271900 }, { "epoch": 0.5492754033056315, "grad_norm": 220.61354064941406, "learning_rate": 5.1078529821911895e-06, "loss": 30.4043, "step": 271910 }, { "epoch": 0.5492956039383153, "grad_norm": 298.517822265625, "learning_rate": 5.107503997296225e-06, "loss": 22.4201, "step": 271920 }, { "epoch": 0.5493158045709992, "grad_norm": 346.22314453125, "learning_rate": 5.107155011877302e-06, "loss": 18.0831, "step": 271930 }, { "epoch": 0.549336005203683, "grad_norm": 617.18017578125, "learning_rate": 5.1068060259361155e-06, "loss": 14.7689, "step": 271940 }, { "epoch": 0.5493562058363668, "grad_norm": 271.6125183105469, "learning_rate": 5.1064570394743705e-06, "loss": 19.1435, "step": 271950 }, { "epoch": 0.5493764064690506, "grad_norm": 216.44577026367188, "learning_rate": 5.106108052493768e-06, "loss": 16.2638, "step": 271960 }, { "epoch": 0.5493966071017344, "grad_norm": 28.84137725830078, "learning_rate": 5.105759064996007e-06, "loss": 6.374, "step": 271970 }, { "epoch": 0.5494168077344183, "grad_norm": 722.36328125, "learning_rate": 5.105410076982789e-06, "loss": 34.3304, "step": 271980 }, { "epoch": 0.5494370083671021, "grad_norm": 23.422510147094727, "learning_rate": 5.105061088455815e-06, "loss": 26.4332, "step": 271990 }, { "epoch": 0.5494572089997859, "grad_norm": 267.1373291015625, "learning_rate": 5.1047120994167855e-06, "loss": 14.7317, "step": 272000 }, { "epoch": 0.5494774096324697, "grad_norm": 233.39849853515625, "learning_rate": 5.104363109867403e-06, "loss": 14.3733, "step": 272010 }, { "epoch": 0.5494976102651535, "grad_norm": 58.85588455200195, "learning_rate": 5.104014119809366e-06, "loss": 14.3085, "step": 272020 }, { "epoch": 0.5495178108978374, "grad_norm": 260.95770263671875, "learning_rate": 5.1036651292443774e-06, "loss": 20.5635, "step": 272030 }, { "epoch": 0.5495380115305212, "grad_norm": 634.3630981445312, "learning_rate": 5.103316138174139e-06, "loss": 9.9906, "step": 272040 }, { "epoch": 0.549558212163205, "grad_norm": 64.98638916015625, "learning_rate": 5.102967146600348e-06, "loss": 9.0494, "step": 272050 }, { "epoch": 0.5495784127958888, "grad_norm": 276.6283264160156, "learning_rate": 5.102618154524709e-06, "loss": 25.0627, "step": 272060 }, { "epoch": 0.5495986134285726, "grad_norm": 279.3950500488281, "learning_rate": 5.1022691619489205e-06, "loss": 9.4848, "step": 272070 }, { "epoch": 0.5496188140612563, "grad_norm": 297.8818054199219, "learning_rate": 5.101920168874686e-06, "loss": 24.2634, "step": 272080 }, { "epoch": 0.5496390146939402, "grad_norm": 280.63006591796875, "learning_rate": 5.101571175303704e-06, "loss": 23.3728, "step": 272090 }, { "epoch": 0.549659215326624, "grad_norm": 270.8159484863281, "learning_rate": 5.101222181237676e-06, "loss": 26.128, "step": 272100 }, { "epoch": 0.5496794159593078, "grad_norm": 254.62811279296875, "learning_rate": 5.1008731866783045e-06, "loss": 20.1614, "step": 272110 }, { "epoch": 0.5496996165919916, "grad_norm": 445.9420471191406, "learning_rate": 5.100524191627289e-06, "loss": 16.2008, "step": 272120 }, { "epoch": 0.5497198172246754, "grad_norm": 0.0, "learning_rate": 5.10017519608633e-06, "loss": 15.6924, "step": 272130 }, { "epoch": 0.5497400178573593, "grad_norm": 431.3376770019531, "learning_rate": 5.09982620005713e-06, "loss": 13.3287, "step": 272140 }, { "epoch": 0.5497602184900431, "grad_norm": 210.37918090820312, "learning_rate": 5.09947720354139e-06, "loss": 15.3655, "step": 272150 }, { "epoch": 0.5497804191227269, "grad_norm": 281.79913330078125, "learning_rate": 5.0991282065408086e-06, "loss": 25.351, "step": 272160 }, { "epoch": 0.5498006197554107, "grad_norm": 253.1290283203125, "learning_rate": 5.098779209057089e-06, "loss": 15.5308, "step": 272170 }, { "epoch": 0.5498208203880945, "grad_norm": 399.74090576171875, "learning_rate": 5.098430211091931e-06, "loss": 20.0125, "step": 272180 }, { "epoch": 0.5498410210207784, "grad_norm": 160.2550811767578, "learning_rate": 5.098081212647036e-06, "loss": 9.3959, "step": 272190 }, { "epoch": 0.5498612216534622, "grad_norm": 21.980295181274414, "learning_rate": 5.097732213724107e-06, "loss": 12.9835, "step": 272200 }, { "epoch": 0.549881422286146, "grad_norm": 448.3616638183594, "learning_rate": 5.0973832143248405e-06, "loss": 22.7705, "step": 272210 }, { "epoch": 0.5499016229188298, "grad_norm": 304.85400390625, "learning_rate": 5.097034214450941e-06, "loss": 17.1926, "step": 272220 }, { "epoch": 0.5499218235515136, "grad_norm": 252.26771545410156, "learning_rate": 5.096685214104109e-06, "loss": 29.9799, "step": 272230 }, { "epoch": 0.5499420241841975, "grad_norm": 417.1365661621094, "learning_rate": 5.096336213286044e-06, "loss": 36.3706, "step": 272240 }, { "epoch": 0.5499622248168813, "grad_norm": 334.8094787597656, "learning_rate": 5.09598721199845e-06, "loss": 14.9892, "step": 272250 }, { "epoch": 0.5499824254495651, "grad_norm": 150.68260192871094, "learning_rate": 5.095638210243023e-06, "loss": 11.5327, "step": 272260 }, { "epoch": 0.5500026260822489, "grad_norm": 232.5937042236328, "learning_rate": 5.09528920802147e-06, "loss": 17.507, "step": 272270 }, { "epoch": 0.5500228267149327, "grad_norm": 115.18302917480469, "learning_rate": 5.094940205335487e-06, "loss": 11.0031, "step": 272280 }, { "epoch": 0.5500430273476166, "grad_norm": 234.63011169433594, "learning_rate": 5.094591202186777e-06, "loss": 26.4619, "step": 272290 }, { "epoch": 0.5500632279803004, "grad_norm": 476.7989807128906, "learning_rate": 5.0942421985770415e-06, "loss": 12.6077, "step": 272300 }, { "epoch": 0.5500834286129842, "grad_norm": 213.03001403808594, "learning_rate": 5.093893194507982e-06, "loss": 24.9256, "step": 272310 }, { "epoch": 0.550103629245668, "grad_norm": 15.416720390319824, "learning_rate": 5.093544189981297e-06, "loss": 12.7993, "step": 272320 }, { "epoch": 0.5501238298783518, "grad_norm": 174.24441528320312, "learning_rate": 5.093195184998689e-06, "loss": 14.5623, "step": 272330 }, { "epoch": 0.5501440305110356, "grad_norm": 143.10040283203125, "learning_rate": 5.092846179561859e-06, "loss": 11.181, "step": 272340 }, { "epoch": 0.5501642311437194, "grad_norm": 0.0, "learning_rate": 5.092497173672508e-06, "loss": 16.4031, "step": 272350 }, { "epoch": 0.5501844317764032, "grad_norm": 406.3987731933594, "learning_rate": 5.092148167332338e-06, "loss": 27.1281, "step": 272360 }, { "epoch": 0.550204632409087, "grad_norm": 347.156982421875, "learning_rate": 5.091799160543047e-06, "loss": 19.0238, "step": 272370 }, { "epoch": 0.5502248330417708, "grad_norm": 331.4739990234375, "learning_rate": 5.09145015330634e-06, "loss": 17.5003, "step": 272380 }, { "epoch": 0.5502450336744547, "grad_norm": 202.94143676757812, "learning_rate": 5.091101145623916e-06, "loss": 16.5362, "step": 272390 }, { "epoch": 0.5502652343071385, "grad_norm": 244.3010711669922, "learning_rate": 5.090752137497474e-06, "loss": 21.0091, "step": 272400 }, { "epoch": 0.5502854349398223, "grad_norm": 209.11622619628906, "learning_rate": 5.090403128928719e-06, "loss": 11.4882, "step": 272410 }, { "epoch": 0.5503056355725061, "grad_norm": 271.7358703613281, "learning_rate": 5.090054119919349e-06, "loss": 34.0703, "step": 272420 }, { "epoch": 0.5503258362051899, "grad_norm": 734.7569580078125, "learning_rate": 5.089705110471068e-06, "loss": 20.3112, "step": 272430 }, { "epoch": 0.5503460368378738, "grad_norm": 301.1106262207031, "learning_rate": 5.089356100585574e-06, "loss": 9.6631, "step": 272440 }, { "epoch": 0.5503662374705576, "grad_norm": 116.67583465576172, "learning_rate": 5.089007090264568e-06, "loss": 8.7859, "step": 272450 }, { "epoch": 0.5503864381032414, "grad_norm": 422.8187255859375, "learning_rate": 5.088658079509754e-06, "loss": 18.244, "step": 272460 }, { "epoch": 0.5504066387359252, "grad_norm": 486.368408203125, "learning_rate": 5.08830906832283e-06, "loss": 23.6764, "step": 272470 }, { "epoch": 0.550426839368609, "grad_norm": 354.1516418457031, "learning_rate": 5.087960056705499e-06, "loss": 33.7588, "step": 272480 }, { "epoch": 0.5504470400012929, "grad_norm": 227.40798950195312, "learning_rate": 5.087611044659462e-06, "loss": 13.4114, "step": 272490 }, { "epoch": 0.5504672406339767, "grad_norm": 305.98822021484375, "learning_rate": 5.087262032186418e-06, "loss": 21.2637, "step": 272500 }, { "epoch": 0.5504874412666605, "grad_norm": 517.5442504882812, "learning_rate": 5.08691301928807e-06, "loss": 14.61, "step": 272510 }, { "epoch": 0.5505076418993443, "grad_norm": 108.20928192138672, "learning_rate": 5.08656400596612e-06, "loss": 21.5936, "step": 272520 }, { "epoch": 0.5505278425320281, "grad_norm": 312.9679870605469, "learning_rate": 5.086214992222265e-06, "loss": 10.3276, "step": 272530 }, { "epoch": 0.550548043164712, "grad_norm": 2.5861287117004395, "learning_rate": 5.08586597805821e-06, "loss": 6.8482, "step": 272540 }, { "epoch": 0.5505682437973958, "grad_norm": 183.41468811035156, "learning_rate": 5.085516963475653e-06, "loss": 23.4846, "step": 272550 }, { "epoch": 0.5505884444300796, "grad_norm": 249.87559509277344, "learning_rate": 5.0851679484762996e-06, "loss": 15.6154, "step": 272560 }, { "epoch": 0.5506086450627634, "grad_norm": 361.3088684082031, "learning_rate": 5.084818933061846e-06, "loss": 20.979, "step": 272570 }, { "epoch": 0.5506288456954472, "grad_norm": 232.0271453857422, "learning_rate": 5.084469917233995e-06, "loss": 17.0285, "step": 272580 }, { "epoch": 0.5506490463281309, "grad_norm": 264.08721923828125, "learning_rate": 5.084120900994449e-06, "loss": 16.351, "step": 272590 }, { "epoch": 0.5506692469608148, "grad_norm": 280.1943054199219, "learning_rate": 5.083771884344908e-06, "loss": 33.5774, "step": 272600 }, { "epoch": 0.5506894475934986, "grad_norm": 272.6334533691406, "learning_rate": 5.0834228672870725e-06, "loss": 20.4379, "step": 272610 }, { "epoch": 0.5507096482261824, "grad_norm": 278.1901550292969, "learning_rate": 5.083073849822643e-06, "loss": 33.4191, "step": 272620 }, { "epoch": 0.5507298488588662, "grad_norm": 318.83056640625, "learning_rate": 5.0827248319533225e-06, "loss": 32.4875, "step": 272630 }, { "epoch": 0.55075004949155, "grad_norm": 5.7458977699279785, "learning_rate": 5.082375813680811e-06, "loss": 10.8898, "step": 272640 }, { "epoch": 0.5507702501242339, "grad_norm": 8.907835006713867, "learning_rate": 5.0820267950068115e-06, "loss": 12.2414, "step": 272650 }, { "epoch": 0.5507904507569177, "grad_norm": 62.19697952270508, "learning_rate": 5.0816777759330215e-06, "loss": 12.417, "step": 272660 }, { "epoch": 0.5508106513896015, "grad_norm": 184.7407989501953, "learning_rate": 5.081328756461145e-06, "loss": 8.5759, "step": 272670 }, { "epoch": 0.5508308520222853, "grad_norm": 203.8895263671875, "learning_rate": 5.08097973659288e-06, "loss": 19.8834, "step": 272680 }, { "epoch": 0.5508510526549691, "grad_norm": 0.0, "learning_rate": 5.0806307163299305e-06, "loss": 10.372, "step": 272690 }, { "epoch": 0.550871253287653, "grad_norm": 253.9811248779297, "learning_rate": 5.080281695673999e-06, "loss": 15.5261, "step": 272700 }, { "epoch": 0.5508914539203368, "grad_norm": 346.6727600097656, "learning_rate": 5.079932674626781e-06, "loss": 11.3439, "step": 272710 }, { "epoch": 0.5509116545530206, "grad_norm": 796.883544921875, "learning_rate": 5.079583653189982e-06, "loss": 15.5374, "step": 272720 }, { "epoch": 0.5509318551857044, "grad_norm": 7.904236316680908, "learning_rate": 5.079234631365303e-06, "loss": 15.511, "step": 272730 }, { "epoch": 0.5509520558183882, "grad_norm": 240.46958923339844, "learning_rate": 5.0788856091544425e-06, "loss": 18.6114, "step": 272740 }, { "epoch": 0.5509722564510721, "grad_norm": 170.64633178710938, "learning_rate": 5.078536586559104e-06, "loss": 10.603, "step": 272750 }, { "epoch": 0.5509924570837559, "grad_norm": 124.4460678100586, "learning_rate": 5.078187563580988e-06, "loss": 22.7061, "step": 272760 }, { "epoch": 0.5510126577164397, "grad_norm": 620.9398803710938, "learning_rate": 5.077838540221794e-06, "loss": 31.4051, "step": 272770 }, { "epoch": 0.5510328583491235, "grad_norm": 159.94471740722656, "learning_rate": 5.077489516483225e-06, "loss": 53.498, "step": 272780 }, { "epoch": 0.5510530589818073, "grad_norm": 194.831298828125, "learning_rate": 5.077140492366982e-06, "loss": 17.5526, "step": 272790 }, { "epoch": 0.5510732596144912, "grad_norm": 400.83489990234375, "learning_rate": 5.0767914678747655e-06, "loss": 24.3749, "step": 272800 }, { "epoch": 0.551093460247175, "grad_norm": 460.9835510253906, "learning_rate": 5.076442443008277e-06, "loss": 16.932, "step": 272810 }, { "epoch": 0.5511136608798588, "grad_norm": 244.46713256835938, "learning_rate": 5.076093417769216e-06, "loss": 13.2635, "step": 272820 }, { "epoch": 0.5511338615125426, "grad_norm": 573.4295043945312, "learning_rate": 5.075744392159285e-06, "loss": 16.3991, "step": 272830 }, { "epoch": 0.5511540621452264, "grad_norm": 398.3282470703125, "learning_rate": 5.075395366180186e-06, "loss": 23.6394, "step": 272840 }, { "epoch": 0.5511742627779102, "grad_norm": 110.54041290283203, "learning_rate": 5.0750463398336195e-06, "loss": 48.9642, "step": 272850 }, { "epoch": 0.551194463410594, "grad_norm": 495.4403076171875, "learning_rate": 5.074697313121286e-06, "loss": 25.8939, "step": 272860 }, { "epoch": 0.5512146640432778, "grad_norm": 554.0845947265625, "learning_rate": 5.074348286044884e-06, "loss": 33.0909, "step": 272870 }, { "epoch": 0.5512348646759616, "grad_norm": 262.2660217285156, "learning_rate": 5.073999258606121e-06, "loss": 8.9729, "step": 272880 }, { "epoch": 0.5512550653086454, "grad_norm": 314.4496154785156, "learning_rate": 5.073650230806693e-06, "loss": 19.8806, "step": 272890 }, { "epoch": 0.5512752659413293, "grad_norm": 260.3829345703125, "learning_rate": 5.073301202648304e-06, "loss": 18.1126, "step": 272900 }, { "epoch": 0.5512954665740131, "grad_norm": 885.4226684570312, "learning_rate": 5.072952174132653e-06, "loss": 22.81, "step": 272910 }, { "epoch": 0.5513156672066969, "grad_norm": 137.13192749023438, "learning_rate": 5.072603145261442e-06, "loss": 10.1417, "step": 272920 }, { "epoch": 0.5513358678393807, "grad_norm": 173.0037841796875, "learning_rate": 5.072254116036372e-06, "loss": 11.0068, "step": 272930 }, { "epoch": 0.5513560684720645, "grad_norm": 162.48219299316406, "learning_rate": 5.071905086459145e-06, "loss": 18.8501, "step": 272940 }, { "epoch": 0.5513762691047484, "grad_norm": 165.0098876953125, "learning_rate": 5.0715560565314595e-06, "loss": 21.7332, "step": 272950 }, { "epoch": 0.5513964697374322, "grad_norm": 420.5088195800781, "learning_rate": 5.071207026255019e-06, "loss": 14.1502, "step": 272960 }, { "epoch": 0.551416670370116, "grad_norm": 235.39646911621094, "learning_rate": 5.070857995631526e-06, "loss": 5.796, "step": 272970 }, { "epoch": 0.5514368710027998, "grad_norm": 549.409912109375, "learning_rate": 5.070508964662677e-06, "loss": 35.026, "step": 272980 }, { "epoch": 0.5514570716354836, "grad_norm": 1514.242431640625, "learning_rate": 5.070159933350178e-06, "loss": 15.436, "step": 272990 }, { "epoch": 0.5514772722681675, "grad_norm": 249.48904418945312, "learning_rate": 5.069810901695727e-06, "loss": 25.1761, "step": 273000 }, { "epoch": 0.5514974729008513, "grad_norm": 19.538105010986328, "learning_rate": 5.0694618697010265e-06, "loss": 18.6375, "step": 273010 }, { "epoch": 0.5515176735335351, "grad_norm": 406.5464172363281, "learning_rate": 5.069112837367777e-06, "loss": 16.0779, "step": 273020 }, { "epoch": 0.5515378741662189, "grad_norm": 390.0971374511719, "learning_rate": 5.068763804697679e-06, "loss": 23.9447, "step": 273030 }, { "epoch": 0.5515580747989027, "grad_norm": 391.0527038574219, "learning_rate": 5.068414771692437e-06, "loss": 18.2133, "step": 273040 }, { "epoch": 0.5515782754315866, "grad_norm": 0.0, "learning_rate": 5.068065738353748e-06, "loss": 14.9086, "step": 273050 }, { "epoch": 0.5515984760642704, "grad_norm": 133.37362670898438, "learning_rate": 5.067716704683315e-06, "loss": 13.2041, "step": 273060 }, { "epoch": 0.5516186766969542, "grad_norm": 387.08441162109375, "learning_rate": 5.067367670682839e-06, "loss": 19.7711, "step": 273070 }, { "epoch": 0.551638877329638, "grad_norm": 179.871337890625, "learning_rate": 5.067018636354021e-06, "loss": 10.1834, "step": 273080 }, { "epoch": 0.5516590779623218, "grad_norm": 161.9391326904297, "learning_rate": 5.066669601698562e-06, "loss": 14.4349, "step": 273090 }, { "epoch": 0.5516792785950057, "grad_norm": 863.4387817382812, "learning_rate": 5.066320566718165e-06, "loss": 23.4938, "step": 273100 }, { "epoch": 0.5516994792276894, "grad_norm": 344.7052307128906, "learning_rate": 5.065971531414528e-06, "loss": 18.3189, "step": 273110 }, { "epoch": 0.5517196798603732, "grad_norm": 242.00177001953125, "learning_rate": 5.0656224957893545e-06, "loss": 18.9672, "step": 273120 }, { "epoch": 0.551739880493057, "grad_norm": 518.605224609375, "learning_rate": 5.065273459844345e-06, "loss": 14.8326, "step": 273130 }, { "epoch": 0.5517600811257408, "grad_norm": 241.82284545898438, "learning_rate": 5.0649244235812e-06, "loss": 18.504, "step": 273140 }, { "epoch": 0.5517802817584246, "grad_norm": 0.0, "learning_rate": 5.0645753870016224e-06, "loss": 14.5396, "step": 273150 }, { "epoch": 0.5518004823911085, "grad_norm": 612.7405395507812, "learning_rate": 5.0642263501073096e-06, "loss": 24.0131, "step": 273160 }, { "epoch": 0.5518206830237923, "grad_norm": 0.0, "learning_rate": 5.063877312899969e-06, "loss": 21.311, "step": 273170 }, { "epoch": 0.5518408836564761, "grad_norm": 194.5068817138672, "learning_rate": 5.063528275381295e-06, "loss": 14.551, "step": 273180 }, { "epoch": 0.5518610842891599, "grad_norm": 62.86180877685547, "learning_rate": 5.063179237552993e-06, "loss": 16.2012, "step": 273190 }, { "epoch": 0.5518812849218437, "grad_norm": 660.415283203125, "learning_rate": 5.062830199416764e-06, "loss": 24.2661, "step": 273200 }, { "epoch": 0.5519014855545276, "grad_norm": 323.9421081542969, "learning_rate": 5.062481160974308e-06, "loss": 15.233, "step": 273210 }, { "epoch": 0.5519216861872114, "grad_norm": 282.8017272949219, "learning_rate": 5.0621321222273255e-06, "loss": 30.42, "step": 273220 }, { "epoch": 0.5519418868198952, "grad_norm": 375.052978515625, "learning_rate": 5.061783083177519e-06, "loss": 10.3864, "step": 273230 }, { "epoch": 0.551962087452579, "grad_norm": 456.50836181640625, "learning_rate": 5.061434043826588e-06, "loss": 19.3524, "step": 273240 }, { "epoch": 0.5519822880852628, "grad_norm": 458.9396667480469, "learning_rate": 5.061085004176236e-06, "loss": 20.5243, "step": 273250 }, { "epoch": 0.5520024887179467, "grad_norm": 86.08439636230469, "learning_rate": 5.060735964228164e-06, "loss": 9.5614, "step": 273260 }, { "epoch": 0.5520226893506305, "grad_norm": 123.9028549194336, "learning_rate": 5.060386923984071e-06, "loss": 14.0727, "step": 273270 }, { "epoch": 0.5520428899833143, "grad_norm": 719.9234008789062, "learning_rate": 5.06003788344566e-06, "loss": 18.7667, "step": 273280 }, { "epoch": 0.5520630906159981, "grad_norm": 22.591482162475586, "learning_rate": 5.0596888426146325e-06, "loss": 14.574, "step": 273290 }, { "epoch": 0.5520832912486819, "grad_norm": 225.29420471191406, "learning_rate": 5.059339801492687e-06, "loss": 23.0747, "step": 273300 }, { "epoch": 0.5521034918813658, "grad_norm": 362.93145751953125, "learning_rate": 5.058990760081528e-06, "loss": 20.0726, "step": 273310 }, { "epoch": 0.5521236925140496, "grad_norm": 430.71234130859375, "learning_rate": 5.058641718382853e-06, "loss": 32.1516, "step": 273320 }, { "epoch": 0.5521438931467334, "grad_norm": 133.30226135253906, "learning_rate": 5.058292676398368e-06, "loss": 13.8477, "step": 273330 }, { "epoch": 0.5521640937794172, "grad_norm": 232.72979736328125, "learning_rate": 5.0579436341297705e-06, "loss": 16.1825, "step": 273340 }, { "epoch": 0.552184294412101, "grad_norm": 413.3011474609375, "learning_rate": 5.0575945915787616e-06, "loss": 19.059, "step": 273350 }, { "epoch": 0.5522044950447847, "grad_norm": 429.1296081542969, "learning_rate": 5.057245548747045e-06, "loss": 25.858, "step": 273360 }, { "epoch": 0.5522246956774686, "grad_norm": 193.27285766601562, "learning_rate": 5.056896505636322e-06, "loss": 15.8217, "step": 273370 }, { "epoch": 0.5522448963101524, "grad_norm": 207.70266723632812, "learning_rate": 5.05654746224829e-06, "loss": 24.9512, "step": 273380 }, { "epoch": 0.5522650969428362, "grad_norm": 489.8869323730469, "learning_rate": 5.056198418584653e-06, "loss": 27.7908, "step": 273390 }, { "epoch": 0.55228529757552, "grad_norm": 488.8653564453125, "learning_rate": 5.055849374647112e-06, "loss": 14.0518, "step": 273400 }, { "epoch": 0.5523054982082038, "grad_norm": 235.88938903808594, "learning_rate": 5.0555003304373674e-06, "loss": 26.0378, "step": 273410 }, { "epoch": 0.5523256988408877, "grad_norm": 372.5860900878906, "learning_rate": 5.0551512859571215e-06, "loss": 12.7416, "step": 273420 }, { "epoch": 0.5523458994735715, "grad_norm": 428.90411376953125, "learning_rate": 5.054802241208075e-06, "loss": 24.1856, "step": 273430 }, { "epoch": 0.5523661001062553, "grad_norm": 265.29376220703125, "learning_rate": 5.054453196191929e-06, "loss": 15.4131, "step": 273440 }, { "epoch": 0.5523863007389391, "grad_norm": 110.8825454711914, "learning_rate": 5.054104150910384e-06, "loss": 17.0487, "step": 273450 }, { "epoch": 0.552406501371623, "grad_norm": 358.1778259277344, "learning_rate": 5.053755105365142e-06, "loss": 19.226, "step": 273460 }, { "epoch": 0.5524267020043068, "grad_norm": 319.79779052734375, "learning_rate": 5.053406059557906e-06, "loss": 21.1769, "step": 273470 }, { "epoch": 0.5524469026369906, "grad_norm": 249.85028076171875, "learning_rate": 5.0530570134903725e-06, "loss": 21.082, "step": 273480 }, { "epoch": 0.5524671032696744, "grad_norm": 333.1750183105469, "learning_rate": 5.052707967164248e-06, "loss": 10.7601, "step": 273490 }, { "epoch": 0.5524873039023582, "grad_norm": 246.9015655517578, "learning_rate": 5.05235892058123e-06, "loss": 22.7461, "step": 273500 }, { "epoch": 0.552507504535042, "grad_norm": 302.4994201660156, "learning_rate": 5.052009873743021e-06, "loss": 18.5792, "step": 273510 }, { "epoch": 0.5525277051677259, "grad_norm": 55.885108947753906, "learning_rate": 5.051660826651322e-06, "loss": 24.5213, "step": 273520 }, { "epoch": 0.5525479058004097, "grad_norm": 200.94532775878906, "learning_rate": 5.0513117793078345e-06, "loss": 16.0201, "step": 273530 }, { "epoch": 0.5525681064330935, "grad_norm": 130.86167907714844, "learning_rate": 5.0509627317142606e-06, "loss": 13.9749, "step": 273540 }, { "epoch": 0.5525883070657773, "grad_norm": 320.12420654296875, "learning_rate": 5.050613683872299e-06, "loss": 20.2791, "step": 273550 }, { "epoch": 0.5526085076984611, "grad_norm": 68.21802520751953, "learning_rate": 5.050264635783654e-06, "loss": 10.5361, "step": 273560 }, { "epoch": 0.552628708331145, "grad_norm": 273.2901611328125, "learning_rate": 5.049915587450024e-06, "loss": 18.2626, "step": 273570 }, { "epoch": 0.5526489089638288, "grad_norm": 544.1163940429688, "learning_rate": 5.049566538873113e-06, "loss": 23.2479, "step": 273580 }, { "epoch": 0.5526691095965126, "grad_norm": 12.920458793640137, "learning_rate": 5.049217490054619e-06, "loss": 15.048, "step": 273590 }, { "epoch": 0.5526893102291964, "grad_norm": 240.81968688964844, "learning_rate": 5.048868440996246e-06, "loss": 20.5935, "step": 273600 }, { "epoch": 0.5527095108618802, "grad_norm": 471.04168701171875, "learning_rate": 5.048519391699693e-06, "loss": 20.2955, "step": 273610 }, { "epoch": 0.552729711494564, "grad_norm": 123.2837905883789, "learning_rate": 5.048170342166664e-06, "loss": 16.9225, "step": 273620 }, { "epoch": 0.5527499121272478, "grad_norm": 263.2627258300781, "learning_rate": 5.0478212923988576e-06, "loss": 13.1909, "step": 273630 }, { "epoch": 0.5527701127599316, "grad_norm": 322.5663757324219, "learning_rate": 5.047472242397976e-06, "loss": 16.3654, "step": 273640 }, { "epoch": 0.5527903133926154, "grad_norm": 383.7425537109375, "learning_rate": 5.047123192165722e-06, "loss": 10.2111, "step": 273650 }, { "epoch": 0.5528105140252992, "grad_norm": 377.22772216796875, "learning_rate": 5.046774141703793e-06, "loss": 25.5406, "step": 273660 }, { "epoch": 0.5528307146579831, "grad_norm": 59.314998626708984, "learning_rate": 5.046425091013893e-06, "loss": 13.2587, "step": 273670 }, { "epoch": 0.5528509152906669, "grad_norm": 229.21327209472656, "learning_rate": 5.046076040097722e-06, "loss": 26.9336, "step": 273680 }, { "epoch": 0.5528711159233507, "grad_norm": 442.7415771484375, "learning_rate": 5.045726988956984e-06, "loss": 17.4928, "step": 273690 }, { "epoch": 0.5528913165560345, "grad_norm": 424.8792419433594, "learning_rate": 5.045377937593376e-06, "loss": 20.0771, "step": 273700 }, { "epoch": 0.5529115171887183, "grad_norm": 52.748321533203125, "learning_rate": 5.045028886008605e-06, "loss": 9.0212, "step": 273710 }, { "epoch": 0.5529317178214022, "grad_norm": 459.68182373046875, "learning_rate": 5.044679834204366e-06, "loss": 29.3292, "step": 273720 }, { "epoch": 0.552951918454086, "grad_norm": 254.14453125, "learning_rate": 5.044330782182363e-06, "loss": 14.1011, "step": 273730 }, { "epoch": 0.5529721190867698, "grad_norm": 363.55206298828125, "learning_rate": 5.043981729944298e-06, "loss": 25.7608, "step": 273740 }, { "epoch": 0.5529923197194536, "grad_norm": 221.4928741455078, "learning_rate": 5.04363267749187e-06, "loss": 13.5054, "step": 273750 }, { "epoch": 0.5530125203521374, "grad_norm": 388.5581970214844, "learning_rate": 5.043283624826783e-06, "loss": 21.4175, "step": 273760 }, { "epoch": 0.5530327209848213, "grad_norm": 191.2097625732422, "learning_rate": 5.042934571950735e-06, "loss": 31.237, "step": 273770 }, { "epoch": 0.5530529216175051, "grad_norm": 143.50875854492188, "learning_rate": 5.042585518865431e-06, "loss": 15.7172, "step": 273780 }, { "epoch": 0.5530731222501889, "grad_norm": 143.4961700439453, "learning_rate": 5.04223646557257e-06, "loss": 10.7938, "step": 273790 }, { "epoch": 0.5530933228828727, "grad_norm": 286.82879638671875, "learning_rate": 5.041887412073853e-06, "loss": 21.7272, "step": 273800 }, { "epoch": 0.5531135235155565, "grad_norm": 31.47289276123047, "learning_rate": 5.041538358370983e-06, "loss": 16.7555, "step": 273810 }, { "epoch": 0.5531337241482404, "grad_norm": 187.60653686523438, "learning_rate": 5.04118930446566e-06, "loss": 28.1018, "step": 273820 }, { "epoch": 0.5531539247809242, "grad_norm": 237.72225952148438, "learning_rate": 5.040840250359584e-06, "loss": 21.0428, "step": 273830 }, { "epoch": 0.553174125413608, "grad_norm": 59.335670471191406, "learning_rate": 5.0404911960544575e-06, "loss": 7.6661, "step": 273840 }, { "epoch": 0.5531943260462918, "grad_norm": 221.32553100585938, "learning_rate": 5.040142141551982e-06, "loss": 14.374, "step": 273850 }, { "epoch": 0.5532145266789756, "grad_norm": 223.11941528320312, "learning_rate": 5.039793086853859e-06, "loss": 15.5132, "step": 273860 }, { "epoch": 0.5532347273116593, "grad_norm": 560.7000122070312, "learning_rate": 5.039444031961791e-06, "loss": 18.9194, "step": 273870 }, { "epoch": 0.5532549279443432, "grad_norm": 614.8914794921875, "learning_rate": 5.0390949768774755e-06, "loss": 20.36, "step": 273880 }, { "epoch": 0.553275128577027, "grad_norm": 238.68109130859375, "learning_rate": 5.038745921602617e-06, "loss": 24.854, "step": 273890 }, { "epoch": 0.5532953292097108, "grad_norm": 248.0565185546875, "learning_rate": 5.038396866138915e-06, "loss": 20.3157, "step": 273900 }, { "epoch": 0.5533155298423946, "grad_norm": 622.8152465820312, "learning_rate": 5.03804781048807e-06, "loss": 16.8985, "step": 273910 }, { "epoch": 0.5533357304750784, "grad_norm": 100.73365783691406, "learning_rate": 5.037698754651786e-06, "loss": 25.1163, "step": 273920 }, { "epoch": 0.5533559311077623, "grad_norm": 75.53772735595703, "learning_rate": 5.037349698631762e-06, "loss": 21.1352, "step": 273930 }, { "epoch": 0.5533761317404461, "grad_norm": 118.0751953125, "learning_rate": 5.037000642429701e-06, "loss": 15.3197, "step": 273940 }, { "epoch": 0.5533963323731299, "grad_norm": 6.805229663848877, "learning_rate": 5.036651586047303e-06, "loss": 19.2601, "step": 273950 }, { "epoch": 0.5534165330058137, "grad_norm": 260.5497741699219, "learning_rate": 5.03630252948627e-06, "loss": 11.3506, "step": 273960 }, { "epoch": 0.5534367336384975, "grad_norm": 510.424072265625, "learning_rate": 5.035953472748304e-06, "loss": 22.2624, "step": 273970 }, { "epoch": 0.5534569342711814, "grad_norm": 0.0, "learning_rate": 5.035604415835102e-06, "loss": 23.1045, "step": 273980 }, { "epoch": 0.5534771349038652, "grad_norm": 791.503173828125, "learning_rate": 5.035255358748371e-06, "loss": 26.8421, "step": 273990 }, { "epoch": 0.553497335536549, "grad_norm": 90.701171875, "learning_rate": 5.034906301489808e-06, "loss": 22.4735, "step": 274000 }, { "epoch": 0.5535175361692328, "grad_norm": 291.3519287109375, "learning_rate": 5.034557244061117e-06, "loss": 23.5925, "step": 274010 }, { "epoch": 0.5535377368019166, "grad_norm": 106.2227554321289, "learning_rate": 5.034208186463998e-06, "loss": 17.2287, "step": 274020 }, { "epoch": 0.5535579374346005, "grad_norm": 291.88970947265625, "learning_rate": 5.0338591287001525e-06, "loss": 13.1389, "step": 274030 }, { "epoch": 0.5535781380672843, "grad_norm": 317.79901123046875, "learning_rate": 5.03351007077128e-06, "loss": 13.7718, "step": 274040 }, { "epoch": 0.5535983386999681, "grad_norm": 504.2861633300781, "learning_rate": 5.033161012679087e-06, "loss": 16.1548, "step": 274050 }, { "epoch": 0.5536185393326519, "grad_norm": 361.0779113769531, "learning_rate": 5.032811954425268e-06, "loss": 17.6353, "step": 274060 }, { "epoch": 0.5536387399653357, "grad_norm": 182.768310546875, "learning_rate": 5.0324628960115296e-06, "loss": 10.5677, "step": 274070 }, { "epoch": 0.5536589405980196, "grad_norm": 226.9862060546875, "learning_rate": 5.032113837439571e-06, "loss": 14.7638, "step": 274080 }, { "epoch": 0.5536791412307034, "grad_norm": 4.088353157043457, "learning_rate": 5.0317647787110915e-06, "loss": 25.5374, "step": 274090 }, { "epoch": 0.5536993418633872, "grad_norm": 149.0470733642578, "learning_rate": 5.031415719827796e-06, "loss": 18.8346, "step": 274100 }, { "epoch": 0.553719542496071, "grad_norm": 308.3907470703125, "learning_rate": 5.031066660791383e-06, "loss": 20.3566, "step": 274110 }, { "epoch": 0.5537397431287548, "grad_norm": 444.4082336425781, "learning_rate": 5.030717601603556e-06, "loss": 14.6193, "step": 274120 }, { "epoch": 0.5537599437614386, "grad_norm": 323.3243103027344, "learning_rate": 5.030368542266013e-06, "loss": 23.0533, "step": 274130 }, { "epoch": 0.5537801443941224, "grad_norm": 266.3387451171875, "learning_rate": 5.030019482780459e-06, "loss": 17.7409, "step": 274140 }, { "epoch": 0.5538003450268062, "grad_norm": 554.4190673828125, "learning_rate": 5.029670423148595e-06, "loss": 20.1742, "step": 274150 }, { "epoch": 0.55382054565949, "grad_norm": 416.7316589355469, "learning_rate": 5.029321363372119e-06, "loss": 12.3216, "step": 274160 }, { "epoch": 0.5538407462921738, "grad_norm": 202.31741333007812, "learning_rate": 5.0289723034527345e-06, "loss": 14.2126, "step": 274170 }, { "epoch": 0.5538609469248577, "grad_norm": 404.17218017578125, "learning_rate": 5.028623243392143e-06, "loss": 17.525, "step": 274180 }, { "epoch": 0.5538811475575415, "grad_norm": 322.6646728515625, "learning_rate": 5.028274183192046e-06, "loss": 31.919, "step": 274190 }, { "epoch": 0.5539013481902253, "grad_norm": 289.2823181152344, "learning_rate": 5.027925122854141e-06, "loss": 14.252, "step": 274200 }, { "epoch": 0.5539215488229091, "grad_norm": 418.2778625488281, "learning_rate": 5.027576062380136e-06, "loss": 22.0489, "step": 274210 }, { "epoch": 0.5539417494555929, "grad_norm": 10.499457359313965, "learning_rate": 5.027227001771727e-06, "loss": 7.1966, "step": 274220 }, { "epoch": 0.5539619500882768, "grad_norm": 438.4384765625, "learning_rate": 5.0268779410306164e-06, "loss": 27.7584, "step": 274230 }, { "epoch": 0.5539821507209606, "grad_norm": 186.43426513671875, "learning_rate": 5.026528880158508e-06, "loss": 23.429, "step": 274240 }, { "epoch": 0.5540023513536444, "grad_norm": 91.04292297363281, "learning_rate": 5.0261798191570975e-06, "loss": 25.9122, "step": 274250 }, { "epoch": 0.5540225519863282, "grad_norm": 262.6781921386719, "learning_rate": 5.025830758028093e-06, "loss": 21.2346, "step": 274260 }, { "epoch": 0.554042752619012, "grad_norm": 77.18472290039062, "learning_rate": 5.025481696773191e-06, "loss": 9.5684, "step": 274270 }, { "epoch": 0.5540629532516959, "grad_norm": 402.70843505859375, "learning_rate": 5.025132635394095e-06, "loss": 20.0877, "step": 274280 }, { "epoch": 0.5540831538843797, "grad_norm": 854.1192016601562, "learning_rate": 5.024783573892505e-06, "loss": 32.3839, "step": 274290 }, { "epoch": 0.5541033545170635, "grad_norm": 349.13006591796875, "learning_rate": 5.024434512270123e-06, "loss": 12.7315, "step": 274300 }, { "epoch": 0.5541235551497473, "grad_norm": 135.63656616210938, "learning_rate": 5.0240854505286505e-06, "loss": 9.3104, "step": 274310 }, { "epoch": 0.5541437557824311, "grad_norm": 224.0810546875, "learning_rate": 5.023736388669789e-06, "loss": 16.8324, "step": 274320 }, { "epoch": 0.554163956415115, "grad_norm": 302.9511413574219, "learning_rate": 5.023387326695238e-06, "loss": 10.6159, "step": 274330 }, { "epoch": 0.5541841570477988, "grad_norm": 166.3214874267578, "learning_rate": 5.0230382646067e-06, "loss": 14.9477, "step": 274340 }, { "epoch": 0.5542043576804826, "grad_norm": 510.76434326171875, "learning_rate": 5.0226892024058785e-06, "loss": 30.4824, "step": 274350 }, { "epoch": 0.5542245583131664, "grad_norm": 421.52886962890625, "learning_rate": 5.022340140094469e-06, "loss": 17.575, "step": 274360 }, { "epoch": 0.5542447589458502, "grad_norm": 178.84568786621094, "learning_rate": 5.021991077674179e-06, "loss": 11.0027, "step": 274370 }, { "epoch": 0.5542649595785339, "grad_norm": 325.80938720703125, "learning_rate": 5.021642015146705e-06, "loss": 22.9005, "step": 274380 }, { "epoch": 0.5542851602112178, "grad_norm": 300.5986328125, "learning_rate": 5.021292952513752e-06, "loss": 23.1485, "step": 274390 }, { "epoch": 0.5543053608439016, "grad_norm": 243.4246826171875, "learning_rate": 5.0209438897770205e-06, "loss": 19.702, "step": 274400 }, { "epoch": 0.5543255614765854, "grad_norm": 275.55133056640625, "learning_rate": 5.020594826938209e-06, "loss": 33.3538, "step": 274410 }, { "epoch": 0.5543457621092692, "grad_norm": 287.3623046875, "learning_rate": 5.020245763999024e-06, "loss": 23.5659, "step": 274420 }, { "epoch": 0.554365962741953, "grad_norm": 125.89869689941406, "learning_rate": 5.01989670096116e-06, "loss": 26.1372, "step": 274430 }, { "epoch": 0.5543861633746369, "grad_norm": 294.7071838378906, "learning_rate": 5.019547637826323e-06, "loss": 25.2957, "step": 274440 }, { "epoch": 0.5544063640073207, "grad_norm": 111.70234680175781, "learning_rate": 5.019198574596213e-06, "loss": 14.944, "step": 274450 }, { "epoch": 0.5544265646400045, "grad_norm": 65.61572265625, "learning_rate": 5.018849511272532e-06, "loss": 27.3464, "step": 274460 }, { "epoch": 0.5544467652726883, "grad_norm": 235.71885681152344, "learning_rate": 5.018500447856981e-06, "loss": 20.1556, "step": 274470 }, { "epoch": 0.5544669659053721, "grad_norm": 281.04705810546875, "learning_rate": 5.0181513843512615e-06, "loss": 11.6232, "step": 274480 }, { "epoch": 0.554487166538056, "grad_norm": 170.74488830566406, "learning_rate": 5.017802320757073e-06, "loss": 27.4504, "step": 274490 }, { "epoch": 0.5545073671707398, "grad_norm": 122.58528900146484, "learning_rate": 5.0174532570761194e-06, "loss": 21.1881, "step": 274500 }, { "epoch": 0.5545275678034236, "grad_norm": 348.4722595214844, "learning_rate": 5.0171041933101e-06, "loss": 29.3161, "step": 274510 }, { "epoch": 0.5545477684361074, "grad_norm": 381.2569580078125, "learning_rate": 5.016755129460717e-06, "loss": 25.372, "step": 274520 }, { "epoch": 0.5545679690687912, "grad_norm": 202.76710510253906, "learning_rate": 5.016406065529672e-06, "loss": 23.1021, "step": 274530 }, { "epoch": 0.5545881697014751, "grad_norm": 233.45298767089844, "learning_rate": 5.016057001518664e-06, "loss": 21.2906, "step": 274540 }, { "epoch": 0.5546083703341589, "grad_norm": 247.58636474609375, "learning_rate": 5.015707937429398e-06, "loss": 16.6146, "step": 274550 }, { "epoch": 0.5546285709668427, "grad_norm": 136.72415161132812, "learning_rate": 5.0153588732635734e-06, "loss": 33.585, "step": 274560 }, { "epoch": 0.5546487715995265, "grad_norm": 56.595794677734375, "learning_rate": 5.015009809022891e-06, "loss": 31.2656, "step": 274570 }, { "epoch": 0.5546689722322103, "grad_norm": 189.0404815673828, "learning_rate": 5.014660744709053e-06, "loss": 17.3477, "step": 274580 }, { "epoch": 0.5546891728648942, "grad_norm": 341.2669372558594, "learning_rate": 5.014311680323759e-06, "loss": 27.2617, "step": 274590 }, { "epoch": 0.554709373497578, "grad_norm": 346.0419921875, "learning_rate": 5.013962615868714e-06, "loss": 24.382, "step": 274600 }, { "epoch": 0.5547295741302618, "grad_norm": 74.92779541015625, "learning_rate": 5.013613551345614e-06, "loss": 7.191, "step": 274610 }, { "epoch": 0.5547497747629456, "grad_norm": 145.8495635986328, "learning_rate": 5.013264486756165e-06, "loss": 15.8293, "step": 274620 }, { "epoch": 0.5547699753956294, "grad_norm": 76.50386047363281, "learning_rate": 5.012915422102066e-06, "loss": 12.8591, "step": 274630 }, { "epoch": 0.5547901760283132, "grad_norm": 544.4208984375, "learning_rate": 5.0125663573850204e-06, "loss": 17.4789, "step": 274640 }, { "epoch": 0.554810376660997, "grad_norm": 98.98028564453125, "learning_rate": 5.012217292606726e-06, "loss": 24.0937, "step": 274650 }, { "epoch": 0.5548305772936808, "grad_norm": 0.0, "learning_rate": 5.011868227768886e-06, "loss": 12.9601, "step": 274660 }, { "epoch": 0.5548507779263646, "grad_norm": 324.67529296875, "learning_rate": 5.011519162873202e-06, "loss": 19.3166, "step": 274670 }, { "epoch": 0.5548709785590484, "grad_norm": 183.0932159423828, "learning_rate": 5.011170097921375e-06, "loss": 9.6509, "step": 274680 }, { "epoch": 0.5548911791917323, "grad_norm": 401.04144287109375, "learning_rate": 5.010821032915108e-06, "loss": 15.6088, "step": 274690 }, { "epoch": 0.5549113798244161, "grad_norm": 569.4534301757812, "learning_rate": 5.010471967856096e-06, "loss": 21.036, "step": 274700 }, { "epoch": 0.5549315804570999, "grad_norm": 525.4457397460938, "learning_rate": 5.01012290274605e-06, "loss": 18.7589, "step": 274710 }, { "epoch": 0.5549517810897837, "grad_norm": 418.6214599609375, "learning_rate": 5.009773837586663e-06, "loss": 12.5092, "step": 274720 }, { "epoch": 0.5549719817224675, "grad_norm": 350.9180603027344, "learning_rate": 5.0094247723796405e-06, "loss": 16.5724, "step": 274730 }, { "epoch": 0.5549921823551514, "grad_norm": 192.02687072753906, "learning_rate": 5.009075707126684e-06, "loss": 17.4676, "step": 274740 }, { "epoch": 0.5550123829878352, "grad_norm": 301.23590087890625, "learning_rate": 5.008726641829492e-06, "loss": 33.282, "step": 274750 }, { "epoch": 0.555032583620519, "grad_norm": 35.92389678955078, "learning_rate": 5.008377576489769e-06, "loss": 13.1454, "step": 274760 }, { "epoch": 0.5550527842532028, "grad_norm": 1060.649169921875, "learning_rate": 5.008028511109213e-06, "loss": 10.1357, "step": 274770 }, { "epoch": 0.5550729848858866, "grad_norm": 408.6298828125, "learning_rate": 5.007679445689527e-06, "loss": 9.7646, "step": 274780 }, { "epoch": 0.5550931855185705, "grad_norm": 261.1011657714844, "learning_rate": 5.007330380232414e-06, "loss": 20.8869, "step": 274790 }, { "epoch": 0.5551133861512543, "grad_norm": 478.0770263671875, "learning_rate": 5.006981314739573e-06, "loss": 10.4074, "step": 274800 }, { "epoch": 0.5551335867839381, "grad_norm": 369.41009521484375, "learning_rate": 5.0066322492127036e-06, "loss": 38.8577, "step": 274810 }, { "epoch": 0.5551537874166219, "grad_norm": 286.3626708984375, "learning_rate": 5.006283183653513e-06, "loss": 16.7527, "step": 274820 }, { "epoch": 0.5551739880493057, "grad_norm": 509.7117004394531, "learning_rate": 5.005934118063697e-06, "loss": 28.9102, "step": 274830 }, { "epoch": 0.5551941886819896, "grad_norm": 464.114501953125, "learning_rate": 5.005585052444959e-06, "loss": 17.4176, "step": 274840 }, { "epoch": 0.5552143893146734, "grad_norm": 105.73057556152344, "learning_rate": 5.005235986799001e-06, "loss": 25.7034, "step": 274850 }, { "epoch": 0.5552345899473572, "grad_norm": 268.55218505859375, "learning_rate": 5.004886921127521e-06, "loss": 21.8714, "step": 274860 }, { "epoch": 0.555254790580041, "grad_norm": 387.05755615234375, "learning_rate": 5.0045378554322256e-06, "loss": 28.5721, "step": 274870 }, { "epoch": 0.5552749912127248, "grad_norm": 407.1351623535156, "learning_rate": 5.004188789714811e-06, "loss": 14.7272, "step": 274880 }, { "epoch": 0.5552951918454087, "grad_norm": 337.451171875, "learning_rate": 5.003839723976982e-06, "loss": 14.9373, "step": 274890 }, { "epoch": 0.5553153924780924, "grad_norm": 3.197208881378174, "learning_rate": 5.003490658220438e-06, "loss": 16.4629, "step": 274900 }, { "epoch": 0.5553355931107762, "grad_norm": 93.28102111816406, "learning_rate": 5.0031415924468816e-06, "loss": 11.4526, "step": 274910 }, { "epoch": 0.55535579374346, "grad_norm": 515.8828735351562, "learning_rate": 5.002792526658015e-06, "loss": 22.7259, "step": 274920 }, { "epoch": 0.5553759943761438, "grad_norm": 808.0952758789062, "learning_rate": 5.002443460855535e-06, "loss": 34.737, "step": 274930 }, { "epoch": 0.5553961950088276, "grad_norm": 262.9781799316406, "learning_rate": 5.002094395041147e-06, "loss": 13.5016, "step": 274940 }, { "epoch": 0.5554163956415115, "grad_norm": 210.69570922851562, "learning_rate": 5.001745329216551e-06, "loss": 11.8395, "step": 274950 }, { "epoch": 0.5554365962741953, "grad_norm": 0.0, "learning_rate": 5.00139626338345e-06, "loss": 18.2038, "step": 274960 }, { "epoch": 0.5554567969068791, "grad_norm": 149.49984741210938, "learning_rate": 5.00104719754354e-06, "loss": 37.0924, "step": 274970 }, { "epoch": 0.5554769975395629, "grad_norm": 360.1336669921875, "learning_rate": 5.000698131698531e-06, "loss": 28.0023, "step": 274980 }, { "epoch": 0.5554971981722467, "grad_norm": 548.3187255859375, "learning_rate": 5.000349065850117e-06, "loss": 31.5242, "step": 274990 }, { "epoch": 0.5555173988049306, "grad_norm": 299.048583984375, "learning_rate": 5e-06, "loss": 26.5229, "step": 275000 }, { "epoch": 0.5555375994376144, "grad_norm": 172.84494018554688, "learning_rate": 4.999650934149885e-06, "loss": 7.0285, "step": 275010 }, { "epoch": 0.5555578000702982, "grad_norm": 471.4140319824219, "learning_rate": 4.999301868301472e-06, "loss": 20.8972, "step": 275020 }, { "epoch": 0.555578000702982, "grad_norm": 213.53451538085938, "learning_rate": 4.9989528024564606e-06, "loss": 14.2753, "step": 275030 }, { "epoch": 0.5555982013356658, "grad_norm": 66.3450927734375, "learning_rate": 4.998603736616552e-06, "loss": 20.6154, "step": 275040 }, { "epoch": 0.5556184019683497, "grad_norm": 385.82574462890625, "learning_rate": 4.9982546707834514e-06, "loss": 12.0293, "step": 275050 }, { "epoch": 0.5556386026010335, "grad_norm": 167.15696716308594, "learning_rate": 4.9979056049588545e-06, "loss": 13.4257, "step": 275060 }, { "epoch": 0.5556588032337173, "grad_norm": 510.8609924316406, "learning_rate": 4.997556539144467e-06, "loss": 19.8228, "step": 275070 }, { "epoch": 0.5556790038664011, "grad_norm": 193.69122314453125, "learning_rate": 4.9972074733419875e-06, "loss": 18.476, "step": 275080 }, { "epoch": 0.5556992044990849, "grad_norm": 118.71813201904297, "learning_rate": 4.996858407553119e-06, "loss": 15.05, "step": 275090 }, { "epoch": 0.5557194051317688, "grad_norm": 337.19537353515625, "learning_rate": 4.996509341779563e-06, "loss": 19.9766, "step": 275100 }, { "epoch": 0.5557396057644526, "grad_norm": 505.9773254394531, "learning_rate": 4.996160276023018e-06, "loss": 41.889, "step": 275110 }, { "epoch": 0.5557598063971364, "grad_norm": 84.14517974853516, "learning_rate": 4.99581121028519e-06, "loss": 22.4179, "step": 275120 }, { "epoch": 0.5557800070298202, "grad_norm": 277.0834655761719, "learning_rate": 4.995462144567776e-06, "loss": 14.1062, "step": 275130 }, { "epoch": 0.555800207662504, "grad_norm": 362.31610107421875, "learning_rate": 4.9951130788724796e-06, "loss": 10.409, "step": 275140 }, { "epoch": 0.5558204082951878, "grad_norm": 286.32879638671875, "learning_rate": 4.994764013201002e-06, "loss": 10.3228, "step": 275150 }, { "epoch": 0.5558406089278716, "grad_norm": 245.91494750976562, "learning_rate": 4.994414947555043e-06, "loss": 25.7714, "step": 275160 }, { "epoch": 0.5558608095605554, "grad_norm": 452.76751708984375, "learning_rate": 4.994065881936305e-06, "loss": 30.5144, "step": 275170 }, { "epoch": 0.5558810101932392, "grad_norm": 240.3202667236328, "learning_rate": 4.99371681634649e-06, "loss": 23.1311, "step": 275180 }, { "epoch": 0.555901210825923, "grad_norm": 85.14454650878906, "learning_rate": 4.993367750787297e-06, "loss": 12.454, "step": 275190 }, { "epoch": 0.5559214114586069, "grad_norm": 207.3945770263672, "learning_rate": 4.993018685260428e-06, "loss": 16.1688, "step": 275200 }, { "epoch": 0.5559416120912907, "grad_norm": 244.25575256347656, "learning_rate": 4.992669619767589e-06, "loss": 12.958, "step": 275210 }, { "epoch": 0.5559618127239745, "grad_norm": 10.216423034667969, "learning_rate": 4.992320554310474e-06, "loss": 46.9581, "step": 275220 }, { "epoch": 0.5559820133566583, "grad_norm": 264.2204284667969, "learning_rate": 4.991971488890789e-06, "loss": 31.3653, "step": 275230 }, { "epoch": 0.5560022139893421, "grad_norm": 112.26936340332031, "learning_rate": 4.991622423510233e-06, "loss": 25.2175, "step": 275240 }, { "epoch": 0.556022414622026, "grad_norm": 245.9047393798828, "learning_rate": 4.9912733581705095e-06, "loss": 8.9174, "step": 275250 }, { "epoch": 0.5560426152547098, "grad_norm": 184.85629272460938, "learning_rate": 4.9909242928733185e-06, "loss": 24.1314, "step": 275260 }, { "epoch": 0.5560628158873936, "grad_norm": 292.9455871582031, "learning_rate": 4.990575227620359e-06, "loss": 25.1783, "step": 275270 }, { "epoch": 0.5560830165200774, "grad_norm": 373.4295654296875, "learning_rate": 4.990226162413338e-06, "loss": 31.2988, "step": 275280 }, { "epoch": 0.5561032171527612, "grad_norm": 0.0, "learning_rate": 4.989877097253952e-06, "loss": 11.0351, "step": 275290 }, { "epoch": 0.556123417785445, "grad_norm": 434.8949890136719, "learning_rate": 4.9895280321439036e-06, "loss": 9.6418, "step": 275300 }, { "epoch": 0.5561436184181289, "grad_norm": 458.89813232421875, "learning_rate": 4.989178967084896e-06, "loss": 21.2986, "step": 275310 }, { "epoch": 0.5561638190508127, "grad_norm": 25.408029556274414, "learning_rate": 4.9888299020786265e-06, "loss": 19.2745, "step": 275320 }, { "epoch": 0.5561840196834965, "grad_norm": 486.03912353515625, "learning_rate": 4.9884808371268e-06, "loss": 30.3504, "step": 275330 }, { "epoch": 0.5562042203161803, "grad_norm": 144.64817810058594, "learning_rate": 4.9881317722311165e-06, "loss": 6.3565, "step": 275340 }, { "epoch": 0.5562244209488642, "grad_norm": 233.3751678466797, "learning_rate": 4.987782707393276e-06, "loss": 13.277, "step": 275350 }, { "epoch": 0.556244621581548, "grad_norm": 346.3904113769531, "learning_rate": 4.987433642614981e-06, "loss": 20.0665, "step": 275360 }, { "epoch": 0.5562648222142318, "grad_norm": 5.109986305236816, "learning_rate": 4.987084577897936e-06, "loss": 12.8083, "step": 275370 }, { "epoch": 0.5562850228469156, "grad_norm": 91.89998626708984, "learning_rate": 4.986735513243836e-06, "loss": 6.8216, "step": 275380 }, { "epoch": 0.5563052234795994, "grad_norm": 183.62379455566406, "learning_rate": 4.9863864486543865e-06, "loss": 27.2318, "step": 275390 }, { "epoch": 0.5563254241122833, "grad_norm": 410.5458679199219, "learning_rate": 4.986037384131288e-06, "loss": 13.0085, "step": 275400 }, { "epoch": 0.556345624744967, "grad_norm": 590.7412719726562, "learning_rate": 4.985688319676242e-06, "loss": 18.4312, "step": 275410 }, { "epoch": 0.5563658253776508, "grad_norm": 152.92845153808594, "learning_rate": 4.985339255290948e-06, "loss": 8.1017, "step": 275420 }, { "epoch": 0.5563860260103346, "grad_norm": 261.1161193847656, "learning_rate": 4.98499019097711e-06, "loss": 12.2067, "step": 275430 }, { "epoch": 0.5564062266430184, "grad_norm": 177.15081787109375, "learning_rate": 4.984641126736428e-06, "loss": 15.176, "step": 275440 }, { "epoch": 0.5564264272757022, "grad_norm": 417.4114685058594, "learning_rate": 4.984292062570603e-06, "loss": 35.748, "step": 275450 }, { "epoch": 0.5564466279083861, "grad_norm": 313.15948486328125, "learning_rate": 4.983942998481336e-06, "loss": 13.2914, "step": 275460 }, { "epoch": 0.5564668285410699, "grad_norm": 187.69981384277344, "learning_rate": 4.9835939344703305e-06, "loss": 20.7104, "step": 275470 }, { "epoch": 0.5564870291737537, "grad_norm": 119.696533203125, "learning_rate": 4.983244870539284e-06, "loss": 21.2059, "step": 275480 }, { "epoch": 0.5565072298064375, "grad_norm": 6.533258438110352, "learning_rate": 4.9828958066899e-06, "loss": 6.0762, "step": 275490 }, { "epoch": 0.5565274304391213, "grad_norm": 280.7604675292969, "learning_rate": 4.982546742923883e-06, "loss": 12.7893, "step": 275500 }, { "epoch": 0.5565476310718052, "grad_norm": 143.6747589111328, "learning_rate": 4.9821976792429274e-06, "loss": 10.3919, "step": 275510 }, { "epoch": 0.556567831704489, "grad_norm": 176.18760681152344, "learning_rate": 4.981848615648739e-06, "loss": 17.4275, "step": 275520 }, { "epoch": 0.5565880323371728, "grad_norm": 205.65599060058594, "learning_rate": 4.9814995521430195e-06, "loss": 19.7357, "step": 275530 }, { "epoch": 0.5566082329698566, "grad_norm": 313.3788757324219, "learning_rate": 4.981150488727469e-06, "loss": 11.8917, "step": 275540 }, { "epoch": 0.5566284336025404, "grad_norm": 209.7315673828125, "learning_rate": 4.980801425403788e-06, "loss": 16.1987, "step": 275550 }, { "epoch": 0.5566486342352243, "grad_norm": 194.6985321044922, "learning_rate": 4.980452362173676e-06, "loss": 17.4527, "step": 275560 }, { "epoch": 0.5566688348679081, "grad_norm": 187.01988220214844, "learning_rate": 4.980103299038842e-06, "loss": 11.1597, "step": 275570 }, { "epoch": 0.5566890355005919, "grad_norm": 878.7315063476562, "learning_rate": 4.979754236000978e-06, "loss": 25.5012, "step": 275580 }, { "epoch": 0.5567092361332757, "grad_norm": 334.393798828125, "learning_rate": 4.979405173061791e-06, "loss": 12.592, "step": 275590 }, { "epoch": 0.5567294367659595, "grad_norm": 311.16864013671875, "learning_rate": 4.979056110222982e-06, "loss": 20.0491, "step": 275600 }, { "epoch": 0.5567496373986434, "grad_norm": 275.6115417480469, "learning_rate": 4.978707047486249e-06, "loss": 20.8084, "step": 275610 }, { "epoch": 0.5567698380313272, "grad_norm": 490.4664001464844, "learning_rate": 4.978357984853296e-06, "loss": 14.9251, "step": 275620 }, { "epoch": 0.556790038664011, "grad_norm": 222.76907348632812, "learning_rate": 4.9780089223258235e-06, "loss": 17.9937, "step": 275630 }, { "epoch": 0.5568102392966948, "grad_norm": 1.633421778678894, "learning_rate": 4.977659859905532e-06, "loss": 26.796, "step": 275640 }, { "epoch": 0.5568304399293786, "grad_norm": 193.53114318847656, "learning_rate": 4.977310797594124e-06, "loss": 11.1051, "step": 275650 }, { "epoch": 0.5568506405620623, "grad_norm": 109.830322265625, "learning_rate": 4.9769617353933025e-06, "loss": 14.5558, "step": 275660 }, { "epoch": 0.5568708411947462, "grad_norm": 9.360294342041016, "learning_rate": 4.976612673304764e-06, "loss": 18.2348, "step": 275670 }, { "epoch": 0.55689104182743, "grad_norm": 223.71148681640625, "learning_rate": 4.976263611330213e-06, "loss": 17.5995, "step": 275680 }, { "epoch": 0.5569112424601138, "grad_norm": 425.35308837890625, "learning_rate": 4.97591454947135e-06, "loss": 10.625, "step": 275690 }, { "epoch": 0.5569314430927976, "grad_norm": 342.57586669921875, "learning_rate": 4.975565487729879e-06, "loss": 12.8923, "step": 275700 }, { "epoch": 0.5569516437254814, "grad_norm": 342.2311706542969, "learning_rate": 4.9752164261074964e-06, "loss": 37.3864, "step": 275710 }, { "epoch": 0.5569718443581653, "grad_norm": 0.5458325743675232, "learning_rate": 4.974867364605906e-06, "loss": 14.3001, "step": 275720 }, { "epoch": 0.5569920449908491, "grad_norm": 471.9128112792969, "learning_rate": 4.97451830322681e-06, "loss": 24.6937, "step": 275730 }, { "epoch": 0.5570122456235329, "grad_norm": 423.0294494628906, "learning_rate": 4.974169241971908e-06, "loss": 20.9917, "step": 275740 }, { "epoch": 0.5570324462562167, "grad_norm": 8.86659049987793, "learning_rate": 4.9738201808429025e-06, "loss": 9.7589, "step": 275750 }, { "epoch": 0.5570526468889005, "grad_norm": 378.8095397949219, "learning_rate": 4.973471119841495e-06, "loss": 26.8174, "step": 275760 }, { "epoch": 0.5570728475215844, "grad_norm": 360.4798583984375, "learning_rate": 4.973122058969384e-06, "loss": 22.0355, "step": 275770 }, { "epoch": 0.5570930481542682, "grad_norm": 268.79180908203125, "learning_rate": 4.972772998228274e-06, "loss": 15.5028, "step": 275780 }, { "epoch": 0.557113248786952, "grad_norm": 675.5296630859375, "learning_rate": 4.972423937619866e-06, "loss": 20.3552, "step": 275790 }, { "epoch": 0.5571334494196358, "grad_norm": 276.287109375, "learning_rate": 4.9720748771458595e-06, "loss": 12.7121, "step": 275800 }, { "epoch": 0.5571536500523196, "grad_norm": 83.33101654052734, "learning_rate": 4.971725816807956e-06, "loss": 20.1748, "step": 275810 }, { "epoch": 0.5571738506850035, "grad_norm": 277.43780517578125, "learning_rate": 4.97137675660786e-06, "loss": 17.0486, "step": 275820 }, { "epoch": 0.5571940513176873, "grad_norm": 368.84039306640625, "learning_rate": 4.971027696547266e-06, "loss": 30.0573, "step": 275830 }, { "epoch": 0.5572142519503711, "grad_norm": 429.3113098144531, "learning_rate": 4.970678636627882e-06, "loss": 24.5154, "step": 275840 }, { "epoch": 0.5572344525830549, "grad_norm": 573.2977294921875, "learning_rate": 4.970329576851406e-06, "loss": 16.2653, "step": 275850 }, { "epoch": 0.5572546532157387, "grad_norm": 83.22220611572266, "learning_rate": 4.969980517219542e-06, "loss": 9.71, "step": 275860 }, { "epoch": 0.5572748538484226, "grad_norm": 591.4146118164062, "learning_rate": 4.969631457733988e-06, "loss": 26.05, "step": 275870 }, { "epoch": 0.5572950544811064, "grad_norm": 147.2950439453125, "learning_rate": 4.969282398396445e-06, "loss": 12.0743, "step": 275880 }, { "epoch": 0.5573152551137902, "grad_norm": 187.12771606445312, "learning_rate": 4.96893333920862e-06, "loss": 16.5335, "step": 275890 }, { "epoch": 0.557335455746474, "grad_norm": 173.2279510498047, "learning_rate": 4.968584280172206e-06, "loss": 17.651, "step": 275900 }, { "epoch": 0.5573556563791578, "grad_norm": 266.6116027832031, "learning_rate": 4.968235221288909e-06, "loss": 17.3757, "step": 275910 }, { "epoch": 0.5573758570118416, "grad_norm": 169.33193969726562, "learning_rate": 4.967886162560432e-06, "loss": 30.6125, "step": 275920 }, { "epoch": 0.5573960576445254, "grad_norm": 133.37158203125, "learning_rate": 4.967537103988472e-06, "loss": 32.7901, "step": 275930 }, { "epoch": 0.5574162582772092, "grad_norm": 447.1186828613281, "learning_rate": 4.967188045574733e-06, "loss": 20.5911, "step": 275940 }, { "epoch": 0.557436458909893, "grad_norm": 254.47415161132812, "learning_rate": 4.966838987320916e-06, "loss": 17.3635, "step": 275950 }, { "epoch": 0.5574566595425768, "grad_norm": 323.95068359375, "learning_rate": 4.966489929228721e-06, "loss": 27.2687, "step": 275960 }, { "epoch": 0.5574768601752607, "grad_norm": 635.6312255859375, "learning_rate": 4.966140871299849e-06, "loss": 18.7075, "step": 275970 }, { "epoch": 0.5574970608079445, "grad_norm": 261.3988342285156, "learning_rate": 4.965791813536004e-06, "loss": 22.48, "step": 275980 }, { "epoch": 0.5575172614406283, "grad_norm": 443.0406799316406, "learning_rate": 4.9654427559388845e-06, "loss": 15.7236, "step": 275990 }, { "epoch": 0.5575374620733121, "grad_norm": 288.59149169921875, "learning_rate": 4.965093698510192e-06, "loss": 12.5972, "step": 276000 }, { "epoch": 0.5575576627059959, "grad_norm": 522.8019409179688, "learning_rate": 4.96474464125163e-06, "loss": 12.5414, "step": 276010 }, { "epoch": 0.5575778633386798, "grad_norm": 343.75445556640625, "learning_rate": 4.964395584164899e-06, "loss": 15.7494, "step": 276020 }, { "epoch": 0.5575980639713636, "grad_norm": 311.196044921875, "learning_rate": 4.964046527251698e-06, "loss": 32.3373, "step": 276030 }, { "epoch": 0.5576182646040474, "grad_norm": 863.6561279296875, "learning_rate": 4.9636974705137305e-06, "loss": 19.1722, "step": 276040 }, { "epoch": 0.5576384652367312, "grad_norm": 741.5858154296875, "learning_rate": 4.9633484139526975e-06, "loss": 16.5389, "step": 276050 }, { "epoch": 0.557658665869415, "grad_norm": 362.6703796386719, "learning_rate": 4.9629993575702995e-06, "loss": 31.5292, "step": 276060 }, { "epoch": 0.5576788665020989, "grad_norm": 111.65090942382812, "learning_rate": 4.962650301368238e-06, "loss": 11.8779, "step": 276070 }, { "epoch": 0.5576990671347827, "grad_norm": 286.2112121582031, "learning_rate": 4.962301245348215e-06, "loss": 15.2731, "step": 276080 }, { "epoch": 0.5577192677674665, "grad_norm": 246.57623291015625, "learning_rate": 4.961952189511932e-06, "loss": 17.6612, "step": 276090 }, { "epoch": 0.5577394684001503, "grad_norm": 290.2296142578125, "learning_rate": 4.961603133861086e-06, "loss": 13.3983, "step": 276100 }, { "epoch": 0.5577596690328341, "grad_norm": 74.03767395019531, "learning_rate": 4.961254078397386e-06, "loss": 19.3411, "step": 276110 }, { "epoch": 0.557779869665518, "grad_norm": 29.67168426513672, "learning_rate": 4.960905023122526e-06, "loss": 18.0837, "step": 276120 }, { "epoch": 0.5578000702982018, "grad_norm": 171.3652801513672, "learning_rate": 4.9605559680382104e-06, "loss": 17.9385, "step": 276130 }, { "epoch": 0.5578202709308856, "grad_norm": 493.3277282714844, "learning_rate": 4.960206913146141e-06, "loss": 13.3611, "step": 276140 }, { "epoch": 0.5578404715635694, "grad_norm": 266.99627685546875, "learning_rate": 4.9598578584480186e-06, "loss": 7.4088, "step": 276150 }, { "epoch": 0.5578606721962532, "grad_norm": 22.441242218017578, "learning_rate": 4.959508803945543e-06, "loss": 11.7688, "step": 276160 }, { "epoch": 0.5578808728289371, "grad_norm": 61.81486892700195, "learning_rate": 4.9591597496404165e-06, "loss": 8.9547, "step": 276170 }, { "epoch": 0.5579010734616208, "grad_norm": 150.5062713623047, "learning_rate": 4.958810695534343e-06, "loss": 9.9549, "step": 276180 }, { "epoch": 0.5579212740943046, "grad_norm": 228.0824737548828, "learning_rate": 4.958461641629018e-06, "loss": 14.4607, "step": 276190 }, { "epoch": 0.5579414747269884, "grad_norm": 126.62825775146484, "learning_rate": 4.9581125879261476e-06, "loss": 10.2002, "step": 276200 }, { "epoch": 0.5579616753596722, "grad_norm": 178.60145568847656, "learning_rate": 4.957763534427431e-06, "loss": 22.2349, "step": 276210 }, { "epoch": 0.557981875992356, "grad_norm": 259.2889099121094, "learning_rate": 4.9574144811345695e-06, "loss": 12.3807, "step": 276220 }, { "epoch": 0.5580020766250399, "grad_norm": 417.0218505859375, "learning_rate": 4.957065428049265e-06, "loss": 13.7907, "step": 276230 }, { "epoch": 0.5580222772577237, "grad_norm": 205.4576873779297, "learning_rate": 4.956716375173219e-06, "loss": 26.9122, "step": 276240 }, { "epoch": 0.5580424778904075, "grad_norm": 181.435546875, "learning_rate": 4.956367322508131e-06, "loss": 11.1796, "step": 276250 }, { "epoch": 0.5580626785230913, "grad_norm": 256.149658203125, "learning_rate": 4.956018270055703e-06, "loss": 24.0192, "step": 276260 }, { "epoch": 0.5580828791557751, "grad_norm": 0.08139084279537201, "learning_rate": 4.9556692178176395e-06, "loss": 15.2448, "step": 276270 }, { "epoch": 0.558103079788459, "grad_norm": 200.0952606201172, "learning_rate": 4.955320165795636e-06, "loss": 17.302, "step": 276280 }, { "epoch": 0.5581232804211428, "grad_norm": 150.37730407714844, "learning_rate": 4.954971113991397e-06, "loss": 15.0633, "step": 276290 }, { "epoch": 0.5581434810538266, "grad_norm": 83.9626693725586, "learning_rate": 4.954622062406623e-06, "loss": 24.1653, "step": 276300 }, { "epoch": 0.5581636816865104, "grad_norm": 239.12542724609375, "learning_rate": 4.954273011043018e-06, "loss": 24.0703, "step": 276310 }, { "epoch": 0.5581838823191942, "grad_norm": 416.95379638671875, "learning_rate": 4.9539239599022784e-06, "loss": 19.8419, "step": 276320 }, { "epoch": 0.5582040829518781, "grad_norm": 267.5330810546875, "learning_rate": 4.953574908986108e-06, "loss": 23.1466, "step": 276330 }, { "epoch": 0.5582242835845619, "grad_norm": 350.47320556640625, "learning_rate": 4.95322585829621e-06, "loss": 26.1812, "step": 276340 }, { "epoch": 0.5582444842172457, "grad_norm": 130.4193878173828, "learning_rate": 4.952876807834281e-06, "loss": 27.7858, "step": 276350 }, { "epoch": 0.5582646848499295, "grad_norm": 165.8634796142578, "learning_rate": 4.952527757602025e-06, "loss": 19.2748, "step": 276360 }, { "epoch": 0.5582848854826133, "grad_norm": 333.1188049316406, "learning_rate": 4.952178707601144e-06, "loss": 63.2994, "step": 276370 }, { "epoch": 0.5583050861152972, "grad_norm": 647.6149291992188, "learning_rate": 4.951829657833337e-06, "loss": 18.7595, "step": 276380 }, { "epoch": 0.558325286747981, "grad_norm": 278.6905517578125, "learning_rate": 4.951480608300308e-06, "loss": 14.63, "step": 276390 }, { "epoch": 0.5583454873806648, "grad_norm": 111.368408203125, "learning_rate": 4.951131559003756e-06, "loss": 15.6562, "step": 276400 }, { "epoch": 0.5583656880133486, "grad_norm": 467.6835632324219, "learning_rate": 4.950782509945383e-06, "loss": 16.9683, "step": 276410 }, { "epoch": 0.5583858886460324, "grad_norm": 324.4986572265625, "learning_rate": 4.950433461126888e-06, "loss": 17.2026, "step": 276420 }, { "epoch": 0.5584060892787162, "grad_norm": 188.76190185546875, "learning_rate": 4.950084412549978e-06, "loss": 11.6193, "step": 276430 }, { "epoch": 0.5584262899114, "grad_norm": 35.11980438232422, "learning_rate": 4.949735364216348e-06, "loss": 16.675, "step": 276440 }, { "epoch": 0.5584464905440838, "grad_norm": 188.30287170410156, "learning_rate": 4.9493863161277016e-06, "loss": 13.5002, "step": 276450 }, { "epoch": 0.5584666911767676, "grad_norm": 166.82485961914062, "learning_rate": 4.949037268285741e-06, "loss": 33.3765, "step": 276460 }, { "epoch": 0.5584868918094514, "grad_norm": 325.6683349609375, "learning_rate": 4.948688220692167e-06, "loss": 22.6485, "step": 276470 }, { "epoch": 0.5585070924421353, "grad_norm": 278.64642333984375, "learning_rate": 4.94833917334868e-06, "loss": 13.4649, "step": 276480 }, { "epoch": 0.5585272930748191, "grad_norm": 220.89254760742188, "learning_rate": 4.9479901262569795e-06, "loss": 40.8484, "step": 276490 }, { "epoch": 0.5585474937075029, "grad_norm": 129.99549865722656, "learning_rate": 4.9476410794187726e-06, "loss": 20.1284, "step": 276500 }, { "epoch": 0.5585676943401867, "grad_norm": 353.2266540527344, "learning_rate": 4.947292032835754e-06, "loss": 18.9604, "step": 276510 }, { "epoch": 0.5585878949728705, "grad_norm": 284.7356872558594, "learning_rate": 4.946942986509628e-06, "loss": 19.1195, "step": 276520 }, { "epoch": 0.5586080956055544, "grad_norm": 282.4049072265625, "learning_rate": 4.946593940442097e-06, "loss": 10.563, "step": 276530 }, { "epoch": 0.5586282962382382, "grad_norm": 70.0137710571289, "learning_rate": 4.9462448946348594e-06, "loss": 13.5357, "step": 276540 }, { "epoch": 0.558648496870922, "grad_norm": 3.5875234603881836, "learning_rate": 4.945895849089618e-06, "loss": 22.4368, "step": 276550 }, { "epoch": 0.5586686975036058, "grad_norm": 174.52244567871094, "learning_rate": 4.945546803808074e-06, "loss": 20.2856, "step": 276560 }, { "epoch": 0.5586888981362896, "grad_norm": 488.70703125, "learning_rate": 4.945197758791928e-06, "loss": 20.5878, "step": 276570 }, { "epoch": 0.5587090987689735, "grad_norm": 191.0488739013672, "learning_rate": 4.944848714042879e-06, "loss": 17.3379, "step": 276580 }, { "epoch": 0.5587292994016573, "grad_norm": 158.88970947265625, "learning_rate": 4.9444996695626325e-06, "loss": 10.8986, "step": 276590 }, { "epoch": 0.5587495000343411, "grad_norm": 85.32459259033203, "learning_rate": 4.94415062535289e-06, "loss": 17.8002, "step": 276600 }, { "epoch": 0.5587697006670249, "grad_norm": 428.6336364746094, "learning_rate": 4.943801581415348e-06, "loss": 14.0157, "step": 276610 }, { "epoch": 0.5587899012997087, "grad_norm": 317.2505187988281, "learning_rate": 4.9434525377517115e-06, "loss": 25.4891, "step": 276620 }, { "epoch": 0.5588101019323926, "grad_norm": 37.396183013916016, "learning_rate": 4.9431034943636816e-06, "loss": 27.2328, "step": 276630 }, { "epoch": 0.5588303025650764, "grad_norm": 107.27095794677734, "learning_rate": 4.942754451252957e-06, "loss": 18.4312, "step": 276640 }, { "epoch": 0.5588505031977602, "grad_norm": 11.751585960388184, "learning_rate": 4.942405408421238e-06, "loss": 9.4702, "step": 276650 }, { "epoch": 0.558870703830444, "grad_norm": 264.1418151855469, "learning_rate": 4.942056365870231e-06, "loss": 9.3747, "step": 276660 }, { "epoch": 0.5588909044631278, "grad_norm": 617.4063720703125, "learning_rate": 4.941707323601633e-06, "loss": 24.539, "step": 276670 }, { "epoch": 0.5589111050958117, "grad_norm": 19.626880645751953, "learning_rate": 4.941358281617148e-06, "loss": 22.7, "step": 276680 }, { "epoch": 0.5589313057284954, "grad_norm": 237.05990600585938, "learning_rate": 4.941009239918474e-06, "loss": 26.0639, "step": 276690 }, { "epoch": 0.5589515063611792, "grad_norm": 497.498779296875, "learning_rate": 4.940660198507315e-06, "loss": 19.7217, "step": 276700 }, { "epoch": 0.558971706993863, "grad_norm": 106.92137145996094, "learning_rate": 4.940311157385369e-06, "loss": 16.221, "step": 276710 }, { "epoch": 0.5589919076265468, "grad_norm": 0.0, "learning_rate": 4.939962116554343e-06, "loss": 20.5559, "step": 276720 }, { "epoch": 0.5590121082592306, "grad_norm": 252.7835693359375, "learning_rate": 4.93961307601593e-06, "loss": 30.2953, "step": 276730 }, { "epoch": 0.5590323088919145, "grad_norm": 278.4783630371094, "learning_rate": 4.939264035771837e-06, "loss": 12.1939, "step": 276740 }, { "epoch": 0.5590525095245983, "grad_norm": 272.07281494140625, "learning_rate": 4.938914995823764e-06, "loss": 23.7275, "step": 276750 }, { "epoch": 0.5590727101572821, "grad_norm": 553.9537963867188, "learning_rate": 4.938565956173413e-06, "loss": 23.4649, "step": 276760 }, { "epoch": 0.5590929107899659, "grad_norm": 219.12022399902344, "learning_rate": 4.938216916822483e-06, "loss": 14.8551, "step": 276770 }, { "epoch": 0.5591131114226497, "grad_norm": 122.51792907714844, "learning_rate": 4.937867877772675e-06, "loss": 16.9952, "step": 276780 }, { "epoch": 0.5591333120553336, "grad_norm": 0.0, "learning_rate": 4.937518839025695e-06, "loss": 10.4408, "step": 276790 }, { "epoch": 0.5591535126880174, "grad_norm": 101.05311584472656, "learning_rate": 4.937169800583237e-06, "loss": 11.3117, "step": 276800 }, { "epoch": 0.5591737133207012, "grad_norm": 455.2959289550781, "learning_rate": 4.936820762447007e-06, "loss": 21.4421, "step": 276810 }, { "epoch": 0.559193913953385, "grad_norm": 122.63258361816406, "learning_rate": 4.936471724618706e-06, "loss": 12.3239, "step": 276820 }, { "epoch": 0.5592141145860688, "grad_norm": 216.90130615234375, "learning_rate": 4.936122687100034e-06, "loss": 21.066, "step": 276830 }, { "epoch": 0.5592343152187527, "grad_norm": 222.79347229003906, "learning_rate": 4.93577364989269e-06, "loss": 25.8168, "step": 276840 }, { "epoch": 0.5592545158514365, "grad_norm": 322.8423156738281, "learning_rate": 4.93542461299838e-06, "loss": 14.4072, "step": 276850 }, { "epoch": 0.5592747164841203, "grad_norm": 254.0032501220703, "learning_rate": 4.935075576418802e-06, "loss": 13.762, "step": 276860 }, { "epoch": 0.5592949171168041, "grad_norm": 447.92071533203125, "learning_rate": 4.934726540155656e-06, "loss": 25.997, "step": 276870 }, { "epoch": 0.5593151177494879, "grad_norm": 217.64109802246094, "learning_rate": 4.934377504210648e-06, "loss": 23.0112, "step": 276880 }, { "epoch": 0.5593353183821718, "grad_norm": 550.6470336914062, "learning_rate": 4.934028468585473e-06, "loss": 15.1522, "step": 276890 }, { "epoch": 0.5593555190148556, "grad_norm": 221.68679809570312, "learning_rate": 4.933679433281837e-06, "loss": 17.4858, "step": 276900 }, { "epoch": 0.5593757196475394, "grad_norm": 0.0, "learning_rate": 4.933330398301438e-06, "loss": 20.6486, "step": 276910 }, { "epoch": 0.5593959202802232, "grad_norm": 181.3440399169922, "learning_rate": 4.932981363645981e-06, "loss": 25.8347, "step": 276920 }, { "epoch": 0.559416120912907, "grad_norm": 407.78955078125, "learning_rate": 4.932632329317162e-06, "loss": 15.2693, "step": 276930 }, { "epoch": 0.5594363215455908, "grad_norm": 189.6531219482422, "learning_rate": 4.9322832953166856e-06, "loss": 17.8088, "step": 276940 }, { "epoch": 0.5594565221782746, "grad_norm": 521.8555297851562, "learning_rate": 4.931934261646255e-06, "loss": 23.1037, "step": 276950 }, { "epoch": 0.5594767228109584, "grad_norm": 5.679108619689941, "learning_rate": 4.931585228307564e-06, "loss": 8.7707, "step": 276960 }, { "epoch": 0.5594969234436422, "grad_norm": 34.98332595825195, "learning_rate": 4.931236195302321e-06, "loss": 20.5484, "step": 276970 }, { "epoch": 0.559517124076326, "grad_norm": 899.7645263671875, "learning_rate": 4.930887162632225e-06, "loss": 19.9753, "step": 276980 }, { "epoch": 0.5595373247090099, "grad_norm": 312.412353515625, "learning_rate": 4.930538130298975e-06, "loss": 33.1237, "step": 276990 }, { "epoch": 0.5595575253416937, "grad_norm": 496.9939270019531, "learning_rate": 4.9301890983042744e-06, "loss": 25.8742, "step": 277000 }, { "epoch": 0.5595777259743775, "grad_norm": 309.1521911621094, "learning_rate": 4.929840066649824e-06, "loss": 19.256, "step": 277010 }, { "epoch": 0.5595979266070613, "grad_norm": 242.05531311035156, "learning_rate": 4.929491035337325e-06, "loss": 18.177, "step": 277020 }, { "epoch": 0.5596181272397451, "grad_norm": 190.0525360107422, "learning_rate": 4.929142004368475e-06, "loss": 15.252, "step": 277030 }, { "epoch": 0.559638327872429, "grad_norm": 328.8849182128906, "learning_rate": 4.928792973744983e-06, "loss": 37.2918, "step": 277040 }, { "epoch": 0.5596585285051128, "grad_norm": 495.4390869140625, "learning_rate": 4.928443943468541e-06, "loss": 15.348, "step": 277050 }, { "epoch": 0.5596787291377966, "grad_norm": 310.7992858886719, "learning_rate": 4.928094913540857e-06, "loss": 12.3787, "step": 277060 }, { "epoch": 0.5596989297704804, "grad_norm": 442.8692932128906, "learning_rate": 4.927745883963629e-06, "loss": 17.7837, "step": 277070 }, { "epoch": 0.5597191304031642, "grad_norm": 252.6629180908203, "learning_rate": 4.92739685473856e-06, "loss": 14.0357, "step": 277080 }, { "epoch": 0.559739331035848, "grad_norm": 208.44969177246094, "learning_rate": 4.927047825867349e-06, "loss": 17.5744, "step": 277090 }, { "epoch": 0.5597595316685319, "grad_norm": 11.847308158874512, "learning_rate": 4.926698797351697e-06, "loss": 17.6188, "step": 277100 }, { "epoch": 0.5597797323012157, "grad_norm": 157.04449462890625, "learning_rate": 4.926349769193308e-06, "loss": 12.0241, "step": 277110 }, { "epoch": 0.5597999329338995, "grad_norm": 449.544921875, "learning_rate": 4.92600074139388e-06, "loss": 17.4144, "step": 277120 }, { "epoch": 0.5598201335665833, "grad_norm": 493.2408142089844, "learning_rate": 4.925651713955115e-06, "loss": 25.883, "step": 277130 }, { "epoch": 0.5598403341992672, "grad_norm": 321.0693359375, "learning_rate": 4.925302686878717e-06, "loss": 24.7436, "step": 277140 }, { "epoch": 0.559860534831951, "grad_norm": 191.90711975097656, "learning_rate": 4.924953660166383e-06, "loss": 22.9805, "step": 277150 }, { "epoch": 0.5598807354646348, "grad_norm": 353.6603698730469, "learning_rate": 4.924604633819815e-06, "loss": 16.4409, "step": 277160 }, { "epoch": 0.5599009360973186, "grad_norm": 140.645751953125, "learning_rate": 4.924255607840717e-06, "loss": 20.0223, "step": 277170 }, { "epoch": 0.5599211367300024, "grad_norm": 390.0442199707031, "learning_rate": 4.923906582230786e-06, "loss": 22.79, "step": 277180 }, { "epoch": 0.5599413373626863, "grad_norm": 207.0362091064453, "learning_rate": 4.923557556991724e-06, "loss": 11.8552, "step": 277190 }, { "epoch": 0.55996153799537, "grad_norm": 177.92315673828125, "learning_rate": 4.923208532125235e-06, "loss": 23.1536, "step": 277200 }, { "epoch": 0.5599817386280538, "grad_norm": 346.44482421875, "learning_rate": 4.9228595076330196e-06, "loss": 8.9916, "step": 277210 }, { "epoch": 0.5600019392607376, "grad_norm": 0.0, "learning_rate": 4.9225104835167755e-06, "loss": 10.0352, "step": 277220 }, { "epoch": 0.5600221398934214, "grad_norm": 349.52838134765625, "learning_rate": 4.9221614597782066e-06, "loss": 10.003, "step": 277230 }, { "epoch": 0.5600423405261052, "grad_norm": 204.0720672607422, "learning_rate": 4.921812436419014e-06, "loss": 19.6391, "step": 277240 }, { "epoch": 0.5600625411587891, "grad_norm": 0.0, "learning_rate": 4.921463413440898e-06, "loss": 20.6789, "step": 277250 }, { "epoch": 0.5600827417914729, "grad_norm": 158.17068481445312, "learning_rate": 4.9211143908455575e-06, "loss": 20.8643, "step": 277260 }, { "epoch": 0.5601029424241567, "grad_norm": 231.6426544189453, "learning_rate": 4.920765368634699e-06, "loss": 10.6356, "step": 277270 }, { "epoch": 0.5601231430568405, "grad_norm": 365.8121337890625, "learning_rate": 4.920416346810019e-06, "loss": 8.3911, "step": 277280 }, { "epoch": 0.5601433436895243, "grad_norm": 574.3558349609375, "learning_rate": 4.920067325373219e-06, "loss": 27.9118, "step": 277290 }, { "epoch": 0.5601635443222082, "grad_norm": 161.2744598388672, "learning_rate": 4.9197183043260035e-06, "loss": 17.7848, "step": 277300 }, { "epoch": 0.560183744954892, "grad_norm": 116.30492401123047, "learning_rate": 4.91936928367007e-06, "loss": 21.9178, "step": 277310 }, { "epoch": 0.5602039455875758, "grad_norm": 240.8571319580078, "learning_rate": 4.919020263407121e-06, "loss": 16.5004, "step": 277320 }, { "epoch": 0.5602241462202596, "grad_norm": 164.7035369873047, "learning_rate": 4.918671243538859e-06, "loss": 13.0571, "step": 277330 }, { "epoch": 0.5602443468529434, "grad_norm": 598.8656005859375, "learning_rate": 4.91832222406698e-06, "loss": 24.1363, "step": 277340 }, { "epoch": 0.5602645474856273, "grad_norm": 0.0, "learning_rate": 4.91797320499319e-06, "loss": 10.7766, "step": 277350 }, { "epoch": 0.5602847481183111, "grad_norm": 73.9151611328125, "learning_rate": 4.9176241863191895e-06, "loss": 20.3695, "step": 277360 }, { "epoch": 0.5603049487509949, "grad_norm": 530.8099365234375, "learning_rate": 4.917275168046678e-06, "loss": 29.2862, "step": 277370 }, { "epoch": 0.5603251493836787, "grad_norm": 158.08453369140625, "learning_rate": 4.916926150177358e-06, "loss": 14.0699, "step": 277380 }, { "epoch": 0.5603453500163625, "grad_norm": 413.6506042480469, "learning_rate": 4.916577132712929e-06, "loss": 22.7805, "step": 277390 }, { "epoch": 0.5603655506490464, "grad_norm": 397.7765808105469, "learning_rate": 4.9162281156550945e-06, "loss": 23.7533, "step": 277400 }, { "epoch": 0.5603857512817302, "grad_norm": 22.954145431518555, "learning_rate": 4.915879099005552e-06, "loss": 17.063, "step": 277410 }, { "epoch": 0.560405951914414, "grad_norm": 212.639892578125, "learning_rate": 4.915530082766005e-06, "loss": 15.2453, "step": 277420 }, { "epoch": 0.5604261525470978, "grad_norm": 304.84326171875, "learning_rate": 4.915181066938156e-06, "loss": 19.4742, "step": 277430 }, { "epoch": 0.5604463531797816, "grad_norm": 105.3963394165039, "learning_rate": 4.914832051523702e-06, "loss": 11.0662, "step": 277440 }, { "epoch": 0.5604665538124654, "grad_norm": 119.39349365234375, "learning_rate": 4.9144830365243464e-06, "loss": 16.348, "step": 277450 }, { "epoch": 0.5604867544451492, "grad_norm": 520.6262817382812, "learning_rate": 4.914134021941792e-06, "loss": 21.6157, "step": 277460 }, { "epoch": 0.560506955077833, "grad_norm": 260.0318298339844, "learning_rate": 4.913785007777737e-06, "loss": 10.5007, "step": 277470 }, { "epoch": 0.5605271557105168, "grad_norm": 31.290496826171875, "learning_rate": 4.9134359940338815e-06, "loss": 22.6351, "step": 277480 }, { "epoch": 0.5605473563432006, "grad_norm": 257.4629821777344, "learning_rate": 4.913086980711932e-06, "loss": 31.9134, "step": 277490 }, { "epoch": 0.5605675569758845, "grad_norm": 183.59674072265625, "learning_rate": 4.9127379678135825e-06, "loss": 18.1462, "step": 277500 }, { "epoch": 0.5605877576085683, "grad_norm": 377.9106140136719, "learning_rate": 4.91238895534054e-06, "loss": 9.8943, "step": 277510 }, { "epoch": 0.5606079582412521, "grad_norm": 42.025794982910156, "learning_rate": 4.912039943294502e-06, "loss": 16.3456, "step": 277520 }, { "epoch": 0.5606281588739359, "grad_norm": 91.11203002929688, "learning_rate": 4.911690931677172e-06, "loss": 16.2205, "step": 277530 }, { "epoch": 0.5606483595066197, "grad_norm": 18.846906661987305, "learning_rate": 4.911341920490248e-06, "loss": 11.6036, "step": 277540 }, { "epoch": 0.5606685601393036, "grad_norm": 190.92080688476562, "learning_rate": 4.910992909735432e-06, "loss": 26.1974, "step": 277550 }, { "epoch": 0.5606887607719874, "grad_norm": 222.0124053955078, "learning_rate": 4.910643899414429e-06, "loss": 15.7674, "step": 277560 }, { "epoch": 0.5607089614046712, "grad_norm": 285.14190673828125, "learning_rate": 4.910294889528934e-06, "loss": 14.1402, "step": 277570 }, { "epoch": 0.560729162037355, "grad_norm": 57.25175476074219, "learning_rate": 4.909945880080651e-06, "loss": 12.8143, "step": 277580 }, { "epoch": 0.5607493626700388, "grad_norm": 591.3250732421875, "learning_rate": 4.909596871071283e-06, "loss": 17.5264, "step": 277590 }, { "epoch": 0.5607695633027227, "grad_norm": 189.08969116210938, "learning_rate": 4.9092478625025266e-06, "loss": 6.4216, "step": 277600 }, { "epoch": 0.5607897639354065, "grad_norm": 242.05821228027344, "learning_rate": 4.908898854376086e-06, "loss": 13.0832, "step": 277610 }, { "epoch": 0.5608099645680903, "grad_norm": 57.378501892089844, "learning_rate": 4.908549846693662e-06, "loss": 8.1898, "step": 277620 }, { "epoch": 0.5608301652007741, "grad_norm": 506.9417419433594, "learning_rate": 4.908200839456955e-06, "loss": 12.4636, "step": 277630 }, { "epoch": 0.5608503658334579, "grad_norm": 217.1978759765625, "learning_rate": 4.907851832667663e-06, "loss": 11.8349, "step": 277640 }, { "epoch": 0.5608705664661418, "grad_norm": 258.0445251464844, "learning_rate": 4.9075028263274925e-06, "loss": 13.3343, "step": 277650 }, { "epoch": 0.5608907670988256, "grad_norm": 371.9382629394531, "learning_rate": 4.907153820438142e-06, "loss": 12.356, "step": 277660 }, { "epoch": 0.5609109677315094, "grad_norm": 495.5435485839844, "learning_rate": 4.9068048150013124e-06, "loss": 26.5264, "step": 277670 }, { "epoch": 0.5609311683641932, "grad_norm": 227.03375244140625, "learning_rate": 4.906455810018705e-06, "loss": 14.9031, "step": 277680 }, { "epoch": 0.560951368996877, "grad_norm": 311.9685363769531, "learning_rate": 4.906106805492021e-06, "loss": 21.3379, "step": 277690 }, { "epoch": 0.5609715696295609, "grad_norm": 462.1083984375, "learning_rate": 4.90575780142296e-06, "loss": 20.5691, "step": 277700 }, { "epoch": 0.5609917702622446, "grad_norm": 260.3223876953125, "learning_rate": 4.905408797813223e-06, "loss": 17.064, "step": 277710 }, { "epoch": 0.5610119708949284, "grad_norm": 8.443470001220703, "learning_rate": 4.9050597946645155e-06, "loss": 21.3101, "step": 277720 }, { "epoch": 0.5610321715276122, "grad_norm": 0.0, "learning_rate": 4.904710791978532e-06, "loss": 15.1152, "step": 277730 }, { "epoch": 0.561052372160296, "grad_norm": 173.3296661376953, "learning_rate": 4.9043617897569775e-06, "loss": 16.595, "step": 277740 }, { "epoch": 0.5610725727929798, "grad_norm": 213.83651733398438, "learning_rate": 4.904012788001553e-06, "loss": 18.1264, "step": 277750 }, { "epoch": 0.5610927734256637, "grad_norm": 823.8798217773438, "learning_rate": 4.903663786713957e-06, "loss": 25.2985, "step": 277760 }, { "epoch": 0.5611129740583475, "grad_norm": 204.80154418945312, "learning_rate": 4.903314785895893e-06, "loss": 25.7648, "step": 277770 }, { "epoch": 0.5611331746910313, "grad_norm": 636.3010864257812, "learning_rate": 4.902965785549061e-06, "loss": 22.7246, "step": 277780 }, { "epoch": 0.5611533753237151, "grad_norm": 110.04837036132812, "learning_rate": 4.902616785675162e-06, "loss": 21.0661, "step": 277790 }, { "epoch": 0.5611735759563989, "grad_norm": 307.3761291503906, "learning_rate": 4.902267786275895e-06, "loss": 26.5423, "step": 277800 }, { "epoch": 0.5611937765890828, "grad_norm": 827.3224487304688, "learning_rate": 4.901918787352965e-06, "loss": 21.8417, "step": 277810 }, { "epoch": 0.5612139772217666, "grad_norm": 195.31674194335938, "learning_rate": 4.901569788908071e-06, "loss": 16.373, "step": 277820 }, { "epoch": 0.5612341778544504, "grad_norm": 218.94174194335938, "learning_rate": 4.901220790942913e-06, "loss": 22.5598, "step": 277830 }, { "epoch": 0.5612543784871342, "grad_norm": 280.2568359375, "learning_rate": 4.900871793459193e-06, "loss": 23.4876, "step": 277840 }, { "epoch": 0.561274579119818, "grad_norm": 257.2622985839844, "learning_rate": 4.900522796458613e-06, "loss": 11.6112, "step": 277850 }, { "epoch": 0.5612947797525019, "grad_norm": 189.01766967773438, "learning_rate": 4.900173799942873e-06, "loss": 22.0472, "step": 277860 }, { "epoch": 0.5613149803851857, "grad_norm": 336.29656982421875, "learning_rate": 4.89982480391367e-06, "loss": 14.6663, "step": 277870 }, { "epoch": 0.5613351810178695, "grad_norm": 258.64019775390625, "learning_rate": 4.899475808372714e-06, "loss": 18.0924, "step": 277880 }, { "epoch": 0.5613553816505533, "grad_norm": 304.8099060058594, "learning_rate": 4.899126813321697e-06, "loss": 24.9734, "step": 277890 }, { "epoch": 0.5613755822832371, "grad_norm": 21.472389221191406, "learning_rate": 4.898777818762325e-06, "loss": 12.5957, "step": 277900 }, { "epoch": 0.561395782915921, "grad_norm": 193.42630004882812, "learning_rate": 4.898428824696298e-06, "loss": 26.4979, "step": 277910 }, { "epoch": 0.5614159835486048, "grad_norm": 144.01705932617188, "learning_rate": 4.898079831125316e-06, "loss": 18.4144, "step": 277920 }, { "epoch": 0.5614361841812886, "grad_norm": 183.90003967285156, "learning_rate": 4.897730838051081e-06, "loss": 27.4748, "step": 277930 }, { "epoch": 0.5614563848139724, "grad_norm": 427.65057373046875, "learning_rate": 4.897381845475294e-06, "loss": 26.6908, "step": 277940 }, { "epoch": 0.5614765854466562, "grad_norm": 191.4708709716797, "learning_rate": 4.897032853399653e-06, "loss": 20.3446, "step": 277950 }, { "epoch": 0.5614967860793401, "grad_norm": 0.0, "learning_rate": 4.896683861825863e-06, "loss": 12.0643, "step": 277960 }, { "epoch": 0.5615169867120238, "grad_norm": 310.6806335449219, "learning_rate": 4.896334870755623e-06, "loss": 20.1374, "step": 277970 }, { "epoch": 0.5615371873447076, "grad_norm": 262.95379638671875, "learning_rate": 4.895985880190636e-06, "loss": 17.8645, "step": 277980 }, { "epoch": 0.5615573879773914, "grad_norm": 168.00076293945312, "learning_rate": 4.895636890132599e-06, "loss": 19.8995, "step": 277990 }, { "epoch": 0.5615775886100752, "grad_norm": 983.3975830078125, "learning_rate": 4.895287900583216e-06, "loss": 34.7633, "step": 278000 }, { "epoch": 0.561597789242759, "grad_norm": 317.4393005371094, "learning_rate": 4.894938911544188e-06, "loss": 20.6055, "step": 278010 }, { "epoch": 0.5616179898754429, "grad_norm": 59.72663116455078, "learning_rate": 4.894589923017212e-06, "loss": 14.8865, "step": 278020 }, { "epoch": 0.5616381905081267, "grad_norm": 315.2657775878906, "learning_rate": 4.894240935003994e-06, "loss": 16.9661, "step": 278030 }, { "epoch": 0.5616583911408105, "grad_norm": 232.17445373535156, "learning_rate": 4.893891947506234e-06, "loss": 15.6879, "step": 278040 }, { "epoch": 0.5616785917734943, "grad_norm": 291.62628173828125, "learning_rate": 4.89354296052563e-06, "loss": 22.8517, "step": 278050 }, { "epoch": 0.5616987924061781, "grad_norm": 149.29132080078125, "learning_rate": 4.893193974063885e-06, "loss": 22.9086, "step": 278060 }, { "epoch": 0.561718993038862, "grad_norm": 416.2674255371094, "learning_rate": 4.892844988122701e-06, "loss": 26.3406, "step": 278070 }, { "epoch": 0.5617391936715458, "grad_norm": 260.8059387207031, "learning_rate": 4.892496002703777e-06, "loss": 18.8027, "step": 278080 }, { "epoch": 0.5617593943042296, "grad_norm": 134.00277709960938, "learning_rate": 4.892147017808812e-06, "loss": 35.4937, "step": 278090 }, { "epoch": 0.5617795949369134, "grad_norm": 272.9956970214844, "learning_rate": 4.891798033439511e-06, "loss": 15.6187, "step": 278100 }, { "epoch": 0.5617997955695972, "grad_norm": 140.95272827148438, "learning_rate": 4.891449049597574e-06, "loss": 18.0173, "step": 278110 }, { "epoch": 0.5618199962022811, "grad_norm": 19.853229522705078, "learning_rate": 4.891100066284701e-06, "loss": 12.6602, "step": 278120 }, { "epoch": 0.5618401968349649, "grad_norm": 189.73074340820312, "learning_rate": 4.8907510835025924e-06, "loss": 18.1913, "step": 278130 }, { "epoch": 0.5618603974676487, "grad_norm": 305.2147521972656, "learning_rate": 4.890402101252951e-06, "loss": 17.8252, "step": 278140 }, { "epoch": 0.5618805981003325, "grad_norm": 252.6506805419922, "learning_rate": 4.890053119537475e-06, "loss": 13.1726, "step": 278150 }, { "epoch": 0.5619007987330163, "grad_norm": 47.733394622802734, "learning_rate": 4.889704138357867e-06, "loss": 24.0107, "step": 278160 }, { "epoch": 0.5619209993657002, "grad_norm": 4.665319442749023, "learning_rate": 4.889355157715829e-06, "loss": 24.5208, "step": 278170 }, { "epoch": 0.561941199998384, "grad_norm": 0.4642222225666046, "learning_rate": 4.889006177613059e-06, "loss": 21.927, "step": 278180 }, { "epoch": 0.5619614006310678, "grad_norm": 100.05586242675781, "learning_rate": 4.888657198051259e-06, "loss": 23.3187, "step": 278190 }, { "epoch": 0.5619816012637516, "grad_norm": 167.355224609375, "learning_rate": 4.888308219032133e-06, "loss": 20.2599, "step": 278200 }, { "epoch": 0.5620018018964354, "grad_norm": 248.23931884765625, "learning_rate": 4.8879592405573765e-06, "loss": 22.6232, "step": 278210 }, { "epoch": 0.5620220025291192, "grad_norm": 177.37477111816406, "learning_rate": 4.887610262628694e-06, "loss": 14.6495, "step": 278220 }, { "epoch": 0.562042203161803, "grad_norm": 111.62626647949219, "learning_rate": 4.887261285247787e-06, "loss": 9.8284, "step": 278230 }, { "epoch": 0.5620624037944868, "grad_norm": 323.8587951660156, "learning_rate": 4.886912308416353e-06, "loss": 54.8779, "step": 278240 }, { "epoch": 0.5620826044271706, "grad_norm": 229.23570251464844, "learning_rate": 4.886563332136093e-06, "loss": 15.1459, "step": 278250 }, { "epoch": 0.5621028050598544, "grad_norm": 369.2301025390625, "learning_rate": 4.886214356408712e-06, "loss": 13.09, "step": 278260 }, { "epoch": 0.5621230056925383, "grad_norm": 184.39901733398438, "learning_rate": 4.885865381235909e-06, "loss": 13.1392, "step": 278270 }, { "epoch": 0.5621432063252221, "grad_norm": 298.9051513671875, "learning_rate": 4.885516406619383e-06, "loss": 20.6686, "step": 278280 }, { "epoch": 0.5621634069579059, "grad_norm": 437.9660949707031, "learning_rate": 4.885167432560836e-06, "loss": 15.5861, "step": 278290 }, { "epoch": 0.5621836075905897, "grad_norm": 207.29396057128906, "learning_rate": 4.88481845906197e-06, "loss": 26.7479, "step": 278300 }, { "epoch": 0.5622038082232735, "grad_norm": 1248.2421875, "learning_rate": 4.884469486124484e-06, "loss": 31.3452, "step": 278310 }, { "epoch": 0.5622240088559574, "grad_norm": 86.65067291259766, "learning_rate": 4.884120513750079e-06, "loss": 14.1464, "step": 278320 }, { "epoch": 0.5622442094886412, "grad_norm": 517.8101196289062, "learning_rate": 4.8837715419404596e-06, "loss": 26.7779, "step": 278330 }, { "epoch": 0.562264410121325, "grad_norm": 196.8671875, "learning_rate": 4.88342257069732e-06, "loss": 8.7219, "step": 278340 }, { "epoch": 0.5622846107540088, "grad_norm": 169.15975952148438, "learning_rate": 4.883073600022366e-06, "loss": 18.1894, "step": 278350 }, { "epoch": 0.5623048113866926, "grad_norm": 187.2698211669922, "learning_rate": 4.882724629917298e-06, "loss": 19.4205, "step": 278360 }, { "epoch": 0.5623250120193765, "grad_norm": 512.2772216796875, "learning_rate": 4.8823756603838155e-06, "loss": 14.9236, "step": 278370 }, { "epoch": 0.5623452126520603, "grad_norm": 0.0, "learning_rate": 4.882026691423619e-06, "loss": 25.3222, "step": 278380 }, { "epoch": 0.5623654132847441, "grad_norm": 256.5857238769531, "learning_rate": 4.881677723038411e-06, "loss": 18.4878, "step": 278390 }, { "epoch": 0.5623856139174279, "grad_norm": 356.4528503417969, "learning_rate": 4.881328755229892e-06, "loss": 32.9091, "step": 278400 }, { "epoch": 0.5624058145501117, "grad_norm": 300.02752685546875, "learning_rate": 4.88097978799976e-06, "loss": 17.7273, "step": 278410 }, { "epoch": 0.5624260151827956, "grad_norm": 182.89024353027344, "learning_rate": 4.880630821349718e-06, "loss": 20.8308, "step": 278420 }, { "epoch": 0.5624462158154794, "grad_norm": 178.96348571777344, "learning_rate": 4.8802818552814695e-06, "loss": 15.8326, "step": 278430 }, { "epoch": 0.5624664164481632, "grad_norm": 356.2645263671875, "learning_rate": 4.879932889796711e-06, "loss": 17.4712, "step": 278440 }, { "epoch": 0.562486617080847, "grad_norm": 715.0014038085938, "learning_rate": 4.879583924897146e-06, "loss": 24.7871, "step": 278450 }, { "epoch": 0.5625068177135308, "grad_norm": 566.8184204101562, "learning_rate": 4.879234960584474e-06, "loss": 21.7201, "step": 278460 }, { "epoch": 0.5625270183462147, "grad_norm": 293.0679016113281, "learning_rate": 4.878885996860396e-06, "loss": 20.1526, "step": 278470 }, { "epoch": 0.5625472189788984, "grad_norm": 223.39427185058594, "learning_rate": 4.878537033726612e-06, "loss": 16.9731, "step": 278480 }, { "epoch": 0.5625674196115822, "grad_norm": 166.48878479003906, "learning_rate": 4.878188071184827e-06, "loss": 25.2858, "step": 278490 }, { "epoch": 0.562587620244266, "grad_norm": 349.0497741699219, "learning_rate": 4.877839109236735e-06, "loss": 22.3946, "step": 278500 }, { "epoch": 0.5626078208769498, "grad_norm": 361.4746398925781, "learning_rate": 4.877490147884042e-06, "loss": 16.3511, "step": 278510 }, { "epoch": 0.5626280215096336, "grad_norm": 152.279052734375, "learning_rate": 4.8771411871284465e-06, "loss": 35.5206, "step": 278520 }, { "epoch": 0.5626482221423175, "grad_norm": 108.67223358154297, "learning_rate": 4.87679222697165e-06, "loss": 14.9876, "step": 278530 }, { "epoch": 0.5626684227750013, "grad_norm": 303.44219970703125, "learning_rate": 4.8764432674153536e-06, "loss": 20.0102, "step": 278540 }, { "epoch": 0.5626886234076851, "grad_norm": 296.330810546875, "learning_rate": 4.8760943084612585e-06, "loss": 11.3171, "step": 278550 }, { "epoch": 0.5627088240403689, "grad_norm": 283.5829772949219, "learning_rate": 4.875745350111064e-06, "loss": 15.1945, "step": 278560 }, { "epoch": 0.5627290246730527, "grad_norm": 866.6296997070312, "learning_rate": 4.87539639236647e-06, "loss": 23.7782, "step": 278570 }, { "epoch": 0.5627492253057366, "grad_norm": 284.0879821777344, "learning_rate": 4.87504743522918e-06, "loss": 25.1312, "step": 278580 }, { "epoch": 0.5627694259384204, "grad_norm": 380.7284240722656, "learning_rate": 4.874698478700895e-06, "loss": 21.5105, "step": 278590 }, { "epoch": 0.5627896265711042, "grad_norm": 440.097412109375, "learning_rate": 4.874349522783313e-06, "loss": 19.4864, "step": 278600 }, { "epoch": 0.562809827203788, "grad_norm": 292.55072021484375, "learning_rate": 4.874000567478137e-06, "loss": 18.3508, "step": 278610 }, { "epoch": 0.5628300278364718, "grad_norm": 345.33953857421875, "learning_rate": 4.873651612787067e-06, "loss": 24.2758, "step": 278620 }, { "epoch": 0.5628502284691557, "grad_norm": 251.29620361328125, "learning_rate": 4.873302658711803e-06, "loss": 13.6794, "step": 278630 }, { "epoch": 0.5628704291018395, "grad_norm": 326.3305969238281, "learning_rate": 4.872953705254045e-06, "loss": 10.7216, "step": 278640 }, { "epoch": 0.5628906297345233, "grad_norm": 76.95008087158203, "learning_rate": 4.8726047524154985e-06, "loss": 19.5541, "step": 278650 }, { "epoch": 0.5629108303672071, "grad_norm": 99.47364044189453, "learning_rate": 4.872255800197859e-06, "loss": 15.3627, "step": 278660 }, { "epoch": 0.562931030999891, "grad_norm": 131.0481414794922, "learning_rate": 4.87190684860283e-06, "loss": 17.2188, "step": 278670 }, { "epoch": 0.5629512316325748, "grad_norm": 214.32907104492188, "learning_rate": 4.871557897632111e-06, "loss": 21.0534, "step": 278680 }, { "epoch": 0.5629714322652586, "grad_norm": 369.5810241699219, "learning_rate": 4.871208947287404e-06, "loss": 38.4723, "step": 278690 }, { "epoch": 0.5629916328979424, "grad_norm": 382.8352966308594, "learning_rate": 4.870859997570407e-06, "loss": 18.5264, "step": 278700 }, { "epoch": 0.5630118335306262, "grad_norm": 402.9958801269531, "learning_rate": 4.870511048482824e-06, "loss": 28.559, "step": 278710 }, { "epoch": 0.56303203416331, "grad_norm": 147.1080322265625, "learning_rate": 4.870162100026355e-06, "loss": 9.6957, "step": 278720 }, { "epoch": 0.5630522347959938, "grad_norm": 342.0841979980469, "learning_rate": 4.8698131522027e-06, "loss": 19.1482, "step": 278730 }, { "epoch": 0.5630724354286776, "grad_norm": 141.9215850830078, "learning_rate": 4.8694642050135595e-06, "loss": 21.0353, "step": 278740 }, { "epoch": 0.5630926360613614, "grad_norm": 310.8072814941406, "learning_rate": 4.869115258460636e-06, "loss": 19.3756, "step": 278750 }, { "epoch": 0.5631128366940452, "grad_norm": 397.4310302734375, "learning_rate": 4.868766312545627e-06, "loss": 23.5017, "step": 278760 }, { "epoch": 0.563133037326729, "grad_norm": 84.85016632080078, "learning_rate": 4.868417367270234e-06, "loss": 24.5779, "step": 278770 }, { "epoch": 0.5631532379594129, "grad_norm": 251.9962615966797, "learning_rate": 4.8680684226361624e-06, "loss": 26.8925, "step": 278780 }, { "epoch": 0.5631734385920967, "grad_norm": 63.970096588134766, "learning_rate": 4.867719478645106e-06, "loss": 16.9777, "step": 278790 }, { "epoch": 0.5631936392247805, "grad_norm": 111.63780975341797, "learning_rate": 4.86737053529877e-06, "loss": 13.4846, "step": 278800 }, { "epoch": 0.5632138398574643, "grad_norm": 313.2712707519531, "learning_rate": 4.867021592598855e-06, "loss": 10.8606, "step": 278810 }, { "epoch": 0.5632340404901481, "grad_norm": 224.7801513671875, "learning_rate": 4.86667265054706e-06, "loss": 20.9353, "step": 278820 }, { "epoch": 0.563254241122832, "grad_norm": 402.0690002441406, "learning_rate": 4.8663237091450856e-06, "loss": 22.5183, "step": 278830 }, { "epoch": 0.5632744417555158, "grad_norm": 351.3423156738281, "learning_rate": 4.865974768394635e-06, "loss": 24.5882, "step": 278840 }, { "epoch": 0.5632946423881996, "grad_norm": 257.9015808105469, "learning_rate": 4.865625828297405e-06, "loss": 14.636, "step": 278850 }, { "epoch": 0.5633148430208834, "grad_norm": 108.882568359375, "learning_rate": 4.865276888855098e-06, "loss": 22.5738, "step": 278860 }, { "epoch": 0.5633350436535672, "grad_norm": 84.61980438232422, "learning_rate": 4.864927950069417e-06, "loss": 16.2749, "step": 278870 }, { "epoch": 0.5633552442862511, "grad_norm": 167.0817108154297, "learning_rate": 4.864579011942061e-06, "loss": 14.1125, "step": 278880 }, { "epoch": 0.5633754449189349, "grad_norm": 261.7198791503906, "learning_rate": 4.8642300744747285e-06, "loss": 20.6825, "step": 278890 }, { "epoch": 0.5633956455516187, "grad_norm": 486.28759765625, "learning_rate": 4.863881137669123e-06, "loss": 17.67, "step": 278900 }, { "epoch": 0.5634158461843025, "grad_norm": 118.86724853515625, "learning_rate": 4.8635322015269455e-06, "loss": 20.1311, "step": 278910 }, { "epoch": 0.5634360468169863, "grad_norm": 258.0909729003906, "learning_rate": 4.863183266049895e-06, "loss": 13.5972, "step": 278920 }, { "epoch": 0.5634562474496702, "grad_norm": 500.7968444824219, "learning_rate": 4.86283433123967e-06, "loss": 20.9346, "step": 278930 }, { "epoch": 0.563476448082354, "grad_norm": 1376.89111328125, "learning_rate": 4.862485397097979e-06, "loss": 14.9393, "step": 278940 }, { "epoch": 0.5634966487150378, "grad_norm": 310.7217102050781, "learning_rate": 4.862136463626512e-06, "loss": 8.2509, "step": 278950 }, { "epoch": 0.5635168493477216, "grad_norm": 370.8546447753906, "learning_rate": 4.861787530826979e-06, "loss": 17.6365, "step": 278960 }, { "epoch": 0.5635370499804054, "grad_norm": 199.53269958496094, "learning_rate": 4.861438598701076e-06, "loss": 24.2201, "step": 278970 }, { "epoch": 0.5635572506130893, "grad_norm": 170.06607055664062, "learning_rate": 4.861089667250504e-06, "loss": 18.1026, "step": 278980 }, { "epoch": 0.563577451245773, "grad_norm": 141.2415008544922, "learning_rate": 4.860740736476963e-06, "loss": 21.7971, "step": 278990 }, { "epoch": 0.5635976518784568, "grad_norm": 290.6050109863281, "learning_rate": 4.860391806382157e-06, "loss": 13.1886, "step": 279000 }, { "epoch": 0.5636178525111406, "grad_norm": 177.5428466796875, "learning_rate": 4.860042876967784e-06, "loss": 12.5035, "step": 279010 }, { "epoch": 0.5636380531438244, "grad_norm": 178.58155822753906, "learning_rate": 4.859693948235542e-06, "loss": 15.7363, "step": 279020 }, { "epoch": 0.5636582537765082, "grad_norm": 115.41307830810547, "learning_rate": 4.859345020187137e-06, "loss": 15.7589, "step": 279030 }, { "epoch": 0.5636784544091921, "grad_norm": 711.85107421875, "learning_rate": 4.858996092824268e-06, "loss": 15.9931, "step": 279040 }, { "epoch": 0.5636986550418759, "grad_norm": 77.49337768554688, "learning_rate": 4.8586471661486345e-06, "loss": 18.4843, "step": 279050 }, { "epoch": 0.5637188556745597, "grad_norm": 126.08858489990234, "learning_rate": 4.8582982401619376e-06, "loss": 9.4663, "step": 279060 }, { "epoch": 0.5637390563072435, "grad_norm": 476.90789794921875, "learning_rate": 4.857949314865878e-06, "loss": 21.5077, "step": 279070 }, { "epoch": 0.5637592569399273, "grad_norm": 203.4716339111328, "learning_rate": 4.857600390262156e-06, "loss": 13.9092, "step": 279080 }, { "epoch": 0.5637794575726112, "grad_norm": 412.09698486328125, "learning_rate": 4.857251466352471e-06, "loss": 17.9019, "step": 279090 }, { "epoch": 0.563799658205295, "grad_norm": 287.5928039550781, "learning_rate": 4.856902543138528e-06, "loss": 18.6657, "step": 279100 }, { "epoch": 0.5638198588379788, "grad_norm": 556.4721069335938, "learning_rate": 4.856553620622021e-06, "loss": 19.7105, "step": 279110 }, { "epoch": 0.5638400594706626, "grad_norm": 64.25711822509766, "learning_rate": 4.856204698804656e-06, "loss": 17.6424, "step": 279120 }, { "epoch": 0.5638602601033464, "grad_norm": 399.5543212890625, "learning_rate": 4.855855777688133e-06, "loss": 13.523, "step": 279130 }, { "epoch": 0.5638804607360303, "grad_norm": 292.6450500488281, "learning_rate": 4.85550685727415e-06, "loss": 27.7678, "step": 279140 }, { "epoch": 0.5639006613687141, "grad_norm": 448.06787109375, "learning_rate": 4.85515793756441e-06, "loss": 13.8679, "step": 279150 }, { "epoch": 0.5639208620013979, "grad_norm": 650.065185546875, "learning_rate": 4.854809018560611e-06, "loss": 26.5691, "step": 279160 }, { "epoch": 0.5639410626340817, "grad_norm": 159.2895050048828, "learning_rate": 4.8544601002644585e-06, "loss": 19.5402, "step": 279170 }, { "epoch": 0.5639612632667655, "grad_norm": 319.780517578125, "learning_rate": 4.854111182677646e-06, "loss": 16.3698, "step": 279180 }, { "epoch": 0.5639814638994494, "grad_norm": 115.94760131835938, "learning_rate": 4.85376226580188e-06, "loss": 11.9853, "step": 279190 }, { "epoch": 0.5640016645321332, "grad_norm": 331.8049011230469, "learning_rate": 4.853413349638859e-06, "loss": 17.2559, "step": 279200 }, { "epoch": 0.564021865164817, "grad_norm": 469.6151428222656, "learning_rate": 4.853064434190283e-06, "loss": 20.4374, "step": 279210 }, { "epoch": 0.5640420657975008, "grad_norm": 256.16326904296875, "learning_rate": 4.852715519457854e-06, "loss": 24.8087, "step": 279220 }, { "epoch": 0.5640622664301846, "grad_norm": 67.7621078491211, "learning_rate": 4.852366605443271e-06, "loss": 13.9539, "step": 279230 }, { "epoch": 0.5640824670628685, "grad_norm": 250.4800262451172, "learning_rate": 4.8520176921482355e-06, "loss": 17.1578, "step": 279240 }, { "epoch": 0.5641026676955522, "grad_norm": 153.327880859375, "learning_rate": 4.851668779574446e-06, "loss": 20.7425, "step": 279250 }, { "epoch": 0.564122868328236, "grad_norm": 177.28121948242188, "learning_rate": 4.851319867723607e-06, "loss": 20.4874, "step": 279260 }, { "epoch": 0.5641430689609198, "grad_norm": 176.99404907226562, "learning_rate": 4.8509709565974165e-06, "loss": 20.3399, "step": 279270 }, { "epoch": 0.5641632695936036, "grad_norm": 215.6617431640625, "learning_rate": 4.850622046197576e-06, "loss": 23.6927, "step": 279280 }, { "epoch": 0.5641834702262875, "grad_norm": 349.8871765136719, "learning_rate": 4.8502731365257855e-06, "loss": 20.1061, "step": 279290 }, { "epoch": 0.5642036708589713, "grad_norm": 324.34814453125, "learning_rate": 4.8499242275837444e-06, "loss": 19.505, "step": 279300 }, { "epoch": 0.5642238714916551, "grad_norm": 498.5208740234375, "learning_rate": 4.8495753193731545e-06, "loss": 35.8034, "step": 279310 }, { "epoch": 0.5642440721243389, "grad_norm": 1105.0838623046875, "learning_rate": 4.8492264118957165e-06, "loss": 39.6623, "step": 279320 }, { "epoch": 0.5642642727570227, "grad_norm": 438.07525634765625, "learning_rate": 4.848877505153131e-06, "loss": 28.9274, "step": 279330 }, { "epoch": 0.5642844733897066, "grad_norm": 797.2931518554688, "learning_rate": 4.848528599147098e-06, "loss": 23.7597, "step": 279340 }, { "epoch": 0.5643046740223904, "grad_norm": 234.2232208251953, "learning_rate": 4.848179693879319e-06, "loss": 23.0527, "step": 279350 }, { "epoch": 0.5643248746550742, "grad_norm": 123.10804748535156, "learning_rate": 4.8478307893514934e-06, "loss": 16.4166, "step": 279360 }, { "epoch": 0.564345075287758, "grad_norm": 269.87384033203125, "learning_rate": 4.847481885565322e-06, "loss": 14.1329, "step": 279370 }, { "epoch": 0.5643652759204418, "grad_norm": 188.09141540527344, "learning_rate": 4.8471329825225034e-06, "loss": 17.183, "step": 279380 }, { "epoch": 0.5643854765531257, "grad_norm": 284.40997314453125, "learning_rate": 4.846784080224744e-06, "loss": 15.2214, "step": 279390 }, { "epoch": 0.5644056771858095, "grad_norm": 454.8497009277344, "learning_rate": 4.846435178673737e-06, "loss": 25.5871, "step": 279400 }, { "epoch": 0.5644258778184933, "grad_norm": 321.1175231933594, "learning_rate": 4.846086277871188e-06, "loss": 24.3139, "step": 279410 }, { "epoch": 0.5644460784511771, "grad_norm": 413.5763244628906, "learning_rate": 4.845737377818796e-06, "loss": 18.6391, "step": 279420 }, { "epoch": 0.5644662790838609, "grad_norm": 291.66485595703125, "learning_rate": 4.845388478518261e-06, "loss": 43.5896, "step": 279430 }, { "epoch": 0.5644864797165448, "grad_norm": 380.6156921386719, "learning_rate": 4.845039579971283e-06, "loss": 28.6505, "step": 279440 }, { "epoch": 0.5645066803492286, "grad_norm": 426.30291748046875, "learning_rate": 4.8446906821795645e-06, "loss": 19.8765, "step": 279450 }, { "epoch": 0.5645268809819124, "grad_norm": 370.0867614746094, "learning_rate": 4.8443417851448035e-06, "loss": 12.0832, "step": 279460 }, { "epoch": 0.5645470816145962, "grad_norm": 334.0140075683594, "learning_rate": 4.843992888868702e-06, "loss": 18.118, "step": 279470 }, { "epoch": 0.56456728224728, "grad_norm": 594.2415161132812, "learning_rate": 4.84364399335296e-06, "loss": 24.8712, "step": 279480 }, { "epoch": 0.5645874828799639, "grad_norm": 584.8529052734375, "learning_rate": 4.84329509859928e-06, "loss": 22.2273, "step": 279490 }, { "epoch": 0.5646076835126476, "grad_norm": 162.4872283935547, "learning_rate": 4.842946204609359e-06, "loss": 18.376, "step": 279500 }, { "epoch": 0.5646278841453314, "grad_norm": 324.3424987792969, "learning_rate": 4.8425973113848995e-06, "loss": 29.0795, "step": 279510 }, { "epoch": 0.5646480847780152, "grad_norm": 281.09527587890625, "learning_rate": 4.842248418927603e-06, "loss": 35.7864, "step": 279520 }, { "epoch": 0.564668285410699, "grad_norm": 554.9288330078125, "learning_rate": 4.841899527239166e-06, "loss": 28.341, "step": 279530 }, { "epoch": 0.5646884860433828, "grad_norm": 330.2297668457031, "learning_rate": 4.841550636321292e-06, "loss": 22.8294, "step": 279540 }, { "epoch": 0.5647086866760667, "grad_norm": 181.87396240234375, "learning_rate": 4.8412017461756845e-06, "loss": 32.9023, "step": 279550 }, { "epoch": 0.5647288873087505, "grad_norm": 463.0613098144531, "learning_rate": 4.8408528568040365e-06, "loss": 21.8571, "step": 279560 }, { "epoch": 0.5647490879414343, "grad_norm": 198.1949920654297, "learning_rate": 4.840503968208054e-06, "loss": 10.4083, "step": 279570 }, { "epoch": 0.5647692885741181, "grad_norm": 163.3216094970703, "learning_rate": 4.840155080389436e-06, "loss": 13.9256, "step": 279580 }, { "epoch": 0.5647894892068019, "grad_norm": 320.76776123046875, "learning_rate": 4.839806193349882e-06, "loss": 14.379, "step": 279590 }, { "epoch": 0.5648096898394858, "grad_norm": 49.361270904541016, "learning_rate": 4.839457307091093e-06, "loss": 23.2858, "step": 279600 }, { "epoch": 0.5648298904721696, "grad_norm": 21.223169326782227, "learning_rate": 4.839108421614771e-06, "loss": 10.919, "step": 279610 }, { "epoch": 0.5648500911048534, "grad_norm": 265.06097412109375, "learning_rate": 4.838759536922614e-06, "loss": 33.289, "step": 279620 }, { "epoch": 0.5648702917375372, "grad_norm": 210.98927307128906, "learning_rate": 4.838410653016322e-06, "loss": 19.93, "step": 279630 }, { "epoch": 0.564890492370221, "grad_norm": 327.9624938964844, "learning_rate": 4.838061769897598e-06, "loss": 41.1663, "step": 279640 }, { "epoch": 0.5649106930029049, "grad_norm": 326.2887878417969, "learning_rate": 4.837712887568143e-06, "loss": 21.4061, "step": 279650 }, { "epoch": 0.5649308936355887, "grad_norm": 182.30650329589844, "learning_rate": 4.837364006029654e-06, "loss": 13.3754, "step": 279660 }, { "epoch": 0.5649510942682725, "grad_norm": 678.6213989257812, "learning_rate": 4.837015125283833e-06, "loss": 28.6831, "step": 279670 }, { "epoch": 0.5649712949009563, "grad_norm": 258.17755126953125, "learning_rate": 4.8366662453323826e-06, "loss": 24.4909, "step": 279680 }, { "epoch": 0.5649914955336401, "grad_norm": 776.3302001953125, "learning_rate": 4.836317366176999e-06, "loss": 23.5516, "step": 279690 }, { "epoch": 0.565011696166324, "grad_norm": 517.0597534179688, "learning_rate": 4.835968487819384e-06, "loss": 19.0119, "step": 279700 }, { "epoch": 0.5650318967990078, "grad_norm": 232.81039428710938, "learning_rate": 4.835619610261242e-06, "loss": 26.0337, "step": 279710 }, { "epoch": 0.5650520974316916, "grad_norm": 341.7688903808594, "learning_rate": 4.835270733504267e-06, "loss": 13.609, "step": 279720 }, { "epoch": 0.5650722980643754, "grad_norm": 418.46661376953125, "learning_rate": 4.834921857550163e-06, "loss": 16.9479, "step": 279730 }, { "epoch": 0.5650924986970592, "grad_norm": 178.58306884765625, "learning_rate": 4.834572982400631e-06, "loss": 13.4777, "step": 279740 }, { "epoch": 0.5651126993297431, "grad_norm": 311.50030517578125, "learning_rate": 4.8342241080573696e-06, "loss": 18.0479, "step": 279750 }, { "epoch": 0.5651328999624268, "grad_norm": 264.6207275390625, "learning_rate": 4.83387523452208e-06, "loss": 25.5544, "step": 279760 }, { "epoch": 0.5651531005951106, "grad_norm": 295.7739562988281, "learning_rate": 4.833526361796461e-06, "loss": 18.4108, "step": 279770 }, { "epoch": 0.5651733012277944, "grad_norm": 285.9744567871094, "learning_rate": 4.833177489882217e-06, "loss": 16.8202, "step": 279780 }, { "epoch": 0.5651935018604782, "grad_norm": 0.0, "learning_rate": 4.832828618781042e-06, "loss": 15.3053, "step": 279790 }, { "epoch": 0.565213702493162, "grad_norm": 164.79718017578125, "learning_rate": 4.832479748494643e-06, "loss": 24.7727, "step": 279800 }, { "epoch": 0.5652339031258459, "grad_norm": 346.725341796875, "learning_rate": 4.832130879024717e-06, "loss": 24.7308, "step": 279810 }, { "epoch": 0.5652541037585297, "grad_norm": 298.8397216796875, "learning_rate": 4.831782010372964e-06, "loss": 10.3633, "step": 279820 }, { "epoch": 0.5652743043912135, "grad_norm": 478.70404052734375, "learning_rate": 4.831433142541086e-06, "loss": 18.5964, "step": 279830 }, { "epoch": 0.5652945050238973, "grad_norm": 8.424243927001953, "learning_rate": 4.831084275530782e-06, "loss": 20.0509, "step": 279840 }, { "epoch": 0.5653147056565812, "grad_norm": 193.91409301757812, "learning_rate": 4.830735409343752e-06, "loss": 22.068, "step": 279850 }, { "epoch": 0.565334906289265, "grad_norm": 442.943603515625, "learning_rate": 4.830386543981696e-06, "loss": 22.7795, "step": 279860 }, { "epoch": 0.5653551069219488, "grad_norm": 316.213134765625, "learning_rate": 4.83003767944632e-06, "loss": 13.0437, "step": 279870 }, { "epoch": 0.5653753075546326, "grad_norm": 134.48486328125, "learning_rate": 4.829688815739315e-06, "loss": 12.8055, "step": 279880 }, { "epoch": 0.5653955081873164, "grad_norm": 349.2442932128906, "learning_rate": 4.829339952862388e-06, "loss": 18.6042, "step": 279890 }, { "epoch": 0.5654157088200003, "grad_norm": 171.7113494873047, "learning_rate": 4.828991090817238e-06, "loss": 18.2154, "step": 279900 }, { "epoch": 0.5654359094526841, "grad_norm": 183.54483032226562, "learning_rate": 4.828642229605564e-06, "loss": 24.6666, "step": 279910 }, { "epoch": 0.5654561100853679, "grad_norm": 259.3833923339844, "learning_rate": 4.8282933692290665e-06, "loss": 17.1714, "step": 279920 }, { "epoch": 0.5654763107180517, "grad_norm": 4.759011268615723, "learning_rate": 4.827944509689445e-06, "loss": 8.914, "step": 279930 }, { "epoch": 0.5654965113507355, "grad_norm": 169.48133850097656, "learning_rate": 4.827595650988404e-06, "loss": 27.8543, "step": 279940 }, { "epoch": 0.5655167119834194, "grad_norm": 411.7699279785156, "learning_rate": 4.827246793127639e-06, "loss": 11.6595, "step": 279950 }, { "epoch": 0.5655369126161032, "grad_norm": 259.6701965332031, "learning_rate": 4.826897936108853e-06, "loss": 16.7389, "step": 279960 }, { "epoch": 0.565557113248787, "grad_norm": 356.6174621582031, "learning_rate": 4.8265490799337455e-06, "loss": 22.3434, "step": 279970 }, { "epoch": 0.5655773138814708, "grad_norm": 146.75379943847656, "learning_rate": 4.826200224604017e-06, "loss": 7.6727, "step": 279980 }, { "epoch": 0.5655975145141546, "grad_norm": 367.9844055175781, "learning_rate": 4.8258513701213665e-06, "loss": 13.4061, "step": 279990 }, { "epoch": 0.5656177151468385, "grad_norm": 866.1168823242188, "learning_rate": 4.825502516487497e-06, "loss": 18.4437, "step": 280000 }, { "epoch": 0.5656379157795222, "grad_norm": 314.72027587890625, "learning_rate": 4.825153663704104e-06, "loss": 22.8611, "step": 280010 }, { "epoch": 0.565658116412206, "grad_norm": 309.2377624511719, "learning_rate": 4.824804811772893e-06, "loss": 26.0872, "step": 280020 }, { "epoch": 0.5656783170448898, "grad_norm": 111.04196166992188, "learning_rate": 4.824455960695563e-06, "loss": 21.2143, "step": 280030 }, { "epoch": 0.5656985176775736, "grad_norm": 356.427734375, "learning_rate": 4.8241071104738115e-06, "loss": 22.1832, "step": 280040 }, { "epoch": 0.5657187183102574, "grad_norm": 348.3055114746094, "learning_rate": 4.823758261109341e-06, "loss": 11.1854, "step": 280050 }, { "epoch": 0.5657389189429413, "grad_norm": 230.2288360595703, "learning_rate": 4.8234094126038524e-06, "loss": 19.0344, "step": 280060 }, { "epoch": 0.5657591195756251, "grad_norm": 214.37643432617188, "learning_rate": 4.823060564959045e-06, "loss": 17.091, "step": 280070 }, { "epoch": 0.5657793202083089, "grad_norm": 166.15428161621094, "learning_rate": 4.8227117181766165e-06, "loss": 12.8462, "step": 280080 }, { "epoch": 0.5657995208409927, "grad_norm": 538.3643798828125, "learning_rate": 4.82236287225827e-06, "loss": 24.2431, "step": 280090 }, { "epoch": 0.5658197214736765, "grad_norm": 0.0, "learning_rate": 4.822014027205708e-06, "loss": 15.2164, "step": 280100 }, { "epoch": 0.5658399221063604, "grad_norm": 148.37744140625, "learning_rate": 4.8216651830206265e-06, "loss": 21.1389, "step": 280110 }, { "epoch": 0.5658601227390442, "grad_norm": 560.9716186523438, "learning_rate": 4.821316339704727e-06, "loss": 23.7016, "step": 280120 }, { "epoch": 0.565880323371728, "grad_norm": 486.83258056640625, "learning_rate": 4.8209674972597116e-06, "loss": 27.1241, "step": 280130 }, { "epoch": 0.5659005240044118, "grad_norm": 129.1114044189453, "learning_rate": 4.820618655687277e-06, "loss": 19.0981, "step": 280140 }, { "epoch": 0.5659207246370956, "grad_norm": 331.1588439941406, "learning_rate": 4.820269814989125e-06, "loss": 17.4233, "step": 280150 }, { "epoch": 0.5659409252697795, "grad_norm": 177.93783569335938, "learning_rate": 4.819920975166959e-06, "loss": 23.7519, "step": 280160 }, { "epoch": 0.5659611259024633, "grad_norm": 56.42469787597656, "learning_rate": 4.8195721362224725e-06, "loss": 17.0853, "step": 280170 }, { "epoch": 0.5659813265351471, "grad_norm": 456.16192626953125, "learning_rate": 4.819223298157372e-06, "loss": 24.0965, "step": 280180 }, { "epoch": 0.5660015271678309, "grad_norm": 253.4281768798828, "learning_rate": 4.818874460973356e-06, "loss": 22.7249, "step": 280190 }, { "epoch": 0.5660217278005147, "grad_norm": 395.2105712890625, "learning_rate": 4.818525624672122e-06, "loss": 18.8625, "step": 280200 }, { "epoch": 0.5660419284331986, "grad_norm": 172.1767120361328, "learning_rate": 4.818176789255372e-06, "loss": 9.7295, "step": 280210 }, { "epoch": 0.5660621290658824, "grad_norm": 127.03837585449219, "learning_rate": 4.8178279547248055e-06, "loss": 18.899, "step": 280220 }, { "epoch": 0.5660823296985662, "grad_norm": 307.8238830566406, "learning_rate": 4.817479121082127e-06, "loss": 26.3571, "step": 280230 }, { "epoch": 0.56610253033125, "grad_norm": 151.37759399414062, "learning_rate": 4.81713028832903e-06, "loss": 15.1616, "step": 280240 }, { "epoch": 0.5661227309639338, "grad_norm": 218.04379272460938, "learning_rate": 4.816781456467218e-06, "loss": 14.5946, "step": 280250 }, { "epoch": 0.5661429315966177, "grad_norm": 361.037353515625, "learning_rate": 4.816432625498394e-06, "loss": 19.4318, "step": 280260 }, { "epoch": 0.5661631322293014, "grad_norm": 0.0, "learning_rate": 4.816083795424252e-06, "loss": 44.2089, "step": 280270 }, { "epoch": 0.5661833328619852, "grad_norm": 358.23236083984375, "learning_rate": 4.815734966246496e-06, "loss": 26.2569, "step": 280280 }, { "epoch": 0.566203533494669, "grad_norm": 469.8074035644531, "learning_rate": 4.815386137966827e-06, "loss": 29.477, "step": 280290 }, { "epoch": 0.5662237341273528, "grad_norm": 153.26101684570312, "learning_rate": 4.815037310586941e-06, "loss": 21.8876, "step": 280300 }, { "epoch": 0.5662439347600366, "grad_norm": 359.6404724121094, "learning_rate": 4.8146884841085405e-06, "loss": 22.1362, "step": 280310 }, { "epoch": 0.5662641353927205, "grad_norm": 1.9093093872070312, "learning_rate": 4.8143396585333295e-06, "loss": 14.7408, "step": 280320 }, { "epoch": 0.5662843360254043, "grad_norm": 205.42385864257812, "learning_rate": 4.813990833863001e-06, "loss": 24.3132, "step": 280330 }, { "epoch": 0.5663045366580881, "grad_norm": 244.7013702392578, "learning_rate": 4.81364201009926e-06, "loss": 29.4617, "step": 280340 }, { "epoch": 0.5663247372907719, "grad_norm": 440.9937438964844, "learning_rate": 4.813293187243806e-06, "loss": 15.6994, "step": 280350 }, { "epoch": 0.5663449379234557, "grad_norm": 316.2843933105469, "learning_rate": 4.812944365298337e-06, "loss": 14.7233, "step": 280360 }, { "epoch": 0.5663651385561396, "grad_norm": 181.1551971435547, "learning_rate": 4.812595544264554e-06, "loss": 22.2812, "step": 280370 }, { "epoch": 0.5663853391888234, "grad_norm": 348.6634826660156, "learning_rate": 4.812246724144158e-06, "loss": 20.6473, "step": 280380 }, { "epoch": 0.5664055398215072, "grad_norm": 447.79522705078125, "learning_rate": 4.811897904938851e-06, "loss": 32.4703, "step": 280390 }, { "epoch": 0.566425740454191, "grad_norm": 600.5847778320312, "learning_rate": 4.811549086650327e-06, "loss": 18.4391, "step": 280400 }, { "epoch": 0.5664459410868748, "grad_norm": 40.5888557434082, "learning_rate": 4.811200269280292e-06, "loss": 14.1961, "step": 280410 }, { "epoch": 0.5664661417195587, "grad_norm": 304.4341735839844, "learning_rate": 4.810851452830445e-06, "loss": 15.4045, "step": 280420 }, { "epoch": 0.5664863423522425, "grad_norm": 310.1363220214844, "learning_rate": 4.810502637302484e-06, "loss": 18.1209, "step": 280430 }, { "epoch": 0.5665065429849263, "grad_norm": 262.6470642089844, "learning_rate": 4.81015382269811e-06, "loss": 16.1632, "step": 280440 }, { "epoch": 0.5665267436176101, "grad_norm": 175.79693603515625, "learning_rate": 4.809805009019024e-06, "loss": 9.9751, "step": 280450 }, { "epoch": 0.566546944250294, "grad_norm": 301.2137756347656, "learning_rate": 4.809456196266925e-06, "loss": 30.9815, "step": 280460 }, { "epoch": 0.5665671448829778, "grad_norm": 197.34657287597656, "learning_rate": 4.809107384443511e-06, "loss": 11.9527, "step": 280470 }, { "epoch": 0.5665873455156616, "grad_norm": 263.9850158691406, "learning_rate": 4.808758573550488e-06, "loss": 17.5624, "step": 280480 }, { "epoch": 0.5666075461483454, "grad_norm": 470.81134033203125, "learning_rate": 4.8084097635895505e-06, "loss": 18.2175, "step": 280490 }, { "epoch": 0.5666277467810292, "grad_norm": 56.925140380859375, "learning_rate": 4.8080609545624004e-06, "loss": 12.1314, "step": 280500 }, { "epoch": 0.566647947413713, "grad_norm": 129.2071533203125, "learning_rate": 4.80771214647074e-06, "loss": 25.8828, "step": 280510 }, { "epoch": 0.5666681480463968, "grad_norm": 86.35612487792969, "learning_rate": 4.807363339316265e-06, "loss": 18.1436, "step": 280520 }, { "epoch": 0.5666883486790806, "grad_norm": 372.64178466796875, "learning_rate": 4.807014533100679e-06, "loss": 21.6, "step": 280530 }, { "epoch": 0.5667085493117644, "grad_norm": 295.4029541015625, "learning_rate": 4.806665727825679e-06, "loss": 16.8519, "step": 280540 }, { "epoch": 0.5667287499444482, "grad_norm": 227.572021484375, "learning_rate": 4.80631692349297e-06, "loss": 17.0906, "step": 280550 }, { "epoch": 0.566748950577132, "grad_norm": 487.73809814453125, "learning_rate": 4.805968120104246e-06, "loss": 20.0377, "step": 280560 }, { "epoch": 0.5667691512098159, "grad_norm": 467.6957702636719, "learning_rate": 4.805619317661211e-06, "loss": 15.7776, "step": 280570 }, { "epoch": 0.5667893518424997, "grad_norm": 399.56109619140625, "learning_rate": 4.805270516165564e-06, "loss": 18.6928, "step": 280580 }, { "epoch": 0.5668095524751835, "grad_norm": 191.09791564941406, "learning_rate": 4.8049217156190045e-06, "loss": 18.3524, "step": 280590 }, { "epoch": 0.5668297531078673, "grad_norm": 149.522705078125, "learning_rate": 4.8045729160232326e-06, "loss": 17.491, "step": 280600 }, { "epoch": 0.5668499537405511, "grad_norm": 30.250951766967773, "learning_rate": 4.80422411737995e-06, "loss": 12.2698, "step": 280610 }, { "epoch": 0.566870154373235, "grad_norm": 115.57962799072266, "learning_rate": 4.8038753196908534e-06, "loss": 13.818, "step": 280620 }, { "epoch": 0.5668903550059188, "grad_norm": 112.48053741455078, "learning_rate": 4.803526522957645e-06, "loss": 11.9326, "step": 280630 }, { "epoch": 0.5669105556386026, "grad_norm": 14.470004081726074, "learning_rate": 4.803177727182026e-06, "loss": 12.9828, "step": 280640 }, { "epoch": 0.5669307562712864, "grad_norm": 153.11111450195312, "learning_rate": 4.802828932365694e-06, "loss": 27.6957, "step": 280650 }, { "epoch": 0.5669509569039702, "grad_norm": 616.9156494140625, "learning_rate": 4.80248013851035e-06, "loss": 19.4223, "step": 280660 }, { "epoch": 0.5669711575366541, "grad_norm": 186.65170288085938, "learning_rate": 4.802131345617694e-06, "loss": 17.8819, "step": 280670 }, { "epoch": 0.5669913581693379, "grad_norm": 366.3613586425781, "learning_rate": 4.801782553689426e-06, "loss": 26.5888, "step": 280680 }, { "epoch": 0.5670115588020217, "grad_norm": 520.9959716796875, "learning_rate": 4.801433762727244e-06, "loss": 48.6489, "step": 280690 }, { "epoch": 0.5670317594347055, "grad_norm": 51.26625061035156, "learning_rate": 4.801084972732851e-06, "loss": 15.0705, "step": 280700 }, { "epoch": 0.5670519600673893, "grad_norm": 373.2862243652344, "learning_rate": 4.800736183707946e-06, "loss": 13.9215, "step": 280710 }, { "epoch": 0.5670721607000732, "grad_norm": 286.53619384765625, "learning_rate": 4.800387395654229e-06, "loss": 28.1129, "step": 280720 }, { "epoch": 0.567092361332757, "grad_norm": 430.1754455566406, "learning_rate": 4.800038608573398e-06, "loss": 13.2938, "step": 280730 }, { "epoch": 0.5671125619654408, "grad_norm": 303.8763427734375, "learning_rate": 4.799689822467157e-06, "loss": 15.9315, "step": 280740 }, { "epoch": 0.5671327625981246, "grad_norm": 368.7250061035156, "learning_rate": 4.799341037337203e-06, "loss": 23.5152, "step": 280750 }, { "epoch": 0.5671529632308084, "grad_norm": 331.9591369628906, "learning_rate": 4.798992253185233e-06, "loss": 15.6221, "step": 280760 }, { "epoch": 0.5671731638634923, "grad_norm": 137.53221130371094, "learning_rate": 4.798643470012956e-06, "loss": 9.2003, "step": 280770 }, { "epoch": 0.567193364496176, "grad_norm": 623.8389282226562, "learning_rate": 4.798294687822062e-06, "loss": 22.4159, "step": 280780 }, { "epoch": 0.5672135651288598, "grad_norm": 117.64341735839844, "learning_rate": 4.797945906614256e-06, "loss": 34.8272, "step": 280790 }, { "epoch": 0.5672337657615436, "grad_norm": 154.20986938476562, "learning_rate": 4.797597126391238e-06, "loss": 12.4807, "step": 280800 }, { "epoch": 0.5672539663942274, "grad_norm": 180.38404846191406, "learning_rate": 4.797248347154707e-06, "loss": 17.9203, "step": 280810 }, { "epoch": 0.5672741670269112, "grad_norm": 243.37831115722656, "learning_rate": 4.796899568906363e-06, "loss": 12.4638, "step": 280820 }, { "epoch": 0.5672943676595951, "grad_norm": 508.981201171875, "learning_rate": 4.796550791647905e-06, "loss": 23.8726, "step": 280830 }, { "epoch": 0.5673145682922789, "grad_norm": 148.24436950683594, "learning_rate": 4.796202015381035e-06, "loss": 21.203, "step": 280840 }, { "epoch": 0.5673347689249627, "grad_norm": 188.00076293945312, "learning_rate": 4.79585324010745e-06, "loss": 8.0256, "step": 280850 }, { "epoch": 0.5673549695576465, "grad_norm": 277.48779296875, "learning_rate": 4.795504465828853e-06, "loss": 21.021, "step": 280860 }, { "epoch": 0.5673751701903303, "grad_norm": 406.1884765625, "learning_rate": 4.795155692546942e-06, "loss": 22.936, "step": 280870 }, { "epoch": 0.5673953708230142, "grad_norm": 336.57647705078125, "learning_rate": 4.794806920263417e-06, "loss": 29.1364, "step": 280880 }, { "epoch": 0.567415571455698, "grad_norm": 290.390380859375, "learning_rate": 4.794458148979979e-06, "loss": 22.2395, "step": 280890 }, { "epoch": 0.5674357720883818, "grad_norm": 375.95501708984375, "learning_rate": 4.794109378698327e-06, "loss": 15.9913, "step": 280900 }, { "epoch": 0.5674559727210656, "grad_norm": 589.7791137695312, "learning_rate": 4.793760609420161e-06, "loss": 23.373, "step": 280910 }, { "epoch": 0.5674761733537494, "grad_norm": 120.37681579589844, "learning_rate": 4.7934118411471785e-06, "loss": 19.5685, "step": 280920 }, { "epoch": 0.5674963739864333, "grad_norm": 638.9361572265625, "learning_rate": 4.7930630738810855e-06, "loss": 38.6757, "step": 280930 }, { "epoch": 0.5675165746191171, "grad_norm": 335.6771240234375, "learning_rate": 4.792714307623574e-06, "loss": 28.7942, "step": 280940 }, { "epoch": 0.5675367752518009, "grad_norm": 110.70303344726562, "learning_rate": 4.79236554237635e-06, "loss": 15.0205, "step": 280950 }, { "epoch": 0.5675569758844847, "grad_norm": 106.36573028564453, "learning_rate": 4.792016778141111e-06, "loss": 17.518, "step": 280960 }, { "epoch": 0.5675771765171685, "grad_norm": 360.0838623046875, "learning_rate": 4.791668014919557e-06, "loss": 22.2234, "step": 280970 }, { "epoch": 0.5675973771498524, "grad_norm": 338.9099426269531, "learning_rate": 4.791319252713388e-06, "loss": 17.6014, "step": 280980 }, { "epoch": 0.5676175777825362, "grad_norm": 157.9730224609375, "learning_rate": 4.790970491524302e-06, "loss": 16.7947, "step": 280990 }, { "epoch": 0.56763777841522, "grad_norm": 9.623434066772461, "learning_rate": 4.7906217313540035e-06, "loss": 10.2614, "step": 281000 }, { "epoch": 0.5676579790479038, "grad_norm": 60.03096008300781, "learning_rate": 4.790272972204186e-06, "loss": 16.1123, "step": 281010 }, { "epoch": 0.5676781796805876, "grad_norm": 276.0263366699219, "learning_rate": 4.789924214076554e-06, "loss": 27.1024, "step": 281020 }, { "epoch": 0.5676983803132715, "grad_norm": 83.92981719970703, "learning_rate": 4.789575456972807e-06, "loss": 14.2442, "step": 281030 }, { "epoch": 0.5677185809459552, "grad_norm": 386.0709533691406, "learning_rate": 4.789226700894643e-06, "loss": 16.2561, "step": 281040 }, { "epoch": 0.567738781578639, "grad_norm": 168.31434631347656, "learning_rate": 4.788877945843762e-06, "loss": 15.5329, "step": 281050 }, { "epoch": 0.5677589822113228, "grad_norm": 181.3682861328125, "learning_rate": 4.7885291918218656e-06, "loss": 16.0113, "step": 281060 }, { "epoch": 0.5677791828440066, "grad_norm": 152.74374389648438, "learning_rate": 4.788180438830651e-06, "loss": 27.6814, "step": 281070 }, { "epoch": 0.5677993834766905, "grad_norm": 0.0, "learning_rate": 4.787831686871818e-06, "loss": 10.2495, "step": 281080 }, { "epoch": 0.5678195841093743, "grad_norm": 271.98614501953125, "learning_rate": 4.787482935947071e-06, "loss": 17.4034, "step": 281090 }, { "epoch": 0.5678397847420581, "grad_norm": 186.85464477539062, "learning_rate": 4.787134186058103e-06, "loss": 23.2563, "step": 281100 }, { "epoch": 0.5678599853747419, "grad_norm": 331.92999267578125, "learning_rate": 4.7867854372066185e-06, "loss": 19.5225, "step": 281110 }, { "epoch": 0.5678801860074257, "grad_norm": 336.4298095703125, "learning_rate": 4.786436689394317e-06, "loss": 13.6317, "step": 281120 }, { "epoch": 0.5679003866401096, "grad_norm": 351.786865234375, "learning_rate": 4.7860879426228965e-06, "loss": 22.3631, "step": 281130 }, { "epoch": 0.5679205872727934, "grad_norm": 296.818359375, "learning_rate": 4.785739196894056e-06, "loss": 16.5596, "step": 281140 }, { "epoch": 0.5679407879054772, "grad_norm": 158.88919067382812, "learning_rate": 4.785390452209497e-06, "loss": 18.8243, "step": 281150 }, { "epoch": 0.567960988538161, "grad_norm": 34.53234100341797, "learning_rate": 4.785041708570921e-06, "loss": 15.3848, "step": 281160 }, { "epoch": 0.5679811891708448, "grad_norm": 0.0, "learning_rate": 4.784692965980024e-06, "loss": 12.7712, "step": 281170 }, { "epoch": 0.5680013898035287, "grad_norm": 267.7544860839844, "learning_rate": 4.784344224438508e-06, "loss": 19.1841, "step": 281180 }, { "epoch": 0.5680215904362125, "grad_norm": 400.7547302246094, "learning_rate": 4.783995483948072e-06, "loss": 13.6826, "step": 281190 }, { "epoch": 0.5680417910688963, "grad_norm": 452.5321044921875, "learning_rate": 4.783646744510416e-06, "loss": 16.4937, "step": 281200 }, { "epoch": 0.5680619917015801, "grad_norm": 349.8495788574219, "learning_rate": 4.783298006127238e-06, "loss": 21.1118, "step": 281210 }, { "epoch": 0.5680821923342639, "grad_norm": 284.713623046875, "learning_rate": 4.782949268800242e-06, "loss": 18.397, "step": 281220 }, { "epoch": 0.5681023929669478, "grad_norm": 248.22409057617188, "learning_rate": 4.782600532531123e-06, "loss": 15.6041, "step": 281230 }, { "epoch": 0.5681225935996316, "grad_norm": 204.79275512695312, "learning_rate": 4.7822517973215814e-06, "loss": 13.8708, "step": 281240 }, { "epoch": 0.5681427942323154, "grad_norm": 284.60906982421875, "learning_rate": 4.781903063173321e-06, "loss": 17.2046, "step": 281250 }, { "epoch": 0.5681629948649992, "grad_norm": 246.77951049804688, "learning_rate": 4.7815543300880374e-06, "loss": 10.978, "step": 281260 }, { "epoch": 0.568183195497683, "grad_norm": 0.0, "learning_rate": 4.781205598067431e-06, "loss": 14.5805, "step": 281270 }, { "epoch": 0.5682033961303669, "grad_norm": 144.2512664794922, "learning_rate": 4.780856867113202e-06, "loss": 19.6867, "step": 281280 }, { "epoch": 0.5682235967630506, "grad_norm": 276.8296203613281, "learning_rate": 4.780508137227052e-06, "loss": 17.6111, "step": 281290 }, { "epoch": 0.5682437973957344, "grad_norm": 260.9032897949219, "learning_rate": 4.780159408410677e-06, "loss": 22.9931, "step": 281300 }, { "epoch": 0.5682639980284182, "grad_norm": 176.2408905029297, "learning_rate": 4.7798106806657765e-06, "loss": 12.0944, "step": 281310 }, { "epoch": 0.568284198661102, "grad_norm": 3.1943461894989014, "learning_rate": 4.779461953994055e-06, "loss": 13.738, "step": 281320 }, { "epoch": 0.5683043992937858, "grad_norm": 74.25674438476562, "learning_rate": 4.779113228397208e-06, "loss": 18.9764, "step": 281330 }, { "epoch": 0.5683245999264697, "grad_norm": 165.85488891601562, "learning_rate": 4.778764503876937e-06, "loss": 18.4524, "step": 281340 }, { "epoch": 0.5683448005591535, "grad_norm": 282.08087158203125, "learning_rate": 4.778415780434941e-06, "loss": 44.3654, "step": 281350 }, { "epoch": 0.5683650011918373, "grad_norm": 193.23577880859375, "learning_rate": 4.778067058072919e-06, "loss": 18.9738, "step": 281360 }, { "epoch": 0.5683852018245211, "grad_norm": 143.546875, "learning_rate": 4.777718336792571e-06, "loss": 18.6976, "step": 281370 }, { "epoch": 0.568405402457205, "grad_norm": 140.73605346679688, "learning_rate": 4.777369616595599e-06, "loss": 11.768, "step": 281380 }, { "epoch": 0.5684256030898888, "grad_norm": 0.0, "learning_rate": 4.777020897483697e-06, "loss": 11.4709, "step": 281390 }, { "epoch": 0.5684458037225726, "grad_norm": 555.305419921875, "learning_rate": 4.7766721794585704e-06, "loss": 13.0313, "step": 281400 }, { "epoch": 0.5684660043552564, "grad_norm": 252.3140869140625, "learning_rate": 4.776323462521916e-06, "loss": 20.8649, "step": 281410 }, { "epoch": 0.5684862049879402, "grad_norm": 456.9826354980469, "learning_rate": 4.775974746675434e-06, "loss": 17.645, "step": 281420 }, { "epoch": 0.568506405620624, "grad_norm": 129.27235412597656, "learning_rate": 4.7756260319208245e-06, "loss": 19.6462, "step": 281430 }, { "epoch": 0.5685266062533079, "grad_norm": 274.09515380859375, "learning_rate": 4.775277318259784e-06, "loss": 11.9597, "step": 281440 }, { "epoch": 0.5685468068859917, "grad_norm": 116.3635482788086, "learning_rate": 4.774928605694018e-06, "loss": 16.5401, "step": 281450 }, { "epoch": 0.5685670075186755, "grad_norm": 222.1180419921875, "learning_rate": 4.77457989422522e-06, "loss": 17.2528, "step": 281460 }, { "epoch": 0.5685872081513593, "grad_norm": 410.00616455078125, "learning_rate": 4.774231183855093e-06, "loss": 16.3985, "step": 281470 }, { "epoch": 0.5686074087840431, "grad_norm": 412.78009033203125, "learning_rate": 4.773882474585338e-06, "loss": 15.6646, "step": 281480 }, { "epoch": 0.568627609416727, "grad_norm": 204.82957458496094, "learning_rate": 4.77353376641765e-06, "loss": 20.4169, "step": 281490 }, { "epoch": 0.5686478100494108, "grad_norm": 378.14306640625, "learning_rate": 4.7731850593537316e-06, "loss": 13.9975, "step": 281500 }, { "epoch": 0.5686680106820946, "grad_norm": 188.23306274414062, "learning_rate": 4.772836353395283e-06, "loss": 9.4491, "step": 281510 }, { "epoch": 0.5686882113147784, "grad_norm": 391.470458984375, "learning_rate": 4.772487648544e-06, "loss": 20.9408, "step": 281520 }, { "epoch": 0.5687084119474622, "grad_norm": 412.4171447753906, "learning_rate": 4.772138944801585e-06, "loss": 10.7907, "step": 281530 }, { "epoch": 0.5687286125801461, "grad_norm": 134.4298858642578, "learning_rate": 4.77179024216974e-06, "loss": 15.812, "step": 281540 }, { "epoch": 0.5687488132128298, "grad_norm": 325.01995849609375, "learning_rate": 4.771441540650158e-06, "loss": 30.0529, "step": 281550 }, { "epoch": 0.5687690138455136, "grad_norm": 23.66779899597168, "learning_rate": 4.771092840244544e-06, "loss": 8.7128, "step": 281560 }, { "epoch": 0.5687892144781974, "grad_norm": 95.00125122070312, "learning_rate": 4.770744140954596e-06, "loss": 17.5654, "step": 281570 }, { "epoch": 0.5688094151108812, "grad_norm": 0.0, "learning_rate": 4.7703954427820125e-06, "loss": 23.4352, "step": 281580 }, { "epoch": 0.568829615743565, "grad_norm": 387.6609191894531, "learning_rate": 4.770046745728494e-06, "loss": 16.609, "step": 281590 }, { "epoch": 0.5688498163762489, "grad_norm": 750.984375, "learning_rate": 4.769698049795739e-06, "loss": 22.839, "step": 281600 }, { "epoch": 0.5688700170089327, "grad_norm": 453.1768493652344, "learning_rate": 4.76934935498545e-06, "loss": 25.6023, "step": 281610 }, { "epoch": 0.5688902176416165, "grad_norm": 200.67581176757812, "learning_rate": 4.769000661299322e-06, "loss": 25.9899, "step": 281620 }, { "epoch": 0.5689104182743003, "grad_norm": 66.00210571289062, "learning_rate": 4.768651968739057e-06, "loss": 12.4926, "step": 281630 }, { "epoch": 0.5689306189069842, "grad_norm": 270.63055419921875, "learning_rate": 4.768303277306356e-06, "loss": 25.1689, "step": 281640 }, { "epoch": 0.568950819539668, "grad_norm": 124.5906753540039, "learning_rate": 4.767954587002915e-06, "loss": 20.8289, "step": 281650 }, { "epoch": 0.5689710201723518, "grad_norm": 42.47569274902344, "learning_rate": 4.767605897830436e-06, "loss": 19.8543, "step": 281660 }, { "epoch": 0.5689912208050356, "grad_norm": 178.57415771484375, "learning_rate": 4.767257209790618e-06, "loss": 20.4517, "step": 281670 }, { "epoch": 0.5690114214377194, "grad_norm": 362.11431884765625, "learning_rate": 4.76690852288516e-06, "loss": 14.2748, "step": 281680 }, { "epoch": 0.5690316220704033, "grad_norm": 107.62004089355469, "learning_rate": 4.76655983711576e-06, "loss": 17.6286, "step": 281690 }, { "epoch": 0.5690518227030871, "grad_norm": 302.782470703125, "learning_rate": 4.766211152484122e-06, "loss": 10.1809, "step": 281700 }, { "epoch": 0.5690720233357709, "grad_norm": 114.69547271728516, "learning_rate": 4.765862468991939e-06, "loss": 19.7863, "step": 281710 }, { "epoch": 0.5690922239684547, "grad_norm": 278.48455810546875, "learning_rate": 4.765513786640915e-06, "loss": 27.9214, "step": 281720 }, { "epoch": 0.5691124246011385, "grad_norm": 322.5408020019531, "learning_rate": 4.765165105432749e-06, "loss": 21.2255, "step": 281730 }, { "epoch": 0.5691326252338224, "grad_norm": 83.29930877685547, "learning_rate": 4.764816425369141e-06, "loss": 16.943, "step": 281740 }, { "epoch": 0.5691528258665062, "grad_norm": 4.688990116119385, "learning_rate": 4.7644677464517874e-06, "loss": 17.9096, "step": 281750 }, { "epoch": 0.56917302649919, "grad_norm": 351.54833984375, "learning_rate": 4.764119068682389e-06, "loss": 33.9979, "step": 281760 }, { "epoch": 0.5691932271318738, "grad_norm": 197.4849395751953, "learning_rate": 4.7637703920626484e-06, "loss": 28.479, "step": 281770 }, { "epoch": 0.5692134277645576, "grad_norm": 283.5315856933594, "learning_rate": 4.76342171659426e-06, "loss": 17.8286, "step": 281780 }, { "epoch": 0.5692336283972415, "grad_norm": 215.35858154296875, "learning_rate": 4.763073042278925e-06, "loss": 11.6945, "step": 281790 }, { "epoch": 0.5692538290299252, "grad_norm": 160.43252563476562, "learning_rate": 4.762724369118346e-06, "loss": 16.7025, "step": 281800 }, { "epoch": 0.569274029662609, "grad_norm": 99.82978057861328, "learning_rate": 4.762375697114217e-06, "loss": 21.2828, "step": 281810 }, { "epoch": 0.5692942302952928, "grad_norm": 237.71224975585938, "learning_rate": 4.762027026268241e-06, "loss": 29.7128, "step": 281820 }, { "epoch": 0.5693144309279766, "grad_norm": 375.4344177246094, "learning_rate": 4.761678356582117e-06, "loss": 29.9441, "step": 281830 }, { "epoch": 0.5693346315606604, "grad_norm": 270.6103210449219, "learning_rate": 4.761329688057543e-06, "loss": 54.3881, "step": 281840 }, { "epoch": 0.5693548321933443, "grad_norm": 207.4618682861328, "learning_rate": 4.760981020696218e-06, "loss": 16.3416, "step": 281850 }, { "epoch": 0.5693750328260281, "grad_norm": 557.9196166992188, "learning_rate": 4.760632354499846e-06, "loss": 31.2801, "step": 281860 }, { "epoch": 0.5693952334587119, "grad_norm": 321.9739685058594, "learning_rate": 4.760283689470119e-06, "loss": 21.8109, "step": 281870 }, { "epoch": 0.5694154340913957, "grad_norm": 165.37344360351562, "learning_rate": 4.759935025608742e-06, "loss": 15.3162, "step": 281880 }, { "epoch": 0.5694356347240795, "grad_norm": 278.29541015625, "learning_rate": 4.7595863629174125e-06, "loss": 18.9391, "step": 281890 }, { "epoch": 0.5694558353567634, "grad_norm": 243.00848388671875, "learning_rate": 4.759237701397831e-06, "loss": 19.9585, "step": 281900 }, { "epoch": 0.5694760359894472, "grad_norm": 204.01318359375, "learning_rate": 4.758889041051694e-06, "loss": 23.2995, "step": 281910 }, { "epoch": 0.569496236622131, "grad_norm": 0.0, "learning_rate": 4.758540381880702e-06, "loss": 13.9821, "step": 281920 }, { "epoch": 0.5695164372548148, "grad_norm": 0.0, "learning_rate": 4.7581917238865565e-06, "loss": 10.2042, "step": 281930 }, { "epoch": 0.5695366378874986, "grad_norm": 193.84390258789062, "learning_rate": 4.757843067070955e-06, "loss": 19.7266, "step": 281940 }, { "epoch": 0.5695568385201825, "grad_norm": 627.3089599609375, "learning_rate": 4.757494411435597e-06, "loss": 44.1289, "step": 281950 }, { "epoch": 0.5695770391528663, "grad_norm": 299.1866455078125, "learning_rate": 4.757145756982182e-06, "loss": 17.0569, "step": 281960 }, { "epoch": 0.5695972397855501, "grad_norm": 604.5014038085938, "learning_rate": 4.756797103712409e-06, "loss": 15.7375, "step": 281970 }, { "epoch": 0.5696174404182339, "grad_norm": 171.7760467529297, "learning_rate": 4.756448451627977e-06, "loss": 15.0146, "step": 281980 }, { "epoch": 0.5696376410509177, "grad_norm": 363.2405700683594, "learning_rate": 4.7560998007305865e-06, "loss": 8.4294, "step": 281990 }, { "epoch": 0.5696578416836016, "grad_norm": 317.75518798828125, "learning_rate": 4.755751151021934e-06, "loss": 29.3333, "step": 282000 }, { "epoch": 0.5696780423162854, "grad_norm": 250.72210693359375, "learning_rate": 4.755402502503722e-06, "loss": 18.7691, "step": 282010 }, { "epoch": 0.5696982429489692, "grad_norm": 381.3222961425781, "learning_rate": 4.7550538551776495e-06, "loss": 10.3524, "step": 282020 }, { "epoch": 0.569718443581653, "grad_norm": 561.9158325195312, "learning_rate": 4.754705209045414e-06, "loss": 29.2622, "step": 282030 }, { "epoch": 0.5697386442143368, "grad_norm": 358.51934814453125, "learning_rate": 4.754356564108715e-06, "loss": 24.1509, "step": 282040 }, { "epoch": 0.5697588448470207, "grad_norm": 293.2953796386719, "learning_rate": 4.754007920369252e-06, "loss": 20.3478, "step": 282050 }, { "epoch": 0.5697790454797044, "grad_norm": 460.3548278808594, "learning_rate": 4.753659277828727e-06, "loss": 29.5492, "step": 282060 }, { "epoch": 0.5697992461123882, "grad_norm": 412.7047119140625, "learning_rate": 4.7533106364888345e-06, "loss": 14.1107, "step": 282070 }, { "epoch": 0.569819446745072, "grad_norm": 237.012451171875, "learning_rate": 4.752961996351276e-06, "loss": 11.1253, "step": 282080 }, { "epoch": 0.5698396473777558, "grad_norm": 298.9742736816406, "learning_rate": 4.7526133574177525e-06, "loss": 11.3084, "step": 282090 }, { "epoch": 0.5698598480104397, "grad_norm": 654.5994873046875, "learning_rate": 4.752264719689961e-06, "loss": 28.3167, "step": 282100 }, { "epoch": 0.5698800486431235, "grad_norm": 581.427978515625, "learning_rate": 4.7519160831696e-06, "loss": 25.5105, "step": 282110 }, { "epoch": 0.5699002492758073, "grad_norm": 306.7034606933594, "learning_rate": 4.751567447858372e-06, "loss": 19.5433, "step": 282120 }, { "epoch": 0.5699204499084911, "grad_norm": 0.0, "learning_rate": 4.751218813757972e-06, "loss": 20.5803, "step": 282130 }, { "epoch": 0.5699406505411749, "grad_norm": 209.4285125732422, "learning_rate": 4.7508701808701006e-06, "loss": 20.5156, "step": 282140 }, { "epoch": 0.5699608511738588, "grad_norm": 197.5854034423828, "learning_rate": 4.7505215491964615e-06, "loss": 10.6162, "step": 282150 }, { "epoch": 0.5699810518065426, "grad_norm": 111.80238342285156, "learning_rate": 4.750172918738747e-06, "loss": 23.37, "step": 282160 }, { "epoch": 0.5700012524392264, "grad_norm": 189.2079315185547, "learning_rate": 4.7498242894986595e-06, "loss": 21.0665, "step": 282170 }, { "epoch": 0.5700214530719102, "grad_norm": 452.709716796875, "learning_rate": 4.7494756614779e-06, "loss": 41.0544, "step": 282180 }, { "epoch": 0.570041653704594, "grad_norm": 436.0036926269531, "learning_rate": 4.749127034678165e-06, "loss": 19.0603, "step": 282190 }, { "epoch": 0.5700618543372779, "grad_norm": 205.26841735839844, "learning_rate": 4.748778409101153e-06, "loss": 19.7706, "step": 282200 }, { "epoch": 0.5700820549699617, "grad_norm": 405.2639465332031, "learning_rate": 4.748429784748564e-06, "loss": 22.1915, "step": 282210 }, { "epoch": 0.5701022556026455, "grad_norm": 145.3580322265625, "learning_rate": 4.748081161622101e-06, "loss": 9.2403, "step": 282220 }, { "epoch": 0.5701224562353293, "grad_norm": 212.44883728027344, "learning_rate": 4.7477325397234575e-06, "loss": 16.1488, "step": 282230 }, { "epoch": 0.5701426568680131, "grad_norm": 1410.409423828125, "learning_rate": 4.747383919054335e-06, "loss": 30.1615, "step": 282240 }, { "epoch": 0.570162857500697, "grad_norm": 260.4547119140625, "learning_rate": 4.747035299616434e-06, "loss": 15.8391, "step": 282250 }, { "epoch": 0.5701830581333808, "grad_norm": 223.7154541015625, "learning_rate": 4.746686681411451e-06, "loss": 20.0262, "step": 282260 }, { "epoch": 0.5702032587660646, "grad_norm": 426.34991455078125, "learning_rate": 4.746338064441087e-06, "loss": 34.3079, "step": 282270 }, { "epoch": 0.5702234593987484, "grad_norm": 334.94610595703125, "learning_rate": 4.745989448707042e-06, "loss": 22.101, "step": 282280 }, { "epoch": 0.5702436600314322, "grad_norm": 202.57594299316406, "learning_rate": 4.745640834211012e-06, "loss": 10.1708, "step": 282290 }, { "epoch": 0.570263860664116, "grad_norm": 456.0645446777344, "learning_rate": 4.745292220954696e-06, "loss": 25.1691, "step": 282300 }, { "epoch": 0.5702840612967999, "grad_norm": 61.172607421875, "learning_rate": 4.744943608939799e-06, "loss": 21.3836, "step": 282310 }, { "epoch": 0.5703042619294836, "grad_norm": 235.55142211914062, "learning_rate": 4.744594998168012e-06, "loss": 15.3958, "step": 282320 }, { "epoch": 0.5703244625621674, "grad_norm": 321.6095886230469, "learning_rate": 4.744246388641039e-06, "loss": 14.5754, "step": 282330 }, { "epoch": 0.5703446631948512, "grad_norm": 347.0094299316406, "learning_rate": 4.743897780360578e-06, "loss": 11.7727, "step": 282340 }, { "epoch": 0.570364863827535, "grad_norm": 267.7886047363281, "learning_rate": 4.74354917332833e-06, "loss": 16.8885, "step": 282350 }, { "epoch": 0.5703850644602189, "grad_norm": 137.775634765625, "learning_rate": 4.7432005675459905e-06, "loss": 27.9451, "step": 282360 }, { "epoch": 0.5704052650929027, "grad_norm": 156.79405212402344, "learning_rate": 4.74285196301526e-06, "loss": 29.0498, "step": 282370 }, { "epoch": 0.5704254657255865, "grad_norm": 572.094482421875, "learning_rate": 4.742503359737841e-06, "loss": 15.4878, "step": 282380 }, { "epoch": 0.5704456663582703, "grad_norm": 126.8975830078125, "learning_rate": 4.742154757715425e-06, "loss": 20.0502, "step": 282390 }, { "epoch": 0.5704658669909541, "grad_norm": 250.6568145751953, "learning_rate": 4.741806156949718e-06, "loss": 11.3743, "step": 282400 }, { "epoch": 0.570486067623638, "grad_norm": 245.07659912109375, "learning_rate": 4.741457557442416e-06, "loss": 19.5015, "step": 282410 }, { "epoch": 0.5705062682563218, "grad_norm": 353.1564636230469, "learning_rate": 4.7411089591952184e-06, "loss": 37.3215, "step": 282420 }, { "epoch": 0.5705264688890056, "grad_norm": 450.8144226074219, "learning_rate": 4.740760362209824e-06, "loss": 25.9348, "step": 282430 }, { "epoch": 0.5705466695216894, "grad_norm": 390.2644348144531, "learning_rate": 4.740411766487933e-06, "loss": 17.5398, "step": 282440 }, { "epoch": 0.5705668701543732, "grad_norm": 180.58837890625, "learning_rate": 4.740063172031243e-06, "loss": 41.0762, "step": 282450 }, { "epoch": 0.5705870707870571, "grad_norm": 168.8196563720703, "learning_rate": 4.7397145788414525e-06, "loss": 19.6491, "step": 282460 }, { "epoch": 0.5706072714197409, "grad_norm": 338.72857666015625, "learning_rate": 4.739365986920265e-06, "loss": 16.7117, "step": 282470 }, { "epoch": 0.5706274720524247, "grad_norm": 339.6976623535156, "learning_rate": 4.7390173962693724e-06, "loss": 22.247, "step": 282480 }, { "epoch": 0.5706476726851085, "grad_norm": 184.87835693359375, "learning_rate": 4.738668806890479e-06, "loss": 10.3727, "step": 282490 }, { "epoch": 0.5706678733177923, "grad_norm": 262.4919128417969, "learning_rate": 4.738320218785281e-06, "loss": 18.8615, "step": 282500 }, { "epoch": 0.5706880739504762, "grad_norm": 380.7738952636719, "learning_rate": 4.737971631955481e-06, "loss": 21.9385, "step": 282510 }, { "epoch": 0.57070827458316, "grad_norm": 543.34130859375, "learning_rate": 4.737623046402774e-06, "loss": 15.9731, "step": 282520 }, { "epoch": 0.5707284752158438, "grad_norm": 327.68017578125, "learning_rate": 4.737274462128858e-06, "loss": 17.601, "step": 282530 }, { "epoch": 0.5707486758485276, "grad_norm": 282.1386413574219, "learning_rate": 4.736925879135439e-06, "loss": 20.9554, "step": 282540 }, { "epoch": 0.5707688764812114, "grad_norm": 247.07455444335938, "learning_rate": 4.7365772974242075e-06, "loss": 11.4624, "step": 282550 }, { "epoch": 0.5707890771138953, "grad_norm": 868.4129028320312, "learning_rate": 4.736228716996868e-06, "loss": 10.268, "step": 282560 }, { "epoch": 0.570809277746579, "grad_norm": 309.4300842285156, "learning_rate": 4.735880137855118e-06, "loss": 10.5666, "step": 282570 }, { "epoch": 0.5708294783792628, "grad_norm": 302.09619140625, "learning_rate": 4.735531560000656e-06, "loss": 26.6475, "step": 282580 }, { "epoch": 0.5708496790119466, "grad_norm": 343.8674621582031, "learning_rate": 4.735182983435181e-06, "loss": 35.4305, "step": 282590 }, { "epoch": 0.5708698796446304, "grad_norm": 207.9016876220703, "learning_rate": 4.734834408160393e-06, "loss": 26.1212, "step": 282600 }, { "epoch": 0.5708900802773142, "grad_norm": 130.52896118164062, "learning_rate": 4.734485834177987e-06, "loss": 14.3217, "step": 282610 }, { "epoch": 0.5709102809099981, "grad_norm": 273.38018798828125, "learning_rate": 4.734137261489667e-06, "loss": 13.1367, "step": 282620 }, { "epoch": 0.5709304815426819, "grad_norm": 531.1248779296875, "learning_rate": 4.73378869009713e-06, "loss": 27.37, "step": 282630 }, { "epoch": 0.5709506821753657, "grad_norm": 587.421875, "learning_rate": 4.7334401200020745e-06, "loss": 22.8142, "step": 282640 }, { "epoch": 0.5709708828080495, "grad_norm": 245.77606201171875, "learning_rate": 4.733091551206198e-06, "loss": 11.6334, "step": 282650 }, { "epoch": 0.5709910834407333, "grad_norm": 266.7550354003906, "learning_rate": 4.732742983711202e-06, "loss": 22.6452, "step": 282660 }, { "epoch": 0.5710112840734172, "grad_norm": 512.4109497070312, "learning_rate": 4.732394417518785e-06, "loss": 27.6475, "step": 282670 }, { "epoch": 0.571031484706101, "grad_norm": 209.1186065673828, "learning_rate": 4.732045852630644e-06, "loss": 20.6381, "step": 282680 }, { "epoch": 0.5710516853387848, "grad_norm": 215.96141052246094, "learning_rate": 4.731697289048479e-06, "loss": 18.2823, "step": 282690 }, { "epoch": 0.5710718859714686, "grad_norm": 147.80747985839844, "learning_rate": 4.73134872677399e-06, "loss": 19.5149, "step": 282700 }, { "epoch": 0.5710920866041524, "grad_norm": 227.1821746826172, "learning_rate": 4.731000165808874e-06, "loss": 12.7712, "step": 282710 }, { "epoch": 0.5711122872368363, "grad_norm": 495.3968200683594, "learning_rate": 4.73065160615483e-06, "loss": 13.6671, "step": 282720 }, { "epoch": 0.5711324878695201, "grad_norm": 133.0815887451172, "learning_rate": 4.730303047813559e-06, "loss": 24.7884, "step": 282730 }, { "epoch": 0.5711526885022039, "grad_norm": 0.0, "learning_rate": 4.7299544907867576e-06, "loss": 17.4169, "step": 282740 }, { "epoch": 0.5711728891348877, "grad_norm": 293.224365234375, "learning_rate": 4.729605935076123e-06, "loss": 11.4995, "step": 282750 }, { "epoch": 0.5711930897675715, "grad_norm": 379.641845703125, "learning_rate": 4.7292573806833605e-06, "loss": 31.6166, "step": 282760 }, { "epoch": 0.5712132904002554, "grad_norm": 215.66285705566406, "learning_rate": 4.7289088276101616e-06, "loss": 18.2355, "step": 282770 }, { "epoch": 0.5712334910329392, "grad_norm": 175.5919189453125, "learning_rate": 4.728560275858228e-06, "loss": 21.4431, "step": 282780 }, { "epoch": 0.571253691665623, "grad_norm": 155.85536193847656, "learning_rate": 4.72821172542926e-06, "loss": 11.4065, "step": 282790 }, { "epoch": 0.5712738922983068, "grad_norm": 378.5502624511719, "learning_rate": 4.727863176324955e-06, "loss": 12.1892, "step": 282800 }, { "epoch": 0.5712940929309906, "grad_norm": 478.5702209472656, "learning_rate": 4.7275146285470115e-06, "loss": 23.6295, "step": 282810 }, { "epoch": 0.5713142935636745, "grad_norm": 148.85838317871094, "learning_rate": 4.727166082097127e-06, "loss": 14.4342, "step": 282820 }, { "epoch": 0.5713344941963582, "grad_norm": 295.2171936035156, "learning_rate": 4.726817536977006e-06, "loss": 29.6919, "step": 282830 }, { "epoch": 0.571354694829042, "grad_norm": 346.3277587890625, "learning_rate": 4.72646899318834e-06, "loss": 23.1125, "step": 282840 }, { "epoch": 0.5713748954617258, "grad_norm": 322.0348815917969, "learning_rate": 4.7261204507328315e-06, "loss": 21.6867, "step": 282850 }, { "epoch": 0.5713950960944096, "grad_norm": 164.9566192626953, "learning_rate": 4.72577190961218e-06, "loss": 14.6026, "step": 282860 }, { "epoch": 0.5714152967270935, "grad_norm": 252.9683074951172, "learning_rate": 4.725423369828082e-06, "loss": 14.682, "step": 282870 }, { "epoch": 0.5714354973597773, "grad_norm": 159.81973266601562, "learning_rate": 4.725074831382237e-06, "loss": 10.6439, "step": 282880 }, { "epoch": 0.5714556979924611, "grad_norm": 213.868408203125, "learning_rate": 4.724726294276345e-06, "loss": 15.9352, "step": 282890 }, { "epoch": 0.5714758986251449, "grad_norm": 537.5245361328125, "learning_rate": 4.7243777585121034e-06, "loss": 34.1805, "step": 282900 }, { "epoch": 0.5714960992578287, "grad_norm": 172.149658203125, "learning_rate": 4.724029224091209e-06, "loss": 23.3926, "step": 282910 }, { "epoch": 0.5715162998905126, "grad_norm": 164.90869140625, "learning_rate": 4.723680691015366e-06, "loss": 18.519, "step": 282920 }, { "epoch": 0.5715365005231964, "grad_norm": 256.1228942871094, "learning_rate": 4.723332159286267e-06, "loss": 14.3408, "step": 282930 }, { "epoch": 0.5715567011558802, "grad_norm": 183.81439208984375, "learning_rate": 4.722983628905614e-06, "loss": 11.5112, "step": 282940 }, { "epoch": 0.571576901788564, "grad_norm": 163.8199920654297, "learning_rate": 4.722635099875106e-06, "loss": 13.3997, "step": 282950 }, { "epoch": 0.5715971024212478, "grad_norm": 550.0780639648438, "learning_rate": 4.722286572196441e-06, "loss": 25.4576, "step": 282960 }, { "epoch": 0.5716173030539317, "grad_norm": 437.424560546875, "learning_rate": 4.721938045871317e-06, "loss": 11.0804, "step": 282970 }, { "epoch": 0.5716375036866155, "grad_norm": 0.0, "learning_rate": 4.721589520901433e-06, "loss": 15.9092, "step": 282980 }, { "epoch": 0.5716577043192993, "grad_norm": 446.7509460449219, "learning_rate": 4.7212409972884894e-06, "loss": 16.9422, "step": 282990 }, { "epoch": 0.5716779049519831, "grad_norm": 45.321407318115234, "learning_rate": 4.720892475034181e-06, "loss": 9.2414, "step": 283000 }, { "epoch": 0.5716981055846669, "grad_norm": 362.79547119140625, "learning_rate": 4.72054395414021e-06, "loss": 22.7621, "step": 283010 }, { "epoch": 0.5717183062173508, "grad_norm": 83.11360931396484, "learning_rate": 4.720195434608275e-06, "loss": 13.0025, "step": 283020 }, { "epoch": 0.5717385068500346, "grad_norm": 251.7356719970703, "learning_rate": 4.719846916440072e-06, "loss": 18.36, "step": 283030 }, { "epoch": 0.5717587074827184, "grad_norm": 296.3517761230469, "learning_rate": 4.719498399637302e-06, "loss": 14.8585, "step": 283040 }, { "epoch": 0.5717789081154022, "grad_norm": 48.791770935058594, "learning_rate": 4.719149884201664e-06, "loss": 28.4491, "step": 283050 }, { "epoch": 0.571799108748086, "grad_norm": 135.41790771484375, "learning_rate": 4.718801370134853e-06, "loss": 10.6924, "step": 283060 }, { "epoch": 0.5718193093807699, "grad_norm": 182.9420623779297, "learning_rate": 4.718452857438569e-06, "loss": 11.0936, "step": 283070 }, { "epoch": 0.5718395100134536, "grad_norm": 538.301025390625, "learning_rate": 4.7181043461145155e-06, "loss": 27.7757, "step": 283080 }, { "epoch": 0.5718597106461374, "grad_norm": 235.94053649902344, "learning_rate": 4.717755836164384e-06, "loss": 23.9527, "step": 283090 }, { "epoch": 0.5718799112788212, "grad_norm": 171.6576385498047, "learning_rate": 4.717407327589878e-06, "loss": 24.1038, "step": 283100 }, { "epoch": 0.571900111911505, "grad_norm": 360.25347900390625, "learning_rate": 4.717058820392694e-06, "loss": 28.8653, "step": 283110 }, { "epoch": 0.5719203125441888, "grad_norm": 279.0895690917969, "learning_rate": 4.7167103145745315e-06, "loss": 9.966, "step": 283120 }, { "epoch": 0.5719405131768727, "grad_norm": 247.61471557617188, "learning_rate": 4.716361810137088e-06, "loss": 10.0686, "step": 283130 }, { "epoch": 0.5719607138095565, "grad_norm": 621.6709594726562, "learning_rate": 4.716013307082061e-06, "loss": 24.0623, "step": 283140 }, { "epoch": 0.5719809144422403, "grad_norm": 246.0594024658203, "learning_rate": 4.715664805411155e-06, "loss": 13.4547, "step": 283150 }, { "epoch": 0.5720011150749241, "grad_norm": 126.27115631103516, "learning_rate": 4.715316305126059e-06, "loss": 26.1267, "step": 283160 }, { "epoch": 0.572021315707608, "grad_norm": 377.06634521484375, "learning_rate": 4.71496780622848e-06, "loss": 15.3202, "step": 283170 }, { "epoch": 0.5720415163402918, "grad_norm": 268.5616149902344, "learning_rate": 4.714619308720113e-06, "loss": 15.3157, "step": 283180 }, { "epoch": 0.5720617169729756, "grad_norm": 413.0830993652344, "learning_rate": 4.714270812602657e-06, "loss": 19.3924, "step": 283190 }, { "epoch": 0.5720819176056594, "grad_norm": 151.7684326171875, "learning_rate": 4.71392231787781e-06, "loss": 22.7582, "step": 283200 }, { "epoch": 0.5721021182383432, "grad_norm": 129.8608856201172, "learning_rate": 4.713573824547271e-06, "loss": 15.4786, "step": 283210 }, { "epoch": 0.572122318871027, "grad_norm": 136.99969482421875, "learning_rate": 4.7132253326127394e-06, "loss": 30.4862, "step": 283220 }, { "epoch": 0.5721425195037109, "grad_norm": 232.29505920410156, "learning_rate": 4.71287684207591e-06, "loss": 10.394, "step": 283230 }, { "epoch": 0.5721627201363947, "grad_norm": 225.86117553710938, "learning_rate": 4.712528352938487e-06, "loss": 15.4327, "step": 283240 }, { "epoch": 0.5721829207690785, "grad_norm": 354.385009765625, "learning_rate": 4.712179865202164e-06, "loss": 19.6162, "step": 283250 }, { "epoch": 0.5722031214017623, "grad_norm": 643.75341796875, "learning_rate": 4.711831378868643e-06, "loss": 31.6341, "step": 283260 }, { "epoch": 0.5722233220344461, "grad_norm": 238.54129028320312, "learning_rate": 4.71148289393962e-06, "loss": 22.9064, "step": 283270 }, { "epoch": 0.57224352266713, "grad_norm": 58.151554107666016, "learning_rate": 4.711134410416794e-06, "loss": 12.6484, "step": 283280 }, { "epoch": 0.5722637232998138, "grad_norm": 331.8014831542969, "learning_rate": 4.7107859283018635e-06, "loss": 22.9232, "step": 283290 }, { "epoch": 0.5722839239324976, "grad_norm": 170.11611938476562, "learning_rate": 4.710437447596528e-06, "loss": 27.0203, "step": 283300 }, { "epoch": 0.5723041245651814, "grad_norm": 301.7759094238281, "learning_rate": 4.710088968302486e-06, "loss": 12.4411, "step": 283310 }, { "epoch": 0.5723243251978652, "grad_norm": 234.26535034179688, "learning_rate": 4.709740490421435e-06, "loss": 11.4514, "step": 283320 }, { "epoch": 0.5723445258305491, "grad_norm": 227.5169219970703, "learning_rate": 4.709392013955073e-06, "loss": 29.5952, "step": 283330 }, { "epoch": 0.5723647264632328, "grad_norm": 345.9141540527344, "learning_rate": 4.7090435389051e-06, "loss": 17.3499, "step": 283340 }, { "epoch": 0.5723849270959166, "grad_norm": 348.0394287109375, "learning_rate": 4.708695065273213e-06, "loss": 18.8292, "step": 283350 }, { "epoch": 0.5724051277286004, "grad_norm": 708.4619750976562, "learning_rate": 4.708346593061109e-06, "loss": 21.5866, "step": 283360 }, { "epoch": 0.5724253283612842, "grad_norm": 239.94512939453125, "learning_rate": 4.707998122270493e-06, "loss": 19.1516, "step": 283370 }, { "epoch": 0.5724455289939681, "grad_norm": 119.6194076538086, "learning_rate": 4.707649652903054e-06, "loss": 19.7992, "step": 283380 }, { "epoch": 0.5724657296266519, "grad_norm": 26.884153366088867, "learning_rate": 4.707301184960496e-06, "loss": 25.8175, "step": 283390 }, { "epoch": 0.5724859302593357, "grad_norm": 1342.979736328125, "learning_rate": 4.706952718444518e-06, "loss": 25.5486, "step": 283400 }, { "epoch": 0.5725061308920195, "grad_norm": 160.66986083984375, "learning_rate": 4.706604253356817e-06, "loss": 18.0339, "step": 283410 }, { "epoch": 0.5725263315247033, "grad_norm": 558.6371459960938, "learning_rate": 4.70625578969909e-06, "loss": 28.4605, "step": 283420 }, { "epoch": 0.5725465321573872, "grad_norm": 138.16136169433594, "learning_rate": 4.705907327473036e-06, "loss": 20.675, "step": 283430 }, { "epoch": 0.572566732790071, "grad_norm": 282.0671691894531, "learning_rate": 4.705558866680357e-06, "loss": 47.4976, "step": 283440 }, { "epoch": 0.5725869334227548, "grad_norm": 361.6696472167969, "learning_rate": 4.705210407322746e-06, "loss": 26.4436, "step": 283450 }, { "epoch": 0.5726071340554386, "grad_norm": 575.1261596679688, "learning_rate": 4.704861949401904e-06, "loss": 22.5024, "step": 283460 }, { "epoch": 0.5726273346881224, "grad_norm": 265.9471740722656, "learning_rate": 4.70451349291953e-06, "loss": 23.8315, "step": 283470 }, { "epoch": 0.5726475353208063, "grad_norm": 24.00396728515625, "learning_rate": 4.704165037877321e-06, "loss": 11.0593, "step": 283480 }, { "epoch": 0.5726677359534901, "grad_norm": 208.60281372070312, "learning_rate": 4.703816584276975e-06, "loss": 15.7267, "step": 283490 }, { "epoch": 0.5726879365861739, "grad_norm": 207.38668823242188, "learning_rate": 4.703468132120193e-06, "loss": 26.3088, "step": 283500 }, { "epoch": 0.5727081372188577, "grad_norm": 306.1951904296875, "learning_rate": 4.70311968140867e-06, "loss": 21.4278, "step": 283510 }, { "epoch": 0.5727283378515415, "grad_norm": 257.7835388183594, "learning_rate": 4.702771232144104e-06, "loss": 23.4705, "step": 283520 }, { "epoch": 0.5727485384842254, "grad_norm": 222.4919891357422, "learning_rate": 4.702422784328199e-06, "loss": 17.6934, "step": 283530 }, { "epoch": 0.5727687391169092, "grad_norm": 513.1073608398438, "learning_rate": 4.702074337962645e-06, "loss": 29.089, "step": 283540 }, { "epoch": 0.572788939749593, "grad_norm": 47.4271125793457, "learning_rate": 4.7017258930491474e-06, "loss": 20.1649, "step": 283550 }, { "epoch": 0.5728091403822768, "grad_norm": 225.2616424560547, "learning_rate": 4.7013774495894e-06, "loss": 21.504, "step": 283560 }, { "epoch": 0.5728293410149606, "grad_norm": 582.306640625, "learning_rate": 4.7010290075851035e-06, "loss": 20.1862, "step": 283570 }, { "epoch": 0.5728495416476445, "grad_norm": 238.623046875, "learning_rate": 4.700680567037956e-06, "loss": 15.5726, "step": 283580 }, { "epoch": 0.5728697422803282, "grad_norm": 710.7210083007812, "learning_rate": 4.7003321279496526e-06, "loss": 7.4934, "step": 283590 }, { "epoch": 0.572889942913012, "grad_norm": 484.5224914550781, "learning_rate": 4.699983690321898e-06, "loss": 31.0483, "step": 283600 }, { "epoch": 0.5729101435456958, "grad_norm": 86.22411346435547, "learning_rate": 4.699635254156383e-06, "loss": 14.144, "step": 283610 }, { "epoch": 0.5729303441783796, "grad_norm": 385.9993591308594, "learning_rate": 4.69928681945481e-06, "loss": 23.0354, "step": 283620 }, { "epoch": 0.5729505448110634, "grad_norm": 230.2264862060547, "learning_rate": 4.6989383862188785e-06, "loss": 25.6004, "step": 283630 }, { "epoch": 0.5729707454437473, "grad_norm": 61.80024719238281, "learning_rate": 4.6985899544502835e-06, "loss": 14.8187, "step": 283640 }, { "epoch": 0.5729909460764311, "grad_norm": 332.0389404296875, "learning_rate": 4.698241524150724e-06, "loss": 24.9778, "step": 283650 }, { "epoch": 0.5730111467091149, "grad_norm": 612.7693481445312, "learning_rate": 4.6978930953219e-06, "loss": 26.2727, "step": 283660 }, { "epoch": 0.5730313473417987, "grad_norm": 402.0921630859375, "learning_rate": 4.697544667965507e-06, "loss": 24.292, "step": 283670 }, { "epoch": 0.5730515479744825, "grad_norm": 268.1552429199219, "learning_rate": 4.697196242083245e-06, "loss": 22.3334, "step": 283680 }, { "epoch": 0.5730717486071664, "grad_norm": 199.22511291503906, "learning_rate": 4.696847817676814e-06, "loss": 11.0471, "step": 283690 }, { "epoch": 0.5730919492398502, "grad_norm": 347.20867919921875, "learning_rate": 4.696499394747906e-06, "loss": 14.3459, "step": 283700 }, { "epoch": 0.573112149872534, "grad_norm": 162.71534729003906, "learning_rate": 4.696150973298225e-06, "loss": 15.9401, "step": 283710 }, { "epoch": 0.5731323505052178, "grad_norm": 376.11871337890625, "learning_rate": 4.695802553329467e-06, "loss": 25.088, "step": 283720 }, { "epoch": 0.5731525511379016, "grad_norm": 246.52537536621094, "learning_rate": 4.695454134843332e-06, "loss": 12.4076, "step": 283730 }, { "epoch": 0.5731727517705855, "grad_norm": 251.7625274658203, "learning_rate": 4.695105717841516e-06, "loss": 19.4473, "step": 283740 }, { "epoch": 0.5731929524032693, "grad_norm": 191.14413452148438, "learning_rate": 4.694757302325715e-06, "loss": 16.7442, "step": 283750 }, { "epoch": 0.5732131530359531, "grad_norm": 150.49366760253906, "learning_rate": 4.694408888297635e-06, "loss": 13.5001, "step": 283760 }, { "epoch": 0.5732333536686369, "grad_norm": 846.99609375, "learning_rate": 4.694060475758964e-06, "loss": 26.1988, "step": 283770 }, { "epoch": 0.5732535543013207, "grad_norm": 74.78963470458984, "learning_rate": 4.693712064711408e-06, "loss": 12.7691, "step": 283780 }, { "epoch": 0.5732737549340046, "grad_norm": 179.54226684570312, "learning_rate": 4.693363655156662e-06, "loss": 11.3239, "step": 283790 }, { "epoch": 0.5732939555666884, "grad_norm": 139.59075927734375, "learning_rate": 4.693015247096423e-06, "loss": 9.2394, "step": 283800 }, { "epoch": 0.5733141561993722, "grad_norm": 283.5695495605469, "learning_rate": 4.6926668405323915e-06, "loss": 17.2528, "step": 283810 }, { "epoch": 0.573334356832056, "grad_norm": 402.9572448730469, "learning_rate": 4.692318435466265e-06, "loss": 22.2303, "step": 283820 }, { "epoch": 0.5733545574647398, "grad_norm": 170.88238525390625, "learning_rate": 4.691970031899741e-06, "loss": 20.0895, "step": 283830 }, { "epoch": 0.5733747580974237, "grad_norm": 174.8932342529297, "learning_rate": 4.691621629834516e-06, "loss": 11.6274, "step": 283840 }, { "epoch": 0.5733949587301074, "grad_norm": 194.92312622070312, "learning_rate": 4.691273229272291e-06, "loss": 21.0296, "step": 283850 }, { "epoch": 0.5734151593627912, "grad_norm": 216.89234924316406, "learning_rate": 4.6909248302147634e-06, "loss": 19.8095, "step": 283860 }, { "epoch": 0.573435359995475, "grad_norm": 162.0137939453125, "learning_rate": 4.69057643266363e-06, "loss": 12.3856, "step": 283870 }, { "epoch": 0.5734555606281588, "grad_norm": 455.48199462890625, "learning_rate": 4.69022803662059e-06, "loss": 28.8267, "step": 283880 }, { "epoch": 0.5734757612608427, "grad_norm": 356.20379638671875, "learning_rate": 4.689879642087341e-06, "loss": 18.0121, "step": 283890 }, { "epoch": 0.5734959618935265, "grad_norm": 81.97564697265625, "learning_rate": 4.689531249065581e-06, "loss": 17.0858, "step": 283900 }, { "epoch": 0.5735161625262103, "grad_norm": 210.98526000976562, "learning_rate": 4.689182857557006e-06, "loss": 16.671, "step": 283910 }, { "epoch": 0.5735363631588941, "grad_norm": 335.4853210449219, "learning_rate": 4.688834467563318e-06, "loss": 18.6289, "step": 283920 }, { "epoch": 0.5735565637915779, "grad_norm": 472.8501892089844, "learning_rate": 4.688486079086213e-06, "loss": 14.4347, "step": 283930 }, { "epoch": 0.5735767644242618, "grad_norm": 59.3341064453125, "learning_rate": 4.688137692127389e-06, "loss": 15.3719, "step": 283940 }, { "epoch": 0.5735969650569456, "grad_norm": 175.0448760986328, "learning_rate": 4.687789306688544e-06, "loss": 19.0444, "step": 283950 }, { "epoch": 0.5736171656896294, "grad_norm": 362.24737548828125, "learning_rate": 4.687440922771376e-06, "loss": 11.0444, "step": 283960 }, { "epoch": 0.5736373663223132, "grad_norm": 220.01011657714844, "learning_rate": 4.687092540377583e-06, "loss": 20.8061, "step": 283970 }, { "epoch": 0.573657566954997, "grad_norm": 199.77923583984375, "learning_rate": 4.686744159508864e-06, "loss": 9.4997, "step": 283980 }, { "epoch": 0.5736777675876809, "grad_norm": 219.7061767578125, "learning_rate": 4.686395780166914e-06, "loss": 15.3783, "step": 283990 }, { "epoch": 0.5736979682203647, "grad_norm": 338.3221130371094, "learning_rate": 4.686047402353433e-06, "loss": 20.6865, "step": 284000 }, { "epoch": 0.5737181688530485, "grad_norm": 279.2441711425781, "learning_rate": 4.68569902607012e-06, "loss": 10.7901, "step": 284010 }, { "epoch": 0.5737383694857323, "grad_norm": 330.756103515625, "learning_rate": 4.685350651318672e-06, "loss": 26.2455, "step": 284020 }, { "epoch": 0.5737585701184161, "grad_norm": 16.19432830810547, "learning_rate": 4.685002278100787e-06, "loss": 20.2577, "step": 284030 }, { "epoch": 0.5737787707511, "grad_norm": 344.1307067871094, "learning_rate": 4.68465390641816e-06, "loss": 19.3012, "step": 284040 }, { "epoch": 0.5737989713837838, "grad_norm": 354.1252746582031, "learning_rate": 4.684305536272496e-06, "loss": 16.9904, "step": 284050 }, { "epoch": 0.5738191720164676, "grad_norm": 284.5281066894531, "learning_rate": 4.683957167665485e-06, "loss": 33.5291, "step": 284060 }, { "epoch": 0.5738393726491514, "grad_norm": 631.17724609375, "learning_rate": 4.6836088005988295e-06, "loss": 23.4246, "step": 284070 }, { "epoch": 0.5738595732818352, "grad_norm": 77.33037567138672, "learning_rate": 4.6832604350742275e-06, "loss": 22.7252, "step": 284080 }, { "epoch": 0.573879773914519, "grad_norm": 674.0731201171875, "learning_rate": 4.682912071093374e-06, "loss": 27.5394, "step": 284090 }, { "epoch": 0.5738999745472029, "grad_norm": 203.76834106445312, "learning_rate": 4.68256370865797e-06, "loss": 11.765, "step": 284100 }, { "epoch": 0.5739201751798866, "grad_norm": 373.0772399902344, "learning_rate": 4.682215347769712e-06, "loss": 24.4991, "step": 284110 }, { "epoch": 0.5739403758125704, "grad_norm": 506.02850341796875, "learning_rate": 4.681866988430297e-06, "loss": 19.8224, "step": 284120 }, { "epoch": 0.5739605764452542, "grad_norm": 205.22738647460938, "learning_rate": 4.6815186306414225e-06, "loss": 14.0659, "step": 284130 }, { "epoch": 0.573980777077938, "grad_norm": 597.6465454101562, "learning_rate": 4.681170274404791e-06, "loss": 23.5036, "step": 284140 }, { "epoch": 0.5740009777106219, "grad_norm": 284.9912109375, "learning_rate": 4.680821919722094e-06, "loss": 11.9414, "step": 284150 }, { "epoch": 0.5740211783433057, "grad_norm": 372.0305480957031, "learning_rate": 4.6804735665950325e-06, "loss": 25.5364, "step": 284160 }, { "epoch": 0.5740413789759895, "grad_norm": 324.0469055175781, "learning_rate": 4.680125215025304e-06, "loss": 17.3912, "step": 284170 }, { "epoch": 0.5740615796086733, "grad_norm": 534.034423828125, "learning_rate": 4.679776865014609e-06, "loss": 41.0064, "step": 284180 }, { "epoch": 0.5740817802413571, "grad_norm": 558.7857666015625, "learning_rate": 4.67942851656464e-06, "loss": 26.8843, "step": 284190 }, { "epoch": 0.574101980874041, "grad_norm": 197.9285430908203, "learning_rate": 4.679080169677097e-06, "loss": 7.9107, "step": 284200 }, { "epoch": 0.5741221815067248, "grad_norm": 89.56254577636719, "learning_rate": 4.678731824353682e-06, "loss": 25.0952, "step": 284210 }, { "epoch": 0.5741423821394086, "grad_norm": 276.2484130859375, "learning_rate": 4.678383480596085e-06, "loss": 16.8667, "step": 284220 }, { "epoch": 0.5741625827720924, "grad_norm": 475.7458190917969, "learning_rate": 4.678035138406009e-06, "loss": 46.1301, "step": 284230 }, { "epoch": 0.5741827834047762, "grad_norm": 346.0648193359375, "learning_rate": 4.677686797785153e-06, "loss": 18.4673, "step": 284240 }, { "epoch": 0.5742029840374601, "grad_norm": 276.5349426269531, "learning_rate": 4.677338458735211e-06, "loss": 36.4956, "step": 284250 }, { "epoch": 0.5742231846701439, "grad_norm": 225.17733764648438, "learning_rate": 4.676990121257882e-06, "loss": 18.4105, "step": 284260 }, { "epoch": 0.5742433853028277, "grad_norm": 150.3558349609375, "learning_rate": 4.676641785354865e-06, "loss": 15.6687, "step": 284270 }, { "epoch": 0.5742635859355115, "grad_norm": 578.6507568359375, "learning_rate": 4.676293451027855e-06, "loss": 15.8957, "step": 284280 }, { "epoch": 0.5742837865681953, "grad_norm": 177.9859161376953, "learning_rate": 4.675945118278552e-06, "loss": 23.0936, "step": 284290 }, { "epoch": 0.5743039872008792, "grad_norm": 101.79006958007812, "learning_rate": 4.675596787108652e-06, "loss": 13.5912, "step": 284300 }, { "epoch": 0.574324187833563, "grad_norm": 320.91107177734375, "learning_rate": 4.675248457519857e-06, "loss": 27.9563, "step": 284310 }, { "epoch": 0.5743443884662468, "grad_norm": 223.50698852539062, "learning_rate": 4.67490012951386e-06, "loss": 15.5595, "step": 284320 }, { "epoch": 0.5743645890989306, "grad_norm": 400.26800537109375, "learning_rate": 4.6745518030923595e-06, "loss": 18.7886, "step": 284330 }, { "epoch": 0.5743847897316144, "grad_norm": 184.7605438232422, "learning_rate": 4.674203478257055e-06, "loss": 20.0908, "step": 284340 }, { "epoch": 0.5744049903642983, "grad_norm": 179.9412384033203, "learning_rate": 4.673855155009644e-06, "loss": 18.4418, "step": 284350 }, { "epoch": 0.574425190996982, "grad_norm": 188.69569396972656, "learning_rate": 4.673506833351821e-06, "loss": 22.0967, "step": 284360 }, { "epoch": 0.5744453916296658, "grad_norm": 636.4642333984375, "learning_rate": 4.673158513285289e-06, "loss": 22.2627, "step": 284370 }, { "epoch": 0.5744655922623496, "grad_norm": 197.29751586914062, "learning_rate": 4.67281019481174e-06, "loss": 19.3154, "step": 284380 }, { "epoch": 0.5744857928950334, "grad_norm": 257.40618896484375, "learning_rate": 4.672461877932877e-06, "loss": 13.7814, "step": 284390 }, { "epoch": 0.5745059935277173, "grad_norm": 83.30047607421875, "learning_rate": 4.672113562650394e-06, "loss": 8.9339, "step": 284400 }, { "epoch": 0.5745261941604011, "grad_norm": 308.0887145996094, "learning_rate": 4.6717652489659894e-06, "loss": 19.2981, "step": 284410 }, { "epoch": 0.5745463947930849, "grad_norm": 870.3465576171875, "learning_rate": 4.671416936881361e-06, "loss": 20.1087, "step": 284420 }, { "epoch": 0.5745665954257687, "grad_norm": 99.390625, "learning_rate": 4.671068626398208e-06, "loss": 32.1828, "step": 284430 }, { "epoch": 0.5745867960584525, "grad_norm": 253.44515991210938, "learning_rate": 4.670720317518226e-06, "loss": 6.9071, "step": 284440 }, { "epoch": 0.5746069966911364, "grad_norm": 5.025718688964844, "learning_rate": 4.670372010243111e-06, "loss": 27.2693, "step": 284450 }, { "epoch": 0.5746271973238202, "grad_norm": 515.9242553710938, "learning_rate": 4.670023704574564e-06, "loss": 13.4875, "step": 284460 }, { "epoch": 0.574647397956504, "grad_norm": 92.65816497802734, "learning_rate": 4.669675400514283e-06, "loss": 24.181, "step": 284470 }, { "epoch": 0.5746675985891878, "grad_norm": 32.45513916015625, "learning_rate": 4.669327098063963e-06, "loss": 18.0267, "step": 284480 }, { "epoch": 0.5746877992218716, "grad_norm": 229.78411865234375, "learning_rate": 4.668978797225303e-06, "loss": 21.9848, "step": 284490 }, { "epoch": 0.5747079998545555, "grad_norm": 579.9200439453125, "learning_rate": 4.668630498000001e-06, "loss": 42.9849, "step": 284500 }, { "epoch": 0.5747282004872393, "grad_norm": 52.02117156982422, "learning_rate": 4.668282200389752e-06, "loss": 17.5985, "step": 284510 }, { "epoch": 0.5747484011199231, "grad_norm": 17.301328659057617, "learning_rate": 4.667933904396255e-06, "loss": 20.2721, "step": 284520 }, { "epoch": 0.5747686017526069, "grad_norm": 216.076171875, "learning_rate": 4.667585610021211e-06, "loss": 13.8389, "step": 284530 }, { "epoch": 0.5747888023852907, "grad_norm": 156.6760711669922, "learning_rate": 4.667237317266311e-06, "loss": 25.52, "step": 284540 }, { "epoch": 0.5748090030179746, "grad_norm": 841.9479370117188, "learning_rate": 4.666889026133257e-06, "loss": 32.1658, "step": 284550 }, { "epoch": 0.5748292036506584, "grad_norm": 407.2962646484375, "learning_rate": 4.666540736623746e-06, "loss": 19.8517, "step": 284560 }, { "epoch": 0.5748494042833422, "grad_norm": 257.7809753417969, "learning_rate": 4.666192448739475e-06, "loss": 15.3499, "step": 284570 }, { "epoch": 0.574869604916026, "grad_norm": 279.848876953125, "learning_rate": 4.665844162482141e-06, "loss": 10.7019, "step": 284580 }, { "epoch": 0.5748898055487098, "grad_norm": 0.0, "learning_rate": 4.665495877853443e-06, "loss": 19.2711, "step": 284590 }, { "epoch": 0.5749100061813937, "grad_norm": 405.44744873046875, "learning_rate": 4.6651475948550765e-06, "loss": 27.6279, "step": 284600 }, { "epoch": 0.5749302068140775, "grad_norm": 237.46156311035156, "learning_rate": 4.66479931348874e-06, "loss": 14.5751, "step": 284610 }, { "epoch": 0.5749504074467612, "grad_norm": 509.83990478515625, "learning_rate": 4.664451033756131e-06, "loss": 12.7964, "step": 284620 }, { "epoch": 0.574970608079445, "grad_norm": 817.7116088867188, "learning_rate": 4.664102755658948e-06, "loss": 23.412, "step": 284630 }, { "epoch": 0.5749908087121288, "grad_norm": 215.30523681640625, "learning_rate": 4.663754479198887e-06, "loss": 13.0564, "step": 284640 }, { "epoch": 0.5750110093448126, "grad_norm": 344.70672607421875, "learning_rate": 4.663406204377645e-06, "loss": 14.765, "step": 284650 }, { "epoch": 0.5750312099774965, "grad_norm": 354.31005859375, "learning_rate": 4.663057931196922e-06, "loss": 27.0633, "step": 284660 }, { "epoch": 0.5750514106101803, "grad_norm": 457.37939453125, "learning_rate": 4.662709659658411e-06, "loss": 21.4511, "step": 284670 }, { "epoch": 0.5750716112428641, "grad_norm": 192.31704711914062, "learning_rate": 4.6623613897638146e-06, "loss": 27.7717, "step": 284680 }, { "epoch": 0.5750918118755479, "grad_norm": 6.149340629577637, "learning_rate": 4.6620131215148275e-06, "loss": 12.694, "step": 284690 }, { "epoch": 0.5751120125082317, "grad_norm": 231.5117645263672, "learning_rate": 4.661664854913147e-06, "loss": 14.5019, "step": 284700 }, { "epoch": 0.5751322131409156, "grad_norm": 331.53680419921875, "learning_rate": 4.661316589960471e-06, "loss": 8.2895, "step": 284710 }, { "epoch": 0.5751524137735994, "grad_norm": 254.59254455566406, "learning_rate": 4.660968326658497e-06, "loss": 16.8034, "step": 284720 }, { "epoch": 0.5751726144062832, "grad_norm": 38.37737274169922, "learning_rate": 4.660620065008923e-06, "loss": 20.5245, "step": 284730 }, { "epoch": 0.575192815038967, "grad_norm": 429.5277099609375, "learning_rate": 4.6602718050134435e-06, "loss": 21.905, "step": 284740 }, { "epoch": 0.5752130156716508, "grad_norm": 129.74261474609375, "learning_rate": 4.659923546673761e-06, "loss": 37.4429, "step": 284750 }, { "epoch": 0.5752332163043347, "grad_norm": 107.42794036865234, "learning_rate": 4.659575289991567e-06, "loss": 19.3007, "step": 284760 }, { "epoch": 0.5752534169370185, "grad_norm": 97.8284912109375, "learning_rate": 4.659227034968563e-06, "loss": 28.0028, "step": 284770 }, { "epoch": 0.5752736175697023, "grad_norm": 264.2268371582031, "learning_rate": 4.658878781606445e-06, "loss": 13.4005, "step": 284780 }, { "epoch": 0.5752938182023861, "grad_norm": 310.3645935058594, "learning_rate": 4.658530529906911e-06, "loss": 25.6673, "step": 284790 }, { "epoch": 0.5753140188350699, "grad_norm": 358.5420227050781, "learning_rate": 4.658182279871657e-06, "loss": 13.7287, "step": 284800 }, { "epoch": 0.5753342194677538, "grad_norm": 175.28829956054688, "learning_rate": 4.65783403150238e-06, "loss": 15.8071, "step": 284810 }, { "epoch": 0.5753544201004376, "grad_norm": 99.36177062988281, "learning_rate": 4.657485784800782e-06, "loss": 14.267, "step": 284820 }, { "epoch": 0.5753746207331214, "grad_norm": 104.55500793457031, "learning_rate": 4.657137539768553e-06, "loss": 24.8641, "step": 284830 }, { "epoch": 0.5753948213658052, "grad_norm": 338.2184753417969, "learning_rate": 4.656789296407396e-06, "loss": 14.832, "step": 284840 }, { "epoch": 0.575415021998489, "grad_norm": 59.25962448120117, "learning_rate": 4.656441054719007e-06, "loss": 16.9345, "step": 284850 }, { "epoch": 0.5754352226311729, "grad_norm": 254.4717254638672, "learning_rate": 4.656092814705082e-06, "loss": 21.174, "step": 284860 }, { "epoch": 0.5754554232638566, "grad_norm": 222.71438598632812, "learning_rate": 4.655744576367318e-06, "loss": 10.6707, "step": 284870 }, { "epoch": 0.5754756238965404, "grad_norm": 158.31739807128906, "learning_rate": 4.655396339707414e-06, "loss": 7.2377, "step": 284880 }, { "epoch": 0.5754958245292242, "grad_norm": 131.37838745117188, "learning_rate": 4.655048104727066e-06, "loss": 6.1105, "step": 284890 }, { "epoch": 0.575516025161908, "grad_norm": 488.2782287597656, "learning_rate": 4.654699871427972e-06, "loss": 36.6512, "step": 284900 }, { "epoch": 0.5755362257945918, "grad_norm": 264.2821044921875, "learning_rate": 4.654351639811828e-06, "loss": 17.7087, "step": 284910 }, { "epoch": 0.5755564264272757, "grad_norm": 348.5777893066406, "learning_rate": 4.654003409880333e-06, "loss": 13.9554, "step": 284920 }, { "epoch": 0.5755766270599595, "grad_norm": 24.661806106567383, "learning_rate": 4.653655181635184e-06, "loss": 17.8823, "step": 284930 }, { "epoch": 0.5755968276926433, "grad_norm": 311.37384033203125, "learning_rate": 4.653306955078077e-06, "loss": 20.7192, "step": 284940 }, { "epoch": 0.5756170283253271, "grad_norm": 245.76300048828125, "learning_rate": 4.652958730210711e-06, "loss": 11.7026, "step": 284950 }, { "epoch": 0.575637228958011, "grad_norm": 267.60345458984375, "learning_rate": 4.65261050703478e-06, "loss": 11.098, "step": 284960 }, { "epoch": 0.5756574295906948, "grad_norm": 125.56407165527344, "learning_rate": 4.652262285551983e-06, "loss": 16.8608, "step": 284970 }, { "epoch": 0.5756776302233786, "grad_norm": 65.43804168701172, "learning_rate": 4.651914065764021e-06, "loss": 12.0831, "step": 284980 }, { "epoch": 0.5756978308560624, "grad_norm": 130.48143005371094, "learning_rate": 4.651565847672584e-06, "loss": 10.0159, "step": 284990 }, { "epoch": 0.5757180314887462, "grad_norm": 326.3272705078125, "learning_rate": 4.651217631279374e-06, "loss": 20.6109, "step": 285000 }, { "epoch": 0.57573823212143, "grad_norm": 477.07147216796875, "learning_rate": 4.650869416586088e-06, "loss": 13.1276, "step": 285010 }, { "epoch": 0.5757584327541139, "grad_norm": 293.3811950683594, "learning_rate": 4.650521203594421e-06, "loss": 11.8014, "step": 285020 }, { "epoch": 0.5757786333867977, "grad_norm": 64.406005859375, "learning_rate": 4.6501729923060705e-06, "loss": 19.1676, "step": 285030 }, { "epoch": 0.5757988340194815, "grad_norm": 235.46177673339844, "learning_rate": 4.649824782722737e-06, "loss": 12.507, "step": 285040 }, { "epoch": 0.5758190346521653, "grad_norm": 420.045166015625, "learning_rate": 4.649476574846113e-06, "loss": 18.2173, "step": 285050 }, { "epoch": 0.5758392352848491, "grad_norm": 421.7812194824219, "learning_rate": 4.649128368677896e-06, "loss": 29.4577, "step": 285060 }, { "epoch": 0.575859435917533, "grad_norm": 14.23394775390625, "learning_rate": 4.648780164219787e-06, "loss": 33.0511, "step": 285070 }, { "epoch": 0.5758796365502168, "grad_norm": 608.6332397460938, "learning_rate": 4.648431961473482e-06, "loss": 17.7862, "step": 285080 }, { "epoch": 0.5758998371829006, "grad_norm": 172.0471649169922, "learning_rate": 4.648083760440676e-06, "loss": 9.3484, "step": 285090 }, { "epoch": 0.5759200378155844, "grad_norm": 408.4461669921875, "learning_rate": 4.6477355611230655e-06, "loss": 21.2969, "step": 285100 }, { "epoch": 0.5759402384482682, "grad_norm": 163.0201416015625, "learning_rate": 4.6473873635223514e-06, "loss": 28.2878, "step": 285110 }, { "epoch": 0.5759604390809521, "grad_norm": 8.549009323120117, "learning_rate": 4.647039167640227e-06, "loss": 21.4488, "step": 285120 }, { "epoch": 0.5759806397136358, "grad_norm": 252.30709838867188, "learning_rate": 4.646690973478391e-06, "loss": 17.4802, "step": 285130 }, { "epoch": 0.5760008403463196, "grad_norm": 123.72653198242188, "learning_rate": 4.6463427810385425e-06, "loss": 11.3209, "step": 285140 }, { "epoch": 0.5760210409790034, "grad_norm": 141.37142944335938, "learning_rate": 4.645994590322373e-06, "loss": 30.1512, "step": 285150 }, { "epoch": 0.5760412416116872, "grad_norm": 8.125244140625, "learning_rate": 4.645646401331585e-06, "loss": 9.9319, "step": 285160 }, { "epoch": 0.5760614422443711, "grad_norm": 137.04541015625, "learning_rate": 4.6452982140678735e-06, "loss": 16.17, "step": 285170 }, { "epoch": 0.5760816428770549, "grad_norm": 475.18157958984375, "learning_rate": 4.644950028532935e-06, "loss": 31.5921, "step": 285180 }, { "epoch": 0.5761018435097387, "grad_norm": 485.63311767578125, "learning_rate": 4.644601844728467e-06, "loss": 21.8617, "step": 285190 }, { "epoch": 0.5761220441424225, "grad_norm": 368.98724365234375, "learning_rate": 4.644253662656167e-06, "loss": 24.5891, "step": 285200 }, { "epoch": 0.5761422447751063, "grad_norm": 218.46810913085938, "learning_rate": 4.643905482317731e-06, "loss": 13.6063, "step": 285210 }, { "epoch": 0.5761624454077902, "grad_norm": 160.7599639892578, "learning_rate": 4.643557303714855e-06, "loss": 15.6272, "step": 285220 }, { "epoch": 0.576182646040474, "grad_norm": 221.4073486328125, "learning_rate": 4.643209126849239e-06, "loss": 19.0862, "step": 285230 }, { "epoch": 0.5762028466731578, "grad_norm": 285.8033142089844, "learning_rate": 4.64286095172258e-06, "loss": 23.5187, "step": 285240 }, { "epoch": 0.5762230473058416, "grad_norm": 1169.0926513671875, "learning_rate": 4.642512778336571e-06, "loss": 31.2371, "step": 285250 }, { "epoch": 0.5762432479385254, "grad_norm": 109.01375579833984, "learning_rate": 4.642164606692912e-06, "loss": 17.6272, "step": 285260 }, { "epoch": 0.5762634485712093, "grad_norm": 158.94308471679688, "learning_rate": 4.641816436793301e-06, "loss": 7.0007, "step": 285270 }, { "epoch": 0.5762836492038931, "grad_norm": 439.58538818359375, "learning_rate": 4.64146826863943e-06, "loss": 36.7868, "step": 285280 }, { "epoch": 0.5763038498365769, "grad_norm": 631.0795288085938, "learning_rate": 4.641120102233001e-06, "loss": 19.3026, "step": 285290 }, { "epoch": 0.5763240504692607, "grad_norm": 0.0, "learning_rate": 4.6407719375757095e-06, "loss": 11.3203, "step": 285300 }, { "epoch": 0.5763442511019445, "grad_norm": 247.76739501953125, "learning_rate": 4.6404237746692514e-06, "loss": 19.0842, "step": 285310 }, { "epoch": 0.5763644517346284, "grad_norm": 253.413330078125, "learning_rate": 4.640075613515324e-06, "loss": 11.5528, "step": 285320 }, { "epoch": 0.5763846523673122, "grad_norm": 410.0896911621094, "learning_rate": 4.639727454115626e-06, "loss": 14.269, "step": 285330 }, { "epoch": 0.576404852999996, "grad_norm": 624.3638305664062, "learning_rate": 4.639379296471851e-06, "loss": 27.8451, "step": 285340 }, { "epoch": 0.5764250536326798, "grad_norm": 391.7532653808594, "learning_rate": 4.639031140585697e-06, "loss": 11.3759, "step": 285350 }, { "epoch": 0.5764452542653636, "grad_norm": 241.78648376464844, "learning_rate": 4.638682986458862e-06, "loss": 21.577, "step": 285360 }, { "epoch": 0.5764654548980475, "grad_norm": 481.373046875, "learning_rate": 4.638334834093044e-06, "loss": 22.5258, "step": 285370 }, { "epoch": 0.5764856555307313, "grad_norm": 502.5126647949219, "learning_rate": 4.637986683489937e-06, "loss": 16.4334, "step": 285380 }, { "epoch": 0.576505856163415, "grad_norm": 269.3331604003906, "learning_rate": 4.637638534651238e-06, "loss": 13.8888, "step": 285390 }, { "epoch": 0.5765260567960988, "grad_norm": 398.0639953613281, "learning_rate": 4.637290387578647e-06, "loss": 32.087, "step": 285400 }, { "epoch": 0.5765462574287826, "grad_norm": 466.3214111328125, "learning_rate": 4.636942242273857e-06, "loss": 21.6463, "step": 285410 }, { "epoch": 0.5765664580614664, "grad_norm": 225.61798095703125, "learning_rate": 4.6365940987385655e-06, "loss": 24.919, "step": 285420 }, { "epoch": 0.5765866586941503, "grad_norm": 59.341495513916016, "learning_rate": 4.636245956974474e-06, "loss": 10.637, "step": 285430 }, { "epoch": 0.5766068593268341, "grad_norm": 190.1195831298828, "learning_rate": 4.635897816983272e-06, "loss": 22.6361, "step": 285440 }, { "epoch": 0.5766270599595179, "grad_norm": 152.51788330078125, "learning_rate": 4.635549678766661e-06, "loss": 11.5018, "step": 285450 }, { "epoch": 0.5766472605922017, "grad_norm": 5.934749126434326, "learning_rate": 4.635201542326337e-06, "loss": 6.4349, "step": 285460 }, { "epoch": 0.5766674612248855, "grad_norm": 267.9740905761719, "learning_rate": 4.634853407663996e-06, "loss": 22.1094, "step": 285470 }, { "epoch": 0.5766876618575694, "grad_norm": 316.3170166015625, "learning_rate": 4.634505274781336e-06, "loss": 7.8531, "step": 285480 }, { "epoch": 0.5767078624902532, "grad_norm": 70.77676391601562, "learning_rate": 4.634157143680053e-06, "loss": 8.6912, "step": 285490 }, { "epoch": 0.576728063122937, "grad_norm": 741.0228271484375, "learning_rate": 4.6338090143618435e-06, "loss": 23.5055, "step": 285500 }, { "epoch": 0.5767482637556208, "grad_norm": 109.23967742919922, "learning_rate": 4.633460886828402e-06, "loss": 9.0652, "step": 285510 }, { "epoch": 0.5767684643883046, "grad_norm": 541.5836181640625, "learning_rate": 4.63311276108143e-06, "loss": 21.2665, "step": 285520 }, { "epoch": 0.5767886650209885, "grad_norm": 46.75623321533203, "learning_rate": 4.632764637122622e-06, "loss": 17.2829, "step": 285530 }, { "epoch": 0.5768088656536723, "grad_norm": 42.46260452270508, "learning_rate": 4.632416514953675e-06, "loss": 22.3401, "step": 285540 }, { "epoch": 0.5768290662863561, "grad_norm": 405.1050109863281, "learning_rate": 4.6320683945762835e-06, "loss": 12.5339, "step": 285550 }, { "epoch": 0.5768492669190399, "grad_norm": 439.3872985839844, "learning_rate": 4.631720275992148e-06, "loss": 23.5747, "step": 285560 }, { "epoch": 0.5768694675517237, "grad_norm": 462.4355773925781, "learning_rate": 4.631372159202962e-06, "loss": 23.7758, "step": 285570 }, { "epoch": 0.5768896681844076, "grad_norm": 407.3216247558594, "learning_rate": 4.631024044210422e-06, "loss": 26.1482, "step": 285580 }, { "epoch": 0.5769098688170914, "grad_norm": 422.0903015136719, "learning_rate": 4.6306759310162304e-06, "loss": 12.7475, "step": 285590 }, { "epoch": 0.5769300694497752, "grad_norm": 482.1695251464844, "learning_rate": 4.630327819622076e-06, "loss": 42.4765, "step": 285600 }, { "epoch": 0.576950270082459, "grad_norm": 391.8899230957031, "learning_rate": 4.62997971002966e-06, "loss": 24.4354, "step": 285610 }, { "epoch": 0.5769704707151428, "grad_norm": 501.4260559082031, "learning_rate": 4.629631602240678e-06, "loss": 16.0163, "step": 285620 }, { "epoch": 0.5769906713478267, "grad_norm": 88.6932144165039, "learning_rate": 4.6292834962568265e-06, "loss": 24.9909, "step": 285630 }, { "epoch": 0.5770108719805104, "grad_norm": 126.9096450805664, "learning_rate": 4.628935392079802e-06, "loss": 6.6741, "step": 285640 }, { "epoch": 0.5770310726131942, "grad_norm": 209.67538452148438, "learning_rate": 4.628587289711303e-06, "loss": 19.6977, "step": 285650 }, { "epoch": 0.577051273245878, "grad_norm": 161.6461639404297, "learning_rate": 4.628239189153023e-06, "loss": 13.7548, "step": 285660 }, { "epoch": 0.5770714738785618, "grad_norm": 2.181365489959717, "learning_rate": 4.627891090406659e-06, "loss": 4.9662, "step": 285670 }, { "epoch": 0.5770916745112457, "grad_norm": 41.47970962524414, "learning_rate": 4.627542993473909e-06, "loss": 22.6649, "step": 285680 }, { "epoch": 0.5771118751439295, "grad_norm": 145.6032257080078, "learning_rate": 4.6271948983564715e-06, "loss": 33.1367, "step": 285690 }, { "epoch": 0.5771320757766133, "grad_norm": 529.3170166015625, "learning_rate": 4.6268468050560394e-06, "loss": 16.6228, "step": 285700 }, { "epoch": 0.5771522764092971, "grad_norm": 165.0868682861328, "learning_rate": 4.626498713574311e-06, "loss": 18.2665, "step": 285710 }, { "epoch": 0.5771724770419809, "grad_norm": 377.662353515625, "learning_rate": 4.626150623912983e-06, "loss": 21.4174, "step": 285720 }, { "epoch": 0.5771926776746648, "grad_norm": 357.315185546875, "learning_rate": 4.625802536073751e-06, "loss": 16.8215, "step": 285730 }, { "epoch": 0.5772128783073486, "grad_norm": 174.01426696777344, "learning_rate": 4.625454450058311e-06, "loss": 25.0397, "step": 285740 }, { "epoch": 0.5772330789400324, "grad_norm": 62.286739349365234, "learning_rate": 4.625106365868363e-06, "loss": 21.871, "step": 285750 }, { "epoch": 0.5772532795727162, "grad_norm": 117.59977722167969, "learning_rate": 4.624758283505599e-06, "loss": 20.7667, "step": 285760 }, { "epoch": 0.5772734802054, "grad_norm": 366.11181640625, "learning_rate": 4.624410202971718e-06, "loss": 16.7235, "step": 285770 }, { "epoch": 0.5772936808380839, "grad_norm": 142.76168823242188, "learning_rate": 4.624062124268418e-06, "loss": 28.5184, "step": 285780 }, { "epoch": 0.5773138814707677, "grad_norm": 247.28787231445312, "learning_rate": 4.6237140473973916e-06, "loss": 21.4347, "step": 285790 }, { "epoch": 0.5773340821034515, "grad_norm": 444.7958068847656, "learning_rate": 4.6233659723603374e-06, "loss": 27.8045, "step": 285800 }, { "epoch": 0.5773542827361353, "grad_norm": 90.63640594482422, "learning_rate": 4.623017899158953e-06, "loss": 16.8898, "step": 285810 }, { "epoch": 0.5773744833688191, "grad_norm": 600.8631591796875, "learning_rate": 4.6226698277949325e-06, "loss": 29.1179, "step": 285820 }, { "epoch": 0.577394684001503, "grad_norm": 470.37493896484375, "learning_rate": 4.622321758269972e-06, "loss": 16.1433, "step": 285830 }, { "epoch": 0.5774148846341868, "grad_norm": 286.3653869628906, "learning_rate": 4.621973690585772e-06, "loss": 19.3398, "step": 285840 }, { "epoch": 0.5774350852668706, "grad_norm": 390.6236267089844, "learning_rate": 4.621625624744026e-06, "loss": 21.1697, "step": 285850 }, { "epoch": 0.5774552858995544, "grad_norm": 352.945068359375, "learning_rate": 4.62127756074643e-06, "loss": 19.3187, "step": 285860 }, { "epoch": 0.5774754865322382, "grad_norm": 450.7550048828125, "learning_rate": 4.620929498594682e-06, "loss": 18.7471, "step": 285870 }, { "epoch": 0.5774956871649221, "grad_norm": 265.99652099609375, "learning_rate": 4.620581438290478e-06, "loss": 17.8625, "step": 285880 }, { "epoch": 0.5775158877976059, "grad_norm": 5.381470680236816, "learning_rate": 4.620233379835513e-06, "loss": 15.511, "step": 285890 }, { "epoch": 0.5775360884302896, "grad_norm": 493.2744445800781, "learning_rate": 4.619885323231484e-06, "loss": 23.9346, "step": 285900 }, { "epoch": 0.5775562890629734, "grad_norm": 299.9281921386719, "learning_rate": 4.6195372684800895e-06, "loss": 13.0542, "step": 285910 }, { "epoch": 0.5775764896956572, "grad_norm": 523.2008056640625, "learning_rate": 4.619189215583023e-06, "loss": 32.6715, "step": 285920 }, { "epoch": 0.577596690328341, "grad_norm": 212.06443786621094, "learning_rate": 4.618841164541982e-06, "loss": 17.1562, "step": 285930 }, { "epoch": 0.5776168909610249, "grad_norm": 379.4666748046875, "learning_rate": 4.618493115358665e-06, "loss": 15.5386, "step": 285940 }, { "epoch": 0.5776370915937087, "grad_norm": 224.1942901611328, "learning_rate": 4.618145068034764e-06, "loss": 11.095, "step": 285950 }, { "epoch": 0.5776572922263925, "grad_norm": 16.193811416625977, "learning_rate": 4.617797022571977e-06, "loss": 13.652, "step": 285960 }, { "epoch": 0.5776774928590763, "grad_norm": 157.01187133789062, "learning_rate": 4.617448978972002e-06, "loss": 13.1918, "step": 285970 }, { "epoch": 0.5776976934917601, "grad_norm": 0.0, "learning_rate": 4.617100937236535e-06, "loss": 11.0576, "step": 285980 }, { "epoch": 0.577717894124444, "grad_norm": 47.21420669555664, "learning_rate": 4.616752897367271e-06, "loss": 19.2694, "step": 285990 }, { "epoch": 0.5777380947571278, "grad_norm": 181.82305908203125, "learning_rate": 4.6164048593659076e-06, "loss": 16.2885, "step": 286000 }, { "epoch": 0.5777582953898116, "grad_norm": 616.8236083984375, "learning_rate": 4.6160568232341406e-06, "loss": 15.4624, "step": 286010 }, { "epoch": 0.5777784960224954, "grad_norm": 96.83794403076172, "learning_rate": 4.615708788973664e-06, "loss": 11.7924, "step": 286020 }, { "epoch": 0.5777986966551792, "grad_norm": 108.90803527832031, "learning_rate": 4.615360756586177e-06, "loss": 34.913, "step": 286030 }, { "epoch": 0.5778188972878631, "grad_norm": 126.91084289550781, "learning_rate": 4.615012726073376e-06, "loss": 15.4759, "step": 286040 }, { "epoch": 0.5778390979205469, "grad_norm": 228.71510314941406, "learning_rate": 4.614664697436956e-06, "loss": 15.0486, "step": 286050 }, { "epoch": 0.5778592985532307, "grad_norm": 82.67373657226562, "learning_rate": 4.614316670678612e-06, "loss": 20.7496, "step": 286060 }, { "epoch": 0.5778794991859145, "grad_norm": 255.6056671142578, "learning_rate": 4.6139686458000445e-06, "loss": 15.6388, "step": 286070 }, { "epoch": 0.5778996998185983, "grad_norm": 126.6413803100586, "learning_rate": 4.613620622802945e-06, "loss": 13.5917, "step": 286080 }, { "epoch": 0.5779199004512822, "grad_norm": 217.02655029296875, "learning_rate": 4.6132726016890126e-06, "loss": 6.3674, "step": 286090 }, { "epoch": 0.577940101083966, "grad_norm": 91.89717102050781, "learning_rate": 4.612924582459943e-06, "loss": 26.9668, "step": 286100 }, { "epoch": 0.5779603017166498, "grad_norm": 507.17633056640625, "learning_rate": 4.612576565117431e-06, "loss": 18.2302, "step": 286110 }, { "epoch": 0.5779805023493336, "grad_norm": 190.70648193359375, "learning_rate": 4.612228549663173e-06, "loss": 22.5558, "step": 286120 }, { "epoch": 0.5780007029820174, "grad_norm": 445.7430114746094, "learning_rate": 4.611880536098867e-06, "loss": 20.5137, "step": 286130 }, { "epoch": 0.5780209036147013, "grad_norm": 107.94547271728516, "learning_rate": 4.61153252442621e-06, "loss": 10.6374, "step": 286140 }, { "epoch": 0.578041104247385, "grad_norm": 129.45318603515625, "learning_rate": 4.611184514646894e-06, "loss": 18.0441, "step": 286150 }, { "epoch": 0.5780613048800688, "grad_norm": 301.3741149902344, "learning_rate": 4.610836506762618e-06, "loss": 14.3528, "step": 286160 }, { "epoch": 0.5780815055127526, "grad_norm": 500.5530090332031, "learning_rate": 4.610488500775078e-06, "loss": 18.0014, "step": 286170 }, { "epoch": 0.5781017061454364, "grad_norm": 124.1290054321289, "learning_rate": 4.61014049668597e-06, "loss": 16.3391, "step": 286180 }, { "epoch": 0.5781219067781203, "grad_norm": 350.3399353027344, "learning_rate": 4.6097924944969885e-06, "loss": 30.6809, "step": 286190 }, { "epoch": 0.5781421074108041, "grad_norm": 169.69781494140625, "learning_rate": 4.609444494209834e-06, "loss": 20.7811, "step": 286200 }, { "epoch": 0.5781623080434879, "grad_norm": 361.3254699707031, "learning_rate": 4.609096495826196e-06, "loss": 20.2071, "step": 286210 }, { "epoch": 0.5781825086761717, "grad_norm": 111.34703063964844, "learning_rate": 4.608748499347777e-06, "loss": 12.4907, "step": 286220 }, { "epoch": 0.5782027093088555, "grad_norm": 175.975830078125, "learning_rate": 4.60840050477627e-06, "loss": 8.7415, "step": 286230 }, { "epoch": 0.5782229099415394, "grad_norm": 156.7076416015625, "learning_rate": 4.608052512113371e-06, "loss": 10.9943, "step": 286240 }, { "epoch": 0.5782431105742232, "grad_norm": 318.4982604980469, "learning_rate": 4.6077045213607765e-06, "loss": 11.4849, "step": 286250 }, { "epoch": 0.578263311206907, "grad_norm": 384.2841491699219, "learning_rate": 4.607356532520183e-06, "loss": 21.6204, "step": 286260 }, { "epoch": 0.5782835118395908, "grad_norm": 76.5994644165039, "learning_rate": 4.607008545593286e-06, "loss": 18.94, "step": 286270 }, { "epoch": 0.5783037124722746, "grad_norm": 200.82200622558594, "learning_rate": 4.606660560581779e-06, "loss": 16.1823, "step": 286280 }, { "epoch": 0.5783239131049585, "grad_norm": 88.69701385498047, "learning_rate": 4.606312577487364e-06, "loss": 25.1912, "step": 286290 }, { "epoch": 0.5783441137376423, "grad_norm": 769.1150512695312, "learning_rate": 4.605964596311733e-06, "loss": 21.3451, "step": 286300 }, { "epoch": 0.5783643143703261, "grad_norm": 200.97628784179688, "learning_rate": 4.605616617056583e-06, "loss": 12.8346, "step": 286310 }, { "epoch": 0.5783845150030099, "grad_norm": 247.99951171875, "learning_rate": 4.6052686397236084e-06, "loss": 12.2658, "step": 286320 }, { "epoch": 0.5784047156356937, "grad_norm": 283.7777404785156, "learning_rate": 4.6049206643145086e-06, "loss": 11.234, "step": 286330 }, { "epoch": 0.5784249162683776, "grad_norm": 635.4935302734375, "learning_rate": 4.604572690830976e-06, "loss": 16.6353, "step": 286340 }, { "epoch": 0.5784451169010614, "grad_norm": 373.9333190917969, "learning_rate": 4.604224719274708e-06, "loss": 24.0457, "step": 286350 }, { "epoch": 0.5784653175337452, "grad_norm": 161.45492553710938, "learning_rate": 4.603876749647404e-06, "loss": 67.5, "step": 286360 }, { "epoch": 0.578485518166429, "grad_norm": 5.060788631439209, "learning_rate": 4.6035287819507515e-06, "loss": 25.5124, "step": 286370 }, { "epoch": 0.5785057187991128, "grad_norm": 162.4276123046875, "learning_rate": 4.603180816186454e-06, "loss": 13.7449, "step": 286380 }, { "epoch": 0.5785259194317967, "grad_norm": 196.43824768066406, "learning_rate": 4.6028328523562065e-06, "loss": 17.3297, "step": 286390 }, { "epoch": 0.5785461200644805, "grad_norm": 198.7954864501953, "learning_rate": 4.602484890461702e-06, "loss": 16.1893, "step": 286400 }, { "epoch": 0.5785663206971642, "grad_norm": 314.6553039550781, "learning_rate": 4.602136930504638e-06, "loss": 23.667, "step": 286410 }, { "epoch": 0.578586521329848, "grad_norm": 360.92095947265625, "learning_rate": 4.601788972486709e-06, "loss": 20.7579, "step": 286420 }, { "epoch": 0.5786067219625318, "grad_norm": 449.2647399902344, "learning_rate": 4.601441016409616e-06, "loss": 10.3241, "step": 286430 }, { "epoch": 0.5786269225952156, "grad_norm": 288.7601623535156, "learning_rate": 4.601093062275048e-06, "loss": 11.3328, "step": 286440 }, { "epoch": 0.5786471232278995, "grad_norm": 309.07940673828125, "learning_rate": 4.600745110084704e-06, "loss": 20.7633, "step": 286450 }, { "epoch": 0.5786673238605833, "grad_norm": 209.19886779785156, "learning_rate": 4.6003971598402825e-06, "loss": 21.2215, "step": 286460 }, { "epoch": 0.5786875244932671, "grad_norm": 287.8970947265625, "learning_rate": 4.600049211543475e-06, "loss": 29.4716, "step": 286470 }, { "epoch": 0.5787077251259509, "grad_norm": 0.0, "learning_rate": 4.599701265195979e-06, "loss": 14.822, "step": 286480 }, { "epoch": 0.5787279257586347, "grad_norm": 3.092839479446411, "learning_rate": 4.599353320799492e-06, "loss": 20.0954, "step": 286490 }, { "epoch": 0.5787481263913186, "grad_norm": 33.70549011230469, "learning_rate": 4.5990053783557066e-06, "loss": 27.2155, "step": 286500 }, { "epoch": 0.5787683270240024, "grad_norm": 180.0660858154297, "learning_rate": 4.598657437866319e-06, "loss": 16.1631, "step": 286510 }, { "epoch": 0.5787885276566862, "grad_norm": 198.73983764648438, "learning_rate": 4.59830949933303e-06, "loss": 13.6565, "step": 286520 }, { "epoch": 0.57880872828937, "grad_norm": 78.77928924560547, "learning_rate": 4.5979615627575295e-06, "loss": 19.2835, "step": 286530 }, { "epoch": 0.5788289289220538, "grad_norm": 226.5452117919922, "learning_rate": 4.597613628141516e-06, "loss": 22.828, "step": 286540 }, { "epoch": 0.5788491295547377, "grad_norm": 318.3174133300781, "learning_rate": 4.597265695486685e-06, "loss": 25.131, "step": 286550 }, { "epoch": 0.5788693301874215, "grad_norm": 200.49415588378906, "learning_rate": 4.5969177647947325e-06, "loss": 16.8728, "step": 286560 }, { "epoch": 0.5788895308201053, "grad_norm": 267.5962829589844, "learning_rate": 4.596569836067353e-06, "loss": 15.8069, "step": 286570 }, { "epoch": 0.5789097314527891, "grad_norm": 292.01531982421875, "learning_rate": 4.596221909306243e-06, "loss": 16.8563, "step": 286580 }, { "epoch": 0.5789299320854729, "grad_norm": 297.97698974609375, "learning_rate": 4.5958739845131e-06, "loss": 18.7495, "step": 286590 }, { "epoch": 0.5789501327181568, "grad_norm": 242.80691528320312, "learning_rate": 4.595526061689617e-06, "loss": 20.1638, "step": 286600 }, { "epoch": 0.5789703333508406, "grad_norm": 212.60997009277344, "learning_rate": 4.5951781408374915e-06, "loss": 31.1352, "step": 286610 }, { "epoch": 0.5789905339835244, "grad_norm": 272.3203430175781, "learning_rate": 4.594830221958419e-06, "loss": 27.4255, "step": 286620 }, { "epoch": 0.5790107346162082, "grad_norm": 27.56256675720215, "learning_rate": 4.594482305054094e-06, "loss": 15.2662, "step": 286630 }, { "epoch": 0.579030935248892, "grad_norm": 245.6792449951172, "learning_rate": 4.594134390126213e-06, "loss": 21.0126, "step": 286640 }, { "epoch": 0.5790511358815759, "grad_norm": 301.4215393066406, "learning_rate": 4.593786477176473e-06, "loss": 14.3798, "step": 286650 }, { "epoch": 0.5790713365142596, "grad_norm": 211.72592163085938, "learning_rate": 4.593438566206567e-06, "loss": 19.3983, "step": 286660 }, { "epoch": 0.5790915371469434, "grad_norm": 305.2156677246094, "learning_rate": 4.593090657218192e-06, "loss": 20.7774, "step": 286670 }, { "epoch": 0.5791117377796272, "grad_norm": 152.0069580078125, "learning_rate": 4.592742750213045e-06, "loss": 29.5656, "step": 286680 }, { "epoch": 0.579131938412311, "grad_norm": 259.4425964355469, "learning_rate": 4.59239484519282e-06, "loss": 16.3694, "step": 286690 }, { "epoch": 0.5791521390449949, "grad_norm": 15.691226959228516, "learning_rate": 4.592046942159213e-06, "loss": 15.2952, "step": 286700 }, { "epoch": 0.5791723396776787, "grad_norm": 274.0994567871094, "learning_rate": 4.591699041113921e-06, "loss": 20.2142, "step": 286710 }, { "epoch": 0.5791925403103625, "grad_norm": 460.452880859375, "learning_rate": 4.5913511420586365e-06, "loss": 31.687, "step": 286720 }, { "epoch": 0.5792127409430463, "grad_norm": 251.98597717285156, "learning_rate": 4.591003244995056e-06, "loss": 22.3667, "step": 286730 }, { "epoch": 0.5792329415757301, "grad_norm": 219.96603393554688, "learning_rate": 4.5906553499248775e-06, "loss": 25.5671, "step": 286740 }, { "epoch": 0.579253142208414, "grad_norm": 143.68649291992188, "learning_rate": 4.590307456849797e-06, "loss": 12.7311, "step": 286750 }, { "epoch": 0.5792733428410978, "grad_norm": 137.46612548828125, "learning_rate": 4.589959565771505e-06, "loss": 16.9175, "step": 286760 }, { "epoch": 0.5792935434737816, "grad_norm": 344.4246520996094, "learning_rate": 4.589611676691702e-06, "loss": 8.2667, "step": 286770 }, { "epoch": 0.5793137441064654, "grad_norm": 258.138671875, "learning_rate": 4.589263789612083e-06, "loss": 32.2951, "step": 286780 }, { "epoch": 0.5793339447391492, "grad_norm": 191.00392150878906, "learning_rate": 4.588915904534341e-06, "loss": 8.3372, "step": 286790 }, { "epoch": 0.579354145371833, "grad_norm": 342.6805114746094, "learning_rate": 4.588568021460172e-06, "loss": 17.7741, "step": 286800 }, { "epoch": 0.5793743460045169, "grad_norm": 271.469482421875, "learning_rate": 4.588220140391276e-06, "loss": 32.6254, "step": 286810 }, { "epoch": 0.5793945466372007, "grad_norm": 413.4187927246094, "learning_rate": 4.5878722613293415e-06, "loss": 20.1382, "step": 286820 }, { "epoch": 0.5794147472698845, "grad_norm": 327.7380676269531, "learning_rate": 4.5875243842760684e-06, "loss": 12.983, "step": 286830 }, { "epoch": 0.5794349479025683, "grad_norm": 208.91575622558594, "learning_rate": 4.5871765092331536e-06, "loss": 26.9232, "step": 286840 }, { "epoch": 0.5794551485352522, "grad_norm": 295.54974365234375, "learning_rate": 4.586828636202288e-06, "loss": 17.8328, "step": 286850 }, { "epoch": 0.579475349167936, "grad_norm": 174.9067840576172, "learning_rate": 4.58648076518517e-06, "loss": 16.7348, "step": 286860 }, { "epoch": 0.5794955498006198, "grad_norm": 269.4542541503906, "learning_rate": 4.586132896183494e-06, "loss": 12.7341, "step": 286870 }, { "epoch": 0.5795157504333036, "grad_norm": 172.29776000976562, "learning_rate": 4.5857850291989596e-06, "loss": 21.896, "step": 286880 }, { "epoch": 0.5795359510659874, "grad_norm": 455.052734375, "learning_rate": 4.585437164233255e-06, "loss": 31.3553, "step": 286890 }, { "epoch": 0.5795561516986713, "grad_norm": 100.49359130859375, "learning_rate": 4.5850893012880806e-06, "loss": 12.5031, "step": 286900 }, { "epoch": 0.5795763523313551, "grad_norm": 294.8809814453125, "learning_rate": 4.584741440365131e-06, "loss": 21.917, "step": 286910 }, { "epoch": 0.5795965529640388, "grad_norm": 27.525474548339844, "learning_rate": 4.5843935814661e-06, "loss": 23.7134, "step": 286920 }, { "epoch": 0.5796167535967226, "grad_norm": 207.41574096679688, "learning_rate": 4.584045724592686e-06, "loss": 9.5036, "step": 286930 }, { "epoch": 0.5796369542294064, "grad_norm": 717.3247680664062, "learning_rate": 4.583697869746582e-06, "loss": 28.7792, "step": 286940 }, { "epoch": 0.5796571548620902, "grad_norm": 65.22682189941406, "learning_rate": 4.583350016929484e-06, "loss": 16.5211, "step": 286950 }, { "epoch": 0.5796773554947741, "grad_norm": 291.2391357421875, "learning_rate": 4.583002166143086e-06, "loss": 13.4069, "step": 286960 }, { "epoch": 0.5796975561274579, "grad_norm": 32.96533966064453, "learning_rate": 4.582654317389088e-06, "loss": 9.2696, "step": 286970 }, { "epoch": 0.5797177567601417, "grad_norm": 528.3932495117188, "learning_rate": 4.58230647066918e-06, "loss": 19.5158, "step": 286980 }, { "epoch": 0.5797379573928255, "grad_norm": 287.6618957519531, "learning_rate": 4.581958625985059e-06, "loss": 16.5194, "step": 286990 }, { "epoch": 0.5797581580255093, "grad_norm": 174.62303161621094, "learning_rate": 4.581610783338424e-06, "loss": 22.181, "step": 287000 }, { "epoch": 0.5797783586581932, "grad_norm": 451.3210144042969, "learning_rate": 4.581262942730965e-06, "loss": 14.1049, "step": 287010 }, { "epoch": 0.579798559290877, "grad_norm": 441.7242431640625, "learning_rate": 4.58091510416438e-06, "loss": 14.1918, "step": 287020 }, { "epoch": 0.5798187599235608, "grad_norm": 105.76792907714844, "learning_rate": 4.580567267640363e-06, "loss": 19.4641, "step": 287030 }, { "epoch": 0.5798389605562446, "grad_norm": 156.46128845214844, "learning_rate": 4.580219433160613e-06, "loss": 46.9274, "step": 287040 }, { "epoch": 0.5798591611889284, "grad_norm": 101.82992553710938, "learning_rate": 4.579871600726819e-06, "loss": 25.0936, "step": 287050 }, { "epoch": 0.5798793618216123, "grad_norm": 702.6982421875, "learning_rate": 4.579523770340681e-06, "loss": 18.8524, "step": 287060 }, { "epoch": 0.5798995624542961, "grad_norm": 350.4468994140625, "learning_rate": 4.579175942003895e-06, "loss": 20.2524, "step": 287070 }, { "epoch": 0.5799197630869799, "grad_norm": 85.17183685302734, "learning_rate": 4.578828115718153e-06, "loss": 11.3529, "step": 287080 }, { "epoch": 0.5799399637196637, "grad_norm": 484.6253967285156, "learning_rate": 4.578480291485153e-06, "loss": 22.7803, "step": 287090 }, { "epoch": 0.5799601643523475, "grad_norm": 102.88172149658203, "learning_rate": 4.578132469306588e-06, "loss": 17.9418, "step": 287100 }, { "epoch": 0.5799803649850314, "grad_norm": 16.67161750793457, "learning_rate": 4.5777846491841536e-06, "loss": 16.3473, "step": 287110 }, { "epoch": 0.5800005656177152, "grad_norm": 170.2120819091797, "learning_rate": 4.577436831119545e-06, "loss": 12.5137, "step": 287120 }, { "epoch": 0.580020766250399, "grad_norm": 5.024174213409424, "learning_rate": 4.577089015114461e-06, "loss": 15.326, "step": 287130 }, { "epoch": 0.5800409668830828, "grad_norm": 543.097900390625, "learning_rate": 4.57674120117059e-06, "loss": 17.4466, "step": 287140 }, { "epoch": 0.5800611675157666, "grad_norm": 252.35519409179688, "learning_rate": 4.576393389289633e-06, "loss": 13.2793, "step": 287150 }, { "epoch": 0.5800813681484505, "grad_norm": 233.26773071289062, "learning_rate": 4.576045579473284e-06, "loss": 21.4144, "step": 287160 }, { "epoch": 0.5801015687811343, "grad_norm": 121.49406433105469, "learning_rate": 4.575697771723236e-06, "loss": 8.7378, "step": 287170 }, { "epoch": 0.580121769413818, "grad_norm": 193.93386840820312, "learning_rate": 4.575349966041187e-06, "loss": 10.1595, "step": 287180 }, { "epoch": 0.5801419700465018, "grad_norm": 218.6847381591797, "learning_rate": 4.5750021624288285e-06, "loss": 6.1024, "step": 287190 }, { "epoch": 0.5801621706791856, "grad_norm": 264.17218017578125, "learning_rate": 4.57465436088786e-06, "loss": 23.9838, "step": 287200 }, { "epoch": 0.5801823713118694, "grad_norm": 153.8498077392578, "learning_rate": 4.574306561419974e-06, "loss": 19.2207, "step": 287210 }, { "epoch": 0.5802025719445533, "grad_norm": 225.393798828125, "learning_rate": 4.573958764026866e-06, "loss": 11.2855, "step": 287220 }, { "epoch": 0.5802227725772371, "grad_norm": 1475.801513671875, "learning_rate": 4.573610968710233e-06, "loss": 22.2295, "step": 287230 }, { "epoch": 0.5802429732099209, "grad_norm": 381.99139404296875, "learning_rate": 4.573263175471766e-06, "loss": 16.3363, "step": 287240 }, { "epoch": 0.5802631738426047, "grad_norm": 83.75250244140625, "learning_rate": 4.572915384313163e-06, "loss": 10.4685, "step": 287250 }, { "epoch": 0.5802833744752885, "grad_norm": 161.74557495117188, "learning_rate": 4.57256759523612e-06, "loss": 21.1192, "step": 287260 }, { "epoch": 0.5803035751079724, "grad_norm": 192.98358154296875, "learning_rate": 4.572219808242328e-06, "loss": 20.1399, "step": 287270 }, { "epoch": 0.5803237757406562, "grad_norm": 295.7524108886719, "learning_rate": 4.571872023333487e-06, "loss": 20.4163, "step": 287280 }, { "epoch": 0.58034397637334, "grad_norm": 498.69732666015625, "learning_rate": 4.57152424051129e-06, "loss": 23.3674, "step": 287290 }, { "epoch": 0.5803641770060238, "grad_norm": 37.353179931640625, "learning_rate": 4.571176459777431e-06, "loss": 32.8831, "step": 287300 }, { "epoch": 0.5803843776387076, "grad_norm": 188.8733367919922, "learning_rate": 4.570828681133606e-06, "loss": 14.8546, "step": 287310 }, { "epoch": 0.5804045782713915, "grad_norm": 185.47702026367188, "learning_rate": 4.570480904581511e-06, "loss": 27.8024, "step": 287320 }, { "epoch": 0.5804247789040753, "grad_norm": 383.5218505859375, "learning_rate": 4.5701331301228395e-06, "loss": 23.0716, "step": 287330 }, { "epoch": 0.5804449795367591, "grad_norm": 290.7412109375, "learning_rate": 4.5697853577592846e-06, "loss": 20.8834, "step": 287340 }, { "epoch": 0.5804651801694429, "grad_norm": 384.252685546875, "learning_rate": 4.569437587492545e-06, "loss": 12.3742, "step": 287350 }, { "epoch": 0.5804853808021267, "grad_norm": 363.5523681640625, "learning_rate": 4.569089819324317e-06, "loss": 11.4298, "step": 287360 }, { "epoch": 0.5805055814348106, "grad_norm": 560.8991088867188, "learning_rate": 4.56874205325629e-06, "loss": 16.5769, "step": 287370 }, { "epoch": 0.5805257820674944, "grad_norm": 176.21424865722656, "learning_rate": 4.568394289290163e-06, "loss": 20.1068, "step": 287380 }, { "epoch": 0.5805459827001782, "grad_norm": 80.88639068603516, "learning_rate": 4.5680465274276306e-06, "loss": 13.516, "step": 287390 }, { "epoch": 0.580566183332862, "grad_norm": 320.5604248046875, "learning_rate": 4.5676987676703865e-06, "loss": 16.702, "step": 287400 }, { "epoch": 0.5805863839655458, "grad_norm": 187.35928344726562, "learning_rate": 4.567351010020124e-06, "loss": 15.3613, "step": 287410 }, { "epoch": 0.5806065845982297, "grad_norm": 331.6491394042969, "learning_rate": 4.567003254478545e-06, "loss": 18.9145, "step": 287420 }, { "epoch": 0.5806267852309134, "grad_norm": 348.3050537109375, "learning_rate": 4.566655501047335e-06, "loss": 20.9906, "step": 287430 }, { "epoch": 0.5806469858635972, "grad_norm": 336.8033447265625, "learning_rate": 4.566307749728195e-06, "loss": 21.5479, "step": 287440 }, { "epoch": 0.580667186496281, "grad_norm": 450.8565368652344, "learning_rate": 4.56596000052282e-06, "loss": 20.7837, "step": 287450 }, { "epoch": 0.5806873871289648, "grad_norm": 184.52748107910156, "learning_rate": 4.565612253432902e-06, "loss": 14.4266, "step": 287460 }, { "epoch": 0.5807075877616487, "grad_norm": 159.72250366210938, "learning_rate": 4.565264508460137e-06, "loss": 10.6029, "step": 287470 }, { "epoch": 0.5807277883943325, "grad_norm": 286.60107421875, "learning_rate": 4.564916765606218e-06, "loss": 14.5463, "step": 287480 }, { "epoch": 0.5807479890270163, "grad_norm": 350.1337890625, "learning_rate": 4.564569024872846e-06, "loss": 17.6885, "step": 287490 }, { "epoch": 0.5807681896597001, "grad_norm": 125.16842651367188, "learning_rate": 4.564221286261709e-06, "loss": 6.576, "step": 287500 }, { "epoch": 0.5807883902923839, "grad_norm": 359.0954895019531, "learning_rate": 4.563873549774506e-06, "loss": 20.9535, "step": 287510 }, { "epoch": 0.5808085909250678, "grad_norm": 125.70683288574219, "learning_rate": 4.56352581541293e-06, "loss": 27.8897, "step": 287520 }, { "epoch": 0.5808287915577516, "grad_norm": 292.8467102050781, "learning_rate": 4.5631780831786765e-06, "loss": 23.0739, "step": 287530 }, { "epoch": 0.5808489921904354, "grad_norm": 531.7199096679688, "learning_rate": 4.5628303530734395e-06, "loss": 23.238, "step": 287540 }, { "epoch": 0.5808691928231192, "grad_norm": 139.6114959716797, "learning_rate": 4.5624826250989156e-06, "loss": 14.6786, "step": 287550 }, { "epoch": 0.580889393455803, "grad_norm": 26.32978057861328, "learning_rate": 4.562134899256797e-06, "loss": 23.2697, "step": 287560 }, { "epoch": 0.5809095940884869, "grad_norm": 379.4330139160156, "learning_rate": 4.561787175548779e-06, "loss": 16.2039, "step": 287570 }, { "epoch": 0.5809297947211707, "grad_norm": 252.7041015625, "learning_rate": 4.56143945397656e-06, "loss": 14.9163, "step": 287580 }, { "epoch": 0.5809499953538545, "grad_norm": 223.694091796875, "learning_rate": 4.5610917345418285e-06, "loss": 13.7453, "step": 287590 }, { "epoch": 0.5809701959865383, "grad_norm": 243.51058959960938, "learning_rate": 4.560744017246284e-06, "loss": 17.3256, "step": 287600 }, { "epoch": 0.5809903966192221, "grad_norm": 79.69801330566406, "learning_rate": 4.560396302091622e-06, "loss": 20.0409, "step": 287610 }, { "epoch": 0.581010597251906, "grad_norm": 157.72047424316406, "learning_rate": 4.5600485890795325e-06, "loss": 11.8577, "step": 287620 }, { "epoch": 0.5810307978845898, "grad_norm": 231.01861572265625, "learning_rate": 4.559700878211714e-06, "loss": 14.6216, "step": 287630 }, { "epoch": 0.5810509985172736, "grad_norm": 158.359375, "learning_rate": 4.5593531694898576e-06, "loss": 17.7361, "step": 287640 }, { "epoch": 0.5810711991499574, "grad_norm": 286.88018798828125, "learning_rate": 4.559005462915665e-06, "loss": 21.9961, "step": 287650 }, { "epoch": 0.5810913997826412, "grad_norm": 365.7005920410156, "learning_rate": 4.558657758490822e-06, "loss": 13.7816, "step": 287660 }, { "epoch": 0.5811116004153251, "grad_norm": 300.8783264160156, "learning_rate": 4.5583100562170294e-06, "loss": 13.2067, "step": 287670 }, { "epoch": 0.5811318010480089, "grad_norm": 380.34954833984375, "learning_rate": 4.55796235609598e-06, "loss": 13.0477, "step": 287680 }, { "epoch": 0.5811520016806926, "grad_norm": 225.0839385986328, "learning_rate": 4.557614658129369e-06, "loss": 8.7763, "step": 287690 }, { "epoch": 0.5811722023133764, "grad_norm": 268.810546875, "learning_rate": 4.557266962318889e-06, "loss": 21.9286, "step": 287700 }, { "epoch": 0.5811924029460602, "grad_norm": 50.3661003112793, "learning_rate": 4.556919268666238e-06, "loss": 20.3123, "step": 287710 }, { "epoch": 0.581212603578744, "grad_norm": 151.4187774658203, "learning_rate": 4.5565715771731075e-06, "loss": 22.9909, "step": 287720 }, { "epoch": 0.5812328042114279, "grad_norm": 209.9427490234375, "learning_rate": 4.556223887841192e-06, "loss": 20.9039, "step": 287730 }, { "epoch": 0.5812530048441117, "grad_norm": 189.8602294921875, "learning_rate": 4.555876200672192e-06, "loss": 18.6435, "step": 287740 }, { "epoch": 0.5812732054767955, "grad_norm": 1.3879389762878418, "learning_rate": 4.555528515667793e-06, "loss": 8.8686, "step": 287750 }, { "epoch": 0.5812934061094793, "grad_norm": 191.26828002929688, "learning_rate": 4.555180832829695e-06, "loss": 21.6437, "step": 287760 }, { "epoch": 0.5813136067421631, "grad_norm": 368.77996826171875, "learning_rate": 4.554833152159594e-06, "loss": 14.7504, "step": 287770 }, { "epoch": 0.581333807374847, "grad_norm": 33.17613983154297, "learning_rate": 4.55448547365918e-06, "loss": 9.8667, "step": 287780 }, { "epoch": 0.5813540080075308, "grad_norm": 216.5989990234375, "learning_rate": 4.5541377973301505e-06, "loss": 18.5552, "step": 287790 }, { "epoch": 0.5813742086402146, "grad_norm": 174.69696044921875, "learning_rate": 4.553790123174198e-06, "loss": 24.6479, "step": 287800 }, { "epoch": 0.5813944092728984, "grad_norm": 377.2240295410156, "learning_rate": 4.553442451193021e-06, "loss": 16.7069, "step": 287810 }, { "epoch": 0.5814146099055822, "grad_norm": 152.625244140625, "learning_rate": 4.553094781388309e-06, "loss": 15.7786, "step": 287820 }, { "epoch": 0.5814348105382661, "grad_norm": 305.63970947265625, "learning_rate": 4.552747113761759e-06, "loss": 10.7386, "step": 287830 }, { "epoch": 0.5814550111709499, "grad_norm": 419.4303283691406, "learning_rate": 4.552399448315067e-06, "loss": 23.1919, "step": 287840 }, { "epoch": 0.5814752118036337, "grad_norm": 462.816162109375, "learning_rate": 4.552051785049925e-06, "loss": 22.1566, "step": 287850 }, { "epoch": 0.5814954124363175, "grad_norm": 99.59991455078125, "learning_rate": 4.551704123968027e-06, "loss": 11.351, "step": 287860 }, { "epoch": 0.5815156130690013, "grad_norm": 0.0, "learning_rate": 4.55135646507107e-06, "loss": 22.6739, "step": 287870 }, { "epoch": 0.5815358137016852, "grad_norm": 20.855464935302734, "learning_rate": 4.551008808360747e-06, "loss": 22.197, "step": 287880 }, { "epoch": 0.581556014334369, "grad_norm": 174.08108520507812, "learning_rate": 4.550661153838752e-06, "loss": 15.6132, "step": 287890 }, { "epoch": 0.5815762149670528, "grad_norm": 443.4092102050781, "learning_rate": 4.5503135015067815e-06, "loss": 19.186, "step": 287900 }, { "epoch": 0.5815964155997366, "grad_norm": 330.13623046875, "learning_rate": 4.549965851366528e-06, "loss": 17.721, "step": 287910 }, { "epoch": 0.5816166162324204, "grad_norm": 350.770263671875, "learning_rate": 4.549618203419684e-06, "loss": 13.0152, "step": 287920 }, { "epoch": 0.5816368168651043, "grad_norm": 365.4722900390625, "learning_rate": 4.549270557667949e-06, "loss": 19.1132, "step": 287930 }, { "epoch": 0.581657017497788, "grad_norm": 685.891845703125, "learning_rate": 4.548922914113014e-06, "loss": 40.3823, "step": 287940 }, { "epoch": 0.5816772181304718, "grad_norm": 496.9896545410156, "learning_rate": 4.548575272756573e-06, "loss": 13.2885, "step": 287950 }, { "epoch": 0.5816974187631556, "grad_norm": 390.8963928222656, "learning_rate": 4.548227633600322e-06, "loss": 18.2851, "step": 287960 }, { "epoch": 0.5817176193958394, "grad_norm": 309.67108154296875, "learning_rate": 4.547879996645956e-06, "loss": 20.2207, "step": 287970 }, { "epoch": 0.5817378200285233, "grad_norm": 87.74240112304688, "learning_rate": 4.5475323618951665e-06, "loss": 20.618, "step": 287980 }, { "epoch": 0.5817580206612071, "grad_norm": 250.9869842529297, "learning_rate": 4.5471847293496495e-06, "loss": 22.9297, "step": 287990 }, { "epoch": 0.5817782212938909, "grad_norm": 29.114727020263672, "learning_rate": 4.546837099011101e-06, "loss": 13.3966, "step": 288000 }, { "epoch": 0.5817984219265747, "grad_norm": 203.90579223632812, "learning_rate": 4.546489470881211e-06, "loss": 15.2326, "step": 288010 }, { "epoch": 0.5818186225592585, "grad_norm": 230.37391662597656, "learning_rate": 4.5461418449616765e-06, "loss": 19.2027, "step": 288020 }, { "epoch": 0.5818388231919424, "grad_norm": 22.330493927001953, "learning_rate": 4.5457942212541944e-06, "loss": 23.1218, "step": 288030 }, { "epoch": 0.5818590238246262, "grad_norm": 116.11054229736328, "learning_rate": 4.545446599760453e-06, "loss": 15.183, "step": 288040 }, { "epoch": 0.58187922445731, "grad_norm": 699.3388671875, "learning_rate": 4.545098980482151e-06, "loss": 19.2425, "step": 288050 }, { "epoch": 0.5818994250899938, "grad_norm": 695.6719360351562, "learning_rate": 4.544751363420981e-06, "loss": 24.3165, "step": 288060 }, { "epoch": 0.5819196257226776, "grad_norm": 550.4653930664062, "learning_rate": 4.544403748578638e-06, "loss": 24.7628, "step": 288070 }, { "epoch": 0.5819398263553615, "grad_norm": 362.96539306640625, "learning_rate": 4.544056135956816e-06, "loss": 17.7441, "step": 288080 }, { "epoch": 0.5819600269880453, "grad_norm": 0.0, "learning_rate": 4.543708525557208e-06, "loss": 36.2894, "step": 288090 }, { "epoch": 0.5819802276207291, "grad_norm": 121.93158721923828, "learning_rate": 4.543360917381512e-06, "loss": 12.434, "step": 288100 }, { "epoch": 0.5820004282534129, "grad_norm": 51.09870147705078, "learning_rate": 4.543013311431417e-06, "loss": 11.4266, "step": 288110 }, { "epoch": 0.5820206288860967, "grad_norm": 95.76197052001953, "learning_rate": 4.54266570770862e-06, "loss": 13.8854, "step": 288120 }, { "epoch": 0.5820408295187806, "grad_norm": 595.9869995117188, "learning_rate": 4.542318106214817e-06, "loss": 25.0596, "step": 288130 }, { "epoch": 0.5820610301514644, "grad_norm": 348.0388488769531, "learning_rate": 4.541970506951698e-06, "loss": 26.3782, "step": 288140 }, { "epoch": 0.5820812307841482, "grad_norm": 21.491920471191406, "learning_rate": 4.541622909920959e-06, "loss": 15.0863, "step": 288150 }, { "epoch": 0.582101431416832, "grad_norm": 1.5433051586151123, "learning_rate": 4.541275315124296e-06, "loss": 10.843, "step": 288160 }, { "epoch": 0.5821216320495158, "grad_norm": 456.2001647949219, "learning_rate": 4.5409277225634e-06, "loss": 14.4076, "step": 288170 }, { "epoch": 0.5821418326821997, "grad_norm": 135.13168334960938, "learning_rate": 4.540580132239966e-06, "loss": 22.0623, "step": 288180 }, { "epoch": 0.5821620333148835, "grad_norm": 296.9651184082031, "learning_rate": 4.540232544155692e-06, "loss": 27.3622, "step": 288190 }, { "epoch": 0.5821822339475672, "grad_norm": 518.2774658203125, "learning_rate": 4.539884958312265e-06, "loss": 18.8996, "step": 288200 }, { "epoch": 0.582202434580251, "grad_norm": 274.4410400390625, "learning_rate": 4.539537374711384e-06, "loss": 18.7855, "step": 288210 }, { "epoch": 0.5822226352129348, "grad_norm": 232.41693115234375, "learning_rate": 4.5391897933547436e-06, "loss": 22.8382, "step": 288220 }, { "epoch": 0.5822428358456186, "grad_norm": 644.7366943359375, "learning_rate": 4.538842214244035e-06, "loss": 29.3684, "step": 288230 }, { "epoch": 0.5822630364783025, "grad_norm": 364.9535827636719, "learning_rate": 4.538494637380953e-06, "loss": 20.6363, "step": 288240 }, { "epoch": 0.5822832371109863, "grad_norm": 264.23516845703125, "learning_rate": 4.538147062767191e-06, "loss": 22.3197, "step": 288250 }, { "epoch": 0.5823034377436701, "grad_norm": 39.59340286254883, "learning_rate": 4.5377994904044485e-06, "loss": 21.7181, "step": 288260 }, { "epoch": 0.5823236383763539, "grad_norm": 489.9224548339844, "learning_rate": 4.537451920294411e-06, "loss": 23.2998, "step": 288270 }, { "epoch": 0.5823438390090377, "grad_norm": 375.8077697753906, "learning_rate": 4.537104352438779e-06, "loss": 18.9208, "step": 288280 }, { "epoch": 0.5823640396417216, "grad_norm": 263.1250305175781, "learning_rate": 4.5367567868392445e-06, "loss": 15.324, "step": 288290 }, { "epoch": 0.5823842402744054, "grad_norm": 357.82403564453125, "learning_rate": 4.5364092234975e-06, "loss": 14.8971, "step": 288300 }, { "epoch": 0.5824044409070892, "grad_norm": 551.8673706054688, "learning_rate": 4.536061662415241e-06, "loss": 17.0489, "step": 288310 }, { "epoch": 0.582424641539773, "grad_norm": 198.12448120117188, "learning_rate": 4.535714103594162e-06, "loss": 13.9833, "step": 288320 }, { "epoch": 0.5824448421724568, "grad_norm": 224.50852966308594, "learning_rate": 4.535366547035955e-06, "loss": 8.0892, "step": 288330 }, { "epoch": 0.5824650428051407, "grad_norm": 1059.10205078125, "learning_rate": 4.535018992742315e-06, "loss": 31.4095, "step": 288340 }, { "epoch": 0.5824852434378245, "grad_norm": 234.50794982910156, "learning_rate": 4.534671440714939e-06, "loss": 22.3984, "step": 288350 }, { "epoch": 0.5825054440705083, "grad_norm": 691.2176513671875, "learning_rate": 4.534323890955514e-06, "loss": 26.4925, "step": 288360 }, { "epoch": 0.5825256447031921, "grad_norm": 364.050537109375, "learning_rate": 4.533976343465739e-06, "loss": 22.3874, "step": 288370 }, { "epoch": 0.582545845335876, "grad_norm": 270.946533203125, "learning_rate": 4.533628798247308e-06, "loss": 17.0315, "step": 288380 }, { "epoch": 0.5825660459685598, "grad_norm": 448.8751220703125, "learning_rate": 4.533281255301913e-06, "loss": 26.4876, "step": 288390 }, { "epoch": 0.5825862466012436, "grad_norm": 365.2612609863281, "learning_rate": 4.532933714631248e-06, "loss": 14.3784, "step": 288400 }, { "epoch": 0.5826064472339274, "grad_norm": 306.1402893066406, "learning_rate": 4.532586176237007e-06, "loss": 15.8691, "step": 288410 }, { "epoch": 0.5826266478666112, "grad_norm": 390.19873046875, "learning_rate": 4.532238640120887e-06, "loss": 23.7989, "step": 288420 }, { "epoch": 0.582646848499295, "grad_norm": 181.97756958007812, "learning_rate": 4.531891106284576e-06, "loss": 17.4352, "step": 288430 }, { "epoch": 0.5826670491319789, "grad_norm": 31.97527503967285, "learning_rate": 4.531543574729772e-06, "loss": 22.0003, "step": 288440 }, { "epoch": 0.5826872497646627, "grad_norm": 71.65612030029297, "learning_rate": 4.5311960454581685e-06, "loss": 20.7366, "step": 288450 }, { "epoch": 0.5827074503973464, "grad_norm": 384.9290466308594, "learning_rate": 4.5308485184714585e-06, "loss": 16.9185, "step": 288460 }, { "epoch": 0.5827276510300302, "grad_norm": 291.6744384765625, "learning_rate": 4.530500993771335e-06, "loss": 13.7921, "step": 288470 }, { "epoch": 0.582747851662714, "grad_norm": 40.558841705322266, "learning_rate": 4.530153471359495e-06, "loss": 15.1317, "step": 288480 }, { "epoch": 0.5827680522953979, "grad_norm": 277.6069030761719, "learning_rate": 4.529805951237628e-06, "loss": 14.4757, "step": 288490 }, { "epoch": 0.5827882529280817, "grad_norm": 166.684326171875, "learning_rate": 4.529458433407429e-06, "loss": 13.7874, "step": 288500 }, { "epoch": 0.5828084535607655, "grad_norm": 556.7401123046875, "learning_rate": 4.529110917870594e-06, "loss": 25.6647, "step": 288510 }, { "epoch": 0.5828286541934493, "grad_norm": 161.28672790527344, "learning_rate": 4.528763404628815e-06, "loss": 20.3267, "step": 288520 }, { "epoch": 0.5828488548261331, "grad_norm": 372.0959777832031, "learning_rate": 4.528415893683785e-06, "loss": 15.8105, "step": 288530 }, { "epoch": 0.582869055458817, "grad_norm": 136.06529235839844, "learning_rate": 4.5280683850372e-06, "loss": 8.583, "step": 288540 }, { "epoch": 0.5828892560915008, "grad_norm": 372.06097412109375, "learning_rate": 4.527720878690752e-06, "loss": 26.4324, "step": 288550 }, { "epoch": 0.5829094567241846, "grad_norm": 196.03456115722656, "learning_rate": 4.527373374646136e-06, "loss": 19.2067, "step": 288560 }, { "epoch": 0.5829296573568684, "grad_norm": 114.34036254882812, "learning_rate": 4.527025872905043e-06, "loss": 12.3684, "step": 288570 }, { "epoch": 0.5829498579895522, "grad_norm": 504.799560546875, "learning_rate": 4.52667837346917e-06, "loss": 21.7843, "step": 288580 }, { "epoch": 0.582970058622236, "grad_norm": 473.3775329589844, "learning_rate": 4.526330876340209e-06, "loss": 20.325, "step": 288590 }, { "epoch": 0.5829902592549199, "grad_norm": 204.45594787597656, "learning_rate": 4.525983381519853e-06, "loss": 6.938, "step": 288600 }, { "epoch": 0.5830104598876037, "grad_norm": 496.2728271484375, "learning_rate": 4.525635889009798e-06, "loss": 24.0811, "step": 288610 }, { "epoch": 0.5830306605202875, "grad_norm": 517.05859375, "learning_rate": 4.5252883988117356e-06, "loss": 22.0567, "step": 288620 }, { "epoch": 0.5830508611529713, "grad_norm": 310.81561279296875, "learning_rate": 4.524940910927359e-06, "loss": 23.8584, "step": 288630 }, { "epoch": 0.5830710617856552, "grad_norm": 227.63519287109375, "learning_rate": 4.524593425358364e-06, "loss": 10.9546, "step": 288640 }, { "epoch": 0.583091262418339, "grad_norm": 276.2196960449219, "learning_rate": 4.524245942106442e-06, "loss": 19.5536, "step": 288650 }, { "epoch": 0.5831114630510228, "grad_norm": 212.6410369873047, "learning_rate": 4.523898461173288e-06, "loss": 6.9301, "step": 288660 }, { "epoch": 0.5831316636837066, "grad_norm": 468.315673828125, "learning_rate": 4.5235509825605965e-06, "loss": 19.5886, "step": 288670 }, { "epoch": 0.5831518643163904, "grad_norm": 402.0150451660156, "learning_rate": 4.523203506270058e-06, "loss": 17.571, "step": 288680 }, { "epoch": 0.5831720649490743, "grad_norm": 264.9726257324219, "learning_rate": 4.5228560323033675e-06, "loss": 10.5363, "step": 288690 }, { "epoch": 0.5831922655817581, "grad_norm": 269.37548828125, "learning_rate": 4.522508560662219e-06, "loss": 16.6236, "step": 288700 }, { "epoch": 0.5832124662144418, "grad_norm": 160.0282745361328, "learning_rate": 4.522161091348308e-06, "loss": 17.1735, "step": 288710 }, { "epoch": 0.5832326668471256, "grad_norm": 377.7644958496094, "learning_rate": 4.521813624363323e-06, "loss": 25.1488, "step": 288720 }, { "epoch": 0.5832528674798094, "grad_norm": 349.9884948730469, "learning_rate": 4.521466159708962e-06, "loss": 20.4603, "step": 288730 }, { "epoch": 0.5832730681124932, "grad_norm": 177.69366455078125, "learning_rate": 4.521118697386917e-06, "loss": 12.1969, "step": 288740 }, { "epoch": 0.5832932687451771, "grad_norm": 169.61322021484375, "learning_rate": 4.52077123739888e-06, "loss": 20.7314, "step": 288750 }, { "epoch": 0.5833134693778609, "grad_norm": 39.9294548034668, "learning_rate": 4.520423779746547e-06, "loss": 17.347, "step": 288760 }, { "epoch": 0.5833336700105447, "grad_norm": 321.1822814941406, "learning_rate": 4.520076324431612e-06, "loss": 26.3152, "step": 288770 }, { "epoch": 0.5833538706432285, "grad_norm": 257.10662841796875, "learning_rate": 4.519728871455764e-06, "loss": 16.0506, "step": 288780 }, { "epoch": 0.5833740712759123, "grad_norm": 100.08138275146484, "learning_rate": 4.519381420820699e-06, "loss": 25.8988, "step": 288790 }, { "epoch": 0.5833942719085962, "grad_norm": 163.68333435058594, "learning_rate": 4.519033972528114e-06, "loss": 20.6364, "step": 288800 }, { "epoch": 0.58341447254128, "grad_norm": 509.23089599609375, "learning_rate": 4.518686526579695e-06, "loss": 15.2019, "step": 288810 }, { "epoch": 0.5834346731739638, "grad_norm": 283.18023681640625, "learning_rate": 4.518339082977142e-06, "loss": 20.7073, "step": 288820 }, { "epoch": 0.5834548738066476, "grad_norm": 434.3264465332031, "learning_rate": 4.517991641722146e-06, "loss": 11.7278, "step": 288830 }, { "epoch": 0.5834750744393314, "grad_norm": 445.4441833496094, "learning_rate": 4.517644202816399e-06, "loss": 13.7886, "step": 288840 }, { "epoch": 0.5834952750720153, "grad_norm": 313.33441162109375, "learning_rate": 4.517296766261596e-06, "loss": 13.7243, "step": 288850 }, { "epoch": 0.5835154757046991, "grad_norm": 75.31336975097656, "learning_rate": 4.516949332059429e-06, "loss": 8.4273, "step": 288860 }, { "epoch": 0.5835356763373829, "grad_norm": 221.6724090576172, "learning_rate": 4.516601900211595e-06, "loss": 26.2238, "step": 288870 }, { "epoch": 0.5835558769700667, "grad_norm": 165.5292510986328, "learning_rate": 4.516254470719783e-06, "loss": 14.7146, "step": 288880 }, { "epoch": 0.5835760776027505, "grad_norm": 492.73663330078125, "learning_rate": 4.515907043585688e-06, "loss": 18.1448, "step": 288890 }, { "epoch": 0.5835962782354344, "grad_norm": 46.4738883972168, "learning_rate": 4.5155596188110055e-06, "loss": 17.8356, "step": 288900 }, { "epoch": 0.5836164788681182, "grad_norm": 295.6556396484375, "learning_rate": 4.515212196397424e-06, "loss": 14.2718, "step": 288910 }, { "epoch": 0.583636679500802, "grad_norm": 204.67526245117188, "learning_rate": 4.5148647763466405e-06, "loss": 19.5161, "step": 288920 }, { "epoch": 0.5836568801334858, "grad_norm": 606.898681640625, "learning_rate": 4.514517358660347e-06, "loss": 17.8595, "step": 288930 }, { "epoch": 0.5836770807661696, "grad_norm": 297.3255310058594, "learning_rate": 4.514169943340238e-06, "loss": 18.1356, "step": 288940 }, { "epoch": 0.5836972813988535, "grad_norm": 45.27486038208008, "learning_rate": 4.513822530388004e-06, "loss": 25.6842, "step": 288950 }, { "epoch": 0.5837174820315373, "grad_norm": 372.8363342285156, "learning_rate": 4.513475119805342e-06, "loss": 17.2106, "step": 288960 }, { "epoch": 0.583737682664221, "grad_norm": 74.33955383300781, "learning_rate": 4.513127711593941e-06, "loss": 8.3791, "step": 288970 }, { "epoch": 0.5837578832969048, "grad_norm": 275.3126525878906, "learning_rate": 4.512780305755498e-06, "loss": 16.7346, "step": 288980 }, { "epoch": 0.5837780839295886, "grad_norm": 48.23619842529297, "learning_rate": 4.512432902291703e-06, "loss": 31.1497, "step": 288990 }, { "epoch": 0.5837982845622725, "grad_norm": 61.14518737792969, "learning_rate": 4.512085501204254e-06, "loss": 14.2776, "step": 289000 }, { "epoch": 0.5838184851949563, "grad_norm": 42.546173095703125, "learning_rate": 4.511738102494839e-06, "loss": 12.3757, "step": 289010 }, { "epoch": 0.5838386858276401, "grad_norm": 263.49945068359375, "learning_rate": 4.5113907061651524e-06, "loss": 21.9304, "step": 289020 }, { "epoch": 0.5838588864603239, "grad_norm": 293.0477294921875, "learning_rate": 4.511043312216891e-06, "loss": 27.5352, "step": 289030 }, { "epoch": 0.5838790870930077, "grad_norm": 135.94578552246094, "learning_rate": 4.510695920651742e-06, "loss": 26.4674, "step": 289040 }, { "epoch": 0.5838992877256916, "grad_norm": 74.29129028320312, "learning_rate": 4.510348531471403e-06, "loss": 17.0031, "step": 289050 }, { "epoch": 0.5839194883583754, "grad_norm": 38.112979888916016, "learning_rate": 4.510001144677568e-06, "loss": 9.0786, "step": 289060 }, { "epoch": 0.5839396889910592, "grad_norm": 178.7277069091797, "learning_rate": 4.509653760271926e-06, "loss": 11.7004, "step": 289070 }, { "epoch": 0.583959889623743, "grad_norm": 10.39890193939209, "learning_rate": 4.509306378256172e-06, "loss": 15.6026, "step": 289080 }, { "epoch": 0.5839800902564268, "grad_norm": 214.49676513671875, "learning_rate": 4.508958998632e-06, "loss": 21.5425, "step": 289090 }, { "epoch": 0.5840002908891107, "grad_norm": 438.2300109863281, "learning_rate": 4.508611621401102e-06, "loss": 21.9681, "step": 289100 }, { "epoch": 0.5840204915217945, "grad_norm": 247.9979705810547, "learning_rate": 4.50826424656517e-06, "loss": 15.6061, "step": 289110 }, { "epoch": 0.5840406921544783, "grad_norm": 276.3267517089844, "learning_rate": 4.507916874125902e-06, "loss": 19.3964, "step": 289120 }, { "epoch": 0.5840608927871621, "grad_norm": 377.4900817871094, "learning_rate": 4.507569504084983e-06, "loss": 21.1468, "step": 289130 }, { "epoch": 0.5840810934198459, "grad_norm": 8.640204429626465, "learning_rate": 4.5072221364441126e-06, "loss": 17.9225, "step": 289140 }, { "epoch": 0.5841012940525298, "grad_norm": 164.23435974121094, "learning_rate": 4.506874771204981e-06, "loss": 16.9836, "step": 289150 }, { "epoch": 0.5841214946852136, "grad_norm": 123.06031036376953, "learning_rate": 4.506527408369285e-06, "loss": 17.228, "step": 289160 }, { "epoch": 0.5841416953178974, "grad_norm": 397.80938720703125, "learning_rate": 4.506180047938711e-06, "loss": 30.8915, "step": 289170 }, { "epoch": 0.5841618959505812, "grad_norm": 408.9669189453125, "learning_rate": 4.505832689914956e-06, "loss": 20.8444, "step": 289180 }, { "epoch": 0.584182096583265, "grad_norm": 264.7774658203125, "learning_rate": 4.505485334299714e-06, "loss": 12.0934, "step": 289190 }, { "epoch": 0.5842022972159489, "grad_norm": 323.95635986328125, "learning_rate": 4.505137981094675e-06, "loss": 8.8296, "step": 289200 }, { "epoch": 0.5842224978486327, "grad_norm": 232.66615295410156, "learning_rate": 4.504790630301535e-06, "loss": 20.7452, "step": 289210 }, { "epoch": 0.5842426984813164, "grad_norm": 654.0389404296875, "learning_rate": 4.504443281921985e-06, "loss": 14.6349, "step": 289220 }, { "epoch": 0.5842628991140002, "grad_norm": 330.26776123046875, "learning_rate": 4.504095935957718e-06, "loss": 17.1905, "step": 289230 }, { "epoch": 0.584283099746684, "grad_norm": 304.7987365722656, "learning_rate": 4.503748592410427e-06, "loss": 11.8641, "step": 289240 }, { "epoch": 0.5843033003793678, "grad_norm": 341.6554260253906, "learning_rate": 4.5034012512818065e-06, "loss": 23.1132, "step": 289250 }, { "epoch": 0.5843235010120517, "grad_norm": 357.39910888671875, "learning_rate": 4.503053912573545e-06, "loss": 12.1927, "step": 289260 }, { "epoch": 0.5843437016447355, "grad_norm": 190.261962890625, "learning_rate": 4.502706576287341e-06, "loss": 17.13, "step": 289270 }, { "epoch": 0.5843639022774193, "grad_norm": 197.83383178710938, "learning_rate": 4.502359242424885e-06, "loss": 25.7712, "step": 289280 }, { "epoch": 0.5843841029101031, "grad_norm": 232.97042846679688, "learning_rate": 4.502011910987869e-06, "loss": 14.8694, "step": 289290 }, { "epoch": 0.5844043035427869, "grad_norm": 414.5251770019531, "learning_rate": 4.5016645819779865e-06, "loss": 24.3213, "step": 289300 }, { "epoch": 0.5844245041754708, "grad_norm": 569.11083984375, "learning_rate": 4.501317255396931e-06, "loss": 12.0644, "step": 289310 }, { "epoch": 0.5844447048081546, "grad_norm": 503.9019775390625, "learning_rate": 4.500969931246394e-06, "loss": 19.7888, "step": 289320 }, { "epoch": 0.5844649054408384, "grad_norm": 79.37042999267578, "learning_rate": 4.500622609528068e-06, "loss": 14.0403, "step": 289330 }, { "epoch": 0.5844851060735222, "grad_norm": 348.4939270019531, "learning_rate": 4.500275290243648e-06, "loss": 14.9796, "step": 289340 }, { "epoch": 0.584505306706206, "grad_norm": 807.81103515625, "learning_rate": 4.499927973394826e-06, "loss": 15.8266, "step": 289350 }, { "epoch": 0.5845255073388899, "grad_norm": 313.7483215332031, "learning_rate": 4.499580658983294e-06, "loss": 21.1588, "step": 289360 }, { "epoch": 0.5845457079715737, "grad_norm": 0.7884628176689148, "learning_rate": 4.4992333470107455e-06, "loss": 29.6843, "step": 289370 }, { "epoch": 0.5845659086042575, "grad_norm": 286.7625732421875, "learning_rate": 4.498886037478874e-06, "loss": 14.9468, "step": 289380 }, { "epoch": 0.5845861092369413, "grad_norm": 193.8046112060547, "learning_rate": 4.498538730389369e-06, "loss": 12.2643, "step": 289390 }, { "epoch": 0.5846063098696251, "grad_norm": 264.8284606933594, "learning_rate": 4.4981914257439254e-06, "loss": 22.4373, "step": 289400 }, { "epoch": 0.584626510502309, "grad_norm": 4.238860607147217, "learning_rate": 4.497844123544239e-06, "loss": 12.4548, "step": 289410 }, { "epoch": 0.5846467111349928, "grad_norm": 114.17462158203125, "learning_rate": 4.497496823791996e-06, "loss": 28.8613, "step": 289420 }, { "epoch": 0.5846669117676766, "grad_norm": 164.07186889648438, "learning_rate": 4.497149526488893e-06, "loss": 14.4938, "step": 289430 }, { "epoch": 0.5846871124003604, "grad_norm": 152.35887145996094, "learning_rate": 4.496802231636624e-06, "loss": 12.9548, "step": 289440 }, { "epoch": 0.5847073130330442, "grad_norm": 32.35220718383789, "learning_rate": 4.496454939236879e-06, "loss": 12.6522, "step": 289450 }, { "epoch": 0.5847275136657281, "grad_norm": 714.1876831054688, "learning_rate": 4.496107649291351e-06, "loss": 23.5201, "step": 289460 }, { "epoch": 0.5847477142984119, "grad_norm": 471.0312805175781, "learning_rate": 4.495760361801732e-06, "loss": 23.7224, "step": 289470 }, { "epoch": 0.5847679149310956, "grad_norm": 145.50326538085938, "learning_rate": 4.49541307676972e-06, "loss": 19.7825, "step": 289480 }, { "epoch": 0.5847881155637794, "grad_norm": 356.8738708496094, "learning_rate": 4.495065794196999e-06, "loss": 22.4115, "step": 289490 }, { "epoch": 0.5848083161964632, "grad_norm": 47.6560173034668, "learning_rate": 4.494718514085269e-06, "loss": 16.2526, "step": 289500 }, { "epoch": 0.584828516829147, "grad_norm": 17.74315643310547, "learning_rate": 4.494371236436219e-06, "loss": 16.3815, "step": 289510 }, { "epoch": 0.5848487174618309, "grad_norm": 426.9003601074219, "learning_rate": 4.494023961251542e-06, "loss": 16.2234, "step": 289520 }, { "epoch": 0.5848689180945147, "grad_norm": 353.0143737792969, "learning_rate": 4.49367668853293e-06, "loss": 22.2602, "step": 289530 }, { "epoch": 0.5848891187271985, "grad_norm": 212.37547302246094, "learning_rate": 4.4933294182820785e-06, "loss": 18.4245, "step": 289540 }, { "epoch": 0.5849093193598823, "grad_norm": 308.4668884277344, "learning_rate": 4.492982150500677e-06, "loss": 20.103, "step": 289550 }, { "epoch": 0.5849295199925661, "grad_norm": 149.51832580566406, "learning_rate": 4.492634885190417e-06, "loss": 18.6691, "step": 289560 }, { "epoch": 0.58494972062525, "grad_norm": 539.282958984375, "learning_rate": 4.492287622352996e-06, "loss": 14.6241, "step": 289570 }, { "epoch": 0.5849699212579338, "grad_norm": 300.3185729980469, "learning_rate": 4.491940361990101e-06, "loss": 9.1401, "step": 289580 }, { "epoch": 0.5849901218906176, "grad_norm": 387.6613464355469, "learning_rate": 4.4915931041034285e-06, "loss": 16.7728, "step": 289590 }, { "epoch": 0.5850103225233014, "grad_norm": 207.0655059814453, "learning_rate": 4.491245848694669e-06, "loss": 22.4668, "step": 289600 }, { "epoch": 0.5850305231559852, "grad_norm": 547.8883666992188, "learning_rate": 4.490898595765517e-06, "loss": 28.7803, "step": 289610 }, { "epoch": 0.5850507237886691, "grad_norm": 369.9324951171875, "learning_rate": 4.490551345317662e-06, "loss": 13.821, "step": 289620 }, { "epoch": 0.5850709244213529, "grad_norm": 550.5057373046875, "learning_rate": 4.4902040973527974e-06, "loss": 15.3907, "step": 289630 }, { "epoch": 0.5850911250540367, "grad_norm": 368.1778564453125, "learning_rate": 4.489856851872619e-06, "loss": 33.0781, "step": 289640 }, { "epoch": 0.5851113256867205, "grad_norm": 152.4137420654297, "learning_rate": 4.489509608878813e-06, "loss": 13.8376, "step": 289650 }, { "epoch": 0.5851315263194043, "grad_norm": 666.6358642578125, "learning_rate": 4.4891623683730765e-06, "loss": 20.5935, "step": 289660 }, { "epoch": 0.5851517269520882, "grad_norm": 274.5471496582031, "learning_rate": 4.488815130357103e-06, "loss": 43.3687, "step": 289670 }, { "epoch": 0.585171927584772, "grad_norm": 98.67227935791016, "learning_rate": 4.48846789483258e-06, "loss": 18.9741, "step": 289680 }, { "epoch": 0.5851921282174558, "grad_norm": 124.13681030273438, "learning_rate": 4.488120661801202e-06, "loss": 14.2414, "step": 289690 }, { "epoch": 0.5852123288501396, "grad_norm": 156.50643920898438, "learning_rate": 4.487773431264664e-06, "loss": 18.3719, "step": 289700 }, { "epoch": 0.5852325294828234, "grad_norm": 283.260498046875, "learning_rate": 4.487426203224655e-06, "loss": 15.2287, "step": 289710 }, { "epoch": 0.5852527301155073, "grad_norm": 377.6554260253906, "learning_rate": 4.487078977682867e-06, "loss": 17.5147, "step": 289720 }, { "epoch": 0.585272930748191, "grad_norm": 331.236083984375, "learning_rate": 4.486731754640997e-06, "loss": 16.4378, "step": 289730 }, { "epoch": 0.5852931313808748, "grad_norm": 269.80157470703125, "learning_rate": 4.486384534100732e-06, "loss": 27.2629, "step": 289740 }, { "epoch": 0.5853133320135586, "grad_norm": 531.6373901367188, "learning_rate": 4.4860373160637665e-06, "loss": 17.5007, "step": 289750 }, { "epoch": 0.5853335326462424, "grad_norm": 444.719482421875, "learning_rate": 4.485690100531793e-06, "loss": 39.4604, "step": 289760 }, { "epoch": 0.5853537332789263, "grad_norm": 24.359006881713867, "learning_rate": 4.485342887506505e-06, "loss": 7.2438, "step": 289770 }, { "epoch": 0.5853739339116101, "grad_norm": 214.59530639648438, "learning_rate": 4.484995676989592e-06, "loss": 17.2741, "step": 289780 }, { "epoch": 0.5853941345442939, "grad_norm": 320.148681640625, "learning_rate": 4.4846484689827465e-06, "loss": 21.0638, "step": 289790 }, { "epoch": 0.5854143351769777, "grad_norm": 240.64695739746094, "learning_rate": 4.484301263487664e-06, "loss": 36.1935, "step": 289800 }, { "epoch": 0.5854345358096615, "grad_norm": 143.87245178222656, "learning_rate": 4.483954060506033e-06, "loss": 18.8906, "step": 289810 }, { "epoch": 0.5854547364423454, "grad_norm": 413.5054931640625, "learning_rate": 4.4836068600395484e-06, "loss": 11.9949, "step": 289820 }, { "epoch": 0.5854749370750292, "grad_norm": 251.2863311767578, "learning_rate": 4.483259662089902e-06, "loss": 20.2521, "step": 289830 }, { "epoch": 0.585495137707713, "grad_norm": 601.3387451171875, "learning_rate": 4.482912466658784e-06, "loss": 16.5747, "step": 289840 }, { "epoch": 0.5855153383403968, "grad_norm": 924.859375, "learning_rate": 4.482565273747888e-06, "loss": 15.2538, "step": 289850 }, { "epoch": 0.5855355389730806, "grad_norm": 115.95515441894531, "learning_rate": 4.482218083358907e-06, "loss": 31.7692, "step": 289860 }, { "epoch": 0.5855557396057645, "grad_norm": 219.22817993164062, "learning_rate": 4.481870895493531e-06, "loss": 16.3563, "step": 289870 }, { "epoch": 0.5855759402384483, "grad_norm": 564.8135986328125, "learning_rate": 4.481523710153454e-06, "loss": 25.422, "step": 289880 }, { "epoch": 0.5855961408711321, "grad_norm": 577.3983154296875, "learning_rate": 4.481176527340368e-06, "loss": 23.1931, "step": 289890 }, { "epoch": 0.5856163415038159, "grad_norm": 220.31182861328125, "learning_rate": 4.4808293470559645e-06, "loss": 13.0523, "step": 289900 }, { "epoch": 0.5856365421364997, "grad_norm": 377.9592590332031, "learning_rate": 4.480482169301935e-06, "loss": 18.7027, "step": 289910 }, { "epoch": 0.5856567427691836, "grad_norm": 71.51055145263672, "learning_rate": 4.480134994079973e-06, "loss": 12.1835, "step": 289920 }, { "epoch": 0.5856769434018674, "grad_norm": 593.5560913085938, "learning_rate": 4.479787821391771e-06, "loss": 21.5604, "step": 289930 }, { "epoch": 0.5856971440345512, "grad_norm": 39.5924072265625, "learning_rate": 4.4794406512390175e-06, "loss": 14.0351, "step": 289940 }, { "epoch": 0.585717344667235, "grad_norm": 40.399383544921875, "learning_rate": 4.479093483623409e-06, "loss": 18.0054, "step": 289950 }, { "epoch": 0.5857375452999188, "grad_norm": 215.93939208984375, "learning_rate": 4.478746318546636e-06, "loss": 12.9314, "step": 289960 }, { "epoch": 0.5857577459326027, "grad_norm": 1138.665283203125, "learning_rate": 4.478399156010389e-06, "loss": 25.8988, "step": 289970 }, { "epoch": 0.5857779465652865, "grad_norm": 166.22030639648438, "learning_rate": 4.478051996016362e-06, "loss": 8.1434, "step": 289980 }, { "epoch": 0.5857981471979702, "grad_norm": 260.38677978515625, "learning_rate": 4.477704838566246e-06, "loss": 14.4933, "step": 289990 }, { "epoch": 0.585818347830654, "grad_norm": 322.36871337890625, "learning_rate": 4.477357683661734e-06, "loss": 28.2757, "step": 290000 }, { "epoch": 0.5858385484633378, "grad_norm": 208.0443878173828, "learning_rate": 4.477010531304515e-06, "loss": 21.9077, "step": 290010 }, { "epoch": 0.5858587490960216, "grad_norm": 294.9044189453125, "learning_rate": 4.476663381496287e-06, "loss": 7.0848, "step": 290020 }, { "epoch": 0.5858789497287055, "grad_norm": 95.24069213867188, "learning_rate": 4.476316234238735e-06, "loss": 14.404, "step": 290030 }, { "epoch": 0.5858991503613893, "grad_norm": 395.36907958984375, "learning_rate": 4.4759690895335545e-06, "loss": 16.6598, "step": 290040 }, { "epoch": 0.5859193509940731, "grad_norm": 272.2182312011719, "learning_rate": 4.475621947382438e-06, "loss": 16.7168, "step": 290050 }, { "epoch": 0.5859395516267569, "grad_norm": 309.1654968261719, "learning_rate": 4.475274807787077e-06, "loss": 18.4691, "step": 290060 }, { "epoch": 0.5859597522594407, "grad_norm": 200.5016632080078, "learning_rate": 4.474927670749162e-06, "loss": 22.8496, "step": 290070 }, { "epoch": 0.5859799528921246, "grad_norm": 73.40181732177734, "learning_rate": 4.474580536270385e-06, "loss": 17.6789, "step": 290080 }, { "epoch": 0.5860001535248084, "grad_norm": 124.22648620605469, "learning_rate": 4.474233404352442e-06, "loss": 13.6312, "step": 290090 }, { "epoch": 0.5860203541574922, "grad_norm": 324.4682922363281, "learning_rate": 4.473886274997018e-06, "loss": 19.3401, "step": 290100 }, { "epoch": 0.586040554790176, "grad_norm": 128.1812286376953, "learning_rate": 4.47353914820581e-06, "loss": 17.5955, "step": 290110 }, { "epoch": 0.5860607554228598, "grad_norm": 175.2484588623047, "learning_rate": 4.473192023980509e-06, "loss": 31.4633, "step": 290120 }, { "epoch": 0.5860809560555437, "grad_norm": 276.7469482421875, "learning_rate": 4.472844902322805e-06, "loss": 16.7142, "step": 290130 }, { "epoch": 0.5861011566882275, "grad_norm": 93.66202545166016, "learning_rate": 4.472497783234392e-06, "loss": 8.0791, "step": 290140 }, { "epoch": 0.5861213573209113, "grad_norm": 22.94288444519043, "learning_rate": 4.472150666716961e-06, "loss": 55.0233, "step": 290150 }, { "epoch": 0.5861415579535951, "grad_norm": 326.4684753417969, "learning_rate": 4.471803552772203e-06, "loss": 17.3001, "step": 290160 }, { "epoch": 0.586161758586279, "grad_norm": 296.78643798828125, "learning_rate": 4.471456441401809e-06, "loss": 27.3948, "step": 290170 }, { "epoch": 0.5861819592189628, "grad_norm": 249.63197326660156, "learning_rate": 4.471109332607475e-06, "loss": 22.287, "step": 290180 }, { "epoch": 0.5862021598516466, "grad_norm": 355.4353332519531, "learning_rate": 4.4707622263908875e-06, "loss": 11.0487, "step": 290190 }, { "epoch": 0.5862223604843304, "grad_norm": 169.37098693847656, "learning_rate": 4.470415122753742e-06, "loss": 23.7076, "step": 290200 }, { "epoch": 0.5862425611170142, "grad_norm": 363.5976257324219, "learning_rate": 4.470068021697728e-06, "loss": 17.6602, "step": 290210 }, { "epoch": 0.586262761749698, "grad_norm": 363.8653259277344, "learning_rate": 4.4697209232245395e-06, "loss": 19.03, "step": 290220 }, { "epoch": 0.5862829623823819, "grad_norm": 139.24034118652344, "learning_rate": 4.469373827335866e-06, "loss": 22.3024, "step": 290230 }, { "epoch": 0.5863031630150657, "grad_norm": 130.05992126464844, "learning_rate": 4.4690267340334e-06, "loss": 22.8483, "step": 290240 }, { "epoch": 0.5863233636477494, "grad_norm": 230.1553955078125, "learning_rate": 4.468679643318836e-06, "loss": 26.5836, "step": 290250 }, { "epoch": 0.5863435642804332, "grad_norm": 1557.213134765625, "learning_rate": 4.468332555193859e-06, "loss": 27.6151, "step": 290260 }, { "epoch": 0.586363764913117, "grad_norm": 478.6858215332031, "learning_rate": 4.467985469660166e-06, "loss": 25.6459, "step": 290270 }, { "epoch": 0.5863839655458009, "grad_norm": 529.6866455078125, "learning_rate": 4.467638386719448e-06, "loss": 21.4676, "step": 290280 }, { "epoch": 0.5864041661784847, "grad_norm": 405.33941650390625, "learning_rate": 4.467291306373396e-06, "loss": 16.607, "step": 290290 }, { "epoch": 0.5864243668111685, "grad_norm": 405.6553649902344, "learning_rate": 4.466944228623701e-06, "loss": 32.6304, "step": 290300 }, { "epoch": 0.5864445674438523, "grad_norm": 196.00421142578125, "learning_rate": 4.466597153472056e-06, "loss": 26.4886, "step": 290310 }, { "epoch": 0.5864647680765361, "grad_norm": 100.44033813476562, "learning_rate": 4.4662500809201515e-06, "loss": 17.2256, "step": 290320 }, { "epoch": 0.58648496870922, "grad_norm": 422.9254150390625, "learning_rate": 4.465903010969677e-06, "loss": 32.1027, "step": 290330 }, { "epoch": 0.5865051693419038, "grad_norm": 300.0216064453125, "learning_rate": 4.46555594362233e-06, "loss": 20.6119, "step": 290340 }, { "epoch": 0.5865253699745876, "grad_norm": 373.6593933105469, "learning_rate": 4.4652088788797965e-06, "loss": 27.8067, "step": 290350 }, { "epoch": 0.5865455706072714, "grad_norm": 268.99078369140625, "learning_rate": 4.46486181674377e-06, "loss": 13.0746, "step": 290360 }, { "epoch": 0.5865657712399552, "grad_norm": 329.954345703125, "learning_rate": 4.464514757215943e-06, "loss": 13.5018, "step": 290370 }, { "epoch": 0.5865859718726391, "grad_norm": 256.49725341796875, "learning_rate": 4.464167700298006e-06, "loss": 18.7823, "step": 290380 }, { "epoch": 0.5866061725053229, "grad_norm": 86.00834655761719, "learning_rate": 4.463820645991651e-06, "loss": 18.3427, "step": 290390 }, { "epoch": 0.5866263731380067, "grad_norm": 447.105224609375, "learning_rate": 4.463473594298567e-06, "loss": 26.6898, "step": 290400 }, { "epoch": 0.5866465737706905, "grad_norm": 310.1979064941406, "learning_rate": 4.463126545220451e-06, "loss": 15.852, "step": 290410 }, { "epoch": 0.5866667744033743, "grad_norm": 76.35517883300781, "learning_rate": 4.462779498758988e-06, "loss": 26.9536, "step": 290420 }, { "epoch": 0.5866869750360582, "grad_norm": 478.8035583496094, "learning_rate": 4.462432454915873e-06, "loss": 7.5157, "step": 290430 }, { "epoch": 0.586707175668742, "grad_norm": 266.8946228027344, "learning_rate": 4.4620854136928e-06, "loss": 12.7786, "step": 290440 }, { "epoch": 0.5867273763014258, "grad_norm": 177.81044006347656, "learning_rate": 4.461738375091454e-06, "loss": 24.7046, "step": 290450 }, { "epoch": 0.5867475769341096, "grad_norm": 423.4189453125, "learning_rate": 4.461391339113531e-06, "loss": 12.465, "step": 290460 }, { "epoch": 0.5867677775667934, "grad_norm": 438.22265625, "learning_rate": 4.461044305760722e-06, "loss": 23.8092, "step": 290470 }, { "epoch": 0.5867879781994773, "grad_norm": 474.1412353515625, "learning_rate": 4.460697275034717e-06, "loss": 23.9518, "step": 290480 }, { "epoch": 0.5868081788321611, "grad_norm": 170.00314331054688, "learning_rate": 4.460350246937207e-06, "loss": 21.0856, "step": 290490 }, { "epoch": 0.5868283794648448, "grad_norm": 325.558837890625, "learning_rate": 4.460003221469886e-06, "loss": 17.3306, "step": 290500 }, { "epoch": 0.5868485800975286, "grad_norm": 192.7998809814453, "learning_rate": 4.459656198634444e-06, "loss": 16.0531, "step": 290510 }, { "epoch": 0.5868687807302124, "grad_norm": 98.61698913574219, "learning_rate": 4.459309178432571e-06, "loss": 20.7802, "step": 290520 }, { "epoch": 0.5868889813628962, "grad_norm": 425.8262939453125, "learning_rate": 4.458962160865961e-06, "loss": 61.3101, "step": 290530 }, { "epoch": 0.5869091819955801, "grad_norm": 302.86773681640625, "learning_rate": 4.458615145936303e-06, "loss": 16.1304, "step": 290540 }, { "epoch": 0.5869293826282639, "grad_norm": 478.3963623046875, "learning_rate": 4.458268133645289e-06, "loss": 10.0227, "step": 290550 }, { "epoch": 0.5869495832609477, "grad_norm": 468.877197265625, "learning_rate": 4.457921123994609e-06, "loss": 15.7349, "step": 290560 }, { "epoch": 0.5869697838936315, "grad_norm": 398.20684814453125, "learning_rate": 4.457574116985958e-06, "loss": 12.2009, "step": 290570 }, { "epoch": 0.5869899845263153, "grad_norm": 272.97430419921875, "learning_rate": 4.457227112621024e-06, "loss": 15.3631, "step": 290580 }, { "epoch": 0.5870101851589992, "grad_norm": 135.94468688964844, "learning_rate": 4.456880110901499e-06, "loss": 14.1023, "step": 290590 }, { "epoch": 0.587030385791683, "grad_norm": 86.0199966430664, "learning_rate": 4.456533111829076e-06, "loss": 19.1835, "step": 290600 }, { "epoch": 0.5870505864243668, "grad_norm": 378.6319885253906, "learning_rate": 4.456186115405443e-06, "loss": 16.1475, "step": 290610 }, { "epoch": 0.5870707870570506, "grad_norm": 423.5802917480469, "learning_rate": 4.455839121632292e-06, "loss": 25.9294, "step": 290620 }, { "epoch": 0.5870909876897344, "grad_norm": 26.87302017211914, "learning_rate": 4.455492130511318e-06, "loss": 13.3121, "step": 290630 }, { "epoch": 0.5871111883224183, "grad_norm": 359.8492431640625, "learning_rate": 4.455145142044207e-06, "loss": 21.4866, "step": 290640 }, { "epoch": 0.5871313889551021, "grad_norm": 397.218505859375, "learning_rate": 4.4547981562326535e-06, "loss": 14.9631, "step": 290650 }, { "epoch": 0.5871515895877859, "grad_norm": 397.3639221191406, "learning_rate": 4.454451173078347e-06, "loss": 14.3595, "step": 290660 }, { "epoch": 0.5871717902204697, "grad_norm": 143.85008239746094, "learning_rate": 4.454104192582981e-06, "loss": 19.5807, "step": 290670 }, { "epoch": 0.5871919908531535, "grad_norm": 290.7563781738281, "learning_rate": 4.453757214748243e-06, "loss": 30.9512, "step": 290680 }, { "epoch": 0.5872121914858374, "grad_norm": 357.4871826171875, "learning_rate": 4.453410239575826e-06, "loss": 24.6374, "step": 290690 }, { "epoch": 0.5872323921185212, "grad_norm": 182.50381469726562, "learning_rate": 4.453063267067424e-06, "loss": 11.8246, "step": 290700 }, { "epoch": 0.587252592751205, "grad_norm": 153.17713928222656, "learning_rate": 4.452716297224722e-06, "loss": 19.0082, "step": 290710 }, { "epoch": 0.5872727933838888, "grad_norm": 313.52947998046875, "learning_rate": 4.452369330049415e-06, "loss": 20.0228, "step": 290720 }, { "epoch": 0.5872929940165726, "grad_norm": 178.7478790283203, "learning_rate": 4.452022365543195e-06, "loss": 12.703, "step": 290730 }, { "epoch": 0.5873131946492565, "grad_norm": 429.48614501953125, "learning_rate": 4.451675403707751e-06, "loss": 20.7201, "step": 290740 }, { "epoch": 0.5873333952819403, "grad_norm": 108.81208801269531, "learning_rate": 4.451328444544774e-06, "loss": 27.8274, "step": 290750 }, { "epoch": 0.587353595914624, "grad_norm": 145.52964782714844, "learning_rate": 4.450981488055957e-06, "loss": 11.3739, "step": 290760 }, { "epoch": 0.5873737965473078, "grad_norm": 160.24290466308594, "learning_rate": 4.450634534242989e-06, "loss": 14.5005, "step": 290770 }, { "epoch": 0.5873939971799916, "grad_norm": 251.53326416015625, "learning_rate": 4.4502875831075596e-06, "loss": 15.7139, "step": 290780 }, { "epoch": 0.5874141978126755, "grad_norm": 311.8526611328125, "learning_rate": 4.449940634651365e-06, "loss": 20.776, "step": 290790 }, { "epoch": 0.5874343984453593, "grad_norm": 148.40283203125, "learning_rate": 4.44959368887609e-06, "loss": 13.4278, "step": 290800 }, { "epoch": 0.5874545990780431, "grad_norm": 402.01531982421875, "learning_rate": 4.44924674578343e-06, "loss": 19.0589, "step": 290810 }, { "epoch": 0.5874747997107269, "grad_norm": 528.681396484375, "learning_rate": 4.4488998053750746e-06, "loss": 27.5653, "step": 290820 }, { "epoch": 0.5874950003434107, "grad_norm": 71.03848266601562, "learning_rate": 4.448552867652715e-06, "loss": 15.3605, "step": 290830 }, { "epoch": 0.5875152009760946, "grad_norm": 123.18183135986328, "learning_rate": 4.448205932618042e-06, "loss": 12.2669, "step": 290840 }, { "epoch": 0.5875354016087784, "grad_norm": 496.4794616699219, "learning_rate": 4.447859000272744e-06, "loss": 18.629, "step": 290850 }, { "epoch": 0.5875556022414622, "grad_norm": 304.3057556152344, "learning_rate": 4.447512070618519e-06, "loss": 26.8603, "step": 290860 }, { "epoch": 0.587575802874146, "grad_norm": 212.2410125732422, "learning_rate": 4.447165143657049e-06, "loss": 15.0876, "step": 290870 }, { "epoch": 0.5875960035068298, "grad_norm": 436.3628845214844, "learning_rate": 4.44681821939003e-06, "loss": 22.2604, "step": 290880 }, { "epoch": 0.5876162041395137, "grad_norm": 403.2327575683594, "learning_rate": 4.446471297819154e-06, "loss": 16.2623, "step": 290890 }, { "epoch": 0.5876364047721975, "grad_norm": 444.27044677734375, "learning_rate": 4.446124378946108e-06, "loss": 14.4075, "step": 290900 }, { "epoch": 0.5876566054048813, "grad_norm": 146.2795867919922, "learning_rate": 4.4457774627725835e-06, "loss": 10.4365, "step": 290910 }, { "epoch": 0.5876768060375651, "grad_norm": 414.1048583984375, "learning_rate": 4.4454305493002744e-06, "loss": 15.4737, "step": 290920 }, { "epoch": 0.5876970066702489, "grad_norm": 428.47540283203125, "learning_rate": 4.44508363853087e-06, "loss": 17.7038, "step": 290930 }, { "epoch": 0.5877172073029328, "grad_norm": 50.497711181640625, "learning_rate": 4.444736730466057e-06, "loss": 14.0455, "step": 290940 }, { "epoch": 0.5877374079356166, "grad_norm": 312.23016357421875, "learning_rate": 4.444389825107534e-06, "loss": 30.5986, "step": 290950 }, { "epoch": 0.5877576085683004, "grad_norm": 714.9638061523438, "learning_rate": 4.444042922456985e-06, "loss": 23.5425, "step": 290960 }, { "epoch": 0.5877778092009842, "grad_norm": 185.51246643066406, "learning_rate": 4.4436960225161045e-06, "loss": 27.7287, "step": 290970 }, { "epoch": 0.587798009833668, "grad_norm": 250.52101135253906, "learning_rate": 4.443349125286581e-06, "loss": 21.285, "step": 290980 }, { "epoch": 0.5878182104663519, "grad_norm": 224.84230041503906, "learning_rate": 4.443002230770108e-06, "loss": 15.3876, "step": 290990 }, { "epoch": 0.5878384110990357, "grad_norm": 49.007843017578125, "learning_rate": 4.442655338968373e-06, "loss": 17.9634, "step": 291000 }, { "epoch": 0.5878586117317194, "grad_norm": 262.1280517578125, "learning_rate": 4.4423084498830685e-06, "loss": 25.7347, "step": 291010 }, { "epoch": 0.5878788123644032, "grad_norm": 335.28240966796875, "learning_rate": 4.4419615635158875e-06, "loss": 17.4234, "step": 291020 }, { "epoch": 0.587899012997087, "grad_norm": 277.8085021972656, "learning_rate": 4.441614679868514e-06, "loss": 19.312, "step": 291030 }, { "epoch": 0.5879192136297708, "grad_norm": 237.54649353027344, "learning_rate": 4.441267798942646e-06, "loss": 12.5169, "step": 291040 }, { "epoch": 0.5879394142624547, "grad_norm": 285.7334289550781, "learning_rate": 4.44092092073997e-06, "loss": 11.9964, "step": 291050 }, { "epoch": 0.5879596148951385, "grad_norm": 36.835784912109375, "learning_rate": 4.440574045262178e-06, "loss": 14.0135, "step": 291060 }, { "epoch": 0.5879798155278223, "grad_norm": 212.7928466796875, "learning_rate": 4.440227172510959e-06, "loss": 16.6958, "step": 291070 }, { "epoch": 0.5880000161605061, "grad_norm": 45.525604248046875, "learning_rate": 4.439880302488007e-06, "loss": 11.8339, "step": 291080 }, { "epoch": 0.5880202167931899, "grad_norm": 290.3560791015625, "learning_rate": 4.439533435195009e-06, "loss": 10.5188, "step": 291090 }, { "epoch": 0.5880404174258738, "grad_norm": 106.73890686035156, "learning_rate": 4.439186570633656e-06, "loss": 10.4727, "step": 291100 }, { "epoch": 0.5880606180585576, "grad_norm": 182.7162628173828, "learning_rate": 4.43883970880564e-06, "loss": 20.9043, "step": 291110 }, { "epoch": 0.5880808186912414, "grad_norm": 1036.343017578125, "learning_rate": 4.4384928497126534e-06, "loss": 26.5998, "step": 291120 }, { "epoch": 0.5881010193239252, "grad_norm": 0.0, "learning_rate": 4.438145993356383e-06, "loss": 12.8644, "step": 291130 }, { "epoch": 0.588121219956609, "grad_norm": 398.3003845214844, "learning_rate": 4.437799139738521e-06, "loss": 15.4873, "step": 291140 }, { "epoch": 0.5881414205892929, "grad_norm": 263.58233642578125, "learning_rate": 4.437452288860759e-06, "loss": 13.9147, "step": 291150 }, { "epoch": 0.5881616212219767, "grad_norm": 48.246829986572266, "learning_rate": 4.437105440724785e-06, "loss": 9.7122, "step": 291160 }, { "epoch": 0.5881818218546605, "grad_norm": 364.0764465332031, "learning_rate": 4.43675859533229e-06, "loss": 10.9167, "step": 291170 }, { "epoch": 0.5882020224873443, "grad_norm": 121.19731903076172, "learning_rate": 4.4364117526849674e-06, "loss": 13.4838, "step": 291180 }, { "epoch": 0.5882222231200281, "grad_norm": 281.46929931640625, "learning_rate": 4.436064912784504e-06, "loss": 27.3922, "step": 291190 }, { "epoch": 0.588242423752712, "grad_norm": 105.15433502197266, "learning_rate": 4.4357180756325915e-06, "loss": 28.3922, "step": 291200 }, { "epoch": 0.5882626243853958, "grad_norm": 268.4934997558594, "learning_rate": 4.435371241230923e-06, "loss": 15.0548, "step": 291210 }, { "epoch": 0.5882828250180796, "grad_norm": 343.62103271484375, "learning_rate": 4.435024409581185e-06, "loss": 10.2504, "step": 291220 }, { "epoch": 0.5883030256507634, "grad_norm": 675.0537719726562, "learning_rate": 4.434677580685069e-06, "loss": 25.8529, "step": 291230 }, { "epoch": 0.5883232262834472, "grad_norm": 203.5338897705078, "learning_rate": 4.434330754544267e-06, "loss": 11.7278, "step": 291240 }, { "epoch": 0.5883434269161311, "grad_norm": 279.1500549316406, "learning_rate": 4.4339839311604675e-06, "loss": 31.108, "step": 291250 }, { "epoch": 0.5883636275488149, "grad_norm": 264.0085754394531, "learning_rate": 4.433637110535361e-06, "loss": 16.6724, "step": 291260 }, { "epoch": 0.5883838281814986, "grad_norm": 55.4809684753418, "learning_rate": 4.4332902926706395e-06, "loss": 11.2673, "step": 291270 }, { "epoch": 0.5884040288141824, "grad_norm": 366.79718017578125, "learning_rate": 4.432943477567993e-06, "loss": 16.977, "step": 291280 }, { "epoch": 0.5884242294468662, "grad_norm": 77.18932342529297, "learning_rate": 4.43259666522911e-06, "loss": 27.6653, "step": 291290 }, { "epoch": 0.58844443007955, "grad_norm": 360.4466857910156, "learning_rate": 4.432249855655681e-06, "loss": 16.002, "step": 291300 }, { "epoch": 0.5884646307122339, "grad_norm": 310.1911926269531, "learning_rate": 4.431903048849402e-06, "loss": 22.2379, "step": 291310 }, { "epoch": 0.5884848313449177, "grad_norm": 365.0721130371094, "learning_rate": 4.431556244811954e-06, "loss": 25.5718, "step": 291320 }, { "epoch": 0.5885050319776015, "grad_norm": 521.8826293945312, "learning_rate": 4.431209443545033e-06, "loss": 20.178, "step": 291330 }, { "epoch": 0.5885252326102853, "grad_norm": 318.56982421875, "learning_rate": 4.43086264505033e-06, "loss": 20.0494, "step": 291340 }, { "epoch": 0.5885454332429692, "grad_norm": 43.707462310791016, "learning_rate": 4.430515849329532e-06, "loss": 12.3623, "step": 291350 }, { "epoch": 0.588565633875653, "grad_norm": 184.57839965820312, "learning_rate": 4.43016905638433e-06, "loss": 16.95, "step": 291360 }, { "epoch": 0.5885858345083368, "grad_norm": 174.91197204589844, "learning_rate": 4.429822266216417e-06, "loss": 8.6092, "step": 291370 }, { "epoch": 0.5886060351410206, "grad_norm": 73.11796569824219, "learning_rate": 4.42947547882748e-06, "loss": 15.1927, "step": 291380 }, { "epoch": 0.5886262357737044, "grad_norm": 253.36843872070312, "learning_rate": 4.4291286942192085e-06, "loss": 23.11, "step": 291390 }, { "epoch": 0.5886464364063883, "grad_norm": 79.34971618652344, "learning_rate": 4.428781912393299e-06, "loss": 15.1931, "step": 291400 }, { "epoch": 0.5886666370390721, "grad_norm": 621.6790161132812, "learning_rate": 4.4284351333514315e-06, "loss": 18.1213, "step": 291410 }, { "epoch": 0.5886868376717559, "grad_norm": 147.98269653320312, "learning_rate": 4.428088357095306e-06, "loss": 20.7369, "step": 291420 }, { "epoch": 0.5887070383044397, "grad_norm": 195.3923797607422, "learning_rate": 4.427741583626607e-06, "loss": 17.5523, "step": 291430 }, { "epoch": 0.5887272389371235, "grad_norm": 286.5545349121094, "learning_rate": 4.4273948129470264e-06, "loss": 28.0192, "step": 291440 }, { "epoch": 0.5887474395698074, "grad_norm": 244.3865203857422, "learning_rate": 4.427048045058254e-06, "loss": 42.9066, "step": 291450 }, { "epoch": 0.5887676402024912, "grad_norm": 168.0032196044922, "learning_rate": 4.426701279961978e-06, "loss": 17.1607, "step": 291460 }, { "epoch": 0.588787840835175, "grad_norm": 216.34617614746094, "learning_rate": 4.426354517659894e-06, "loss": 26.8765, "step": 291470 }, { "epoch": 0.5888080414678588, "grad_norm": 438.3743896484375, "learning_rate": 4.426007758153686e-06, "loss": 16.4062, "step": 291480 }, { "epoch": 0.5888282421005426, "grad_norm": 2.6894993782043457, "learning_rate": 4.4256610014450465e-06, "loss": 9.9761, "step": 291490 }, { "epoch": 0.5888484427332265, "grad_norm": 134.32626342773438, "learning_rate": 4.425314247535668e-06, "loss": 26.1339, "step": 291500 }, { "epoch": 0.5888686433659103, "grad_norm": 457.7600402832031, "learning_rate": 4.4249674964272365e-06, "loss": 21.9593, "step": 291510 }, { "epoch": 0.5888888439985941, "grad_norm": 291.055908203125, "learning_rate": 4.424620748121443e-06, "loss": 7.7716, "step": 291520 }, { "epoch": 0.5889090446312778, "grad_norm": 189.05601501464844, "learning_rate": 4.42427400261998e-06, "loss": 17.4152, "step": 291530 }, { "epoch": 0.5889292452639616, "grad_norm": 77.21434783935547, "learning_rate": 4.423927259924535e-06, "loss": 13.309, "step": 291540 }, { "epoch": 0.5889494458966454, "grad_norm": 145.0786895751953, "learning_rate": 4.423580520036797e-06, "loss": 14.823, "step": 291550 }, { "epoch": 0.5889696465293293, "grad_norm": 332.791259765625, "learning_rate": 4.423233782958459e-06, "loss": 19.3495, "step": 291560 }, { "epoch": 0.5889898471620131, "grad_norm": 514.349853515625, "learning_rate": 4.42288704869121e-06, "loss": 20.4933, "step": 291570 }, { "epoch": 0.5890100477946969, "grad_norm": 460.97357177734375, "learning_rate": 4.422540317236739e-06, "loss": 31.9182, "step": 291580 }, { "epoch": 0.5890302484273807, "grad_norm": 230.87152099609375, "learning_rate": 4.422193588596736e-06, "loss": 15.753, "step": 291590 }, { "epoch": 0.5890504490600645, "grad_norm": 204.1398162841797, "learning_rate": 4.4218468627728935e-06, "loss": 19.9189, "step": 291600 }, { "epoch": 0.5890706496927484, "grad_norm": 145.162353515625, "learning_rate": 4.421500139766897e-06, "loss": 12.7713, "step": 291610 }, { "epoch": 0.5890908503254322, "grad_norm": 120.92027282714844, "learning_rate": 4.4211534195804385e-06, "loss": 13.4618, "step": 291620 }, { "epoch": 0.589111050958116, "grad_norm": 334.56005859375, "learning_rate": 4.420806702215211e-06, "loss": 15.5255, "step": 291630 }, { "epoch": 0.5891312515907998, "grad_norm": 44.463008880615234, "learning_rate": 4.4204599876728975e-06, "loss": 37.3579, "step": 291640 }, { "epoch": 0.5891514522234836, "grad_norm": 347.09027099609375, "learning_rate": 4.420113275955193e-06, "loss": 19.971, "step": 291650 }, { "epoch": 0.5891716528561675, "grad_norm": 545.8432006835938, "learning_rate": 4.419766567063788e-06, "loss": 23.5071, "step": 291660 }, { "epoch": 0.5891918534888513, "grad_norm": 293.8843688964844, "learning_rate": 4.419419861000369e-06, "loss": 20.0356, "step": 291670 }, { "epoch": 0.5892120541215351, "grad_norm": 218.99510192871094, "learning_rate": 4.419073157766626e-06, "loss": 20.5223, "step": 291680 }, { "epoch": 0.5892322547542189, "grad_norm": 248.44635009765625, "learning_rate": 4.418726457364252e-06, "loss": 15.9384, "step": 291690 }, { "epoch": 0.5892524553869027, "grad_norm": 90.03661346435547, "learning_rate": 4.418379759794934e-06, "loss": 18.5981, "step": 291700 }, { "epoch": 0.5892726560195866, "grad_norm": 492.5525817871094, "learning_rate": 4.418033065060361e-06, "loss": 28.0002, "step": 291710 }, { "epoch": 0.5892928566522704, "grad_norm": 80.55989074707031, "learning_rate": 4.417686373162225e-06, "loss": 11.6782, "step": 291720 }, { "epoch": 0.5893130572849542, "grad_norm": 247.95602416992188, "learning_rate": 4.417339684102217e-06, "loss": 26.2939, "step": 291730 }, { "epoch": 0.589333257917638, "grad_norm": 327.27044677734375, "learning_rate": 4.416992997882023e-06, "loss": 17.8196, "step": 291740 }, { "epoch": 0.5893534585503218, "grad_norm": 262.67572021484375, "learning_rate": 4.416646314503334e-06, "loss": 16.3744, "step": 291750 }, { "epoch": 0.5893736591830057, "grad_norm": 472.91729736328125, "learning_rate": 4.416299633967842e-06, "loss": 29.4095, "step": 291760 }, { "epoch": 0.5893938598156895, "grad_norm": 235.04531860351562, "learning_rate": 4.415952956277234e-06, "loss": 18.3534, "step": 291770 }, { "epoch": 0.5894140604483732, "grad_norm": 225.06253051757812, "learning_rate": 4.415606281433199e-06, "loss": 30.9368, "step": 291780 }, { "epoch": 0.589434261081057, "grad_norm": 260.1799621582031, "learning_rate": 4.415259609437431e-06, "loss": 12.2839, "step": 291790 }, { "epoch": 0.5894544617137408, "grad_norm": 279.49249267578125, "learning_rate": 4.414912940291614e-06, "loss": 18.544, "step": 291800 }, { "epoch": 0.5894746623464246, "grad_norm": 295.9985656738281, "learning_rate": 4.414566273997441e-06, "loss": 20.2924, "step": 291810 }, { "epoch": 0.5894948629791085, "grad_norm": 304.43988037109375, "learning_rate": 4.414219610556601e-06, "loss": 16.291, "step": 291820 }, { "epoch": 0.5895150636117923, "grad_norm": 242.80091857910156, "learning_rate": 4.413872949970785e-06, "loss": 14.072, "step": 291830 }, { "epoch": 0.5895352642444761, "grad_norm": 233.06976318359375, "learning_rate": 4.413526292241679e-06, "loss": 15.4081, "step": 291840 }, { "epoch": 0.5895554648771599, "grad_norm": 174.5694122314453, "learning_rate": 4.413179637370977e-06, "loss": 13.0296, "step": 291850 }, { "epoch": 0.5895756655098437, "grad_norm": 273.3475341796875, "learning_rate": 4.412832985360363e-06, "loss": 27.3125, "step": 291860 }, { "epoch": 0.5895958661425276, "grad_norm": 157.59609985351562, "learning_rate": 4.412486336211531e-06, "loss": 7.7415, "step": 291870 }, { "epoch": 0.5896160667752114, "grad_norm": 246.74827575683594, "learning_rate": 4.412139689926171e-06, "loss": 27.3313, "step": 291880 }, { "epoch": 0.5896362674078952, "grad_norm": 299.73236083984375, "learning_rate": 4.41179304650597e-06, "loss": 9.4268, "step": 291890 }, { "epoch": 0.589656468040579, "grad_norm": 249.23501586914062, "learning_rate": 4.4114464059526185e-06, "loss": 13.9128, "step": 291900 }, { "epoch": 0.5896766686732628, "grad_norm": 348.7188415527344, "learning_rate": 4.4110997682678056e-06, "loss": 12.2381, "step": 291910 }, { "epoch": 0.5896968693059467, "grad_norm": 187.80091857910156, "learning_rate": 4.410753133453222e-06, "loss": 8.8371, "step": 291920 }, { "epoch": 0.5897170699386305, "grad_norm": 154.19802856445312, "learning_rate": 4.410406501510554e-06, "loss": 34.7205, "step": 291930 }, { "epoch": 0.5897372705713143, "grad_norm": 193.80943298339844, "learning_rate": 4.410059872441494e-06, "loss": 22.5011, "step": 291940 }, { "epoch": 0.5897574712039981, "grad_norm": 291.5367736816406, "learning_rate": 4.409713246247732e-06, "loss": 22.47, "step": 291950 }, { "epoch": 0.589777671836682, "grad_norm": 288.4925537109375, "learning_rate": 4.409366622930955e-06, "loss": 18.3217, "step": 291960 }, { "epoch": 0.5897978724693658, "grad_norm": 621.7650756835938, "learning_rate": 4.409020002492854e-06, "loss": 14.3393, "step": 291970 }, { "epoch": 0.5898180731020496, "grad_norm": 1412.941162109375, "learning_rate": 4.4086733849351174e-06, "loss": 26.6017, "step": 291980 }, { "epoch": 0.5898382737347334, "grad_norm": 271.4926452636719, "learning_rate": 4.408326770259435e-06, "loss": 13.6174, "step": 291990 }, { "epoch": 0.5898584743674172, "grad_norm": 204.55264282226562, "learning_rate": 4.4079801584674955e-06, "loss": 14.473, "step": 292000 }, { "epoch": 0.589878675000101, "grad_norm": 242.58876037597656, "learning_rate": 4.407633549560991e-06, "loss": 15.1916, "step": 292010 }, { "epoch": 0.5898988756327849, "grad_norm": 358.83062744140625, "learning_rate": 4.407286943541606e-06, "loss": 13.9019, "step": 292020 }, { "epoch": 0.5899190762654687, "grad_norm": 488.8674011230469, "learning_rate": 4.406940340411034e-06, "loss": 21.63, "step": 292030 }, { "epoch": 0.5899392768981524, "grad_norm": 192.82347106933594, "learning_rate": 4.406593740170963e-06, "loss": 12.518, "step": 292040 }, { "epoch": 0.5899594775308362, "grad_norm": 374.09942626953125, "learning_rate": 4.406247142823082e-06, "loss": 10.8709, "step": 292050 }, { "epoch": 0.58997967816352, "grad_norm": 264.1607666015625, "learning_rate": 4.4059005483690805e-06, "loss": 22.5497, "step": 292060 }, { "epoch": 0.5899998787962039, "grad_norm": 221.7178497314453, "learning_rate": 4.405553956810646e-06, "loss": 20.2499, "step": 292070 }, { "epoch": 0.5900200794288877, "grad_norm": 679.2761840820312, "learning_rate": 4.405207368149472e-06, "loss": 22.973, "step": 292080 }, { "epoch": 0.5900402800615715, "grad_norm": 0.0, "learning_rate": 4.404860782387243e-06, "loss": 26.7757, "step": 292090 }, { "epoch": 0.5900604806942553, "grad_norm": 119.9970932006836, "learning_rate": 4.404514199525651e-06, "loss": 11.6165, "step": 292100 }, { "epoch": 0.5900806813269391, "grad_norm": 155.15647888183594, "learning_rate": 4.404167619566386e-06, "loss": 18.5212, "step": 292110 }, { "epoch": 0.590100881959623, "grad_norm": 118.76912689208984, "learning_rate": 4.403821042511135e-06, "loss": 17.6673, "step": 292120 }, { "epoch": 0.5901210825923068, "grad_norm": 2.3559415340423584, "learning_rate": 4.403474468361587e-06, "loss": 17.4457, "step": 292130 }, { "epoch": 0.5901412832249906, "grad_norm": 487.3302001953125, "learning_rate": 4.4031278971194335e-06, "loss": 18.2621, "step": 292140 }, { "epoch": 0.5901614838576744, "grad_norm": 281.50946044921875, "learning_rate": 4.402781328786361e-06, "loss": 10.713, "step": 292150 }, { "epoch": 0.5901816844903582, "grad_norm": 44.178009033203125, "learning_rate": 4.402434763364059e-06, "loss": 21.7129, "step": 292160 }, { "epoch": 0.5902018851230421, "grad_norm": 358.00152587890625, "learning_rate": 4.4020882008542185e-06, "loss": 15.9698, "step": 292170 }, { "epoch": 0.5902220857557259, "grad_norm": 125.08795166015625, "learning_rate": 4.401741641258529e-06, "loss": 25.9756, "step": 292180 }, { "epoch": 0.5902422863884097, "grad_norm": 399.94873046875, "learning_rate": 4.401395084578677e-06, "loss": 22.1024, "step": 292190 }, { "epoch": 0.5902624870210935, "grad_norm": 64.13700866699219, "learning_rate": 4.401048530816353e-06, "loss": 13.002, "step": 292200 }, { "epoch": 0.5902826876537773, "grad_norm": 374.3088684082031, "learning_rate": 4.4007019799732465e-06, "loss": 17.9574, "step": 292210 }, { "epoch": 0.5903028882864612, "grad_norm": 305.5179748535156, "learning_rate": 4.400355432051044e-06, "loss": 24.2106, "step": 292220 }, { "epoch": 0.590323088919145, "grad_norm": 335.41015625, "learning_rate": 4.400008887051437e-06, "loss": 30.153, "step": 292230 }, { "epoch": 0.5903432895518288, "grad_norm": 190.3223876953125, "learning_rate": 4.399662344976116e-06, "loss": 12.9496, "step": 292240 }, { "epoch": 0.5903634901845126, "grad_norm": 247.95974731445312, "learning_rate": 4.399315805826765e-06, "loss": 15.1141, "step": 292250 }, { "epoch": 0.5903836908171964, "grad_norm": 387.2908020019531, "learning_rate": 4.398969269605077e-06, "loss": 14.1646, "step": 292260 }, { "epoch": 0.5904038914498803, "grad_norm": 445.03631591796875, "learning_rate": 4.398622736312741e-06, "loss": 37.6478, "step": 292270 }, { "epoch": 0.5904240920825641, "grad_norm": 289.7338562011719, "learning_rate": 4.398276205951443e-06, "loss": 25.1114, "step": 292280 }, { "epoch": 0.5904442927152478, "grad_norm": 514.7606201171875, "learning_rate": 4.3979296785228744e-06, "loss": 16.5436, "step": 292290 }, { "epoch": 0.5904644933479316, "grad_norm": 218.46302795410156, "learning_rate": 4.397583154028725e-06, "loss": 13.8046, "step": 292300 }, { "epoch": 0.5904846939806154, "grad_norm": 427.7803955078125, "learning_rate": 4.397236632470681e-06, "loss": 15.1686, "step": 292310 }, { "epoch": 0.5905048946132992, "grad_norm": 273.4482116699219, "learning_rate": 4.3968901138504315e-06, "loss": 13.7415, "step": 292320 }, { "epoch": 0.5905250952459831, "grad_norm": 310.4000549316406, "learning_rate": 4.396543598169667e-06, "loss": 11.705, "step": 292330 }, { "epoch": 0.5905452958786669, "grad_norm": 323.31060791015625, "learning_rate": 4.3961970854300774e-06, "loss": 20.6012, "step": 292340 }, { "epoch": 0.5905654965113507, "grad_norm": 390.39495849609375, "learning_rate": 4.395850575633348e-06, "loss": 15.171, "step": 292350 }, { "epoch": 0.5905856971440345, "grad_norm": 566.3759155273438, "learning_rate": 4.395504068781171e-06, "loss": 27.7985, "step": 292360 }, { "epoch": 0.5906058977767183, "grad_norm": 455.462646484375, "learning_rate": 4.3951575648752346e-06, "loss": 20.4871, "step": 292370 }, { "epoch": 0.5906260984094022, "grad_norm": 73.95668029785156, "learning_rate": 4.394811063917225e-06, "loss": 24.6616, "step": 292380 }, { "epoch": 0.590646299042086, "grad_norm": 236.248779296875, "learning_rate": 4.394464565908832e-06, "loss": 14.7515, "step": 292390 }, { "epoch": 0.5906664996747698, "grad_norm": 459.77911376953125, "learning_rate": 4.394118070851749e-06, "loss": 35.2126, "step": 292400 }, { "epoch": 0.5906867003074536, "grad_norm": 184.43475341796875, "learning_rate": 4.3937715787476576e-06, "loss": 13.1497, "step": 292410 }, { "epoch": 0.5907069009401374, "grad_norm": 68.68903350830078, "learning_rate": 4.393425089598251e-06, "loss": 16.2089, "step": 292420 }, { "epoch": 0.5907271015728213, "grad_norm": 575.5389404296875, "learning_rate": 4.393078603405218e-06, "loss": 27.1923, "step": 292430 }, { "epoch": 0.5907473022055051, "grad_norm": 326.22113037109375, "learning_rate": 4.392732120170245e-06, "loss": 17.6709, "step": 292440 }, { "epoch": 0.5907675028381889, "grad_norm": 408.13275146484375, "learning_rate": 4.392385639895022e-06, "loss": 20.6777, "step": 292450 }, { "epoch": 0.5907877034708727, "grad_norm": 60.27813720703125, "learning_rate": 4.392039162581239e-06, "loss": 8.7443, "step": 292460 }, { "epoch": 0.5908079041035565, "grad_norm": 118.65946197509766, "learning_rate": 4.391692688230583e-06, "loss": 18.2609, "step": 292470 }, { "epoch": 0.5908281047362404, "grad_norm": 340.8380126953125, "learning_rate": 4.391346216844741e-06, "loss": 19.5816, "step": 292480 }, { "epoch": 0.5908483053689242, "grad_norm": 203.91885375976562, "learning_rate": 4.390999748425405e-06, "loss": 15.6069, "step": 292490 }, { "epoch": 0.590868506001608, "grad_norm": 407.7024841308594, "learning_rate": 4.390653282974264e-06, "loss": 16.1668, "step": 292500 }, { "epoch": 0.5908887066342918, "grad_norm": 434.59576416015625, "learning_rate": 4.390306820493003e-06, "loss": 30.9441, "step": 292510 }, { "epoch": 0.5909089072669756, "grad_norm": 362.1976623535156, "learning_rate": 4.389960360983313e-06, "loss": 26.208, "step": 292520 }, { "epoch": 0.5909291078996595, "grad_norm": 278.88018798828125, "learning_rate": 4.3896139044468835e-06, "loss": 10.2839, "step": 292530 }, { "epoch": 0.5909493085323433, "grad_norm": 144.68084716796875, "learning_rate": 4.389267450885399e-06, "loss": 10.5559, "step": 292540 }, { "epoch": 0.590969509165027, "grad_norm": 61.35442352294922, "learning_rate": 4.388921000300553e-06, "loss": 18.4629, "step": 292550 }, { "epoch": 0.5909897097977108, "grad_norm": 458.2248840332031, "learning_rate": 4.388574552694032e-06, "loss": 21.6952, "step": 292560 }, { "epoch": 0.5910099104303946, "grad_norm": 133.91650390625, "learning_rate": 4.3882281080675234e-06, "loss": 12.7846, "step": 292570 }, { "epoch": 0.5910301110630785, "grad_norm": 168.57054138183594, "learning_rate": 4.387881666422718e-06, "loss": 12.4919, "step": 292580 }, { "epoch": 0.5910503116957623, "grad_norm": 654.8916625976562, "learning_rate": 4.387535227761303e-06, "loss": 23.9381, "step": 292590 }, { "epoch": 0.5910705123284461, "grad_norm": 345.5181884765625, "learning_rate": 4.387188792084967e-06, "loss": 25.4922, "step": 292600 }, { "epoch": 0.5910907129611299, "grad_norm": 484.9035949707031, "learning_rate": 4.386842359395396e-06, "loss": 13.1137, "step": 292610 }, { "epoch": 0.5911109135938137, "grad_norm": 158.39756774902344, "learning_rate": 4.3864959296942835e-06, "loss": 22.9156, "step": 292620 }, { "epoch": 0.5911311142264976, "grad_norm": 837.8232421875, "learning_rate": 4.386149502983316e-06, "loss": 29.7474, "step": 292630 }, { "epoch": 0.5911513148591814, "grad_norm": 717.583984375, "learning_rate": 4.38580307926418e-06, "loss": 35.1593, "step": 292640 }, { "epoch": 0.5911715154918652, "grad_norm": 661.4284057617188, "learning_rate": 4.385456658538565e-06, "loss": 23.9815, "step": 292650 }, { "epoch": 0.591191716124549, "grad_norm": 221.24595642089844, "learning_rate": 4.385110240808161e-06, "loss": 18.8884, "step": 292660 }, { "epoch": 0.5912119167572328, "grad_norm": 446.3244323730469, "learning_rate": 4.384763826074655e-06, "loss": 16.1107, "step": 292670 }, { "epoch": 0.5912321173899167, "grad_norm": 328.36578369140625, "learning_rate": 4.384417414339734e-06, "loss": 12.232, "step": 292680 }, { "epoch": 0.5912523180226005, "grad_norm": 283.3165588378906, "learning_rate": 4.38407100560509e-06, "loss": 17.5131, "step": 292690 }, { "epoch": 0.5912725186552843, "grad_norm": 267.6941223144531, "learning_rate": 4.383724599872407e-06, "loss": 27.3564, "step": 292700 }, { "epoch": 0.5912927192879681, "grad_norm": 213.01718139648438, "learning_rate": 4.383378197143376e-06, "loss": 28.6766, "step": 292710 }, { "epoch": 0.5913129199206519, "grad_norm": 88.78292846679688, "learning_rate": 4.3830317974196864e-06, "loss": 12.5166, "step": 292720 }, { "epoch": 0.5913331205533358, "grad_norm": 429.4929504394531, "learning_rate": 4.382685400703024e-06, "loss": 16.5409, "step": 292730 }, { "epoch": 0.5913533211860196, "grad_norm": 278.3033752441406, "learning_rate": 4.382339006995078e-06, "loss": 25.8284, "step": 292740 }, { "epoch": 0.5913735218187034, "grad_norm": 159.69842529296875, "learning_rate": 4.381992616297538e-06, "loss": 25.3997, "step": 292750 }, { "epoch": 0.5913937224513872, "grad_norm": 317.994384765625, "learning_rate": 4.38164622861209e-06, "loss": 18.721, "step": 292760 }, { "epoch": 0.591413923084071, "grad_norm": 195.66262817382812, "learning_rate": 4.381299843940421e-06, "loss": 19.6419, "step": 292770 }, { "epoch": 0.5914341237167549, "grad_norm": 192.93936157226562, "learning_rate": 4.3809534622842245e-06, "loss": 29.562, "step": 292780 }, { "epoch": 0.5914543243494387, "grad_norm": 184.4942169189453, "learning_rate": 4.380607083645185e-06, "loss": 18.1123, "step": 292790 }, { "epoch": 0.5914745249821224, "grad_norm": 272.6634826660156, "learning_rate": 4.380260708024991e-06, "loss": 18.0683, "step": 292800 }, { "epoch": 0.5914947256148062, "grad_norm": 0.0, "learning_rate": 4.379914335425332e-06, "loss": 17.3292, "step": 292810 }, { "epoch": 0.59151492624749, "grad_norm": 428.838134765625, "learning_rate": 4.379567965847896e-06, "loss": 15.0964, "step": 292820 }, { "epoch": 0.5915351268801738, "grad_norm": 368.07733154296875, "learning_rate": 4.379221599294369e-06, "loss": 29.8215, "step": 292830 }, { "epoch": 0.5915553275128577, "grad_norm": 321.2933654785156, "learning_rate": 4.37887523576644e-06, "loss": 22.3947, "step": 292840 }, { "epoch": 0.5915755281455415, "grad_norm": 363.10980224609375, "learning_rate": 4.378528875265801e-06, "loss": 23.9454, "step": 292850 }, { "epoch": 0.5915957287782253, "grad_norm": 276.0616455078125, "learning_rate": 4.378182517794133e-06, "loss": 11.3014, "step": 292860 }, { "epoch": 0.5916159294109091, "grad_norm": 15.973150253295898, "learning_rate": 4.3778361633531296e-06, "loss": 15.3456, "step": 292870 }, { "epoch": 0.591636130043593, "grad_norm": 425.35736083984375, "learning_rate": 4.377489811944478e-06, "loss": 27.2492, "step": 292880 }, { "epoch": 0.5916563306762768, "grad_norm": 97.25628662109375, "learning_rate": 4.377143463569865e-06, "loss": 25.6733, "step": 292890 }, { "epoch": 0.5916765313089606, "grad_norm": 392.7544250488281, "learning_rate": 4.376797118230978e-06, "loss": 18.2695, "step": 292900 }, { "epoch": 0.5916967319416444, "grad_norm": 9.744392395019531, "learning_rate": 4.37645077592951e-06, "loss": 14.6648, "step": 292910 }, { "epoch": 0.5917169325743282, "grad_norm": 342.9910583496094, "learning_rate": 4.376104436667142e-06, "loss": 11.1766, "step": 292920 }, { "epoch": 0.591737133207012, "grad_norm": 319.9494323730469, "learning_rate": 4.375758100445564e-06, "loss": 25.766, "step": 292930 }, { "epoch": 0.5917573338396959, "grad_norm": 136.3638458251953, "learning_rate": 4.375411767266468e-06, "loss": 11.4168, "step": 292940 }, { "epoch": 0.5917775344723797, "grad_norm": 499.80523681640625, "learning_rate": 4.375065437131539e-06, "loss": 26.6806, "step": 292950 }, { "epoch": 0.5917977351050635, "grad_norm": 471.6264953613281, "learning_rate": 4.374719110042465e-06, "loss": 24.6079, "step": 292960 }, { "epoch": 0.5918179357377473, "grad_norm": 302.2352294921875, "learning_rate": 4.374372786000934e-06, "loss": 31.4393, "step": 292970 }, { "epoch": 0.5918381363704311, "grad_norm": 20.890872955322266, "learning_rate": 4.374026465008634e-06, "loss": 29.9402, "step": 292980 }, { "epoch": 0.591858337003115, "grad_norm": 389.56634521484375, "learning_rate": 4.373680147067254e-06, "loss": 22.1629, "step": 292990 }, { "epoch": 0.5918785376357988, "grad_norm": 124.12600708007812, "learning_rate": 4.373333832178478e-06, "loss": 18.9202, "step": 293000 }, { "epoch": 0.5918987382684826, "grad_norm": 277.0358581542969, "learning_rate": 4.372987520344002e-06, "loss": 10.9525, "step": 293010 }, { "epoch": 0.5919189389011664, "grad_norm": 16.70751953125, "learning_rate": 4.3726412115655046e-06, "loss": 18.1198, "step": 293020 }, { "epoch": 0.5919391395338502, "grad_norm": 249.12088012695312, "learning_rate": 4.372294905844679e-06, "loss": 26.4442, "step": 293030 }, { "epoch": 0.5919593401665341, "grad_norm": 278.3045349121094, "learning_rate": 4.371948603183213e-06, "loss": 23.0259, "step": 293040 }, { "epoch": 0.5919795407992179, "grad_norm": 664.0480346679688, "learning_rate": 4.371602303582792e-06, "loss": 17.7175, "step": 293050 }, { "epoch": 0.5919997414319016, "grad_norm": 159.3212127685547, "learning_rate": 4.3712560070451055e-06, "loss": 10.8857, "step": 293060 }, { "epoch": 0.5920199420645854, "grad_norm": 163.10269165039062, "learning_rate": 4.3709097135718395e-06, "loss": 15.8228, "step": 293070 }, { "epoch": 0.5920401426972692, "grad_norm": 237.5184326171875, "learning_rate": 4.370563423164687e-06, "loss": 14.8328, "step": 293080 }, { "epoch": 0.592060343329953, "grad_norm": 297.11834716796875, "learning_rate": 4.370217135825329e-06, "loss": 24.6415, "step": 293090 }, { "epoch": 0.5920805439626369, "grad_norm": 820.2581787109375, "learning_rate": 4.369870851555457e-06, "loss": 29.7589, "step": 293100 }, { "epoch": 0.5921007445953207, "grad_norm": 416.0830078125, "learning_rate": 4.369524570356759e-06, "loss": 25.7129, "step": 293110 }, { "epoch": 0.5921209452280045, "grad_norm": 366.1256103515625, "learning_rate": 4.369178292230921e-06, "loss": 10.2317, "step": 293120 }, { "epoch": 0.5921411458606883, "grad_norm": 98.6461410522461, "learning_rate": 4.368832017179631e-06, "loss": 24.1292, "step": 293130 }, { "epoch": 0.5921613464933722, "grad_norm": 114.5399398803711, "learning_rate": 4.368485745204579e-06, "loss": 16.7311, "step": 293140 }, { "epoch": 0.592181547126056, "grad_norm": 642.5507202148438, "learning_rate": 4.3681394763074495e-06, "loss": 29.6576, "step": 293150 }, { "epoch": 0.5922017477587398, "grad_norm": 369.4560546875, "learning_rate": 4.36779321048993e-06, "loss": 17.2335, "step": 293160 }, { "epoch": 0.5922219483914236, "grad_norm": 313.81146240234375, "learning_rate": 4.367446947753712e-06, "loss": 19.6864, "step": 293170 }, { "epoch": 0.5922421490241074, "grad_norm": 391.7908935546875, "learning_rate": 4.36710068810048e-06, "loss": 20.4303, "step": 293180 }, { "epoch": 0.5922623496567913, "grad_norm": 471.5343322753906, "learning_rate": 4.366754431531923e-06, "loss": 13.7054, "step": 293190 }, { "epoch": 0.5922825502894751, "grad_norm": 144.7227783203125, "learning_rate": 4.366408178049728e-06, "loss": 11.8718, "step": 293200 }, { "epoch": 0.5923027509221589, "grad_norm": 319.34228515625, "learning_rate": 4.366061927655582e-06, "loss": 21.028, "step": 293210 }, { "epoch": 0.5923229515548427, "grad_norm": 135.13746643066406, "learning_rate": 4.3657156803511745e-06, "loss": 14.1794, "step": 293220 }, { "epoch": 0.5923431521875265, "grad_norm": 264.9501647949219, "learning_rate": 4.3653694361381894e-06, "loss": 11.6853, "step": 293230 }, { "epoch": 0.5923633528202104, "grad_norm": 174.29603576660156, "learning_rate": 4.365023195018319e-06, "loss": 21.2184, "step": 293240 }, { "epoch": 0.5923835534528942, "grad_norm": 405.31353759765625, "learning_rate": 4.3646769569932475e-06, "loss": 26.7447, "step": 293250 }, { "epoch": 0.592403754085578, "grad_norm": 124.74893951416016, "learning_rate": 4.364330722064664e-06, "loss": 18.0701, "step": 293260 }, { "epoch": 0.5924239547182618, "grad_norm": 140.876220703125, "learning_rate": 4.363984490234257e-06, "loss": 20.0851, "step": 293270 }, { "epoch": 0.5924441553509456, "grad_norm": 226.44039916992188, "learning_rate": 4.36363826150371e-06, "loss": 9.2471, "step": 293280 }, { "epoch": 0.5924643559836295, "grad_norm": 4.0844902992248535, "learning_rate": 4.3632920358747125e-06, "loss": 12.1399, "step": 293290 }, { "epoch": 0.5924845566163133, "grad_norm": 240.15658569335938, "learning_rate": 4.362945813348956e-06, "loss": 22.1798, "step": 293300 }, { "epoch": 0.5925047572489971, "grad_norm": 472.6387939453125, "learning_rate": 4.36259959392812e-06, "loss": 29.2208, "step": 293310 }, { "epoch": 0.5925249578816808, "grad_norm": 329.99444580078125, "learning_rate": 4.3622533776138985e-06, "loss": 23.239, "step": 293320 }, { "epoch": 0.5925451585143646, "grad_norm": 370.1582946777344, "learning_rate": 4.361907164407977e-06, "loss": 27.6166, "step": 293330 }, { "epoch": 0.5925653591470484, "grad_norm": 591.4259643554688, "learning_rate": 4.361560954312042e-06, "loss": 12.3857, "step": 293340 }, { "epoch": 0.5925855597797323, "grad_norm": 231.2650909423828, "learning_rate": 4.361214747327781e-06, "loss": 10.1627, "step": 293350 }, { "epoch": 0.5926057604124161, "grad_norm": 404.3431396484375, "learning_rate": 4.360868543456883e-06, "loss": 29.8764, "step": 293360 }, { "epoch": 0.5926259610450999, "grad_norm": 202.81423950195312, "learning_rate": 4.360522342701033e-06, "loss": 19.8703, "step": 293370 }, { "epoch": 0.5926461616777837, "grad_norm": 10.086012840270996, "learning_rate": 4.360176145061919e-06, "loss": 20.1728, "step": 293380 }, { "epoch": 0.5926663623104675, "grad_norm": 200.48219299316406, "learning_rate": 4.35982995054123e-06, "loss": 15.7567, "step": 293390 }, { "epoch": 0.5926865629431514, "grad_norm": 339.79296875, "learning_rate": 4.359483759140654e-06, "loss": 30.4409, "step": 293400 }, { "epoch": 0.5927067635758352, "grad_norm": 206.92832946777344, "learning_rate": 4.359137570861874e-06, "loss": 12.8619, "step": 293410 }, { "epoch": 0.592726964208519, "grad_norm": 116.26487731933594, "learning_rate": 4.35879138570658e-06, "loss": 18.6987, "step": 293420 }, { "epoch": 0.5927471648412028, "grad_norm": 327.9327392578125, "learning_rate": 4.35844520367646e-06, "loss": 21.413, "step": 293430 }, { "epoch": 0.5927673654738866, "grad_norm": 203.71055603027344, "learning_rate": 4.358099024773199e-06, "loss": 28.8986, "step": 293440 }, { "epoch": 0.5927875661065705, "grad_norm": 204.28443908691406, "learning_rate": 4.357752848998486e-06, "loss": 17.8037, "step": 293450 }, { "epoch": 0.5928077667392543, "grad_norm": 296.56793212890625, "learning_rate": 4.357406676354009e-06, "loss": 18.8605, "step": 293460 }, { "epoch": 0.5928279673719381, "grad_norm": 195.7593536376953, "learning_rate": 4.357060506841452e-06, "loss": 13.2065, "step": 293470 }, { "epoch": 0.5928481680046219, "grad_norm": 221.4251708984375, "learning_rate": 4.356714340462505e-06, "loss": 14.7917, "step": 293480 }, { "epoch": 0.5928683686373057, "grad_norm": 184.15028381347656, "learning_rate": 4.356368177218855e-06, "loss": 21.2213, "step": 293490 }, { "epoch": 0.5928885692699896, "grad_norm": 399.1337585449219, "learning_rate": 4.356022017112187e-06, "loss": 15.691, "step": 293500 }, { "epoch": 0.5929087699026734, "grad_norm": 298.57275390625, "learning_rate": 4.35567586014419e-06, "loss": 11.8265, "step": 293510 }, { "epoch": 0.5929289705353572, "grad_norm": 416.8627014160156, "learning_rate": 4.355329706316552e-06, "loss": 27.4403, "step": 293520 }, { "epoch": 0.592949171168041, "grad_norm": 464.4613037109375, "learning_rate": 4.354983555630957e-06, "loss": 22.2234, "step": 293530 }, { "epoch": 0.5929693718007248, "grad_norm": 28.93180274963379, "learning_rate": 4.354637408089093e-06, "loss": 12.1095, "step": 293540 }, { "epoch": 0.5929895724334087, "grad_norm": 300.5746154785156, "learning_rate": 4.35429126369265e-06, "loss": 16.6959, "step": 293550 }, { "epoch": 0.5930097730660925, "grad_norm": 345.64990234375, "learning_rate": 4.353945122443314e-06, "loss": 21.7761, "step": 293560 }, { "epoch": 0.5930299736987762, "grad_norm": 344.408447265625, "learning_rate": 4.3535989843427695e-06, "loss": 15.3375, "step": 293570 }, { "epoch": 0.59305017433146, "grad_norm": 411.94708251953125, "learning_rate": 4.3532528493927055e-06, "loss": 21.0969, "step": 293580 }, { "epoch": 0.5930703749641438, "grad_norm": 140.3260498046875, "learning_rate": 4.352906717594809e-06, "loss": 14.6769, "step": 293590 }, { "epoch": 0.5930905755968277, "grad_norm": 400.906982421875, "learning_rate": 4.352560588950766e-06, "loss": 20.1953, "step": 293600 }, { "epoch": 0.5931107762295115, "grad_norm": 254.31094360351562, "learning_rate": 4.352214463462263e-06, "loss": 22.4671, "step": 293610 }, { "epoch": 0.5931309768621953, "grad_norm": 5.759637355804443, "learning_rate": 4.351868341130992e-06, "loss": 9.3346, "step": 293620 }, { "epoch": 0.5931511774948791, "grad_norm": 205.90499877929688, "learning_rate": 4.351522221958633e-06, "loss": 14.4937, "step": 293630 }, { "epoch": 0.5931713781275629, "grad_norm": 475.67877197265625, "learning_rate": 4.351176105946876e-06, "loss": 24.3906, "step": 293640 }, { "epoch": 0.5931915787602468, "grad_norm": 232.34130859375, "learning_rate": 4.350829993097409e-06, "loss": 26.5611, "step": 293650 }, { "epoch": 0.5932117793929306, "grad_norm": 157.444580078125, "learning_rate": 4.350483883411918e-06, "loss": 19.2204, "step": 293660 }, { "epoch": 0.5932319800256144, "grad_norm": 109.25350952148438, "learning_rate": 4.350137776892089e-06, "loss": 10.2109, "step": 293670 }, { "epoch": 0.5932521806582982, "grad_norm": 194.7845001220703, "learning_rate": 4.349791673539609e-06, "loss": 23.4018, "step": 293680 }, { "epoch": 0.593272381290982, "grad_norm": 401.9795837402344, "learning_rate": 4.349445573356168e-06, "loss": 6.7271, "step": 293690 }, { "epoch": 0.5932925819236659, "grad_norm": 80.57222747802734, "learning_rate": 4.349099476343448e-06, "loss": 20.9458, "step": 293700 }, { "epoch": 0.5933127825563497, "grad_norm": 0.07639694213867188, "learning_rate": 4.3487533825031395e-06, "loss": 43.2083, "step": 293710 }, { "epoch": 0.5933329831890335, "grad_norm": 263.1050109863281, "learning_rate": 4.348407291836928e-06, "loss": 14.3733, "step": 293720 }, { "epoch": 0.5933531838217173, "grad_norm": 254.24449157714844, "learning_rate": 4.3480612043465e-06, "loss": 11.2772, "step": 293730 }, { "epoch": 0.5933733844544011, "grad_norm": 66.16065216064453, "learning_rate": 4.347715120033543e-06, "loss": 26.3292, "step": 293740 }, { "epoch": 0.593393585087085, "grad_norm": 75.22801208496094, "learning_rate": 4.347369038899744e-06, "loss": 11.5681, "step": 293750 }, { "epoch": 0.5934137857197688, "grad_norm": 147.4004364013672, "learning_rate": 4.3470229609467875e-06, "loss": 14.5266, "step": 293760 }, { "epoch": 0.5934339863524526, "grad_norm": 499.2951965332031, "learning_rate": 4.346676886176361e-06, "loss": 24.0114, "step": 293770 }, { "epoch": 0.5934541869851364, "grad_norm": 195.19635009765625, "learning_rate": 4.346330814590156e-06, "loss": 24.8911, "step": 293780 }, { "epoch": 0.5934743876178202, "grad_norm": 507.2892761230469, "learning_rate": 4.345984746189852e-06, "loss": 28.5222, "step": 293790 }, { "epoch": 0.593494588250504, "grad_norm": 553.01123046875, "learning_rate": 4.34563868097714e-06, "loss": 21.7281, "step": 293800 }, { "epoch": 0.5935147888831879, "grad_norm": 238.06121826171875, "learning_rate": 4.3452926189537056e-06, "loss": 23.3335, "step": 293810 }, { "epoch": 0.5935349895158717, "grad_norm": 519.3670043945312, "learning_rate": 4.344946560121236e-06, "loss": 21.1594, "step": 293820 }, { "epoch": 0.5935551901485554, "grad_norm": 242.68687438964844, "learning_rate": 4.344600504481416e-06, "loss": 16.9019, "step": 293830 }, { "epoch": 0.5935753907812392, "grad_norm": 98.96917724609375, "learning_rate": 4.344254452035934e-06, "loss": 19.8868, "step": 293840 }, { "epoch": 0.593595591413923, "grad_norm": 312.739990234375, "learning_rate": 4.343908402786478e-06, "loss": 16.0565, "step": 293850 }, { "epoch": 0.5936157920466069, "grad_norm": 256.668212890625, "learning_rate": 4.343562356734732e-06, "loss": 10.5124, "step": 293860 }, { "epoch": 0.5936359926792907, "grad_norm": 347.50970458984375, "learning_rate": 4.3432163138823826e-06, "loss": 37.8551, "step": 293870 }, { "epoch": 0.5936561933119745, "grad_norm": 270.307373046875, "learning_rate": 4.34287027423112e-06, "loss": 16.1036, "step": 293880 }, { "epoch": 0.5936763939446583, "grad_norm": 124.00232696533203, "learning_rate": 4.342524237782625e-06, "loss": 10.5599, "step": 293890 }, { "epoch": 0.5936965945773421, "grad_norm": 649.303955078125, "learning_rate": 4.342178204538588e-06, "loss": 17.0731, "step": 293900 }, { "epoch": 0.593716795210026, "grad_norm": 127.0454330444336, "learning_rate": 4.341832174500696e-06, "loss": 13.7668, "step": 293910 }, { "epoch": 0.5937369958427098, "grad_norm": 507.38519287109375, "learning_rate": 4.341486147670631e-06, "loss": 22.0901, "step": 293920 }, { "epoch": 0.5937571964753936, "grad_norm": 360.4374694824219, "learning_rate": 4.341140124050085e-06, "loss": 15.658, "step": 293930 }, { "epoch": 0.5937773971080774, "grad_norm": 98.56364440917969, "learning_rate": 4.340794103640743e-06, "loss": 22.7219, "step": 293940 }, { "epoch": 0.5937975977407612, "grad_norm": 203.2914581298828, "learning_rate": 4.340448086444288e-06, "loss": 11.8111, "step": 293950 }, { "epoch": 0.5938177983734451, "grad_norm": 347.62127685546875, "learning_rate": 4.340102072462411e-06, "loss": 37.6724, "step": 293960 }, { "epoch": 0.5938379990061289, "grad_norm": 382.10491943359375, "learning_rate": 4.339756061696796e-06, "loss": 39.4538, "step": 293970 }, { "epoch": 0.5938581996388127, "grad_norm": 166.504638671875, "learning_rate": 4.33941005414913e-06, "loss": 19.5773, "step": 293980 }, { "epoch": 0.5938784002714965, "grad_norm": 310.0020446777344, "learning_rate": 4.339064049821098e-06, "loss": 14.5886, "step": 293990 }, { "epoch": 0.5938986009041803, "grad_norm": 410.6182556152344, "learning_rate": 4.3387180487143875e-06, "loss": 18.0505, "step": 294000 }, { "epoch": 0.5939188015368642, "grad_norm": 91.48588562011719, "learning_rate": 4.338372050830687e-06, "loss": 11.8704, "step": 294010 }, { "epoch": 0.593939002169548, "grad_norm": 271.89837646484375, "learning_rate": 4.3380260561716795e-06, "loss": 22.8787, "step": 294020 }, { "epoch": 0.5939592028022318, "grad_norm": 275.8891296386719, "learning_rate": 4.337680064739053e-06, "loss": 13.7769, "step": 294030 }, { "epoch": 0.5939794034349156, "grad_norm": 25.833105087280273, "learning_rate": 4.337334076534495e-06, "loss": 30.7229, "step": 294040 }, { "epoch": 0.5939996040675994, "grad_norm": 314.5873718261719, "learning_rate": 4.336988091559688e-06, "loss": 16.8934, "step": 294050 }, { "epoch": 0.5940198047002833, "grad_norm": 277.19964599609375, "learning_rate": 4.3366421098163215e-06, "loss": 20.2802, "step": 294060 }, { "epoch": 0.5940400053329671, "grad_norm": 242.56472778320312, "learning_rate": 4.336296131306083e-06, "loss": 15.3184, "step": 294070 }, { "epoch": 0.5940602059656508, "grad_norm": 171.50576782226562, "learning_rate": 4.335950156030653e-06, "loss": 15.7217, "step": 294080 }, { "epoch": 0.5940804065983346, "grad_norm": 379.6099853515625, "learning_rate": 4.335604183991723e-06, "loss": 14.0358, "step": 294090 }, { "epoch": 0.5941006072310184, "grad_norm": 250.2049560546875, "learning_rate": 4.335258215190979e-06, "loss": 27.1886, "step": 294100 }, { "epoch": 0.5941208078637022, "grad_norm": 280.62005615234375, "learning_rate": 4.334912249630104e-06, "loss": 16.1684, "step": 294110 }, { "epoch": 0.5941410084963861, "grad_norm": 380.7889099121094, "learning_rate": 4.334566287310787e-06, "loss": 16.6607, "step": 294120 }, { "epoch": 0.5941612091290699, "grad_norm": 347.9708251953125, "learning_rate": 4.334220328234711e-06, "loss": 21.2354, "step": 294130 }, { "epoch": 0.5941814097617537, "grad_norm": 211.93284606933594, "learning_rate": 4.333874372403569e-06, "loss": 18.2137, "step": 294140 }, { "epoch": 0.5942016103944375, "grad_norm": 466.7325439453125, "learning_rate": 4.3335284198190385e-06, "loss": 20.2946, "step": 294150 }, { "epoch": 0.5942218110271213, "grad_norm": 126.02813720703125, "learning_rate": 4.33318247048281e-06, "loss": 18.6723, "step": 294160 }, { "epoch": 0.5942420116598052, "grad_norm": 431.5873107910156, "learning_rate": 4.332836524396571e-06, "loss": 15.7061, "step": 294170 }, { "epoch": 0.594262212292489, "grad_norm": 369.171142578125, "learning_rate": 4.332490581562005e-06, "loss": 19.6144, "step": 294180 }, { "epoch": 0.5942824129251728, "grad_norm": 52.65614700317383, "learning_rate": 4.332144641980799e-06, "loss": 13.1117, "step": 294190 }, { "epoch": 0.5943026135578566, "grad_norm": 359.465576171875, "learning_rate": 4.331798705654639e-06, "loss": 34.4855, "step": 294200 }, { "epoch": 0.5943228141905404, "grad_norm": 260.4563903808594, "learning_rate": 4.331452772585212e-06, "loss": 17.4853, "step": 294210 }, { "epoch": 0.5943430148232243, "grad_norm": 117.06153869628906, "learning_rate": 4.3311068427742e-06, "loss": 27.3306, "step": 294220 }, { "epoch": 0.5943632154559081, "grad_norm": 108.80721282958984, "learning_rate": 4.330760916223297e-06, "loss": 25.0565, "step": 294230 }, { "epoch": 0.5943834160885919, "grad_norm": 181.44244384765625, "learning_rate": 4.33041499293418e-06, "loss": 13.3681, "step": 294240 }, { "epoch": 0.5944036167212757, "grad_norm": 21.294790267944336, "learning_rate": 4.33006907290854e-06, "loss": 14.1222, "step": 294250 }, { "epoch": 0.5944238173539595, "grad_norm": 95.44473266601562, "learning_rate": 4.329723156148064e-06, "loss": 21.1636, "step": 294260 }, { "epoch": 0.5944440179866434, "grad_norm": 205.04876708984375, "learning_rate": 4.3293772426544336e-06, "loss": 13.2941, "step": 294270 }, { "epoch": 0.5944642186193272, "grad_norm": 244.82998657226562, "learning_rate": 4.329031332429338e-06, "loss": 18.758, "step": 294280 }, { "epoch": 0.594484419252011, "grad_norm": 74.0710220336914, "learning_rate": 4.328685425474462e-06, "loss": 15.4077, "step": 294290 }, { "epoch": 0.5945046198846948, "grad_norm": 175.26742553710938, "learning_rate": 4.328339521791493e-06, "loss": 24.2236, "step": 294300 }, { "epoch": 0.5945248205173786, "grad_norm": 219.14015197753906, "learning_rate": 4.327993621382115e-06, "loss": 15.1775, "step": 294310 }, { "epoch": 0.5945450211500625, "grad_norm": 27.472721099853516, "learning_rate": 4.327647724248014e-06, "loss": 19.5555, "step": 294320 }, { "epoch": 0.5945652217827463, "grad_norm": 38.00634002685547, "learning_rate": 4.327301830390878e-06, "loss": 9.0584, "step": 294330 }, { "epoch": 0.59458542241543, "grad_norm": 740.7202758789062, "learning_rate": 4.32695593981239e-06, "loss": 27.2564, "step": 294340 }, { "epoch": 0.5946056230481138, "grad_norm": 0.0, "learning_rate": 4.326610052514238e-06, "loss": 11.8257, "step": 294350 }, { "epoch": 0.5946258236807976, "grad_norm": 262.72344970703125, "learning_rate": 4.326264168498106e-06, "loss": 16.102, "step": 294360 }, { "epoch": 0.5946460243134815, "grad_norm": 239.7547607421875, "learning_rate": 4.325918287765682e-06, "loss": 18.0176, "step": 294370 }, { "epoch": 0.5946662249461653, "grad_norm": 22.678022384643555, "learning_rate": 4.325572410318648e-06, "loss": 18.2049, "step": 294380 }, { "epoch": 0.5946864255788491, "grad_norm": 252.98867797851562, "learning_rate": 4.325226536158696e-06, "loss": 31.2141, "step": 294390 }, { "epoch": 0.5947066262115329, "grad_norm": 212.02850341796875, "learning_rate": 4.3248806652875045e-06, "loss": 19.4335, "step": 294400 }, { "epoch": 0.5947268268442167, "grad_norm": 35.100833892822266, "learning_rate": 4.324534797706764e-06, "loss": 12.9208, "step": 294410 }, { "epoch": 0.5947470274769006, "grad_norm": 285.63427734375, "learning_rate": 4.32418893341816e-06, "loss": 17.3984, "step": 294420 }, { "epoch": 0.5947672281095844, "grad_norm": 365.4830017089844, "learning_rate": 4.323843072423376e-06, "loss": 23.1304, "step": 294430 }, { "epoch": 0.5947874287422682, "grad_norm": 364.1375427246094, "learning_rate": 4.323497214724099e-06, "loss": 22.1656, "step": 294440 }, { "epoch": 0.594807629374952, "grad_norm": 313.3019104003906, "learning_rate": 4.323151360322014e-06, "loss": 15.3534, "step": 294450 }, { "epoch": 0.5948278300076358, "grad_norm": 382.1801452636719, "learning_rate": 4.32280550921881e-06, "loss": 13.1598, "step": 294460 }, { "epoch": 0.5948480306403197, "grad_norm": 200.699462890625, "learning_rate": 4.3224596614161666e-06, "loss": 15.0914, "step": 294470 }, { "epoch": 0.5948682312730035, "grad_norm": 148.71641540527344, "learning_rate": 4.322113816915774e-06, "loss": 16.0701, "step": 294480 }, { "epoch": 0.5948884319056873, "grad_norm": 394.70758056640625, "learning_rate": 4.321767975719317e-06, "loss": 29.2967, "step": 294490 }, { "epoch": 0.5949086325383711, "grad_norm": 345.860107421875, "learning_rate": 4.321422137828479e-06, "loss": 13.8959, "step": 294500 }, { "epoch": 0.5949288331710549, "grad_norm": 329.0943908691406, "learning_rate": 4.321076303244948e-06, "loss": 7.3328, "step": 294510 }, { "epoch": 0.5949490338037388, "grad_norm": 67.54218292236328, "learning_rate": 4.320730471970409e-06, "loss": 22.0776, "step": 294520 }, { "epoch": 0.5949692344364226, "grad_norm": 270.8656921386719, "learning_rate": 4.320384644006546e-06, "loss": 16.9076, "step": 294530 }, { "epoch": 0.5949894350691064, "grad_norm": 72.56269836425781, "learning_rate": 4.320038819355047e-06, "loss": 17.2942, "step": 294540 }, { "epoch": 0.5950096357017902, "grad_norm": 10.861909866333008, "learning_rate": 4.319692998017597e-06, "loss": 19.4804, "step": 294550 }, { "epoch": 0.595029836334474, "grad_norm": 143.9412078857422, "learning_rate": 4.31934717999588e-06, "loss": 13.5116, "step": 294560 }, { "epoch": 0.5950500369671579, "grad_norm": 255.48040771484375, "learning_rate": 4.319001365291582e-06, "loss": 22.9448, "step": 294570 }, { "epoch": 0.5950702375998417, "grad_norm": 615.77783203125, "learning_rate": 4.31865555390639e-06, "loss": 16.4228, "step": 294580 }, { "epoch": 0.5950904382325255, "grad_norm": 264.7701721191406, "learning_rate": 4.318309745841987e-06, "loss": 19.1642, "step": 294590 }, { "epoch": 0.5951106388652092, "grad_norm": 251.09205627441406, "learning_rate": 4.317963941100059e-06, "loss": 15.5904, "step": 294600 }, { "epoch": 0.595130839497893, "grad_norm": 230.48606872558594, "learning_rate": 4.3176181396822925e-06, "loss": 19.6818, "step": 294610 }, { "epoch": 0.5951510401305768, "grad_norm": 349.52252197265625, "learning_rate": 4.317272341590373e-06, "loss": 23.2003, "step": 294620 }, { "epoch": 0.5951712407632607, "grad_norm": 310.99517822265625, "learning_rate": 4.3169265468259855e-06, "loss": 27.3721, "step": 294630 }, { "epoch": 0.5951914413959445, "grad_norm": 43.45647048950195, "learning_rate": 4.316580755390814e-06, "loss": 7.7432, "step": 294640 }, { "epoch": 0.5952116420286283, "grad_norm": 198.0667724609375, "learning_rate": 4.316234967286548e-06, "loss": 12.9083, "step": 294650 }, { "epoch": 0.5952318426613121, "grad_norm": 338.30072021484375, "learning_rate": 4.315889182514867e-06, "loss": 17.119, "step": 294660 }, { "epoch": 0.595252043293996, "grad_norm": 94.64888000488281, "learning_rate": 4.315543401077458e-06, "loss": 14.0969, "step": 294670 }, { "epoch": 0.5952722439266798, "grad_norm": 292.1901550292969, "learning_rate": 4.315197622976011e-06, "loss": 23.4879, "step": 294680 }, { "epoch": 0.5952924445593636, "grad_norm": 123.15663146972656, "learning_rate": 4.314851848212205e-06, "loss": 25.8411, "step": 294690 }, { "epoch": 0.5953126451920474, "grad_norm": 512.7115478515625, "learning_rate": 4.314506076787729e-06, "loss": 23.051, "step": 294700 }, { "epoch": 0.5953328458247312, "grad_norm": 209.6356658935547, "learning_rate": 4.314160308704269e-06, "loss": 17.5084, "step": 294710 }, { "epoch": 0.595353046457415, "grad_norm": 137.50048828125, "learning_rate": 4.313814543963505e-06, "loss": 17.2033, "step": 294720 }, { "epoch": 0.5953732470900989, "grad_norm": 257.4812927246094, "learning_rate": 4.313468782567128e-06, "loss": 15.2268, "step": 294730 }, { "epoch": 0.5953934477227827, "grad_norm": 93.55938720703125, "learning_rate": 4.313123024516819e-06, "loss": 15.6595, "step": 294740 }, { "epoch": 0.5954136483554665, "grad_norm": 100.10951232910156, "learning_rate": 4.312777269814268e-06, "loss": 19.2469, "step": 294750 }, { "epoch": 0.5954338489881503, "grad_norm": 198.2530975341797, "learning_rate": 4.312431518461154e-06, "loss": 17.1908, "step": 294760 }, { "epoch": 0.5954540496208341, "grad_norm": 96.54187774658203, "learning_rate": 4.312085770459167e-06, "loss": 11.3873, "step": 294770 }, { "epoch": 0.595474250253518, "grad_norm": 375.7646484375, "learning_rate": 4.311740025809992e-06, "loss": 11.1772, "step": 294780 }, { "epoch": 0.5954944508862018, "grad_norm": 124.87718963623047, "learning_rate": 4.31139428451531e-06, "loss": 30.5237, "step": 294790 }, { "epoch": 0.5955146515188856, "grad_norm": 51.2127685546875, "learning_rate": 4.31104854657681e-06, "loss": 14.9136, "step": 294800 }, { "epoch": 0.5955348521515694, "grad_norm": 259.867431640625, "learning_rate": 4.310702811996177e-06, "loss": 13.911, "step": 294810 }, { "epoch": 0.5955550527842532, "grad_norm": 113.5669937133789, "learning_rate": 4.310357080775092e-06, "loss": 24.9616, "step": 294820 }, { "epoch": 0.5955752534169371, "grad_norm": 7.571382999420166, "learning_rate": 4.3100113529152444e-06, "loss": 21.4529, "step": 294830 }, { "epoch": 0.5955954540496209, "grad_norm": 120.63533782958984, "learning_rate": 4.30966562841832e-06, "loss": 10.4215, "step": 294840 }, { "epoch": 0.5956156546823046, "grad_norm": 221.38540649414062, "learning_rate": 4.309319907285998e-06, "loss": 15.0222, "step": 294850 }, { "epoch": 0.5956358553149884, "grad_norm": 354.7468566894531, "learning_rate": 4.308974189519968e-06, "loss": 18.8002, "step": 294860 }, { "epoch": 0.5956560559476722, "grad_norm": 715.1752319335938, "learning_rate": 4.308628475121916e-06, "loss": 14.0687, "step": 294870 }, { "epoch": 0.5956762565803561, "grad_norm": 58.3545036315918, "learning_rate": 4.308282764093523e-06, "loss": 16.4715, "step": 294880 }, { "epoch": 0.5956964572130399, "grad_norm": 383.37890625, "learning_rate": 4.307937056436476e-06, "loss": 10.3894, "step": 294890 }, { "epoch": 0.5957166578457237, "grad_norm": 211.88648986816406, "learning_rate": 4.307591352152459e-06, "loss": 18.6475, "step": 294900 }, { "epoch": 0.5957368584784075, "grad_norm": 517.5878295898438, "learning_rate": 4.307245651243161e-06, "loss": 21.7568, "step": 294910 }, { "epoch": 0.5957570591110913, "grad_norm": 232.85023498535156, "learning_rate": 4.30689995371026e-06, "loss": 14.2648, "step": 294920 }, { "epoch": 0.5957772597437752, "grad_norm": 339.15081787109375, "learning_rate": 4.306554259555447e-06, "loss": 24.7543, "step": 294930 }, { "epoch": 0.595797460376459, "grad_norm": 523.5938110351562, "learning_rate": 4.306208568780404e-06, "loss": 18.2347, "step": 294940 }, { "epoch": 0.5958176610091428, "grad_norm": 264.4195556640625, "learning_rate": 4.3058628813868154e-06, "loss": 14.1999, "step": 294950 }, { "epoch": 0.5958378616418266, "grad_norm": 176.4294891357422, "learning_rate": 4.305517197376367e-06, "loss": 10.6257, "step": 294960 }, { "epoch": 0.5958580622745104, "grad_norm": 189.1776123046875, "learning_rate": 4.305171516750746e-06, "loss": 32.2111, "step": 294970 }, { "epoch": 0.5958782629071943, "grad_norm": 277.9081726074219, "learning_rate": 4.3048258395116326e-06, "loss": 10.9211, "step": 294980 }, { "epoch": 0.5958984635398781, "grad_norm": 551.1771240234375, "learning_rate": 4.304480165660712e-06, "loss": 22.3808, "step": 294990 }, { "epoch": 0.5959186641725619, "grad_norm": 954.1984252929688, "learning_rate": 4.304134495199675e-06, "loss": 15.5225, "step": 295000 }, { "epoch": 0.5959388648052457, "grad_norm": 340.311279296875, "learning_rate": 4.303788828130198e-06, "loss": 18.9877, "step": 295010 }, { "epoch": 0.5959590654379295, "grad_norm": 226.00218200683594, "learning_rate": 4.303443164453971e-06, "loss": 15.0139, "step": 295020 }, { "epoch": 0.5959792660706134, "grad_norm": 157.06405639648438, "learning_rate": 4.303097504172679e-06, "loss": 14.5607, "step": 295030 }, { "epoch": 0.5959994667032972, "grad_norm": 35.23577117919922, "learning_rate": 4.302751847288005e-06, "loss": 15.2179, "step": 295040 }, { "epoch": 0.596019667335981, "grad_norm": 167.92970275878906, "learning_rate": 4.302406193801632e-06, "loss": 25.5885, "step": 295050 }, { "epoch": 0.5960398679686648, "grad_norm": 478.05316162109375, "learning_rate": 4.302060543715247e-06, "loss": 14.2408, "step": 295060 }, { "epoch": 0.5960600686013486, "grad_norm": 121.89498901367188, "learning_rate": 4.301714897030537e-06, "loss": 17.0495, "step": 295070 }, { "epoch": 0.5960802692340325, "grad_norm": 344.9151306152344, "learning_rate": 4.3013692537491805e-06, "loss": 11.8287, "step": 295080 }, { "epoch": 0.5961004698667163, "grad_norm": 71.55351257324219, "learning_rate": 4.3010236138728674e-06, "loss": 29.54, "step": 295090 }, { "epoch": 0.5961206704994001, "grad_norm": 2.2755820751190186, "learning_rate": 4.300677977403281e-06, "loss": 8.5432, "step": 295100 }, { "epoch": 0.5961408711320838, "grad_norm": 428.93798828125, "learning_rate": 4.3003323443421045e-06, "loss": 29.409, "step": 295110 }, { "epoch": 0.5961610717647676, "grad_norm": 25.31964874267578, "learning_rate": 4.299986714691022e-06, "loss": 9.122, "step": 295120 }, { "epoch": 0.5961812723974514, "grad_norm": 367.4065856933594, "learning_rate": 4.299641088451721e-06, "loss": 15.8088, "step": 295130 }, { "epoch": 0.5962014730301353, "grad_norm": 344.1318054199219, "learning_rate": 4.299295465625884e-06, "loss": 19.5186, "step": 295140 }, { "epoch": 0.5962216736628191, "grad_norm": 338.06329345703125, "learning_rate": 4.298949846215195e-06, "loss": 24.6144, "step": 295150 }, { "epoch": 0.5962418742955029, "grad_norm": 392.39227294921875, "learning_rate": 4.298604230221341e-06, "loss": 14.2212, "step": 295160 }, { "epoch": 0.5962620749281867, "grad_norm": 264.4922790527344, "learning_rate": 4.298258617646004e-06, "loss": 27.0727, "step": 295170 }, { "epoch": 0.5962822755608705, "grad_norm": 330.17724609375, "learning_rate": 4.29791300849087e-06, "loss": 14.4535, "step": 295180 }, { "epoch": 0.5963024761935544, "grad_norm": 40.58692932128906, "learning_rate": 4.297567402757621e-06, "loss": 28.9532, "step": 295190 }, { "epoch": 0.5963226768262382, "grad_norm": 73.6310043334961, "learning_rate": 4.297221800447946e-06, "loss": 17.7478, "step": 295200 }, { "epoch": 0.596342877458922, "grad_norm": 505.3072204589844, "learning_rate": 4.296876201563524e-06, "loss": 13.3612, "step": 295210 }, { "epoch": 0.5963630780916058, "grad_norm": 310.61016845703125, "learning_rate": 4.296530606106043e-06, "loss": 18.051, "step": 295220 }, { "epoch": 0.5963832787242896, "grad_norm": 369.1302490234375, "learning_rate": 4.296185014077188e-06, "loss": 12.7423, "step": 295230 }, { "epoch": 0.5964034793569735, "grad_norm": 296.4026794433594, "learning_rate": 4.295839425478641e-06, "loss": 15.1056, "step": 295240 }, { "epoch": 0.5964236799896573, "grad_norm": 512.5640869140625, "learning_rate": 4.295493840312087e-06, "loss": 26.06, "step": 295250 }, { "epoch": 0.5964438806223411, "grad_norm": 243.4648895263672, "learning_rate": 4.295148258579211e-06, "loss": 21.9504, "step": 295260 }, { "epoch": 0.5964640812550249, "grad_norm": 250.87339782714844, "learning_rate": 4.294802680281696e-06, "loss": 11.8293, "step": 295270 }, { "epoch": 0.5964842818877087, "grad_norm": 2.5242743492126465, "learning_rate": 4.294457105421228e-06, "loss": 16.4247, "step": 295280 }, { "epoch": 0.5965044825203926, "grad_norm": 323.44085693359375, "learning_rate": 4.294111533999492e-06, "loss": 12.0895, "step": 295290 }, { "epoch": 0.5965246831530764, "grad_norm": 546.7369384765625, "learning_rate": 4.293765966018167e-06, "loss": 27.4745, "step": 295300 }, { "epoch": 0.5965448837857602, "grad_norm": 396.6521301269531, "learning_rate": 4.293420401478943e-06, "loss": 13.5561, "step": 295310 }, { "epoch": 0.596565084418444, "grad_norm": 149.00636291503906, "learning_rate": 4.293074840383504e-06, "loss": 15.2462, "step": 295320 }, { "epoch": 0.5965852850511278, "grad_norm": 98.19751739501953, "learning_rate": 4.29272928273353e-06, "loss": 18.231, "step": 295330 }, { "epoch": 0.5966054856838117, "grad_norm": 277.527587890625, "learning_rate": 4.2923837285307085e-06, "loss": 14.5218, "step": 295340 }, { "epoch": 0.5966256863164955, "grad_norm": 41.59236145019531, "learning_rate": 4.292038177776722e-06, "loss": 17.3115, "step": 295350 }, { "epoch": 0.5966458869491792, "grad_norm": 320.77337646484375, "learning_rate": 4.291692630473258e-06, "loss": 19.0116, "step": 295360 }, { "epoch": 0.596666087581863, "grad_norm": 163.66432189941406, "learning_rate": 4.291347086621996e-06, "loss": 11.6075, "step": 295370 }, { "epoch": 0.5966862882145468, "grad_norm": 184.95521545410156, "learning_rate": 4.2910015462246225e-06, "loss": 12.5109, "step": 295380 }, { "epoch": 0.5967064888472307, "grad_norm": 209.46165466308594, "learning_rate": 4.290656009282823e-06, "loss": 10.5073, "step": 295390 }, { "epoch": 0.5967266894799145, "grad_norm": 234.70741271972656, "learning_rate": 4.290310475798278e-06, "loss": 16.7471, "step": 295400 }, { "epoch": 0.5967468901125983, "grad_norm": 562.7808227539062, "learning_rate": 4.289964945772675e-06, "loss": 26.9131, "step": 295410 }, { "epoch": 0.5967670907452821, "grad_norm": 275.3915100097656, "learning_rate": 4.289619419207698e-06, "loss": 37.145, "step": 295420 }, { "epoch": 0.5967872913779659, "grad_norm": 240.804443359375, "learning_rate": 4.289273896105027e-06, "loss": 15.4559, "step": 295430 }, { "epoch": 0.5968074920106498, "grad_norm": 322.1091613769531, "learning_rate": 4.288928376466349e-06, "loss": 11.7772, "step": 295440 }, { "epoch": 0.5968276926433336, "grad_norm": 208.47418212890625, "learning_rate": 4.288582860293351e-06, "loss": 13.3241, "step": 295450 }, { "epoch": 0.5968478932760174, "grad_norm": 215.7115020751953, "learning_rate": 4.288237347587711e-06, "loss": 24.0919, "step": 295460 }, { "epoch": 0.5968680939087012, "grad_norm": 280.4097900390625, "learning_rate": 4.287891838351117e-06, "loss": 11.0054, "step": 295470 }, { "epoch": 0.596888294541385, "grad_norm": 159.92750549316406, "learning_rate": 4.2875463325852514e-06, "loss": 14.4242, "step": 295480 }, { "epoch": 0.5969084951740689, "grad_norm": 229.55873107910156, "learning_rate": 4.287200830291799e-06, "loss": 30.7519, "step": 295490 }, { "epoch": 0.5969286958067527, "grad_norm": 493.49945068359375, "learning_rate": 4.286855331472442e-06, "loss": 21.3874, "step": 295500 }, { "epoch": 0.5969488964394365, "grad_norm": 592.82373046875, "learning_rate": 4.286509836128866e-06, "loss": 30.7667, "step": 295510 }, { "epoch": 0.5969690970721203, "grad_norm": 89.87036895751953, "learning_rate": 4.286164344262756e-06, "loss": 17.1861, "step": 295520 }, { "epoch": 0.5969892977048041, "grad_norm": 492.77752685546875, "learning_rate": 4.285818855875793e-06, "loss": 19.7256, "step": 295530 }, { "epoch": 0.597009498337488, "grad_norm": 135.89012145996094, "learning_rate": 4.285473370969663e-06, "loss": 17.6888, "step": 295540 }, { "epoch": 0.5970296989701718, "grad_norm": 224.6938934326172, "learning_rate": 4.285127889546049e-06, "loss": 18.7706, "step": 295550 }, { "epoch": 0.5970498996028556, "grad_norm": 288.1461486816406, "learning_rate": 4.284782411606635e-06, "loss": 16.2203, "step": 295560 }, { "epoch": 0.5970701002355394, "grad_norm": 222.8823699951172, "learning_rate": 4.284436937153105e-06, "loss": 12.3512, "step": 295570 }, { "epoch": 0.5970903008682232, "grad_norm": 222.00140380859375, "learning_rate": 4.284091466187142e-06, "loss": 27.9922, "step": 295580 }, { "epoch": 0.597110501500907, "grad_norm": 53.50161361694336, "learning_rate": 4.283745998710431e-06, "loss": 12.5481, "step": 295590 }, { "epoch": 0.5971307021335909, "grad_norm": 335.0904541015625, "learning_rate": 4.283400534724654e-06, "loss": 17.7789, "step": 295600 }, { "epoch": 0.5971509027662747, "grad_norm": 392.7342529296875, "learning_rate": 4.283055074231498e-06, "loss": 21.2447, "step": 295610 }, { "epoch": 0.5971711033989584, "grad_norm": 166.31288146972656, "learning_rate": 4.282709617232642e-06, "loss": 17.0254, "step": 295620 }, { "epoch": 0.5971913040316422, "grad_norm": 338.8050537109375, "learning_rate": 4.282364163729773e-06, "loss": 16.5618, "step": 295630 }, { "epoch": 0.597211504664326, "grad_norm": 308.5074768066406, "learning_rate": 4.282018713724576e-06, "loss": 20.1889, "step": 295640 }, { "epoch": 0.5972317052970099, "grad_norm": 370.7738952636719, "learning_rate": 4.281673267218731e-06, "loss": 19.8171, "step": 295650 }, { "epoch": 0.5972519059296937, "grad_norm": 350.7268981933594, "learning_rate": 4.281327824213923e-06, "loss": 20.1067, "step": 295660 }, { "epoch": 0.5972721065623775, "grad_norm": 201.44546508789062, "learning_rate": 4.280982384711835e-06, "loss": 17.0885, "step": 295670 }, { "epoch": 0.5972923071950613, "grad_norm": 245.46580505371094, "learning_rate": 4.280636948714155e-06, "loss": 21.0244, "step": 295680 }, { "epoch": 0.5973125078277451, "grad_norm": 386.60662841796875, "learning_rate": 4.280291516222561e-06, "loss": 11.8302, "step": 295690 }, { "epoch": 0.597332708460429, "grad_norm": 237.2351531982422, "learning_rate": 4.279946087238739e-06, "loss": 13.3045, "step": 295700 }, { "epoch": 0.5973529090931128, "grad_norm": 196.26065063476562, "learning_rate": 4.279600661764374e-06, "loss": 16.4678, "step": 295710 }, { "epoch": 0.5973731097257966, "grad_norm": 403.2311096191406, "learning_rate": 4.279255239801146e-06, "loss": 35.0096, "step": 295720 }, { "epoch": 0.5973933103584804, "grad_norm": 1.0276484489440918, "learning_rate": 4.278909821350742e-06, "loss": 12.7512, "step": 295730 }, { "epoch": 0.5974135109911642, "grad_norm": 227.38836669921875, "learning_rate": 4.278564406414844e-06, "loss": 18.7705, "step": 295740 }, { "epoch": 0.5974337116238481, "grad_norm": 81.01026916503906, "learning_rate": 4.278218994995135e-06, "loss": 13.7834, "step": 295750 }, { "epoch": 0.5974539122565319, "grad_norm": 858.1123046875, "learning_rate": 4.277873587093298e-06, "loss": 22.801, "step": 295760 }, { "epoch": 0.5974741128892157, "grad_norm": 123.47573852539062, "learning_rate": 4.27752818271102e-06, "loss": 12.7264, "step": 295770 }, { "epoch": 0.5974943135218995, "grad_norm": 362.09539794921875, "learning_rate": 4.27718278184998e-06, "loss": 25.0226, "step": 295780 }, { "epoch": 0.5975145141545833, "grad_norm": 260.3680114746094, "learning_rate": 4.276837384511864e-06, "loss": 13.364, "step": 295790 }, { "epoch": 0.5975347147872672, "grad_norm": 294.3338317871094, "learning_rate": 4.2764919906983545e-06, "loss": 14.9635, "step": 295800 }, { "epoch": 0.597554915419951, "grad_norm": 155.4580535888672, "learning_rate": 4.276146600411137e-06, "loss": 15.8784, "step": 295810 }, { "epoch": 0.5975751160526348, "grad_norm": 887.7351684570312, "learning_rate": 4.2758012136518925e-06, "loss": 20.8218, "step": 295820 }, { "epoch": 0.5975953166853186, "grad_norm": 270.7889404296875, "learning_rate": 4.275455830422303e-06, "loss": 13.0122, "step": 295830 }, { "epoch": 0.5976155173180024, "grad_norm": 371.5483093261719, "learning_rate": 4.275110450724056e-06, "loss": 16.6919, "step": 295840 }, { "epoch": 0.5976357179506863, "grad_norm": 376.7240295410156, "learning_rate": 4.274765074558832e-06, "loss": 22.0264, "step": 295850 }, { "epoch": 0.5976559185833701, "grad_norm": 257.30743408203125, "learning_rate": 4.274419701928315e-06, "loss": 14.0515, "step": 295860 }, { "epoch": 0.5976761192160538, "grad_norm": 139.37063598632812, "learning_rate": 4.27407433283419e-06, "loss": 17.52, "step": 295870 }, { "epoch": 0.5976963198487376, "grad_norm": 407.4631652832031, "learning_rate": 4.273728967278137e-06, "loss": 15.5965, "step": 295880 }, { "epoch": 0.5977165204814214, "grad_norm": 313.07940673828125, "learning_rate": 4.273383605261841e-06, "loss": 15.0202, "step": 295890 }, { "epoch": 0.5977367211141053, "grad_norm": 178.27267456054688, "learning_rate": 4.273038246786986e-06, "loss": 22.6328, "step": 295900 }, { "epoch": 0.5977569217467891, "grad_norm": 213.9728240966797, "learning_rate": 4.272692891855253e-06, "loss": 25.7216, "step": 295910 }, { "epoch": 0.5977771223794729, "grad_norm": 397.1802978515625, "learning_rate": 4.272347540468327e-06, "loss": 25.3447, "step": 295920 }, { "epoch": 0.5977973230121567, "grad_norm": 202.57656860351562, "learning_rate": 4.272002192627892e-06, "loss": 20.3972, "step": 295930 }, { "epoch": 0.5978175236448405, "grad_norm": 126.68864440917969, "learning_rate": 4.2716568483356295e-06, "loss": 9.014, "step": 295940 }, { "epoch": 0.5978377242775244, "grad_norm": 253.6220245361328, "learning_rate": 4.2713115075932225e-06, "loss": 17.1973, "step": 295950 }, { "epoch": 0.5978579249102082, "grad_norm": 139.36277770996094, "learning_rate": 4.270966170402354e-06, "loss": 21.5333, "step": 295960 }, { "epoch": 0.597878125542892, "grad_norm": 764.9443359375, "learning_rate": 4.2706208367647115e-06, "loss": 25.677, "step": 295970 }, { "epoch": 0.5978983261755758, "grad_norm": 289.01824951171875, "learning_rate": 4.270275506681971e-06, "loss": 35.2365, "step": 295980 }, { "epoch": 0.5979185268082596, "grad_norm": 275.4140930175781, "learning_rate": 4.26993018015582e-06, "loss": 23.9593, "step": 295990 }, { "epoch": 0.5979387274409435, "grad_norm": 402.7630615234375, "learning_rate": 4.269584857187942e-06, "loss": 15.9805, "step": 296000 }, { "epoch": 0.5979589280736273, "grad_norm": 117.82694244384766, "learning_rate": 4.2692395377800185e-06, "loss": 14.2797, "step": 296010 }, { "epoch": 0.5979791287063111, "grad_norm": 7.401989936828613, "learning_rate": 4.268894221933733e-06, "loss": 20.5032, "step": 296020 }, { "epoch": 0.5979993293389949, "grad_norm": 69.07545471191406, "learning_rate": 4.268548909650768e-06, "loss": 18.9923, "step": 296030 }, { "epoch": 0.5980195299716787, "grad_norm": 152.6262664794922, "learning_rate": 4.2682036009328065e-06, "loss": 17.6954, "step": 296040 }, { "epoch": 0.5980397306043626, "grad_norm": 279.1143493652344, "learning_rate": 4.267858295781531e-06, "loss": 17.9371, "step": 296050 }, { "epoch": 0.5980599312370464, "grad_norm": 378.2461853027344, "learning_rate": 4.267512994198629e-06, "loss": 13.5321, "step": 296060 }, { "epoch": 0.5980801318697302, "grad_norm": 254.24795532226562, "learning_rate": 4.267167696185776e-06, "loss": 22.5439, "step": 296070 }, { "epoch": 0.598100332502414, "grad_norm": 158.1107177734375, "learning_rate": 4.2668224017446595e-06, "loss": 9.4117, "step": 296080 }, { "epoch": 0.5981205331350978, "grad_norm": 402.3255920410156, "learning_rate": 4.266477110876963e-06, "loss": 23.1821, "step": 296090 }, { "epoch": 0.5981407337677817, "grad_norm": 248.7587890625, "learning_rate": 4.266131823584368e-06, "loss": 29.9156, "step": 296100 }, { "epoch": 0.5981609344004655, "grad_norm": 158.78147888183594, "learning_rate": 4.265786539868556e-06, "loss": 12.4625, "step": 296110 }, { "epoch": 0.5981811350331493, "grad_norm": 329.5522155761719, "learning_rate": 4.265441259731211e-06, "loss": 24.9179, "step": 296120 }, { "epoch": 0.598201335665833, "grad_norm": 282.6330871582031, "learning_rate": 4.26509598317402e-06, "loss": 15.1684, "step": 296130 }, { "epoch": 0.5982215362985168, "grad_norm": 383.3296813964844, "learning_rate": 4.2647507101986575e-06, "loss": 16.9692, "step": 296140 }, { "epoch": 0.5982417369312006, "grad_norm": 113.87035369873047, "learning_rate": 4.264405440806813e-06, "loss": 8.6747, "step": 296150 }, { "epoch": 0.5982619375638845, "grad_norm": 97.12630462646484, "learning_rate": 4.264060175000168e-06, "loss": 15.7547, "step": 296160 }, { "epoch": 0.5982821381965683, "grad_norm": 381.64825439453125, "learning_rate": 4.263714912780403e-06, "loss": 22.1774, "step": 296170 }, { "epoch": 0.5983023388292521, "grad_norm": 241.85601806640625, "learning_rate": 4.263369654149203e-06, "loss": 28.2744, "step": 296180 }, { "epoch": 0.5983225394619359, "grad_norm": 208.2087860107422, "learning_rate": 4.263024399108251e-06, "loss": 13.5273, "step": 296190 }, { "epoch": 0.5983427400946197, "grad_norm": 745.3338623046875, "learning_rate": 4.262679147659227e-06, "loss": 26.893, "step": 296200 }, { "epoch": 0.5983629407273036, "grad_norm": 203.0643768310547, "learning_rate": 4.262333899803814e-06, "loss": 21.2511, "step": 296210 }, { "epoch": 0.5983831413599874, "grad_norm": 150.59527587890625, "learning_rate": 4.2619886555436995e-06, "loss": 20.342, "step": 296220 }, { "epoch": 0.5984033419926712, "grad_norm": 504.01422119140625, "learning_rate": 4.26164341488056e-06, "loss": 14.1678, "step": 296230 }, { "epoch": 0.598423542625355, "grad_norm": 546.4620361328125, "learning_rate": 4.261298177816082e-06, "loss": 18.5358, "step": 296240 }, { "epoch": 0.5984437432580388, "grad_norm": 503.2430114746094, "learning_rate": 4.260952944351947e-06, "loss": 15.0843, "step": 296250 }, { "epoch": 0.5984639438907227, "grad_norm": 0.0, "learning_rate": 4.260607714489839e-06, "loss": 13.3733, "step": 296260 }, { "epoch": 0.5984841445234065, "grad_norm": 401.8907775878906, "learning_rate": 4.260262488231438e-06, "loss": 17.0723, "step": 296270 }, { "epoch": 0.5985043451560903, "grad_norm": 365.26141357421875, "learning_rate": 4.259917265578427e-06, "loss": 24.2308, "step": 296280 }, { "epoch": 0.5985245457887741, "grad_norm": 195.3843994140625, "learning_rate": 4.259572046532493e-06, "loss": 16.5665, "step": 296290 }, { "epoch": 0.5985447464214579, "grad_norm": 175.9238739013672, "learning_rate": 4.259226831095311e-06, "loss": 29.2914, "step": 296300 }, { "epoch": 0.5985649470541418, "grad_norm": 516.8207397460938, "learning_rate": 4.258881619268569e-06, "loss": 34.2452, "step": 296310 }, { "epoch": 0.5985851476868256, "grad_norm": 26.03068733215332, "learning_rate": 4.258536411053949e-06, "loss": 25.0478, "step": 296320 }, { "epoch": 0.5986053483195094, "grad_norm": 290.5191345214844, "learning_rate": 4.258191206453132e-06, "loss": 26.0596, "step": 296330 }, { "epoch": 0.5986255489521932, "grad_norm": 94.56279754638672, "learning_rate": 4.2578460054678e-06, "loss": 24.5054, "step": 296340 }, { "epoch": 0.598645749584877, "grad_norm": 364.2471923828125, "learning_rate": 4.25750080809964e-06, "loss": 23.9814, "step": 296350 }, { "epoch": 0.5986659502175609, "grad_norm": 330.9288330078125, "learning_rate": 4.2571556143503275e-06, "loss": 12.5086, "step": 296360 }, { "epoch": 0.5986861508502447, "grad_norm": 382.0340576171875, "learning_rate": 4.256810424221548e-06, "loss": 24.3737, "step": 296370 }, { "epoch": 0.5987063514829285, "grad_norm": 534.4496459960938, "learning_rate": 4.256465237714989e-06, "loss": 30.5866, "step": 296380 }, { "epoch": 0.5987265521156122, "grad_norm": 211.0589141845703, "learning_rate": 4.2561200548323224e-06, "loss": 12.5179, "step": 296390 }, { "epoch": 0.598746752748296, "grad_norm": 405.158447265625, "learning_rate": 4.255774875575239e-06, "loss": 41.5195, "step": 296400 }, { "epoch": 0.5987669533809798, "grad_norm": 64.14668273925781, "learning_rate": 4.2554296999454194e-06, "loss": 18.2255, "step": 296410 }, { "epoch": 0.5987871540136637, "grad_norm": 193.95849609375, "learning_rate": 4.2550845279445455e-06, "loss": 10.6423, "step": 296420 }, { "epoch": 0.5988073546463475, "grad_norm": 278.5498352050781, "learning_rate": 4.254739359574298e-06, "loss": 12.3132, "step": 296430 }, { "epoch": 0.5988275552790313, "grad_norm": 64.79869079589844, "learning_rate": 4.25439419483636e-06, "loss": 14.4673, "step": 296440 }, { "epoch": 0.5988477559117151, "grad_norm": 111.8031997680664, "learning_rate": 4.2540490337324156e-06, "loss": 11.2333, "step": 296450 }, { "epoch": 0.598867956544399, "grad_norm": 571.5826416015625, "learning_rate": 4.253703876264144e-06, "loss": 34.1578, "step": 296460 }, { "epoch": 0.5988881571770828, "grad_norm": 44.16543197631836, "learning_rate": 4.253358722433231e-06, "loss": 19.942, "step": 296470 }, { "epoch": 0.5989083578097666, "grad_norm": 199.48397827148438, "learning_rate": 4.253013572241356e-06, "loss": 11.2671, "step": 296480 }, { "epoch": 0.5989285584424504, "grad_norm": 275.5598449707031, "learning_rate": 4.252668425690203e-06, "loss": 35.1063, "step": 296490 }, { "epoch": 0.5989487590751342, "grad_norm": 311.6197204589844, "learning_rate": 4.2523232827814534e-06, "loss": 11.677, "step": 296500 }, { "epoch": 0.598968959707818, "grad_norm": 533.260498046875, "learning_rate": 4.251978143516789e-06, "loss": 25.0076, "step": 296510 }, { "epoch": 0.5989891603405019, "grad_norm": 128.37274169921875, "learning_rate": 4.251633007897891e-06, "loss": 15.9332, "step": 296520 }, { "epoch": 0.5990093609731857, "grad_norm": 403.2014465332031, "learning_rate": 4.251287875926445e-06, "loss": 19.9306, "step": 296530 }, { "epoch": 0.5990295616058695, "grad_norm": 155.39321899414062, "learning_rate": 4.250942747604131e-06, "loss": 15.6795, "step": 296540 }, { "epoch": 0.5990497622385533, "grad_norm": 163.45626831054688, "learning_rate": 4.250597622932631e-06, "loss": 16.2877, "step": 296550 }, { "epoch": 0.5990699628712371, "grad_norm": 237.67529296875, "learning_rate": 4.250252501913627e-06, "loss": 18.8921, "step": 296560 }, { "epoch": 0.599090163503921, "grad_norm": 251.52774047851562, "learning_rate": 4.249907384548801e-06, "loss": 15.5165, "step": 296570 }, { "epoch": 0.5991103641366048, "grad_norm": 129.17051696777344, "learning_rate": 4.249562270839837e-06, "loss": 22.7851, "step": 296580 }, { "epoch": 0.5991305647692886, "grad_norm": 713.2863159179688, "learning_rate": 4.249217160788413e-06, "loss": 24.506, "step": 296590 }, { "epoch": 0.5991507654019724, "grad_norm": 129.50460815429688, "learning_rate": 4.248872054396215e-06, "loss": 17.0971, "step": 296600 }, { "epoch": 0.5991709660346562, "grad_norm": 226.728759765625, "learning_rate": 4.248526951664924e-06, "loss": 10.4105, "step": 296610 }, { "epoch": 0.5991911666673401, "grad_norm": 291.16815185546875, "learning_rate": 4.248181852596221e-06, "loss": 20.5274, "step": 296620 }, { "epoch": 0.5992113673000239, "grad_norm": 235.64059448242188, "learning_rate": 4.247836757191787e-06, "loss": 21.5361, "step": 296630 }, { "epoch": 0.5992315679327076, "grad_norm": 162.44039916992188, "learning_rate": 4.2474916654533085e-06, "loss": 9.9543, "step": 296640 }, { "epoch": 0.5992517685653914, "grad_norm": 324.6493225097656, "learning_rate": 4.247146577382462e-06, "loss": 16.0887, "step": 296650 }, { "epoch": 0.5992719691980752, "grad_norm": 148.7587127685547, "learning_rate": 4.246801492980931e-06, "loss": 16.8413, "step": 296660 }, { "epoch": 0.5992921698307591, "grad_norm": 331.1235656738281, "learning_rate": 4.246456412250401e-06, "loss": 10.3864, "step": 296670 }, { "epoch": 0.5993123704634429, "grad_norm": 45.85690689086914, "learning_rate": 4.246111335192548e-06, "loss": 12.456, "step": 296680 }, { "epoch": 0.5993325710961267, "grad_norm": 200.4580535888672, "learning_rate": 4.245766261809059e-06, "loss": 21.2271, "step": 296690 }, { "epoch": 0.5993527717288105, "grad_norm": 321.74066162109375, "learning_rate": 4.245421192101613e-06, "loss": 18.4419, "step": 296700 }, { "epoch": 0.5993729723614943, "grad_norm": 341.7735290527344, "learning_rate": 4.245076126071894e-06, "loss": 17.3698, "step": 296710 }, { "epoch": 0.5993931729941782, "grad_norm": 1.9352205991744995, "learning_rate": 4.244731063721581e-06, "loss": 19.2411, "step": 296720 }, { "epoch": 0.599413373626862, "grad_norm": 120.36441802978516, "learning_rate": 4.244386005052356e-06, "loss": 14.6422, "step": 296730 }, { "epoch": 0.5994335742595458, "grad_norm": 212.35549926757812, "learning_rate": 4.244040950065905e-06, "loss": 20.8324, "step": 296740 }, { "epoch": 0.5994537748922296, "grad_norm": 215.24107360839844, "learning_rate": 4.243695898763904e-06, "loss": 15.0458, "step": 296750 }, { "epoch": 0.5994739755249134, "grad_norm": 168.9129180908203, "learning_rate": 4.243350851148039e-06, "loss": 15.5687, "step": 296760 }, { "epoch": 0.5994941761575973, "grad_norm": 234.74745178222656, "learning_rate": 4.24300580721999e-06, "loss": 34.6163, "step": 296770 }, { "epoch": 0.5995143767902811, "grad_norm": 135.71824645996094, "learning_rate": 4.242660766981439e-06, "loss": 11.153, "step": 296780 }, { "epoch": 0.5995345774229649, "grad_norm": 256.6994934082031, "learning_rate": 4.242315730434066e-06, "loss": 14.3253, "step": 296790 }, { "epoch": 0.5995547780556487, "grad_norm": 367.36541748046875, "learning_rate": 4.241970697579557e-06, "loss": 30.1164, "step": 296800 }, { "epoch": 0.5995749786883325, "grad_norm": 126.00182342529297, "learning_rate": 4.2416256684195885e-06, "loss": 21.3393, "step": 296810 }, { "epoch": 0.5995951793210164, "grad_norm": 432.2103576660156, "learning_rate": 4.241280642955845e-06, "loss": 28.6807, "step": 296820 }, { "epoch": 0.5996153799537002, "grad_norm": 386.0881652832031, "learning_rate": 4.24093562119001e-06, "loss": 10.0764, "step": 296830 }, { "epoch": 0.599635580586384, "grad_norm": 264.82012939453125, "learning_rate": 4.240590603123759e-06, "loss": 30.6014, "step": 296840 }, { "epoch": 0.5996557812190678, "grad_norm": 277.88507080078125, "learning_rate": 4.240245588758778e-06, "loss": 22.6887, "step": 296850 }, { "epoch": 0.5996759818517516, "grad_norm": 145.0326385498047, "learning_rate": 4.23990057809675e-06, "loss": 9.3831, "step": 296860 }, { "epoch": 0.5996961824844355, "grad_norm": 256.7420959472656, "learning_rate": 4.239555571139353e-06, "loss": 15.386, "step": 296870 }, { "epoch": 0.5997163831171193, "grad_norm": 206.68206787109375, "learning_rate": 4.23921056788827e-06, "loss": 11.5794, "step": 296880 }, { "epoch": 0.5997365837498031, "grad_norm": 464.7113952636719, "learning_rate": 4.238865568345182e-06, "loss": 19.9311, "step": 296890 }, { "epoch": 0.5997567843824868, "grad_norm": 277.7959289550781, "learning_rate": 4.238520572511773e-06, "loss": 17.7763, "step": 296900 }, { "epoch": 0.5997769850151706, "grad_norm": 246.38624572753906, "learning_rate": 4.238175580389719e-06, "loss": 9.0426, "step": 296910 }, { "epoch": 0.5997971856478544, "grad_norm": 164.07276916503906, "learning_rate": 4.2378305919807075e-06, "loss": 26.376, "step": 296920 }, { "epoch": 0.5998173862805383, "grad_norm": 608.0077514648438, "learning_rate": 4.237485607286417e-06, "loss": 24.6859, "step": 296930 }, { "epoch": 0.5998375869132221, "grad_norm": 302.75775146484375, "learning_rate": 4.237140626308528e-06, "loss": 11.7885, "step": 296940 }, { "epoch": 0.5998577875459059, "grad_norm": 76.98960876464844, "learning_rate": 4.2367956490487235e-06, "loss": 13.584, "step": 296950 }, { "epoch": 0.5998779881785897, "grad_norm": 372.256591796875, "learning_rate": 4.2364506755086856e-06, "loss": 20.3057, "step": 296960 }, { "epoch": 0.5998981888112735, "grad_norm": 386.2876892089844, "learning_rate": 4.236105705690094e-06, "loss": 22.2029, "step": 296970 }, { "epoch": 0.5999183894439574, "grad_norm": 452.18963623046875, "learning_rate": 4.2357607395946275e-06, "loss": 32.8919, "step": 296980 }, { "epoch": 0.5999385900766412, "grad_norm": 145.4814453125, "learning_rate": 4.235415777223976e-06, "loss": 19.0043, "step": 296990 }, { "epoch": 0.599958790709325, "grad_norm": 468.30328369140625, "learning_rate": 4.23507081857981e-06, "loss": 12.1585, "step": 297000 }, { "epoch": 0.5999789913420088, "grad_norm": 417.04327392578125, "learning_rate": 4.234725863663819e-06, "loss": 13.4551, "step": 297010 }, { "epoch": 0.5999991919746926, "grad_norm": 148.23902893066406, "learning_rate": 4.23438091247768e-06, "loss": 14.7837, "step": 297020 }, { "epoch": 0.6000193926073765, "grad_norm": 696.6424560546875, "learning_rate": 4.234035965023077e-06, "loss": 26.9666, "step": 297030 }, { "epoch": 0.6000395932400603, "grad_norm": 356.2344665527344, "learning_rate": 4.233691021301689e-06, "loss": 23.3111, "step": 297040 }, { "epoch": 0.6000597938727441, "grad_norm": 86.95048522949219, "learning_rate": 4.233346081315197e-06, "loss": 35.9593, "step": 297050 }, { "epoch": 0.6000799945054279, "grad_norm": 360.9756164550781, "learning_rate": 4.233001145065286e-06, "loss": 14.1634, "step": 297060 }, { "epoch": 0.6001001951381117, "grad_norm": 111.36483001708984, "learning_rate": 4.232656212553631e-06, "loss": 13.5991, "step": 297070 }, { "epoch": 0.6001203957707956, "grad_norm": 334.063232421875, "learning_rate": 4.232311283781918e-06, "loss": 14.025, "step": 297080 }, { "epoch": 0.6001405964034794, "grad_norm": 393.69476318359375, "learning_rate": 4.231966358751828e-06, "loss": 14.9564, "step": 297090 }, { "epoch": 0.6001607970361632, "grad_norm": 512.2169799804688, "learning_rate": 4.23162143746504e-06, "loss": 23.9031, "step": 297100 }, { "epoch": 0.600180997668847, "grad_norm": 325.2622985839844, "learning_rate": 4.231276519923235e-06, "loss": 13.6719, "step": 297110 }, { "epoch": 0.6002011983015308, "grad_norm": 463.291015625, "learning_rate": 4.230931606128096e-06, "loss": 15.6742, "step": 297120 }, { "epoch": 0.6002213989342147, "grad_norm": 412.2735900878906, "learning_rate": 4.230586696081303e-06, "loss": 15.9979, "step": 297130 }, { "epoch": 0.6002415995668985, "grad_norm": 45.24440383911133, "learning_rate": 4.230241789784535e-06, "loss": 11.4448, "step": 297140 }, { "epoch": 0.6002618001995822, "grad_norm": 507.2142639160156, "learning_rate": 4.2298968872394784e-06, "loss": 15.8401, "step": 297150 }, { "epoch": 0.600282000832266, "grad_norm": 121.2529525756836, "learning_rate": 4.229551988447809e-06, "loss": 24.8569, "step": 297160 }, { "epoch": 0.6003022014649498, "grad_norm": 199.32620239257812, "learning_rate": 4.22920709341121e-06, "loss": 17.3647, "step": 297170 }, { "epoch": 0.6003224020976337, "grad_norm": 393.05078125, "learning_rate": 4.228862202131362e-06, "loss": 14.9084, "step": 297180 }, { "epoch": 0.6003426027303175, "grad_norm": 76.59890747070312, "learning_rate": 4.228517314609948e-06, "loss": 32.6611, "step": 297190 }, { "epoch": 0.6003628033630013, "grad_norm": 231.71075439453125, "learning_rate": 4.228172430848645e-06, "loss": 11.191, "step": 297200 }, { "epoch": 0.6003830039956851, "grad_norm": 376.9379577636719, "learning_rate": 4.227827550849136e-06, "loss": 27.0128, "step": 297210 }, { "epoch": 0.6004032046283689, "grad_norm": 0.0, "learning_rate": 4.227482674613103e-06, "loss": 21.0893, "step": 297220 }, { "epoch": 0.6004234052610528, "grad_norm": 182.09999084472656, "learning_rate": 4.227137802142225e-06, "loss": 13.9014, "step": 297230 }, { "epoch": 0.6004436058937366, "grad_norm": 324.2060852050781, "learning_rate": 4.226792933438183e-06, "loss": 35.4124, "step": 297240 }, { "epoch": 0.6004638065264204, "grad_norm": 375.556640625, "learning_rate": 4.226448068502661e-06, "loss": 7.6412, "step": 297250 }, { "epoch": 0.6004840071591042, "grad_norm": 547.1290283203125, "learning_rate": 4.2261032073373355e-06, "loss": 22.627, "step": 297260 }, { "epoch": 0.600504207791788, "grad_norm": 406.0616149902344, "learning_rate": 4.225758349943888e-06, "loss": 11.6514, "step": 297270 }, { "epoch": 0.6005244084244719, "grad_norm": 172.42800903320312, "learning_rate": 4.225413496324003e-06, "loss": 18.0903, "step": 297280 }, { "epoch": 0.6005446090571557, "grad_norm": 214.78427124023438, "learning_rate": 4.225068646479356e-06, "loss": 19.0836, "step": 297290 }, { "epoch": 0.6005648096898395, "grad_norm": 205.65557861328125, "learning_rate": 4.224723800411631e-06, "loss": 19.0035, "step": 297300 }, { "epoch": 0.6005850103225233, "grad_norm": 284.65228271484375, "learning_rate": 4.22437895812251e-06, "loss": 12.0778, "step": 297310 }, { "epoch": 0.6006052109552071, "grad_norm": 473.2984924316406, "learning_rate": 4.224034119613671e-06, "loss": 14.5974, "step": 297320 }, { "epoch": 0.600625411587891, "grad_norm": 207.19801330566406, "learning_rate": 4.223689284886795e-06, "loss": 25.6695, "step": 297330 }, { "epoch": 0.6006456122205748, "grad_norm": 459.6711730957031, "learning_rate": 4.223344453943562e-06, "loss": 19.7625, "step": 297340 }, { "epoch": 0.6006658128532586, "grad_norm": 246.1230010986328, "learning_rate": 4.222999626785658e-06, "loss": 16.0115, "step": 297350 }, { "epoch": 0.6006860134859424, "grad_norm": 775.7787475585938, "learning_rate": 4.2226548034147555e-06, "loss": 23.9877, "step": 297360 }, { "epoch": 0.6007062141186262, "grad_norm": 152.5539093017578, "learning_rate": 4.222309983832541e-06, "loss": 31.0656, "step": 297370 }, { "epoch": 0.6007264147513101, "grad_norm": 370.1913757324219, "learning_rate": 4.221965168040693e-06, "loss": 25.929, "step": 297380 }, { "epoch": 0.6007466153839939, "grad_norm": 282.4996643066406, "learning_rate": 4.221620356040892e-06, "loss": 27.7602, "step": 297390 }, { "epoch": 0.6007668160166777, "grad_norm": 241.69351196289062, "learning_rate": 4.22127554783482e-06, "loss": 29.7967, "step": 297400 }, { "epoch": 0.6007870166493614, "grad_norm": 120.89930725097656, "learning_rate": 4.220930743424157e-06, "loss": 19.5099, "step": 297410 }, { "epoch": 0.6008072172820452, "grad_norm": 349.36212158203125, "learning_rate": 4.220585942810582e-06, "loss": 12.5206, "step": 297420 }, { "epoch": 0.600827417914729, "grad_norm": 770.1076049804688, "learning_rate": 4.220241145995775e-06, "loss": 38.0541, "step": 297430 }, { "epoch": 0.6008476185474129, "grad_norm": 713.4065551757812, "learning_rate": 4.219896352981422e-06, "loss": 27.9212, "step": 297440 }, { "epoch": 0.6008678191800967, "grad_norm": 15.606483459472656, "learning_rate": 4.219551563769196e-06, "loss": 28.2083, "step": 297450 }, { "epoch": 0.6008880198127805, "grad_norm": 448.6654052734375, "learning_rate": 4.219206778360782e-06, "loss": 14.6773, "step": 297460 }, { "epoch": 0.6009082204454643, "grad_norm": 56.791412353515625, "learning_rate": 4.218861996757859e-06, "loss": 12.5887, "step": 297470 }, { "epoch": 0.6009284210781481, "grad_norm": 310.511474609375, "learning_rate": 4.218517218962111e-06, "loss": 12.9978, "step": 297480 }, { "epoch": 0.600948621710832, "grad_norm": 318.5117492675781, "learning_rate": 4.218172444975212e-06, "loss": 11.2139, "step": 297490 }, { "epoch": 0.6009688223435158, "grad_norm": 246.99830627441406, "learning_rate": 4.217827674798845e-06, "loss": 10.2034, "step": 297500 }, { "epoch": 0.6009890229761996, "grad_norm": 157.3436737060547, "learning_rate": 4.217482908434695e-06, "loss": 15.1918, "step": 297510 }, { "epoch": 0.6010092236088834, "grad_norm": 337.2286376953125, "learning_rate": 4.217138145884435e-06, "loss": 19.4948, "step": 297520 }, { "epoch": 0.6010294242415672, "grad_norm": 65.92939758300781, "learning_rate": 4.216793387149749e-06, "loss": 24.6283, "step": 297530 }, { "epoch": 0.6010496248742511, "grad_norm": 127.53124237060547, "learning_rate": 4.216448632232319e-06, "loss": 32.5427, "step": 297540 }, { "epoch": 0.6010698255069349, "grad_norm": 181.1674346923828, "learning_rate": 4.216103881133822e-06, "loss": 28.4412, "step": 297550 }, { "epoch": 0.6010900261396187, "grad_norm": 65.030029296875, "learning_rate": 4.21575913385594e-06, "loss": 6.2832, "step": 297560 }, { "epoch": 0.6011102267723025, "grad_norm": 441.56134033203125, "learning_rate": 4.215414390400353e-06, "loss": 30.8504, "step": 297570 }, { "epoch": 0.6011304274049863, "grad_norm": 675.4154663085938, "learning_rate": 4.21506965076874e-06, "loss": 37.3756, "step": 297580 }, { "epoch": 0.6011506280376702, "grad_norm": 433.27081298828125, "learning_rate": 4.2147249149627826e-06, "loss": 17.9339, "step": 297590 }, { "epoch": 0.601170828670354, "grad_norm": 445.5006408691406, "learning_rate": 4.2143801829841635e-06, "loss": 11.3348, "step": 297600 }, { "epoch": 0.6011910293030378, "grad_norm": 64.18819427490234, "learning_rate": 4.214035454834556e-06, "loss": 17.298, "step": 297610 }, { "epoch": 0.6012112299357216, "grad_norm": 417.6506042480469, "learning_rate": 4.213690730515646e-06, "loss": 19.1662, "step": 297620 }, { "epoch": 0.6012314305684054, "grad_norm": 265.9891662597656, "learning_rate": 4.213346010029112e-06, "loss": 15.67, "step": 297630 }, { "epoch": 0.6012516312010893, "grad_norm": 89.40283966064453, "learning_rate": 4.213001293376635e-06, "loss": 16.9218, "step": 297640 }, { "epoch": 0.6012718318337731, "grad_norm": 134.3324432373047, "learning_rate": 4.212656580559894e-06, "loss": 10.8143, "step": 297650 }, { "epoch": 0.6012920324664568, "grad_norm": 0.0, "learning_rate": 4.212311871580568e-06, "loss": 10.9761, "step": 297660 }, { "epoch": 0.6013122330991406, "grad_norm": 127.60708618164062, "learning_rate": 4.2119671664403404e-06, "loss": 11.1604, "step": 297670 }, { "epoch": 0.6013324337318244, "grad_norm": 307.857177734375, "learning_rate": 4.211622465140887e-06, "loss": 24.9658, "step": 297680 }, { "epoch": 0.6013526343645083, "grad_norm": 316.39849853515625, "learning_rate": 4.211277767683891e-06, "loss": 19.5383, "step": 297690 }, { "epoch": 0.6013728349971921, "grad_norm": 240.2183837890625, "learning_rate": 4.210933074071033e-06, "loss": 14.7879, "step": 297700 }, { "epoch": 0.6013930356298759, "grad_norm": 365.9661865234375, "learning_rate": 4.21058838430399e-06, "loss": 21.3423, "step": 297710 }, { "epoch": 0.6014132362625597, "grad_norm": 391.8970031738281, "learning_rate": 4.2102436983844435e-06, "loss": 37.3412, "step": 297720 }, { "epoch": 0.6014334368952435, "grad_norm": 495.6336364746094, "learning_rate": 4.209899016314075e-06, "loss": 14.5789, "step": 297730 }, { "epoch": 0.6014536375279274, "grad_norm": 226.6911163330078, "learning_rate": 4.209554338094561e-06, "loss": 9.694, "step": 297740 }, { "epoch": 0.6014738381606112, "grad_norm": 63.23526382446289, "learning_rate": 4.209209663727583e-06, "loss": 15.7997, "step": 297750 }, { "epoch": 0.601494038793295, "grad_norm": 213.0033721923828, "learning_rate": 4.208864993214821e-06, "loss": 26.7003, "step": 297760 }, { "epoch": 0.6015142394259788, "grad_norm": 138.12353515625, "learning_rate": 4.208520326557957e-06, "loss": 14.8954, "step": 297770 }, { "epoch": 0.6015344400586626, "grad_norm": 126.03828430175781, "learning_rate": 4.208175663758668e-06, "loss": 29.5105, "step": 297780 }, { "epoch": 0.6015546406913465, "grad_norm": 258.0828552246094, "learning_rate": 4.2078310048186345e-06, "loss": 32.3645, "step": 297790 }, { "epoch": 0.6015748413240303, "grad_norm": 179.9447021484375, "learning_rate": 4.207486349739538e-06, "loss": 10.1325, "step": 297800 }, { "epoch": 0.6015950419567141, "grad_norm": 649.5266723632812, "learning_rate": 4.207141698523055e-06, "loss": 33.9553, "step": 297810 }, { "epoch": 0.6016152425893979, "grad_norm": 387.7996826171875, "learning_rate": 4.206797051170867e-06, "loss": 22.851, "step": 297820 }, { "epoch": 0.6016354432220817, "grad_norm": 318.67510986328125, "learning_rate": 4.206452407684656e-06, "loss": 18.877, "step": 297830 }, { "epoch": 0.6016556438547656, "grad_norm": 318.50225830078125, "learning_rate": 4.206107768066099e-06, "loss": 12.5204, "step": 297840 }, { "epoch": 0.6016758444874494, "grad_norm": 495.54345703125, "learning_rate": 4.205763132316875e-06, "loss": 18.2418, "step": 297850 }, { "epoch": 0.6016960451201332, "grad_norm": 263.34796142578125, "learning_rate": 4.2054185004386675e-06, "loss": 15.1757, "step": 297860 }, { "epoch": 0.601716245752817, "grad_norm": 288.5061950683594, "learning_rate": 4.205073872433152e-06, "loss": 13.3205, "step": 297870 }, { "epoch": 0.6017364463855008, "grad_norm": 202.85061645507812, "learning_rate": 4.2047292483020096e-06, "loss": 15.6534, "step": 297880 }, { "epoch": 0.6017566470181847, "grad_norm": 313.70880126953125, "learning_rate": 4.204384628046924e-06, "loss": 18.961, "step": 297890 }, { "epoch": 0.6017768476508685, "grad_norm": 130.92532348632812, "learning_rate": 4.204040011669567e-06, "loss": 30.2752, "step": 297900 }, { "epoch": 0.6017970482835523, "grad_norm": 197.30050659179688, "learning_rate": 4.203695399171624e-06, "loss": 14.0073, "step": 297910 }, { "epoch": 0.601817248916236, "grad_norm": 104.5914077758789, "learning_rate": 4.203350790554773e-06, "loss": 9.0561, "step": 297920 }, { "epoch": 0.6018374495489198, "grad_norm": 165.4390411376953, "learning_rate": 4.203006185820695e-06, "loss": 18.1007, "step": 297930 }, { "epoch": 0.6018576501816036, "grad_norm": 369.489990234375, "learning_rate": 4.2026615849710665e-06, "loss": 21.0743, "step": 297940 }, { "epoch": 0.6018778508142875, "grad_norm": 285.8382263183594, "learning_rate": 4.202316988007568e-06, "loss": 20.0251, "step": 297950 }, { "epoch": 0.6018980514469713, "grad_norm": 197.68594360351562, "learning_rate": 4.201972394931883e-06, "loss": 11.9238, "step": 297960 }, { "epoch": 0.6019182520796551, "grad_norm": 137.123291015625, "learning_rate": 4.201627805745684e-06, "loss": 32.8179, "step": 297970 }, { "epoch": 0.6019384527123389, "grad_norm": 179.53021240234375, "learning_rate": 4.201283220450656e-06, "loss": 9.1352, "step": 297980 }, { "epoch": 0.6019586533450227, "grad_norm": 203.9075469970703, "learning_rate": 4.200938639048477e-06, "loss": 13.7623, "step": 297990 }, { "epoch": 0.6019788539777066, "grad_norm": 397.37786865234375, "learning_rate": 4.200594061540827e-06, "loss": 22.9023, "step": 298000 }, { "epoch": 0.6019990546103904, "grad_norm": 386.9990539550781, "learning_rate": 4.200249487929383e-06, "loss": 39.7382, "step": 298010 }, { "epoch": 0.6020192552430742, "grad_norm": 257.0818786621094, "learning_rate": 4.199904918215827e-06, "loss": 16.5778, "step": 298020 }, { "epoch": 0.602039455875758, "grad_norm": 301.31591796875, "learning_rate": 4.199560352401836e-06, "loss": 39.1892, "step": 298030 }, { "epoch": 0.6020596565084418, "grad_norm": 427.86865234375, "learning_rate": 4.199215790489091e-06, "loss": 18.7205, "step": 298040 }, { "epoch": 0.6020798571411257, "grad_norm": 405.53143310546875, "learning_rate": 4.198871232479274e-06, "loss": 17.5353, "step": 298050 }, { "epoch": 0.6021000577738095, "grad_norm": 398.2810363769531, "learning_rate": 4.1985266783740575e-06, "loss": 15.8978, "step": 298060 }, { "epoch": 0.6021202584064933, "grad_norm": 738.7849731445312, "learning_rate": 4.198182128175126e-06, "loss": 20.4751, "step": 298070 }, { "epoch": 0.6021404590391771, "grad_norm": 388.14129638671875, "learning_rate": 4.197837581884158e-06, "loss": 16.1196, "step": 298080 }, { "epoch": 0.6021606596718609, "grad_norm": 269.84478759765625, "learning_rate": 4.1974930395028325e-06, "loss": 28.0525, "step": 298090 }, { "epoch": 0.6021808603045448, "grad_norm": 152.56507873535156, "learning_rate": 4.197148501032829e-06, "loss": 18.6773, "step": 298100 }, { "epoch": 0.6022010609372286, "grad_norm": 538.8546142578125, "learning_rate": 4.1968039664758245e-06, "loss": 15.5587, "step": 298110 }, { "epoch": 0.6022212615699124, "grad_norm": 69.4065170288086, "learning_rate": 4.196459435833503e-06, "loss": 17.8487, "step": 298120 }, { "epoch": 0.6022414622025962, "grad_norm": 264.6071472167969, "learning_rate": 4.196114909107538e-06, "loss": 17.6078, "step": 298130 }, { "epoch": 0.60226166283528, "grad_norm": 242.25967407226562, "learning_rate": 4.195770386299612e-06, "loss": 11.3206, "step": 298140 }, { "epoch": 0.6022818634679639, "grad_norm": 180.07666015625, "learning_rate": 4.195425867411404e-06, "loss": 15.3392, "step": 298150 }, { "epoch": 0.6023020641006477, "grad_norm": 207.86135864257812, "learning_rate": 4.195081352444593e-06, "loss": 21.8768, "step": 298160 }, { "epoch": 0.6023222647333315, "grad_norm": 110.50847625732422, "learning_rate": 4.194736841400858e-06, "loss": 8.0854, "step": 298170 }, { "epoch": 0.6023424653660152, "grad_norm": 226.2045135498047, "learning_rate": 4.1943923342818785e-06, "loss": 13.1717, "step": 298180 }, { "epoch": 0.602362665998699, "grad_norm": 234.16796875, "learning_rate": 4.194047831089332e-06, "loss": 8.4753, "step": 298190 }, { "epoch": 0.6023828666313829, "grad_norm": 294.8853454589844, "learning_rate": 4.193703331824898e-06, "loss": 12.2121, "step": 298200 }, { "epoch": 0.6024030672640667, "grad_norm": 290.3738708496094, "learning_rate": 4.193358836490258e-06, "loss": 13.0586, "step": 298210 }, { "epoch": 0.6024232678967505, "grad_norm": 444.13482666015625, "learning_rate": 4.193014345087088e-06, "loss": 18.4882, "step": 298220 }, { "epoch": 0.6024434685294343, "grad_norm": 359.307861328125, "learning_rate": 4.192669857617068e-06, "loss": 16.4934, "step": 298230 }, { "epoch": 0.6024636691621181, "grad_norm": 213.1381378173828, "learning_rate": 4.192325374081877e-06, "loss": 13.3832, "step": 298240 }, { "epoch": 0.602483869794802, "grad_norm": 80.84144592285156, "learning_rate": 4.191980894483195e-06, "loss": 15.6522, "step": 298250 }, { "epoch": 0.6025040704274858, "grad_norm": 15.861350059509277, "learning_rate": 4.1916364188227e-06, "loss": 17.413, "step": 298260 }, { "epoch": 0.6025242710601696, "grad_norm": 218.122314453125, "learning_rate": 4.19129194710207e-06, "loss": 17.3236, "step": 298270 }, { "epoch": 0.6025444716928534, "grad_norm": 319.906005859375, "learning_rate": 4.190947479322988e-06, "loss": 12.1794, "step": 298280 }, { "epoch": 0.6025646723255372, "grad_norm": 281.9668273925781, "learning_rate": 4.190603015487126e-06, "loss": 21.3937, "step": 298290 }, { "epoch": 0.602584872958221, "grad_norm": 302.9158935546875, "learning_rate": 4.190258555596168e-06, "loss": 16.2498, "step": 298300 }, { "epoch": 0.6026050735909049, "grad_norm": 91.6068115234375, "learning_rate": 4.1899140996517934e-06, "loss": 13.1449, "step": 298310 }, { "epoch": 0.6026252742235887, "grad_norm": 315.924072265625, "learning_rate": 4.189569647655677e-06, "loss": 22.7317, "step": 298320 }, { "epoch": 0.6026454748562725, "grad_norm": 395.090576171875, "learning_rate": 4.189225199609501e-06, "loss": 15.7779, "step": 298330 }, { "epoch": 0.6026656754889563, "grad_norm": 309.2504577636719, "learning_rate": 4.188880755514944e-06, "loss": 21.6803, "step": 298340 }, { "epoch": 0.6026858761216402, "grad_norm": 125.37826538085938, "learning_rate": 4.1885363153736825e-06, "loss": 22.8309, "step": 298350 }, { "epoch": 0.602706076754324, "grad_norm": 179.2470245361328, "learning_rate": 4.188191879187395e-06, "loss": 16.2024, "step": 298360 }, { "epoch": 0.6027262773870078, "grad_norm": 403.6226501464844, "learning_rate": 4.187847446957763e-06, "loss": 21.749, "step": 298370 }, { "epoch": 0.6027464780196916, "grad_norm": 112.1532974243164, "learning_rate": 4.187503018686466e-06, "loss": 8.8174, "step": 298380 }, { "epoch": 0.6027666786523754, "grad_norm": 315.6024475097656, "learning_rate": 4.1871585943751795e-06, "loss": 28.044, "step": 298390 }, { "epoch": 0.6027868792850593, "grad_norm": 100.24488830566406, "learning_rate": 4.186814174025582e-06, "loss": 15.537, "step": 298400 }, { "epoch": 0.6028070799177431, "grad_norm": 197.50131225585938, "learning_rate": 4.186469757639356e-06, "loss": 23.9282, "step": 298410 }, { "epoch": 0.6028272805504269, "grad_norm": 0.0, "learning_rate": 4.186125345218177e-06, "loss": 10.389, "step": 298420 }, { "epoch": 0.6028474811831106, "grad_norm": 548.7827758789062, "learning_rate": 4.185780936763722e-06, "loss": 23.853, "step": 298430 }, { "epoch": 0.6028676818157944, "grad_norm": 65.2294921875, "learning_rate": 4.185436532277675e-06, "loss": 14.5715, "step": 298440 }, { "epoch": 0.6028878824484782, "grad_norm": 388.57470703125, "learning_rate": 4.18509213176171e-06, "loss": 22.9059, "step": 298450 }, { "epoch": 0.6029080830811621, "grad_norm": 273.25244140625, "learning_rate": 4.184747735217507e-06, "loss": 18.0269, "step": 298460 }, { "epoch": 0.6029282837138459, "grad_norm": 308.9242858886719, "learning_rate": 4.184403342646746e-06, "loss": 27.8424, "step": 298470 }, { "epoch": 0.6029484843465297, "grad_norm": 170.7335205078125, "learning_rate": 4.1840589540511035e-06, "loss": 13.1957, "step": 298480 }, { "epoch": 0.6029686849792135, "grad_norm": 53.684364318847656, "learning_rate": 4.183714569432259e-06, "loss": 16.3505, "step": 298490 }, { "epoch": 0.6029888856118973, "grad_norm": 284.6451416015625, "learning_rate": 4.183370188791891e-06, "loss": 22.2709, "step": 298500 }, { "epoch": 0.6030090862445812, "grad_norm": 342.7234802246094, "learning_rate": 4.183025812131674e-06, "loss": 16.7582, "step": 298510 }, { "epoch": 0.603029286877265, "grad_norm": 269.9075927734375, "learning_rate": 4.182681439453294e-06, "loss": 15.4676, "step": 298520 }, { "epoch": 0.6030494875099488, "grad_norm": 404.54595947265625, "learning_rate": 4.182337070758425e-06, "loss": 16.4276, "step": 298530 }, { "epoch": 0.6030696881426326, "grad_norm": 445.7948913574219, "learning_rate": 4.1819927060487454e-06, "loss": 15.3513, "step": 298540 }, { "epoch": 0.6030898887753164, "grad_norm": 214.68576049804688, "learning_rate": 4.181648345325934e-06, "loss": 17.4019, "step": 298550 }, { "epoch": 0.6031100894080003, "grad_norm": 39.058956146240234, "learning_rate": 4.181303988591669e-06, "loss": 9.1265, "step": 298560 }, { "epoch": 0.6031302900406841, "grad_norm": 379.4427185058594, "learning_rate": 4.1809596358476315e-06, "loss": 23.7962, "step": 298570 }, { "epoch": 0.6031504906733679, "grad_norm": 646.8310546875, "learning_rate": 4.180615287095494e-06, "loss": 20.3474, "step": 298580 }, { "epoch": 0.6031706913060517, "grad_norm": 29.26019859313965, "learning_rate": 4.180270942336939e-06, "loss": 19.192, "step": 298590 }, { "epoch": 0.6031908919387355, "grad_norm": 360.61480712890625, "learning_rate": 4.179926601573645e-06, "loss": 21.3761, "step": 298600 }, { "epoch": 0.6032110925714194, "grad_norm": 148.6192169189453, "learning_rate": 4.179582264807289e-06, "loss": 14.664, "step": 298610 }, { "epoch": 0.6032312932041032, "grad_norm": 73.40028381347656, "learning_rate": 4.17923793203955e-06, "loss": 12.3615, "step": 298620 }, { "epoch": 0.603251493836787, "grad_norm": 447.5791320800781, "learning_rate": 4.1788936032721065e-06, "loss": 23.0917, "step": 298630 }, { "epoch": 0.6032716944694708, "grad_norm": 200.035400390625, "learning_rate": 4.178549278506634e-06, "loss": 16.599, "step": 298640 }, { "epoch": 0.6032918951021546, "grad_norm": 237.63168334960938, "learning_rate": 4.178204957744812e-06, "loss": 14.1742, "step": 298650 }, { "epoch": 0.6033120957348385, "grad_norm": 216.46188354492188, "learning_rate": 4.177860640988323e-06, "loss": 18.4698, "step": 298660 }, { "epoch": 0.6033322963675223, "grad_norm": 119.4751968383789, "learning_rate": 4.177516328238838e-06, "loss": 20.364, "step": 298670 }, { "epoch": 0.6033524970002061, "grad_norm": 307.6964111328125, "learning_rate": 4.17717201949804e-06, "loss": 17.2583, "step": 298680 }, { "epoch": 0.6033726976328898, "grad_norm": 86.9405746459961, "learning_rate": 4.176827714767606e-06, "loss": 9.2858, "step": 298690 }, { "epoch": 0.6033928982655736, "grad_norm": 359.2953186035156, "learning_rate": 4.176483414049214e-06, "loss": 12.8209, "step": 298700 }, { "epoch": 0.6034130988982574, "grad_norm": 235.60838317871094, "learning_rate": 4.176139117344542e-06, "loss": 12.239, "step": 298710 }, { "epoch": 0.6034332995309413, "grad_norm": 293.3251037597656, "learning_rate": 4.175794824655266e-06, "loss": 15.0166, "step": 298720 }, { "epoch": 0.6034535001636251, "grad_norm": 237.31272888183594, "learning_rate": 4.17545053598307e-06, "loss": 25.8111, "step": 298730 }, { "epoch": 0.6034737007963089, "grad_norm": 165.53729248046875, "learning_rate": 4.1751062513296245e-06, "loss": 18.5105, "step": 298740 }, { "epoch": 0.6034939014289927, "grad_norm": 242.5736846923828, "learning_rate": 4.174761970696612e-06, "loss": 15.6253, "step": 298750 }, { "epoch": 0.6035141020616765, "grad_norm": 2694.142822265625, "learning_rate": 4.174417694085711e-06, "loss": 28.8639, "step": 298760 }, { "epoch": 0.6035343026943604, "grad_norm": 48.111297607421875, "learning_rate": 4.174073421498597e-06, "loss": 19.4476, "step": 298770 }, { "epoch": 0.6035545033270442, "grad_norm": 144.0858154296875, "learning_rate": 4.173729152936948e-06, "loss": 15.9993, "step": 298780 }, { "epoch": 0.603574703959728, "grad_norm": 1064.3560791015625, "learning_rate": 4.173384888402446e-06, "loss": 20.5819, "step": 298790 }, { "epoch": 0.6035949045924118, "grad_norm": 0.0, "learning_rate": 4.173040627896762e-06, "loss": 31.3088, "step": 298800 }, { "epoch": 0.6036151052250956, "grad_norm": 547.2920532226562, "learning_rate": 4.172696371421579e-06, "loss": 17.7626, "step": 298810 }, { "epoch": 0.6036353058577795, "grad_norm": 89.73358154296875, "learning_rate": 4.172352118978573e-06, "loss": 36.5414, "step": 298820 }, { "epoch": 0.6036555064904633, "grad_norm": 459.64044189453125, "learning_rate": 4.172007870569425e-06, "loss": 17.1486, "step": 298830 }, { "epoch": 0.6036757071231471, "grad_norm": 112.06534576416016, "learning_rate": 4.171663626195808e-06, "loss": 12.7199, "step": 298840 }, { "epoch": 0.6036959077558309, "grad_norm": 230.52272033691406, "learning_rate": 4.171319385859402e-06, "loss": 15.0083, "step": 298850 }, { "epoch": 0.6037161083885147, "grad_norm": 242.7662353515625, "learning_rate": 4.170975149561886e-06, "loss": 12.908, "step": 298860 }, { "epoch": 0.6037363090211986, "grad_norm": 522.4783325195312, "learning_rate": 4.170630917304935e-06, "loss": 24.3635, "step": 298870 }, { "epoch": 0.6037565096538824, "grad_norm": 263.58056640625, "learning_rate": 4.1702866890902285e-06, "loss": 17.6295, "step": 298880 }, { "epoch": 0.6037767102865662, "grad_norm": 206.5586395263672, "learning_rate": 4.169942464919446e-06, "loss": 22.0447, "step": 298890 }, { "epoch": 0.60379691091925, "grad_norm": 16.814697265625, "learning_rate": 4.169598244794261e-06, "loss": 16.5575, "step": 298900 }, { "epoch": 0.6038171115519338, "grad_norm": 449.1460266113281, "learning_rate": 4.169254028716355e-06, "loss": 29.2001, "step": 298910 }, { "epoch": 0.6038373121846177, "grad_norm": 170.6599884033203, "learning_rate": 4.1689098166874046e-06, "loss": 21.552, "step": 298920 }, { "epoch": 0.6038575128173015, "grad_norm": 113.89398956298828, "learning_rate": 4.168565608709085e-06, "loss": 14.5777, "step": 298930 }, { "epoch": 0.6038777134499852, "grad_norm": 382.1090393066406, "learning_rate": 4.168221404783076e-06, "loss": 21.6142, "step": 298940 }, { "epoch": 0.603897914082669, "grad_norm": 157.7279052734375, "learning_rate": 4.167877204911057e-06, "loss": 16.4709, "step": 298950 }, { "epoch": 0.6039181147153528, "grad_norm": 326.5459289550781, "learning_rate": 4.167533009094702e-06, "loss": 15.7896, "step": 298960 }, { "epoch": 0.6039383153480367, "grad_norm": 264.9243469238281, "learning_rate": 4.167188817335689e-06, "loss": 20.7495, "step": 298970 }, { "epoch": 0.6039585159807205, "grad_norm": 304.4413757324219, "learning_rate": 4.166844629635698e-06, "loss": 15.1924, "step": 298980 }, { "epoch": 0.6039787166134043, "grad_norm": 56.842220306396484, "learning_rate": 4.166500445996407e-06, "loss": 23.3079, "step": 298990 }, { "epoch": 0.6039989172460881, "grad_norm": 311.0484313964844, "learning_rate": 4.166156266419489e-06, "loss": 22.2205, "step": 299000 }, { "epoch": 0.6040191178787719, "grad_norm": 294.2660217285156, "learning_rate": 4.1658120909066255e-06, "loss": 16.3255, "step": 299010 }, { "epoch": 0.6040393185114558, "grad_norm": 363.77203369140625, "learning_rate": 4.165467919459493e-06, "loss": 15.1802, "step": 299020 }, { "epoch": 0.6040595191441396, "grad_norm": 395.3397216796875, "learning_rate": 4.165123752079768e-06, "loss": 25.9804, "step": 299030 }, { "epoch": 0.6040797197768234, "grad_norm": 188.2051239013672, "learning_rate": 4.1647795887691275e-06, "loss": 8.82, "step": 299040 }, { "epoch": 0.6040999204095072, "grad_norm": 295.6571044921875, "learning_rate": 4.164435429529253e-06, "loss": 19.396, "step": 299050 }, { "epoch": 0.604120121042191, "grad_norm": 356.2612609863281, "learning_rate": 4.164091274361815e-06, "loss": 15.7714, "step": 299060 }, { "epoch": 0.6041403216748749, "grad_norm": 335.6587219238281, "learning_rate": 4.163747123268497e-06, "loss": 15.0112, "step": 299070 }, { "epoch": 0.6041605223075587, "grad_norm": 386.6856689453125, "learning_rate": 4.1634029762509755e-06, "loss": 24.1372, "step": 299080 }, { "epoch": 0.6041807229402425, "grad_norm": 747.5763549804688, "learning_rate": 4.163058833310925e-06, "loss": 9.6639, "step": 299090 }, { "epoch": 0.6042009235729263, "grad_norm": 461.94500732421875, "learning_rate": 4.162714694450023e-06, "loss": 31.152, "step": 299100 }, { "epoch": 0.6042211242056101, "grad_norm": 201.0620880126953, "learning_rate": 4.16237055966995e-06, "loss": 20.3706, "step": 299110 }, { "epoch": 0.604241324838294, "grad_norm": 373.2374267578125, "learning_rate": 4.16202642897238e-06, "loss": 17.1602, "step": 299120 }, { "epoch": 0.6042615254709778, "grad_norm": 218.09156799316406, "learning_rate": 4.161682302358991e-06, "loss": 17.4732, "step": 299130 }, { "epoch": 0.6042817261036616, "grad_norm": 314.21002197265625, "learning_rate": 4.161338179831461e-06, "loss": 35.9937, "step": 299140 }, { "epoch": 0.6043019267363454, "grad_norm": 352.2030944824219, "learning_rate": 4.160994061391469e-06, "loss": 26.4133, "step": 299150 }, { "epoch": 0.6043221273690292, "grad_norm": 339.5334167480469, "learning_rate": 4.1606499470406885e-06, "loss": 29.524, "step": 299160 }, { "epoch": 0.6043423280017131, "grad_norm": 281.4578552246094, "learning_rate": 4.1603058367807986e-06, "loss": 21.2211, "step": 299170 }, { "epoch": 0.6043625286343969, "grad_norm": 360.1448669433594, "learning_rate": 4.159961730613478e-06, "loss": 13.3571, "step": 299180 }, { "epoch": 0.6043827292670807, "grad_norm": 38.991703033447266, "learning_rate": 4.1596176285403985e-06, "loss": 23.4387, "step": 299190 }, { "epoch": 0.6044029298997644, "grad_norm": 121.9236068725586, "learning_rate": 4.159273530563243e-06, "loss": 8.9618, "step": 299200 }, { "epoch": 0.6044231305324482, "grad_norm": 161.3202667236328, "learning_rate": 4.158929436683687e-06, "loss": 9.6468, "step": 299210 }, { "epoch": 0.604443331165132, "grad_norm": 204.0862579345703, "learning_rate": 4.158585346903405e-06, "loss": 15.9803, "step": 299220 }, { "epoch": 0.6044635317978159, "grad_norm": 248.75294494628906, "learning_rate": 4.1582412612240765e-06, "loss": 20.2998, "step": 299230 }, { "epoch": 0.6044837324304997, "grad_norm": 313.873779296875, "learning_rate": 4.157897179647379e-06, "loss": 16.6422, "step": 299240 }, { "epoch": 0.6045039330631835, "grad_norm": 185.78738403320312, "learning_rate": 4.157553102174988e-06, "loss": 18.3535, "step": 299250 }, { "epoch": 0.6045241336958673, "grad_norm": 235.48483276367188, "learning_rate": 4.15720902880858e-06, "loss": 29.2059, "step": 299260 }, { "epoch": 0.6045443343285511, "grad_norm": 299.7046813964844, "learning_rate": 4.156864959549833e-06, "loss": 25.1183, "step": 299270 }, { "epoch": 0.604564534961235, "grad_norm": 519.5421752929688, "learning_rate": 4.156520894400426e-06, "loss": 22.1145, "step": 299280 }, { "epoch": 0.6045847355939188, "grad_norm": 261.6352233886719, "learning_rate": 4.156176833362032e-06, "loss": 17.8663, "step": 299290 }, { "epoch": 0.6046049362266026, "grad_norm": 346.15753173828125, "learning_rate": 4.155832776436331e-06, "loss": 30.9108, "step": 299300 }, { "epoch": 0.6046251368592864, "grad_norm": 68.79360961914062, "learning_rate": 4.155488723624999e-06, "loss": 12.1505, "step": 299310 }, { "epoch": 0.6046453374919702, "grad_norm": 339.2813415527344, "learning_rate": 4.1551446749297104e-06, "loss": 24.4821, "step": 299320 }, { "epoch": 0.6046655381246541, "grad_norm": 417.6661682128906, "learning_rate": 4.154800630352145e-06, "loss": 13.9035, "step": 299330 }, { "epoch": 0.6046857387573379, "grad_norm": 256.5859069824219, "learning_rate": 4.154456589893981e-06, "loss": 13.483, "step": 299340 }, { "epoch": 0.6047059393900217, "grad_norm": 355.495849609375, "learning_rate": 4.15411255355689e-06, "loss": 17.0099, "step": 299350 }, { "epoch": 0.6047261400227055, "grad_norm": 97.88721466064453, "learning_rate": 4.153768521342552e-06, "loss": 7.5028, "step": 299360 }, { "epoch": 0.6047463406553893, "grad_norm": 384.6266784667969, "learning_rate": 4.153424493252646e-06, "loss": 25.203, "step": 299370 }, { "epoch": 0.6047665412880732, "grad_norm": 172.7843780517578, "learning_rate": 4.153080469288845e-06, "loss": 9.6276, "step": 299380 }, { "epoch": 0.604786741920757, "grad_norm": 278.9251403808594, "learning_rate": 4.152736449452827e-06, "loss": 30.1538, "step": 299390 }, { "epoch": 0.6048069425534408, "grad_norm": 508.84649658203125, "learning_rate": 4.15239243374627e-06, "loss": 20.9205, "step": 299400 }, { "epoch": 0.6048271431861246, "grad_norm": 628.2328491210938, "learning_rate": 4.152048422170848e-06, "loss": 30.4133, "step": 299410 }, { "epoch": 0.6048473438188084, "grad_norm": 834.5307006835938, "learning_rate": 4.151704414728238e-06, "loss": 23.9627, "step": 299420 }, { "epoch": 0.6048675444514923, "grad_norm": 432.1824951171875, "learning_rate": 4.151360411420119e-06, "loss": 17.0301, "step": 299430 }, { "epoch": 0.6048877450841761, "grad_norm": 0.0, "learning_rate": 4.1510164122481675e-06, "loss": 8.8541, "step": 299440 }, { "epoch": 0.6049079457168599, "grad_norm": 268.0205078125, "learning_rate": 4.150672417214058e-06, "loss": 9.6362, "step": 299450 }, { "epoch": 0.6049281463495436, "grad_norm": 254.6553955078125, "learning_rate": 4.150328426319469e-06, "loss": 19.6216, "step": 299460 }, { "epoch": 0.6049483469822274, "grad_norm": 202.44830322265625, "learning_rate": 4.149984439566076e-06, "loss": 33.5729, "step": 299470 }, { "epoch": 0.6049685476149113, "grad_norm": 222.9231414794922, "learning_rate": 4.149640456955555e-06, "loss": 19.3813, "step": 299480 }, { "epoch": 0.6049887482475951, "grad_norm": 413.1158142089844, "learning_rate": 4.149296478489583e-06, "loss": 33.4985, "step": 299490 }, { "epoch": 0.6050089488802789, "grad_norm": 367.1344299316406, "learning_rate": 4.148952504169839e-06, "loss": 13.7993, "step": 299500 }, { "epoch": 0.6050291495129627, "grad_norm": 440.9512023925781, "learning_rate": 4.1486085339979944e-06, "loss": 15.2196, "step": 299510 }, { "epoch": 0.6050493501456465, "grad_norm": 347.9075622558594, "learning_rate": 4.148264567975729e-06, "loss": 29.8938, "step": 299520 }, { "epoch": 0.6050695507783304, "grad_norm": 255.6634521484375, "learning_rate": 4.1479206061047205e-06, "loss": 25.6566, "step": 299530 }, { "epoch": 0.6050897514110142, "grad_norm": 370.9665222167969, "learning_rate": 4.147576648386643e-06, "loss": 18.986, "step": 299540 }, { "epoch": 0.605109952043698, "grad_norm": 112.6641616821289, "learning_rate": 4.147232694823173e-06, "loss": 9.7901, "step": 299550 }, { "epoch": 0.6051301526763818, "grad_norm": 7.952662467956543, "learning_rate": 4.146888745415988e-06, "loss": 27.058, "step": 299560 }, { "epoch": 0.6051503533090656, "grad_norm": 148.5594482421875, "learning_rate": 4.146544800166764e-06, "loss": 16.0661, "step": 299570 }, { "epoch": 0.6051705539417495, "grad_norm": 86.84959411621094, "learning_rate": 4.146200859077175e-06, "loss": 23.0521, "step": 299580 }, { "epoch": 0.6051907545744333, "grad_norm": 281.81976318359375, "learning_rate": 4.1458569221489006e-06, "loss": 9.2872, "step": 299590 }, { "epoch": 0.6052109552071171, "grad_norm": 283.1622619628906, "learning_rate": 4.145512989383618e-06, "loss": 17.2309, "step": 299600 }, { "epoch": 0.6052311558398009, "grad_norm": 206.9228515625, "learning_rate": 4.1451690607829995e-06, "loss": 13.6025, "step": 299610 }, { "epoch": 0.6052513564724847, "grad_norm": 81.89398956298828, "learning_rate": 4.144825136348724e-06, "loss": 18.1361, "step": 299620 }, { "epoch": 0.6052715571051686, "grad_norm": 113.26103210449219, "learning_rate": 4.144481216082467e-06, "loss": 15.0553, "step": 299630 }, { "epoch": 0.6052917577378524, "grad_norm": 159.92906188964844, "learning_rate": 4.1441372999859046e-06, "loss": 18.7581, "step": 299640 }, { "epoch": 0.6053119583705362, "grad_norm": 141.55226135253906, "learning_rate": 4.143793388060712e-06, "loss": 19.1576, "step": 299650 }, { "epoch": 0.60533215900322, "grad_norm": 293.8300476074219, "learning_rate": 4.143449480308569e-06, "loss": 22.7291, "step": 299660 }, { "epoch": 0.6053523596359038, "grad_norm": 287.6141357421875, "learning_rate": 4.143105576731147e-06, "loss": 16.1541, "step": 299670 }, { "epoch": 0.6053725602685877, "grad_norm": 78.10763549804688, "learning_rate": 4.1427616773301245e-06, "loss": 18.3547, "step": 299680 }, { "epoch": 0.6053927609012715, "grad_norm": 171.9502410888672, "learning_rate": 4.14241778210718e-06, "loss": 20.5227, "step": 299690 }, { "epoch": 0.6054129615339553, "grad_norm": 548.6365356445312, "learning_rate": 4.142073891063986e-06, "loss": 23.6436, "step": 299700 }, { "epoch": 0.605433162166639, "grad_norm": 167.31234741210938, "learning_rate": 4.1417300042022195e-06, "loss": 12.2588, "step": 299710 }, { "epoch": 0.6054533627993228, "grad_norm": 128.97158813476562, "learning_rate": 4.141386121523558e-06, "loss": 27.1315, "step": 299720 }, { "epoch": 0.6054735634320066, "grad_norm": 324.4867248535156, "learning_rate": 4.141042243029675e-06, "loss": 19.0655, "step": 299730 }, { "epoch": 0.6054937640646905, "grad_norm": 218.18421936035156, "learning_rate": 4.140698368722247e-06, "loss": 21.3062, "step": 299740 }, { "epoch": 0.6055139646973743, "grad_norm": 387.9167785644531, "learning_rate": 4.140354498602952e-06, "loss": 26.2181, "step": 299750 }, { "epoch": 0.6055341653300581, "grad_norm": 57.9122200012207, "learning_rate": 4.140010632673466e-06, "loss": 21.8963, "step": 299760 }, { "epoch": 0.6055543659627419, "grad_norm": 578.0679931640625, "learning_rate": 4.139666770935463e-06, "loss": 21.6686, "step": 299770 }, { "epoch": 0.6055745665954257, "grad_norm": 343.6753234863281, "learning_rate": 4.13932291339062e-06, "loss": 25.0862, "step": 299780 }, { "epoch": 0.6055947672281096, "grad_norm": 317.97906494140625, "learning_rate": 4.138979060040613e-06, "loss": 24.9135, "step": 299790 }, { "epoch": 0.6056149678607934, "grad_norm": 47.888999938964844, "learning_rate": 4.138635210887117e-06, "loss": 11.6634, "step": 299800 }, { "epoch": 0.6056351684934772, "grad_norm": 475.27032470703125, "learning_rate": 4.138291365931808e-06, "loss": 27.6815, "step": 299810 }, { "epoch": 0.605655369126161, "grad_norm": 487.06427001953125, "learning_rate": 4.137947525176364e-06, "loss": 15.9978, "step": 299820 }, { "epoch": 0.6056755697588448, "grad_norm": 19.037853240966797, "learning_rate": 4.137603688622458e-06, "loss": 14.7426, "step": 299830 }, { "epoch": 0.6056957703915287, "grad_norm": 111.6003189086914, "learning_rate": 4.137259856271767e-06, "loss": 15.0347, "step": 299840 }, { "epoch": 0.6057159710242125, "grad_norm": 533.7274780273438, "learning_rate": 4.1369160281259685e-06, "loss": 29.8628, "step": 299850 }, { "epoch": 0.6057361716568963, "grad_norm": 108.32072448730469, "learning_rate": 4.136572204186735e-06, "loss": 11.2165, "step": 299860 }, { "epoch": 0.6057563722895801, "grad_norm": 158.6745147705078, "learning_rate": 4.136228384455743e-06, "loss": 13.7041, "step": 299870 }, { "epoch": 0.605776572922264, "grad_norm": 162.4469757080078, "learning_rate": 4.13588456893467e-06, "loss": 18.4188, "step": 299880 }, { "epoch": 0.6057967735549478, "grad_norm": 460.8686218261719, "learning_rate": 4.1355407576251925e-06, "loss": 12.5849, "step": 299890 }, { "epoch": 0.6058169741876316, "grad_norm": 24.64805030822754, "learning_rate": 4.135196950528982e-06, "loss": 11.0722, "step": 299900 }, { "epoch": 0.6058371748203154, "grad_norm": 261.74261474609375, "learning_rate": 4.134853147647718e-06, "loss": 18.3054, "step": 299910 }, { "epoch": 0.6058573754529992, "grad_norm": 298.7730712890625, "learning_rate": 4.134509348983075e-06, "loss": 28.1828, "step": 299920 }, { "epoch": 0.605877576085683, "grad_norm": 496.96942138671875, "learning_rate": 4.134165554536728e-06, "loss": 16.1066, "step": 299930 }, { "epoch": 0.6058977767183669, "grad_norm": 4.797776699066162, "learning_rate": 4.133821764310352e-06, "loss": 25.5422, "step": 299940 }, { "epoch": 0.6059179773510507, "grad_norm": 319.41412353515625, "learning_rate": 4.133477978305626e-06, "loss": 26.4573, "step": 299950 }, { "epoch": 0.6059381779837345, "grad_norm": 195.1714630126953, "learning_rate": 4.133134196524221e-06, "loss": 18.2252, "step": 299960 }, { "epoch": 0.6059583786164182, "grad_norm": 180.11248779296875, "learning_rate": 4.132790418967816e-06, "loss": 26.4597, "step": 299970 }, { "epoch": 0.605978579249102, "grad_norm": 256.36334228515625, "learning_rate": 4.132446645638086e-06, "loss": 15.9536, "step": 299980 }, { "epoch": 0.6059987798817859, "grad_norm": 461.35845947265625, "learning_rate": 4.132102876536705e-06, "loss": 26.0206, "step": 299990 }, { "epoch": 0.6060189805144697, "grad_norm": 206.73876953125, "learning_rate": 4.131759111665349e-06, "loss": 16.1074, "step": 300000 }, { "epoch": 0.6060391811471535, "grad_norm": 341.3198547363281, "learning_rate": 4.131415351025695e-06, "loss": 10.7795, "step": 300010 }, { "epoch": 0.6060593817798373, "grad_norm": 606.48828125, "learning_rate": 4.131071594619416e-06, "loss": 34.6457, "step": 300020 }, { "epoch": 0.6060795824125211, "grad_norm": 148.39987182617188, "learning_rate": 4.130727842448187e-06, "loss": 12.0254, "step": 300030 }, { "epoch": 0.606099783045205, "grad_norm": 285.3895568847656, "learning_rate": 4.130384094513688e-06, "loss": 10.1689, "step": 300040 }, { "epoch": 0.6061199836778888, "grad_norm": 349.76593017578125, "learning_rate": 4.13004035081759e-06, "loss": 24.8115, "step": 300050 }, { "epoch": 0.6061401843105726, "grad_norm": 154.22274780273438, "learning_rate": 4.1296966113615705e-06, "loss": 17.9606, "step": 300060 }, { "epoch": 0.6061603849432564, "grad_norm": 153.65208435058594, "learning_rate": 4.129352876147304e-06, "loss": 27.0558, "step": 300070 }, { "epoch": 0.6061805855759402, "grad_norm": 199.4445037841797, "learning_rate": 4.129009145176467e-06, "loss": 22.0663, "step": 300080 }, { "epoch": 0.606200786208624, "grad_norm": 294.278076171875, "learning_rate": 4.128665418450732e-06, "loss": 17.1538, "step": 300090 }, { "epoch": 0.6062209868413079, "grad_norm": 321.5392761230469, "learning_rate": 4.128321695971775e-06, "loss": 6.98, "step": 300100 }, { "epoch": 0.6062411874739917, "grad_norm": 505.0638122558594, "learning_rate": 4.127977977741277e-06, "loss": 25.9787, "step": 300110 }, { "epoch": 0.6062613881066755, "grad_norm": 4.8182783126831055, "learning_rate": 4.127634263760904e-06, "loss": 24.2243, "step": 300120 }, { "epoch": 0.6062815887393593, "grad_norm": 266.6304931640625, "learning_rate": 4.127290554032337e-06, "loss": 9.2841, "step": 300130 }, { "epoch": 0.6063017893720432, "grad_norm": 41.385501861572266, "learning_rate": 4.126946848557252e-06, "loss": 21.6303, "step": 300140 }, { "epoch": 0.606321990004727, "grad_norm": 229.08641052246094, "learning_rate": 4.12660314733732e-06, "loss": 18.6331, "step": 300150 }, { "epoch": 0.6063421906374108, "grad_norm": 124.74713897705078, "learning_rate": 4.126259450374219e-06, "loss": 12.6173, "step": 300160 }, { "epoch": 0.6063623912700946, "grad_norm": 389.20379638671875, "learning_rate": 4.125915757669624e-06, "loss": 17.8154, "step": 300170 }, { "epoch": 0.6063825919027784, "grad_norm": 452.2779846191406, "learning_rate": 4.1255720692252084e-06, "loss": 24.1252, "step": 300180 }, { "epoch": 0.6064027925354623, "grad_norm": 352.031005859375, "learning_rate": 4.125228385042648e-06, "loss": 12.9192, "step": 300190 }, { "epoch": 0.6064229931681461, "grad_norm": 276.6169738769531, "learning_rate": 4.124884705123619e-06, "loss": 24.7772, "step": 300200 }, { "epoch": 0.6064431938008299, "grad_norm": 291.8409729003906, "learning_rate": 4.124541029469798e-06, "loss": 21.3639, "step": 300210 }, { "epoch": 0.6064633944335136, "grad_norm": 123.80675506591797, "learning_rate": 4.124197358082855e-06, "loss": 17.3767, "step": 300220 }, { "epoch": 0.6064835950661974, "grad_norm": 287.76849365234375, "learning_rate": 4.12385369096447e-06, "loss": 17.559, "step": 300230 }, { "epoch": 0.6065037956988812, "grad_norm": 376.82403564453125, "learning_rate": 4.123510028116315e-06, "loss": 26.9433, "step": 300240 }, { "epoch": 0.6065239963315651, "grad_norm": 8.99550724029541, "learning_rate": 4.123166369540066e-06, "loss": 17.6227, "step": 300250 }, { "epoch": 0.6065441969642489, "grad_norm": 459.0662841796875, "learning_rate": 4.1228227152373955e-06, "loss": 17.2209, "step": 300260 }, { "epoch": 0.6065643975969327, "grad_norm": 41.32606506347656, "learning_rate": 4.122479065209984e-06, "loss": 19.1101, "step": 300270 }, { "epoch": 0.6065845982296165, "grad_norm": 402.6487731933594, "learning_rate": 4.122135419459501e-06, "loss": 23.5126, "step": 300280 }, { "epoch": 0.6066047988623003, "grad_norm": 348.66741943359375, "learning_rate": 4.121791777987624e-06, "loss": 17.9, "step": 300290 }, { "epoch": 0.6066249994949842, "grad_norm": 397.2342529296875, "learning_rate": 4.121448140796029e-06, "loss": 16.6792, "step": 300300 }, { "epoch": 0.606645200127668, "grad_norm": 354.0387878417969, "learning_rate": 4.121104507886387e-06, "loss": 20.7347, "step": 300310 }, { "epoch": 0.6066654007603518, "grad_norm": 261.2240905761719, "learning_rate": 4.120760879260375e-06, "loss": 19.7656, "step": 300320 }, { "epoch": 0.6066856013930356, "grad_norm": 5.178500652313232, "learning_rate": 4.120417254919668e-06, "loss": 17.8664, "step": 300330 }, { "epoch": 0.6067058020257194, "grad_norm": 250.2174530029297, "learning_rate": 4.120073634865943e-06, "loss": 13.2032, "step": 300340 }, { "epoch": 0.6067260026584033, "grad_norm": 52.58137512207031, "learning_rate": 4.119730019100869e-06, "loss": 20.1613, "step": 300350 }, { "epoch": 0.6067462032910871, "grad_norm": 446.14715576171875, "learning_rate": 4.119386407626126e-06, "loss": 30.0472, "step": 300360 }, { "epoch": 0.6067664039237709, "grad_norm": 295.0727844238281, "learning_rate": 4.119042800443387e-06, "loss": 11.5798, "step": 300370 }, { "epoch": 0.6067866045564547, "grad_norm": 193.0285186767578, "learning_rate": 4.118699197554327e-06, "loss": 16.2283, "step": 300380 }, { "epoch": 0.6068068051891385, "grad_norm": 169.37954711914062, "learning_rate": 4.118355598960619e-06, "loss": 9.0736, "step": 300390 }, { "epoch": 0.6068270058218224, "grad_norm": 335.3083801269531, "learning_rate": 4.118012004663939e-06, "loss": 17.4046, "step": 300400 }, { "epoch": 0.6068472064545062, "grad_norm": 395.8465576171875, "learning_rate": 4.117668414665962e-06, "loss": 27.8119, "step": 300410 }, { "epoch": 0.60686740708719, "grad_norm": 502.36309814453125, "learning_rate": 4.117324828968361e-06, "loss": 18.3799, "step": 300420 }, { "epoch": 0.6068876077198738, "grad_norm": 330.7184143066406, "learning_rate": 4.116981247572814e-06, "loss": 15.7266, "step": 300430 }, { "epoch": 0.6069078083525576, "grad_norm": 322.1072082519531, "learning_rate": 4.11663767048099e-06, "loss": 28.6408, "step": 300440 }, { "epoch": 0.6069280089852415, "grad_norm": 128.4069061279297, "learning_rate": 4.1162940976945695e-06, "loss": 12.5329, "step": 300450 }, { "epoch": 0.6069482096179253, "grad_norm": 137.80978393554688, "learning_rate": 4.115950529215225e-06, "loss": 19.9789, "step": 300460 }, { "epoch": 0.6069684102506091, "grad_norm": 341.0010986328125, "learning_rate": 4.115606965044628e-06, "loss": 23.4594, "step": 300470 }, { "epoch": 0.6069886108832928, "grad_norm": 606.8335571289062, "learning_rate": 4.115263405184456e-06, "loss": 22.3263, "step": 300480 }, { "epoch": 0.6070088115159766, "grad_norm": 311.31781005859375, "learning_rate": 4.114919849636383e-06, "loss": 18.7267, "step": 300490 }, { "epoch": 0.6070290121486605, "grad_norm": 483.5519714355469, "learning_rate": 4.114576298402085e-06, "loss": 15.8881, "step": 300500 }, { "epoch": 0.6070492127813443, "grad_norm": 173.2476043701172, "learning_rate": 4.1142327514832326e-06, "loss": 23.3384, "step": 300510 }, { "epoch": 0.6070694134140281, "grad_norm": 328.857421875, "learning_rate": 4.1138892088815025e-06, "loss": 21.7299, "step": 300520 }, { "epoch": 0.6070896140467119, "grad_norm": 400.26678466796875, "learning_rate": 4.113545670598571e-06, "loss": 19.0921, "step": 300530 }, { "epoch": 0.6071098146793957, "grad_norm": 18.40981674194336, "learning_rate": 4.113202136636108e-06, "loss": 11.3929, "step": 300540 }, { "epoch": 0.6071300153120796, "grad_norm": 14.648477554321289, "learning_rate": 4.11285860699579e-06, "loss": 14.6017, "step": 300550 }, { "epoch": 0.6071502159447634, "grad_norm": 378.0171813964844, "learning_rate": 4.112515081679295e-06, "loss": 13.1383, "step": 300560 }, { "epoch": 0.6071704165774472, "grad_norm": 155.9915771484375, "learning_rate": 4.112171560688289e-06, "loss": 16.5142, "step": 300570 }, { "epoch": 0.607190617210131, "grad_norm": 287.2132568359375, "learning_rate": 4.111828044024454e-06, "loss": 28.7824, "step": 300580 }, { "epoch": 0.6072108178428148, "grad_norm": 83.69737243652344, "learning_rate": 4.111484531689462e-06, "loss": 11.8827, "step": 300590 }, { "epoch": 0.6072310184754987, "grad_norm": 406.6898193359375, "learning_rate": 4.111141023684986e-06, "loss": 15.2298, "step": 300600 }, { "epoch": 0.6072512191081825, "grad_norm": 398.8311462402344, "learning_rate": 4.1107975200126996e-06, "loss": 19.8289, "step": 300610 }, { "epoch": 0.6072714197408663, "grad_norm": 376.7922058105469, "learning_rate": 4.11045402067428e-06, "loss": 17.9475, "step": 300620 }, { "epoch": 0.6072916203735501, "grad_norm": 292.08026123046875, "learning_rate": 4.110110525671399e-06, "loss": 18.5191, "step": 300630 }, { "epoch": 0.6073118210062339, "grad_norm": 310.5513000488281, "learning_rate": 4.109767035005729e-06, "loss": 16.9888, "step": 300640 }, { "epoch": 0.6073320216389178, "grad_norm": 225.32894897460938, "learning_rate": 4.109423548678949e-06, "loss": 10.6479, "step": 300650 }, { "epoch": 0.6073522222716016, "grad_norm": 153.35182189941406, "learning_rate": 4.109080066692731e-06, "loss": 15.9965, "step": 300660 }, { "epoch": 0.6073724229042854, "grad_norm": 382.4284362792969, "learning_rate": 4.108736589048748e-06, "loss": 27.3892, "step": 300670 }, { "epoch": 0.6073926235369692, "grad_norm": 114.98696899414062, "learning_rate": 4.108393115748675e-06, "loss": 16.9811, "step": 300680 }, { "epoch": 0.607412824169653, "grad_norm": 302.2495422363281, "learning_rate": 4.108049646794186e-06, "loss": 19.9876, "step": 300690 }, { "epoch": 0.6074330248023369, "grad_norm": 171.1812286376953, "learning_rate": 4.107706182186954e-06, "loss": 18.4269, "step": 300700 }, { "epoch": 0.6074532254350207, "grad_norm": 314.04656982421875, "learning_rate": 4.107362721928653e-06, "loss": 17.8302, "step": 300710 }, { "epoch": 0.6074734260677045, "grad_norm": 291.04498291015625, "learning_rate": 4.107019266020961e-06, "loss": 12.3143, "step": 300720 }, { "epoch": 0.6074936267003882, "grad_norm": 341.0296936035156, "learning_rate": 4.106675814465545e-06, "loss": 16.3341, "step": 300730 }, { "epoch": 0.607513827333072, "grad_norm": 267.7034606933594, "learning_rate": 4.106332367264085e-06, "loss": 14.0231, "step": 300740 }, { "epoch": 0.6075340279657558, "grad_norm": 276.8511047363281, "learning_rate": 4.105988924418252e-06, "loss": 17.9352, "step": 300750 }, { "epoch": 0.6075542285984397, "grad_norm": 351.1433410644531, "learning_rate": 4.105645485929721e-06, "loss": 9.0568, "step": 300760 }, { "epoch": 0.6075744292311235, "grad_norm": 18.9317569732666, "learning_rate": 4.105302051800166e-06, "loss": 14.2675, "step": 300770 }, { "epoch": 0.6075946298638073, "grad_norm": 89.34229278564453, "learning_rate": 4.1049586220312594e-06, "loss": 49.4083, "step": 300780 }, { "epoch": 0.6076148304964911, "grad_norm": 193.96336364746094, "learning_rate": 4.104615196624676e-06, "loss": 17.097, "step": 300790 }, { "epoch": 0.6076350311291749, "grad_norm": 292.1412048339844, "learning_rate": 4.104271775582089e-06, "loss": 26.9367, "step": 300800 }, { "epoch": 0.6076552317618588, "grad_norm": 124.62691497802734, "learning_rate": 4.103928358905173e-06, "loss": 9.0844, "step": 300810 }, { "epoch": 0.6076754323945426, "grad_norm": 443.4271545410156, "learning_rate": 4.1035849465956024e-06, "loss": 34.6635, "step": 300820 }, { "epoch": 0.6076956330272264, "grad_norm": 153.2001190185547, "learning_rate": 4.103241538655049e-06, "loss": 14.2057, "step": 300830 }, { "epoch": 0.6077158336599102, "grad_norm": 85.39923095703125, "learning_rate": 4.1028981350851885e-06, "loss": 6.7408, "step": 300840 }, { "epoch": 0.607736034292594, "grad_norm": 254.2056121826172, "learning_rate": 4.102554735887694e-06, "loss": 22.4262, "step": 300850 }, { "epoch": 0.6077562349252779, "grad_norm": 157.08892822265625, "learning_rate": 4.102211341064237e-06, "loss": 15.5454, "step": 300860 }, { "epoch": 0.6077764355579617, "grad_norm": 83.6943359375, "learning_rate": 4.101867950616493e-06, "loss": 13.6504, "step": 300870 }, { "epoch": 0.6077966361906455, "grad_norm": 21.22698211669922, "learning_rate": 4.101524564546139e-06, "loss": 15.1054, "step": 300880 }, { "epoch": 0.6078168368233293, "grad_norm": 282.6283874511719, "learning_rate": 4.101181182854841e-06, "loss": 13.0693, "step": 300890 }, { "epoch": 0.6078370374560131, "grad_norm": 587.0730590820312, "learning_rate": 4.100837805544279e-06, "loss": 27.8729, "step": 300900 }, { "epoch": 0.607857238088697, "grad_norm": 46.59575271606445, "learning_rate": 4.100494432616126e-06, "loss": 15.5406, "step": 300910 }, { "epoch": 0.6078774387213808, "grad_norm": 327.95355224609375, "learning_rate": 4.1001510640720525e-06, "loss": 33.723, "step": 300920 }, { "epoch": 0.6078976393540646, "grad_norm": 429.2969055175781, "learning_rate": 4.099807699913733e-06, "loss": 21.2531, "step": 300930 }, { "epoch": 0.6079178399867484, "grad_norm": 148.35971069335938, "learning_rate": 4.09946434014284e-06, "loss": 17.7805, "step": 300940 }, { "epoch": 0.6079380406194322, "grad_norm": 231.91920471191406, "learning_rate": 4.099120984761053e-06, "loss": 19.6959, "step": 300950 }, { "epoch": 0.6079582412521161, "grad_norm": 146.3456268310547, "learning_rate": 4.098777633770038e-06, "loss": 8.1074, "step": 300960 }, { "epoch": 0.6079784418847999, "grad_norm": 136.85989379882812, "learning_rate": 4.0984342871714725e-06, "loss": 27.026, "step": 300970 }, { "epoch": 0.6079986425174837, "grad_norm": 648.6765747070312, "learning_rate": 4.0980909449670295e-06, "loss": 32.0894, "step": 300980 }, { "epoch": 0.6080188431501674, "grad_norm": 272.26348876953125, "learning_rate": 4.09774760715838e-06, "loss": 17.2467, "step": 300990 }, { "epoch": 0.6080390437828512, "grad_norm": 330.3271484375, "learning_rate": 4.0974042737472005e-06, "loss": 13.6804, "step": 301000 }, { "epoch": 0.608059244415535, "grad_norm": 618.1836547851562, "learning_rate": 4.0970609447351635e-06, "loss": 22.2134, "step": 301010 }, { "epoch": 0.6080794450482189, "grad_norm": 346.9175109863281, "learning_rate": 4.096717620123941e-06, "loss": 20.5127, "step": 301020 }, { "epoch": 0.6080996456809027, "grad_norm": 173.99307250976562, "learning_rate": 4.096374299915207e-06, "loss": 14.0726, "step": 301030 }, { "epoch": 0.6081198463135865, "grad_norm": 545.5914916992188, "learning_rate": 4.096030984110638e-06, "loss": 16.5668, "step": 301040 }, { "epoch": 0.6081400469462703, "grad_norm": 167.0265655517578, "learning_rate": 4.0956876727119e-06, "loss": 14.0678, "step": 301050 }, { "epoch": 0.6081602475789541, "grad_norm": 182.90701293945312, "learning_rate": 4.095344365720673e-06, "loss": 16.89, "step": 301060 }, { "epoch": 0.608180448211638, "grad_norm": 97.27924346923828, "learning_rate": 4.095001063138629e-06, "loss": 11.2082, "step": 301070 }, { "epoch": 0.6082006488443218, "grad_norm": 294.68084716796875, "learning_rate": 4.0946577649674375e-06, "loss": 20.9474, "step": 301080 }, { "epoch": 0.6082208494770056, "grad_norm": 324.84881591796875, "learning_rate": 4.094314471208775e-06, "loss": 16.4712, "step": 301090 }, { "epoch": 0.6082410501096894, "grad_norm": 387.46405029296875, "learning_rate": 4.093971181864313e-06, "loss": 21.1362, "step": 301100 }, { "epoch": 0.6082612507423732, "grad_norm": 18.901620864868164, "learning_rate": 4.093627896935727e-06, "loss": 11.4866, "step": 301110 }, { "epoch": 0.6082814513750571, "grad_norm": 132.14410400390625, "learning_rate": 4.093284616424688e-06, "loss": 15.8728, "step": 301120 }, { "epoch": 0.6083016520077409, "grad_norm": 345.72479248046875, "learning_rate": 4.092941340332871e-06, "loss": 14.4831, "step": 301130 }, { "epoch": 0.6083218526404247, "grad_norm": 279.4391174316406, "learning_rate": 4.092598068661948e-06, "loss": 19.099, "step": 301140 }, { "epoch": 0.6083420532731085, "grad_norm": 299.65924072265625, "learning_rate": 4.092254801413591e-06, "loss": 13.2114, "step": 301150 }, { "epoch": 0.6083622539057923, "grad_norm": 188.80506896972656, "learning_rate": 4.091911538589474e-06, "loss": 16.1995, "step": 301160 }, { "epoch": 0.6083824545384762, "grad_norm": 461.46685791015625, "learning_rate": 4.091568280191271e-06, "loss": 12.9305, "step": 301170 }, { "epoch": 0.60840265517116, "grad_norm": 300.6879577636719, "learning_rate": 4.091225026220652e-06, "loss": 20.0284, "step": 301180 }, { "epoch": 0.6084228558038438, "grad_norm": 327.4726257324219, "learning_rate": 4.090881776679293e-06, "loss": 11.9765, "step": 301190 }, { "epoch": 0.6084430564365276, "grad_norm": 1641.6641845703125, "learning_rate": 4.090538531568867e-06, "loss": 31.6387, "step": 301200 }, { "epoch": 0.6084632570692114, "grad_norm": 148.97674560546875, "learning_rate": 4.090195290891045e-06, "loss": 11.609, "step": 301210 }, { "epoch": 0.6084834577018953, "grad_norm": 453.103759765625, "learning_rate": 4.0898520546475e-06, "loss": 28.2901, "step": 301220 }, { "epoch": 0.6085036583345791, "grad_norm": 297.2235107421875, "learning_rate": 4.089508822839907e-06, "loss": 16.542, "step": 301230 }, { "epoch": 0.6085238589672629, "grad_norm": 264.8147277832031, "learning_rate": 4.089165595469937e-06, "loss": 13.3915, "step": 301240 }, { "epoch": 0.6085440595999466, "grad_norm": 241.60462951660156, "learning_rate": 4.088822372539263e-06, "loss": 18.8642, "step": 301250 }, { "epoch": 0.6085642602326304, "grad_norm": 226.2793426513672, "learning_rate": 4.0884791540495585e-06, "loss": 12.5173, "step": 301260 }, { "epoch": 0.6085844608653143, "grad_norm": 491.29962158203125, "learning_rate": 4.0881359400024964e-06, "loss": 32.1254, "step": 301270 }, { "epoch": 0.6086046614979981, "grad_norm": 244.948486328125, "learning_rate": 4.087792730399749e-06, "loss": 14.0426, "step": 301280 }, { "epoch": 0.6086248621306819, "grad_norm": 448.0541687011719, "learning_rate": 4.087449525242989e-06, "loss": 17.5574, "step": 301290 }, { "epoch": 0.6086450627633657, "grad_norm": 311.444580078125, "learning_rate": 4.087106324533891e-06, "loss": 16.9038, "step": 301300 }, { "epoch": 0.6086652633960495, "grad_norm": 444.9884033203125, "learning_rate": 4.086763128274124e-06, "loss": 15.0095, "step": 301310 }, { "epoch": 0.6086854640287334, "grad_norm": 247.54644775390625, "learning_rate": 4.086419936465362e-06, "loss": 13.7734, "step": 301320 }, { "epoch": 0.6087056646614172, "grad_norm": 667.581298828125, "learning_rate": 4.0860767491092825e-06, "loss": 18.8019, "step": 301330 }, { "epoch": 0.608725865294101, "grad_norm": 407.9124450683594, "learning_rate": 4.08573356620755e-06, "loss": 15.7534, "step": 301340 }, { "epoch": 0.6087460659267848, "grad_norm": 130.4237823486328, "learning_rate": 4.0853903877618425e-06, "loss": 16.2925, "step": 301350 }, { "epoch": 0.6087662665594686, "grad_norm": 21.202775955200195, "learning_rate": 4.085047213773831e-06, "loss": 6.7965, "step": 301360 }, { "epoch": 0.6087864671921525, "grad_norm": 275.2750244140625, "learning_rate": 4.08470404424519e-06, "loss": 27.5482, "step": 301370 }, { "epoch": 0.6088066678248363, "grad_norm": 451.3043212890625, "learning_rate": 4.084360879177588e-06, "loss": 15.4858, "step": 301380 }, { "epoch": 0.6088268684575201, "grad_norm": 181.0237579345703, "learning_rate": 4.0840177185727005e-06, "loss": 23.402, "step": 301390 }, { "epoch": 0.6088470690902039, "grad_norm": 290.1169738769531, "learning_rate": 4.083674562432203e-06, "loss": 26.0091, "step": 301400 }, { "epoch": 0.6088672697228877, "grad_norm": 305.6929931640625, "learning_rate": 4.0833314107577605e-06, "loss": 7.6184, "step": 301410 }, { "epoch": 0.6088874703555716, "grad_norm": 199.99932861328125, "learning_rate": 4.08298826355105e-06, "loss": 19.7872, "step": 301420 }, { "epoch": 0.6089076709882554, "grad_norm": 2.1812188625335693, "learning_rate": 4.082645120813746e-06, "loss": 17.7978, "step": 301430 }, { "epoch": 0.6089278716209392, "grad_norm": 436.4793701171875, "learning_rate": 4.082301982547517e-06, "loss": 12.1477, "step": 301440 }, { "epoch": 0.608948072253623, "grad_norm": 186.20779418945312, "learning_rate": 4.081958848754035e-06, "loss": 7.9247, "step": 301450 }, { "epoch": 0.6089682728863068, "grad_norm": 232.30868530273438, "learning_rate": 4.081615719434978e-06, "loss": 16.2423, "step": 301460 }, { "epoch": 0.6089884735189907, "grad_norm": 69.32320404052734, "learning_rate": 4.081272594592011e-06, "loss": 10.7213, "step": 301470 }, { "epoch": 0.6090086741516745, "grad_norm": 29.719039916992188, "learning_rate": 4.080929474226811e-06, "loss": 15.3702, "step": 301480 }, { "epoch": 0.6090288747843583, "grad_norm": 398.23974609375, "learning_rate": 4.080586358341051e-06, "loss": 16.5784, "step": 301490 }, { "epoch": 0.609049075417042, "grad_norm": 361.9292297363281, "learning_rate": 4.0802432469364e-06, "loss": 6.6114, "step": 301500 }, { "epoch": 0.6090692760497258, "grad_norm": 190.2130126953125, "learning_rate": 4.0799001400145315e-06, "loss": 21.7059, "step": 301510 }, { "epoch": 0.6090894766824096, "grad_norm": 247.02635192871094, "learning_rate": 4.07955703757712e-06, "loss": 9.3847, "step": 301520 }, { "epoch": 0.6091096773150935, "grad_norm": 175.6605987548828, "learning_rate": 4.079213939625834e-06, "loss": 26.8995, "step": 301530 }, { "epoch": 0.6091298779477773, "grad_norm": 256.77520751953125, "learning_rate": 4.078870846162349e-06, "loss": 12.9085, "step": 301540 }, { "epoch": 0.6091500785804611, "grad_norm": 331.1306457519531, "learning_rate": 4.078527757188333e-06, "loss": 27.1085, "step": 301550 }, { "epoch": 0.6091702792131449, "grad_norm": 357.85260009765625, "learning_rate": 4.078184672705465e-06, "loss": 23.558, "step": 301560 }, { "epoch": 0.6091904798458287, "grad_norm": 450.0954284667969, "learning_rate": 4.077841592715409e-06, "loss": 13.2752, "step": 301570 }, { "epoch": 0.6092106804785126, "grad_norm": 201.32147216796875, "learning_rate": 4.077498517219844e-06, "loss": 18.2694, "step": 301580 }, { "epoch": 0.6092308811111964, "grad_norm": 113.60115051269531, "learning_rate": 4.0771554462204395e-06, "loss": 27.8482, "step": 301590 }, { "epoch": 0.6092510817438802, "grad_norm": 1057.0537109375, "learning_rate": 4.0768123797188665e-06, "loss": 23.3814, "step": 301600 }, { "epoch": 0.609271282376564, "grad_norm": 501.0418701171875, "learning_rate": 4.076469317716798e-06, "loss": 14.4427, "step": 301610 }, { "epoch": 0.6092914830092478, "grad_norm": 0.0, "learning_rate": 4.076126260215906e-06, "loss": 15.9605, "step": 301620 }, { "epoch": 0.6093116836419317, "grad_norm": 160.85211181640625, "learning_rate": 4.0757832072178626e-06, "loss": 6.8101, "step": 301630 }, { "epoch": 0.6093318842746155, "grad_norm": 76.2935562133789, "learning_rate": 4.075440158724339e-06, "loss": 17.0009, "step": 301640 }, { "epoch": 0.6093520849072993, "grad_norm": 255.25634765625, "learning_rate": 4.075097114737011e-06, "loss": 13.6923, "step": 301650 }, { "epoch": 0.6093722855399831, "grad_norm": 653.8756103515625, "learning_rate": 4.074754075257543e-06, "loss": 22.0989, "step": 301660 }, { "epoch": 0.609392486172667, "grad_norm": 826.0037841796875, "learning_rate": 4.074411040287614e-06, "loss": 23.4467, "step": 301670 }, { "epoch": 0.6094126868053508, "grad_norm": 348.1150817871094, "learning_rate": 4.074068009828894e-06, "loss": 22.2655, "step": 301680 }, { "epoch": 0.6094328874380346, "grad_norm": 481.5484924316406, "learning_rate": 4.073724983883053e-06, "loss": 25.2459, "step": 301690 }, { "epoch": 0.6094530880707184, "grad_norm": 51.74058532714844, "learning_rate": 4.073381962451764e-06, "loss": 11.5862, "step": 301700 }, { "epoch": 0.6094732887034022, "grad_norm": 414.6076965332031, "learning_rate": 4.073038945536698e-06, "loss": 13.2963, "step": 301710 }, { "epoch": 0.609493489336086, "grad_norm": 228.4296112060547, "learning_rate": 4.07269593313953e-06, "loss": 15.6475, "step": 301720 }, { "epoch": 0.6095136899687699, "grad_norm": 136.12831115722656, "learning_rate": 4.0723529252619276e-06, "loss": 13.5618, "step": 301730 }, { "epoch": 0.6095338906014537, "grad_norm": 648.1093139648438, "learning_rate": 4.0720099219055655e-06, "loss": 11.8489, "step": 301740 }, { "epoch": 0.6095540912341375, "grad_norm": 432.55108642578125, "learning_rate": 4.0716669230721154e-06, "loss": 16.9446, "step": 301750 }, { "epoch": 0.6095742918668212, "grad_norm": 102.70632934570312, "learning_rate": 4.071323928763247e-06, "loss": 8.9785, "step": 301760 }, { "epoch": 0.609594492499505, "grad_norm": 16.458847045898438, "learning_rate": 4.070980938980633e-06, "loss": 19.9219, "step": 301770 }, { "epoch": 0.6096146931321889, "grad_norm": 449.44866943359375, "learning_rate": 4.070637953725946e-06, "loss": 19.1214, "step": 301780 }, { "epoch": 0.6096348937648727, "grad_norm": 362.70831298828125, "learning_rate": 4.0702949730008565e-06, "loss": 30.1825, "step": 301790 }, { "epoch": 0.6096550943975565, "grad_norm": 164.8471221923828, "learning_rate": 4.069951996807034e-06, "loss": 15.3036, "step": 301800 }, { "epoch": 0.6096752950302403, "grad_norm": 224.10682678222656, "learning_rate": 4.069609025146156e-06, "loss": 19.2649, "step": 301810 }, { "epoch": 0.6096954956629241, "grad_norm": 223.59571838378906, "learning_rate": 4.0692660580198905e-06, "loss": 23.2198, "step": 301820 }, { "epoch": 0.609715696295608, "grad_norm": 297.2260437011719, "learning_rate": 4.068923095429909e-06, "loss": 17.435, "step": 301830 }, { "epoch": 0.6097358969282918, "grad_norm": 0.0, "learning_rate": 4.068580137377882e-06, "loss": 15.5785, "step": 301840 }, { "epoch": 0.6097560975609756, "grad_norm": 309.1147155761719, "learning_rate": 4.068237183865485e-06, "loss": 15.5864, "step": 301850 }, { "epoch": 0.6097762981936594, "grad_norm": 263.13946533203125, "learning_rate": 4.067894234894384e-06, "loss": 29.7786, "step": 301860 }, { "epoch": 0.6097964988263432, "grad_norm": 237.5653839111328, "learning_rate": 4.067551290466255e-06, "loss": 27.4487, "step": 301870 }, { "epoch": 0.6098166994590271, "grad_norm": 333.82757568359375, "learning_rate": 4.067208350582769e-06, "loss": 15.0315, "step": 301880 }, { "epoch": 0.6098369000917109, "grad_norm": 505.87286376953125, "learning_rate": 4.066865415245594e-06, "loss": 28.8826, "step": 301890 }, { "epoch": 0.6098571007243947, "grad_norm": 207.93630981445312, "learning_rate": 4.066522484456406e-06, "loss": 13.7091, "step": 301900 }, { "epoch": 0.6098773013570785, "grad_norm": 432.1338806152344, "learning_rate": 4.066179558216874e-06, "loss": 21.5663, "step": 301910 }, { "epoch": 0.6098975019897623, "grad_norm": 88.10758209228516, "learning_rate": 4.0658366365286684e-06, "loss": 21.9751, "step": 301920 }, { "epoch": 0.6099177026224462, "grad_norm": 490.8226318359375, "learning_rate": 4.06549371939346e-06, "loss": 12.0886, "step": 301930 }, { "epoch": 0.60993790325513, "grad_norm": 470.6809387207031, "learning_rate": 4.0651508068129264e-06, "loss": 19.0926, "step": 301940 }, { "epoch": 0.6099581038878138, "grad_norm": 34.389259338378906, "learning_rate": 4.064807898788731e-06, "loss": 25.1848, "step": 301950 }, { "epoch": 0.6099783045204976, "grad_norm": 170.79234313964844, "learning_rate": 4.064464995322549e-06, "loss": 16.8585, "step": 301960 }, { "epoch": 0.6099985051531814, "grad_norm": 227.76080322265625, "learning_rate": 4.064122096416053e-06, "loss": 22.7437, "step": 301970 }, { "epoch": 0.6100187057858653, "grad_norm": 224.03878784179688, "learning_rate": 4.063779202070911e-06, "loss": 14.7213, "step": 301980 }, { "epoch": 0.6100389064185491, "grad_norm": 340.4079895019531, "learning_rate": 4.0634363122887945e-06, "loss": 17.8305, "step": 301990 }, { "epoch": 0.6100591070512329, "grad_norm": 274.3102722167969, "learning_rate": 4.063093427071376e-06, "loss": 17.8009, "step": 302000 }, { "epoch": 0.6100793076839166, "grad_norm": 419.6270751953125, "learning_rate": 4.06275054642033e-06, "loss": 22.348, "step": 302010 }, { "epoch": 0.6100995083166004, "grad_norm": 306.7874755859375, "learning_rate": 4.06240767033732e-06, "loss": 13.2765, "step": 302020 }, { "epoch": 0.6101197089492842, "grad_norm": 608.6171875, "learning_rate": 4.0620647988240225e-06, "loss": 23.4329, "step": 302030 }, { "epoch": 0.6101399095819681, "grad_norm": 403.0815734863281, "learning_rate": 4.061721931882109e-06, "loss": 15.0292, "step": 302040 }, { "epoch": 0.6101601102146519, "grad_norm": 0.0, "learning_rate": 4.061379069513248e-06, "loss": 17.12, "step": 302050 }, { "epoch": 0.6101803108473357, "grad_norm": 172.14205932617188, "learning_rate": 4.0610362117191106e-06, "loss": 15.9182, "step": 302060 }, { "epoch": 0.6102005114800195, "grad_norm": 356.615478515625, "learning_rate": 4.0606933585013704e-06, "loss": 18.8039, "step": 302070 }, { "epoch": 0.6102207121127033, "grad_norm": 328.2738952636719, "learning_rate": 4.060350509861696e-06, "loss": 10.1829, "step": 302080 }, { "epoch": 0.6102409127453872, "grad_norm": 161.40647888183594, "learning_rate": 4.0600076658017585e-06, "loss": 27.7949, "step": 302090 }, { "epoch": 0.610261113378071, "grad_norm": 398.1403503417969, "learning_rate": 4.0596648263232315e-06, "loss": 10.6438, "step": 302100 }, { "epoch": 0.6102813140107548, "grad_norm": 445.341064453125, "learning_rate": 4.059321991427782e-06, "loss": 13.8817, "step": 302110 }, { "epoch": 0.6103015146434386, "grad_norm": 158.53713989257812, "learning_rate": 4.058979161117084e-06, "loss": 12.5922, "step": 302120 }, { "epoch": 0.6103217152761224, "grad_norm": 249.50384521484375, "learning_rate": 4.058636335392809e-06, "loss": 21.1783, "step": 302130 }, { "epoch": 0.6103419159088063, "grad_norm": 541.7451782226562, "learning_rate": 4.0582935142566245e-06, "loss": 20.0878, "step": 302140 }, { "epoch": 0.6103621165414901, "grad_norm": 690.7022705078125, "learning_rate": 4.057950697710203e-06, "loss": 19.2531, "step": 302150 }, { "epoch": 0.6103823171741739, "grad_norm": 455.2514953613281, "learning_rate": 4.057607885755215e-06, "loss": 15.529, "step": 302160 }, { "epoch": 0.6104025178068577, "grad_norm": 471.48419189453125, "learning_rate": 4.057265078393335e-06, "loss": 17.3494, "step": 302170 }, { "epoch": 0.6104227184395415, "grad_norm": 328.9300231933594, "learning_rate": 4.056922275626227e-06, "loss": 28.1218, "step": 302180 }, { "epoch": 0.6104429190722254, "grad_norm": 512.0936279296875, "learning_rate": 4.056579477455567e-06, "loss": 23.1691, "step": 302190 }, { "epoch": 0.6104631197049092, "grad_norm": 66.92971801757812, "learning_rate": 4.0562366838830255e-06, "loss": 12.5647, "step": 302200 }, { "epoch": 0.610483320337593, "grad_norm": 546.1402587890625, "learning_rate": 4.05589389491027e-06, "loss": 17.9962, "step": 302210 }, { "epoch": 0.6105035209702768, "grad_norm": 64.57108306884766, "learning_rate": 4.0555511105389735e-06, "loss": 15.5535, "step": 302220 }, { "epoch": 0.6105237216029606, "grad_norm": 229.49009704589844, "learning_rate": 4.055208330770808e-06, "loss": 15.4532, "step": 302230 }, { "epoch": 0.6105439222356445, "grad_norm": 263.4242248535156, "learning_rate": 4.054865555607441e-06, "loss": 21.7159, "step": 302240 }, { "epoch": 0.6105641228683283, "grad_norm": 120.36184692382812, "learning_rate": 4.054522785050543e-06, "loss": 20.3244, "step": 302250 }, { "epoch": 0.6105843235010121, "grad_norm": 413.830078125, "learning_rate": 4.05418001910179e-06, "loss": 8.5703, "step": 302260 }, { "epoch": 0.6106045241336958, "grad_norm": 195.825927734375, "learning_rate": 4.053837257762846e-06, "loss": 16.6567, "step": 302270 }, { "epoch": 0.6106247247663796, "grad_norm": 377.5501403808594, "learning_rate": 4.053494501035385e-06, "loss": 17.2775, "step": 302280 }, { "epoch": 0.6106449253990635, "grad_norm": 508.1600036621094, "learning_rate": 4.053151748921078e-06, "loss": 14.1678, "step": 302290 }, { "epoch": 0.6106651260317473, "grad_norm": 173.021240234375, "learning_rate": 4.052809001421595e-06, "loss": 27.7673, "step": 302300 }, { "epoch": 0.6106853266644311, "grad_norm": 317.7706298828125, "learning_rate": 4.0524662585386045e-06, "loss": 20.1063, "step": 302310 }, { "epoch": 0.6107055272971149, "grad_norm": 256.08148193359375, "learning_rate": 4.0521235202737775e-06, "loss": 15.2602, "step": 302320 }, { "epoch": 0.6107257279297987, "grad_norm": 266.9493408203125, "learning_rate": 4.051780786628789e-06, "loss": 11.7173, "step": 302330 }, { "epoch": 0.6107459285624826, "grad_norm": 328.6699523925781, "learning_rate": 4.0514380576053035e-06, "loss": 23.3237, "step": 302340 }, { "epoch": 0.6107661291951664, "grad_norm": 1283.9908447265625, "learning_rate": 4.051095333204994e-06, "loss": 18.3715, "step": 302350 }, { "epoch": 0.6107863298278502, "grad_norm": 355.5120544433594, "learning_rate": 4.0507526134295314e-06, "loss": 24.4198, "step": 302360 }, { "epoch": 0.610806530460534, "grad_norm": 864.2909545898438, "learning_rate": 4.050409898280585e-06, "loss": 10.8633, "step": 302370 }, { "epoch": 0.6108267310932178, "grad_norm": 563.103759765625, "learning_rate": 4.050067187759826e-06, "loss": 17.9565, "step": 302380 }, { "epoch": 0.6108469317259017, "grad_norm": 0.0, "learning_rate": 4.049724481868924e-06, "loss": 19.726, "step": 302390 }, { "epoch": 0.6108671323585855, "grad_norm": 531.8023071289062, "learning_rate": 4.0493817806095504e-06, "loss": 23.0094, "step": 302400 }, { "epoch": 0.6108873329912693, "grad_norm": 96.84931182861328, "learning_rate": 4.049039083983372e-06, "loss": 20.5541, "step": 302410 }, { "epoch": 0.6109075336239531, "grad_norm": 108.69450378417969, "learning_rate": 4.048696391992065e-06, "loss": 25.5505, "step": 302420 }, { "epoch": 0.6109277342566369, "grad_norm": 336.5030517578125, "learning_rate": 4.048353704637295e-06, "loss": 10.5168, "step": 302430 }, { "epoch": 0.6109479348893208, "grad_norm": 54.228309631347656, "learning_rate": 4.048011021920733e-06, "loss": 19.1338, "step": 302440 }, { "epoch": 0.6109681355220046, "grad_norm": 0.0, "learning_rate": 4.047668343844051e-06, "loss": 9.6664, "step": 302450 }, { "epoch": 0.6109883361546884, "grad_norm": 193.78128051757812, "learning_rate": 4.047325670408918e-06, "loss": 6.8528, "step": 302460 }, { "epoch": 0.6110085367873722, "grad_norm": 25.38318634033203, "learning_rate": 4.046983001617004e-06, "loss": 18.0018, "step": 302470 }, { "epoch": 0.611028737420056, "grad_norm": 323.3836364746094, "learning_rate": 4.0466403374699775e-06, "loss": 35.9004, "step": 302480 }, { "epoch": 0.6110489380527399, "grad_norm": 294.5228271484375, "learning_rate": 4.046297677969513e-06, "loss": 17.5183, "step": 302490 }, { "epoch": 0.6110691386854237, "grad_norm": 417.97802734375, "learning_rate": 4.045955023117276e-06, "loss": 21.2968, "step": 302500 }, { "epoch": 0.6110893393181075, "grad_norm": 245.76817321777344, "learning_rate": 4.045612372914939e-06, "loss": 18.6677, "step": 302510 }, { "epoch": 0.6111095399507913, "grad_norm": 242.54116821289062, "learning_rate": 4.045269727364173e-06, "loss": 11.5636, "step": 302520 }, { "epoch": 0.611129740583475, "grad_norm": 497.0908508300781, "learning_rate": 4.044927086466646e-06, "loss": 17.4828, "step": 302530 }, { "epoch": 0.6111499412161588, "grad_norm": 625.4280395507812, "learning_rate": 4.044584450224026e-06, "loss": 14.4463, "step": 302540 }, { "epoch": 0.6111701418488427, "grad_norm": 11.54104232788086, "learning_rate": 4.0442418186379895e-06, "loss": 10.4158, "step": 302550 }, { "epoch": 0.6111903424815265, "grad_norm": 222.9636993408203, "learning_rate": 4.043899191710199e-06, "loss": 18.6837, "step": 302560 }, { "epoch": 0.6112105431142103, "grad_norm": 920.7866821289062, "learning_rate": 4.043556569442329e-06, "loss": 26.9404, "step": 302570 }, { "epoch": 0.6112307437468941, "grad_norm": 178.75839233398438, "learning_rate": 4.0432139518360495e-06, "loss": 16.6833, "step": 302580 }, { "epoch": 0.6112509443795779, "grad_norm": 508.0994873046875, "learning_rate": 4.0428713388930276e-06, "loss": 20.9416, "step": 302590 }, { "epoch": 0.6112711450122618, "grad_norm": 538.3495483398438, "learning_rate": 4.042528730614935e-06, "loss": 28.2326, "step": 302600 }, { "epoch": 0.6112913456449456, "grad_norm": 365.6939392089844, "learning_rate": 4.042186127003441e-06, "loss": 18.0228, "step": 302610 }, { "epoch": 0.6113115462776294, "grad_norm": 804.1202392578125, "learning_rate": 4.0418435280602185e-06, "loss": 14.4241, "step": 302620 }, { "epoch": 0.6113317469103132, "grad_norm": 371.264404296875, "learning_rate": 4.04150093378693e-06, "loss": 17.1336, "step": 302630 }, { "epoch": 0.611351947542997, "grad_norm": 209.99737548828125, "learning_rate": 4.041158344185252e-06, "loss": 21.216, "step": 302640 }, { "epoch": 0.6113721481756809, "grad_norm": 256.618408203125, "learning_rate": 4.040815759256852e-06, "loss": 25.6357, "step": 302650 }, { "epoch": 0.6113923488083647, "grad_norm": 182.76116943359375, "learning_rate": 4.0404731790034e-06, "loss": 32.6357, "step": 302660 }, { "epoch": 0.6114125494410485, "grad_norm": 327.65069580078125, "learning_rate": 4.040130603426565e-06, "loss": 28.3785, "step": 302670 }, { "epoch": 0.6114327500737323, "grad_norm": 537.5012817382812, "learning_rate": 4.039788032528017e-06, "loss": 10.3664, "step": 302680 }, { "epoch": 0.6114529507064161, "grad_norm": 242.74002075195312, "learning_rate": 4.039445466309426e-06, "loss": 21.4152, "step": 302690 }, { "epoch": 0.6114731513391, "grad_norm": 385.9034729003906, "learning_rate": 4.039102904772459e-06, "loss": 8.5415, "step": 302700 }, { "epoch": 0.6114933519717838, "grad_norm": 355.650146484375, "learning_rate": 4.0387603479187915e-06, "loss": 28.8241, "step": 302710 }, { "epoch": 0.6115135526044676, "grad_norm": 499.15948486328125, "learning_rate": 4.038417795750086e-06, "loss": 15.6121, "step": 302720 }, { "epoch": 0.6115337532371514, "grad_norm": 275.37127685546875, "learning_rate": 4.038075248268018e-06, "loss": 16.0339, "step": 302730 }, { "epoch": 0.6115539538698352, "grad_norm": 364.453857421875, "learning_rate": 4.0377327054742544e-06, "loss": 19.5774, "step": 302740 }, { "epoch": 0.6115741545025191, "grad_norm": 154.44297790527344, "learning_rate": 4.037390167370464e-06, "loss": 14.0028, "step": 302750 }, { "epoch": 0.6115943551352029, "grad_norm": 219.45130920410156, "learning_rate": 4.037047633958317e-06, "loss": 16.4271, "step": 302760 }, { "epoch": 0.6116145557678867, "grad_norm": 241.10362243652344, "learning_rate": 4.0367051052394825e-06, "loss": 13.9913, "step": 302770 }, { "epoch": 0.6116347564005704, "grad_norm": 519.4535522460938, "learning_rate": 4.036362581215633e-06, "loss": 18.2545, "step": 302780 }, { "epoch": 0.6116549570332542, "grad_norm": 18.164527893066406, "learning_rate": 4.036020061888432e-06, "loss": 16.2977, "step": 302790 }, { "epoch": 0.611675157665938, "grad_norm": 381.4558410644531, "learning_rate": 4.035677547259555e-06, "loss": 18.6765, "step": 302800 }, { "epoch": 0.6116953582986219, "grad_norm": 116.06623840332031, "learning_rate": 4.035335037330668e-06, "loss": 28.2293, "step": 302810 }, { "epoch": 0.6117155589313057, "grad_norm": 419.48529052734375, "learning_rate": 4.034992532103441e-06, "loss": 20.3574, "step": 302820 }, { "epoch": 0.6117357595639895, "grad_norm": 187.1387939453125, "learning_rate": 4.034650031579543e-06, "loss": 13.373, "step": 302830 }, { "epoch": 0.6117559601966733, "grad_norm": 375.39886474609375, "learning_rate": 4.0343075357606445e-06, "loss": 18.233, "step": 302840 }, { "epoch": 0.6117761608293572, "grad_norm": 222.97288513183594, "learning_rate": 4.0339650446484135e-06, "loss": 20.6082, "step": 302850 }, { "epoch": 0.611796361462041, "grad_norm": 37.50168228149414, "learning_rate": 4.033622558244519e-06, "loss": 10.9728, "step": 302860 }, { "epoch": 0.6118165620947248, "grad_norm": 408.7652587890625, "learning_rate": 4.0332800765506325e-06, "loss": 8.4043, "step": 302870 }, { "epoch": 0.6118367627274086, "grad_norm": 326.9809265136719, "learning_rate": 4.03293759956842e-06, "loss": 15.7878, "step": 302880 }, { "epoch": 0.6118569633600924, "grad_norm": 131.39865112304688, "learning_rate": 4.032595127299552e-06, "loss": 20.5895, "step": 302890 }, { "epoch": 0.6118771639927763, "grad_norm": 133.35475158691406, "learning_rate": 4.032252659745699e-06, "loss": 15.2407, "step": 302900 }, { "epoch": 0.6118973646254601, "grad_norm": 176.69598388671875, "learning_rate": 4.03191019690853e-06, "loss": 13.6706, "step": 302910 }, { "epoch": 0.6119175652581439, "grad_norm": 240.8840789794922, "learning_rate": 4.031567738789713e-06, "loss": 14.7905, "step": 302920 }, { "epoch": 0.6119377658908277, "grad_norm": 198.0797119140625, "learning_rate": 4.031225285390915e-06, "loss": 13.4085, "step": 302930 }, { "epoch": 0.6119579665235115, "grad_norm": 310.97454833984375, "learning_rate": 4.0308828367138106e-06, "loss": 8.6858, "step": 302940 }, { "epoch": 0.6119781671561954, "grad_norm": 473.3871154785156, "learning_rate": 4.030540392760064e-06, "loss": 28.0333, "step": 302950 }, { "epoch": 0.6119983677888792, "grad_norm": 39.727569580078125, "learning_rate": 4.030197953531346e-06, "loss": 17.8296, "step": 302960 }, { "epoch": 0.612018568421563, "grad_norm": 290.7801208496094, "learning_rate": 4.029855519029326e-06, "loss": 27.3416, "step": 302970 }, { "epoch": 0.6120387690542468, "grad_norm": 285.1624755859375, "learning_rate": 4.029513089255673e-06, "loss": 13.9092, "step": 302980 }, { "epoch": 0.6120589696869306, "grad_norm": 355.80853271484375, "learning_rate": 4.0291706642120545e-06, "loss": 12.8309, "step": 302990 }, { "epoch": 0.6120791703196145, "grad_norm": 23.040863037109375, "learning_rate": 4.028828243900141e-06, "loss": 13.6793, "step": 303000 }, { "epoch": 0.6120993709522983, "grad_norm": 540.1137084960938, "learning_rate": 4.028485828321601e-06, "loss": 20.5472, "step": 303010 }, { "epoch": 0.6121195715849821, "grad_norm": 537.0182495117188, "learning_rate": 4.028143417478102e-06, "loss": 19.4431, "step": 303020 }, { "epoch": 0.6121397722176659, "grad_norm": 20.57938575744629, "learning_rate": 4.0278010113713165e-06, "loss": 22.6833, "step": 303030 }, { "epoch": 0.6121599728503496, "grad_norm": 568.78662109375, "learning_rate": 4.027458610002908e-06, "loss": 16.5557, "step": 303040 }, { "epoch": 0.6121801734830334, "grad_norm": 517.9844970703125, "learning_rate": 4.02711621337455e-06, "loss": 35.6758, "step": 303050 }, { "epoch": 0.6122003741157173, "grad_norm": 70.72477722167969, "learning_rate": 4.0267738214879095e-06, "loss": 17.1752, "step": 303060 }, { "epoch": 0.6122205747484011, "grad_norm": 134.55294799804688, "learning_rate": 4.026431434344656e-06, "loss": 17.99, "step": 303070 }, { "epoch": 0.6122407753810849, "grad_norm": 358.7864074707031, "learning_rate": 4.0260890519464565e-06, "loss": 18.1444, "step": 303080 }, { "epoch": 0.6122609760137687, "grad_norm": 206.77333068847656, "learning_rate": 4.02574667429498e-06, "loss": 14.1423, "step": 303090 }, { "epoch": 0.6122811766464525, "grad_norm": 293.6374816894531, "learning_rate": 4.025404301391898e-06, "loss": 13.7979, "step": 303100 }, { "epoch": 0.6123013772791364, "grad_norm": 96.79202270507812, "learning_rate": 4.0250619332388765e-06, "loss": 9.5087, "step": 303110 }, { "epoch": 0.6123215779118202, "grad_norm": 275.1803894042969, "learning_rate": 4.024719569837584e-06, "loss": 14.2356, "step": 303120 }, { "epoch": 0.612341778544504, "grad_norm": 110.54795837402344, "learning_rate": 4.024377211189693e-06, "loss": 12.0165, "step": 303130 }, { "epoch": 0.6123619791771878, "grad_norm": 123.18721008300781, "learning_rate": 4.024034857296866e-06, "loss": 18.6534, "step": 303140 }, { "epoch": 0.6123821798098716, "grad_norm": 416.5890808105469, "learning_rate": 4.023692508160776e-06, "loss": 19.4065, "step": 303150 }, { "epoch": 0.6124023804425555, "grad_norm": 326.8098449707031, "learning_rate": 4.0233501637830905e-06, "loss": 14.5304, "step": 303160 }, { "epoch": 0.6124225810752393, "grad_norm": 409.9657287597656, "learning_rate": 4.023007824165476e-06, "loss": 24.3574, "step": 303170 }, { "epoch": 0.6124427817079231, "grad_norm": 244.0195770263672, "learning_rate": 4.022665489309604e-06, "loss": 23.8146, "step": 303180 }, { "epoch": 0.6124629823406069, "grad_norm": 537.6898803710938, "learning_rate": 4.022323159217144e-06, "loss": 16.9534, "step": 303190 }, { "epoch": 0.6124831829732907, "grad_norm": 259.3690185546875, "learning_rate": 4.02198083388976e-06, "loss": 18.9822, "step": 303200 }, { "epoch": 0.6125033836059746, "grad_norm": 528.8189697265625, "learning_rate": 4.021638513329123e-06, "loss": 16.7702, "step": 303210 }, { "epoch": 0.6125235842386584, "grad_norm": 148.33056640625, "learning_rate": 4.0212961975369e-06, "loss": 21.9525, "step": 303220 }, { "epoch": 0.6125437848713422, "grad_norm": 372.2474060058594, "learning_rate": 4.020953886514764e-06, "loss": 19.1226, "step": 303230 }, { "epoch": 0.612563985504026, "grad_norm": 127.28125762939453, "learning_rate": 4.020611580264377e-06, "loss": 27.4047, "step": 303240 }, { "epoch": 0.6125841861367098, "grad_norm": 177.97096252441406, "learning_rate": 4.020269278787411e-06, "loss": 13.9339, "step": 303250 }, { "epoch": 0.6126043867693937, "grad_norm": 149.255126953125, "learning_rate": 4.019926982085536e-06, "loss": 10.9807, "step": 303260 }, { "epoch": 0.6126245874020775, "grad_norm": 338.8303527832031, "learning_rate": 4.019584690160416e-06, "loss": 11.7567, "step": 303270 }, { "epoch": 0.6126447880347613, "grad_norm": 97.56961822509766, "learning_rate": 4.019242403013721e-06, "loss": 20.0096, "step": 303280 }, { "epoch": 0.612664988667445, "grad_norm": 264.19854736328125, "learning_rate": 4.0189001206471215e-06, "loss": 25.0497, "step": 303290 }, { "epoch": 0.6126851893001288, "grad_norm": 257.3600158691406, "learning_rate": 4.018557843062282e-06, "loss": 24.1359, "step": 303300 }, { "epoch": 0.6127053899328126, "grad_norm": 207.45973205566406, "learning_rate": 4.018215570260872e-06, "loss": 20.7778, "step": 303310 }, { "epoch": 0.6127255905654965, "grad_norm": 144.51002502441406, "learning_rate": 4.017873302244563e-06, "loss": 16.147, "step": 303320 }, { "epoch": 0.6127457911981803, "grad_norm": 181.77085876464844, "learning_rate": 4.017531039015017e-06, "loss": 22.6905, "step": 303330 }, { "epoch": 0.6127659918308641, "grad_norm": 177.75131225585938, "learning_rate": 4.017188780573907e-06, "loss": 14.9384, "step": 303340 }, { "epoch": 0.6127861924635479, "grad_norm": 185.46885681152344, "learning_rate": 4.016846526922901e-06, "loss": 19.2441, "step": 303350 }, { "epoch": 0.6128063930962317, "grad_norm": 317.3322448730469, "learning_rate": 4.016504278063664e-06, "loss": 10.8314, "step": 303360 }, { "epoch": 0.6128265937289156, "grad_norm": 122.62620544433594, "learning_rate": 4.016162033997867e-06, "loss": 14.5572, "step": 303370 }, { "epoch": 0.6128467943615994, "grad_norm": 225.37986755371094, "learning_rate": 4.0158197947271746e-06, "loss": 9.0634, "step": 303380 }, { "epoch": 0.6128669949942832, "grad_norm": 467.8299865722656, "learning_rate": 4.015477560253261e-06, "loss": 20.8479, "step": 303390 }, { "epoch": 0.612887195626967, "grad_norm": 140.73699951171875, "learning_rate": 4.015135330577787e-06, "loss": 13.5863, "step": 303400 }, { "epoch": 0.6129073962596508, "grad_norm": 546.203857421875, "learning_rate": 4.014793105702425e-06, "loss": 22.633, "step": 303410 }, { "epoch": 0.6129275968923347, "grad_norm": 697.6559448242188, "learning_rate": 4.014450885628843e-06, "loss": 25.0173, "step": 303420 }, { "epoch": 0.6129477975250185, "grad_norm": 200.85304260253906, "learning_rate": 4.014108670358707e-06, "loss": 12.8197, "step": 303430 }, { "epoch": 0.6129679981577023, "grad_norm": 339.7164611816406, "learning_rate": 4.013766459893686e-06, "loss": 14.0112, "step": 303440 }, { "epoch": 0.6129881987903861, "grad_norm": 253.38294982910156, "learning_rate": 4.0134242542354486e-06, "loss": 12.068, "step": 303450 }, { "epoch": 0.61300839942307, "grad_norm": 327.7706298828125, "learning_rate": 4.013082053385661e-06, "loss": 22.8556, "step": 303460 }, { "epoch": 0.6130286000557538, "grad_norm": 914.8258056640625, "learning_rate": 4.01273985734599e-06, "loss": 30.8542, "step": 303470 }, { "epoch": 0.6130488006884376, "grad_norm": 426.2039794921875, "learning_rate": 4.012397666118108e-06, "loss": 33.9603, "step": 303480 }, { "epoch": 0.6130690013211214, "grad_norm": 75.91201782226562, "learning_rate": 4.012055479703678e-06, "loss": 9.1208, "step": 303490 }, { "epoch": 0.6130892019538052, "grad_norm": 26.533742904663086, "learning_rate": 4.0117132981043695e-06, "loss": 16.1355, "step": 303500 }, { "epoch": 0.613109402586489, "grad_norm": 206.44146728515625, "learning_rate": 4.011371121321851e-06, "loss": 34.9997, "step": 303510 }, { "epoch": 0.6131296032191729, "grad_norm": 301.47027587890625, "learning_rate": 4.011028949357791e-06, "loss": 15.2284, "step": 303520 }, { "epoch": 0.6131498038518567, "grad_norm": 192.57034301757812, "learning_rate": 4.010686782213855e-06, "loss": 12.0119, "step": 303530 }, { "epoch": 0.6131700044845405, "grad_norm": 365.083251953125, "learning_rate": 4.01034461989171e-06, "loss": 22.7354, "step": 303540 }, { "epoch": 0.6131902051172242, "grad_norm": 127.57703399658203, "learning_rate": 4.01000246239303e-06, "loss": 20.6417, "step": 303550 }, { "epoch": 0.613210405749908, "grad_norm": 96.91691589355469, "learning_rate": 4.009660309719473e-06, "loss": 7.2648, "step": 303560 }, { "epoch": 0.6132306063825919, "grad_norm": 69.4158706665039, "learning_rate": 4.009318161872714e-06, "loss": 6.5586, "step": 303570 }, { "epoch": 0.6132508070152757, "grad_norm": 138.2059326171875, "learning_rate": 4.008976018854418e-06, "loss": 14.9358, "step": 303580 }, { "epoch": 0.6132710076479595, "grad_norm": 522.6701049804688, "learning_rate": 4.0086338806662525e-06, "loss": 21.9627, "step": 303590 }, { "epoch": 0.6132912082806433, "grad_norm": 580.1276245117188, "learning_rate": 4.0082917473098845e-06, "loss": 27.2813, "step": 303600 }, { "epoch": 0.6133114089133271, "grad_norm": 749.5454711914062, "learning_rate": 4.007949618786984e-06, "loss": 17.3553, "step": 303610 }, { "epoch": 0.613331609546011, "grad_norm": 66.59337615966797, "learning_rate": 4.007607495099215e-06, "loss": 23.3556, "step": 303620 }, { "epoch": 0.6133518101786948, "grad_norm": 82.20333099365234, "learning_rate": 4.007265376248246e-06, "loss": 22.5988, "step": 303630 }, { "epoch": 0.6133720108113786, "grad_norm": 243.89247131347656, "learning_rate": 4.0069232622357475e-06, "loss": 22.3378, "step": 303640 }, { "epoch": 0.6133922114440624, "grad_norm": 351.98095703125, "learning_rate": 4.006581153063383e-06, "loss": 20.8268, "step": 303650 }, { "epoch": 0.6134124120767462, "grad_norm": 247.8439483642578, "learning_rate": 4.006239048732822e-06, "loss": 27.7245, "step": 303660 }, { "epoch": 0.6134326127094301, "grad_norm": 418.3920593261719, "learning_rate": 4.005896949245731e-06, "loss": 18.5711, "step": 303670 }, { "epoch": 0.6134528133421139, "grad_norm": 162.52294921875, "learning_rate": 4.005554854603779e-06, "loss": 8.1957, "step": 303680 }, { "epoch": 0.6134730139747977, "grad_norm": 222.6929168701172, "learning_rate": 4.0052127648086305e-06, "loss": 23.7336, "step": 303690 }, { "epoch": 0.6134932146074815, "grad_norm": 42.90855026245117, "learning_rate": 4.004870679861953e-06, "loss": 11.6229, "step": 303700 }, { "epoch": 0.6135134152401653, "grad_norm": 230.06561279296875, "learning_rate": 4.004528599765419e-06, "loss": 19.6492, "step": 303710 }, { "epoch": 0.6135336158728492, "grad_norm": 221.94558715820312, "learning_rate": 4.004186524520689e-06, "loss": 28.0311, "step": 303720 }, { "epoch": 0.613553816505533, "grad_norm": 157.37518310546875, "learning_rate": 4.003844454129434e-06, "loss": 16.1183, "step": 303730 }, { "epoch": 0.6135740171382168, "grad_norm": 351.822998046875, "learning_rate": 4.003502388593321e-06, "loss": 18.2537, "step": 303740 }, { "epoch": 0.6135942177709006, "grad_norm": 144.45896911621094, "learning_rate": 4.003160327914015e-06, "loss": 21.867, "step": 303750 }, { "epoch": 0.6136144184035844, "grad_norm": 687.1519775390625, "learning_rate": 4.002818272093185e-06, "loss": 42.1998, "step": 303760 }, { "epoch": 0.6136346190362683, "grad_norm": 459.29864501953125, "learning_rate": 4.002476221132499e-06, "loss": 19.4606, "step": 303770 }, { "epoch": 0.6136548196689521, "grad_norm": 312.3213195800781, "learning_rate": 4.002134175033621e-06, "loss": 31.6935, "step": 303780 }, { "epoch": 0.6136750203016359, "grad_norm": 386.993408203125, "learning_rate": 4.001792133798221e-06, "loss": 18.3934, "step": 303790 }, { "epoch": 0.6136952209343196, "grad_norm": 420.37811279296875, "learning_rate": 4.001450097427965e-06, "loss": 13.488, "step": 303800 }, { "epoch": 0.6137154215670034, "grad_norm": 249.7445068359375, "learning_rate": 4.001108065924521e-06, "loss": 24.2968, "step": 303810 }, { "epoch": 0.6137356221996872, "grad_norm": 268.2642822265625, "learning_rate": 4.000766039289554e-06, "loss": 23.1588, "step": 303820 }, { "epoch": 0.6137558228323711, "grad_norm": 158.5491485595703, "learning_rate": 4.000424017524732e-06, "loss": 19.1991, "step": 303830 }, { "epoch": 0.6137760234650549, "grad_norm": 295.2488098144531, "learning_rate": 4.000082000631724e-06, "loss": 15.5372, "step": 303840 }, { "epoch": 0.6137962240977387, "grad_norm": 255.60597229003906, "learning_rate": 3.999739988612192e-06, "loss": 17.8267, "step": 303850 }, { "epoch": 0.6138164247304225, "grad_norm": 2.9543447494506836, "learning_rate": 3.999397981467808e-06, "loss": 9.9864, "step": 303860 }, { "epoch": 0.6138366253631063, "grad_norm": 523.4191284179688, "learning_rate": 3.999055979200238e-06, "loss": 20.1841, "step": 303870 }, { "epoch": 0.6138568259957902, "grad_norm": 101.53417205810547, "learning_rate": 3.998713981811145e-06, "loss": 19.6823, "step": 303880 }, { "epoch": 0.613877026628474, "grad_norm": 1567.840087890625, "learning_rate": 3.9983719893022e-06, "loss": 20.6828, "step": 303890 }, { "epoch": 0.6138972272611578, "grad_norm": 364.6643981933594, "learning_rate": 3.9980300016750696e-06, "loss": 14.7878, "step": 303900 }, { "epoch": 0.6139174278938416, "grad_norm": 335.1248779296875, "learning_rate": 3.997688018931418e-06, "loss": 19.222, "step": 303910 }, { "epoch": 0.6139376285265254, "grad_norm": 0.0, "learning_rate": 3.997346041072912e-06, "loss": 22.3278, "step": 303920 }, { "epoch": 0.6139578291592093, "grad_norm": 272.1278381347656, "learning_rate": 3.997004068101224e-06, "loss": 14.0328, "step": 303930 }, { "epoch": 0.6139780297918931, "grad_norm": 79.75018310546875, "learning_rate": 3.9966621000180125e-06, "loss": 25.1082, "step": 303940 }, { "epoch": 0.6139982304245769, "grad_norm": 274.4888916015625, "learning_rate": 3.9963201368249495e-06, "loss": 24.5026, "step": 303950 }, { "epoch": 0.6140184310572607, "grad_norm": 407.2281494140625, "learning_rate": 3.9959781785237e-06, "loss": 20.6957, "step": 303960 }, { "epoch": 0.6140386316899445, "grad_norm": 350.5308837890625, "learning_rate": 3.995636225115933e-06, "loss": 19.2128, "step": 303970 }, { "epoch": 0.6140588323226284, "grad_norm": 265.3211975097656, "learning_rate": 3.995294276603312e-06, "loss": 20.2212, "step": 303980 }, { "epoch": 0.6140790329553122, "grad_norm": 556.0194091796875, "learning_rate": 3.9949523329875025e-06, "loss": 26.8115, "step": 303990 }, { "epoch": 0.614099233587996, "grad_norm": 324.6321105957031, "learning_rate": 3.994610394270178e-06, "loss": 18.8246, "step": 304000 }, { "epoch": 0.6141194342206798, "grad_norm": 3.5997114181518555, "learning_rate": 3.994268460452997e-06, "loss": 18.0958, "step": 304010 }, { "epoch": 0.6141396348533636, "grad_norm": 308.4197082519531, "learning_rate": 3.993926531537631e-06, "loss": 10.9138, "step": 304020 }, { "epoch": 0.6141598354860475, "grad_norm": 378.4521789550781, "learning_rate": 3.993584607525745e-06, "loss": 20.8312, "step": 304030 }, { "epoch": 0.6141800361187313, "grad_norm": 154.62342834472656, "learning_rate": 3.993242688419006e-06, "loss": 21.6376, "step": 304040 }, { "epoch": 0.6142002367514151, "grad_norm": 414.7979431152344, "learning_rate": 3.992900774219078e-06, "loss": 22.4774, "step": 304050 }, { "epoch": 0.6142204373840988, "grad_norm": 559.0978393554688, "learning_rate": 3.992558864927633e-06, "loss": 26.6666, "step": 304060 }, { "epoch": 0.6142406380167826, "grad_norm": 183.42886352539062, "learning_rate": 3.9922169605463305e-06, "loss": 10.925, "step": 304070 }, { "epoch": 0.6142608386494665, "grad_norm": 217.54473876953125, "learning_rate": 3.991875061076841e-06, "loss": 17.1213, "step": 304080 }, { "epoch": 0.6142810392821503, "grad_norm": 192.2228240966797, "learning_rate": 3.991533166520832e-06, "loss": 17.7436, "step": 304090 }, { "epoch": 0.6143012399148341, "grad_norm": 403.9972839355469, "learning_rate": 3.991191276879966e-06, "loss": 27.2213, "step": 304100 }, { "epoch": 0.6143214405475179, "grad_norm": 367.00299072265625, "learning_rate": 3.990849392155912e-06, "loss": 14.5086, "step": 304110 }, { "epoch": 0.6143416411802017, "grad_norm": 497.26043701171875, "learning_rate": 3.990507512350336e-06, "loss": 31.919, "step": 304120 }, { "epoch": 0.6143618418128856, "grad_norm": 462.1752014160156, "learning_rate": 3.990165637464904e-06, "loss": 25.6189, "step": 304130 }, { "epoch": 0.6143820424455694, "grad_norm": 267.2065124511719, "learning_rate": 3.9898237675012815e-06, "loss": 15.9635, "step": 304140 }, { "epoch": 0.6144022430782532, "grad_norm": 283.1322326660156, "learning_rate": 3.989481902461135e-06, "loss": 24.4795, "step": 304150 }, { "epoch": 0.614422443710937, "grad_norm": 172.75286865234375, "learning_rate": 3.989140042346134e-06, "loss": 16.6167, "step": 304160 }, { "epoch": 0.6144426443436208, "grad_norm": 429.82598876953125, "learning_rate": 3.988798187157939e-06, "loss": 18.2117, "step": 304170 }, { "epoch": 0.6144628449763047, "grad_norm": 251.40528869628906, "learning_rate": 3.988456336898219e-06, "loss": 13.3678, "step": 304180 }, { "epoch": 0.6144830456089885, "grad_norm": 383.9817199707031, "learning_rate": 3.988114491568642e-06, "loss": 18.8584, "step": 304190 }, { "epoch": 0.6145032462416723, "grad_norm": 335.6404113769531, "learning_rate": 3.987772651170871e-06, "loss": 39.2626, "step": 304200 }, { "epoch": 0.6145234468743561, "grad_norm": 209.65673828125, "learning_rate": 3.9874308157065735e-06, "loss": 22.2432, "step": 304210 }, { "epoch": 0.6145436475070399, "grad_norm": 445.579833984375, "learning_rate": 3.987088985177417e-06, "loss": 22.2905, "step": 304220 }, { "epoch": 0.6145638481397238, "grad_norm": 18.512432098388672, "learning_rate": 3.986747159585063e-06, "loss": 23.2755, "step": 304230 }, { "epoch": 0.6145840487724076, "grad_norm": 371.87274169921875, "learning_rate": 3.986405338931182e-06, "loss": 27.5295, "step": 304240 }, { "epoch": 0.6146042494050914, "grad_norm": 384.5657043457031, "learning_rate": 3.986063523217439e-06, "loss": 17.4216, "step": 304250 }, { "epoch": 0.6146244500377752, "grad_norm": 39.9920768737793, "learning_rate": 3.9857217124454985e-06, "loss": 25.4616, "step": 304260 }, { "epoch": 0.614644650670459, "grad_norm": 0.0, "learning_rate": 3.985379906617027e-06, "loss": 21.3613, "step": 304270 }, { "epoch": 0.6146648513031429, "grad_norm": 395.71429443359375, "learning_rate": 3.985038105733691e-06, "loss": 18.622, "step": 304280 }, { "epoch": 0.6146850519358267, "grad_norm": 11.261442184448242, "learning_rate": 3.984696309797157e-06, "loss": 12.1378, "step": 304290 }, { "epoch": 0.6147052525685105, "grad_norm": 199.3360595703125, "learning_rate": 3.98435451880909e-06, "loss": 38.7362, "step": 304300 }, { "epoch": 0.6147254532011943, "grad_norm": 218.71835327148438, "learning_rate": 3.984012732771154e-06, "loss": 14.0924, "step": 304310 }, { "epoch": 0.614745653833878, "grad_norm": 534.924072265625, "learning_rate": 3.98367095168502e-06, "loss": 17.0713, "step": 304320 }, { "epoch": 0.6147658544665618, "grad_norm": 227.94003295898438, "learning_rate": 3.983329175552348e-06, "loss": 18.7763, "step": 304330 }, { "epoch": 0.6147860550992457, "grad_norm": 359.94989013671875, "learning_rate": 3.9829874043748064e-06, "loss": 14.03, "step": 304340 }, { "epoch": 0.6148062557319295, "grad_norm": 330.6523742675781, "learning_rate": 3.982645638154062e-06, "loss": 18.2002, "step": 304350 }, { "epoch": 0.6148264563646133, "grad_norm": 238.61209106445312, "learning_rate": 3.982303876891778e-06, "loss": 18.9135, "step": 304360 }, { "epoch": 0.6148466569972971, "grad_norm": 138.2786407470703, "learning_rate": 3.981962120589623e-06, "loss": 20.403, "step": 304370 }, { "epoch": 0.614866857629981, "grad_norm": 128.43121337890625, "learning_rate": 3.981620369249261e-06, "loss": 11.3467, "step": 304380 }, { "epoch": 0.6148870582626648, "grad_norm": 478.43115234375, "learning_rate": 3.981278622872357e-06, "loss": 15.0125, "step": 304390 }, { "epoch": 0.6149072588953486, "grad_norm": 269.7008972167969, "learning_rate": 3.980936881460576e-06, "loss": 41.3204, "step": 304400 }, { "epoch": 0.6149274595280324, "grad_norm": 145.97987365722656, "learning_rate": 3.980595145015588e-06, "loss": 9.0604, "step": 304410 }, { "epoch": 0.6149476601607162, "grad_norm": 151.25381469726562, "learning_rate": 3.9802534135390544e-06, "loss": 7.6668, "step": 304420 }, { "epoch": 0.6149678607934, "grad_norm": 471.8896484375, "learning_rate": 3.979911687032642e-06, "loss": 15.5134, "step": 304430 }, { "epoch": 0.6149880614260839, "grad_norm": 40.33194351196289, "learning_rate": 3.979569965498016e-06, "loss": 14.0003, "step": 304440 }, { "epoch": 0.6150082620587677, "grad_norm": 345.1069030761719, "learning_rate": 3.979228248936843e-06, "loss": 38.5977, "step": 304450 }, { "epoch": 0.6150284626914515, "grad_norm": 480.0616455078125, "learning_rate": 3.978886537350786e-06, "loss": 27.5864, "step": 304460 }, { "epoch": 0.6150486633241353, "grad_norm": 255.13621520996094, "learning_rate": 3.978544830741513e-06, "loss": 17.7348, "step": 304470 }, { "epoch": 0.6150688639568191, "grad_norm": 562.545654296875, "learning_rate": 3.9782031291106895e-06, "loss": 18.4887, "step": 304480 }, { "epoch": 0.615089064589503, "grad_norm": 8.470002174377441, "learning_rate": 3.97786143245998e-06, "loss": 22.0551, "step": 304490 }, { "epoch": 0.6151092652221868, "grad_norm": 301.8929138183594, "learning_rate": 3.977519740791049e-06, "loss": 15.5956, "step": 304500 }, { "epoch": 0.6151294658548706, "grad_norm": 280.8388671875, "learning_rate": 3.977178054105564e-06, "loss": 21.6675, "step": 304510 }, { "epoch": 0.6151496664875544, "grad_norm": 804.147705078125, "learning_rate": 3.9768363724051875e-06, "loss": 16.979, "step": 304520 }, { "epoch": 0.6151698671202382, "grad_norm": 222.47317504882812, "learning_rate": 3.976494695691586e-06, "loss": 23.4734, "step": 304530 }, { "epoch": 0.6151900677529221, "grad_norm": 130.9874725341797, "learning_rate": 3.976153023966428e-06, "loss": 15.2954, "step": 304540 }, { "epoch": 0.6152102683856059, "grad_norm": 301.4122619628906, "learning_rate": 3.9758113572313735e-06, "loss": 15.6254, "step": 304550 }, { "epoch": 0.6152304690182897, "grad_norm": 432.7672424316406, "learning_rate": 3.975469695488091e-06, "loss": 28.8893, "step": 304560 }, { "epoch": 0.6152506696509734, "grad_norm": 211.76853942871094, "learning_rate": 3.975128038738245e-06, "loss": 26.3517, "step": 304570 }, { "epoch": 0.6152708702836572, "grad_norm": 50.22599411010742, "learning_rate": 3.974786386983501e-06, "loss": 20.3054, "step": 304580 }, { "epoch": 0.615291070916341, "grad_norm": 131.66336059570312, "learning_rate": 3.974444740225524e-06, "loss": 16.2291, "step": 304590 }, { "epoch": 0.6153112715490249, "grad_norm": 283.45172119140625, "learning_rate": 3.974103098465976e-06, "loss": 12.6837, "step": 304600 }, { "epoch": 0.6153314721817087, "grad_norm": 256.03302001953125, "learning_rate": 3.97376146170653e-06, "loss": 17.3728, "step": 304610 }, { "epoch": 0.6153516728143925, "grad_norm": 545.793212890625, "learning_rate": 3.973419829948843e-06, "loss": 21.9407, "step": 304620 }, { "epoch": 0.6153718734470763, "grad_norm": 1102.090576171875, "learning_rate": 3.973078203194584e-06, "loss": 19.4214, "step": 304630 }, { "epoch": 0.6153920740797602, "grad_norm": 216.1559600830078, "learning_rate": 3.972736581445418e-06, "loss": 14.5676, "step": 304640 }, { "epoch": 0.615412274712444, "grad_norm": 413.9364929199219, "learning_rate": 3.972394964703008e-06, "loss": 23.2032, "step": 304650 }, { "epoch": 0.6154324753451278, "grad_norm": 186.80946350097656, "learning_rate": 3.97205335296902e-06, "loss": 21.2076, "step": 304660 }, { "epoch": 0.6154526759778116, "grad_norm": 265.9928894042969, "learning_rate": 3.971711746245122e-06, "loss": 18.8444, "step": 304670 }, { "epoch": 0.6154728766104954, "grad_norm": 120.10843658447266, "learning_rate": 3.971370144532973e-06, "loss": 12.5545, "step": 304680 }, { "epoch": 0.6154930772431793, "grad_norm": 229.55987548828125, "learning_rate": 3.971028547834241e-06, "loss": 14.2739, "step": 304690 }, { "epoch": 0.6155132778758631, "grad_norm": 251.9241180419922, "learning_rate": 3.970686956150595e-06, "loss": 16.433, "step": 304700 }, { "epoch": 0.6155334785085469, "grad_norm": 299.3033752441406, "learning_rate": 3.970345369483693e-06, "loss": 14.9389, "step": 304710 }, { "epoch": 0.6155536791412307, "grad_norm": 405.1098327636719, "learning_rate": 3.970003787835203e-06, "loss": 17.4654, "step": 304720 }, { "epoch": 0.6155738797739145, "grad_norm": 320.255126953125, "learning_rate": 3.969662211206789e-06, "loss": 19.6432, "step": 304730 }, { "epoch": 0.6155940804065984, "grad_norm": 222.3494415283203, "learning_rate": 3.969320639600118e-06, "loss": 17.5295, "step": 304740 }, { "epoch": 0.6156142810392822, "grad_norm": 549.2926635742188, "learning_rate": 3.968979073016853e-06, "loss": 19.9787, "step": 304750 }, { "epoch": 0.615634481671966, "grad_norm": 102.84200286865234, "learning_rate": 3.968637511458657e-06, "loss": 15.3183, "step": 304760 }, { "epoch": 0.6156546823046498, "grad_norm": 169.35118103027344, "learning_rate": 3.9682959549272e-06, "loss": 15.4054, "step": 304770 }, { "epoch": 0.6156748829373336, "grad_norm": 205.1328582763672, "learning_rate": 3.9679544034241406e-06, "loss": 15.2751, "step": 304780 }, { "epoch": 0.6156950835700175, "grad_norm": 15.390819549560547, "learning_rate": 3.967612856951146e-06, "loss": 13.1561, "step": 304790 }, { "epoch": 0.6157152842027013, "grad_norm": 336.6370544433594, "learning_rate": 3.967271315509884e-06, "loss": 30.15, "step": 304800 }, { "epoch": 0.6157354848353851, "grad_norm": 0.0, "learning_rate": 3.966929779102015e-06, "loss": 28.8895, "step": 304810 }, { "epoch": 0.6157556854680689, "grad_norm": 178.34214782714844, "learning_rate": 3.9665882477292036e-06, "loss": 16.599, "step": 304820 }, { "epoch": 0.6157758861007526, "grad_norm": 625.7208862304688, "learning_rate": 3.966246721393118e-06, "loss": 15.2445, "step": 304830 }, { "epoch": 0.6157960867334364, "grad_norm": 518.2688598632812, "learning_rate": 3.965905200095419e-06, "loss": 30.8544, "step": 304840 }, { "epoch": 0.6158162873661203, "grad_norm": 143.34950256347656, "learning_rate": 3.965563683837772e-06, "loss": 14.1829, "step": 304850 }, { "epoch": 0.6158364879988041, "grad_norm": 195.54823303222656, "learning_rate": 3.965222172621844e-06, "loss": 21.4858, "step": 304860 }, { "epoch": 0.6158566886314879, "grad_norm": 450.74066162109375, "learning_rate": 3.964880666449296e-06, "loss": 28.5496, "step": 304870 }, { "epoch": 0.6158768892641717, "grad_norm": 192.30154418945312, "learning_rate": 3.964539165321795e-06, "loss": 24.8401, "step": 304880 }, { "epoch": 0.6158970898968555, "grad_norm": 628.0191650390625, "learning_rate": 3.964197669241004e-06, "loss": 19.3285, "step": 304890 }, { "epoch": 0.6159172905295394, "grad_norm": 16.403432846069336, "learning_rate": 3.963856178208588e-06, "loss": 7.2873, "step": 304900 }, { "epoch": 0.6159374911622232, "grad_norm": 518.9742431640625, "learning_rate": 3.963514692226212e-06, "loss": 15.9864, "step": 304910 }, { "epoch": 0.615957691794907, "grad_norm": 228.70155334472656, "learning_rate": 3.963173211295538e-06, "loss": 20.6585, "step": 304920 }, { "epoch": 0.6159778924275908, "grad_norm": 282.86383056640625, "learning_rate": 3.962831735418235e-06, "loss": 20.7208, "step": 304930 }, { "epoch": 0.6159980930602746, "grad_norm": 222.43106079101562, "learning_rate": 3.962490264595961e-06, "loss": 22.8038, "step": 304940 }, { "epoch": 0.6160182936929585, "grad_norm": 334.6487121582031, "learning_rate": 3.962148798830385e-06, "loss": 9.6963, "step": 304950 }, { "epoch": 0.6160384943256423, "grad_norm": 258.1534729003906, "learning_rate": 3.9618073381231705e-06, "loss": 31.4712, "step": 304960 }, { "epoch": 0.6160586949583261, "grad_norm": 279.94378662109375, "learning_rate": 3.9614658824759815e-06, "loss": 27.8578, "step": 304970 }, { "epoch": 0.6160788955910099, "grad_norm": 119.04959869384766, "learning_rate": 3.96112443189048e-06, "loss": 24.3361, "step": 304980 }, { "epoch": 0.6160990962236937, "grad_norm": 89.23475646972656, "learning_rate": 3.960782986368334e-06, "loss": 17.7106, "step": 304990 }, { "epoch": 0.6161192968563776, "grad_norm": 74.45112609863281, "learning_rate": 3.960441545911205e-06, "loss": 20.1002, "step": 305000 }, { "epoch": 0.6161394974890614, "grad_norm": 47.54545593261719, "learning_rate": 3.960100110520756e-06, "loss": 11.4118, "step": 305010 }, { "epoch": 0.6161596981217452, "grad_norm": 432.5112609863281, "learning_rate": 3.9597586801986544e-06, "loss": 15.1238, "step": 305020 }, { "epoch": 0.616179898754429, "grad_norm": 269.0662536621094, "learning_rate": 3.959417254946563e-06, "loss": 20.4096, "step": 305030 }, { "epoch": 0.6162000993871128, "grad_norm": 421.74267578125, "learning_rate": 3.9590758347661465e-06, "loss": 10.8944, "step": 305040 }, { "epoch": 0.6162203000197967, "grad_norm": 75.48063659667969, "learning_rate": 3.9587344196590665e-06, "loss": 16.4413, "step": 305050 }, { "epoch": 0.6162405006524805, "grad_norm": 300.36785888671875, "learning_rate": 3.95839300962699e-06, "loss": 7.9627, "step": 305060 }, { "epoch": 0.6162607012851643, "grad_norm": 439.3934326171875, "learning_rate": 3.958051604671579e-06, "loss": 19.1489, "step": 305070 }, { "epoch": 0.616280901917848, "grad_norm": 221.34597778320312, "learning_rate": 3.957710204794497e-06, "loss": 11.4541, "step": 305080 }, { "epoch": 0.6163011025505318, "grad_norm": 208.64430236816406, "learning_rate": 3.95736880999741e-06, "loss": 17.5262, "step": 305090 }, { "epoch": 0.6163213031832157, "grad_norm": 298.9476623535156, "learning_rate": 3.957027420281981e-06, "loss": 24.536, "step": 305100 }, { "epoch": 0.6163415038158995, "grad_norm": 183.13394165039062, "learning_rate": 3.956686035649874e-06, "loss": 19.4481, "step": 305110 }, { "epoch": 0.6163617044485833, "grad_norm": 141.32566833496094, "learning_rate": 3.956344656102754e-06, "loss": 12.1898, "step": 305120 }, { "epoch": 0.6163819050812671, "grad_norm": 483.9979553222656, "learning_rate": 3.9560032816422825e-06, "loss": 21.239, "step": 305130 }, { "epoch": 0.6164021057139509, "grad_norm": 176.9670867919922, "learning_rate": 3.955661912270123e-06, "loss": 19.6266, "step": 305140 }, { "epoch": 0.6164223063466348, "grad_norm": 244.12843322753906, "learning_rate": 3.955320547987943e-06, "loss": 12.4107, "step": 305150 }, { "epoch": 0.6164425069793186, "grad_norm": 592.330322265625, "learning_rate": 3.954979188797402e-06, "loss": 33.184, "step": 305160 }, { "epoch": 0.6164627076120024, "grad_norm": 438.67523193359375, "learning_rate": 3.954637834700166e-06, "loss": 17.6582, "step": 305170 }, { "epoch": 0.6164829082446862, "grad_norm": 322.2751159667969, "learning_rate": 3.954296485697899e-06, "loss": 19.7545, "step": 305180 }, { "epoch": 0.61650310887737, "grad_norm": 188.514892578125, "learning_rate": 3.953955141792264e-06, "loss": 11.0684, "step": 305190 }, { "epoch": 0.6165233095100539, "grad_norm": 499.8799743652344, "learning_rate": 3.9536138029849244e-06, "loss": 23.1779, "step": 305200 }, { "epoch": 0.6165435101427377, "grad_norm": 348.1578674316406, "learning_rate": 3.953272469277544e-06, "loss": 22.734, "step": 305210 }, { "epoch": 0.6165637107754215, "grad_norm": 238.52215576171875, "learning_rate": 3.952931140671789e-06, "loss": 16.7239, "step": 305220 }, { "epoch": 0.6165839114081053, "grad_norm": 211.25155639648438, "learning_rate": 3.9525898171693175e-06, "loss": 30.2442, "step": 305230 }, { "epoch": 0.6166041120407891, "grad_norm": 1.3977607488632202, "learning_rate": 3.952248498771797e-06, "loss": 15.0786, "step": 305240 }, { "epoch": 0.616624312673473, "grad_norm": 185.7633056640625, "learning_rate": 3.951907185480892e-06, "loss": 12.7904, "step": 305250 }, { "epoch": 0.6166445133061568, "grad_norm": 215.534912109375, "learning_rate": 3.9515658772982625e-06, "loss": 20.0435, "step": 305260 }, { "epoch": 0.6166647139388406, "grad_norm": 454.0273132324219, "learning_rate": 3.951224574225574e-06, "loss": 21.7461, "step": 305270 }, { "epoch": 0.6166849145715244, "grad_norm": 452.2432861328125, "learning_rate": 3.950883276264491e-06, "loss": 24.6903, "step": 305280 }, { "epoch": 0.6167051152042082, "grad_norm": 171.3167724609375, "learning_rate": 3.950541983416675e-06, "loss": 19.927, "step": 305290 }, { "epoch": 0.616725315836892, "grad_norm": 410.5175476074219, "learning_rate": 3.950200695683788e-06, "loss": 11.7182, "step": 305300 }, { "epoch": 0.6167455164695759, "grad_norm": 253.60366821289062, "learning_rate": 3.9498594130674985e-06, "loss": 13.394, "step": 305310 }, { "epoch": 0.6167657171022597, "grad_norm": 522.223876953125, "learning_rate": 3.949518135569465e-06, "loss": 26.6336, "step": 305320 }, { "epoch": 0.6167859177349435, "grad_norm": 91.65657806396484, "learning_rate": 3.949176863191353e-06, "loss": 9.8163, "step": 305330 }, { "epoch": 0.6168061183676272, "grad_norm": 50.497615814208984, "learning_rate": 3.948835595934826e-06, "loss": 9.9528, "step": 305340 }, { "epoch": 0.616826319000311, "grad_norm": 21.22364616394043, "learning_rate": 3.9484943338015465e-06, "loss": 15.4545, "step": 305350 }, { "epoch": 0.6168465196329949, "grad_norm": 135.08160400390625, "learning_rate": 3.948153076793179e-06, "loss": 24.1537, "step": 305360 }, { "epoch": 0.6168667202656787, "grad_norm": 444.0975341796875, "learning_rate": 3.947811824911383e-06, "loss": 12.6911, "step": 305370 }, { "epoch": 0.6168869208983625, "grad_norm": 167.71820068359375, "learning_rate": 3.947470578157829e-06, "loss": 12.124, "step": 305380 }, { "epoch": 0.6169071215310463, "grad_norm": 369.75543212890625, "learning_rate": 3.9471293365341716e-06, "loss": 17.2049, "step": 305390 }, { "epoch": 0.6169273221637301, "grad_norm": 51.165740966796875, "learning_rate": 3.94678810004208e-06, "loss": 26.9628, "step": 305400 }, { "epoch": 0.616947522796414, "grad_norm": 12.910235404968262, "learning_rate": 3.946446868683216e-06, "loss": 21.068, "step": 305410 }, { "epoch": 0.6169677234290978, "grad_norm": 373.57427978515625, "learning_rate": 3.946105642459241e-06, "loss": 12.0611, "step": 305420 }, { "epoch": 0.6169879240617816, "grad_norm": 170.7071990966797, "learning_rate": 3.9457644213718195e-06, "loss": 14.1581, "step": 305430 }, { "epoch": 0.6170081246944654, "grad_norm": 460.3855285644531, "learning_rate": 3.945423205422616e-06, "loss": 15.9806, "step": 305440 }, { "epoch": 0.6170283253271492, "grad_norm": 359.9859619140625, "learning_rate": 3.94508199461329e-06, "loss": 18.5477, "step": 305450 }, { "epoch": 0.6170485259598331, "grad_norm": 695.4000854492188, "learning_rate": 3.9447407889455054e-06, "loss": 31.6696, "step": 305460 }, { "epoch": 0.6170687265925169, "grad_norm": 356.8310852050781, "learning_rate": 3.944399588420928e-06, "loss": 12.73, "step": 305470 }, { "epoch": 0.6170889272252007, "grad_norm": 330.8516845703125, "learning_rate": 3.944058393041219e-06, "loss": 18.5961, "step": 305480 }, { "epoch": 0.6171091278578845, "grad_norm": 177.11868286132812, "learning_rate": 3.943717202808041e-06, "loss": 20.9808, "step": 305490 }, { "epoch": 0.6171293284905683, "grad_norm": 340.6443786621094, "learning_rate": 3.943376017723058e-06, "loss": 28.7177, "step": 305500 }, { "epoch": 0.6171495291232522, "grad_norm": 201.56466674804688, "learning_rate": 3.9430348377879315e-06, "loss": 10.7044, "step": 305510 }, { "epoch": 0.617169729755936, "grad_norm": 177.49868774414062, "learning_rate": 3.942693663004324e-06, "loss": 33.2082, "step": 305520 }, { "epoch": 0.6171899303886198, "grad_norm": 190.2554473876953, "learning_rate": 3.942352493373899e-06, "loss": 12.1064, "step": 305530 }, { "epoch": 0.6172101310213036, "grad_norm": 209.1536865234375, "learning_rate": 3.9420113288983235e-06, "loss": 21.3282, "step": 305540 }, { "epoch": 0.6172303316539874, "grad_norm": 223.57810974121094, "learning_rate": 3.941670169579252e-06, "loss": 12.6712, "step": 305550 }, { "epoch": 0.6172505322866713, "grad_norm": 424.9971008300781, "learning_rate": 3.9413290154183536e-06, "loss": 21.8203, "step": 305560 }, { "epoch": 0.6172707329193551, "grad_norm": 214.12815856933594, "learning_rate": 3.94098786641729e-06, "loss": 12.5897, "step": 305570 }, { "epoch": 0.6172909335520389, "grad_norm": 40.39407730102539, "learning_rate": 3.940646722577722e-06, "loss": 5.7521, "step": 305580 }, { "epoch": 0.6173111341847227, "grad_norm": 300.8336486816406, "learning_rate": 3.940305583901314e-06, "loss": 20.4113, "step": 305590 }, { "epoch": 0.6173313348174064, "grad_norm": 57.11048126220703, "learning_rate": 3.939964450389728e-06, "loss": 12.5926, "step": 305600 }, { "epoch": 0.6173515354500902, "grad_norm": 448.09088134765625, "learning_rate": 3.939623322044627e-06, "loss": 25.2387, "step": 305610 }, { "epoch": 0.6173717360827741, "grad_norm": 399.2313232421875, "learning_rate": 3.9392821988676715e-06, "loss": 13.5341, "step": 305620 }, { "epoch": 0.6173919367154579, "grad_norm": 116.9328842163086, "learning_rate": 3.938941080860527e-06, "loss": 29.1785, "step": 305630 }, { "epoch": 0.6174121373481417, "grad_norm": 201.0056610107422, "learning_rate": 3.938599968024855e-06, "loss": 13.8283, "step": 305640 }, { "epoch": 0.6174323379808255, "grad_norm": 659.1585083007812, "learning_rate": 3.938258860362319e-06, "loss": 19.4256, "step": 305650 }, { "epoch": 0.6174525386135093, "grad_norm": 504.7539978027344, "learning_rate": 3.937917757874579e-06, "loss": 29.0006, "step": 305660 }, { "epoch": 0.6174727392461932, "grad_norm": 321.6143798828125, "learning_rate": 3.9375766605633005e-06, "loss": 21.808, "step": 305670 }, { "epoch": 0.617492939878877, "grad_norm": 241.0337677001953, "learning_rate": 3.937235568430143e-06, "loss": 18.0898, "step": 305680 }, { "epoch": 0.6175131405115608, "grad_norm": 45.4260368347168, "learning_rate": 3.9368944814767704e-06, "loss": 21.88, "step": 305690 }, { "epoch": 0.6175333411442446, "grad_norm": 657.54931640625, "learning_rate": 3.936553399704848e-06, "loss": 17.7174, "step": 305700 }, { "epoch": 0.6175535417769284, "grad_norm": 590.8599243164062, "learning_rate": 3.936212323116032e-06, "loss": 26.6341, "step": 305710 }, { "epoch": 0.6175737424096123, "grad_norm": 584.8609619140625, "learning_rate": 3.935871251711989e-06, "loss": 34.2213, "step": 305720 }, { "epoch": 0.6175939430422961, "grad_norm": 160.8706512451172, "learning_rate": 3.935530185494381e-06, "loss": 5.4741, "step": 305730 }, { "epoch": 0.6176141436749799, "grad_norm": 190.96109008789062, "learning_rate": 3.93518912446487e-06, "loss": 24.8564, "step": 305740 }, { "epoch": 0.6176343443076637, "grad_norm": 235.7034454345703, "learning_rate": 3.934848068625117e-06, "loss": 22.7256, "step": 305750 }, { "epoch": 0.6176545449403475, "grad_norm": 238.10548400878906, "learning_rate": 3.934507017976788e-06, "loss": 16.2639, "step": 305760 }, { "epoch": 0.6176747455730314, "grad_norm": 260.1474914550781, "learning_rate": 3.9341659725215395e-06, "loss": 29.3742, "step": 305770 }, { "epoch": 0.6176949462057152, "grad_norm": 416.37353515625, "learning_rate": 3.9338249322610375e-06, "loss": 30.5366, "step": 305780 }, { "epoch": 0.617715146838399, "grad_norm": 148.81581115722656, "learning_rate": 3.933483897196944e-06, "loss": 17.3039, "step": 305790 }, { "epoch": 0.6177353474710828, "grad_norm": 189.60006713867188, "learning_rate": 3.933142867330921e-06, "loss": 18.9617, "step": 305800 }, { "epoch": 0.6177555481037666, "grad_norm": 334.80419921875, "learning_rate": 3.932801842664629e-06, "loss": 25.3259, "step": 305810 }, { "epoch": 0.6177757487364505, "grad_norm": 234.61390686035156, "learning_rate": 3.932460823199732e-06, "loss": 16.932, "step": 305820 }, { "epoch": 0.6177959493691343, "grad_norm": 631.3731689453125, "learning_rate": 3.932119808937892e-06, "loss": 19.9027, "step": 305830 }, { "epoch": 0.6178161500018181, "grad_norm": 379.9443054199219, "learning_rate": 3.9317787998807695e-06, "loss": 15.5665, "step": 305840 }, { "epoch": 0.6178363506345018, "grad_norm": 237.21026611328125, "learning_rate": 3.931437796030028e-06, "loss": 17.6408, "step": 305850 }, { "epoch": 0.6178565512671856, "grad_norm": 140.50453186035156, "learning_rate": 3.93109679738733e-06, "loss": 14.9056, "step": 305860 }, { "epoch": 0.6178767518998695, "grad_norm": 227.11630249023438, "learning_rate": 3.9307558039543355e-06, "loss": 18.4565, "step": 305870 }, { "epoch": 0.6178969525325533, "grad_norm": 141.63327026367188, "learning_rate": 3.930414815732709e-06, "loss": 16.7789, "step": 305880 }, { "epoch": 0.6179171531652371, "grad_norm": 328.6080017089844, "learning_rate": 3.93007383272411e-06, "loss": 19.8446, "step": 305890 }, { "epoch": 0.6179373537979209, "grad_norm": 120.56464385986328, "learning_rate": 3.9297328549302e-06, "loss": 16.741, "step": 305900 }, { "epoch": 0.6179575544306047, "grad_norm": 118.81554412841797, "learning_rate": 3.929391882352643e-06, "loss": 18.9382, "step": 305910 }, { "epoch": 0.6179777550632886, "grad_norm": 165.1092529296875, "learning_rate": 3.929050914993102e-06, "loss": 21.7598, "step": 305920 }, { "epoch": 0.6179979556959724, "grad_norm": 1592.407470703125, "learning_rate": 3.928709952853235e-06, "loss": 13.0876, "step": 305930 }, { "epoch": 0.6180181563286562, "grad_norm": 225.1356201171875, "learning_rate": 3.928368995934706e-06, "loss": 24.722, "step": 305940 }, { "epoch": 0.61803835696134, "grad_norm": 71.59931182861328, "learning_rate": 3.928028044239176e-06, "loss": 12.6054, "step": 305950 }, { "epoch": 0.6180585575940238, "grad_norm": 123.03755950927734, "learning_rate": 3.927687097768309e-06, "loss": 13.6578, "step": 305960 }, { "epoch": 0.6180787582267077, "grad_norm": 264.57470703125, "learning_rate": 3.927346156523764e-06, "loss": 17.9809, "step": 305970 }, { "epoch": 0.6180989588593915, "grad_norm": 228.93267822265625, "learning_rate": 3.927005220507203e-06, "loss": 16.2541, "step": 305980 }, { "epoch": 0.6181191594920753, "grad_norm": 209.06851196289062, "learning_rate": 3.926664289720291e-06, "loss": 25.0559, "step": 305990 }, { "epoch": 0.6181393601247591, "grad_norm": 298.55389404296875, "learning_rate": 3.926323364164684e-06, "loss": 32.6835, "step": 306000 }, { "epoch": 0.6181595607574429, "grad_norm": 183.11880493164062, "learning_rate": 3.925982443842048e-06, "loss": 10.8361, "step": 306010 }, { "epoch": 0.6181797613901268, "grad_norm": 329.8923034667969, "learning_rate": 3.925641528754045e-06, "loss": 17.4323, "step": 306020 }, { "epoch": 0.6181999620228106, "grad_norm": 364.4468994140625, "learning_rate": 3.925300618902332e-06, "loss": 15.4782, "step": 306030 }, { "epoch": 0.6182201626554944, "grad_norm": 193.6592559814453, "learning_rate": 3.924959714288575e-06, "loss": 16.0033, "step": 306040 }, { "epoch": 0.6182403632881782, "grad_norm": 117.53700256347656, "learning_rate": 3.924618814914435e-06, "loss": 2.7977, "step": 306050 }, { "epoch": 0.618260563920862, "grad_norm": 92.81304931640625, "learning_rate": 3.924277920781571e-06, "loss": 10.6817, "step": 306060 }, { "epoch": 0.6182807645535459, "grad_norm": 610.0953369140625, "learning_rate": 3.9239370318916445e-06, "loss": 19.0707, "step": 306070 }, { "epoch": 0.6183009651862297, "grad_norm": 182.5031280517578, "learning_rate": 3.92359614824632e-06, "loss": 23.4202, "step": 306080 }, { "epoch": 0.6183211658189135, "grad_norm": 376.035888671875, "learning_rate": 3.923255269847258e-06, "loss": 21.9551, "step": 306090 }, { "epoch": 0.6183413664515973, "grad_norm": 181.05616760253906, "learning_rate": 3.922914396696118e-06, "loss": 20.3982, "step": 306100 }, { "epoch": 0.618361567084281, "grad_norm": 286.5108947753906, "learning_rate": 3.9225735287945635e-06, "loss": 20.8756, "step": 306110 }, { "epoch": 0.6183817677169648, "grad_norm": 520.08935546875, "learning_rate": 3.922232666144255e-06, "loss": 25.6112, "step": 306120 }, { "epoch": 0.6184019683496487, "grad_norm": 323.1652526855469, "learning_rate": 3.921891808746853e-06, "loss": 21.2778, "step": 306130 }, { "epoch": 0.6184221689823325, "grad_norm": 0.0, "learning_rate": 3.921550956604019e-06, "loss": 20.249, "step": 306140 }, { "epoch": 0.6184423696150163, "grad_norm": 469.04248046875, "learning_rate": 3.921210109717417e-06, "loss": 20.345, "step": 306150 }, { "epoch": 0.6184625702477001, "grad_norm": 129.31651306152344, "learning_rate": 3.920869268088704e-06, "loss": 11.7205, "step": 306160 }, { "epoch": 0.618482770880384, "grad_norm": 301.40203857421875, "learning_rate": 3.920528431719544e-06, "loss": 20.1821, "step": 306170 }, { "epoch": 0.6185029715130678, "grad_norm": 159.47674560546875, "learning_rate": 3.9201876006115985e-06, "loss": 16.823, "step": 306180 }, { "epoch": 0.6185231721457516, "grad_norm": 8.716283798217773, "learning_rate": 3.9198467747665265e-06, "loss": 34.3622, "step": 306190 }, { "epoch": 0.6185433727784354, "grad_norm": 214.84754943847656, "learning_rate": 3.91950595418599e-06, "loss": 31.5351, "step": 306200 }, { "epoch": 0.6185635734111192, "grad_norm": 121.29723358154297, "learning_rate": 3.919165138871652e-06, "loss": 16.1025, "step": 306210 }, { "epoch": 0.618583774043803, "grad_norm": 363.14324951171875, "learning_rate": 3.918824328825171e-06, "loss": 23.8285, "step": 306220 }, { "epoch": 0.6186039746764869, "grad_norm": 391.4519348144531, "learning_rate": 3.918483524048208e-06, "loss": 16.137, "step": 306230 }, { "epoch": 0.6186241753091707, "grad_norm": 140.31028747558594, "learning_rate": 3.9181427245424266e-06, "loss": 34.3479, "step": 306240 }, { "epoch": 0.6186443759418545, "grad_norm": 654.1033935546875, "learning_rate": 3.917801930309486e-06, "loss": 26.4127, "step": 306250 }, { "epoch": 0.6186645765745383, "grad_norm": 148.85430908203125, "learning_rate": 3.9174611413510474e-06, "loss": 15.5625, "step": 306260 }, { "epoch": 0.6186847772072221, "grad_norm": 432.305419921875, "learning_rate": 3.9171203576687725e-06, "loss": 19.0575, "step": 306270 }, { "epoch": 0.618704977839906, "grad_norm": 361.6748352050781, "learning_rate": 3.916779579264322e-06, "loss": 31.501, "step": 306280 }, { "epoch": 0.6187251784725898, "grad_norm": 173.53648376464844, "learning_rate": 3.916438806139355e-06, "loss": 11.4388, "step": 306290 }, { "epoch": 0.6187453791052736, "grad_norm": 2.846980571746826, "learning_rate": 3.9160980382955336e-06, "loss": 26.6505, "step": 306300 }, { "epoch": 0.6187655797379574, "grad_norm": 280.8685607910156, "learning_rate": 3.9157572757345215e-06, "loss": 13.2198, "step": 306310 }, { "epoch": 0.6187857803706412, "grad_norm": 121.65333557128906, "learning_rate": 3.915416518457974e-06, "loss": 16.2801, "step": 306320 }, { "epoch": 0.6188059810033251, "grad_norm": 128.5829315185547, "learning_rate": 3.915075766467556e-06, "loss": 15.9424, "step": 306330 }, { "epoch": 0.6188261816360089, "grad_norm": 435.1832275390625, "learning_rate": 3.914735019764928e-06, "loss": 20.1728, "step": 306340 }, { "epoch": 0.6188463822686927, "grad_norm": 276.8594665527344, "learning_rate": 3.914394278351749e-06, "loss": 17.0881, "step": 306350 }, { "epoch": 0.6188665829013764, "grad_norm": 161.87864685058594, "learning_rate": 3.91405354222968e-06, "loss": 22.5121, "step": 306360 }, { "epoch": 0.6188867835340602, "grad_norm": 54.590213775634766, "learning_rate": 3.913712811400384e-06, "loss": 14.995, "step": 306370 }, { "epoch": 0.6189069841667441, "grad_norm": 271.6815490722656, "learning_rate": 3.913372085865519e-06, "loss": 9.4796, "step": 306380 }, { "epoch": 0.6189271847994279, "grad_norm": 256.6903381347656, "learning_rate": 3.913031365626746e-06, "loss": 16.2445, "step": 306390 }, { "epoch": 0.6189473854321117, "grad_norm": 33.27079772949219, "learning_rate": 3.912690650685726e-06, "loss": 29.314, "step": 306400 }, { "epoch": 0.6189675860647955, "grad_norm": 619.3416748046875, "learning_rate": 3.912349941044122e-06, "loss": 19.6506, "step": 306410 }, { "epoch": 0.6189877866974793, "grad_norm": 186.41400146484375, "learning_rate": 3.912009236703591e-06, "loss": 15.7942, "step": 306420 }, { "epoch": 0.6190079873301632, "grad_norm": 284.1806945800781, "learning_rate": 3.911668537665796e-06, "loss": 7.8367, "step": 306430 }, { "epoch": 0.619028187962847, "grad_norm": 198.03782653808594, "learning_rate": 3.9113278439323965e-06, "loss": 18.0484, "step": 306440 }, { "epoch": 0.6190483885955308, "grad_norm": 524.9952392578125, "learning_rate": 3.9109871555050514e-06, "loss": 23.3029, "step": 306450 }, { "epoch": 0.6190685892282146, "grad_norm": 317.2939147949219, "learning_rate": 3.910646472385423e-06, "loss": 20.9682, "step": 306460 }, { "epoch": 0.6190887898608984, "grad_norm": 139.11680603027344, "learning_rate": 3.910305794575174e-06, "loss": 23.2802, "step": 306470 }, { "epoch": 0.6191089904935823, "grad_norm": 347.69854736328125, "learning_rate": 3.90996512207596e-06, "loss": 26.708, "step": 306480 }, { "epoch": 0.6191291911262661, "grad_norm": 169.5913543701172, "learning_rate": 3.9096244548894445e-06, "loss": 16.2701, "step": 306490 }, { "epoch": 0.6191493917589499, "grad_norm": 511.9338684082031, "learning_rate": 3.909283793017289e-06, "loss": 29.2978, "step": 306500 }, { "epoch": 0.6191695923916337, "grad_norm": 102.9381103515625, "learning_rate": 3.90894313646115e-06, "loss": 19.722, "step": 306510 }, { "epoch": 0.6191897930243175, "grad_norm": 334.4829406738281, "learning_rate": 3.908602485222688e-06, "loss": 10.9484, "step": 306520 }, { "epoch": 0.6192099936570014, "grad_norm": 320.2415771484375, "learning_rate": 3.908261839303568e-06, "loss": 18.7713, "step": 306530 }, { "epoch": 0.6192301942896852, "grad_norm": 377.2544250488281, "learning_rate": 3.9079211987054475e-06, "loss": 21.9845, "step": 306540 }, { "epoch": 0.619250394922369, "grad_norm": 391.4934387207031, "learning_rate": 3.907580563429985e-06, "loss": 13.5573, "step": 306550 }, { "epoch": 0.6192705955550528, "grad_norm": 122.7528305053711, "learning_rate": 3.907239933478843e-06, "loss": 23.7539, "step": 306560 }, { "epoch": 0.6192907961877366, "grad_norm": 389.6560974121094, "learning_rate": 3.906899308853682e-06, "loss": 24.9005, "step": 306570 }, { "epoch": 0.6193109968204205, "grad_norm": 285.6032409667969, "learning_rate": 3.9065586895561605e-06, "loss": 13.0971, "step": 306580 }, { "epoch": 0.6193311974531043, "grad_norm": 532.7347412109375, "learning_rate": 3.906218075587938e-06, "loss": 17.9759, "step": 306590 }, { "epoch": 0.6193513980857881, "grad_norm": 346.3021545410156, "learning_rate": 3.905877466950679e-06, "loss": 13.7559, "step": 306600 }, { "epoch": 0.6193715987184719, "grad_norm": 52.47577667236328, "learning_rate": 3.905536863646037e-06, "loss": 12.4775, "step": 306610 }, { "epoch": 0.6193917993511556, "grad_norm": 244.10885620117188, "learning_rate": 3.905196265675677e-06, "loss": 30.4464, "step": 306620 }, { "epoch": 0.6194119999838394, "grad_norm": 65.07695770263672, "learning_rate": 3.904855673041259e-06, "loss": 13.4974, "step": 306630 }, { "epoch": 0.6194322006165233, "grad_norm": 219.5935821533203, "learning_rate": 3.90451508574444e-06, "loss": 27.4405, "step": 306640 }, { "epoch": 0.6194524012492071, "grad_norm": 373.8103942871094, "learning_rate": 3.904174503786882e-06, "loss": 21.7143, "step": 306650 }, { "epoch": 0.6194726018818909, "grad_norm": 400.4162292480469, "learning_rate": 3.903833927170245e-06, "loss": 18.7065, "step": 306660 }, { "epoch": 0.6194928025145747, "grad_norm": 596.9290161132812, "learning_rate": 3.9034933558961885e-06, "loss": 32.1879, "step": 306670 }, { "epoch": 0.6195130031472585, "grad_norm": 363.76336669921875, "learning_rate": 3.9031527899663705e-06, "loss": 30.448, "step": 306680 }, { "epoch": 0.6195332037799424, "grad_norm": 67.45185089111328, "learning_rate": 3.9028122293824535e-06, "loss": 12.24, "step": 306690 }, { "epoch": 0.6195534044126262, "grad_norm": 533.4208984375, "learning_rate": 3.902471674146099e-06, "loss": 20.2442, "step": 306700 }, { "epoch": 0.61957360504531, "grad_norm": 172.52378845214844, "learning_rate": 3.902131124258962e-06, "loss": 9.1007, "step": 306710 }, { "epoch": 0.6195938056779938, "grad_norm": 303.6745300292969, "learning_rate": 3.901790579722706e-06, "loss": 28.6964, "step": 306720 }, { "epoch": 0.6196140063106776, "grad_norm": 235.04116821289062, "learning_rate": 3.90145004053899e-06, "loss": 16.4848, "step": 306730 }, { "epoch": 0.6196342069433615, "grad_norm": 283.39752197265625, "learning_rate": 3.901109506709472e-06, "loss": 18.6285, "step": 306740 }, { "epoch": 0.6196544075760453, "grad_norm": 394.3387756347656, "learning_rate": 3.900768978235812e-06, "loss": 17.9247, "step": 306750 }, { "epoch": 0.6196746082087291, "grad_norm": 184.34324645996094, "learning_rate": 3.900428455119674e-06, "loss": 9.9521, "step": 306760 }, { "epoch": 0.6196948088414129, "grad_norm": 349.9398498535156, "learning_rate": 3.900087937362711e-06, "loss": 10.4889, "step": 306770 }, { "epoch": 0.6197150094740967, "grad_norm": 477.8321228027344, "learning_rate": 3.899747424966588e-06, "loss": 19.9721, "step": 306780 }, { "epoch": 0.6197352101067806, "grad_norm": 185.40760803222656, "learning_rate": 3.899406917932962e-06, "loss": 14.5079, "step": 306790 }, { "epoch": 0.6197554107394644, "grad_norm": 274.4436950683594, "learning_rate": 3.899066416263493e-06, "loss": 15.9882, "step": 306800 }, { "epoch": 0.6197756113721482, "grad_norm": 537.9502563476562, "learning_rate": 3.898725919959841e-06, "loss": 32.3725, "step": 306810 }, { "epoch": 0.619795812004832, "grad_norm": 345.4900817871094, "learning_rate": 3.898385429023666e-06, "loss": 13.1914, "step": 306820 }, { "epoch": 0.6198160126375158, "grad_norm": 469.9812927246094, "learning_rate": 3.898044943456626e-06, "loss": 27.4257, "step": 306830 }, { "epoch": 0.6198362132701997, "grad_norm": 461.4267578125, "learning_rate": 3.89770446326038e-06, "loss": 38.2657, "step": 306840 }, { "epoch": 0.6198564139028835, "grad_norm": 46.6909294128418, "learning_rate": 3.89736398843659e-06, "loss": 21.8874, "step": 306850 }, { "epoch": 0.6198766145355673, "grad_norm": 211.06100463867188, "learning_rate": 3.897023518986915e-06, "loss": 13.0863, "step": 306860 }, { "epoch": 0.619896815168251, "grad_norm": 527.6475219726562, "learning_rate": 3.896683054913013e-06, "loss": 30.2895, "step": 306870 }, { "epoch": 0.6199170158009348, "grad_norm": 563.4493408203125, "learning_rate": 3.896342596216543e-06, "loss": 21.4077, "step": 306880 }, { "epoch": 0.6199372164336187, "grad_norm": 386.384033203125, "learning_rate": 3.896002142899167e-06, "loss": 12.504, "step": 306890 }, { "epoch": 0.6199574170663025, "grad_norm": 256.8215026855469, "learning_rate": 3.895661694962542e-06, "loss": 13.468, "step": 306900 }, { "epoch": 0.6199776176989863, "grad_norm": 312.50958251953125, "learning_rate": 3.895321252408326e-06, "loss": 36.4803, "step": 306910 }, { "epoch": 0.6199978183316701, "grad_norm": 199.5186004638672, "learning_rate": 3.894980815238184e-06, "loss": 10.4368, "step": 306920 }, { "epoch": 0.6200180189643539, "grad_norm": 398.5443115234375, "learning_rate": 3.894640383453769e-06, "loss": 15.2277, "step": 306930 }, { "epoch": 0.6200382195970378, "grad_norm": 303.25445556640625, "learning_rate": 3.894299957056743e-06, "loss": 25.5356, "step": 306940 }, { "epoch": 0.6200584202297216, "grad_norm": 301.67645263671875, "learning_rate": 3.8939595360487655e-06, "loss": 14.8142, "step": 306950 }, { "epoch": 0.6200786208624054, "grad_norm": 369.89044189453125, "learning_rate": 3.893619120431494e-06, "loss": 22.1066, "step": 306960 }, { "epoch": 0.6200988214950892, "grad_norm": 402.91802978515625, "learning_rate": 3.893278710206589e-06, "loss": 19.9133, "step": 306970 }, { "epoch": 0.620119022127773, "grad_norm": 228.1986846923828, "learning_rate": 3.892938305375712e-06, "loss": 20.1618, "step": 306980 }, { "epoch": 0.6201392227604569, "grad_norm": 269.5574035644531, "learning_rate": 3.892597905940516e-06, "loss": 16.7638, "step": 306990 }, { "epoch": 0.6201594233931407, "grad_norm": 349.4557189941406, "learning_rate": 3.892257511902664e-06, "loss": 16.9979, "step": 307000 }, { "epoch": 0.6201796240258245, "grad_norm": 102.29174041748047, "learning_rate": 3.891917123263815e-06, "loss": 10.6777, "step": 307010 }, { "epoch": 0.6201998246585083, "grad_norm": 295.9165954589844, "learning_rate": 3.891576740025628e-06, "loss": 36.8738, "step": 307020 }, { "epoch": 0.6202200252911921, "grad_norm": 18.980192184448242, "learning_rate": 3.891236362189761e-06, "loss": 12.7489, "step": 307030 }, { "epoch": 0.620240225923876, "grad_norm": 265.3538513183594, "learning_rate": 3.890895989757874e-06, "loss": 21.3802, "step": 307040 }, { "epoch": 0.6202604265565598, "grad_norm": 53.41250228881836, "learning_rate": 3.890555622731626e-06, "loss": 12.3517, "step": 307050 }, { "epoch": 0.6202806271892436, "grad_norm": 259.9350280761719, "learning_rate": 3.890215261112674e-06, "loss": 11.9244, "step": 307060 }, { "epoch": 0.6203008278219274, "grad_norm": 438.7828674316406, "learning_rate": 3.889874904902678e-06, "loss": 12.7854, "step": 307070 }, { "epoch": 0.6203210284546112, "grad_norm": 982.8797607421875, "learning_rate": 3.889534554103299e-06, "loss": 30.3465, "step": 307080 }, { "epoch": 0.620341229087295, "grad_norm": 456.8072204589844, "learning_rate": 3.889194208716192e-06, "loss": 21.4674, "step": 307090 }, { "epoch": 0.6203614297199789, "grad_norm": 465.4453125, "learning_rate": 3.888853868743018e-06, "loss": 19.1703, "step": 307100 }, { "epoch": 0.6203816303526627, "grad_norm": 360.46148681640625, "learning_rate": 3.888513534185438e-06, "loss": 21.3085, "step": 307110 }, { "epoch": 0.6204018309853465, "grad_norm": 160.9456329345703, "learning_rate": 3.888173205045105e-06, "loss": 18.4519, "step": 307120 }, { "epoch": 0.6204220316180302, "grad_norm": 0.9178258776664734, "learning_rate": 3.887832881323681e-06, "loss": 16.7321, "step": 307130 }, { "epoch": 0.620442232250714, "grad_norm": 0.0, "learning_rate": 3.887492563022826e-06, "loss": 14.9353, "step": 307140 }, { "epoch": 0.6204624328833979, "grad_norm": 120.66466522216797, "learning_rate": 3.887152250144197e-06, "loss": 14.4958, "step": 307150 }, { "epoch": 0.6204826335160817, "grad_norm": 485.40374755859375, "learning_rate": 3.886811942689453e-06, "loss": 33.0535, "step": 307160 }, { "epoch": 0.6205028341487655, "grad_norm": 464.27606201171875, "learning_rate": 3.8864716406602525e-06, "loss": 15.2691, "step": 307170 }, { "epoch": 0.6205230347814493, "grad_norm": 527.4126586914062, "learning_rate": 3.886131344058255e-06, "loss": 20.4709, "step": 307180 }, { "epoch": 0.6205432354141331, "grad_norm": 6.754678249359131, "learning_rate": 3.8857910528851175e-06, "loss": 9.9202, "step": 307190 }, { "epoch": 0.620563436046817, "grad_norm": 368.6692810058594, "learning_rate": 3.885450767142498e-06, "loss": 20.8284, "step": 307200 }, { "epoch": 0.6205836366795008, "grad_norm": 367.6383972167969, "learning_rate": 3.8851104868320595e-06, "loss": 24.0959, "step": 307210 }, { "epoch": 0.6206038373121846, "grad_norm": 265.99951171875, "learning_rate": 3.884770211955454e-06, "loss": 20.2314, "step": 307220 }, { "epoch": 0.6206240379448684, "grad_norm": 179.40701293945312, "learning_rate": 3.884429942514345e-06, "loss": 10.7946, "step": 307230 }, { "epoch": 0.6206442385775522, "grad_norm": 159.17857360839844, "learning_rate": 3.884089678510389e-06, "loss": 13.5314, "step": 307240 }, { "epoch": 0.6206644392102361, "grad_norm": 187.47714233398438, "learning_rate": 3.883749419945244e-06, "loss": 14.5845, "step": 307250 }, { "epoch": 0.6206846398429199, "grad_norm": 296.05523681640625, "learning_rate": 3.883409166820569e-06, "loss": 24.1632, "step": 307260 }, { "epoch": 0.6207048404756037, "grad_norm": 322.72454833984375, "learning_rate": 3.883068919138023e-06, "loss": 19.8097, "step": 307270 }, { "epoch": 0.6207250411082875, "grad_norm": 57.570430755615234, "learning_rate": 3.882728676899263e-06, "loss": 25.0841, "step": 307280 }, { "epoch": 0.6207452417409713, "grad_norm": 495.6961669921875, "learning_rate": 3.882388440105947e-06, "loss": 21.4398, "step": 307290 }, { "epoch": 0.6207654423736552, "grad_norm": 130.6812744140625, "learning_rate": 3.882048208759735e-06, "loss": 21.7468, "step": 307300 }, { "epoch": 0.620785643006339, "grad_norm": 245.38528442382812, "learning_rate": 3.8817079828622855e-06, "loss": 21.0555, "step": 307310 }, { "epoch": 0.6208058436390228, "grad_norm": 20.44548225402832, "learning_rate": 3.881367762415255e-06, "loss": 9.2277, "step": 307320 }, { "epoch": 0.6208260442717066, "grad_norm": 704.4786376953125, "learning_rate": 3.881027547420302e-06, "loss": 35.6508, "step": 307330 }, { "epoch": 0.6208462449043904, "grad_norm": 0.0, "learning_rate": 3.880687337879086e-06, "loss": 7.4592, "step": 307340 }, { "epoch": 0.6208664455370743, "grad_norm": 276.523193359375, "learning_rate": 3.880347133793263e-06, "loss": 18.8749, "step": 307350 }, { "epoch": 0.6208866461697581, "grad_norm": 176.84860229492188, "learning_rate": 3.880006935164491e-06, "loss": 11.0034, "step": 307360 }, { "epoch": 0.6209068468024419, "grad_norm": 244.14663696289062, "learning_rate": 3.8796667419944335e-06, "loss": 16.0684, "step": 307370 }, { "epoch": 0.6209270474351257, "grad_norm": 307.1121826171875, "learning_rate": 3.87932655428474e-06, "loss": 23.7145, "step": 307380 }, { "epoch": 0.6209472480678094, "grad_norm": 417.1995544433594, "learning_rate": 3.878986372037074e-06, "loss": 17.3914, "step": 307390 }, { "epoch": 0.6209674487004933, "grad_norm": 127.32433319091797, "learning_rate": 3.8786461952530955e-06, "loss": 28.3173, "step": 307400 }, { "epoch": 0.6209876493331771, "grad_norm": 129.66036987304688, "learning_rate": 3.878306023934457e-06, "loss": 19.8415, "step": 307410 }, { "epoch": 0.6210078499658609, "grad_norm": 368.0888366699219, "learning_rate": 3.877965858082818e-06, "loss": 13.1207, "step": 307420 }, { "epoch": 0.6210280505985447, "grad_norm": 75.2337646484375, "learning_rate": 3.87762569769984e-06, "loss": 17.5971, "step": 307430 }, { "epoch": 0.6210482512312285, "grad_norm": 69.49254608154297, "learning_rate": 3.877285542787176e-06, "loss": 23.1641, "step": 307440 }, { "epoch": 0.6210684518639124, "grad_norm": 233.8582000732422, "learning_rate": 3.876945393346486e-06, "loss": 12.0797, "step": 307450 }, { "epoch": 0.6210886524965962, "grad_norm": 6.205816268920898, "learning_rate": 3.8766052493794286e-06, "loss": 14.5266, "step": 307460 }, { "epoch": 0.62110885312928, "grad_norm": 139.42173767089844, "learning_rate": 3.876265110887662e-06, "loss": 13.7484, "step": 307470 }, { "epoch": 0.6211290537619638, "grad_norm": 304.41009521484375, "learning_rate": 3.875924977872842e-06, "loss": 14.7066, "step": 307480 }, { "epoch": 0.6211492543946476, "grad_norm": 0.0, "learning_rate": 3.875584850336627e-06, "loss": 25.8814, "step": 307490 }, { "epoch": 0.6211694550273315, "grad_norm": 296.00640869140625, "learning_rate": 3.875244728280676e-06, "loss": 10.6843, "step": 307500 }, { "epoch": 0.6211896556600153, "grad_norm": 467.0179443359375, "learning_rate": 3.8749046117066455e-06, "loss": 13.0466, "step": 307510 }, { "epoch": 0.6212098562926991, "grad_norm": 373.14959716796875, "learning_rate": 3.874564500616192e-06, "loss": 14.3156, "step": 307520 }, { "epoch": 0.6212300569253829, "grad_norm": 0.0, "learning_rate": 3.874224395010977e-06, "loss": 10.4328, "step": 307530 }, { "epoch": 0.6212502575580667, "grad_norm": 302.8861389160156, "learning_rate": 3.873884294892654e-06, "loss": 17.7315, "step": 307540 }, { "epoch": 0.6212704581907506, "grad_norm": 390.365234375, "learning_rate": 3.873544200262882e-06, "loss": 13.9235, "step": 307550 }, { "epoch": 0.6212906588234344, "grad_norm": 238.66615295410156, "learning_rate": 3.873204111123321e-06, "loss": 14.8573, "step": 307560 }, { "epoch": 0.6213108594561182, "grad_norm": 142.0887908935547, "learning_rate": 3.872864027475626e-06, "loss": 18.9076, "step": 307570 }, { "epoch": 0.621331060088802, "grad_norm": 208.62063598632812, "learning_rate": 3.872523949321454e-06, "loss": 16.6379, "step": 307580 }, { "epoch": 0.6213512607214858, "grad_norm": 44.814292907714844, "learning_rate": 3.872183876662462e-06, "loss": 26.4601, "step": 307590 }, { "epoch": 0.6213714613541697, "grad_norm": 303.5199890136719, "learning_rate": 3.871843809500313e-06, "loss": 22.8079, "step": 307600 }, { "epoch": 0.6213916619868535, "grad_norm": 34.680694580078125, "learning_rate": 3.871503747836657e-06, "loss": 10.0516, "step": 307610 }, { "epoch": 0.6214118626195373, "grad_norm": 284.672119140625, "learning_rate": 3.8711636916731566e-06, "loss": 18.5806, "step": 307620 }, { "epoch": 0.6214320632522211, "grad_norm": 374.7568664550781, "learning_rate": 3.870823641011467e-06, "loss": 21.3824, "step": 307630 }, { "epoch": 0.6214522638849048, "grad_norm": 204.97398376464844, "learning_rate": 3.870483595853246e-06, "loss": 29.5283, "step": 307640 }, { "epoch": 0.6214724645175886, "grad_norm": 443.0758361816406, "learning_rate": 3.870143556200152e-06, "loss": 18.5894, "step": 307650 }, { "epoch": 0.6214926651502725, "grad_norm": 140.74176025390625, "learning_rate": 3.8698035220538404e-06, "loss": 11.0893, "step": 307660 }, { "epoch": 0.6215128657829563, "grad_norm": 3.694681406021118, "learning_rate": 3.869463493415969e-06, "loss": 29.1234, "step": 307670 }, { "epoch": 0.6215330664156401, "grad_norm": 211.23452758789062, "learning_rate": 3.869123470288195e-06, "loss": 20.4499, "step": 307680 }, { "epoch": 0.6215532670483239, "grad_norm": 468.81842041015625, "learning_rate": 3.868783452672177e-06, "loss": 18.2606, "step": 307690 }, { "epoch": 0.6215734676810077, "grad_norm": 163.2721710205078, "learning_rate": 3.868443440569571e-06, "loss": 23.1014, "step": 307700 }, { "epoch": 0.6215936683136916, "grad_norm": 330.5058898925781, "learning_rate": 3.868103433982034e-06, "loss": 15.8683, "step": 307710 }, { "epoch": 0.6216138689463754, "grad_norm": 371.6576843261719, "learning_rate": 3.867763432911225e-06, "loss": 24.2214, "step": 307720 }, { "epoch": 0.6216340695790592, "grad_norm": 233.36228942871094, "learning_rate": 3.867423437358799e-06, "loss": 16.5363, "step": 307730 }, { "epoch": 0.621654270211743, "grad_norm": 278.19989013671875, "learning_rate": 3.867083447326413e-06, "loss": 19.8851, "step": 307740 }, { "epoch": 0.6216744708444268, "grad_norm": 433.671875, "learning_rate": 3.866743462815724e-06, "loss": 14.4983, "step": 307750 }, { "epoch": 0.6216946714771107, "grad_norm": 187.676025390625, "learning_rate": 3.866403483828392e-06, "loss": 9.9716, "step": 307760 }, { "epoch": 0.6217148721097945, "grad_norm": 242.82997131347656, "learning_rate": 3.866063510366072e-06, "loss": 16.7995, "step": 307770 }, { "epoch": 0.6217350727424783, "grad_norm": 61.38282775878906, "learning_rate": 3.86572354243042e-06, "loss": 22.3902, "step": 307780 }, { "epoch": 0.6217552733751621, "grad_norm": 113.97576141357422, "learning_rate": 3.865383580023094e-06, "loss": 24.4628, "step": 307790 }, { "epoch": 0.6217754740078459, "grad_norm": 160.39501953125, "learning_rate": 3.865043623145751e-06, "loss": 16.7578, "step": 307800 }, { "epoch": 0.6217956746405298, "grad_norm": 222.91017150878906, "learning_rate": 3.864703671800047e-06, "loss": 24.4345, "step": 307810 }, { "epoch": 0.6218158752732136, "grad_norm": 240.78118896484375, "learning_rate": 3.8643637259876415e-06, "loss": 21.2983, "step": 307820 }, { "epoch": 0.6218360759058974, "grad_norm": 187.2924041748047, "learning_rate": 3.864023785710187e-06, "loss": 11.8364, "step": 307830 }, { "epoch": 0.6218562765385812, "grad_norm": 536.9988403320312, "learning_rate": 3.863683850969343e-06, "loss": 17.8358, "step": 307840 }, { "epoch": 0.621876477171265, "grad_norm": 418.7780456542969, "learning_rate": 3.863343921766769e-06, "loss": 17.0945, "step": 307850 }, { "epoch": 0.6218966778039489, "grad_norm": 361.35968017578125, "learning_rate": 3.863003998104117e-06, "loss": 8.2555, "step": 307860 }, { "epoch": 0.6219168784366327, "grad_norm": 118.85611724853516, "learning_rate": 3.862664079983045e-06, "loss": 18.5271, "step": 307870 }, { "epoch": 0.6219370790693165, "grad_norm": 416.3175354003906, "learning_rate": 3.862324167405212e-06, "loss": 23.3311, "step": 307880 }, { "epoch": 0.6219572797020003, "grad_norm": 543.1470947265625, "learning_rate": 3.8619842603722715e-06, "loss": 13.0038, "step": 307890 }, { "epoch": 0.621977480334684, "grad_norm": 296.35711669921875, "learning_rate": 3.86164435888588e-06, "loss": 25.3402, "step": 307900 }, { "epoch": 0.6219976809673678, "grad_norm": 30.66621971130371, "learning_rate": 3.861304462947698e-06, "loss": 10.6037, "step": 307910 }, { "epoch": 0.6220178816000517, "grad_norm": 175.96739196777344, "learning_rate": 3.860964572559381e-06, "loss": 13.5033, "step": 307920 }, { "epoch": 0.6220380822327355, "grad_norm": 10.922056198120117, "learning_rate": 3.860624687722583e-06, "loss": 13.8491, "step": 307930 }, { "epoch": 0.6220582828654193, "grad_norm": 445.810302734375, "learning_rate": 3.860284808438962e-06, "loss": 15.0965, "step": 307940 }, { "epoch": 0.6220784834981031, "grad_norm": 326.8045959472656, "learning_rate": 3.859944934710177e-06, "loss": 23.8901, "step": 307950 }, { "epoch": 0.622098684130787, "grad_norm": 13.565624237060547, "learning_rate": 3.859605066537879e-06, "loss": 19.0306, "step": 307960 }, { "epoch": 0.6221188847634708, "grad_norm": 241.51303100585938, "learning_rate": 3.859265203923728e-06, "loss": 29.2578, "step": 307970 }, { "epoch": 0.6221390853961546, "grad_norm": 49.91804504394531, "learning_rate": 3.858925346869383e-06, "loss": 30.2512, "step": 307980 }, { "epoch": 0.6221592860288384, "grad_norm": 247.2067108154297, "learning_rate": 3.858585495376494e-06, "loss": 16.7159, "step": 307990 }, { "epoch": 0.6221794866615222, "grad_norm": 213.8575897216797, "learning_rate": 3.8582456494467214e-06, "loss": 18.9339, "step": 308000 }, { "epoch": 0.622199687294206, "grad_norm": 156.19007873535156, "learning_rate": 3.857905809081723e-06, "loss": 19.0255, "step": 308010 }, { "epoch": 0.6222198879268899, "grad_norm": 188.07540893554688, "learning_rate": 3.857565974283152e-06, "loss": 20.4675, "step": 308020 }, { "epoch": 0.6222400885595737, "grad_norm": 261.4509582519531, "learning_rate": 3.857226145052665e-06, "loss": 17.3628, "step": 308030 }, { "epoch": 0.6222602891922575, "grad_norm": 63.95356750488281, "learning_rate": 3.856886321391919e-06, "loss": 16.8565, "step": 308040 }, { "epoch": 0.6222804898249413, "grad_norm": 59.160743713378906, "learning_rate": 3.856546503302573e-06, "loss": 13.2862, "step": 308050 }, { "epoch": 0.6223006904576251, "grad_norm": 345.3314208984375, "learning_rate": 3.856206690786278e-06, "loss": 12.6927, "step": 308060 }, { "epoch": 0.622320891090309, "grad_norm": 275.8583984375, "learning_rate": 3.8558668838446935e-06, "loss": 20.6535, "step": 308070 }, { "epoch": 0.6223410917229928, "grad_norm": 315.6982421875, "learning_rate": 3.855527082479477e-06, "loss": 16.9193, "step": 308080 }, { "epoch": 0.6223612923556766, "grad_norm": 478.2763366699219, "learning_rate": 3.85518728669228e-06, "loss": 23.3973, "step": 308090 }, { "epoch": 0.6223814929883604, "grad_norm": 0.0, "learning_rate": 3.854847496484762e-06, "loss": 15.059, "step": 308100 }, { "epoch": 0.6224016936210442, "grad_norm": 272.1664733886719, "learning_rate": 3.85450771185858e-06, "loss": 18.7769, "step": 308110 }, { "epoch": 0.6224218942537281, "grad_norm": 411.6300048828125, "learning_rate": 3.854167932815387e-06, "loss": 21.0439, "step": 308120 }, { "epoch": 0.6224420948864119, "grad_norm": 257.7439880371094, "learning_rate": 3.85382815935684e-06, "loss": 15.9535, "step": 308130 }, { "epoch": 0.6224622955190957, "grad_norm": 116.10952758789062, "learning_rate": 3.853488391484599e-06, "loss": 20.662, "step": 308140 }, { "epoch": 0.6224824961517794, "grad_norm": 508.61029052734375, "learning_rate": 3.853148629200312e-06, "loss": 18.5934, "step": 308150 }, { "epoch": 0.6225026967844632, "grad_norm": 387.45611572265625, "learning_rate": 3.852808872505642e-06, "loss": 16.2777, "step": 308160 }, { "epoch": 0.6225228974171471, "grad_norm": 330.9191589355469, "learning_rate": 3.8524691214022425e-06, "loss": 26.6279, "step": 308170 }, { "epoch": 0.6225430980498309, "grad_norm": 200.01390075683594, "learning_rate": 3.8521293758917684e-06, "loss": 10.6009, "step": 308180 }, { "epoch": 0.6225632986825147, "grad_norm": 245.4202423095703, "learning_rate": 3.851789635975877e-06, "loss": 18.6933, "step": 308190 }, { "epoch": 0.6225834993151985, "grad_norm": 167.76712036132812, "learning_rate": 3.8514499016562216e-06, "loss": 23.2338, "step": 308200 }, { "epoch": 0.6226036999478823, "grad_norm": 439.7986755371094, "learning_rate": 3.851110172934463e-06, "loss": 14.2105, "step": 308210 }, { "epoch": 0.6226239005805662, "grad_norm": 337.0575866699219, "learning_rate": 3.850770449812252e-06, "loss": 14.3114, "step": 308220 }, { "epoch": 0.62264410121325, "grad_norm": 592.26953125, "learning_rate": 3.850430732291248e-06, "loss": 17.5648, "step": 308230 }, { "epoch": 0.6226643018459338, "grad_norm": 101.96550750732422, "learning_rate": 3.850091020373105e-06, "loss": 15.2549, "step": 308240 }, { "epoch": 0.6226845024786176, "grad_norm": 331.58526611328125, "learning_rate": 3.849751314059479e-06, "loss": 31.2377, "step": 308250 }, { "epoch": 0.6227047031113014, "grad_norm": 533.5179443359375, "learning_rate": 3.849411613352024e-06, "loss": 18.0106, "step": 308260 }, { "epoch": 0.6227249037439853, "grad_norm": 208.2753143310547, "learning_rate": 3.8490719182524e-06, "loss": 11.0107, "step": 308270 }, { "epoch": 0.6227451043766691, "grad_norm": 370.33575439453125, "learning_rate": 3.848732228762257e-06, "loss": 15.2775, "step": 308280 }, { "epoch": 0.6227653050093529, "grad_norm": 456.10552978515625, "learning_rate": 3.848392544883254e-06, "loss": 40.4141, "step": 308290 }, { "epoch": 0.6227855056420367, "grad_norm": 269.7329406738281, "learning_rate": 3.8480528666170495e-06, "loss": 17.8819, "step": 308300 }, { "epoch": 0.6228057062747205, "grad_norm": 136.74368286132812, "learning_rate": 3.847713193965291e-06, "loss": 15.929, "step": 308310 }, { "epoch": 0.6228259069074044, "grad_norm": 357.62725830078125, "learning_rate": 3.84737352692964e-06, "loss": 16.8502, "step": 308320 }, { "epoch": 0.6228461075400882, "grad_norm": 277.4453125, "learning_rate": 3.847033865511752e-06, "loss": 22.3991, "step": 308330 }, { "epoch": 0.622866308172772, "grad_norm": 225.10763549804688, "learning_rate": 3.84669420971328e-06, "loss": 10.3757, "step": 308340 }, { "epoch": 0.6228865088054558, "grad_norm": 387.1439514160156, "learning_rate": 3.846354559535881e-06, "loss": 29.1575, "step": 308350 }, { "epoch": 0.6229067094381396, "grad_norm": 98.27305603027344, "learning_rate": 3.846014914981209e-06, "loss": 20.9277, "step": 308360 }, { "epoch": 0.6229269100708235, "grad_norm": 130.1021728515625, "learning_rate": 3.845675276050923e-06, "loss": 35.6931, "step": 308370 }, { "epoch": 0.6229471107035073, "grad_norm": 218.9253692626953, "learning_rate": 3.845335642746672e-06, "loss": 12.159, "step": 308380 }, { "epoch": 0.6229673113361911, "grad_norm": 281.0378112792969, "learning_rate": 3.8449960150701175e-06, "loss": 14.4139, "step": 308390 }, { "epoch": 0.6229875119688749, "grad_norm": 206.79696655273438, "learning_rate": 3.844656393022912e-06, "loss": 10.6231, "step": 308400 }, { "epoch": 0.6230077126015586, "grad_norm": 224.1689453125, "learning_rate": 3.84431677660671e-06, "loss": 13.4657, "step": 308410 }, { "epoch": 0.6230279132342424, "grad_norm": 472.20196533203125, "learning_rate": 3.843977165823169e-06, "loss": 21.0853, "step": 308420 }, { "epoch": 0.6230481138669263, "grad_norm": 398.5922546386719, "learning_rate": 3.843637560673943e-06, "loss": 13.0395, "step": 308430 }, { "epoch": 0.6230683144996101, "grad_norm": 23.40195655822754, "learning_rate": 3.843297961160686e-06, "loss": 14.2749, "step": 308440 }, { "epoch": 0.6230885151322939, "grad_norm": 292.8092041015625, "learning_rate": 3.842958367285056e-06, "loss": 21.9751, "step": 308450 }, { "epoch": 0.6231087157649777, "grad_norm": 659.0105590820312, "learning_rate": 3.842618779048706e-06, "loss": 30.0681, "step": 308460 }, { "epoch": 0.6231289163976615, "grad_norm": 66.26213073730469, "learning_rate": 3.842279196453292e-06, "loss": 13.2273, "step": 308470 }, { "epoch": 0.6231491170303454, "grad_norm": 62.13567352294922, "learning_rate": 3.841939619500468e-06, "loss": 29.1746, "step": 308480 }, { "epoch": 0.6231693176630292, "grad_norm": 210.9709014892578, "learning_rate": 3.841600048191891e-06, "loss": 13.7839, "step": 308490 }, { "epoch": 0.623189518295713, "grad_norm": 440.2713317871094, "learning_rate": 3.841260482529215e-06, "loss": 18.8423, "step": 308500 }, { "epoch": 0.6232097189283968, "grad_norm": 2165.419921875, "learning_rate": 3.8409209225140925e-06, "loss": 16.5092, "step": 308510 }, { "epoch": 0.6232299195610806, "grad_norm": 208.42665100097656, "learning_rate": 3.840581368148182e-06, "loss": 24.165, "step": 308520 }, { "epoch": 0.6232501201937645, "grad_norm": 0.0, "learning_rate": 3.840241819433139e-06, "loss": 16.0194, "step": 308530 }, { "epoch": 0.6232703208264483, "grad_norm": 219.37803649902344, "learning_rate": 3.839902276370615e-06, "loss": 19.0872, "step": 308540 }, { "epoch": 0.6232905214591321, "grad_norm": 102.4112777709961, "learning_rate": 3.839562738962267e-06, "loss": 15.0382, "step": 308550 }, { "epoch": 0.6233107220918159, "grad_norm": 161.55409240722656, "learning_rate": 3.83922320720975e-06, "loss": 9.4128, "step": 308560 }, { "epoch": 0.6233309227244997, "grad_norm": 226.05775451660156, "learning_rate": 3.838883681114718e-06, "loss": 17.7716, "step": 308570 }, { "epoch": 0.6233511233571836, "grad_norm": 371.1513671875, "learning_rate": 3.838544160678824e-06, "loss": 9.0566, "step": 308580 }, { "epoch": 0.6233713239898674, "grad_norm": 420.34930419921875, "learning_rate": 3.838204645903729e-06, "loss": 35.1801, "step": 308590 }, { "epoch": 0.6233915246225512, "grad_norm": 61.92141342163086, "learning_rate": 3.83786513679108e-06, "loss": 12.5675, "step": 308600 }, { "epoch": 0.623411725255235, "grad_norm": 147.2946014404297, "learning_rate": 3.837525633342537e-06, "loss": 18.4885, "step": 308610 }, { "epoch": 0.6234319258879188, "grad_norm": 336.087646484375, "learning_rate": 3.837186135559754e-06, "loss": 31.7714, "step": 308620 }, { "epoch": 0.6234521265206027, "grad_norm": 213.505615234375, "learning_rate": 3.836846643444383e-06, "loss": 19.3192, "step": 308630 }, { "epoch": 0.6234723271532865, "grad_norm": 175.28668212890625, "learning_rate": 3.836507156998081e-06, "loss": 28.1637, "step": 308640 }, { "epoch": 0.6234925277859703, "grad_norm": 228.68026733398438, "learning_rate": 3.836167676222501e-06, "loss": 22.8894, "step": 308650 }, { "epoch": 0.6235127284186541, "grad_norm": 300.21380615234375, "learning_rate": 3.835828201119302e-06, "loss": 18.9208, "step": 308660 }, { "epoch": 0.6235329290513378, "grad_norm": 339.1099853515625, "learning_rate": 3.835488731690131e-06, "loss": 28.6613, "step": 308670 }, { "epoch": 0.6235531296840217, "grad_norm": 424.7220153808594, "learning_rate": 3.835149267936649e-06, "loss": 16.2611, "step": 308680 }, { "epoch": 0.6235733303167055, "grad_norm": 138.7810821533203, "learning_rate": 3.834809809860508e-06, "loss": 21.2992, "step": 308690 }, { "epoch": 0.6235935309493893, "grad_norm": 173.34335327148438, "learning_rate": 3.834470357463362e-06, "loss": 14.7518, "step": 308700 }, { "epoch": 0.6236137315820731, "grad_norm": 120.71334075927734, "learning_rate": 3.834130910746866e-06, "loss": 16.4815, "step": 308710 }, { "epoch": 0.6236339322147569, "grad_norm": 409.7410888671875, "learning_rate": 3.833791469712676e-06, "loss": 19.1314, "step": 308720 }, { "epoch": 0.6236541328474408, "grad_norm": 238.66445922851562, "learning_rate": 3.833452034362444e-06, "loss": 26.8663, "step": 308730 }, { "epoch": 0.6236743334801246, "grad_norm": 508.831298828125, "learning_rate": 3.833112604697824e-06, "loss": 16.4898, "step": 308740 }, { "epoch": 0.6236945341128084, "grad_norm": 448.3441162109375, "learning_rate": 3.832773180720475e-06, "loss": 16.585, "step": 308750 }, { "epoch": 0.6237147347454922, "grad_norm": 143.68203735351562, "learning_rate": 3.832433762432044e-06, "loss": 22.2272, "step": 308760 }, { "epoch": 0.623734935378176, "grad_norm": 283.65167236328125, "learning_rate": 3.832094349834191e-06, "loss": 18.5195, "step": 308770 }, { "epoch": 0.6237551360108599, "grad_norm": 529.751708984375, "learning_rate": 3.83175494292857e-06, "loss": 18.1589, "step": 308780 }, { "epoch": 0.6237753366435437, "grad_norm": 225.71478271484375, "learning_rate": 3.831415541716831e-06, "loss": 13.9355, "step": 308790 }, { "epoch": 0.6237955372762275, "grad_norm": 209.81497192382812, "learning_rate": 3.831076146200633e-06, "loss": 14.701, "step": 308800 }, { "epoch": 0.6238157379089113, "grad_norm": 192.020751953125, "learning_rate": 3.830736756381626e-06, "loss": 18.2454, "step": 308810 }, { "epoch": 0.6238359385415951, "grad_norm": 260.6600036621094, "learning_rate": 3.830397372261469e-06, "loss": 16.6297, "step": 308820 }, { "epoch": 0.623856139174279, "grad_norm": 295.0828857421875, "learning_rate": 3.8300579938418105e-06, "loss": 16.2841, "step": 308830 }, { "epoch": 0.6238763398069628, "grad_norm": 223.87210083007812, "learning_rate": 3.8297186211243085e-06, "loss": 13.9062, "step": 308840 }, { "epoch": 0.6238965404396466, "grad_norm": 0.0, "learning_rate": 3.829379254110617e-06, "loss": 29.4623, "step": 308850 }, { "epoch": 0.6239167410723304, "grad_norm": 1337.0556640625, "learning_rate": 3.829039892802388e-06, "loss": 27.2706, "step": 308860 }, { "epoch": 0.6239369417050142, "grad_norm": 109.80281829833984, "learning_rate": 3.828700537201277e-06, "loss": 6.9781, "step": 308870 }, { "epoch": 0.6239571423376981, "grad_norm": 770.5431518554688, "learning_rate": 3.828361187308938e-06, "loss": 26.4188, "step": 308880 }, { "epoch": 0.6239773429703819, "grad_norm": 236.28302001953125, "learning_rate": 3.828021843127024e-06, "loss": 22.1503, "step": 308890 }, { "epoch": 0.6239975436030657, "grad_norm": 81.62903594970703, "learning_rate": 3.827682504657187e-06, "loss": 15.1781, "step": 308900 }, { "epoch": 0.6240177442357495, "grad_norm": 258.8533020019531, "learning_rate": 3.827343171901089e-06, "loss": 8.8318, "step": 308910 }, { "epoch": 0.6240379448684332, "grad_norm": 497.34136962890625, "learning_rate": 3.827003844860373e-06, "loss": 34.5471, "step": 308920 }, { "epoch": 0.624058145501117, "grad_norm": 61.81496047973633, "learning_rate": 3.8266645235367e-06, "loss": 10.2258, "step": 308930 }, { "epoch": 0.6240783461338009, "grad_norm": 643.0823974609375, "learning_rate": 3.826325207931722e-06, "loss": 31.5776, "step": 308940 }, { "epoch": 0.6240985467664847, "grad_norm": 185.8888702392578, "learning_rate": 3.825985898047091e-06, "loss": 28.2542, "step": 308950 }, { "epoch": 0.6241187473991685, "grad_norm": 151.5341796875, "learning_rate": 3.8256465938844635e-06, "loss": 24.7832, "step": 308960 }, { "epoch": 0.6241389480318523, "grad_norm": 149.05722045898438, "learning_rate": 3.8253072954454905e-06, "loss": 15.3322, "step": 308970 }, { "epoch": 0.6241591486645361, "grad_norm": 94.24774169921875, "learning_rate": 3.824968002731831e-06, "loss": 22.6606, "step": 308980 }, { "epoch": 0.62417934929722, "grad_norm": 123.56239318847656, "learning_rate": 3.824628715745131e-06, "loss": 16.7207, "step": 308990 }, { "epoch": 0.6241995499299038, "grad_norm": 283.40277099609375, "learning_rate": 3.82428943448705e-06, "loss": 14.8097, "step": 309000 }, { "epoch": 0.6242197505625876, "grad_norm": 87.08314514160156, "learning_rate": 3.82395015895924e-06, "loss": 23.3029, "step": 309010 }, { "epoch": 0.6242399511952714, "grad_norm": 409.2423400878906, "learning_rate": 3.823610889163354e-06, "loss": 20.2591, "step": 309020 }, { "epoch": 0.6242601518279552, "grad_norm": 137.38645935058594, "learning_rate": 3.823271625101045e-06, "loss": 14.1638, "step": 309030 }, { "epoch": 0.6242803524606391, "grad_norm": 172.6559295654297, "learning_rate": 3.822932366773969e-06, "loss": 15.6918, "step": 309040 }, { "epoch": 0.6243005530933229, "grad_norm": 264.81256103515625, "learning_rate": 3.822593114183777e-06, "loss": 16.6102, "step": 309050 }, { "epoch": 0.6243207537260067, "grad_norm": 134.5736541748047, "learning_rate": 3.822253867332122e-06, "loss": 18.7621, "step": 309060 }, { "epoch": 0.6243409543586905, "grad_norm": 196.30345153808594, "learning_rate": 3.821914626220661e-06, "loss": 14.2891, "step": 309070 }, { "epoch": 0.6243611549913743, "grad_norm": 35.265785217285156, "learning_rate": 3.8215753908510435e-06, "loss": 17.6513, "step": 309080 }, { "epoch": 0.6243813556240582, "grad_norm": 295.5240173339844, "learning_rate": 3.8212361612249255e-06, "loss": 15.4379, "step": 309090 }, { "epoch": 0.624401556256742, "grad_norm": 387.10394287109375, "learning_rate": 3.820896937343959e-06, "loss": 23.4288, "step": 309100 }, { "epoch": 0.6244217568894258, "grad_norm": 207.6017303466797, "learning_rate": 3.820557719209799e-06, "loss": 12.6847, "step": 309110 }, { "epoch": 0.6244419575221096, "grad_norm": 262.0101623535156, "learning_rate": 3.820218506824096e-06, "loss": 6.8893, "step": 309120 }, { "epoch": 0.6244621581547934, "grad_norm": 235.878662109375, "learning_rate": 3.819879300188505e-06, "loss": 21.3174, "step": 309130 }, { "epoch": 0.6244823587874773, "grad_norm": 222.66151428222656, "learning_rate": 3.8195400993046815e-06, "loss": 16.767, "step": 309140 }, { "epoch": 0.6245025594201611, "grad_norm": 132.57534790039062, "learning_rate": 3.819200904174274e-06, "loss": 15.8151, "step": 309150 }, { "epoch": 0.6245227600528449, "grad_norm": 678.8401489257812, "learning_rate": 3.818861714798939e-06, "loss": 36.0361, "step": 309160 }, { "epoch": 0.6245429606855287, "grad_norm": 160.21295166015625, "learning_rate": 3.8185225311803295e-06, "loss": 10.7321, "step": 309170 }, { "epoch": 0.6245631613182124, "grad_norm": 137.0468292236328, "learning_rate": 3.8181833533200965e-06, "loss": 9.0202, "step": 309180 }, { "epoch": 0.6245833619508963, "grad_norm": 179.94883728027344, "learning_rate": 3.817844181219893e-06, "loss": 37.7728, "step": 309190 }, { "epoch": 0.6246035625835801, "grad_norm": 387.963623046875, "learning_rate": 3.817505014881378e-06, "loss": 16.0731, "step": 309200 }, { "epoch": 0.6246237632162639, "grad_norm": 492.0161437988281, "learning_rate": 3.817165854306197e-06, "loss": 9.6186, "step": 309210 }, { "epoch": 0.6246439638489477, "grad_norm": 579.0895385742188, "learning_rate": 3.816826699496006e-06, "loss": 26.9322, "step": 309220 }, { "epoch": 0.6246641644816315, "grad_norm": 47.42612075805664, "learning_rate": 3.81648755045246e-06, "loss": 19.4958, "step": 309230 }, { "epoch": 0.6246843651143154, "grad_norm": 330.9357604980469, "learning_rate": 3.816148407177209e-06, "loss": 16.7222, "step": 309240 }, { "epoch": 0.6247045657469992, "grad_norm": 292.9753112792969, "learning_rate": 3.815809269671908e-06, "loss": 13.8475, "step": 309250 }, { "epoch": 0.624724766379683, "grad_norm": 133.4097137451172, "learning_rate": 3.8154701379382064e-06, "loss": 19.7857, "step": 309260 }, { "epoch": 0.6247449670123668, "grad_norm": 247.11520385742188, "learning_rate": 3.815131011977763e-06, "loss": 20.1633, "step": 309270 }, { "epoch": 0.6247651676450506, "grad_norm": 464.36981201171875, "learning_rate": 3.814791891792225e-06, "loss": 23.8233, "step": 309280 }, { "epoch": 0.6247853682777345, "grad_norm": 226.33570861816406, "learning_rate": 3.814452777383248e-06, "loss": 14.4315, "step": 309290 }, { "epoch": 0.6248055689104183, "grad_norm": 57.977783203125, "learning_rate": 3.814113668752486e-06, "loss": 16.8172, "step": 309300 }, { "epoch": 0.6248257695431021, "grad_norm": 277.8394470214844, "learning_rate": 3.8137745659015884e-06, "loss": 16.917, "step": 309310 }, { "epoch": 0.6248459701757859, "grad_norm": 23.92864990234375, "learning_rate": 3.81343546883221e-06, "loss": 17.3133, "step": 309320 }, { "epoch": 0.6248661708084697, "grad_norm": 554.9177856445312, "learning_rate": 3.8130963775460045e-06, "loss": 20.8343, "step": 309330 }, { "epoch": 0.6248863714411536, "grad_norm": 215.94619750976562, "learning_rate": 3.812757292044622e-06, "loss": 12.7598, "step": 309340 }, { "epoch": 0.6249065720738374, "grad_norm": 37.26991271972656, "learning_rate": 3.8124182123297153e-06, "loss": 18.6214, "step": 309350 }, { "epoch": 0.6249267727065212, "grad_norm": 462.3626708984375, "learning_rate": 3.8120791384029414e-06, "loss": 17.8651, "step": 309360 }, { "epoch": 0.624946973339205, "grad_norm": 521.3860473632812, "learning_rate": 3.811740070265947e-06, "loss": 17.3765, "step": 309370 }, { "epoch": 0.6249671739718888, "grad_norm": 270.1254577636719, "learning_rate": 3.8114010079203877e-06, "loss": 23.4025, "step": 309380 }, { "epoch": 0.6249873746045727, "grad_norm": 733.4840698242188, "learning_rate": 3.8110619513679176e-06, "loss": 14.2012, "step": 309390 }, { "epoch": 0.6250075752372565, "grad_norm": 261.3502502441406, "learning_rate": 3.810722900610186e-06, "loss": 25.0586, "step": 309400 }, { "epoch": 0.6250277758699403, "grad_norm": 219.67422485351562, "learning_rate": 3.8103838556488467e-06, "loss": 16.7279, "step": 309410 }, { "epoch": 0.6250479765026241, "grad_norm": 477.48095703125, "learning_rate": 3.810044816485551e-06, "loss": 25.0391, "step": 309420 }, { "epoch": 0.6250681771353078, "grad_norm": 55.64144515991211, "learning_rate": 3.809705783121956e-06, "loss": 23.468, "step": 309430 }, { "epoch": 0.6250883777679916, "grad_norm": 83.61530303955078, "learning_rate": 3.8093667555597068e-06, "loss": 21.6713, "step": 309440 }, { "epoch": 0.6251085784006755, "grad_norm": 264.65234375, "learning_rate": 3.809027733800461e-06, "loss": 11.2381, "step": 309450 }, { "epoch": 0.6251287790333593, "grad_norm": 471.4862976074219, "learning_rate": 3.808688717845871e-06, "loss": 20.0118, "step": 309460 }, { "epoch": 0.6251489796660431, "grad_norm": 159.5740509033203, "learning_rate": 3.8083497076975863e-06, "loss": 16.1668, "step": 309470 }, { "epoch": 0.6251691802987269, "grad_norm": 196.59725952148438, "learning_rate": 3.808010703357261e-06, "loss": 18.6009, "step": 309480 }, { "epoch": 0.6251893809314107, "grad_norm": 25.111797332763672, "learning_rate": 3.8076717048265477e-06, "loss": 14.3249, "step": 309490 }, { "epoch": 0.6252095815640946, "grad_norm": 129.2117919921875, "learning_rate": 3.8073327121070968e-06, "loss": 19.9303, "step": 309500 }, { "epoch": 0.6252297821967784, "grad_norm": 52.25770950317383, "learning_rate": 3.8069937252005606e-06, "loss": 18.688, "step": 309510 }, { "epoch": 0.6252499828294622, "grad_norm": 300.2548522949219, "learning_rate": 3.8066547441085956e-06, "loss": 28.5121, "step": 309520 }, { "epoch": 0.625270183462146, "grad_norm": 1133.289306640625, "learning_rate": 3.806315768832847e-06, "loss": 46.7954, "step": 309530 }, { "epoch": 0.6252903840948298, "grad_norm": 255.74110412597656, "learning_rate": 3.805976799374972e-06, "loss": 35.8322, "step": 309540 }, { "epoch": 0.6253105847275137, "grad_norm": 527.6331176757812, "learning_rate": 3.8056378357366224e-06, "loss": 18.161, "step": 309550 }, { "epoch": 0.6253307853601975, "grad_norm": 142.5237274169922, "learning_rate": 3.8052988779194478e-06, "loss": 8.2858, "step": 309560 }, { "epoch": 0.6253509859928813, "grad_norm": 280.269775390625, "learning_rate": 3.804959925925102e-06, "loss": 14.4983, "step": 309570 }, { "epoch": 0.6253711866255651, "grad_norm": 310.82342529296875, "learning_rate": 3.8046209797552353e-06, "loss": 10.2862, "step": 309580 }, { "epoch": 0.6253913872582489, "grad_norm": 49.71223831176758, "learning_rate": 3.804282039411504e-06, "loss": 22.743, "step": 309590 }, { "epoch": 0.6254115878909328, "grad_norm": 42.58936309814453, "learning_rate": 3.8039431048955537e-06, "loss": 17.0874, "step": 309600 }, { "epoch": 0.6254317885236166, "grad_norm": 620.5482788085938, "learning_rate": 3.8036041762090416e-06, "loss": 13.122, "step": 309610 }, { "epoch": 0.6254519891563004, "grad_norm": 389.4205322265625, "learning_rate": 3.8032652533536173e-06, "loss": 23.0514, "step": 309620 }, { "epoch": 0.6254721897889842, "grad_norm": 111.9659194946289, "learning_rate": 3.802926336330933e-06, "loss": 25.0991, "step": 309630 }, { "epoch": 0.625492390421668, "grad_norm": 25.627323150634766, "learning_rate": 3.80258742514264e-06, "loss": 11.2972, "step": 309640 }, { "epoch": 0.6255125910543519, "grad_norm": 352.11907958984375, "learning_rate": 3.8022485197903924e-06, "loss": 15.6232, "step": 309650 }, { "epoch": 0.6255327916870357, "grad_norm": 244.0524444580078, "learning_rate": 3.801909620275839e-06, "loss": 17.1555, "step": 309660 }, { "epoch": 0.6255529923197195, "grad_norm": 293.2171325683594, "learning_rate": 3.8015707266006307e-06, "loss": 29.7122, "step": 309670 }, { "epoch": 0.6255731929524033, "grad_norm": 284.01470947265625, "learning_rate": 3.801231838766425e-06, "loss": 20.3846, "step": 309680 }, { "epoch": 0.625593393585087, "grad_norm": 105.06939697265625, "learning_rate": 3.8008929567748676e-06, "loss": 15.9692, "step": 309690 }, { "epoch": 0.6256135942177709, "grad_norm": 586.7716064453125, "learning_rate": 3.8005540806276132e-06, "loss": 24.4992, "step": 309700 }, { "epoch": 0.6256337948504547, "grad_norm": 169.8800811767578, "learning_rate": 3.800215210326312e-06, "loss": 16.3365, "step": 309710 }, { "epoch": 0.6256539954831385, "grad_norm": 357.4581298828125, "learning_rate": 3.7998763458726183e-06, "loss": 18.1023, "step": 309720 }, { "epoch": 0.6256741961158223, "grad_norm": 78.17816925048828, "learning_rate": 3.79953748726818e-06, "loss": 23.3375, "step": 309730 }, { "epoch": 0.6256943967485061, "grad_norm": 96.6305160522461, "learning_rate": 3.7991986345146503e-06, "loss": 21.632, "step": 309740 }, { "epoch": 0.62571459738119, "grad_norm": 328.3742980957031, "learning_rate": 3.798859787613682e-06, "loss": 25.069, "step": 309750 }, { "epoch": 0.6257347980138738, "grad_norm": 462.5702209472656, "learning_rate": 3.7985209465669248e-06, "loss": 26.0323, "step": 309760 }, { "epoch": 0.6257549986465576, "grad_norm": 454.1412658691406, "learning_rate": 3.7981821113760305e-06, "loss": 15.117, "step": 309770 }, { "epoch": 0.6257751992792414, "grad_norm": 160.02479553222656, "learning_rate": 3.797843282042652e-06, "loss": 10.69, "step": 309780 }, { "epoch": 0.6257953999119252, "grad_norm": 337.59442138671875, "learning_rate": 3.7975044585684382e-06, "loss": 11.3072, "step": 309790 }, { "epoch": 0.625815600544609, "grad_norm": 181.88673400878906, "learning_rate": 3.797165640955041e-06, "loss": 21.7581, "step": 309800 }, { "epoch": 0.6258358011772929, "grad_norm": 222.74325561523438, "learning_rate": 3.796826829204116e-06, "loss": 20.8176, "step": 309810 }, { "epoch": 0.6258560018099767, "grad_norm": 175.66232299804688, "learning_rate": 3.796488023317308e-06, "loss": 22.7107, "step": 309820 }, { "epoch": 0.6258762024426605, "grad_norm": 241.65371704101562, "learning_rate": 3.796149223296272e-06, "loss": 10.603, "step": 309830 }, { "epoch": 0.6258964030753443, "grad_norm": 300.0515441894531, "learning_rate": 3.79581042914266e-06, "loss": 33.4149, "step": 309840 }, { "epoch": 0.6259166037080282, "grad_norm": 277.576416015625, "learning_rate": 3.7954716408581206e-06, "loss": 21.1126, "step": 309850 }, { "epoch": 0.625936804340712, "grad_norm": 285.8462219238281, "learning_rate": 3.7951328584443063e-06, "loss": 16.8115, "step": 309860 }, { "epoch": 0.6259570049733958, "grad_norm": 323.103759765625, "learning_rate": 3.7947940819028678e-06, "loss": 15.8726, "step": 309870 }, { "epoch": 0.6259772056060796, "grad_norm": 0.0, "learning_rate": 3.79445531123546e-06, "loss": 10.8496, "step": 309880 }, { "epoch": 0.6259974062387634, "grad_norm": 479.741943359375, "learning_rate": 3.794116546443727e-06, "loss": 22.8273, "step": 309890 }, { "epoch": 0.6260176068714473, "grad_norm": 276.6650085449219, "learning_rate": 3.793777787529325e-06, "loss": 13.3925, "step": 309900 }, { "epoch": 0.6260378075041311, "grad_norm": 101.07061004638672, "learning_rate": 3.793439034493905e-06, "loss": 27.7177, "step": 309910 }, { "epoch": 0.6260580081368149, "grad_norm": 304.4996643066406, "learning_rate": 3.7931002873391156e-06, "loss": 16.8825, "step": 309920 }, { "epoch": 0.6260782087694987, "grad_norm": 271.0724792480469, "learning_rate": 3.792761546066609e-06, "loss": 10.6366, "step": 309930 }, { "epoch": 0.6260984094021824, "grad_norm": 143.5612030029297, "learning_rate": 3.792422810678037e-06, "loss": 9.7198, "step": 309940 }, { "epoch": 0.6261186100348662, "grad_norm": 37.80231475830078, "learning_rate": 3.7920840811750485e-06, "loss": 9.1106, "step": 309950 }, { "epoch": 0.6261388106675501, "grad_norm": 182.70262145996094, "learning_rate": 3.7917453575592956e-06, "loss": 9.9327, "step": 309960 }, { "epoch": 0.6261590113002339, "grad_norm": 235.55149841308594, "learning_rate": 3.7914066398324317e-06, "loss": 14.9824, "step": 309970 }, { "epoch": 0.6261792119329177, "grad_norm": 80.4307861328125, "learning_rate": 3.7910679279961025e-06, "loss": 18.3356, "step": 309980 }, { "epoch": 0.6261994125656015, "grad_norm": 272.1025085449219, "learning_rate": 3.790729222051962e-06, "loss": 19.7276, "step": 309990 }, { "epoch": 0.6262196131982853, "grad_norm": 244.11318969726562, "learning_rate": 3.790390522001662e-06, "loss": 25.3405, "step": 310000 }, { "epoch": 0.6262398138309692, "grad_norm": 198.02719116210938, "learning_rate": 3.790051827846851e-06, "loss": 25.4474, "step": 310010 }, { "epoch": 0.626260014463653, "grad_norm": 246.3344268798828, "learning_rate": 3.789713139589181e-06, "loss": 27.6912, "step": 310020 }, { "epoch": 0.6262802150963368, "grad_norm": 205.59927368164062, "learning_rate": 3.789374457230301e-06, "loss": 14.96, "step": 310030 }, { "epoch": 0.6263004157290206, "grad_norm": 431.5172424316406, "learning_rate": 3.789035780771866e-06, "loss": 35.6508, "step": 310040 }, { "epoch": 0.6263206163617044, "grad_norm": 407.1021728515625, "learning_rate": 3.7886971102155205e-06, "loss": 19.5517, "step": 310050 }, { "epoch": 0.6263408169943883, "grad_norm": 25.62296485900879, "learning_rate": 3.78835844556292e-06, "loss": 13.3329, "step": 310060 }, { "epoch": 0.6263610176270721, "grad_norm": 46.0384521484375, "learning_rate": 3.7880197868157143e-06, "loss": 14.646, "step": 310070 }, { "epoch": 0.6263812182597559, "grad_norm": 212.39109802246094, "learning_rate": 3.7876811339755522e-06, "loss": 16.992, "step": 310080 }, { "epoch": 0.6264014188924397, "grad_norm": 273.8504943847656, "learning_rate": 3.7873424870440845e-06, "loss": 23.1462, "step": 310090 }, { "epoch": 0.6264216195251235, "grad_norm": 418.1965637207031, "learning_rate": 3.787003846022964e-06, "loss": 32.2874, "step": 310100 }, { "epoch": 0.6264418201578074, "grad_norm": 365.9175109863281, "learning_rate": 3.786665210913839e-06, "loss": 15.2217, "step": 310110 }, { "epoch": 0.6264620207904912, "grad_norm": 383.8537902832031, "learning_rate": 3.786326581718359e-06, "loss": 15.835, "step": 310120 }, { "epoch": 0.626482221423175, "grad_norm": 380.4593200683594, "learning_rate": 3.785987958438179e-06, "loss": 18.5239, "step": 310130 }, { "epoch": 0.6265024220558588, "grad_norm": 68.89872741699219, "learning_rate": 3.785649341074944e-06, "loss": 15.7921, "step": 310140 }, { "epoch": 0.6265226226885426, "grad_norm": 435.1707458496094, "learning_rate": 3.785310729630307e-06, "loss": 18.587, "step": 310150 }, { "epoch": 0.6265428233212265, "grad_norm": 339.081787109375, "learning_rate": 3.784972124105919e-06, "loss": 15.6138, "step": 310160 }, { "epoch": 0.6265630239539103, "grad_norm": 145.7270965576172, "learning_rate": 3.7846335245034304e-06, "loss": 24.4815, "step": 310170 }, { "epoch": 0.6265832245865941, "grad_norm": 283.1360778808594, "learning_rate": 3.784294930824489e-06, "loss": 16.5279, "step": 310180 }, { "epoch": 0.6266034252192779, "grad_norm": 451.7042236328125, "learning_rate": 3.783956343070746e-06, "loss": 21.8398, "step": 310190 }, { "epoch": 0.6266236258519616, "grad_norm": 270.9722900390625, "learning_rate": 3.7836177612438557e-06, "loss": 19.35, "step": 310200 }, { "epoch": 0.6266438264846454, "grad_norm": 420.7585144042969, "learning_rate": 3.7832791853454616e-06, "loss": 32.1663, "step": 310210 }, { "epoch": 0.6266640271173293, "grad_norm": 294.93853759765625, "learning_rate": 3.782940615377218e-06, "loss": 14.8515, "step": 310220 }, { "epoch": 0.6266842277500131, "grad_norm": 452.30352783203125, "learning_rate": 3.7826020513407753e-06, "loss": 19.5308, "step": 310230 }, { "epoch": 0.6267044283826969, "grad_norm": 550.592041015625, "learning_rate": 3.7822634932377814e-06, "loss": 29.0238, "step": 310240 }, { "epoch": 0.6267246290153807, "grad_norm": 41.52668380737305, "learning_rate": 3.7819249410698877e-06, "loss": 26.7512, "step": 310250 }, { "epoch": 0.6267448296480645, "grad_norm": 260.639892578125, "learning_rate": 3.7815863948387455e-06, "loss": 13.9434, "step": 310260 }, { "epoch": 0.6267650302807484, "grad_norm": 396.3734130859375, "learning_rate": 3.7812478545460017e-06, "loss": 23.8986, "step": 310270 }, { "epoch": 0.6267852309134322, "grad_norm": 338.4451599121094, "learning_rate": 3.7809093201933078e-06, "loss": 31.4744, "step": 310280 }, { "epoch": 0.626805431546116, "grad_norm": 187.42295837402344, "learning_rate": 3.7805707917823165e-06, "loss": 15.4408, "step": 310290 }, { "epoch": 0.6268256321787998, "grad_norm": 221.08206176757812, "learning_rate": 3.7802322693146726e-06, "loss": 28.2451, "step": 310300 }, { "epoch": 0.6268458328114836, "grad_norm": 204.87940979003906, "learning_rate": 3.7798937527920294e-06, "loss": 19.832, "step": 310310 }, { "epoch": 0.6268660334441675, "grad_norm": 251.55825805664062, "learning_rate": 3.7795552422160364e-06, "loss": 21.148, "step": 310320 }, { "epoch": 0.6268862340768513, "grad_norm": 188.8391876220703, "learning_rate": 3.779216737588344e-06, "loss": 24.134, "step": 310330 }, { "epoch": 0.6269064347095351, "grad_norm": 198.32923889160156, "learning_rate": 3.7788782389105994e-06, "loss": 12.6254, "step": 310340 }, { "epoch": 0.6269266353422189, "grad_norm": 461.3886413574219, "learning_rate": 3.778539746184454e-06, "loss": 41.7177, "step": 310350 }, { "epoch": 0.6269468359749027, "grad_norm": 5.274872303009033, "learning_rate": 3.77820125941156e-06, "loss": 37.2752, "step": 310360 }, { "epoch": 0.6269670366075866, "grad_norm": 140.4351348876953, "learning_rate": 3.7778627785935627e-06, "loss": 23.9428, "step": 310370 }, { "epoch": 0.6269872372402704, "grad_norm": 148.87588500976562, "learning_rate": 3.777524303732115e-06, "loss": 16.8922, "step": 310380 }, { "epoch": 0.6270074378729542, "grad_norm": 369.7200927734375, "learning_rate": 3.777185834828866e-06, "loss": 23.8789, "step": 310390 }, { "epoch": 0.627027638505638, "grad_norm": 344.2936706542969, "learning_rate": 3.776847371885464e-06, "loss": 18.9454, "step": 310400 }, { "epoch": 0.6270478391383218, "grad_norm": 284.3508605957031, "learning_rate": 3.77650891490356e-06, "loss": 13.7655, "step": 310410 }, { "epoch": 0.6270680397710057, "grad_norm": 324.42474365234375, "learning_rate": 3.776170463884804e-06, "loss": 13.9055, "step": 310420 }, { "epoch": 0.6270882404036895, "grad_norm": 140.72235107421875, "learning_rate": 3.775832018830843e-06, "loss": 19.8189, "step": 310430 }, { "epoch": 0.6271084410363733, "grad_norm": 265.1845397949219, "learning_rate": 3.7754935797433284e-06, "loss": 16.8076, "step": 310440 }, { "epoch": 0.6271286416690571, "grad_norm": 411.4680480957031, "learning_rate": 3.7751551466239113e-06, "loss": 11.1601, "step": 310450 }, { "epoch": 0.6271488423017408, "grad_norm": 281.9951477050781, "learning_rate": 3.774816719474238e-06, "loss": 15.2822, "step": 310460 }, { "epoch": 0.6271690429344247, "grad_norm": 314.00091552734375, "learning_rate": 3.77447829829596e-06, "loss": 25.7773, "step": 310470 }, { "epoch": 0.6271892435671085, "grad_norm": 494.60626220703125, "learning_rate": 3.7741398830907256e-06, "loss": 23.9951, "step": 310480 }, { "epoch": 0.6272094441997923, "grad_norm": 486.4038391113281, "learning_rate": 3.7738014738601856e-06, "loss": 21.9056, "step": 310490 }, { "epoch": 0.6272296448324761, "grad_norm": 227.26071166992188, "learning_rate": 3.7734630706059873e-06, "loss": 17.4221, "step": 310500 }, { "epoch": 0.6272498454651599, "grad_norm": 357.25323486328125, "learning_rate": 3.7731246733297816e-06, "loss": 24.4226, "step": 310510 }, { "epoch": 0.6272700460978438, "grad_norm": 894.3665161132812, "learning_rate": 3.772786282033218e-06, "loss": 29.9102, "step": 310520 }, { "epoch": 0.6272902467305276, "grad_norm": 303.4424133300781, "learning_rate": 3.7724478967179457e-06, "loss": 21.5048, "step": 310530 }, { "epoch": 0.6273104473632114, "grad_norm": 275.12957763671875, "learning_rate": 3.7721095173856126e-06, "loss": 23.1699, "step": 310540 }, { "epoch": 0.6273306479958952, "grad_norm": 259.13592529296875, "learning_rate": 3.7717711440378695e-06, "loss": 17.8616, "step": 310550 }, { "epoch": 0.627350848628579, "grad_norm": 621.2598876953125, "learning_rate": 3.771432776676364e-06, "loss": 24.7536, "step": 310560 }, { "epoch": 0.6273710492612629, "grad_norm": 329.5142822265625, "learning_rate": 3.771094415302745e-06, "loss": 17.6308, "step": 310570 }, { "epoch": 0.6273912498939467, "grad_norm": 198.7763214111328, "learning_rate": 3.7707560599186664e-06, "loss": 28.4491, "step": 310580 }, { "epoch": 0.6274114505266305, "grad_norm": 44.59502410888672, "learning_rate": 3.7704177105257707e-06, "loss": 32.9233, "step": 310590 }, { "epoch": 0.6274316511593143, "grad_norm": 352.8734130859375, "learning_rate": 3.77007936712571e-06, "loss": 31.4575, "step": 310600 }, { "epoch": 0.6274518517919981, "grad_norm": 329.31781005859375, "learning_rate": 3.769741029720134e-06, "loss": 24.5506, "step": 310610 }, { "epoch": 0.627472052424682, "grad_norm": 182.24273681640625, "learning_rate": 3.769402698310692e-06, "loss": 10.2613, "step": 310620 }, { "epoch": 0.6274922530573658, "grad_norm": 437.28564453125, "learning_rate": 3.7690643728990306e-06, "loss": 19.5753, "step": 310630 }, { "epoch": 0.6275124536900496, "grad_norm": 510.079345703125, "learning_rate": 3.7687260534868e-06, "loss": 25.2743, "step": 310640 }, { "epoch": 0.6275326543227334, "grad_norm": 337.50396728515625, "learning_rate": 3.7683877400756513e-06, "loss": 12.5755, "step": 310650 }, { "epoch": 0.6275528549554172, "grad_norm": 550.4494018554688, "learning_rate": 3.768049432667229e-06, "loss": 12.3804, "step": 310660 }, { "epoch": 0.6275730555881011, "grad_norm": 406.44580078125, "learning_rate": 3.7677111312631848e-06, "loss": 9.9842, "step": 310670 }, { "epoch": 0.6275932562207849, "grad_norm": 127.70179748535156, "learning_rate": 3.7673728358651683e-06, "loss": 23.096, "step": 310680 }, { "epoch": 0.6276134568534687, "grad_norm": 211.1089630126953, "learning_rate": 3.7670345464748266e-06, "loss": 19.3764, "step": 310690 }, { "epoch": 0.6276336574861525, "grad_norm": 66.96332550048828, "learning_rate": 3.7666962630938084e-06, "loss": 9.051, "step": 310700 }, { "epoch": 0.6276538581188362, "grad_norm": 409.54998779296875, "learning_rate": 3.7663579857237642e-06, "loss": 11.4489, "step": 310710 }, { "epoch": 0.62767405875152, "grad_norm": 290.6512756347656, "learning_rate": 3.7660197143663407e-06, "loss": 15.9322, "step": 310720 }, { "epoch": 0.6276942593842039, "grad_norm": 266.2632141113281, "learning_rate": 3.7656814490231864e-06, "loss": 19.0427, "step": 310730 }, { "epoch": 0.6277144600168877, "grad_norm": 149.5699462890625, "learning_rate": 3.765343189695954e-06, "loss": 32.5793, "step": 310740 }, { "epoch": 0.6277346606495715, "grad_norm": 141.14752197265625, "learning_rate": 3.765004936386286e-06, "loss": 13.3277, "step": 310750 }, { "epoch": 0.6277548612822553, "grad_norm": 158.5519256591797, "learning_rate": 3.764666689095835e-06, "loss": 19.5602, "step": 310760 }, { "epoch": 0.6277750619149391, "grad_norm": 345.5571594238281, "learning_rate": 3.7643284478262494e-06, "loss": 19.2594, "step": 310770 }, { "epoch": 0.627795262547623, "grad_norm": 189.474365234375, "learning_rate": 3.7639902125791774e-06, "loss": 23.0055, "step": 310780 }, { "epoch": 0.6278154631803068, "grad_norm": 211.29554748535156, "learning_rate": 3.7636519833562668e-06, "loss": 27.8128, "step": 310790 }, { "epoch": 0.6278356638129906, "grad_norm": 0.0, "learning_rate": 3.7633137601591647e-06, "loss": 28.9083, "step": 310800 }, { "epoch": 0.6278558644456744, "grad_norm": 224.29153442382812, "learning_rate": 3.762975542989525e-06, "loss": 21.2868, "step": 310810 }, { "epoch": 0.6278760650783582, "grad_norm": 211.43809509277344, "learning_rate": 3.762637331848989e-06, "loss": 14.8172, "step": 310820 }, { "epoch": 0.6278962657110421, "grad_norm": 949.5530395507812, "learning_rate": 3.76229912673921e-06, "loss": 29.3615, "step": 310830 }, { "epoch": 0.6279164663437259, "grad_norm": 157.6688995361328, "learning_rate": 3.761960927661836e-06, "loss": 7.3075, "step": 310840 }, { "epoch": 0.6279366669764097, "grad_norm": 356.45562744140625, "learning_rate": 3.761622734618513e-06, "loss": 12.0315, "step": 310850 }, { "epoch": 0.6279568676090935, "grad_norm": 246.15086364746094, "learning_rate": 3.7612845476108906e-06, "loss": 14.7459, "step": 310860 }, { "epoch": 0.6279770682417773, "grad_norm": 312.881103515625, "learning_rate": 3.7609463666406175e-06, "loss": 13.2323, "step": 310870 }, { "epoch": 0.6279972688744612, "grad_norm": 222.76394653320312, "learning_rate": 3.7606081917093416e-06, "loss": 22.3634, "step": 310880 }, { "epoch": 0.628017469507145, "grad_norm": 219.07554626464844, "learning_rate": 3.7602700228187096e-06, "loss": 15.6005, "step": 310890 }, { "epoch": 0.6280376701398288, "grad_norm": 344.40618896484375, "learning_rate": 3.759931859970374e-06, "loss": 16.8645, "step": 310900 }, { "epoch": 0.6280578707725126, "grad_norm": 247.7204132080078, "learning_rate": 3.7595937031659775e-06, "loss": 12.7522, "step": 310910 }, { "epoch": 0.6280780714051964, "grad_norm": 197.33749389648438, "learning_rate": 3.7592555524071716e-06, "loss": 9.3818, "step": 310920 }, { "epoch": 0.6280982720378803, "grad_norm": 635.3742065429688, "learning_rate": 3.7589174076956036e-06, "loss": 28.5773, "step": 310930 }, { "epoch": 0.6281184726705641, "grad_norm": 160.76666259765625, "learning_rate": 3.7585792690329224e-06, "loss": 18.0601, "step": 310940 }, { "epoch": 0.6281386733032479, "grad_norm": 414.2234802246094, "learning_rate": 3.758241136420775e-06, "loss": 23.8784, "step": 310950 }, { "epoch": 0.6281588739359317, "grad_norm": 201.28970336914062, "learning_rate": 3.7579030098608077e-06, "loss": 12.4529, "step": 310960 }, { "epoch": 0.6281790745686154, "grad_norm": 161.4207000732422, "learning_rate": 3.7575648893546745e-06, "loss": 15.7154, "step": 310970 }, { "epoch": 0.6281992752012993, "grad_norm": 206.98614501953125, "learning_rate": 3.757226774904016e-06, "loss": 17.1496, "step": 310980 }, { "epoch": 0.6282194758339831, "grad_norm": 444.1446228027344, "learning_rate": 3.7568886665104836e-06, "loss": 17.4212, "step": 310990 }, { "epoch": 0.6282396764666669, "grad_norm": 25.087993621826172, "learning_rate": 3.756550564175727e-06, "loss": 15.1731, "step": 311000 }, { "epoch": 0.6282598770993507, "grad_norm": 478.459228515625, "learning_rate": 3.756212467901391e-06, "loss": 16.6644, "step": 311010 }, { "epoch": 0.6282800777320345, "grad_norm": 7.955764293670654, "learning_rate": 3.755874377689125e-06, "loss": 11.6546, "step": 311020 }, { "epoch": 0.6283002783647184, "grad_norm": 289.8153076171875, "learning_rate": 3.7555362935405766e-06, "loss": 12.0908, "step": 311030 }, { "epoch": 0.6283204789974022, "grad_norm": 112.26607513427734, "learning_rate": 3.7551982154573928e-06, "loss": 23.961, "step": 311040 }, { "epoch": 0.628340679630086, "grad_norm": 259.5083923339844, "learning_rate": 3.75486014344122e-06, "loss": 20.5809, "step": 311050 }, { "epoch": 0.6283608802627698, "grad_norm": 272.8937072753906, "learning_rate": 3.7545220774937115e-06, "loss": 31.074, "step": 311060 }, { "epoch": 0.6283810808954536, "grad_norm": 100.96112060546875, "learning_rate": 3.754184017616509e-06, "loss": 24.496, "step": 311070 }, { "epoch": 0.6284012815281375, "grad_norm": 87.9144287109375, "learning_rate": 3.7538459638112635e-06, "loss": 25.3128, "step": 311080 }, { "epoch": 0.6284214821608213, "grad_norm": 288.5827331542969, "learning_rate": 3.7535079160796207e-06, "loss": 20.1209, "step": 311090 }, { "epoch": 0.6284416827935051, "grad_norm": 286.95477294921875, "learning_rate": 3.7531698744232307e-06, "loss": 21.6432, "step": 311100 }, { "epoch": 0.6284618834261889, "grad_norm": 59.3373908996582, "learning_rate": 3.7528318388437375e-06, "loss": 28.1302, "step": 311110 }, { "epoch": 0.6284820840588727, "grad_norm": 247.0828857421875, "learning_rate": 3.752493809342791e-06, "loss": 15.169, "step": 311120 }, { "epoch": 0.6285022846915566, "grad_norm": 441.55908203125, "learning_rate": 3.7521557859220405e-06, "loss": 20.6404, "step": 311130 }, { "epoch": 0.6285224853242404, "grad_norm": 44.42560958862305, "learning_rate": 3.751817768583129e-06, "loss": 12.9761, "step": 311140 }, { "epoch": 0.6285426859569242, "grad_norm": 241.0531463623047, "learning_rate": 3.7514797573277075e-06, "loss": 21.7533, "step": 311150 }, { "epoch": 0.628562886589608, "grad_norm": 359.7437744140625, "learning_rate": 3.751141752157423e-06, "loss": 22.5416, "step": 311160 }, { "epoch": 0.6285830872222918, "grad_norm": 381.6856994628906, "learning_rate": 3.7508037530739207e-06, "loss": 22.5639, "step": 311170 }, { "epoch": 0.6286032878549757, "grad_norm": 59.69532775878906, "learning_rate": 3.7504657600788484e-06, "loss": 18.697, "step": 311180 }, { "epoch": 0.6286234884876595, "grad_norm": 413.96435546875, "learning_rate": 3.750127773173858e-06, "loss": 18.4311, "step": 311190 }, { "epoch": 0.6286436891203433, "grad_norm": 77.5674819946289, "learning_rate": 3.74978979236059e-06, "loss": 12.7161, "step": 311200 }, { "epoch": 0.6286638897530271, "grad_norm": 2254.15087890625, "learning_rate": 3.7494518176406956e-06, "loss": 27.291, "step": 311210 }, { "epoch": 0.6286840903857108, "grad_norm": 216.4525604248047, "learning_rate": 3.7491138490158213e-06, "loss": 12.1476, "step": 311220 }, { "epoch": 0.6287042910183946, "grad_norm": 173.27313232421875, "learning_rate": 3.7487758864876157e-06, "loss": 10.2134, "step": 311230 }, { "epoch": 0.6287244916510785, "grad_norm": 179.3358917236328, "learning_rate": 3.7484379300577233e-06, "loss": 16.6451, "step": 311240 }, { "epoch": 0.6287446922837623, "grad_norm": 456.4303894042969, "learning_rate": 3.748099979727792e-06, "loss": 18.745, "step": 311250 }, { "epoch": 0.6287648929164461, "grad_norm": 122.1128921508789, "learning_rate": 3.7477620354994733e-06, "loss": 14.8814, "step": 311260 }, { "epoch": 0.6287850935491299, "grad_norm": 4.529035568237305, "learning_rate": 3.7474240973744063e-06, "loss": 8.585, "step": 311270 }, { "epoch": 0.6288052941818137, "grad_norm": 20.991283416748047, "learning_rate": 3.7470861653542438e-06, "loss": 13.3442, "step": 311280 }, { "epoch": 0.6288254948144976, "grad_norm": 154.7242431640625, "learning_rate": 3.746748239440633e-06, "loss": 20.0317, "step": 311290 }, { "epoch": 0.6288456954471814, "grad_norm": 104.42123413085938, "learning_rate": 3.7464103196352176e-06, "loss": 16.9928, "step": 311300 }, { "epoch": 0.6288658960798652, "grad_norm": 1282.882080078125, "learning_rate": 3.746072405939646e-06, "loss": 50.8205, "step": 311310 }, { "epoch": 0.628886096712549, "grad_norm": 166.77943420410156, "learning_rate": 3.7457344983555666e-06, "loss": 28.4517, "step": 311320 }, { "epoch": 0.6289062973452328, "grad_norm": 320.562255859375, "learning_rate": 3.7453965968846244e-06, "loss": 7.2636, "step": 311330 }, { "epoch": 0.6289264979779167, "grad_norm": 278.0227966308594, "learning_rate": 3.7450587015284655e-06, "loss": 14.0742, "step": 311340 }, { "epoch": 0.6289466986106005, "grad_norm": 171.71270751953125, "learning_rate": 3.7447208122887425e-06, "loss": 26.8039, "step": 311350 }, { "epoch": 0.6289668992432843, "grad_norm": 817.5725708007812, "learning_rate": 3.744382929167094e-06, "loss": 21.6986, "step": 311360 }, { "epoch": 0.6289870998759681, "grad_norm": 379.41632080078125, "learning_rate": 3.744045052165172e-06, "loss": 19.9775, "step": 311370 }, { "epoch": 0.629007300508652, "grad_norm": 432.3046875, "learning_rate": 3.7437071812846216e-06, "loss": 11.5563, "step": 311380 }, { "epoch": 0.6290275011413358, "grad_norm": 166.07656860351562, "learning_rate": 3.7433693165270918e-06, "loss": 16.0235, "step": 311390 }, { "epoch": 0.6290477017740196, "grad_norm": 257.5623779296875, "learning_rate": 3.7430314578942263e-06, "loss": 14.7367, "step": 311400 }, { "epoch": 0.6290679024067034, "grad_norm": 334.16204833984375, "learning_rate": 3.7426936053876715e-06, "loss": 21.4727, "step": 311410 }, { "epoch": 0.6290881030393872, "grad_norm": 461.54217529296875, "learning_rate": 3.74235575900908e-06, "loss": 20.1366, "step": 311420 }, { "epoch": 0.629108303672071, "grad_norm": 3.0814929008483887, "learning_rate": 3.742017918760089e-06, "loss": 13.944, "step": 311430 }, { "epoch": 0.6291285043047549, "grad_norm": 274.78375244140625, "learning_rate": 3.741680084642353e-06, "loss": 12.6901, "step": 311440 }, { "epoch": 0.6291487049374387, "grad_norm": 338.3866882324219, "learning_rate": 3.7413422566575153e-06, "loss": 16.022, "step": 311450 }, { "epoch": 0.6291689055701225, "grad_norm": 77.720703125, "learning_rate": 3.741004434807223e-06, "loss": 14.5764, "step": 311460 }, { "epoch": 0.6291891062028063, "grad_norm": 512.5720825195312, "learning_rate": 3.7406666190931213e-06, "loss": 12.3219, "step": 311470 }, { "epoch": 0.62920930683549, "grad_norm": 237.6043701171875, "learning_rate": 3.740328809516859e-06, "loss": 10.2574, "step": 311480 }, { "epoch": 0.6292295074681739, "grad_norm": 348.8592224121094, "learning_rate": 3.7399910060800806e-06, "loss": 19.8151, "step": 311490 }, { "epoch": 0.6292497081008577, "grad_norm": 282.4590759277344, "learning_rate": 3.7396532087844318e-06, "loss": 18.9101, "step": 311500 }, { "epoch": 0.6292699087335415, "grad_norm": 464.2401123046875, "learning_rate": 3.7393154176315637e-06, "loss": 15.6861, "step": 311510 }, { "epoch": 0.6292901093662253, "grad_norm": 203.9333953857422, "learning_rate": 3.7389776326231163e-06, "loss": 15.1083, "step": 311520 }, { "epoch": 0.6293103099989091, "grad_norm": 132.67742919921875, "learning_rate": 3.73863985376074e-06, "loss": 34.4352, "step": 311530 }, { "epoch": 0.629330510631593, "grad_norm": 454.20648193359375, "learning_rate": 3.73830208104608e-06, "loss": 21.2693, "step": 311540 }, { "epoch": 0.6293507112642768, "grad_norm": 250.0864715576172, "learning_rate": 3.7379643144807835e-06, "loss": 21.0507, "step": 311550 }, { "epoch": 0.6293709118969606, "grad_norm": 339.6805114746094, "learning_rate": 3.737626554066495e-06, "loss": 28.867, "step": 311560 }, { "epoch": 0.6293911125296444, "grad_norm": 748.601806640625, "learning_rate": 3.7372887998048608e-06, "loss": 57.2672, "step": 311570 }, { "epoch": 0.6294113131623282, "grad_norm": 105.03008270263672, "learning_rate": 3.7369510516975303e-06, "loss": 15.5894, "step": 311580 }, { "epoch": 0.629431513795012, "grad_norm": 229.1537322998047, "learning_rate": 3.736613309746145e-06, "loss": 17.2978, "step": 311590 }, { "epoch": 0.6294517144276959, "grad_norm": 232.21670532226562, "learning_rate": 3.736275573952354e-06, "loss": 22.0504, "step": 311600 }, { "epoch": 0.6294719150603797, "grad_norm": 1.611695647239685, "learning_rate": 3.735937844317803e-06, "loss": 12.0019, "step": 311610 }, { "epoch": 0.6294921156930635, "grad_norm": 31.694395065307617, "learning_rate": 3.735600120844137e-06, "loss": 29.8588, "step": 311620 }, { "epoch": 0.6295123163257473, "grad_norm": 920.4688110351562, "learning_rate": 3.735262403533002e-06, "loss": 31.1003, "step": 311630 }, { "epoch": 0.6295325169584312, "grad_norm": 276.0377197265625, "learning_rate": 3.7349246923860465e-06, "loss": 10.0236, "step": 311640 }, { "epoch": 0.629552717591115, "grad_norm": 288.60174560546875, "learning_rate": 3.7345869874049136e-06, "loss": 15.3891, "step": 311650 }, { "epoch": 0.6295729182237988, "grad_norm": 621.805419921875, "learning_rate": 3.734249288591249e-06, "loss": 14.969, "step": 311660 }, { "epoch": 0.6295931188564826, "grad_norm": 578.4391479492188, "learning_rate": 3.733911595946701e-06, "loss": 18.3596, "step": 311670 }, { "epoch": 0.6296133194891664, "grad_norm": 389.93359375, "learning_rate": 3.7335739094729153e-06, "loss": 19.5449, "step": 311680 }, { "epoch": 0.6296335201218503, "grad_norm": 241.62063598632812, "learning_rate": 3.7332362291715353e-06, "loss": 17.2236, "step": 311690 }, { "epoch": 0.6296537207545341, "grad_norm": 693.076171875, "learning_rate": 3.7328985550442086e-06, "loss": 25.9599, "step": 311700 }, { "epoch": 0.6296739213872179, "grad_norm": 212.7881622314453, "learning_rate": 3.7325608870925817e-06, "loss": 24.7685, "step": 311710 }, { "epoch": 0.6296941220199017, "grad_norm": 260.7557373046875, "learning_rate": 3.7322232253182984e-06, "loss": 16.5121, "step": 311720 }, { "epoch": 0.6297143226525855, "grad_norm": 61.2099609375, "learning_rate": 3.731885569723004e-06, "loss": 14.298, "step": 311730 }, { "epoch": 0.6297345232852692, "grad_norm": 429.2540588378906, "learning_rate": 3.7315479203083483e-06, "loss": 8.165, "step": 311740 }, { "epoch": 0.6297547239179531, "grad_norm": 415.48736572265625, "learning_rate": 3.7312102770759724e-06, "loss": 15.9682, "step": 311750 }, { "epoch": 0.6297749245506369, "grad_norm": 34.38711929321289, "learning_rate": 3.7308726400275243e-06, "loss": 12.7238, "step": 311760 }, { "epoch": 0.6297951251833207, "grad_norm": 399.3262939453125, "learning_rate": 3.7305350091646496e-06, "loss": 24.4593, "step": 311770 }, { "epoch": 0.6298153258160045, "grad_norm": 425.7074279785156, "learning_rate": 3.7301973844889922e-06, "loss": 21.93, "step": 311780 }, { "epoch": 0.6298355264486883, "grad_norm": 58.11330795288086, "learning_rate": 3.729859766002198e-06, "loss": 10.7896, "step": 311790 }, { "epoch": 0.6298557270813722, "grad_norm": 13.040654182434082, "learning_rate": 3.7295221537059162e-06, "loss": 27.1178, "step": 311800 }, { "epoch": 0.629875927714056, "grad_norm": 131.2740020751953, "learning_rate": 3.729184547601786e-06, "loss": 29.9941, "step": 311810 }, { "epoch": 0.6298961283467398, "grad_norm": 291.7857666015625, "learning_rate": 3.728846947691458e-06, "loss": 22.0604, "step": 311820 }, { "epoch": 0.6299163289794236, "grad_norm": 199.2877960205078, "learning_rate": 3.7285093539765747e-06, "loss": 13.7979, "step": 311830 }, { "epoch": 0.6299365296121074, "grad_norm": 222.5818634033203, "learning_rate": 3.728171766458785e-06, "loss": 8.2953, "step": 311840 }, { "epoch": 0.6299567302447913, "grad_norm": 489.9476623535156, "learning_rate": 3.72783418513973e-06, "loss": 27.8479, "step": 311850 }, { "epoch": 0.6299769308774751, "grad_norm": 98.80862426757812, "learning_rate": 3.727496610021055e-06, "loss": 15.6872, "step": 311860 }, { "epoch": 0.6299971315101589, "grad_norm": 332.0095520019531, "learning_rate": 3.727159041104412e-06, "loss": 10.0108, "step": 311870 }, { "epoch": 0.6300173321428427, "grad_norm": 0.0, "learning_rate": 3.7268214783914375e-06, "loss": 17.8692, "step": 311880 }, { "epoch": 0.6300375327755265, "grad_norm": 136.3187255859375, "learning_rate": 3.7264839218837817e-06, "loss": 11.6416, "step": 311890 }, { "epoch": 0.6300577334082104, "grad_norm": 201.7420654296875, "learning_rate": 3.7261463715830902e-06, "loss": 10.2812, "step": 311900 }, { "epoch": 0.6300779340408942, "grad_norm": 446.7332458496094, "learning_rate": 3.7258088274910054e-06, "loss": 13.2118, "step": 311910 }, { "epoch": 0.630098134673578, "grad_norm": 1417.181396484375, "learning_rate": 3.725471289609174e-06, "loss": 40.3567, "step": 311920 }, { "epoch": 0.6301183353062618, "grad_norm": 448.58221435546875, "learning_rate": 3.7251337579392415e-06, "loss": 10.6712, "step": 311930 }, { "epoch": 0.6301385359389456, "grad_norm": 149.62139892578125, "learning_rate": 3.724796232482852e-06, "loss": 22.3753, "step": 311940 }, { "epoch": 0.6301587365716295, "grad_norm": 547.2337646484375, "learning_rate": 3.7244587132416497e-06, "loss": 26.8721, "step": 311950 }, { "epoch": 0.6301789372043133, "grad_norm": 397.14422607421875, "learning_rate": 3.7241212002172846e-06, "loss": 11.7029, "step": 311960 }, { "epoch": 0.6301991378369971, "grad_norm": 286.320556640625, "learning_rate": 3.723783693411394e-06, "loss": 19.6086, "step": 311970 }, { "epoch": 0.6302193384696809, "grad_norm": 9.396160125732422, "learning_rate": 3.723446192825628e-06, "loss": 12.8177, "step": 311980 }, { "epoch": 0.6302395391023646, "grad_norm": 278.76129150390625, "learning_rate": 3.7231086984616312e-06, "loss": 6.8438, "step": 311990 }, { "epoch": 0.6302597397350485, "grad_norm": 245.7313995361328, "learning_rate": 3.7227712103210485e-06, "loss": 10.3221, "step": 312000 }, { "epoch": 0.6302799403677323, "grad_norm": 147.922607421875, "learning_rate": 3.722433728405522e-06, "loss": 9.7016, "step": 312010 }, { "epoch": 0.6303001410004161, "grad_norm": 130.52450561523438, "learning_rate": 3.7220962527166994e-06, "loss": 10.5042, "step": 312020 }, { "epoch": 0.6303203416330999, "grad_norm": 307.421142578125, "learning_rate": 3.7217587832562264e-06, "loss": 22.5217, "step": 312030 }, { "epoch": 0.6303405422657837, "grad_norm": 88.36876678466797, "learning_rate": 3.7214213200257433e-06, "loss": 17.2442, "step": 312040 }, { "epoch": 0.6303607428984676, "grad_norm": 512.6255493164062, "learning_rate": 3.7210838630268986e-06, "loss": 13.2784, "step": 312050 }, { "epoch": 0.6303809435311514, "grad_norm": 399.8426513671875, "learning_rate": 3.720746412261337e-06, "loss": 12.5787, "step": 312060 }, { "epoch": 0.6304011441638352, "grad_norm": 251.91604614257812, "learning_rate": 3.7204089677307015e-06, "loss": 9.6357, "step": 312070 }, { "epoch": 0.630421344796519, "grad_norm": 31.64736557006836, "learning_rate": 3.7200715294366376e-06, "loss": 9.7649, "step": 312080 }, { "epoch": 0.6304415454292028, "grad_norm": 309.552734375, "learning_rate": 3.7197340973807905e-06, "loss": 12.2956, "step": 312090 }, { "epoch": 0.6304617460618867, "grad_norm": 240.76905822753906, "learning_rate": 3.7193966715648026e-06, "loss": 16.3798, "step": 312100 }, { "epoch": 0.6304819466945705, "grad_norm": 398.55462646484375, "learning_rate": 3.7190592519903198e-06, "loss": 14.0385, "step": 312110 }, { "epoch": 0.6305021473272543, "grad_norm": 191.8442840576172, "learning_rate": 3.71872183865899e-06, "loss": 24.7597, "step": 312120 }, { "epoch": 0.6305223479599381, "grad_norm": 258.1017150878906, "learning_rate": 3.7183844315724505e-06, "loss": 14.269, "step": 312130 }, { "epoch": 0.6305425485926219, "grad_norm": 228.763427734375, "learning_rate": 3.718047030732352e-06, "loss": 19.4222, "step": 312140 }, { "epoch": 0.6305627492253058, "grad_norm": 586.2548828125, "learning_rate": 3.7177096361403362e-06, "loss": 14.2582, "step": 312150 }, { "epoch": 0.6305829498579896, "grad_norm": 558.8289794921875, "learning_rate": 3.717372247798049e-06, "loss": 18.4018, "step": 312160 }, { "epoch": 0.6306031504906734, "grad_norm": 399.59527587890625, "learning_rate": 3.717034865707133e-06, "loss": 20.9171, "step": 312170 }, { "epoch": 0.6306233511233572, "grad_norm": 326.2173156738281, "learning_rate": 3.7166974898692324e-06, "loss": 21.3684, "step": 312180 }, { "epoch": 0.630643551756041, "grad_norm": 346.50213623046875, "learning_rate": 3.7163601202859963e-06, "loss": 23.952, "step": 312190 }, { "epoch": 0.6306637523887249, "grad_norm": 736.0863037109375, "learning_rate": 3.716022756959061e-06, "loss": 24.0658, "step": 312200 }, { "epoch": 0.6306839530214087, "grad_norm": 240.00802612304688, "learning_rate": 3.715685399890078e-06, "loss": 15.4818, "step": 312210 }, { "epoch": 0.6307041536540925, "grad_norm": 281.0865478515625, "learning_rate": 3.7153480490806883e-06, "loss": 10.2372, "step": 312220 }, { "epoch": 0.6307243542867763, "grad_norm": 260.1144104003906, "learning_rate": 3.715010704532535e-06, "loss": 14.7638, "step": 312230 }, { "epoch": 0.6307445549194601, "grad_norm": 293.3788146972656, "learning_rate": 3.7146733662472645e-06, "loss": 23.6026, "step": 312240 }, { "epoch": 0.6307647555521438, "grad_norm": 267.4674377441406, "learning_rate": 3.7143360342265206e-06, "loss": 24.2201, "step": 312250 }, { "epoch": 0.6307849561848277, "grad_norm": 384.4147033691406, "learning_rate": 3.7139987084719463e-06, "loss": 13.7532, "step": 312260 }, { "epoch": 0.6308051568175115, "grad_norm": 145.49034118652344, "learning_rate": 3.7136613889851847e-06, "loss": 14.134, "step": 312270 }, { "epoch": 0.6308253574501953, "grad_norm": 293.34906005859375, "learning_rate": 3.7133240757678835e-06, "loss": 14.2921, "step": 312280 }, { "epoch": 0.6308455580828791, "grad_norm": 590.4736938476562, "learning_rate": 3.7129867688216848e-06, "loss": 31.6446, "step": 312290 }, { "epoch": 0.6308657587155629, "grad_norm": 264.8109130859375, "learning_rate": 3.7126494681482317e-06, "loss": 28.9959, "step": 312300 }, { "epoch": 0.6308859593482468, "grad_norm": 103.6928939819336, "learning_rate": 3.712312173749169e-06, "loss": 26.3693, "step": 312310 }, { "epoch": 0.6309061599809306, "grad_norm": 389.7435302734375, "learning_rate": 3.7119748856261416e-06, "loss": 16.4251, "step": 312320 }, { "epoch": 0.6309263606136144, "grad_norm": 159.6335906982422, "learning_rate": 3.7116376037807915e-06, "loss": 15.4602, "step": 312330 }, { "epoch": 0.6309465612462982, "grad_norm": 404.8928527832031, "learning_rate": 3.7113003282147625e-06, "loss": 24.47, "step": 312340 }, { "epoch": 0.630966761878982, "grad_norm": 88.96041870117188, "learning_rate": 3.7109630589297014e-06, "loss": 17.3054, "step": 312350 }, { "epoch": 0.6309869625116659, "grad_norm": 253.132080078125, "learning_rate": 3.710625795927249e-06, "loss": 18.7442, "step": 312360 }, { "epoch": 0.6310071631443497, "grad_norm": 415.9126892089844, "learning_rate": 3.7102885392090497e-06, "loss": 13.7235, "step": 312370 }, { "epoch": 0.6310273637770335, "grad_norm": 488.16314697265625, "learning_rate": 3.709951288776749e-06, "loss": 15.5005, "step": 312380 }, { "epoch": 0.6310475644097173, "grad_norm": 305.920654296875, "learning_rate": 3.7096140446319884e-06, "loss": 26.9169, "step": 312390 }, { "epoch": 0.6310677650424011, "grad_norm": 543.7438354492188, "learning_rate": 3.709276806776412e-06, "loss": 27.8839, "step": 312400 }, { "epoch": 0.631087965675085, "grad_norm": 267.5404052734375, "learning_rate": 3.7089395752116653e-06, "loss": 29.8336, "step": 312410 }, { "epoch": 0.6311081663077688, "grad_norm": 256.7159729003906, "learning_rate": 3.7086023499393887e-06, "loss": 21.8409, "step": 312420 }, { "epoch": 0.6311283669404526, "grad_norm": 60.08151626586914, "learning_rate": 3.7082651309612283e-06, "loss": 16.7557, "step": 312430 }, { "epoch": 0.6311485675731364, "grad_norm": 507.7788391113281, "learning_rate": 3.7079279182788263e-06, "loss": 24.8461, "step": 312440 }, { "epoch": 0.6311687682058202, "grad_norm": 215.65121459960938, "learning_rate": 3.707590711893829e-06, "loss": 20.0936, "step": 312450 }, { "epoch": 0.6311889688385041, "grad_norm": 102.21309661865234, "learning_rate": 3.707253511807877e-06, "loss": 15.7694, "step": 312460 }, { "epoch": 0.6312091694711879, "grad_norm": 130.5305938720703, "learning_rate": 3.706916318022612e-06, "loss": 13.9427, "step": 312470 }, { "epoch": 0.6312293701038717, "grad_norm": 254.06326293945312, "learning_rate": 3.7065791305396846e-06, "loss": 22.0087, "step": 312480 }, { "epoch": 0.6312495707365555, "grad_norm": 215.6504364013672, "learning_rate": 3.70624194936073e-06, "loss": 16.7561, "step": 312490 }, { "epoch": 0.6312697713692392, "grad_norm": 142.6365509033203, "learning_rate": 3.705904774487396e-06, "loss": 13.9377, "step": 312500 }, { "epoch": 0.631289972001923, "grad_norm": 244.43226623535156, "learning_rate": 3.7055676059213265e-06, "loss": 14.4252, "step": 312510 }, { "epoch": 0.6313101726346069, "grad_norm": 265.65655517578125, "learning_rate": 3.705230443664163e-06, "loss": 19.5549, "step": 312520 }, { "epoch": 0.6313303732672907, "grad_norm": 28.356204986572266, "learning_rate": 3.704893287717548e-06, "loss": 28.269, "step": 312530 }, { "epoch": 0.6313505738999745, "grad_norm": 394.1058654785156, "learning_rate": 3.7045561380831287e-06, "loss": 23.4691, "step": 312540 }, { "epoch": 0.6313707745326583, "grad_norm": 195.56080627441406, "learning_rate": 3.704218994762543e-06, "loss": 16.6807, "step": 312550 }, { "epoch": 0.6313909751653421, "grad_norm": 13.253167152404785, "learning_rate": 3.7038818577574363e-06, "loss": 19.6141, "step": 312560 }, { "epoch": 0.631411175798026, "grad_norm": 68.04635620117188, "learning_rate": 3.7035447270694558e-06, "loss": 9.0746, "step": 312570 }, { "epoch": 0.6314313764307098, "grad_norm": 268.0568542480469, "learning_rate": 3.7032076027002377e-06, "loss": 21.4373, "step": 312580 }, { "epoch": 0.6314515770633936, "grad_norm": 381.2050476074219, "learning_rate": 3.7028704846514296e-06, "loss": 31.8898, "step": 312590 }, { "epoch": 0.6314717776960774, "grad_norm": 367.7728271484375, "learning_rate": 3.7025333729246733e-06, "loss": 17.771, "step": 312600 }, { "epoch": 0.6314919783287612, "grad_norm": 196.080078125, "learning_rate": 3.7021962675216126e-06, "loss": 15.2784, "step": 312610 }, { "epoch": 0.6315121789614451, "grad_norm": 82.24706268310547, "learning_rate": 3.70185916844389e-06, "loss": 13.5813, "step": 312620 }, { "epoch": 0.6315323795941289, "grad_norm": 237.77540588378906, "learning_rate": 3.701522075693146e-06, "loss": 16.7629, "step": 312630 }, { "epoch": 0.6315525802268127, "grad_norm": 263.4375305175781, "learning_rate": 3.7011849892710293e-06, "loss": 22.9795, "step": 312640 }, { "epoch": 0.6315727808594965, "grad_norm": 311.44183349609375, "learning_rate": 3.700847909179177e-06, "loss": 47.658, "step": 312650 }, { "epoch": 0.6315929814921803, "grad_norm": 262.5174255371094, "learning_rate": 3.7005108354192356e-06, "loss": 18.4144, "step": 312660 }, { "epoch": 0.6316131821248642, "grad_norm": 163.56884765625, "learning_rate": 3.7001737679928467e-06, "loss": 9.9149, "step": 312670 }, { "epoch": 0.631633382757548, "grad_norm": 509.4683837890625, "learning_rate": 3.6998367069016527e-06, "loss": 34.7653, "step": 312680 }, { "epoch": 0.6316535833902318, "grad_norm": 108.41532897949219, "learning_rate": 3.699499652147297e-06, "loss": 20.6908, "step": 312690 }, { "epoch": 0.6316737840229156, "grad_norm": 273.4623718261719, "learning_rate": 3.699162603731423e-06, "loss": 10.6264, "step": 312700 }, { "epoch": 0.6316939846555994, "grad_norm": 383.9814147949219, "learning_rate": 3.6988255616556725e-06, "loss": 11.5617, "step": 312710 }, { "epoch": 0.6317141852882833, "grad_norm": 166.4546356201172, "learning_rate": 3.6984885259216866e-06, "loss": 11.3979, "step": 312720 }, { "epoch": 0.6317343859209671, "grad_norm": 231.18899536132812, "learning_rate": 3.698151496531111e-06, "loss": 7.7128, "step": 312730 }, { "epoch": 0.6317545865536509, "grad_norm": 404.8709411621094, "learning_rate": 3.697814473485588e-06, "loss": 14.7757, "step": 312740 }, { "epoch": 0.6317747871863347, "grad_norm": 123.81889343261719, "learning_rate": 3.6974774567867586e-06, "loss": 15.2444, "step": 312750 }, { "epoch": 0.6317949878190184, "grad_norm": 351.76324462890625, "learning_rate": 3.6971404464362657e-06, "loss": 36.3919, "step": 312760 }, { "epoch": 0.6318151884517023, "grad_norm": 229.62603759765625, "learning_rate": 3.6968034424357535e-06, "loss": 21.5221, "step": 312770 }, { "epoch": 0.6318353890843861, "grad_norm": 165.27389526367188, "learning_rate": 3.6964664447868626e-06, "loss": 20.332, "step": 312780 }, { "epoch": 0.6318555897170699, "grad_norm": 256.791259765625, "learning_rate": 3.696129453491235e-06, "loss": 16.3332, "step": 312790 }, { "epoch": 0.6318757903497537, "grad_norm": 657.0265502929688, "learning_rate": 3.695792468550517e-06, "loss": 19.6565, "step": 312800 }, { "epoch": 0.6318959909824375, "grad_norm": 210.00546264648438, "learning_rate": 3.6954554899663454e-06, "loss": 16.2604, "step": 312810 }, { "epoch": 0.6319161916151214, "grad_norm": 314.94464111328125, "learning_rate": 3.6951185177403667e-06, "loss": 16.5251, "step": 312820 }, { "epoch": 0.6319363922478052, "grad_norm": 88.86231994628906, "learning_rate": 3.6947815518742226e-06, "loss": 12.7005, "step": 312830 }, { "epoch": 0.631956592880489, "grad_norm": 739.9567260742188, "learning_rate": 3.6944445923695542e-06, "loss": 22.3304, "step": 312840 }, { "epoch": 0.6319767935131728, "grad_norm": 160.5653076171875, "learning_rate": 3.694107639228005e-06, "loss": 11.7529, "step": 312850 }, { "epoch": 0.6319969941458566, "grad_norm": 124.45980072021484, "learning_rate": 3.6937706924512175e-06, "loss": 12.1366, "step": 312860 }, { "epoch": 0.6320171947785405, "grad_norm": 360.82696533203125, "learning_rate": 3.6934337520408313e-06, "loss": 10.6945, "step": 312870 }, { "epoch": 0.6320373954112243, "grad_norm": 219.11952209472656, "learning_rate": 3.6930968179984905e-06, "loss": 22.1858, "step": 312880 }, { "epoch": 0.6320575960439081, "grad_norm": 144.44671630859375, "learning_rate": 3.6927598903258375e-06, "loss": 12.8361, "step": 312890 }, { "epoch": 0.6320777966765919, "grad_norm": 124.19243621826172, "learning_rate": 3.6924229690245163e-06, "loss": 10.2922, "step": 312900 }, { "epoch": 0.6320979973092757, "grad_norm": 362.5108337402344, "learning_rate": 3.6920860540961656e-06, "loss": 17.2679, "step": 312910 }, { "epoch": 0.6321181979419596, "grad_norm": 345.67852783203125, "learning_rate": 3.6917491455424285e-06, "loss": 13.1962, "step": 312920 }, { "epoch": 0.6321383985746434, "grad_norm": 147.66104125976562, "learning_rate": 3.691412243364949e-06, "loss": 21.7728, "step": 312930 }, { "epoch": 0.6321585992073272, "grad_norm": 161.53424072265625, "learning_rate": 3.691075347565366e-06, "loss": 13.3574, "step": 312940 }, { "epoch": 0.632178799840011, "grad_norm": 275.5355224609375, "learning_rate": 3.690738458145322e-06, "loss": 12.8531, "step": 312950 }, { "epoch": 0.6321990004726948, "grad_norm": 414.12725830078125, "learning_rate": 3.6904015751064637e-06, "loss": 22.7744, "step": 312960 }, { "epoch": 0.6322192011053787, "grad_norm": 39.2047233581543, "learning_rate": 3.690064698450425e-06, "loss": 20.6745, "step": 312970 }, { "epoch": 0.6322394017380625, "grad_norm": 384.0627746582031, "learning_rate": 3.689727828178854e-06, "loss": 10.3753, "step": 312980 }, { "epoch": 0.6322596023707463, "grad_norm": 317.9337463378906, "learning_rate": 3.689390964293391e-06, "loss": 9.5181, "step": 312990 }, { "epoch": 0.6322798030034301, "grad_norm": 241.99366760253906, "learning_rate": 3.6890541067956775e-06, "loss": 10.4828, "step": 313000 }, { "epoch": 0.6323000036361138, "grad_norm": 271.6126708984375, "learning_rate": 3.6887172556873545e-06, "loss": 17.536, "step": 313010 }, { "epoch": 0.6323202042687976, "grad_norm": 131.71046447753906, "learning_rate": 3.688380410970066e-06, "loss": 18.6074, "step": 313020 }, { "epoch": 0.6323404049014815, "grad_norm": 350.4556579589844, "learning_rate": 3.68804357264545e-06, "loss": 23.0421, "step": 313030 }, { "epoch": 0.6323606055341653, "grad_norm": 167.21937561035156, "learning_rate": 3.6877067407151514e-06, "loss": 20.2272, "step": 313040 }, { "epoch": 0.6323808061668491, "grad_norm": 620.4806518554688, "learning_rate": 3.6873699151808105e-06, "loss": 16.567, "step": 313050 }, { "epoch": 0.6324010067995329, "grad_norm": 265.4610290527344, "learning_rate": 3.6870330960440713e-06, "loss": 29.1275, "step": 313060 }, { "epoch": 0.6324212074322167, "grad_norm": 151.8496856689453, "learning_rate": 3.686696283306572e-06, "loss": 9.2684, "step": 313070 }, { "epoch": 0.6324414080649006, "grad_norm": 161.9957733154297, "learning_rate": 3.686359476969957e-06, "loss": 9.0588, "step": 313080 }, { "epoch": 0.6324616086975844, "grad_norm": 508.14886474609375, "learning_rate": 3.6860226770358663e-06, "loss": 22.74, "step": 313090 }, { "epoch": 0.6324818093302682, "grad_norm": 189.8013916015625, "learning_rate": 3.68568588350594e-06, "loss": 14.3833, "step": 313100 }, { "epoch": 0.632502009962952, "grad_norm": 159.2688751220703, "learning_rate": 3.6853490963818224e-06, "loss": 16.4092, "step": 313110 }, { "epoch": 0.6325222105956358, "grad_norm": 0.0, "learning_rate": 3.6850123156651544e-06, "loss": 15.9755, "step": 313120 }, { "epoch": 0.6325424112283197, "grad_norm": 175.84410095214844, "learning_rate": 3.6846755413575764e-06, "loss": 16.4347, "step": 313130 }, { "epoch": 0.6325626118610035, "grad_norm": 434.4258728027344, "learning_rate": 3.6843387734607304e-06, "loss": 17.074, "step": 313140 }, { "epoch": 0.6325828124936873, "grad_norm": 13.98954963684082, "learning_rate": 3.684002011976259e-06, "loss": 11.0954, "step": 313150 }, { "epoch": 0.6326030131263711, "grad_norm": 514.1160888671875, "learning_rate": 3.6836652569057994e-06, "loss": 17.6079, "step": 313160 }, { "epoch": 0.632623213759055, "grad_norm": 304.1145324707031, "learning_rate": 3.6833285082509962e-06, "loss": 13.3035, "step": 313170 }, { "epoch": 0.6326434143917388, "grad_norm": 227.67115783691406, "learning_rate": 3.682991766013493e-06, "loss": 15.2199, "step": 313180 }, { "epoch": 0.6326636150244226, "grad_norm": 326.3795166015625, "learning_rate": 3.6826550301949248e-06, "loss": 20.1318, "step": 313190 }, { "epoch": 0.6326838156571064, "grad_norm": 264.03741455078125, "learning_rate": 3.6823183007969375e-06, "loss": 32.1759, "step": 313200 }, { "epoch": 0.6327040162897902, "grad_norm": 294.1405944824219, "learning_rate": 3.681981577821171e-06, "loss": 14.2544, "step": 313210 }, { "epoch": 0.632724216922474, "grad_norm": 407.8190002441406, "learning_rate": 3.681644861269267e-06, "loss": 13.5218, "step": 313220 }, { "epoch": 0.6327444175551579, "grad_norm": 426.8134460449219, "learning_rate": 3.681308151142866e-06, "loss": 19.0439, "step": 313230 }, { "epoch": 0.6327646181878417, "grad_norm": 140.0265655517578, "learning_rate": 3.6809714474436075e-06, "loss": 21.5695, "step": 313240 }, { "epoch": 0.6327848188205255, "grad_norm": 327.1999206542969, "learning_rate": 3.680634750173137e-06, "loss": 15.2896, "step": 313250 }, { "epoch": 0.6328050194532093, "grad_norm": 76.1364517211914, "learning_rate": 3.6802980593330893e-06, "loss": 22.0832, "step": 313260 }, { "epoch": 0.632825220085893, "grad_norm": 295.1089172363281, "learning_rate": 3.6799613749251105e-06, "loss": 9.7829, "step": 313270 }, { "epoch": 0.6328454207185769, "grad_norm": 46.40315628051758, "learning_rate": 3.6796246969508408e-06, "loss": 20.6966, "step": 313280 }, { "epoch": 0.6328656213512607, "grad_norm": 320.0140686035156, "learning_rate": 3.6792880254119195e-06, "loss": 20.6541, "step": 313290 }, { "epoch": 0.6328858219839445, "grad_norm": 513.673828125, "learning_rate": 3.678951360309988e-06, "loss": 21.4669, "step": 313300 }, { "epoch": 0.6329060226166283, "grad_norm": 141.2325897216797, "learning_rate": 3.678614701646688e-06, "loss": 13.7004, "step": 313310 }, { "epoch": 0.6329262232493121, "grad_norm": 260.0235900878906, "learning_rate": 3.678278049423659e-06, "loss": 18.2647, "step": 313320 }, { "epoch": 0.632946423881996, "grad_norm": 262.2864990234375, "learning_rate": 3.677941403642541e-06, "loss": 22.6314, "step": 313330 }, { "epoch": 0.6329666245146798, "grad_norm": 247.51910400390625, "learning_rate": 3.6776047643049777e-06, "loss": 15.0109, "step": 313340 }, { "epoch": 0.6329868251473636, "grad_norm": 442.2358093261719, "learning_rate": 3.6772681314126097e-06, "loss": 20.2799, "step": 313350 }, { "epoch": 0.6330070257800474, "grad_norm": 94.98969268798828, "learning_rate": 3.676931504967075e-06, "loss": 15.4025, "step": 313360 }, { "epoch": 0.6330272264127312, "grad_norm": 279.63409423828125, "learning_rate": 3.6765948849700155e-06, "loss": 26.461, "step": 313370 }, { "epoch": 0.6330474270454151, "grad_norm": 260.0960693359375, "learning_rate": 3.6762582714230733e-06, "loss": 15.8332, "step": 313380 }, { "epoch": 0.6330676276780989, "grad_norm": 290.3099060058594, "learning_rate": 3.6759216643278865e-06, "loss": 32.5975, "step": 313390 }, { "epoch": 0.6330878283107827, "grad_norm": 279.6096496582031, "learning_rate": 3.6755850636860956e-06, "loss": 35.5641, "step": 313400 }, { "epoch": 0.6331080289434665, "grad_norm": 123.82149505615234, "learning_rate": 3.675248469499346e-06, "loss": 41.5863, "step": 313410 }, { "epoch": 0.6331282295761503, "grad_norm": 85.4517593383789, "learning_rate": 3.674911881769272e-06, "loss": 8.2691, "step": 313420 }, { "epoch": 0.6331484302088342, "grad_norm": 268.63995361328125, "learning_rate": 3.674575300497517e-06, "loss": 21.1616, "step": 313430 }, { "epoch": 0.633168630841518, "grad_norm": 8306.4150390625, "learning_rate": 3.6742387256857224e-06, "loss": 31.8844, "step": 313440 }, { "epoch": 0.6331888314742018, "grad_norm": 133.6696014404297, "learning_rate": 3.6739021573355273e-06, "loss": 17.4917, "step": 313450 }, { "epoch": 0.6332090321068856, "grad_norm": 76.24140930175781, "learning_rate": 3.673565595448572e-06, "loss": 10.0239, "step": 313460 }, { "epoch": 0.6332292327395694, "grad_norm": 96.19934844970703, "learning_rate": 3.673229040026497e-06, "loss": 18.7337, "step": 313470 }, { "epoch": 0.6332494333722533, "grad_norm": 176.97816467285156, "learning_rate": 3.672892491070943e-06, "loss": 25.2333, "step": 313480 }, { "epoch": 0.6332696340049371, "grad_norm": 93.71099090576172, "learning_rate": 3.672555948583548e-06, "loss": 17.9128, "step": 313490 }, { "epoch": 0.6332898346376209, "grad_norm": 554.1264038085938, "learning_rate": 3.672219412565956e-06, "loss": 29.8852, "step": 313500 }, { "epoch": 0.6333100352703047, "grad_norm": 64.62335968017578, "learning_rate": 3.671882883019806e-06, "loss": 8.5988, "step": 313510 }, { "epoch": 0.6333302359029885, "grad_norm": 399.4989013671875, "learning_rate": 3.6715463599467372e-06, "loss": 22.8686, "step": 313520 }, { "epoch": 0.6333504365356722, "grad_norm": 172.01165771484375, "learning_rate": 3.67120984334839e-06, "loss": 17.6735, "step": 313530 }, { "epoch": 0.6333706371683561, "grad_norm": 224.0727996826172, "learning_rate": 3.670873333226407e-06, "loss": 16.1169, "step": 313540 }, { "epoch": 0.6333908378010399, "grad_norm": 568.7155151367188, "learning_rate": 3.670536829582424e-06, "loss": 28.8924, "step": 313550 }, { "epoch": 0.6334110384337237, "grad_norm": 1196.2305908203125, "learning_rate": 3.6702003324180823e-06, "loss": 23.4738, "step": 313560 }, { "epoch": 0.6334312390664075, "grad_norm": 190.74761962890625, "learning_rate": 3.669863841735026e-06, "loss": 10.9379, "step": 313570 }, { "epoch": 0.6334514396990913, "grad_norm": 221.78167724609375, "learning_rate": 3.669527357534889e-06, "loss": 25.5708, "step": 313580 }, { "epoch": 0.6334716403317752, "grad_norm": 244.38023376464844, "learning_rate": 3.6691908798193155e-06, "loss": 11.4495, "step": 313590 }, { "epoch": 0.633491840964459, "grad_norm": 400.46600341796875, "learning_rate": 3.668854408589945e-06, "loss": 23.2112, "step": 313600 }, { "epoch": 0.6335120415971428, "grad_norm": 157.48419189453125, "learning_rate": 3.668517943848416e-06, "loss": 12.4654, "step": 313610 }, { "epoch": 0.6335322422298266, "grad_norm": 245.6942596435547, "learning_rate": 3.6681814855963687e-06, "loss": 15.0347, "step": 313620 }, { "epoch": 0.6335524428625104, "grad_norm": 616.5487670898438, "learning_rate": 3.6678450338354443e-06, "loss": 25.4519, "step": 313630 }, { "epoch": 0.6335726434951943, "grad_norm": 171.87109375, "learning_rate": 3.667508588567281e-06, "loss": 12.2172, "step": 313640 }, { "epoch": 0.6335928441278781, "grad_norm": 22.596256256103516, "learning_rate": 3.6671721497935177e-06, "loss": 28.3764, "step": 313650 }, { "epoch": 0.6336130447605619, "grad_norm": 312.84515380859375, "learning_rate": 3.6668357175157974e-06, "loss": 19.2095, "step": 313660 }, { "epoch": 0.6336332453932457, "grad_norm": 338.5978088378906, "learning_rate": 3.666499291735759e-06, "loss": 14.897, "step": 313670 }, { "epoch": 0.6336534460259295, "grad_norm": 202.0045928955078, "learning_rate": 3.66616287245504e-06, "loss": 13.6192, "step": 313680 }, { "epoch": 0.6336736466586134, "grad_norm": 512.7861328125, "learning_rate": 3.6658264596752814e-06, "loss": 20.4466, "step": 313690 }, { "epoch": 0.6336938472912972, "grad_norm": 105.11566925048828, "learning_rate": 3.6654900533981234e-06, "loss": 33.1705, "step": 313700 }, { "epoch": 0.633714047923981, "grad_norm": 341.75872802734375, "learning_rate": 3.6651536536252047e-06, "loss": 14.4481, "step": 313710 }, { "epoch": 0.6337342485566648, "grad_norm": 283.7489318847656, "learning_rate": 3.664817260358164e-06, "loss": 11.2498, "step": 313720 }, { "epoch": 0.6337544491893486, "grad_norm": 284.7729797363281, "learning_rate": 3.6644808735986437e-06, "loss": 15.7539, "step": 313730 }, { "epoch": 0.6337746498220325, "grad_norm": 112.00802612304688, "learning_rate": 3.664144493348281e-06, "loss": 12.1406, "step": 313740 }, { "epoch": 0.6337948504547163, "grad_norm": 247.54251098632812, "learning_rate": 3.663808119608716e-06, "loss": 17.4069, "step": 313750 }, { "epoch": 0.6338150510874001, "grad_norm": 330.23992919921875, "learning_rate": 3.663471752381589e-06, "loss": 9.6154, "step": 313760 }, { "epoch": 0.6338352517200839, "grad_norm": 336.0794982910156, "learning_rate": 3.663135391668538e-06, "loss": 29.6656, "step": 313770 }, { "epoch": 0.6338554523527676, "grad_norm": 131.28431701660156, "learning_rate": 3.662799037471201e-06, "loss": 9.8605, "step": 313780 }, { "epoch": 0.6338756529854515, "grad_norm": 328.9375, "learning_rate": 3.6624626897912213e-06, "loss": 14.2972, "step": 313790 }, { "epoch": 0.6338958536181353, "grad_norm": 220.4369354248047, "learning_rate": 3.6621263486302373e-06, "loss": 10.7666, "step": 313800 }, { "epoch": 0.6339160542508191, "grad_norm": 357.22332763671875, "learning_rate": 3.6617900139898854e-06, "loss": 12.085, "step": 313810 }, { "epoch": 0.6339362548835029, "grad_norm": 64.193359375, "learning_rate": 3.6614536858718074e-06, "loss": 29.0031, "step": 313820 }, { "epoch": 0.6339564555161867, "grad_norm": 298.9850769042969, "learning_rate": 3.661117364277642e-06, "loss": 19.1942, "step": 313830 }, { "epoch": 0.6339766561488706, "grad_norm": 137.28953552246094, "learning_rate": 3.6607810492090278e-06, "loss": 9.9991, "step": 313840 }, { "epoch": 0.6339968567815544, "grad_norm": 59.01148986816406, "learning_rate": 3.6604447406676036e-06, "loss": 13.8817, "step": 313850 }, { "epoch": 0.6340170574142382, "grad_norm": 295.74560546875, "learning_rate": 3.6601084386550117e-06, "loss": 14.4669, "step": 313860 }, { "epoch": 0.634037258046922, "grad_norm": 246.55526733398438, "learning_rate": 3.659772143172886e-06, "loss": 21.7156, "step": 313870 }, { "epoch": 0.6340574586796058, "grad_norm": 235.5973663330078, "learning_rate": 3.659435854222869e-06, "loss": 30.5595, "step": 313880 }, { "epoch": 0.6340776593122897, "grad_norm": 163.76101684570312, "learning_rate": 3.6590995718066003e-06, "loss": 18.1926, "step": 313890 }, { "epoch": 0.6340978599449735, "grad_norm": 199.20704650878906, "learning_rate": 3.6587632959257168e-06, "loss": 19.1956, "step": 313900 }, { "epoch": 0.6341180605776573, "grad_norm": 585.552734375, "learning_rate": 3.658427026581858e-06, "loss": 26.6538, "step": 313910 }, { "epoch": 0.6341382612103411, "grad_norm": 140.07943725585938, "learning_rate": 3.6580907637766642e-06, "loss": 11.1061, "step": 313920 }, { "epoch": 0.6341584618430249, "grad_norm": 190.81179809570312, "learning_rate": 3.657754507511773e-06, "loss": 19.5807, "step": 313930 }, { "epoch": 0.6341786624757088, "grad_norm": 213.81968688964844, "learning_rate": 3.657418257788822e-06, "loss": 17.3365, "step": 313940 }, { "epoch": 0.6341988631083926, "grad_norm": 5.927175521850586, "learning_rate": 3.657082014609452e-06, "loss": 10.8081, "step": 313950 }, { "epoch": 0.6342190637410764, "grad_norm": 308.6035461425781, "learning_rate": 3.656745777975303e-06, "loss": 13.88, "step": 313960 }, { "epoch": 0.6342392643737602, "grad_norm": 72.1675033569336, "learning_rate": 3.6564095478880114e-06, "loss": 14.4682, "step": 313970 }, { "epoch": 0.634259465006444, "grad_norm": 111.05274200439453, "learning_rate": 3.656073324349216e-06, "loss": 10.1505, "step": 313980 }, { "epoch": 0.6342796656391279, "grad_norm": 55.28746795654297, "learning_rate": 3.6557371073605574e-06, "loss": 10.8465, "step": 313990 }, { "epoch": 0.6342998662718117, "grad_norm": 525.4016723632812, "learning_rate": 3.655400896923672e-06, "loss": 28.5347, "step": 314000 }, { "epoch": 0.6343200669044955, "grad_norm": 77.57219696044922, "learning_rate": 3.655064693040199e-06, "loss": 36.7641, "step": 314010 }, { "epoch": 0.6343402675371793, "grad_norm": 65.27912902832031, "learning_rate": 3.6547284957117805e-06, "loss": 20.7673, "step": 314020 }, { "epoch": 0.6343604681698631, "grad_norm": 443.2716369628906, "learning_rate": 3.6543923049400487e-06, "loss": 13.8213, "step": 314030 }, { "epoch": 0.6343806688025468, "grad_norm": 313.86517333984375, "learning_rate": 3.6540561207266475e-06, "loss": 9.4069, "step": 314040 }, { "epoch": 0.6344008694352307, "grad_norm": 74.61280822753906, "learning_rate": 3.653719943073214e-06, "loss": 16.8325, "step": 314050 }, { "epoch": 0.6344210700679145, "grad_norm": 436.91925048828125, "learning_rate": 3.653383771981385e-06, "loss": 21.2599, "step": 314060 }, { "epoch": 0.6344412707005983, "grad_norm": 298.6908264160156, "learning_rate": 3.6530476074528005e-06, "loss": 27.7245, "step": 314070 }, { "epoch": 0.6344614713332821, "grad_norm": 161.84210205078125, "learning_rate": 3.652711449489099e-06, "loss": 23.9463, "step": 314080 }, { "epoch": 0.6344816719659659, "grad_norm": 626.2216796875, "learning_rate": 3.6523752980919183e-06, "loss": 25.5705, "step": 314090 }, { "epoch": 0.6345018725986498, "grad_norm": 165.06883239746094, "learning_rate": 3.6520391532628953e-06, "loss": 7.5111, "step": 314100 }, { "epoch": 0.6345220732313336, "grad_norm": 303.15960693359375, "learning_rate": 3.6517030150036716e-06, "loss": 17.8984, "step": 314110 }, { "epoch": 0.6345422738640174, "grad_norm": 212.03924560546875, "learning_rate": 3.6513668833158846e-06, "loss": 22.4548, "step": 314120 }, { "epoch": 0.6345624744967012, "grad_norm": 193.156494140625, "learning_rate": 3.6510307582011706e-06, "loss": 8.3208, "step": 314130 }, { "epoch": 0.634582675129385, "grad_norm": 250.3523406982422, "learning_rate": 3.650694639661169e-06, "loss": 12.0598, "step": 314140 }, { "epoch": 0.6346028757620689, "grad_norm": 888.268310546875, "learning_rate": 3.6503585276975196e-06, "loss": 20.8779, "step": 314150 }, { "epoch": 0.6346230763947527, "grad_norm": 161.3168487548828, "learning_rate": 3.6500224223118576e-06, "loss": 17.2088, "step": 314160 }, { "epoch": 0.6346432770274365, "grad_norm": 0.0, "learning_rate": 3.6496863235058223e-06, "loss": 28.0337, "step": 314170 }, { "epoch": 0.6346634776601203, "grad_norm": 300.5113830566406, "learning_rate": 3.649350231281054e-06, "loss": 11.6859, "step": 314180 }, { "epoch": 0.6346836782928041, "grad_norm": 126.76072692871094, "learning_rate": 3.6490141456391864e-06, "loss": 18.2505, "step": 314190 }, { "epoch": 0.634703878925488, "grad_norm": 438.8286437988281, "learning_rate": 3.648678066581861e-06, "loss": 38.2602, "step": 314200 }, { "epoch": 0.6347240795581718, "grad_norm": 210.0537109375, "learning_rate": 3.6483419941107156e-06, "loss": 15.7942, "step": 314210 }, { "epoch": 0.6347442801908556, "grad_norm": 187.65684509277344, "learning_rate": 3.6480059282273872e-06, "loss": 10.2469, "step": 314220 }, { "epoch": 0.6347644808235394, "grad_norm": 144.2391815185547, "learning_rate": 3.647669868933513e-06, "loss": 12.7552, "step": 314230 }, { "epoch": 0.6347846814562232, "grad_norm": 326.87579345703125, "learning_rate": 3.6473338162307314e-06, "loss": 21.5999, "step": 314240 }, { "epoch": 0.6348048820889071, "grad_norm": 58.100440979003906, "learning_rate": 3.6469977701206833e-06, "loss": 12.7731, "step": 314250 }, { "epoch": 0.6348250827215909, "grad_norm": 267.8852844238281, "learning_rate": 3.6466617306050014e-06, "loss": 17.4556, "step": 314260 }, { "epoch": 0.6348452833542747, "grad_norm": 243.64529418945312, "learning_rate": 3.646325697685327e-06, "loss": 19.9132, "step": 314270 }, { "epoch": 0.6348654839869585, "grad_norm": 39.2636833190918, "learning_rate": 3.645989671363297e-06, "loss": 14.802, "step": 314280 }, { "epoch": 0.6348856846196422, "grad_norm": 148.18057250976562, "learning_rate": 3.6456536516405494e-06, "loss": 15.0318, "step": 314290 }, { "epoch": 0.634905885252326, "grad_norm": 311.2500915527344, "learning_rate": 3.645317638518721e-06, "loss": 13.9948, "step": 314300 }, { "epoch": 0.6349260858850099, "grad_norm": 0.0, "learning_rate": 3.6449816319994512e-06, "loss": 14.1342, "step": 314310 }, { "epoch": 0.6349462865176937, "grad_norm": 282.5542297363281, "learning_rate": 3.644645632084376e-06, "loss": 22.5003, "step": 314320 }, { "epoch": 0.6349664871503775, "grad_norm": 326.3429870605469, "learning_rate": 3.644309638775132e-06, "loss": 16.2273, "step": 314330 }, { "epoch": 0.6349866877830613, "grad_norm": 290.9770812988281, "learning_rate": 3.6439736520733606e-06, "loss": 9.8592, "step": 314340 }, { "epoch": 0.6350068884157452, "grad_norm": 457.53173828125, "learning_rate": 3.6436376719806965e-06, "loss": 28.9388, "step": 314350 }, { "epoch": 0.635027089048429, "grad_norm": 53.6711311340332, "learning_rate": 3.6433016984987774e-06, "loss": 21.1156, "step": 314360 }, { "epoch": 0.6350472896811128, "grad_norm": 206.02154541015625, "learning_rate": 3.642965731629242e-06, "loss": 19.0265, "step": 314370 }, { "epoch": 0.6350674903137966, "grad_norm": 469.5486145019531, "learning_rate": 3.6426297713737268e-06, "loss": 22.8118, "step": 314380 }, { "epoch": 0.6350876909464804, "grad_norm": 328.59234619140625, "learning_rate": 3.6422938177338695e-06, "loss": 13.2711, "step": 314390 }, { "epoch": 0.6351078915791643, "grad_norm": 349.1543884277344, "learning_rate": 3.6419578707113055e-06, "loss": 17.6395, "step": 314400 }, { "epoch": 0.6351280922118481, "grad_norm": 386.75164794921875, "learning_rate": 3.6416219303076772e-06, "loss": 14.0423, "step": 314410 }, { "epoch": 0.6351482928445319, "grad_norm": 883.1832275390625, "learning_rate": 3.6412859965246173e-06, "loss": 27.3597, "step": 314420 }, { "epoch": 0.6351684934772157, "grad_norm": 267.78363037109375, "learning_rate": 3.640950069363765e-06, "loss": 19.8727, "step": 314430 }, { "epoch": 0.6351886941098995, "grad_norm": 397.9726257324219, "learning_rate": 3.6406141488267575e-06, "loss": 16.0674, "step": 314440 }, { "epoch": 0.6352088947425834, "grad_norm": 220.41200256347656, "learning_rate": 3.640278234915232e-06, "loss": 22.3423, "step": 314450 }, { "epoch": 0.6352290953752672, "grad_norm": 61.90712356567383, "learning_rate": 3.639942327630823e-06, "loss": 27.4134, "step": 314460 }, { "epoch": 0.635249296007951, "grad_norm": 243.10006713867188, "learning_rate": 3.6396064269751747e-06, "loss": 18.1709, "step": 314470 }, { "epoch": 0.6352694966406348, "grad_norm": 324.9351501464844, "learning_rate": 3.6392705329499156e-06, "loss": 17.6682, "step": 314480 }, { "epoch": 0.6352896972733186, "grad_norm": 244.8361358642578, "learning_rate": 3.638934645556688e-06, "loss": 17.4464, "step": 314490 }, { "epoch": 0.6353098979060025, "grad_norm": 369.3509521484375, "learning_rate": 3.6385987647971287e-06, "loss": 14.873, "step": 314500 }, { "epoch": 0.6353300985386863, "grad_norm": 320.8244323730469, "learning_rate": 3.6382628906728735e-06, "loss": 17.3703, "step": 314510 }, { "epoch": 0.6353502991713701, "grad_norm": 266.0510559082031, "learning_rate": 3.63792702318556e-06, "loss": 22.0456, "step": 314520 }, { "epoch": 0.6353704998040539, "grad_norm": 281.3978576660156, "learning_rate": 3.6375911623368252e-06, "loss": 13.532, "step": 314530 }, { "epoch": 0.6353907004367377, "grad_norm": 265.64801025390625, "learning_rate": 3.637255308128305e-06, "loss": 11.176, "step": 314540 }, { "epoch": 0.6354109010694214, "grad_norm": 334.0043640136719, "learning_rate": 3.6369194605616364e-06, "loss": 11.4605, "step": 314550 }, { "epoch": 0.6354311017021053, "grad_norm": 175.85992431640625, "learning_rate": 3.636583619638458e-06, "loss": 38.5576, "step": 314560 }, { "epoch": 0.6354513023347891, "grad_norm": 671.420166015625, "learning_rate": 3.6362477853604066e-06, "loss": 25.9158, "step": 314570 }, { "epoch": 0.6354715029674729, "grad_norm": 414.4742431640625, "learning_rate": 3.635911957729117e-06, "loss": 17.2651, "step": 314580 }, { "epoch": 0.6354917036001567, "grad_norm": 316.853515625, "learning_rate": 3.6355761367462274e-06, "loss": 35.4632, "step": 314590 }, { "epoch": 0.6355119042328405, "grad_norm": 428.9239196777344, "learning_rate": 3.635240322413375e-06, "loss": 25.3012, "step": 314600 }, { "epoch": 0.6355321048655244, "grad_norm": 345.7099304199219, "learning_rate": 3.634904514732195e-06, "loss": 22.7598, "step": 314610 }, { "epoch": 0.6355523054982082, "grad_norm": 158.45574951171875, "learning_rate": 3.634568713704323e-06, "loss": 22.3823, "step": 314620 }, { "epoch": 0.635572506130892, "grad_norm": 367.8484802246094, "learning_rate": 3.634232919331401e-06, "loss": 22.5462, "step": 314630 }, { "epoch": 0.6355927067635758, "grad_norm": 312.975341796875, "learning_rate": 3.6338971316150593e-06, "loss": 16.0671, "step": 314640 }, { "epoch": 0.6356129073962596, "grad_norm": 23.701393127441406, "learning_rate": 3.6335613505569386e-06, "loss": 8.0283, "step": 314650 }, { "epoch": 0.6356331080289435, "grad_norm": 250.6197509765625, "learning_rate": 3.6332255761586745e-06, "loss": 30.8241, "step": 314660 }, { "epoch": 0.6356533086616273, "grad_norm": 48.33322525024414, "learning_rate": 3.6328898084219023e-06, "loss": 23.3623, "step": 314670 }, { "epoch": 0.6356735092943111, "grad_norm": 224.79061889648438, "learning_rate": 3.632554047348259e-06, "loss": 16.0274, "step": 314680 }, { "epoch": 0.6356937099269949, "grad_norm": 582.4691772460938, "learning_rate": 3.6322182929393833e-06, "loss": 14.4759, "step": 314690 }, { "epoch": 0.6357139105596787, "grad_norm": 78.73811340332031, "learning_rate": 3.6318825451969085e-06, "loss": 13.2903, "step": 314700 }, { "epoch": 0.6357341111923626, "grad_norm": 395.8401184082031, "learning_rate": 3.631546804122471e-06, "loss": 32.8851, "step": 314710 }, { "epoch": 0.6357543118250464, "grad_norm": 486.8900451660156, "learning_rate": 3.6312110697177095e-06, "loss": 18.8594, "step": 314720 }, { "epoch": 0.6357745124577302, "grad_norm": 131.15139770507812, "learning_rate": 3.63087534198426e-06, "loss": 17.7619, "step": 314730 }, { "epoch": 0.635794713090414, "grad_norm": 106.65361022949219, "learning_rate": 3.630539620923757e-06, "loss": 15.0951, "step": 314740 }, { "epoch": 0.6358149137230978, "grad_norm": 70.23684692382812, "learning_rate": 3.630203906537838e-06, "loss": 18.6719, "step": 314750 }, { "epoch": 0.6358351143557817, "grad_norm": 106.87405395507812, "learning_rate": 3.6298681988281405e-06, "loss": 19.8423, "step": 314760 }, { "epoch": 0.6358553149884655, "grad_norm": 67.42288970947266, "learning_rate": 3.6295324977962976e-06, "loss": 21.1392, "step": 314770 }, { "epoch": 0.6358755156211493, "grad_norm": 116.94200897216797, "learning_rate": 3.6291968034439463e-06, "loss": 16.345, "step": 314780 }, { "epoch": 0.6358957162538331, "grad_norm": 183.8971405029297, "learning_rate": 3.628861115772726e-06, "loss": 16.5754, "step": 314790 }, { "epoch": 0.6359159168865169, "grad_norm": 31.865367889404297, "learning_rate": 3.628525434784268e-06, "loss": 22.3156, "step": 314800 }, { "epoch": 0.6359361175192006, "grad_norm": 21.554040908813477, "learning_rate": 3.6281897604802113e-06, "loss": 20.1403, "step": 314810 }, { "epoch": 0.6359563181518845, "grad_norm": 240.13746643066406, "learning_rate": 3.6278540928621927e-06, "loss": 29.3342, "step": 314820 }, { "epoch": 0.6359765187845683, "grad_norm": 92.24823760986328, "learning_rate": 3.6275184319318456e-06, "loss": 8.5976, "step": 314830 }, { "epoch": 0.6359967194172521, "grad_norm": 268.13623046875, "learning_rate": 3.627182777690807e-06, "loss": 8.0317, "step": 314840 }, { "epoch": 0.6360169200499359, "grad_norm": 229.80813598632812, "learning_rate": 3.6268471301407127e-06, "loss": 13.6569, "step": 314850 }, { "epoch": 0.6360371206826197, "grad_norm": 263.35589599609375, "learning_rate": 3.626511489283201e-06, "loss": 17.2394, "step": 314860 }, { "epoch": 0.6360573213153036, "grad_norm": 37.49102020263672, "learning_rate": 3.6261758551199033e-06, "loss": 19.0017, "step": 314870 }, { "epoch": 0.6360775219479874, "grad_norm": 133.8287811279297, "learning_rate": 3.6258402276524585e-06, "loss": 15.5425, "step": 314880 }, { "epoch": 0.6360977225806712, "grad_norm": 354.69244384765625, "learning_rate": 3.6255046068825035e-06, "loss": 20.1426, "step": 314890 }, { "epoch": 0.636117923213355, "grad_norm": 451.70965576171875, "learning_rate": 3.625168992811671e-06, "loss": 15.3441, "step": 314900 }, { "epoch": 0.6361381238460388, "grad_norm": 46.83244323730469, "learning_rate": 3.6248333854415975e-06, "loss": 14.7444, "step": 314910 }, { "epoch": 0.6361583244787227, "grad_norm": 389.37152099609375, "learning_rate": 3.624497784773921e-06, "loss": 20.9808, "step": 314920 }, { "epoch": 0.6361785251114065, "grad_norm": 20.045778274536133, "learning_rate": 3.624162190810274e-06, "loss": 12.4882, "step": 314930 }, { "epoch": 0.6361987257440903, "grad_norm": 348.5186767578125, "learning_rate": 3.623826603552293e-06, "loss": 17.8435, "step": 314940 }, { "epoch": 0.6362189263767741, "grad_norm": 172.15533447265625, "learning_rate": 3.6234910230016173e-06, "loss": 11.7139, "step": 314950 }, { "epoch": 0.636239127009458, "grad_norm": 215.88381958007812, "learning_rate": 3.6231554491598766e-06, "loss": 18.2595, "step": 314960 }, { "epoch": 0.6362593276421418, "grad_norm": 1478.5848388671875, "learning_rate": 3.622819882028709e-06, "loss": 21.2824, "step": 314970 }, { "epoch": 0.6362795282748256, "grad_norm": 82.8841323852539, "learning_rate": 3.6224843216097526e-06, "loss": 10.8425, "step": 314980 }, { "epoch": 0.6362997289075094, "grad_norm": 215.9047088623047, "learning_rate": 3.6221487679046384e-06, "loss": 40.1015, "step": 314990 }, { "epoch": 0.6363199295401932, "grad_norm": 39.1889762878418, "learning_rate": 3.6218132209150047e-06, "loss": 12.1402, "step": 315000 }, { "epoch": 0.636340130172877, "grad_norm": 273.1037292480469, "learning_rate": 3.621477680642486e-06, "loss": 16.242, "step": 315010 }, { "epoch": 0.6363603308055609, "grad_norm": 0.0, "learning_rate": 3.6211421470887187e-06, "loss": 20.845, "step": 315020 }, { "epoch": 0.6363805314382447, "grad_norm": 71.53582000732422, "learning_rate": 3.620806620255336e-06, "loss": 14.9726, "step": 315030 }, { "epoch": 0.6364007320709285, "grad_norm": 127.14620971679688, "learning_rate": 3.6204711001439754e-06, "loss": 17.1757, "step": 315040 }, { "epoch": 0.6364209327036123, "grad_norm": 14.872736930847168, "learning_rate": 3.6201355867562725e-06, "loss": 19.647, "step": 315050 }, { "epoch": 0.636441133336296, "grad_norm": 332.30865478515625, "learning_rate": 3.61980008009386e-06, "loss": 14.1704, "step": 315060 }, { "epoch": 0.6364613339689799, "grad_norm": 185.18426513671875, "learning_rate": 3.6194645801583745e-06, "loss": 17.125, "step": 315070 }, { "epoch": 0.6364815346016637, "grad_norm": 235.90673828125, "learning_rate": 3.6191290869514523e-06, "loss": 16.2701, "step": 315080 }, { "epoch": 0.6365017352343475, "grad_norm": 591.3864135742188, "learning_rate": 3.6187936004747248e-06, "loss": 16.6374, "step": 315090 }, { "epoch": 0.6365219358670313, "grad_norm": 301.08892822265625, "learning_rate": 3.618458120729832e-06, "loss": 8.8979, "step": 315100 }, { "epoch": 0.6365421364997151, "grad_norm": 29.569978713989258, "learning_rate": 3.6181226477184074e-06, "loss": 21.033, "step": 315110 }, { "epoch": 0.636562337132399, "grad_norm": 0.0, "learning_rate": 3.617787181442084e-06, "loss": 12.9097, "step": 315120 }, { "epoch": 0.6365825377650828, "grad_norm": 273.4610290527344, "learning_rate": 3.6174517219024985e-06, "loss": 26.4971, "step": 315130 }, { "epoch": 0.6366027383977666, "grad_norm": 247.51548767089844, "learning_rate": 3.617116269101286e-06, "loss": 26.588, "step": 315140 }, { "epoch": 0.6366229390304504, "grad_norm": 91.10669708251953, "learning_rate": 3.616780823040081e-06, "loss": 9.1448, "step": 315150 }, { "epoch": 0.6366431396631342, "grad_norm": 434.4241638183594, "learning_rate": 3.616445383720517e-06, "loss": 17.6143, "step": 315160 }, { "epoch": 0.6366633402958181, "grad_norm": 316.5583801269531, "learning_rate": 3.616109951144231e-06, "loss": 27.6919, "step": 315170 }, { "epoch": 0.6366835409285019, "grad_norm": 207.5430145263672, "learning_rate": 3.615774525312859e-06, "loss": 10.5219, "step": 315180 }, { "epoch": 0.6367037415611857, "grad_norm": 108.87893676757812, "learning_rate": 3.6154391062280326e-06, "loss": 26.4617, "step": 315190 }, { "epoch": 0.6367239421938695, "grad_norm": 442.5435485839844, "learning_rate": 3.6151036938913887e-06, "loss": 24.3649, "step": 315200 }, { "epoch": 0.6367441428265533, "grad_norm": 338.3038024902344, "learning_rate": 3.614768288304562e-06, "loss": 16.1387, "step": 315210 }, { "epoch": 0.6367643434592372, "grad_norm": 196.8852996826172, "learning_rate": 3.6144328894691854e-06, "loss": 33.3886, "step": 315220 }, { "epoch": 0.636784544091921, "grad_norm": 152.10145568847656, "learning_rate": 3.614097497386894e-06, "loss": 17.6073, "step": 315230 }, { "epoch": 0.6368047447246048, "grad_norm": 171.79672241210938, "learning_rate": 3.613762112059327e-06, "loss": 19.236, "step": 315240 }, { "epoch": 0.6368249453572886, "grad_norm": 185.82521057128906, "learning_rate": 3.613426733488111e-06, "loss": 12.5716, "step": 315250 }, { "epoch": 0.6368451459899724, "grad_norm": 132.74436950683594, "learning_rate": 3.613091361674887e-06, "loss": 18.4026, "step": 315260 }, { "epoch": 0.6368653466226563, "grad_norm": 93.18102264404297, "learning_rate": 3.6127559966212885e-06, "loss": 15.5934, "step": 315270 }, { "epoch": 0.6368855472553401, "grad_norm": 355.94451904296875, "learning_rate": 3.6124206383289474e-06, "loss": 27.7077, "step": 315280 }, { "epoch": 0.6369057478880239, "grad_norm": 216.783447265625, "learning_rate": 3.6120852867995003e-06, "loss": 24.1885, "step": 315290 }, { "epoch": 0.6369259485207077, "grad_norm": 462.2900390625, "learning_rate": 3.61174994203458e-06, "loss": 18.243, "step": 315300 }, { "epoch": 0.6369461491533915, "grad_norm": 292.3565979003906, "learning_rate": 3.611414604035825e-06, "loss": 21.1184, "step": 315310 }, { "epoch": 0.6369663497860752, "grad_norm": 153.24417114257812, "learning_rate": 3.6110792728048636e-06, "loss": 9.613, "step": 315320 }, { "epoch": 0.6369865504187591, "grad_norm": 367.658203125, "learning_rate": 3.610743948343335e-06, "loss": 11.7124, "step": 315330 }, { "epoch": 0.6370067510514429, "grad_norm": 139.54835510253906, "learning_rate": 3.610408630652873e-06, "loss": 17.0226, "step": 315340 }, { "epoch": 0.6370269516841267, "grad_norm": 180.6556854248047, "learning_rate": 3.610073319735109e-06, "loss": 14.1828, "step": 315350 }, { "epoch": 0.6370471523168105, "grad_norm": 84.01644134521484, "learning_rate": 3.6097380155916795e-06, "loss": 19.3189, "step": 315360 }, { "epoch": 0.6370673529494943, "grad_norm": 356.1620788574219, "learning_rate": 3.609402718224219e-06, "loss": 11.345, "step": 315370 }, { "epoch": 0.6370875535821782, "grad_norm": 329.99285888671875, "learning_rate": 3.6090674276343608e-06, "loss": 16.5963, "step": 315380 }, { "epoch": 0.637107754214862, "grad_norm": 318.6948547363281, "learning_rate": 3.608732143823737e-06, "loss": 12.2305, "step": 315390 }, { "epoch": 0.6371279548475458, "grad_norm": 945.1468505859375, "learning_rate": 3.608396866793988e-06, "loss": 13.9304, "step": 315400 }, { "epoch": 0.6371481554802296, "grad_norm": 378.5052185058594, "learning_rate": 3.60806159654674e-06, "loss": 24.0988, "step": 315410 }, { "epoch": 0.6371683561129134, "grad_norm": 131.06988525390625, "learning_rate": 3.607726333083633e-06, "loss": 16.4079, "step": 315420 }, { "epoch": 0.6371885567455973, "grad_norm": 458.02642822265625, "learning_rate": 3.607391076406299e-06, "loss": 28.5304, "step": 315430 }, { "epoch": 0.6372087573782811, "grad_norm": 377.064453125, "learning_rate": 3.607055826516372e-06, "loss": 29.0945, "step": 315440 }, { "epoch": 0.6372289580109649, "grad_norm": 138.10397338867188, "learning_rate": 3.606720583415485e-06, "loss": 36.6493, "step": 315450 }, { "epoch": 0.6372491586436487, "grad_norm": 289.4404602050781, "learning_rate": 3.6063853471052724e-06, "loss": 25.8268, "step": 315460 }, { "epoch": 0.6372693592763325, "grad_norm": 57.423851013183594, "learning_rate": 3.606050117587372e-06, "loss": 15.7219, "step": 315470 }, { "epoch": 0.6372895599090164, "grad_norm": 442.754150390625, "learning_rate": 3.605714894863411e-06, "loss": 17.4788, "step": 315480 }, { "epoch": 0.6373097605417002, "grad_norm": 312.1666259765625, "learning_rate": 3.605379678935027e-06, "loss": 24.736, "step": 315490 }, { "epoch": 0.637329961174384, "grad_norm": 171.1634521484375, "learning_rate": 3.6050444698038547e-06, "loss": 10.4073, "step": 315500 }, { "epoch": 0.6373501618070678, "grad_norm": 406.0879211425781, "learning_rate": 3.6047092674715257e-06, "loss": 12.9178, "step": 315510 }, { "epoch": 0.6373703624397516, "grad_norm": 197.765869140625, "learning_rate": 3.6043740719396736e-06, "loss": 27.6929, "step": 315520 }, { "epoch": 0.6373905630724355, "grad_norm": 488.1368103027344, "learning_rate": 3.604038883209935e-06, "loss": 21.5949, "step": 315530 }, { "epoch": 0.6374107637051193, "grad_norm": 277.5425720214844, "learning_rate": 3.60370370128394e-06, "loss": 23.4579, "step": 315540 }, { "epoch": 0.6374309643378031, "grad_norm": 147.43731689453125, "learning_rate": 3.603368526163323e-06, "loss": 14.7775, "step": 315550 }, { "epoch": 0.6374511649704869, "grad_norm": 317.7743225097656, "learning_rate": 3.6030333578497213e-06, "loss": 14.0006, "step": 315560 }, { "epoch": 0.6374713656031706, "grad_norm": 327.7012634277344, "learning_rate": 3.602698196344763e-06, "loss": 12.2777, "step": 315570 }, { "epoch": 0.6374915662358545, "grad_norm": 429.0431213378906, "learning_rate": 3.6023630416500843e-06, "loss": 19.933, "step": 315580 }, { "epoch": 0.6375117668685383, "grad_norm": 203.08424377441406, "learning_rate": 3.6020278937673202e-06, "loss": 22.7825, "step": 315590 }, { "epoch": 0.6375319675012221, "grad_norm": 290.2354431152344, "learning_rate": 3.6016927526981014e-06, "loss": 11.1667, "step": 315600 }, { "epoch": 0.6375521681339059, "grad_norm": 154.86102294921875, "learning_rate": 3.601357618444063e-06, "loss": 15.0512, "step": 315610 }, { "epoch": 0.6375723687665897, "grad_norm": 108.37958526611328, "learning_rate": 3.6010224910068363e-06, "loss": 14.9358, "step": 315620 }, { "epoch": 0.6375925693992736, "grad_norm": 531.4585571289062, "learning_rate": 3.6006873703880595e-06, "loss": 17.2337, "step": 315630 }, { "epoch": 0.6376127700319574, "grad_norm": 140.11935424804688, "learning_rate": 3.60035225658936e-06, "loss": 19.4407, "step": 315640 }, { "epoch": 0.6376329706646412, "grad_norm": 354.5870666503906, "learning_rate": 3.600017149612375e-06, "loss": 11.0087, "step": 315650 }, { "epoch": 0.637653171297325, "grad_norm": 125.09190368652344, "learning_rate": 3.599682049458737e-06, "loss": 18.9757, "step": 315660 }, { "epoch": 0.6376733719300088, "grad_norm": 251.12478637695312, "learning_rate": 3.5993469561300785e-06, "loss": 14.9611, "step": 315670 }, { "epoch": 0.6376935725626927, "grad_norm": 23.162752151489258, "learning_rate": 3.599011869628033e-06, "loss": 14.7538, "step": 315680 }, { "epoch": 0.6377137731953765, "grad_norm": 271.4930419921875, "learning_rate": 3.598676789954234e-06, "loss": 15.2049, "step": 315690 }, { "epoch": 0.6377339738280603, "grad_norm": 338.39471435546875, "learning_rate": 3.598341717110313e-06, "loss": 16.747, "step": 315700 }, { "epoch": 0.6377541744607441, "grad_norm": 343.12298583984375, "learning_rate": 3.598006651097905e-06, "loss": 11.3906, "step": 315710 }, { "epoch": 0.6377743750934279, "grad_norm": 329.9395751953125, "learning_rate": 3.5976715919186443e-06, "loss": 12.4449, "step": 315720 }, { "epoch": 0.6377945757261118, "grad_norm": 523.908447265625, "learning_rate": 3.5973365395741612e-06, "loss": 57.1772, "step": 315730 }, { "epoch": 0.6378147763587956, "grad_norm": 438.4928283691406, "learning_rate": 3.597001494066089e-06, "loss": 14.031, "step": 315740 }, { "epoch": 0.6378349769914794, "grad_norm": 483.6122741699219, "learning_rate": 3.5966664553960622e-06, "loss": 33.6814, "step": 315750 }, { "epoch": 0.6378551776241632, "grad_norm": 94.60799407958984, "learning_rate": 3.596331423565712e-06, "loss": 10.715, "step": 315760 }, { "epoch": 0.637875378256847, "grad_norm": 448.385498046875, "learning_rate": 3.595996398576672e-06, "loss": 20.4818, "step": 315770 }, { "epoch": 0.6378955788895309, "grad_norm": 403.5329284667969, "learning_rate": 3.5956613804305755e-06, "loss": 14.3471, "step": 315780 }, { "epoch": 0.6379157795222147, "grad_norm": 378.4325866699219, "learning_rate": 3.5953263691290564e-06, "loss": 22.0534, "step": 315790 }, { "epoch": 0.6379359801548985, "grad_norm": 269.52947998046875, "learning_rate": 3.5949913646737456e-06, "loss": 16.1542, "step": 315800 }, { "epoch": 0.6379561807875823, "grad_norm": 136.25831604003906, "learning_rate": 3.594656367066276e-06, "loss": 18.3516, "step": 315810 }, { "epoch": 0.6379763814202661, "grad_norm": 420.5932922363281, "learning_rate": 3.594321376308282e-06, "loss": 28.5859, "step": 315820 }, { "epoch": 0.6379965820529498, "grad_norm": 0.0, "learning_rate": 3.5939863924013937e-06, "loss": 11.3004, "step": 315830 }, { "epoch": 0.6380167826856337, "grad_norm": 71.49604797363281, "learning_rate": 3.593651415347244e-06, "loss": 15.4709, "step": 315840 }, { "epoch": 0.6380369833183175, "grad_norm": 269.1575927734375, "learning_rate": 3.5933164451474708e-06, "loss": 15.49, "step": 315850 }, { "epoch": 0.6380571839510013, "grad_norm": 385.8111572265625, "learning_rate": 3.592981481803699e-06, "loss": 19.3025, "step": 315860 }, { "epoch": 0.6380773845836851, "grad_norm": 332.93377685546875, "learning_rate": 3.5926465253175656e-06, "loss": 24.5377, "step": 315870 }, { "epoch": 0.638097585216369, "grad_norm": 262.3724670410156, "learning_rate": 3.5923115756907033e-06, "loss": 17.1343, "step": 315880 }, { "epoch": 0.6381177858490528, "grad_norm": 155.1367645263672, "learning_rate": 3.591976632924743e-06, "loss": 19.1085, "step": 315890 }, { "epoch": 0.6381379864817366, "grad_norm": 155.60531616210938, "learning_rate": 3.5916416970213173e-06, "loss": 19.4691, "step": 315900 }, { "epoch": 0.6381581871144204, "grad_norm": 255.04368591308594, "learning_rate": 3.5913067679820592e-06, "loss": 12.8639, "step": 315910 }, { "epoch": 0.6381783877471042, "grad_norm": 204.04034423828125, "learning_rate": 3.5909718458086033e-06, "loss": 28.1893, "step": 315920 }, { "epoch": 0.638198588379788, "grad_norm": 308.69671630859375, "learning_rate": 3.5906369305025767e-06, "loss": 11.4822, "step": 315930 }, { "epoch": 0.6382187890124719, "grad_norm": 60.23744201660156, "learning_rate": 3.590302022065616e-06, "loss": 8.7246, "step": 315940 }, { "epoch": 0.6382389896451557, "grad_norm": 382.943603515625, "learning_rate": 3.5899671204993535e-06, "loss": 24.2899, "step": 315950 }, { "epoch": 0.6382591902778395, "grad_norm": 118.73155212402344, "learning_rate": 3.589632225805419e-06, "loss": 37.2489, "step": 315960 }, { "epoch": 0.6382793909105233, "grad_norm": 189.42088317871094, "learning_rate": 3.589297337985446e-06, "loss": 15.3714, "step": 315970 }, { "epoch": 0.6382995915432071, "grad_norm": 0.0, "learning_rate": 3.5889624570410675e-06, "loss": 11.2158, "step": 315980 }, { "epoch": 0.638319792175891, "grad_norm": 499.2741394042969, "learning_rate": 3.5886275829739144e-06, "loss": 17.3642, "step": 315990 }, { "epoch": 0.6383399928085748, "grad_norm": 313.43438720703125, "learning_rate": 3.5882927157856175e-06, "loss": 17.2317, "step": 316000 }, { "epoch": 0.6383601934412586, "grad_norm": 144.3086700439453, "learning_rate": 3.5879578554778137e-06, "loss": 32.3277, "step": 316010 }, { "epoch": 0.6383803940739424, "grad_norm": 109.6828384399414, "learning_rate": 3.5876230020521298e-06, "loss": 12.8811, "step": 316020 }, { "epoch": 0.6384005947066262, "grad_norm": 267.4474182128906, "learning_rate": 3.587288155510201e-06, "loss": 20.0526, "step": 316030 }, { "epoch": 0.6384207953393101, "grad_norm": 53.21384048461914, "learning_rate": 3.5869533158536583e-06, "loss": 31.6903, "step": 316040 }, { "epoch": 0.6384409959719939, "grad_norm": 370.4889221191406, "learning_rate": 3.586618483084134e-06, "loss": 17.6814, "step": 316050 }, { "epoch": 0.6384611966046777, "grad_norm": 407.1261901855469, "learning_rate": 3.586283657203259e-06, "loss": 21.381, "step": 316060 }, { "epoch": 0.6384813972373615, "grad_norm": 265.82513427734375, "learning_rate": 3.5859488382126656e-06, "loss": 12.5555, "step": 316070 }, { "epoch": 0.6385015978700452, "grad_norm": 273.15289306640625, "learning_rate": 3.585614026113989e-06, "loss": 15.0084, "step": 316080 }, { "epoch": 0.638521798502729, "grad_norm": 333.0440368652344, "learning_rate": 3.5852792209088543e-06, "loss": 18.8162, "step": 316090 }, { "epoch": 0.6385419991354129, "grad_norm": 280.3025817871094, "learning_rate": 3.584944422598899e-06, "loss": 16.6779, "step": 316100 }, { "epoch": 0.6385621997680967, "grad_norm": 399.392578125, "learning_rate": 3.5846096311857537e-06, "loss": 17.5452, "step": 316110 }, { "epoch": 0.6385824004007805, "grad_norm": 445.50054931640625, "learning_rate": 3.584274846671048e-06, "loss": 19.3864, "step": 316120 }, { "epoch": 0.6386026010334643, "grad_norm": 217.7563934326172, "learning_rate": 3.583940069056415e-06, "loss": 26.6186, "step": 316130 }, { "epoch": 0.6386228016661482, "grad_norm": 182.3772430419922, "learning_rate": 3.5836052983434878e-06, "loss": 28.1866, "step": 316140 }, { "epoch": 0.638643002298832, "grad_norm": 21.527517318725586, "learning_rate": 3.583270534533896e-06, "loss": 21.8148, "step": 316150 }, { "epoch": 0.6386632029315158, "grad_norm": 130.23443603515625, "learning_rate": 3.5829357776292694e-06, "loss": 17.5742, "step": 316160 }, { "epoch": 0.6386834035641996, "grad_norm": 70.52153778076172, "learning_rate": 3.582601027631246e-06, "loss": 18.152, "step": 316170 }, { "epoch": 0.6387036041968834, "grad_norm": 323.3556213378906, "learning_rate": 3.5822662845414502e-06, "loss": 14.0955, "step": 316180 }, { "epoch": 0.6387238048295673, "grad_norm": 173.76051330566406, "learning_rate": 3.5819315483615175e-06, "loss": 17.5079, "step": 316190 }, { "epoch": 0.6387440054622511, "grad_norm": 284.336181640625, "learning_rate": 3.5815968190930793e-06, "loss": 18.4699, "step": 316200 }, { "epoch": 0.6387642060949349, "grad_norm": 111.07291412353516, "learning_rate": 3.5812620967377653e-06, "loss": 9.7246, "step": 316210 }, { "epoch": 0.6387844067276187, "grad_norm": 398.2769470214844, "learning_rate": 3.5809273812972078e-06, "loss": 13.2699, "step": 316220 }, { "epoch": 0.6388046073603025, "grad_norm": 77.39501953125, "learning_rate": 3.5805926727730367e-06, "loss": 14.4045, "step": 316230 }, { "epoch": 0.6388248079929864, "grad_norm": 212.38565063476562, "learning_rate": 3.5802579711668883e-06, "loss": 26.2509, "step": 316240 }, { "epoch": 0.6388450086256702, "grad_norm": 227.17205810546875, "learning_rate": 3.579923276480387e-06, "loss": 32.0403, "step": 316250 }, { "epoch": 0.638865209258354, "grad_norm": 367.0043640136719, "learning_rate": 3.5795885887151687e-06, "loss": 18.6208, "step": 316260 }, { "epoch": 0.6388854098910378, "grad_norm": 168.98257446289062, "learning_rate": 3.5792539078728644e-06, "loss": 11.2499, "step": 316270 }, { "epoch": 0.6389056105237216, "grad_norm": 319.4594421386719, "learning_rate": 3.578919233955103e-06, "loss": 20.5946, "step": 316280 }, { "epoch": 0.6389258111564055, "grad_norm": 40.95768737792969, "learning_rate": 3.5785845669635165e-06, "loss": 16.4928, "step": 316290 }, { "epoch": 0.6389460117890893, "grad_norm": 141.94300842285156, "learning_rate": 3.5782499068997386e-06, "loss": 13.2107, "step": 316300 }, { "epoch": 0.6389662124217731, "grad_norm": 114.69023132324219, "learning_rate": 3.577915253765396e-06, "loss": 46.7745, "step": 316310 }, { "epoch": 0.6389864130544569, "grad_norm": 214.69725036621094, "learning_rate": 3.5775806075621215e-06, "loss": 15.8744, "step": 316320 }, { "epoch": 0.6390066136871407, "grad_norm": 122.32791900634766, "learning_rate": 3.5772459682915484e-06, "loss": 16.9263, "step": 316330 }, { "epoch": 0.6390268143198244, "grad_norm": 658.5464477539062, "learning_rate": 3.5769113359553055e-06, "loss": 24.168, "step": 316340 }, { "epoch": 0.6390470149525083, "grad_norm": 304.5755615234375, "learning_rate": 3.5765767105550236e-06, "loss": 17.2096, "step": 316350 }, { "epoch": 0.6390672155851921, "grad_norm": 118.37891387939453, "learning_rate": 3.576242092092334e-06, "loss": 18.2705, "step": 316360 }, { "epoch": 0.6390874162178759, "grad_norm": 297.8904113769531, "learning_rate": 3.5759074805688694e-06, "loss": 24.0013, "step": 316370 }, { "epoch": 0.6391076168505597, "grad_norm": 338.8775939941406, "learning_rate": 3.5755728759862573e-06, "loss": 28.6434, "step": 316380 }, { "epoch": 0.6391278174832435, "grad_norm": 318.78131103515625, "learning_rate": 3.5752382783461297e-06, "loss": 21.6727, "step": 316390 }, { "epoch": 0.6391480181159274, "grad_norm": 113.0171127319336, "learning_rate": 3.5749036876501196e-06, "loss": 16.1821, "step": 316400 }, { "epoch": 0.6391682187486112, "grad_norm": 239.63671875, "learning_rate": 3.5745691038998555e-06, "loss": 26.7651, "step": 316410 }, { "epoch": 0.639188419381295, "grad_norm": 358.8646240234375, "learning_rate": 3.5742345270969688e-06, "loss": 23.8672, "step": 316420 }, { "epoch": 0.6392086200139788, "grad_norm": 272.90802001953125, "learning_rate": 3.573899957243091e-06, "loss": 15.2323, "step": 316430 }, { "epoch": 0.6392288206466626, "grad_norm": 209.1858673095703, "learning_rate": 3.573565394339851e-06, "loss": 14.6791, "step": 316440 }, { "epoch": 0.6392490212793465, "grad_norm": 304.0673828125, "learning_rate": 3.573230838388878e-06, "loss": 18.6552, "step": 316450 }, { "epoch": 0.6392692219120303, "grad_norm": 42.91010665893555, "learning_rate": 3.572896289391809e-06, "loss": 20.6106, "step": 316460 }, { "epoch": 0.6392894225447141, "grad_norm": 42.27733612060547, "learning_rate": 3.5725617473502673e-06, "loss": 13.6132, "step": 316470 }, { "epoch": 0.6393096231773979, "grad_norm": 263.6181335449219, "learning_rate": 3.5722272122658874e-06, "loss": 9.72, "step": 316480 }, { "epoch": 0.6393298238100817, "grad_norm": 204.99472045898438, "learning_rate": 3.5718926841402993e-06, "loss": 8.6985, "step": 316490 }, { "epoch": 0.6393500244427656, "grad_norm": 387.9626159667969, "learning_rate": 3.571558162975133e-06, "loss": 16.354, "step": 316500 }, { "epoch": 0.6393702250754494, "grad_norm": 346.87860107421875, "learning_rate": 3.5712236487720185e-06, "loss": 18.9919, "step": 316510 }, { "epoch": 0.6393904257081332, "grad_norm": 80.12834167480469, "learning_rate": 3.570889141532586e-06, "loss": 16.4837, "step": 316520 }, { "epoch": 0.639410626340817, "grad_norm": 144.74374389648438, "learning_rate": 3.570554641258469e-06, "loss": 11.801, "step": 316530 }, { "epoch": 0.6394308269735008, "grad_norm": 197.5048065185547, "learning_rate": 3.570220147951292e-06, "loss": 17.6367, "step": 316540 }, { "epoch": 0.6394510276061847, "grad_norm": 134.37001037597656, "learning_rate": 3.569885661612691e-06, "loss": 10.5809, "step": 316550 }, { "epoch": 0.6394712282388685, "grad_norm": 419.03961181640625, "learning_rate": 3.5695511822442934e-06, "loss": 18.518, "step": 316560 }, { "epoch": 0.6394914288715523, "grad_norm": 157.8948211669922, "learning_rate": 3.5692167098477292e-06, "loss": 15.9945, "step": 316570 }, { "epoch": 0.6395116295042361, "grad_norm": 221.95289611816406, "learning_rate": 3.5688822444246297e-06, "loss": 18.5271, "step": 316580 }, { "epoch": 0.6395318301369199, "grad_norm": 217.68727111816406, "learning_rate": 3.5685477859766254e-06, "loss": 6.3584, "step": 316590 }, { "epoch": 0.6395520307696037, "grad_norm": 280.8382873535156, "learning_rate": 3.568213334505345e-06, "loss": 13.7371, "step": 316600 }, { "epoch": 0.6395722314022875, "grad_norm": 300.8899841308594, "learning_rate": 3.567878890012417e-06, "loss": 16.9608, "step": 316610 }, { "epoch": 0.6395924320349713, "grad_norm": 95.97379302978516, "learning_rate": 3.567544452499477e-06, "loss": 26.6272, "step": 316620 }, { "epoch": 0.6396126326676551, "grad_norm": 77.2156982421875, "learning_rate": 3.5672100219681495e-06, "loss": 17.3747, "step": 316630 }, { "epoch": 0.6396328333003389, "grad_norm": 109.87787628173828, "learning_rate": 3.5668755984200664e-06, "loss": 30.9218, "step": 316640 }, { "epoch": 0.6396530339330228, "grad_norm": 197.57540893554688, "learning_rate": 3.5665411818568596e-06, "loss": 16.6237, "step": 316650 }, { "epoch": 0.6396732345657066, "grad_norm": 20.104215621948242, "learning_rate": 3.5662067722801556e-06, "loss": 10.4299, "step": 316660 }, { "epoch": 0.6396934351983904, "grad_norm": 193.15260314941406, "learning_rate": 3.5658723696915864e-06, "loss": 14.9339, "step": 316670 }, { "epoch": 0.6397136358310742, "grad_norm": 279.2496032714844, "learning_rate": 3.5655379740927796e-06, "loss": 11.6433, "step": 316680 }, { "epoch": 0.639733836463758, "grad_norm": 374.71209716796875, "learning_rate": 3.5652035854853706e-06, "loss": 14.0445, "step": 316690 }, { "epoch": 0.6397540370964419, "grad_norm": 107.49427032470703, "learning_rate": 3.564869203870982e-06, "loss": 22.9181, "step": 316700 }, { "epoch": 0.6397742377291257, "grad_norm": 162.220458984375, "learning_rate": 3.564534829251248e-06, "loss": 13.6099, "step": 316710 }, { "epoch": 0.6397944383618095, "grad_norm": 307.990966796875, "learning_rate": 3.564200461627798e-06, "loss": 15.659, "step": 316720 }, { "epoch": 0.6398146389944933, "grad_norm": 321.3204345703125, "learning_rate": 3.5638661010022604e-06, "loss": 12.3454, "step": 316730 }, { "epoch": 0.6398348396271771, "grad_norm": 298.0869140625, "learning_rate": 3.5635317473762642e-06, "loss": 25.8137, "step": 316740 }, { "epoch": 0.639855040259861, "grad_norm": 175.01055908203125, "learning_rate": 3.5631974007514414e-06, "loss": 11.4894, "step": 316750 }, { "epoch": 0.6398752408925448, "grad_norm": 267.138671875, "learning_rate": 3.562863061129419e-06, "loss": 16.0703, "step": 316760 }, { "epoch": 0.6398954415252286, "grad_norm": 530.752197265625, "learning_rate": 3.562528728511827e-06, "loss": 17.2856, "step": 316770 }, { "epoch": 0.6399156421579124, "grad_norm": 509.8291015625, "learning_rate": 3.562194402900299e-06, "loss": 22.1841, "step": 316780 }, { "epoch": 0.6399358427905962, "grad_norm": 251.84913635253906, "learning_rate": 3.561860084296458e-06, "loss": 10.9584, "step": 316790 }, { "epoch": 0.63995604342328, "grad_norm": 247.45108032226562, "learning_rate": 3.561525772701937e-06, "loss": 12.2859, "step": 316800 }, { "epoch": 0.6399762440559639, "grad_norm": 213.12301635742188, "learning_rate": 3.5611914681183647e-06, "loss": 8.4347, "step": 316810 }, { "epoch": 0.6399964446886477, "grad_norm": 269.5357666015625, "learning_rate": 3.5608571705473725e-06, "loss": 17.9237, "step": 316820 }, { "epoch": 0.6400166453213315, "grad_norm": 1.6027940511703491, "learning_rate": 3.5605228799905865e-06, "loss": 13.9917, "step": 316830 }, { "epoch": 0.6400368459540153, "grad_norm": 635.3440551757812, "learning_rate": 3.5601885964496364e-06, "loss": 28.0116, "step": 316840 }, { "epoch": 0.640057046586699, "grad_norm": 377.0857849121094, "learning_rate": 3.559854319926156e-06, "loss": 21.3823, "step": 316850 }, { "epoch": 0.6400772472193829, "grad_norm": 229.0865478515625, "learning_rate": 3.559520050421767e-06, "loss": 15.3404, "step": 316860 }, { "epoch": 0.6400974478520667, "grad_norm": 350.0799865722656, "learning_rate": 3.559185787938104e-06, "loss": 25.8036, "step": 316870 }, { "epoch": 0.6401176484847505, "grad_norm": 111.81828308105469, "learning_rate": 3.558851532476796e-06, "loss": 14.9344, "step": 316880 }, { "epoch": 0.6401378491174343, "grad_norm": 637.1626586914062, "learning_rate": 3.5585172840394695e-06, "loss": 17.8768, "step": 316890 }, { "epoch": 0.6401580497501181, "grad_norm": 236.43414306640625, "learning_rate": 3.5581830426277554e-06, "loss": 30.6458, "step": 316900 }, { "epoch": 0.640178250382802, "grad_norm": 295.77239990234375, "learning_rate": 3.5578488082432828e-06, "loss": 19.7963, "step": 316910 }, { "epoch": 0.6401984510154858, "grad_norm": 311.8822937011719, "learning_rate": 3.557514580887679e-06, "loss": 11.5484, "step": 316920 }, { "epoch": 0.6402186516481696, "grad_norm": 268.68804931640625, "learning_rate": 3.5571803605625734e-06, "loss": 27.3334, "step": 316930 }, { "epoch": 0.6402388522808534, "grad_norm": 326.0564270019531, "learning_rate": 3.556846147269598e-06, "loss": 30.2876, "step": 316940 }, { "epoch": 0.6402590529135372, "grad_norm": 582.809814453125, "learning_rate": 3.556511941010378e-06, "loss": 20.9882, "step": 316950 }, { "epoch": 0.6402792535462211, "grad_norm": 150.3049774169922, "learning_rate": 3.5561777417865438e-06, "loss": 12.8303, "step": 316960 }, { "epoch": 0.6402994541789049, "grad_norm": 415.52178955078125, "learning_rate": 3.5558435495997245e-06, "loss": 26.8689, "step": 316970 }, { "epoch": 0.6403196548115887, "grad_norm": 11.618037223815918, "learning_rate": 3.5555093644515496e-06, "loss": 19.6628, "step": 316980 }, { "epoch": 0.6403398554442725, "grad_norm": 251.4128875732422, "learning_rate": 3.5551751863436458e-06, "loss": 9.3296, "step": 316990 }, { "epoch": 0.6403600560769563, "grad_norm": 256.5743408203125, "learning_rate": 3.5548410152776414e-06, "loss": 13.7435, "step": 317000 }, { "epoch": 0.6403802567096402, "grad_norm": 215.38888549804688, "learning_rate": 3.5545068512551695e-06, "loss": 12.3051, "step": 317010 }, { "epoch": 0.640400457342324, "grad_norm": 498.6082458496094, "learning_rate": 3.5541726942778544e-06, "loss": 12.8546, "step": 317020 }, { "epoch": 0.6404206579750078, "grad_norm": 861.7075805664062, "learning_rate": 3.553838544347326e-06, "loss": 28.301, "step": 317030 }, { "epoch": 0.6404408586076916, "grad_norm": 528.4608154296875, "learning_rate": 3.5535044014652143e-06, "loss": 22.1583, "step": 317040 }, { "epoch": 0.6404610592403754, "grad_norm": 239.03062438964844, "learning_rate": 3.553170265633146e-06, "loss": 14.7178, "step": 317050 }, { "epoch": 0.6404812598730593, "grad_norm": 416.8616027832031, "learning_rate": 3.5528361368527503e-06, "loss": 12.914, "step": 317060 }, { "epoch": 0.6405014605057431, "grad_norm": 306.7874755859375, "learning_rate": 3.552502015125656e-06, "loss": 14.4958, "step": 317070 }, { "epoch": 0.6405216611384269, "grad_norm": 276.0233154296875, "learning_rate": 3.5521679004534905e-06, "loss": 34.8238, "step": 317080 }, { "epoch": 0.6405418617711107, "grad_norm": 269.2386474609375, "learning_rate": 3.551833792837883e-06, "loss": 20.2083, "step": 317090 }, { "epoch": 0.6405620624037945, "grad_norm": 337.566650390625, "learning_rate": 3.5514996922804636e-06, "loss": 17.5518, "step": 317100 }, { "epoch": 0.6405822630364782, "grad_norm": 25.06216812133789, "learning_rate": 3.5511655987828583e-06, "loss": 19.5707, "step": 317110 }, { "epoch": 0.6406024636691621, "grad_norm": 292.42315673828125, "learning_rate": 3.550831512346695e-06, "loss": 11.1671, "step": 317120 }, { "epoch": 0.6406226643018459, "grad_norm": 263.9823913574219, "learning_rate": 3.550497432973603e-06, "loss": 9.5356, "step": 317130 }, { "epoch": 0.6406428649345297, "grad_norm": 310.7607421875, "learning_rate": 3.5501633606652143e-06, "loss": 11.3389, "step": 317140 }, { "epoch": 0.6406630655672135, "grad_norm": 375.8763427734375, "learning_rate": 3.5498292954231497e-06, "loss": 16.9913, "step": 317150 }, { "epoch": 0.6406832661998973, "grad_norm": 790.3228759765625, "learning_rate": 3.549495237249042e-06, "loss": 37.4331, "step": 317160 }, { "epoch": 0.6407034668325812, "grad_norm": 21.25695037841797, "learning_rate": 3.5491611861445198e-06, "loss": 11.3637, "step": 317170 }, { "epoch": 0.640723667465265, "grad_norm": 6.675368785858154, "learning_rate": 3.5488271421112093e-06, "loss": 17.7536, "step": 317180 }, { "epoch": 0.6407438680979488, "grad_norm": 151.0326690673828, "learning_rate": 3.5484931051507387e-06, "loss": 11.1824, "step": 317190 }, { "epoch": 0.6407640687306326, "grad_norm": 88.36953735351562, "learning_rate": 3.548159075264738e-06, "loss": 24.4601, "step": 317200 }, { "epoch": 0.6407842693633164, "grad_norm": 200.0333709716797, "learning_rate": 3.547825052454833e-06, "loss": 14.9318, "step": 317210 }, { "epoch": 0.6408044699960003, "grad_norm": 162.25796508789062, "learning_rate": 3.5474910367226517e-06, "loss": 11.6708, "step": 317220 }, { "epoch": 0.6408246706286841, "grad_norm": 243.4656219482422, "learning_rate": 3.5471570280698257e-06, "loss": 17.0741, "step": 317230 }, { "epoch": 0.6408448712613679, "grad_norm": 528.5477905273438, "learning_rate": 3.5468230264979774e-06, "loss": 18.8632, "step": 317240 }, { "epoch": 0.6408650718940517, "grad_norm": 167.81497192382812, "learning_rate": 3.5464890320087374e-06, "loss": 14.3983, "step": 317250 }, { "epoch": 0.6408852725267355, "grad_norm": 221.84219360351562, "learning_rate": 3.5461550446037363e-06, "loss": 24.4891, "step": 317260 }, { "epoch": 0.6409054731594194, "grad_norm": 233.11990356445312, "learning_rate": 3.545821064284597e-06, "loss": 16.3318, "step": 317270 }, { "epoch": 0.6409256737921032, "grad_norm": 96.61408233642578, "learning_rate": 3.5454870910529494e-06, "loss": 23.948, "step": 317280 }, { "epoch": 0.640945874424787, "grad_norm": 677.6549072265625, "learning_rate": 3.545153124910421e-06, "loss": 23.9225, "step": 317290 }, { "epoch": 0.6409660750574708, "grad_norm": 640.573486328125, "learning_rate": 3.5448191658586423e-06, "loss": 16.0887, "step": 317300 }, { "epoch": 0.6409862756901546, "grad_norm": 196.6647491455078, "learning_rate": 3.5444852138992357e-06, "loss": 6.822, "step": 317310 }, { "epoch": 0.6410064763228385, "grad_norm": 198.2216796875, "learning_rate": 3.544151269033832e-06, "loss": 13.5411, "step": 317320 }, { "epoch": 0.6410266769555223, "grad_norm": 198.4065399169922, "learning_rate": 3.54381733126406e-06, "loss": 9.9734, "step": 317330 }, { "epoch": 0.6410468775882061, "grad_norm": 215.68048095703125, "learning_rate": 3.5434834005915453e-06, "loss": 36.4048, "step": 317340 }, { "epoch": 0.6410670782208899, "grad_norm": 224.1598663330078, "learning_rate": 3.5431494770179154e-06, "loss": 20.6301, "step": 317350 }, { "epoch": 0.6410872788535736, "grad_norm": 417.1700134277344, "learning_rate": 3.5428155605447988e-06, "loss": 21.4458, "step": 317360 }, { "epoch": 0.6411074794862575, "grad_norm": 192.98165893554688, "learning_rate": 3.5424816511738213e-06, "loss": 17.9095, "step": 317370 }, { "epoch": 0.6411276801189413, "grad_norm": 166.26885986328125, "learning_rate": 3.5421477489066115e-06, "loss": 24.5506, "step": 317380 }, { "epoch": 0.6411478807516251, "grad_norm": 416.7253723144531, "learning_rate": 3.541813853744799e-06, "loss": 19.7476, "step": 317390 }, { "epoch": 0.6411680813843089, "grad_norm": 311.62164306640625, "learning_rate": 3.5414799656900057e-06, "loss": 20.1282, "step": 317400 }, { "epoch": 0.6411882820169927, "grad_norm": 259.79437255859375, "learning_rate": 3.541146084743864e-06, "loss": 17.0924, "step": 317410 }, { "epoch": 0.6412084826496766, "grad_norm": 138.6507110595703, "learning_rate": 3.540812210907999e-06, "loss": 16.5536, "step": 317420 }, { "epoch": 0.6412286832823604, "grad_norm": 430.5732421875, "learning_rate": 3.5404783441840383e-06, "loss": 12.0276, "step": 317430 }, { "epoch": 0.6412488839150442, "grad_norm": 186.7552032470703, "learning_rate": 3.5401444845736092e-06, "loss": 20.8473, "step": 317440 }, { "epoch": 0.641269084547728, "grad_norm": 145.25718688964844, "learning_rate": 3.539810632078338e-06, "loss": 7.5006, "step": 317450 }, { "epoch": 0.6412892851804118, "grad_norm": 220.06753540039062, "learning_rate": 3.5394767866998555e-06, "loss": 16.9935, "step": 317460 }, { "epoch": 0.6413094858130957, "grad_norm": 284.7437438964844, "learning_rate": 3.539142948439782e-06, "loss": 10.401, "step": 317470 }, { "epoch": 0.6413296864457795, "grad_norm": 10.688435554504395, "learning_rate": 3.538809117299751e-06, "loss": 15.0129, "step": 317480 }, { "epoch": 0.6413498870784633, "grad_norm": 181.26315307617188, "learning_rate": 3.538475293281387e-06, "loss": 16.0745, "step": 317490 }, { "epoch": 0.6413700877111471, "grad_norm": 230.80023193359375, "learning_rate": 3.538141476386317e-06, "loss": 25.7182, "step": 317500 }, { "epoch": 0.6413902883438309, "grad_norm": 130.51426696777344, "learning_rate": 3.5378076666161677e-06, "loss": 22.9114, "step": 317510 }, { "epoch": 0.6414104889765148, "grad_norm": 25.065185546875, "learning_rate": 3.537473863972568e-06, "loss": 12.8421, "step": 317520 }, { "epoch": 0.6414306896091986, "grad_norm": 664.7421264648438, "learning_rate": 3.537140068457142e-06, "loss": 24.6672, "step": 317530 }, { "epoch": 0.6414508902418824, "grad_norm": 640.2726440429688, "learning_rate": 3.5368062800715163e-06, "loss": 12.7307, "step": 317540 }, { "epoch": 0.6414710908745662, "grad_norm": 534.5606079101562, "learning_rate": 3.536472498817323e-06, "loss": 19.0642, "step": 317550 }, { "epoch": 0.64149129150725, "grad_norm": 204.64883422851562, "learning_rate": 3.536138724696182e-06, "loss": 17.7614, "step": 317560 }, { "epoch": 0.6415114921399339, "grad_norm": 545.7673950195312, "learning_rate": 3.535804957709724e-06, "loss": 19.2659, "step": 317570 }, { "epoch": 0.6415316927726177, "grad_norm": 136.4925994873047, "learning_rate": 3.5354711978595757e-06, "loss": 22.0761, "step": 317580 }, { "epoch": 0.6415518934053015, "grad_norm": 298.1298522949219, "learning_rate": 3.5351374451473643e-06, "loss": 15.8076, "step": 317590 }, { "epoch": 0.6415720940379853, "grad_norm": 424.038330078125, "learning_rate": 3.5348036995747135e-06, "loss": 13.5813, "step": 317600 }, { "epoch": 0.6415922946706691, "grad_norm": 138.11585998535156, "learning_rate": 3.5344699611432515e-06, "loss": 24.2572, "step": 317610 }, { "epoch": 0.6416124953033528, "grad_norm": 0.0, "learning_rate": 3.5341362298546077e-06, "loss": 11.5616, "step": 317620 }, { "epoch": 0.6416326959360367, "grad_norm": 442.903076171875, "learning_rate": 3.533802505710403e-06, "loss": 23.1013, "step": 317630 }, { "epoch": 0.6416528965687205, "grad_norm": 380.7187805175781, "learning_rate": 3.5334687887122687e-06, "loss": 22.7901, "step": 317640 }, { "epoch": 0.6416730972014043, "grad_norm": 211.47076416015625, "learning_rate": 3.5331350788618303e-06, "loss": 20.2536, "step": 317650 }, { "epoch": 0.6416932978340881, "grad_norm": 225.19363403320312, "learning_rate": 3.532801376160713e-06, "loss": 16.7696, "step": 317660 }, { "epoch": 0.641713498466772, "grad_norm": 372.05712890625, "learning_rate": 3.5324676806105428e-06, "loss": 19.734, "step": 317670 }, { "epoch": 0.6417336990994558, "grad_norm": 542.0853271484375, "learning_rate": 3.5321339922129493e-06, "loss": 21.0842, "step": 317680 }, { "epoch": 0.6417538997321396, "grad_norm": 119.28316497802734, "learning_rate": 3.5318003109695544e-06, "loss": 11.7826, "step": 317690 }, { "epoch": 0.6417741003648234, "grad_norm": 173.24627685546875, "learning_rate": 3.531466636881987e-06, "loss": 23.2182, "step": 317700 }, { "epoch": 0.6417943009975072, "grad_norm": 141.20045471191406, "learning_rate": 3.531132969951875e-06, "loss": 21.062, "step": 317710 }, { "epoch": 0.641814501630191, "grad_norm": 466.9609375, "learning_rate": 3.5307993101808415e-06, "loss": 23.1498, "step": 317720 }, { "epoch": 0.6418347022628749, "grad_norm": 330.0227966308594, "learning_rate": 3.5304656575705133e-06, "loss": 19.1739, "step": 317730 }, { "epoch": 0.6418549028955587, "grad_norm": 41.70853805541992, "learning_rate": 3.530132012122518e-06, "loss": 17.2184, "step": 317740 }, { "epoch": 0.6418751035282425, "grad_norm": 357.2669372558594, "learning_rate": 3.5297983738384813e-06, "loss": 20.8489, "step": 317750 }, { "epoch": 0.6418953041609263, "grad_norm": 340.2971496582031, "learning_rate": 3.529464742720028e-06, "loss": 15.4057, "step": 317760 }, { "epoch": 0.6419155047936101, "grad_norm": 70.8252182006836, "learning_rate": 3.5291311187687847e-06, "loss": 20.4043, "step": 317770 }, { "epoch": 0.641935705426294, "grad_norm": 330.3464050292969, "learning_rate": 3.5287975019863806e-06, "loss": 16.5129, "step": 317780 }, { "epoch": 0.6419559060589778, "grad_norm": 36.18974304199219, "learning_rate": 3.5284638923744373e-06, "loss": 12.1458, "step": 317790 }, { "epoch": 0.6419761066916616, "grad_norm": 73.36510467529297, "learning_rate": 3.5281302899345825e-06, "loss": 23.6719, "step": 317800 }, { "epoch": 0.6419963073243454, "grad_norm": 399.6190490722656, "learning_rate": 3.527796694668443e-06, "loss": 16.6277, "step": 317810 }, { "epoch": 0.6420165079570292, "grad_norm": 221.36312866210938, "learning_rate": 3.5274631065776433e-06, "loss": 29.4883, "step": 317820 }, { "epoch": 0.6420367085897131, "grad_norm": 327.0834045410156, "learning_rate": 3.527129525663808e-06, "loss": 11.5293, "step": 317830 }, { "epoch": 0.6420569092223969, "grad_norm": 179.50830078125, "learning_rate": 3.526795951928569e-06, "loss": 16.0016, "step": 317840 }, { "epoch": 0.6420771098550807, "grad_norm": 526.668212890625, "learning_rate": 3.5264623853735435e-06, "loss": 18.7278, "step": 317850 }, { "epoch": 0.6420973104877645, "grad_norm": 119.34868621826172, "learning_rate": 3.5261288260003635e-06, "loss": 16.4815, "step": 317860 }, { "epoch": 0.6421175111204483, "grad_norm": 96.84281921386719, "learning_rate": 3.5257952738106528e-06, "loss": 25.0928, "step": 317870 }, { "epoch": 0.6421377117531321, "grad_norm": 982.0217895507812, "learning_rate": 3.525461728806038e-06, "loss": 25.736, "step": 317880 }, { "epoch": 0.6421579123858159, "grad_norm": 301.0968322753906, "learning_rate": 3.525128190988143e-06, "loss": 11.8005, "step": 317890 }, { "epoch": 0.6421781130184997, "grad_norm": 210.52284240722656, "learning_rate": 3.524794660358593e-06, "loss": 15.9518, "step": 317900 }, { "epoch": 0.6421983136511835, "grad_norm": 157.07583618164062, "learning_rate": 3.5244611369190184e-06, "loss": 9.9868, "step": 317910 }, { "epoch": 0.6422185142838673, "grad_norm": 10.515547752380371, "learning_rate": 3.5241276206710374e-06, "loss": 22.7723, "step": 317920 }, { "epoch": 0.6422387149165512, "grad_norm": 384.8644104003906, "learning_rate": 3.5237941116162812e-06, "loss": 16.5733, "step": 317930 }, { "epoch": 0.642258915549235, "grad_norm": 251.51954650878906, "learning_rate": 3.523460609756374e-06, "loss": 14.8487, "step": 317940 }, { "epoch": 0.6422791161819188, "grad_norm": 168.44171142578125, "learning_rate": 3.5231271150929403e-06, "loss": 13.9267, "step": 317950 }, { "epoch": 0.6422993168146026, "grad_norm": 171.0758819580078, "learning_rate": 3.5227936276276055e-06, "loss": 9.1279, "step": 317960 }, { "epoch": 0.6423195174472864, "grad_norm": 219.159423828125, "learning_rate": 3.522460147361996e-06, "loss": 15.8026, "step": 317970 }, { "epoch": 0.6423397180799703, "grad_norm": 291.80157470703125, "learning_rate": 3.522126674297736e-06, "loss": 17.8505, "step": 317980 }, { "epoch": 0.6423599187126541, "grad_norm": 180.98829650878906, "learning_rate": 3.5217932084364505e-06, "loss": 14.0047, "step": 317990 }, { "epoch": 0.6423801193453379, "grad_norm": 535.4774169921875, "learning_rate": 3.521459749779769e-06, "loss": 17.1318, "step": 318000 }, { "epoch": 0.6424003199780217, "grad_norm": 265.114990234375, "learning_rate": 3.5211262983293094e-06, "loss": 15.2721, "step": 318010 }, { "epoch": 0.6424205206107055, "grad_norm": 450.7021179199219, "learning_rate": 3.520792854086702e-06, "loss": 13.2809, "step": 318020 }, { "epoch": 0.6424407212433894, "grad_norm": 365.6135559082031, "learning_rate": 3.520459417053571e-06, "loss": 11.4966, "step": 318030 }, { "epoch": 0.6424609218760732, "grad_norm": 682.0921630859375, "learning_rate": 3.520125987231542e-06, "loss": 20.3219, "step": 318040 }, { "epoch": 0.642481122508757, "grad_norm": 531.0994873046875, "learning_rate": 3.5197925646222387e-06, "loss": 22.3328, "step": 318050 }, { "epoch": 0.6425013231414408, "grad_norm": 43.4310188293457, "learning_rate": 3.5194591492272863e-06, "loss": 19.082, "step": 318060 }, { "epoch": 0.6425215237741246, "grad_norm": 992.97021484375, "learning_rate": 3.519125741048313e-06, "loss": 19.7935, "step": 318070 }, { "epoch": 0.6425417244068085, "grad_norm": 339.1274719238281, "learning_rate": 3.5187923400869384e-06, "loss": 22.5418, "step": 318080 }, { "epoch": 0.6425619250394923, "grad_norm": 340.5710754394531, "learning_rate": 3.5184589463447918e-06, "loss": 15.4831, "step": 318090 }, { "epoch": 0.6425821256721761, "grad_norm": 37.11964416503906, "learning_rate": 3.5181255598234963e-06, "loss": 22.2093, "step": 318100 }, { "epoch": 0.6426023263048599, "grad_norm": 217.34005737304688, "learning_rate": 3.5177921805246772e-06, "loss": 21.3329, "step": 318110 }, { "epoch": 0.6426225269375437, "grad_norm": 165.3251190185547, "learning_rate": 3.5174588084499594e-06, "loss": 11.6709, "step": 318120 }, { "epoch": 0.6426427275702274, "grad_norm": 452.8645935058594, "learning_rate": 3.5171254436009684e-06, "loss": 14.9893, "step": 318130 }, { "epoch": 0.6426629282029113, "grad_norm": 311.24688720703125, "learning_rate": 3.5167920859793263e-06, "loss": 16.3271, "step": 318140 }, { "epoch": 0.6426831288355951, "grad_norm": 338.81744384765625, "learning_rate": 3.5164587355866593e-06, "loss": 26.1871, "step": 318150 }, { "epoch": 0.6427033294682789, "grad_norm": 501.10455322265625, "learning_rate": 3.5161253924245955e-06, "loss": 23.0607, "step": 318160 }, { "epoch": 0.6427235301009627, "grad_norm": 446.5716857910156, "learning_rate": 3.5157920564947535e-06, "loss": 19.7232, "step": 318170 }, { "epoch": 0.6427437307336465, "grad_norm": 191.57679748535156, "learning_rate": 3.5154587277987618e-06, "loss": 13.1697, "step": 318180 }, { "epoch": 0.6427639313663304, "grad_norm": 186.5753173828125, "learning_rate": 3.5151254063382445e-06, "loss": 18.2323, "step": 318190 }, { "epoch": 0.6427841319990142, "grad_norm": 581.8048706054688, "learning_rate": 3.5147920921148267e-06, "loss": 13.5003, "step": 318200 }, { "epoch": 0.642804332631698, "grad_norm": 118.12873840332031, "learning_rate": 3.514458785130131e-06, "loss": 31.232, "step": 318210 }, { "epoch": 0.6428245332643818, "grad_norm": 462.60272216796875, "learning_rate": 3.514125485385782e-06, "loss": 17.3873, "step": 318220 }, { "epoch": 0.6428447338970656, "grad_norm": 80.0096206665039, "learning_rate": 3.5137921928834085e-06, "loss": 17.2134, "step": 318230 }, { "epoch": 0.6428649345297495, "grad_norm": 366.0083312988281, "learning_rate": 3.5134589076246284e-06, "loss": 30.5289, "step": 318240 }, { "epoch": 0.6428851351624333, "grad_norm": 211.55108642578125, "learning_rate": 3.5131256296110703e-06, "loss": 12.5471, "step": 318250 }, { "epoch": 0.6429053357951171, "grad_norm": 164.0587921142578, "learning_rate": 3.512792358844359e-06, "loss": 11.174, "step": 318260 }, { "epoch": 0.6429255364278009, "grad_norm": 335.4501647949219, "learning_rate": 3.5124590953261155e-06, "loss": 7.6604, "step": 318270 }, { "epoch": 0.6429457370604847, "grad_norm": 188.0418701171875, "learning_rate": 3.5121258390579667e-06, "loss": 24.2085, "step": 318280 }, { "epoch": 0.6429659376931686, "grad_norm": 295.1356201171875, "learning_rate": 3.511792590041537e-06, "loss": 16.4097, "step": 318290 }, { "epoch": 0.6429861383258524, "grad_norm": 551.4459228515625, "learning_rate": 3.511459348278448e-06, "loss": 28.4667, "step": 318300 }, { "epoch": 0.6430063389585362, "grad_norm": 258.7704772949219, "learning_rate": 3.511126113770325e-06, "loss": 17.0829, "step": 318310 }, { "epoch": 0.64302653959122, "grad_norm": 191.628662109375, "learning_rate": 3.510792886518795e-06, "loss": 22.9415, "step": 318320 }, { "epoch": 0.6430467402239038, "grad_norm": 452.72088623046875, "learning_rate": 3.5104596665254786e-06, "loss": 22.3515, "step": 318330 }, { "epoch": 0.6430669408565877, "grad_norm": 169.96054077148438, "learning_rate": 3.510126453792001e-06, "loss": 5.5515, "step": 318340 }, { "epoch": 0.6430871414892715, "grad_norm": 270.74822998046875, "learning_rate": 3.509793248319987e-06, "loss": 16.7429, "step": 318350 }, { "epoch": 0.6431073421219553, "grad_norm": 118.8292465209961, "learning_rate": 3.509460050111061e-06, "loss": 23.8458, "step": 318360 }, { "epoch": 0.6431275427546391, "grad_norm": 125.5928955078125, "learning_rate": 3.5091268591668427e-06, "loss": 21.2549, "step": 318370 }, { "epoch": 0.643147743387323, "grad_norm": 322.6814270019531, "learning_rate": 3.5087936754889614e-06, "loss": 15.9354, "step": 318380 }, { "epoch": 0.6431679440200067, "grad_norm": 204.8306427001953, "learning_rate": 3.5084604990790395e-06, "loss": 22.0734, "step": 318390 }, { "epoch": 0.6431881446526905, "grad_norm": 90.25094604492188, "learning_rate": 3.508127329938699e-06, "loss": 13.6706, "step": 318400 }, { "epoch": 0.6432083452853743, "grad_norm": 157.71861267089844, "learning_rate": 3.5077941680695653e-06, "loss": 21.738, "step": 318410 }, { "epoch": 0.6432285459180581, "grad_norm": 129.8929443359375, "learning_rate": 3.507461013473263e-06, "loss": 11.9791, "step": 318420 }, { "epoch": 0.6432487465507419, "grad_norm": 382.0925598144531, "learning_rate": 3.507127866151413e-06, "loss": 13.4, "step": 318430 }, { "epoch": 0.6432689471834258, "grad_norm": 484.6304626464844, "learning_rate": 3.50679472610564e-06, "loss": 25.0337, "step": 318440 }, { "epoch": 0.6432891478161096, "grad_norm": 0.0, "learning_rate": 3.5064615933375724e-06, "loss": 11.9142, "step": 318450 }, { "epoch": 0.6433093484487934, "grad_norm": 375.59539794921875, "learning_rate": 3.506128467848826e-06, "loss": 12.3797, "step": 318460 }, { "epoch": 0.6433295490814772, "grad_norm": 296.4993591308594, "learning_rate": 3.505795349641029e-06, "loss": 18.927, "step": 318470 }, { "epoch": 0.643349749714161, "grad_norm": 154.12249755859375, "learning_rate": 3.5054622387158044e-06, "loss": 36.9432, "step": 318480 }, { "epoch": 0.6433699503468449, "grad_norm": 429.15283203125, "learning_rate": 3.505129135074777e-06, "loss": 21.89, "step": 318490 }, { "epoch": 0.6433901509795287, "grad_norm": 154.5587615966797, "learning_rate": 3.5047960387195673e-06, "loss": 9.5271, "step": 318500 }, { "epoch": 0.6434103516122125, "grad_norm": 387.0033874511719, "learning_rate": 3.5044629496517997e-06, "loss": 24.1493, "step": 318510 }, { "epoch": 0.6434305522448963, "grad_norm": 276.69317626953125, "learning_rate": 3.5041298678731017e-06, "loss": 32.2945, "step": 318520 }, { "epoch": 0.6434507528775801, "grad_norm": 99.72978210449219, "learning_rate": 3.503796793385089e-06, "loss": 17.5933, "step": 318530 }, { "epoch": 0.643470953510264, "grad_norm": 263.653564453125, "learning_rate": 3.503463726189391e-06, "loss": 21.5141, "step": 318540 }, { "epoch": 0.6434911541429478, "grad_norm": 127.07489013671875, "learning_rate": 3.503130666287631e-06, "loss": 10.8784, "step": 318550 }, { "epoch": 0.6435113547756316, "grad_norm": 353.9801330566406, "learning_rate": 3.502797613681429e-06, "loss": 15.2904, "step": 318560 }, { "epoch": 0.6435315554083154, "grad_norm": 165.40101623535156, "learning_rate": 3.50246456837241e-06, "loss": 18.2257, "step": 318570 }, { "epoch": 0.6435517560409992, "grad_norm": 232.76248168945312, "learning_rate": 3.5021315303621973e-06, "loss": 10.5518, "step": 318580 }, { "epoch": 0.643571956673683, "grad_norm": 261.11993408203125, "learning_rate": 3.5017984996524134e-06, "loss": 8.4631, "step": 318590 }, { "epoch": 0.6435921573063669, "grad_norm": 201.26437377929688, "learning_rate": 3.501465476244681e-06, "loss": 15.7625, "step": 318600 }, { "epoch": 0.6436123579390507, "grad_norm": 457.0760192871094, "learning_rate": 3.501132460140627e-06, "loss": 17.9339, "step": 318610 }, { "epoch": 0.6436325585717345, "grad_norm": 272.7782897949219, "learning_rate": 3.5007994513418687e-06, "loss": 15.106, "step": 318620 }, { "epoch": 0.6436527592044183, "grad_norm": 186.78700256347656, "learning_rate": 3.500466449850033e-06, "loss": 17.8089, "step": 318630 }, { "epoch": 0.643672959837102, "grad_norm": 332.8668212890625, "learning_rate": 3.500133455666742e-06, "loss": 14.9414, "step": 318640 }, { "epoch": 0.6436931604697859, "grad_norm": 453.9659118652344, "learning_rate": 3.49980046879362e-06, "loss": 36.077, "step": 318650 }, { "epoch": 0.6437133611024697, "grad_norm": 245.55201721191406, "learning_rate": 3.4994674892322867e-06, "loss": 22.0074, "step": 318660 }, { "epoch": 0.6437335617351535, "grad_norm": 282.55670166015625, "learning_rate": 3.4991345169843666e-06, "loss": 31.9674, "step": 318670 }, { "epoch": 0.6437537623678373, "grad_norm": 373.16717529296875, "learning_rate": 3.4988015520514856e-06, "loss": 19.2149, "step": 318680 }, { "epoch": 0.6437739630005211, "grad_norm": 82.02903747558594, "learning_rate": 3.4984685944352604e-06, "loss": 18.7128, "step": 318690 }, { "epoch": 0.643794163633205, "grad_norm": 308.7587890625, "learning_rate": 3.498135644137318e-06, "loss": 40.1689, "step": 318700 }, { "epoch": 0.6438143642658888, "grad_norm": 233.81771850585938, "learning_rate": 3.4978027011592826e-06, "loss": 24.3444, "step": 318710 }, { "epoch": 0.6438345648985726, "grad_norm": 456.8699645996094, "learning_rate": 3.4974697655027724e-06, "loss": 13.877, "step": 318720 }, { "epoch": 0.6438547655312564, "grad_norm": 327.98028564453125, "learning_rate": 3.4971368371694126e-06, "loss": 31.2932, "step": 318730 }, { "epoch": 0.6438749661639402, "grad_norm": 674.7511596679688, "learning_rate": 3.496803916160827e-06, "loss": 24.9222, "step": 318740 }, { "epoch": 0.6438951667966241, "grad_norm": 47.281734466552734, "learning_rate": 3.4964710024786354e-06, "loss": 9.3571, "step": 318750 }, { "epoch": 0.6439153674293079, "grad_norm": 145.807373046875, "learning_rate": 3.4961380961244605e-06, "loss": 15.8402, "step": 318760 }, { "epoch": 0.6439355680619917, "grad_norm": 98.78150177001953, "learning_rate": 3.49580519709993e-06, "loss": 24.1519, "step": 318770 }, { "epoch": 0.6439557686946755, "grad_norm": 11.28227710723877, "learning_rate": 3.4954723054066593e-06, "loss": 13.6409, "step": 318780 }, { "epoch": 0.6439759693273593, "grad_norm": 179.0855255126953, "learning_rate": 3.4951394210462746e-06, "loss": 8.2722, "step": 318790 }, { "epoch": 0.6439961699600432, "grad_norm": 440.7255859375, "learning_rate": 3.4948065440203982e-06, "loss": 30.4665, "step": 318800 }, { "epoch": 0.644016370592727, "grad_norm": 279.1018371582031, "learning_rate": 3.494473674330653e-06, "loss": 14.4434, "step": 318810 }, { "epoch": 0.6440365712254108, "grad_norm": 403.313720703125, "learning_rate": 3.49414081197866e-06, "loss": 22.3927, "step": 318820 }, { "epoch": 0.6440567718580946, "grad_norm": 4.049921989440918, "learning_rate": 3.4938079569660398e-06, "loss": 27.5477, "step": 318830 }, { "epoch": 0.6440769724907784, "grad_norm": 129.38552856445312, "learning_rate": 3.493475109294421e-06, "loss": 13.2483, "step": 318840 }, { "epoch": 0.6440971731234623, "grad_norm": 191.96934509277344, "learning_rate": 3.4931422689654186e-06, "loss": 17.5202, "step": 318850 }, { "epoch": 0.6441173737561461, "grad_norm": 398.618896484375, "learning_rate": 3.492809435980659e-06, "loss": 10.5319, "step": 318860 }, { "epoch": 0.6441375743888299, "grad_norm": 248.68438720703125, "learning_rate": 3.4924766103417648e-06, "loss": 8.9156, "step": 318870 }, { "epoch": 0.6441577750215137, "grad_norm": 268.76434326171875, "learning_rate": 3.492143792050355e-06, "loss": 21.0746, "step": 318880 }, { "epoch": 0.6441779756541975, "grad_norm": 285.9759521484375, "learning_rate": 3.4918109811080535e-06, "loss": 9.1421, "step": 318890 }, { "epoch": 0.6441981762868813, "grad_norm": 236.83253479003906, "learning_rate": 3.491478177516484e-06, "loss": 12.0477, "step": 318900 }, { "epoch": 0.6442183769195651, "grad_norm": 165.1927032470703, "learning_rate": 3.4911453812772658e-06, "loss": 24.8632, "step": 318910 }, { "epoch": 0.6442385775522489, "grad_norm": 343.8407287597656, "learning_rate": 3.4908125923920204e-06, "loss": 18.9871, "step": 318920 }, { "epoch": 0.6442587781849327, "grad_norm": 22.461490631103516, "learning_rate": 3.490479810862373e-06, "loss": 16.3041, "step": 318930 }, { "epoch": 0.6442789788176165, "grad_norm": 951.3883056640625, "learning_rate": 3.490147036689945e-06, "loss": 21.844, "step": 318940 }, { "epoch": 0.6442991794503004, "grad_norm": 323.0085144042969, "learning_rate": 3.4898142698763555e-06, "loss": 13.1813, "step": 318950 }, { "epoch": 0.6443193800829842, "grad_norm": 334.7500915527344, "learning_rate": 3.4894815104232283e-06, "loss": 22.9656, "step": 318960 }, { "epoch": 0.644339580715668, "grad_norm": 93.52256774902344, "learning_rate": 3.489148758332186e-06, "loss": 25.0148, "step": 318970 }, { "epoch": 0.6443597813483518, "grad_norm": 276.42681884765625, "learning_rate": 3.4888160136048488e-06, "loss": 13.3142, "step": 318980 }, { "epoch": 0.6443799819810356, "grad_norm": 187.77999877929688, "learning_rate": 3.4884832762428376e-06, "loss": 25.5202, "step": 318990 }, { "epoch": 0.6444001826137195, "grad_norm": 149.11912536621094, "learning_rate": 3.488150546247778e-06, "loss": 13.6358, "step": 319000 }, { "epoch": 0.6444203832464033, "grad_norm": 404.0904235839844, "learning_rate": 3.4878178236212883e-06, "loss": 27.6624, "step": 319010 }, { "epoch": 0.6444405838790871, "grad_norm": 66.76666259765625, "learning_rate": 3.4874851083649906e-06, "loss": 15.8515, "step": 319020 }, { "epoch": 0.6444607845117709, "grad_norm": 35.33395004272461, "learning_rate": 3.487152400480509e-06, "loss": 22.2843, "step": 319030 }, { "epoch": 0.6444809851444547, "grad_norm": 101.22142791748047, "learning_rate": 3.4868196999694616e-06, "loss": 12.3805, "step": 319040 }, { "epoch": 0.6445011857771386, "grad_norm": 409.5107116699219, "learning_rate": 3.486487006833471e-06, "loss": 28.4716, "step": 319050 }, { "epoch": 0.6445213864098224, "grad_norm": 357.4282531738281, "learning_rate": 3.4861543210741607e-06, "loss": 15.6279, "step": 319060 }, { "epoch": 0.6445415870425062, "grad_norm": 476.4097595214844, "learning_rate": 3.485821642693148e-06, "loss": 22.8935, "step": 319070 }, { "epoch": 0.64456178767519, "grad_norm": 191.75006103515625, "learning_rate": 3.4854889716920588e-06, "loss": 15.9108, "step": 319080 }, { "epoch": 0.6445819883078738, "grad_norm": 15.893072128295898, "learning_rate": 3.485156308072512e-06, "loss": 19.291, "step": 319090 }, { "epoch": 0.6446021889405577, "grad_norm": 79.40172576904297, "learning_rate": 3.484823651836131e-06, "loss": 13.4627, "step": 319100 }, { "epoch": 0.6446223895732415, "grad_norm": 14.917325019836426, "learning_rate": 3.484491002984535e-06, "loss": 8.8235, "step": 319110 }, { "epoch": 0.6446425902059253, "grad_norm": 179.3166046142578, "learning_rate": 3.4841583615193444e-06, "loss": 14.5325, "step": 319120 }, { "epoch": 0.6446627908386091, "grad_norm": 717.0728149414062, "learning_rate": 3.4838257274421853e-06, "loss": 34.0131, "step": 319130 }, { "epoch": 0.6446829914712929, "grad_norm": 173.6001434326172, "learning_rate": 3.483493100754673e-06, "loss": 16.4991, "step": 319140 }, { "epoch": 0.6447031921039766, "grad_norm": 328.11492919921875, "learning_rate": 3.483160481458432e-06, "loss": 17.6726, "step": 319150 }, { "epoch": 0.6447233927366605, "grad_norm": 140.2810516357422, "learning_rate": 3.4828278695550845e-06, "loss": 19.6551, "step": 319160 }, { "epoch": 0.6447435933693443, "grad_norm": 293.0955505371094, "learning_rate": 3.4824952650462486e-06, "loss": 11.8954, "step": 319170 }, { "epoch": 0.6447637940020281, "grad_norm": 157.59092712402344, "learning_rate": 3.4821626679335464e-06, "loss": 9.2727, "step": 319180 }, { "epoch": 0.6447839946347119, "grad_norm": 580.5991821289062, "learning_rate": 3.4818300782186e-06, "loss": 22.2652, "step": 319190 }, { "epoch": 0.6448041952673957, "grad_norm": 456.9275817871094, "learning_rate": 3.4814974959030294e-06, "loss": 33.2147, "step": 319200 }, { "epoch": 0.6448243959000796, "grad_norm": 258.55499267578125, "learning_rate": 3.4811649209884544e-06, "loss": 12.4199, "step": 319210 }, { "epoch": 0.6448445965327634, "grad_norm": 314.80609130859375, "learning_rate": 3.480832353476501e-06, "loss": 17.7633, "step": 319220 }, { "epoch": 0.6448647971654472, "grad_norm": 191.04705810546875, "learning_rate": 3.480499793368783e-06, "loss": 9.9312, "step": 319230 }, { "epoch": 0.644884997798131, "grad_norm": 245.62152099609375, "learning_rate": 3.4801672406669253e-06, "loss": 16.2945, "step": 319240 }, { "epoch": 0.6449051984308148, "grad_norm": 420.9969482421875, "learning_rate": 3.4798346953725487e-06, "loss": 21.9969, "step": 319250 }, { "epoch": 0.6449253990634987, "grad_norm": 411.688720703125, "learning_rate": 3.4795021574872743e-06, "loss": 16.169, "step": 319260 }, { "epoch": 0.6449455996961825, "grad_norm": 23.599031448364258, "learning_rate": 3.479169627012721e-06, "loss": 8.7375, "step": 319270 }, { "epoch": 0.6449658003288663, "grad_norm": 156.9295654296875, "learning_rate": 3.478837103950509e-06, "loss": 17.7412, "step": 319280 }, { "epoch": 0.6449860009615501, "grad_norm": 307.07379150390625, "learning_rate": 3.4785045883022645e-06, "loss": 16.2166, "step": 319290 }, { "epoch": 0.6450062015942339, "grad_norm": 199.6287841796875, "learning_rate": 3.4781720800696006e-06, "loss": 17.555, "step": 319300 }, { "epoch": 0.6450264022269178, "grad_norm": 470.5386047363281, "learning_rate": 3.477839579254142e-06, "loss": 17.0211, "step": 319310 }, { "epoch": 0.6450466028596016, "grad_norm": 186.7948455810547, "learning_rate": 3.47750708585751e-06, "loss": 14.9202, "step": 319320 }, { "epoch": 0.6450668034922854, "grad_norm": 649.67333984375, "learning_rate": 3.4771745998813228e-06, "loss": 29.1016, "step": 319330 }, { "epoch": 0.6450870041249692, "grad_norm": 121.42948150634766, "learning_rate": 3.4768421213272017e-06, "loss": 13.1248, "step": 319340 }, { "epoch": 0.645107204757653, "grad_norm": 198.7672882080078, "learning_rate": 3.476509650196769e-06, "loss": 16.7074, "step": 319350 }, { "epoch": 0.6451274053903369, "grad_norm": 211.84751892089844, "learning_rate": 3.4761771864916415e-06, "loss": 24.3535, "step": 319360 }, { "epoch": 0.6451476060230207, "grad_norm": 314.4419860839844, "learning_rate": 3.4758447302134414e-06, "loss": 14.4986, "step": 319370 }, { "epoch": 0.6451678066557045, "grad_norm": 419.4031982421875, "learning_rate": 3.475512281363792e-06, "loss": 16.0715, "step": 319380 }, { "epoch": 0.6451880072883883, "grad_norm": 239.53692626953125, "learning_rate": 3.4751798399443075e-06, "loss": 28.24, "step": 319390 }, { "epoch": 0.6452082079210721, "grad_norm": 248.96046447753906, "learning_rate": 3.474847405956613e-06, "loss": 13.4703, "step": 319400 }, { "epoch": 0.6452284085537559, "grad_norm": 414.47601318359375, "learning_rate": 3.474514979402327e-06, "loss": 28.1491, "step": 319410 }, { "epoch": 0.6452486091864397, "grad_norm": 36.0577507019043, "learning_rate": 3.4741825602830716e-06, "loss": 11.6837, "step": 319420 }, { "epoch": 0.6452688098191235, "grad_norm": 873.613037109375, "learning_rate": 3.4738501486004632e-06, "loss": 27.5542, "step": 319430 }, { "epoch": 0.6452890104518073, "grad_norm": 272.81170654296875, "learning_rate": 3.4735177443561243e-06, "loss": 15.2189, "step": 319440 }, { "epoch": 0.6453092110844911, "grad_norm": 0.0, "learning_rate": 3.4731853475516763e-06, "loss": 18.3798, "step": 319450 }, { "epoch": 0.645329411717175, "grad_norm": 114.16248321533203, "learning_rate": 3.472852958188736e-06, "loss": 15.6346, "step": 319460 }, { "epoch": 0.6453496123498588, "grad_norm": 263.9646301269531, "learning_rate": 3.4725205762689256e-06, "loss": 28.9539, "step": 319470 }, { "epoch": 0.6453698129825426, "grad_norm": 194.0299530029297, "learning_rate": 3.472188201793866e-06, "loss": 13.0838, "step": 319480 }, { "epoch": 0.6453900136152264, "grad_norm": 537.886474609375, "learning_rate": 3.4718558347651742e-06, "loss": 18.0358, "step": 319490 }, { "epoch": 0.6454102142479102, "grad_norm": 469.5933532714844, "learning_rate": 3.471523475184472e-06, "loss": 13.6578, "step": 319500 }, { "epoch": 0.645430414880594, "grad_norm": 243.32740783691406, "learning_rate": 3.47119112305338e-06, "loss": 29.2812, "step": 319510 }, { "epoch": 0.6454506155132779, "grad_norm": 245.13772583007812, "learning_rate": 3.4708587783735164e-06, "loss": 14.7849, "step": 319520 }, { "epoch": 0.6454708161459617, "grad_norm": 382.14337158203125, "learning_rate": 3.4705264411465004e-06, "loss": 15.2452, "step": 319530 }, { "epoch": 0.6454910167786455, "grad_norm": 94.83063507080078, "learning_rate": 3.470194111373954e-06, "loss": 17.0346, "step": 319540 }, { "epoch": 0.6455112174113293, "grad_norm": 388.5849609375, "learning_rate": 3.4698617890574972e-06, "loss": 18.4564, "step": 319550 }, { "epoch": 0.6455314180440131, "grad_norm": 292.5734558105469, "learning_rate": 3.4695294741987474e-06, "loss": 25.1061, "step": 319560 }, { "epoch": 0.645551618676697, "grad_norm": 12.410697937011719, "learning_rate": 3.4691971667993254e-06, "loss": 21.34, "step": 319570 }, { "epoch": 0.6455718193093808, "grad_norm": 219.19419860839844, "learning_rate": 3.468864866860851e-06, "loss": 13.8718, "step": 319580 }, { "epoch": 0.6455920199420646, "grad_norm": 4.757787704467773, "learning_rate": 3.468532574384943e-06, "loss": 12.0153, "step": 319590 }, { "epoch": 0.6456122205747484, "grad_norm": 660.3803100585938, "learning_rate": 3.4682002893732203e-06, "loss": 27.8067, "step": 319600 }, { "epoch": 0.6456324212074322, "grad_norm": 8.08006477355957, "learning_rate": 3.467868011827306e-06, "loss": 16.022, "step": 319610 }, { "epoch": 0.6456526218401161, "grad_norm": 329.2829895019531, "learning_rate": 3.4675357417488163e-06, "loss": 22.904, "step": 319620 }, { "epoch": 0.6456728224727999, "grad_norm": 256.0654602050781, "learning_rate": 3.467203479139371e-06, "loss": 13.6017, "step": 319630 }, { "epoch": 0.6456930231054837, "grad_norm": 630.102783203125, "learning_rate": 3.4668712240005912e-06, "loss": 35.035, "step": 319640 }, { "epoch": 0.6457132237381675, "grad_norm": 224.76373291015625, "learning_rate": 3.4665389763340945e-06, "loss": 14.7071, "step": 319650 }, { "epoch": 0.6457334243708513, "grad_norm": 57.074954986572266, "learning_rate": 3.466206736141501e-06, "loss": 16.61, "step": 319660 }, { "epoch": 0.6457536250035351, "grad_norm": 70.996337890625, "learning_rate": 3.46587450342443e-06, "loss": 17.0045, "step": 319670 }, { "epoch": 0.6457738256362189, "grad_norm": 474.43743896484375, "learning_rate": 3.465542278184499e-06, "loss": 21.6983, "step": 319680 }, { "epoch": 0.6457940262689027, "grad_norm": 466.5363464355469, "learning_rate": 3.4652100604233296e-06, "loss": 10.1134, "step": 319690 }, { "epoch": 0.6458142269015865, "grad_norm": 518.2537841796875, "learning_rate": 3.464877850142541e-06, "loss": 15.8366, "step": 319700 }, { "epoch": 0.6458344275342703, "grad_norm": 84.25669860839844, "learning_rate": 3.464545647343751e-06, "loss": 19.5737, "step": 319710 }, { "epoch": 0.6458546281669542, "grad_norm": 344.373779296875, "learning_rate": 3.4642134520285796e-06, "loss": 15.7223, "step": 319720 }, { "epoch": 0.645874828799638, "grad_norm": 151.75637817382812, "learning_rate": 3.463881264198645e-06, "loss": 11.8075, "step": 319730 }, { "epoch": 0.6458950294323218, "grad_norm": 156.44544982910156, "learning_rate": 3.4635490838555687e-06, "loss": 18.6927, "step": 319740 }, { "epoch": 0.6459152300650056, "grad_norm": 170.59706115722656, "learning_rate": 3.463216911000965e-06, "loss": 8.6926, "step": 319750 }, { "epoch": 0.6459354306976894, "grad_norm": 376.662353515625, "learning_rate": 3.4628847456364567e-06, "loss": 18.1463, "step": 319760 }, { "epoch": 0.6459556313303733, "grad_norm": 79.43762969970703, "learning_rate": 3.462552587763663e-06, "loss": 26.1731, "step": 319770 }, { "epoch": 0.6459758319630571, "grad_norm": 218.59571838378906, "learning_rate": 3.4622204373842006e-06, "loss": 14.0986, "step": 319780 }, { "epoch": 0.6459960325957409, "grad_norm": 360.568115234375, "learning_rate": 3.461888294499689e-06, "loss": 14.1545, "step": 319790 }, { "epoch": 0.6460162332284247, "grad_norm": 175.55465698242188, "learning_rate": 3.4615561591117486e-06, "loss": 16.5464, "step": 319800 }, { "epoch": 0.6460364338611085, "grad_norm": 415.09466552734375, "learning_rate": 3.461224031221995e-06, "loss": 29.122, "step": 319810 }, { "epoch": 0.6460566344937924, "grad_norm": 580.9467163085938, "learning_rate": 3.4608919108320488e-06, "loss": 24.9952, "step": 319820 }, { "epoch": 0.6460768351264762, "grad_norm": 510.04833984375, "learning_rate": 3.4605597979435313e-06, "loss": 28.6637, "step": 319830 }, { "epoch": 0.64609703575916, "grad_norm": 121.9946517944336, "learning_rate": 3.460227692558056e-06, "loss": 17.101, "step": 319840 }, { "epoch": 0.6461172363918438, "grad_norm": 405.17132568359375, "learning_rate": 3.459895594677245e-06, "loss": 11.1699, "step": 319850 }, { "epoch": 0.6461374370245276, "grad_norm": 224.06748962402344, "learning_rate": 3.459563504302716e-06, "loss": 15.9972, "step": 319860 }, { "epoch": 0.6461576376572115, "grad_norm": 138.38540649414062, "learning_rate": 3.4592314214360888e-06, "loss": 11.7787, "step": 319870 }, { "epoch": 0.6461778382898953, "grad_norm": 136.37088012695312, "learning_rate": 3.4588993460789795e-06, "loss": 13.6865, "step": 319880 }, { "epoch": 0.6461980389225791, "grad_norm": 409.6867370605469, "learning_rate": 3.4585672782330072e-06, "loss": 24.0611, "step": 319890 }, { "epoch": 0.6462182395552629, "grad_norm": 297.72137451171875, "learning_rate": 3.4582352178997937e-06, "loss": 15.5891, "step": 319900 }, { "epoch": 0.6462384401879467, "grad_norm": 130.79922485351562, "learning_rate": 3.457903165080952e-06, "loss": 18.3116, "step": 319910 }, { "epoch": 0.6462586408206304, "grad_norm": 265.5534362792969, "learning_rate": 3.457571119778104e-06, "loss": 21.039, "step": 319920 }, { "epoch": 0.6462788414533143, "grad_norm": 185.9813690185547, "learning_rate": 3.4572390819928686e-06, "loss": 14.3417, "step": 319930 }, { "epoch": 0.6462990420859981, "grad_norm": 105.93415069580078, "learning_rate": 3.4569070517268616e-06, "loss": 31.9317, "step": 319940 }, { "epoch": 0.6463192427186819, "grad_norm": 468.8910217285156, "learning_rate": 3.4565750289817024e-06, "loss": 14.1007, "step": 319950 }, { "epoch": 0.6463394433513657, "grad_norm": 324.4515380859375, "learning_rate": 3.4562430137590107e-06, "loss": 20.0397, "step": 319960 }, { "epoch": 0.6463596439840495, "grad_norm": 392.64739990234375, "learning_rate": 3.4559110060604016e-06, "loss": 12.259, "step": 319970 }, { "epoch": 0.6463798446167334, "grad_norm": 250.71226501464844, "learning_rate": 3.455579005887495e-06, "loss": 24.7989, "step": 319980 }, { "epoch": 0.6464000452494172, "grad_norm": 125.34812927246094, "learning_rate": 3.455247013241909e-06, "loss": 16.5911, "step": 319990 }, { "epoch": 0.646420245882101, "grad_norm": 375.3134765625, "learning_rate": 3.4549150281252635e-06, "loss": 13.6728, "step": 320000 }, { "epoch": 0.6464404465147848, "grad_norm": 285.801513671875, "learning_rate": 3.454583050539173e-06, "loss": 19.0388, "step": 320010 }, { "epoch": 0.6464606471474686, "grad_norm": 176.94326782226562, "learning_rate": 3.454251080485258e-06, "loss": 15.3466, "step": 320020 }, { "epoch": 0.6464808477801525, "grad_norm": 302.7052917480469, "learning_rate": 3.4539191179651367e-06, "loss": 18.549, "step": 320030 }, { "epoch": 0.6465010484128363, "grad_norm": 343.50762939453125, "learning_rate": 3.4535871629804246e-06, "loss": 14.3336, "step": 320040 }, { "epoch": 0.6465212490455201, "grad_norm": 245.09580993652344, "learning_rate": 3.4532552155327404e-06, "loss": 16.6701, "step": 320050 }, { "epoch": 0.6465414496782039, "grad_norm": 209.85865783691406, "learning_rate": 3.4529232756237058e-06, "loss": 18.8621, "step": 320060 }, { "epoch": 0.6465616503108877, "grad_norm": 799.4150390625, "learning_rate": 3.4525913432549326e-06, "loss": 24.6476, "step": 320070 }, { "epoch": 0.6465818509435716, "grad_norm": 109.97142791748047, "learning_rate": 3.452259418428042e-06, "loss": 10.0598, "step": 320080 }, { "epoch": 0.6466020515762554, "grad_norm": 372.2641906738281, "learning_rate": 3.451927501144653e-06, "loss": 14.7427, "step": 320090 }, { "epoch": 0.6466222522089392, "grad_norm": 349.4522399902344, "learning_rate": 3.4515955914063796e-06, "loss": 17.4832, "step": 320100 }, { "epoch": 0.646642452841623, "grad_norm": 129.7165069580078, "learning_rate": 3.451263689214842e-06, "loss": 11.3999, "step": 320110 }, { "epoch": 0.6466626534743068, "grad_norm": 410.07098388671875, "learning_rate": 3.4509317945716585e-06, "loss": 14.3018, "step": 320120 }, { "epoch": 0.6466828541069907, "grad_norm": 225.54696655273438, "learning_rate": 3.4505999074784447e-06, "loss": 14.7799, "step": 320130 }, { "epoch": 0.6467030547396745, "grad_norm": 216.628662109375, "learning_rate": 3.450268027936817e-06, "loss": 13.9424, "step": 320140 }, { "epoch": 0.6467232553723583, "grad_norm": 117.55096435546875, "learning_rate": 3.4499361559483976e-06, "loss": 18.0558, "step": 320150 }, { "epoch": 0.6467434560050421, "grad_norm": 224.62527465820312, "learning_rate": 3.4496042915148008e-06, "loss": 19.4012, "step": 320160 }, { "epoch": 0.646763656637726, "grad_norm": 232.44410705566406, "learning_rate": 3.449272434637645e-06, "loss": 11.504, "step": 320170 }, { "epoch": 0.6467838572704097, "grad_norm": 285.39263916015625, "learning_rate": 3.4489405853185465e-06, "loss": 23.2704, "step": 320180 }, { "epoch": 0.6468040579030935, "grad_norm": 17.943130493164062, "learning_rate": 3.4486087435591243e-06, "loss": 18.7045, "step": 320190 }, { "epoch": 0.6468242585357773, "grad_norm": 42.09674072265625, "learning_rate": 3.4482769093609945e-06, "loss": 14.0899, "step": 320200 }, { "epoch": 0.6468444591684611, "grad_norm": 72.54357147216797, "learning_rate": 3.4479450827257733e-06, "loss": 23.4361, "step": 320210 }, { "epoch": 0.6468646598011449, "grad_norm": 311.1506652832031, "learning_rate": 3.447613263655083e-06, "loss": 22.139, "step": 320220 }, { "epoch": 0.6468848604338288, "grad_norm": 734.344970703125, "learning_rate": 3.447281452150534e-06, "loss": 22.5897, "step": 320230 }, { "epoch": 0.6469050610665126, "grad_norm": 278.7572326660156, "learning_rate": 3.4469496482137484e-06, "loss": 18.6043, "step": 320240 }, { "epoch": 0.6469252616991964, "grad_norm": 1544.2303466796875, "learning_rate": 3.4466178518463424e-06, "loss": 36.3877, "step": 320250 }, { "epoch": 0.6469454623318802, "grad_norm": 268.1660461425781, "learning_rate": 3.4462860630499316e-06, "loss": 15.9065, "step": 320260 }, { "epoch": 0.646965662964564, "grad_norm": 171.9043731689453, "learning_rate": 3.445954281826134e-06, "loss": 18.8517, "step": 320270 }, { "epoch": 0.6469858635972479, "grad_norm": 396.1708984375, "learning_rate": 3.4456225081765683e-06, "loss": 14.9025, "step": 320280 }, { "epoch": 0.6470060642299317, "grad_norm": 527.3031005859375, "learning_rate": 3.445290742102848e-06, "loss": 17.3653, "step": 320290 }, { "epoch": 0.6470262648626155, "grad_norm": 282.86737060546875, "learning_rate": 3.444958983606592e-06, "loss": 13.0961, "step": 320300 }, { "epoch": 0.6470464654952993, "grad_norm": 254.67092895507812, "learning_rate": 3.444627232689418e-06, "loss": 17.6238, "step": 320310 }, { "epoch": 0.6470666661279831, "grad_norm": 192.58218383789062, "learning_rate": 3.4442954893529436e-06, "loss": 16.7712, "step": 320320 }, { "epoch": 0.647086866760667, "grad_norm": 68.27947998046875, "learning_rate": 3.4439637535987825e-06, "loss": 23.5319, "step": 320330 }, { "epoch": 0.6471070673933508, "grad_norm": 356.7257080078125, "learning_rate": 3.4436320254285537e-06, "loss": 15.3612, "step": 320340 }, { "epoch": 0.6471272680260346, "grad_norm": 271.8177185058594, "learning_rate": 3.4433003048438748e-06, "loss": 24.1741, "step": 320350 }, { "epoch": 0.6471474686587184, "grad_norm": 288.5990295410156, "learning_rate": 3.442968591846359e-06, "loss": 17.6466, "step": 320360 }, { "epoch": 0.6471676692914022, "grad_norm": 29.086511611938477, "learning_rate": 3.442636886437627e-06, "loss": 31.5093, "step": 320370 }, { "epoch": 0.6471878699240861, "grad_norm": 368.41070556640625, "learning_rate": 3.4423051886192944e-06, "loss": 23.5995, "step": 320380 }, { "epoch": 0.6472080705567699, "grad_norm": 302.2815856933594, "learning_rate": 3.4419734983929763e-06, "loss": 22.5885, "step": 320390 }, { "epoch": 0.6472282711894537, "grad_norm": 394.27899169921875, "learning_rate": 3.441641815760291e-06, "loss": 10.8933, "step": 320400 }, { "epoch": 0.6472484718221375, "grad_norm": 19.499401092529297, "learning_rate": 3.4413101407228557e-06, "loss": 12.9023, "step": 320410 }, { "epoch": 0.6472686724548213, "grad_norm": 261.99481201171875, "learning_rate": 3.440978473282284e-06, "loss": 8.3835, "step": 320420 }, { "epoch": 0.647288873087505, "grad_norm": 101.0696029663086, "learning_rate": 3.440646813440193e-06, "loss": 18.3133, "step": 320430 }, { "epoch": 0.6473090737201889, "grad_norm": 142.61865234375, "learning_rate": 3.4403151611982016e-06, "loss": 21.4955, "step": 320440 }, { "epoch": 0.6473292743528727, "grad_norm": 399.8019714355469, "learning_rate": 3.4399835165579266e-06, "loss": 15.9547, "step": 320450 }, { "epoch": 0.6473494749855565, "grad_norm": 113.58306121826172, "learning_rate": 3.439651879520981e-06, "loss": 12.9868, "step": 320460 }, { "epoch": 0.6473696756182403, "grad_norm": 168.53309631347656, "learning_rate": 3.4393202500889827e-06, "loss": 12.6463, "step": 320470 }, { "epoch": 0.6473898762509241, "grad_norm": 173.22305297851562, "learning_rate": 3.43898862826355e-06, "loss": 20.4407, "step": 320480 }, { "epoch": 0.647410076883608, "grad_norm": 405.9932556152344, "learning_rate": 3.438657014046296e-06, "loss": 28.0771, "step": 320490 }, { "epoch": 0.6474302775162918, "grad_norm": 559.8690185546875, "learning_rate": 3.4383254074388373e-06, "loss": 11.3131, "step": 320500 }, { "epoch": 0.6474504781489756, "grad_norm": 352.6914367675781, "learning_rate": 3.437993808442794e-06, "loss": 20.3389, "step": 320510 }, { "epoch": 0.6474706787816594, "grad_norm": 363.4158630371094, "learning_rate": 3.437662217059776e-06, "loss": 16.9926, "step": 320520 }, { "epoch": 0.6474908794143432, "grad_norm": 467.6046142578125, "learning_rate": 3.437330633291405e-06, "loss": 12.3437, "step": 320530 }, { "epoch": 0.6475110800470271, "grad_norm": 80.23808288574219, "learning_rate": 3.436999057139295e-06, "loss": 18.618, "step": 320540 }, { "epoch": 0.6475312806797109, "grad_norm": 517.1422119140625, "learning_rate": 3.4366674886050618e-06, "loss": 22.1404, "step": 320550 }, { "epoch": 0.6475514813123947, "grad_norm": 340.6241455078125, "learning_rate": 3.436335927690321e-06, "loss": 17.3476, "step": 320560 }, { "epoch": 0.6475716819450785, "grad_norm": 338.519775390625, "learning_rate": 3.4360043743966907e-06, "loss": 9.1881, "step": 320570 }, { "epoch": 0.6475918825777623, "grad_norm": 47.337738037109375, "learning_rate": 3.4356728287257845e-06, "loss": 12.2806, "step": 320580 }, { "epoch": 0.6476120832104462, "grad_norm": 658.3490600585938, "learning_rate": 3.4353412906792175e-06, "loss": 26.7605, "step": 320590 }, { "epoch": 0.64763228384313, "grad_norm": 362.61865234375, "learning_rate": 3.4350097602586085e-06, "loss": 20.3889, "step": 320600 }, { "epoch": 0.6476524844758138, "grad_norm": 255.49586486816406, "learning_rate": 3.4346782374655743e-06, "loss": 19.5846, "step": 320610 }, { "epoch": 0.6476726851084976, "grad_norm": 482.1590576171875, "learning_rate": 3.4343467223017256e-06, "loss": 19.1329, "step": 320620 }, { "epoch": 0.6476928857411814, "grad_norm": 0.0, "learning_rate": 3.4340152147686824e-06, "loss": 12.6629, "step": 320630 }, { "epoch": 0.6477130863738653, "grad_norm": 195.891357421875, "learning_rate": 3.4336837148680595e-06, "loss": 27.6182, "step": 320640 }, { "epoch": 0.6477332870065491, "grad_norm": 0.0, "learning_rate": 3.4333522226014715e-06, "loss": 30.0489, "step": 320650 }, { "epoch": 0.6477534876392329, "grad_norm": 208.82249450683594, "learning_rate": 3.433020737970534e-06, "loss": 22.1956, "step": 320660 }, { "epoch": 0.6477736882719167, "grad_norm": 0.0, "learning_rate": 3.432689260976866e-06, "loss": 13.3992, "step": 320670 }, { "epoch": 0.6477938889046005, "grad_norm": 165.4186248779297, "learning_rate": 3.4323577916220773e-06, "loss": 24.343, "step": 320680 }, { "epoch": 0.6478140895372843, "grad_norm": 188.92584228515625, "learning_rate": 3.4320263299077877e-06, "loss": 34.2568, "step": 320690 }, { "epoch": 0.6478342901699681, "grad_norm": 365.12530517578125, "learning_rate": 3.4316948758356127e-06, "loss": 13.9312, "step": 320700 }, { "epoch": 0.6478544908026519, "grad_norm": 450.8101501464844, "learning_rate": 3.431363429407166e-06, "loss": 13.3519, "step": 320710 }, { "epoch": 0.6478746914353357, "grad_norm": 0.0, "learning_rate": 3.431031990624063e-06, "loss": 21.7189, "step": 320720 }, { "epoch": 0.6478948920680195, "grad_norm": 298.44384765625, "learning_rate": 3.4307005594879215e-06, "loss": 12.9082, "step": 320730 }, { "epoch": 0.6479150927007034, "grad_norm": 289.7355041503906, "learning_rate": 3.4303691360003533e-06, "loss": 15.8016, "step": 320740 }, { "epoch": 0.6479352933333872, "grad_norm": 552.3907470703125, "learning_rate": 3.4300377201629753e-06, "loss": 23.7424, "step": 320750 }, { "epoch": 0.647955493966071, "grad_norm": 211.20497131347656, "learning_rate": 3.4297063119774037e-06, "loss": 18.0398, "step": 320760 }, { "epoch": 0.6479756945987548, "grad_norm": 342.7974548339844, "learning_rate": 3.4293749114452546e-06, "loss": 27.4506, "step": 320770 }, { "epoch": 0.6479958952314386, "grad_norm": 224.96324157714844, "learning_rate": 3.4290435185681404e-06, "loss": 29.4553, "step": 320780 }, { "epoch": 0.6480160958641225, "grad_norm": 57.091758728027344, "learning_rate": 3.428712133347677e-06, "loss": 21.2605, "step": 320790 }, { "epoch": 0.6480362964968063, "grad_norm": 386.0959167480469, "learning_rate": 3.4283807557854814e-06, "loss": 23.3992, "step": 320800 }, { "epoch": 0.6480564971294901, "grad_norm": 362.60687255859375, "learning_rate": 3.4280493858831665e-06, "loss": 20.2135, "step": 320810 }, { "epoch": 0.6480766977621739, "grad_norm": 284.46649169921875, "learning_rate": 3.4277180236423467e-06, "loss": 17.9921, "step": 320820 }, { "epoch": 0.6480968983948577, "grad_norm": 167.98422241210938, "learning_rate": 3.427386669064643e-06, "loss": 22.459, "step": 320830 }, { "epoch": 0.6481170990275416, "grad_norm": 84.3082504272461, "learning_rate": 3.4270553221516618e-06, "loss": 16.4012, "step": 320840 }, { "epoch": 0.6481372996602254, "grad_norm": 278.8011169433594, "learning_rate": 3.426723982905023e-06, "loss": 24.3281, "step": 320850 }, { "epoch": 0.6481575002929092, "grad_norm": 0.0, "learning_rate": 3.4263926513263424e-06, "loss": 8.4972, "step": 320860 }, { "epoch": 0.648177700925593, "grad_norm": 65.4018325805664, "learning_rate": 3.4260613274172316e-06, "loss": 23.2382, "step": 320870 }, { "epoch": 0.6481979015582768, "grad_norm": 460.9776611328125, "learning_rate": 3.4257300111793073e-06, "loss": 27.79, "step": 320880 }, { "epoch": 0.6482181021909607, "grad_norm": 411.93359375, "learning_rate": 3.425398702614185e-06, "loss": 23.1866, "step": 320890 }, { "epoch": 0.6482383028236445, "grad_norm": 184.2687225341797, "learning_rate": 3.4250674017234774e-06, "loss": 14.8621, "step": 320900 }, { "epoch": 0.6482585034563283, "grad_norm": 465.2342529296875, "learning_rate": 3.4247361085087993e-06, "loss": 12.3505, "step": 320910 }, { "epoch": 0.6482787040890121, "grad_norm": 322.8871765136719, "learning_rate": 3.4244048229717676e-06, "loss": 22.381, "step": 320920 }, { "epoch": 0.6482989047216959, "grad_norm": 377.8033142089844, "learning_rate": 3.4240735451139963e-06, "loss": 23.5075, "step": 320930 }, { "epoch": 0.6483191053543798, "grad_norm": 384.82623291015625, "learning_rate": 3.4237422749370986e-06, "loss": 24.2414, "step": 320940 }, { "epoch": 0.6483393059870635, "grad_norm": 173.9273223876953, "learning_rate": 3.4234110124426893e-06, "loss": 12.2168, "step": 320950 }, { "epoch": 0.6483595066197473, "grad_norm": 38.5885124206543, "learning_rate": 3.4230797576323847e-06, "loss": 18.6576, "step": 320960 }, { "epoch": 0.6483797072524311, "grad_norm": 287.9279479980469, "learning_rate": 3.422748510507798e-06, "loss": 10.2657, "step": 320970 }, { "epoch": 0.6483999078851149, "grad_norm": 234.44705200195312, "learning_rate": 3.422417271070542e-06, "loss": 29.5507, "step": 320980 }, { "epoch": 0.6484201085177987, "grad_norm": 448.682861328125, "learning_rate": 3.4220860393222347e-06, "loss": 15.1717, "step": 320990 }, { "epoch": 0.6484403091504826, "grad_norm": 278.4072570800781, "learning_rate": 3.4217548152644887e-06, "loss": 12.6209, "step": 321000 }, { "epoch": 0.6484605097831664, "grad_norm": 94.63981628417969, "learning_rate": 3.4214235988989173e-06, "loss": 17.5855, "step": 321010 }, { "epoch": 0.6484807104158502, "grad_norm": 408.8166809082031, "learning_rate": 3.421092390227137e-06, "loss": 15.7114, "step": 321020 }, { "epoch": 0.648500911048534, "grad_norm": 39.559322357177734, "learning_rate": 3.42076118925076e-06, "loss": 15.8207, "step": 321030 }, { "epoch": 0.6485211116812178, "grad_norm": 535.9697265625, "learning_rate": 3.4204299959714006e-06, "loss": 17.1409, "step": 321040 }, { "epoch": 0.6485413123139017, "grad_norm": 144.4085235595703, "learning_rate": 3.4200988103906747e-06, "loss": 12.1061, "step": 321050 }, { "epoch": 0.6485615129465855, "grad_norm": 291.4454345703125, "learning_rate": 3.4197676325101965e-06, "loss": 17.6136, "step": 321060 }, { "epoch": 0.6485817135792693, "grad_norm": 292.8103332519531, "learning_rate": 3.419436462331578e-06, "loss": 16.5426, "step": 321070 }, { "epoch": 0.6486019142119531, "grad_norm": 135.78729248046875, "learning_rate": 3.4191052998564344e-06, "loss": 14.7225, "step": 321080 }, { "epoch": 0.6486221148446369, "grad_norm": 276.433349609375, "learning_rate": 3.4187741450863816e-06, "loss": 13.2902, "step": 321090 }, { "epoch": 0.6486423154773208, "grad_norm": 26.638858795166016, "learning_rate": 3.4184429980230305e-06, "loss": 10.3407, "step": 321100 }, { "epoch": 0.6486625161100046, "grad_norm": 477.56011962890625, "learning_rate": 3.418111858667995e-06, "loss": 27.0443, "step": 321110 }, { "epoch": 0.6486827167426884, "grad_norm": 421.36663818359375, "learning_rate": 3.4177807270228942e-06, "loss": 26.757, "step": 321120 }, { "epoch": 0.6487029173753722, "grad_norm": 368.3182067871094, "learning_rate": 3.4174496030893346e-06, "loss": 16.4876, "step": 321130 }, { "epoch": 0.648723118008056, "grad_norm": 867.3443603515625, "learning_rate": 3.4171184868689345e-06, "loss": 29.8134, "step": 321140 }, { "epoch": 0.6487433186407399, "grad_norm": 214.52784729003906, "learning_rate": 3.4167873783633087e-06, "loss": 15.293, "step": 321150 }, { "epoch": 0.6487635192734237, "grad_norm": 41.58778762817383, "learning_rate": 3.416456277574068e-06, "loss": 16.8226, "step": 321160 }, { "epoch": 0.6487837199061075, "grad_norm": 160.38673400878906, "learning_rate": 3.4161251845028264e-06, "loss": 9.4925, "step": 321170 }, { "epoch": 0.6488039205387913, "grad_norm": 247.2576446533203, "learning_rate": 3.4157940991512007e-06, "loss": 9.869, "step": 321180 }, { "epoch": 0.6488241211714751, "grad_norm": 350.81231689453125, "learning_rate": 3.4154630215208005e-06, "loss": 18.3456, "step": 321190 }, { "epoch": 0.6488443218041589, "grad_norm": 104.33922576904297, "learning_rate": 3.4151319516132414e-06, "loss": 23.0131, "step": 321200 }, { "epoch": 0.6488645224368427, "grad_norm": 403.7087097167969, "learning_rate": 3.4148008894301378e-06, "loss": 20.5104, "step": 321210 }, { "epoch": 0.6488847230695265, "grad_norm": 36.43886947631836, "learning_rate": 3.4144698349731025e-06, "loss": 8.6412, "step": 321220 }, { "epoch": 0.6489049237022103, "grad_norm": 347.2121887207031, "learning_rate": 3.4141387882437483e-06, "loss": 20.2782, "step": 321230 }, { "epoch": 0.6489251243348941, "grad_norm": 158.46954345703125, "learning_rate": 3.4138077492436896e-06, "loss": 19.3416, "step": 321240 }, { "epoch": 0.648945324967578, "grad_norm": 422.77252197265625, "learning_rate": 3.4134767179745404e-06, "loss": 19.5821, "step": 321250 }, { "epoch": 0.6489655256002618, "grad_norm": 105.5335693359375, "learning_rate": 3.4131456944379126e-06, "loss": 27.4482, "step": 321260 }, { "epoch": 0.6489857262329456, "grad_norm": 152.62051391601562, "learning_rate": 3.412814678635419e-06, "loss": 12.5996, "step": 321270 }, { "epoch": 0.6490059268656294, "grad_norm": 209.68710327148438, "learning_rate": 3.4124836705686765e-06, "loss": 38.549, "step": 321280 }, { "epoch": 0.6490261274983132, "grad_norm": 278.3358154296875, "learning_rate": 3.4121526702392938e-06, "loss": 12.1116, "step": 321290 }, { "epoch": 0.649046328130997, "grad_norm": 247.67758178710938, "learning_rate": 3.411821677648887e-06, "loss": 13.1859, "step": 321300 }, { "epoch": 0.6490665287636809, "grad_norm": 155.09056091308594, "learning_rate": 3.4114906927990697e-06, "loss": 18.5142, "step": 321310 }, { "epoch": 0.6490867293963647, "grad_norm": 221.20314025878906, "learning_rate": 3.4111597156914535e-06, "loss": 19.557, "step": 321320 }, { "epoch": 0.6491069300290485, "grad_norm": 390.0180969238281, "learning_rate": 3.4108287463276517e-06, "loss": 28.0897, "step": 321330 }, { "epoch": 0.6491271306617323, "grad_norm": 122.90689849853516, "learning_rate": 3.410497784709279e-06, "loss": 13.957, "step": 321340 }, { "epoch": 0.6491473312944162, "grad_norm": 169.58135986328125, "learning_rate": 3.4101668308379465e-06, "loss": 19.4486, "step": 321350 }, { "epoch": 0.6491675319271, "grad_norm": 8.644115447998047, "learning_rate": 3.409835884715267e-06, "loss": 12.8684, "step": 321360 }, { "epoch": 0.6491877325597838, "grad_norm": 76.62061309814453, "learning_rate": 3.4095049463428553e-06, "loss": 26.3374, "step": 321370 }, { "epoch": 0.6492079331924676, "grad_norm": 221.4744873046875, "learning_rate": 3.4091740157223253e-06, "loss": 15.919, "step": 321380 }, { "epoch": 0.6492281338251514, "grad_norm": 270.0904846191406, "learning_rate": 3.4088430928552863e-06, "loss": 16.3019, "step": 321390 }, { "epoch": 0.6492483344578353, "grad_norm": 350.4560852050781, "learning_rate": 3.4085121777433532e-06, "loss": 18.0888, "step": 321400 }, { "epoch": 0.6492685350905191, "grad_norm": 301.7528076171875, "learning_rate": 3.40818127038814e-06, "loss": 13.3618, "step": 321410 }, { "epoch": 0.6492887357232029, "grad_norm": 265.95513916015625, "learning_rate": 3.407850370791257e-06, "loss": 22.8676, "step": 321420 }, { "epoch": 0.6493089363558867, "grad_norm": 60.685909271240234, "learning_rate": 3.4075194789543174e-06, "loss": 11.4127, "step": 321430 }, { "epoch": 0.6493291369885705, "grad_norm": 216.17442321777344, "learning_rate": 3.407188594878938e-06, "loss": 9.1029, "step": 321440 }, { "epoch": 0.6493493376212544, "grad_norm": 491.58367919921875, "learning_rate": 3.4068577185667253e-06, "loss": 26.6141, "step": 321450 }, { "epoch": 0.6493695382539381, "grad_norm": 404.24798583984375, "learning_rate": 3.406526850019295e-06, "loss": 11.148, "step": 321460 }, { "epoch": 0.6493897388866219, "grad_norm": 28.388870239257812, "learning_rate": 3.4061959892382613e-06, "loss": 10.3503, "step": 321470 }, { "epoch": 0.6494099395193057, "grad_norm": 259.9290771484375, "learning_rate": 3.4058651362252337e-06, "loss": 14.7762, "step": 321480 }, { "epoch": 0.6494301401519895, "grad_norm": 447.7850646972656, "learning_rate": 3.4055342909818255e-06, "loss": 13.6164, "step": 321490 }, { "epoch": 0.6494503407846733, "grad_norm": 3.5834810733795166, "learning_rate": 3.40520345350965e-06, "loss": 21.7731, "step": 321500 }, { "epoch": 0.6494705414173572, "grad_norm": 28.949012756347656, "learning_rate": 3.4048726238103214e-06, "loss": 13.6307, "step": 321510 }, { "epoch": 0.649490742050041, "grad_norm": 97.5015640258789, "learning_rate": 3.404541801885448e-06, "loss": 24.3648, "step": 321520 }, { "epoch": 0.6495109426827248, "grad_norm": 228.18544006347656, "learning_rate": 3.4042109877366447e-06, "loss": 16.7979, "step": 321530 }, { "epoch": 0.6495311433154086, "grad_norm": 132.29238891601562, "learning_rate": 3.403880181365525e-06, "loss": 19.5102, "step": 321540 }, { "epoch": 0.6495513439480924, "grad_norm": 374.75030517578125, "learning_rate": 3.403549382773699e-06, "loss": 20.724, "step": 321550 }, { "epoch": 0.6495715445807763, "grad_norm": 182.02301025390625, "learning_rate": 3.4032185919627784e-06, "loss": 11.2051, "step": 321560 }, { "epoch": 0.6495917452134601, "grad_norm": 787.4822387695312, "learning_rate": 3.4028878089343784e-06, "loss": 27.5426, "step": 321570 }, { "epoch": 0.6496119458461439, "grad_norm": 396.4010314941406, "learning_rate": 3.402557033690109e-06, "loss": 40.9507, "step": 321580 }, { "epoch": 0.6496321464788277, "grad_norm": 884.9678955078125, "learning_rate": 3.4022262662315813e-06, "loss": 23.5371, "step": 321590 }, { "epoch": 0.6496523471115115, "grad_norm": 280.2633361816406, "learning_rate": 3.401895506560411e-06, "loss": 28.306, "step": 321600 }, { "epoch": 0.6496725477441954, "grad_norm": 153.69863891601562, "learning_rate": 3.401564754678207e-06, "loss": 7.9916, "step": 321610 }, { "epoch": 0.6496927483768792, "grad_norm": 71.65312957763672, "learning_rate": 3.401234010586583e-06, "loss": 12.791, "step": 321620 }, { "epoch": 0.649712949009563, "grad_norm": 527.1277465820312, "learning_rate": 3.4009032742871515e-06, "loss": 16.3878, "step": 321630 }, { "epoch": 0.6497331496422468, "grad_norm": 346.6917419433594, "learning_rate": 3.4005725457815225e-06, "loss": 16.6525, "step": 321640 }, { "epoch": 0.6497533502749306, "grad_norm": 271.9971008300781, "learning_rate": 3.400241825071309e-06, "loss": 17.1298, "step": 321650 }, { "epoch": 0.6497735509076145, "grad_norm": 97.93443298339844, "learning_rate": 3.3999111121581215e-06, "loss": 25.2223, "step": 321660 }, { "epoch": 0.6497937515402983, "grad_norm": 718.5128173828125, "learning_rate": 3.399580407043576e-06, "loss": 28.4354, "step": 321670 }, { "epoch": 0.6498139521729821, "grad_norm": 124.09073638916016, "learning_rate": 3.3992497097292786e-06, "loss": 20.5014, "step": 321680 }, { "epoch": 0.6498341528056659, "grad_norm": 0.0, "learning_rate": 3.3989190202168452e-06, "loss": 11.2301, "step": 321690 }, { "epoch": 0.6498543534383497, "grad_norm": 117.92030334472656, "learning_rate": 3.3985883385078875e-06, "loss": 16.4007, "step": 321700 }, { "epoch": 0.6498745540710335, "grad_norm": 171.7556915283203, "learning_rate": 3.398257664604015e-06, "loss": 15.6268, "step": 321710 }, { "epoch": 0.6498947547037173, "grad_norm": 264.80950927734375, "learning_rate": 3.3979269985068387e-06, "loss": 10.4463, "step": 321720 }, { "epoch": 0.6499149553364011, "grad_norm": 198.0549774169922, "learning_rate": 3.3975963402179756e-06, "loss": 19.4343, "step": 321730 }, { "epoch": 0.6499351559690849, "grad_norm": 111.02458190917969, "learning_rate": 3.39726568973903e-06, "loss": 30.8112, "step": 321740 }, { "epoch": 0.6499553566017687, "grad_norm": 832.3494873046875, "learning_rate": 3.396935047071619e-06, "loss": 28.9618, "step": 321750 }, { "epoch": 0.6499755572344526, "grad_norm": 710.1707763671875, "learning_rate": 3.3966044122173526e-06, "loss": 22.3302, "step": 321760 }, { "epoch": 0.6499957578671364, "grad_norm": 629.0470581054688, "learning_rate": 3.3962737851778406e-06, "loss": 6.0025, "step": 321770 }, { "epoch": 0.6500159584998202, "grad_norm": 27.751850128173828, "learning_rate": 3.3959431659546952e-06, "loss": 12.9521, "step": 321780 }, { "epoch": 0.650036159132504, "grad_norm": 282.68182373046875, "learning_rate": 3.39561255454953e-06, "loss": 16.2359, "step": 321790 }, { "epoch": 0.6500563597651878, "grad_norm": 341.8492126464844, "learning_rate": 3.3952819509639534e-06, "loss": 16.2149, "step": 321800 }, { "epoch": 0.6500765603978717, "grad_norm": 200.33517456054688, "learning_rate": 3.394951355199577e-06, "loss": 33.3638, "step": 321810 }, { "epoch": 0.6500967610305555, "grad_norm": 179.23341369628906, "learning_rate": 3.3946207672580144e-06, "loss": 22.3634, "step": 321820 }, { "epoch": 0.6501169616632393, "grad_norm": 148.15872192382812, "learning_rate": 3.3942901871408763e-06, "loss": 18.2931, "step": 321830 }, { "epoch": 0.6501371622959231, "grad_norm": 245.0345458984375, "learning_rate": 3.3939596148497717e-06, "loss": 9.7497, "step": 321840 }, { "epoch": 0.6501573629286069, "grad_norm": 78.28666687011719, "learning_rate": 3.3936290503863132e-06, "loss": 25.6003, "step": 321850 }, { "epoch": 0.6501775635612908, "grad_norm": 293.65216064453125, "learning_rate": 3.393298493752113e-06, "loss": 17.3127, "step": 321860 }, { "epoch": 0.6501977641939746, "grad_norm": 365.9211120605469, "learning_rate": 3.392967944948781e-06, "loss": 18.9226, "step": 321870 }, { "epoch": 0.6502179648266584, "grad_norm": 348.3866271972656, "learning_rate": 3.392637403977925e-06, "loss": 14.6757, "step": 321880 }, { "epoch": 0.6502381654593422, "grad_norm": 63.936851501464844, "learning_rate": 3.3923068708411645e-06, "loss": 23.6716, "step": 321890 }, { "epoch": 0.650258366092026, "grad_norm": 513.7722778320312, "learning_rate": 3.3919763455401016e-06, "loss": 19.2547, "step": 321900 }, { "epoch": 0.6502785667247098, "grad_norm": 249.60804748535156, "learning_rate": 3.391645828076352e-06, "loss": 16.6472, "step": 321910 }, { "epoch": 0.6502987673573937, "grad_norm": 309.6667175292969, "learning_rate": 3.391315318451527e-06, "loss": 14.967, "step": 321920 }, { "epoch": 0.6503189679900775, "grad_norm": 466.57989501953125, "learning_rate": 3.3909848166672343e-06, "loss": 20.1475, "step": 321930 }, { "epoch": 0.6503391686227613, "grad_norm": 355.3318786621094, "learning_rate": 3.3906543227250866e-06, "loss": 15.572, "step": 321940 }, { "epoch": 0.6503593692554451, "grad_norm": 710.9382934570312, "learning_rate": 3.3903238366266956e-06, "loss": 38.0077, "step": 321950 }, { "epoch": 0.650379569888129, "grad_norm": 60.629371643066406, "learning_rate": 3.38999335837367e-06, "loss": 11.3643, "step": 321960 }, { "epoch": 0.6503997705208127, "grad_norm": 136.39584350585938, "learning_rate": 3.389662887967621e-06, "loss": 16.3169, "step": 321970 }, { "epoch": 0.6504199711534965, "grad_norm": 401.38525390625, "learning_rate": 3.38933242541016e-06, "loss": 16.9265, "step": 321980 }, { "epoch": 0.6504401717861803, "grad_norm": 311.34747314453125, "learning_rate": 3.3890019707028987e-06, "loss": 16.4785, "step": 321990 }, { "epoch": 0.6504603724188641, "grad_norm": 229.0671844482422, "learning_rate": 3.3886715238474454e-06, "loss": 14.3879, "step": 322000 }, { "epoch": 0.6504805730515479, "grad_norm": 214.28990173339844, "learning_rate": 3.388341084845411e-06, "loss": 11.573, "step": 322010 }, { "epoch": 0.6505007736842318, "grad_norm": 113.25912475585938, "learning_rate": 3.388010653698409e-06, "loss": 10.6946, "step": 322020 }, { "epoch": 0.6505209743169156, "grad_norm": 431.58746337890625, "learning_rate": 3.3876802304080457e-06, "loss": 21.2362, "step": 322030 }, { "epoch": 0.6505411749495994, "grad_norm": 382.2159729003906, "learning_rate": 3.3873498149759325e-06, "loss": 10.3258, "step": 322040 }, { "epoch": 0.6505613755822832, "grad_norm": 256.9619445800781, "learning_rate": 3.3870194074036846e-06, "loss": 14.818, "step": 322050 }, { "epoch": 0.650581576214967, "grad_norm": 405.4591064453125, "learning_rate": 3.3866890076929036e-06, "loss": 20.6534, "step": 322060 }, { "epoch": 0.6506017768476509, "grad_norm": 111.41635131835938, "learning_rate": 3.3863586158452074e-06, "loss": 21.3111, "step": 322070 }, { "epoch": 0.6506219774803347, "grad_norm": 598.8419189453125, "learning_rate": 3.386028231862204e-06, "loss": 32.5114, "step": 322080 }, { "epoch": 0.6506421781130185, "grad_norm": 458.8524475097656, "learning_rate": 3.385697855745502e-06, "loss": 29.6846, "step": 322090 }, { "epoch": 0.6506623787457023, "grad_norm": 183.38121032714844, "learning_rate": 3.3853674874967134e-06, "loss": 33.8271, "step": 322100 }, { "epoch": 0.6506825793783861, "grad_norm": 472.6897888183594, "learning_rate": 3.3850371271174465e-06, "loss": 26.9158, "step": 322110 }, { "epoch": 0.65070278001107, "grad_norm": 219.9744110107422, "learning_rate": 3.384706774609316e-06, "loss": 20.7244, "step": 322120 }, { "epoch": 0.6507229806437538, "grad_norm": 336.4977722167969, "learning_rate": 3.3843764299739258e-06, "loss": 19.064, "step": 322130 }, { "epoch": 0.6507431812764376, "grad_norm": 0.0, "learning_rate": 3.3840460932128894e-06, "loss": 9.0769, "step": 322140 }, { "epoch": 0.6507633819091214, "grad_norm": 109.93680572509766, "learning_rate": 3.3837157643278173e-06, "loss": 29.2693, "step": 322150 }, { "epoch": 0.6507835825418052, "grad_norm": 329.5294494628906, "learning_rate": 3.3833854433203185e-06, "loss": 13.0824, "step": 322160 }, { "epoch": 0.6508037831744891, "grad_norm": 39.64339828491211, "learning_rate": 3.3830551301920024e-06, "loss": 24.5051, "step": 322170 }, { "epoch": 0.6508239838071729, "grad_norm": 54.77452850341797, "learning_rate": 3.38272482494448e-06, "loss": 17.04, "step": 322180 }, { "epoch": 0.6508441844398567, "grad_norm": 246.82981872558594, "learning_rate": 3.38239452757936e-06, "loss": 17.3043, "step": 322190 }, { "epoch": 0.6508643850725405, "grad_norm": 253.42015075683594, "learning_rate": 3.3820642380982527e-06, "loss": 31.8814, "step": 322200 }, { "epoch": 0.6508845857052243, "grad_norm": 225.7999725341797, "learning_rate": 3.38173395650277e-06, "loss": 12.907, "step": 322210 }, { "epoch": 0.650904786337908, "grad_norm": 110.16376495361328, "learning_rate": 3.3814036827945173e-06, "loss": 10.8874, "step": 322220 }, { "epoch": 0.6509249869705919, "grad_norm": 274.7072448730469, "learning_rate": 3.3810734169751075e-06, "loss": 22.9886, "step": 322230 }, { "epoch": 0.6509451876032757, "grad_norm": 280.6270751953125, "learning_rate": 3.3807431590461502e-06, "loss": 12.6438, "step": 322240 }, { "epoch": 0.6509653882359595, "grad_norm": 161.29458618164062, "learning_rate": 3.3804129090092542e-06, "loss": 14.1946, "step": 322250 }, { "epoch": 0.6509855888686433, "grad_norm": 240.7366180419922, "learning_rate": 3.3800826668660286e-06, "loss": 16.6152, "step": 322260 }, { "epoch": 0.6510057895013271, "grad_norm": 309.955322265625, "learning_rate": 3.3797524326180825e-06, "loss": 19.1124, "step": 322270 }, { "epoch": 0.651025990134011, "grad_norm": 417.1963806152344, "learning_rate": 3.379422206267029e-06, "loss": 20.7022, "step": 322280 }, { "epoch": 0.6510461907666948, "grad_norm": 369.1255798339844, "learning_rate": 3.3790919878144737e-06, "loss": 15.3813, "step": 322290 }, { "epoch": 0.6510663913993786, "grad_norm": 139.55157470703125, "learning_rate": 3.378761777262028e-06, "loss": 23.0452, "step": 322300 }, { "epoch": 0.6510865920320624, "grad_norm": 455.1922607421875, "learning_rate": 3.3784315746113017e-06, "loss": 18.7418, "step": 322310 }, { "epoch": 0.6511067926647462, "grad_norm": 211.1398162841797, "learning_rate": 3.378101379863902e-06, "loss": 20.2303, "step": 322320 }, { "epoch": 0.6511269932974301, "grad_norm": 441.3922424316406, "learning_rate": 3.377771193021439e-06, "loss": 14.6408, "step": 322330 }, { "epoch": 0.6511471939301139, "grad_norm": 376.42218017578125, "learning_rate": 3.377441014085524e-06, "loss": 20.3478, "step": 322340 }, { "epoch": 0.6511673945627977, "grad_norm": 441.7045593261719, "learning_rate": 3.3771108430577624e-06, "loss": 12.7798, "step": 322350 }, { "epoch": 0.6511875951954815, "grad_norm": 350.40350341796875, "learning_rate": 3.376780679939767e-06, "loss": 10.8654, "step": 322360 }, { "epoch": 0.6512077958281653, "grad_norm": 130.030517578125, "learning_rate": 3.376450524733147e-06, "loss": 19.3038, "step": 322370 }, { "epoch": 0.6512279964608492, "grad_norm": 388.2613830566406, "learning_rate": 3.3761203774395083e-06, "loss": 16.1681, "step": 322380 }, { "epoch": 0.651248197093533, "grad_norm": 374.2757873535156, "learning_rate": 3.3757902380604624e-06, "loss": 13.8915, "step": 322390 }, { "epoch": 0.6512683977262168, "grad_norm": 138.20362854003906, "learning_rate": 3.375460106597619e-06, "loss": 19.2703, "step": 322400 }, { "epoch": 0.6512885983589006, "grad_norm": 185.23121643066406, "learning_rate": 3.375129983052585e-06, "loss": 24.7281, "step": 322410 }, { "epoch": 0.6513087989915844, "grad_norm": 36.48655319213867, "learning_rate": 3.3747998674269693e-06, "loss": 20.505, "step": 322420 }, { "epoch": 0.6513289996242683, "grad_norm": 197.18927001953125, "learning_rate": 3.374469759722383e-06, "loss": 11.4508, "step": 322430 }, { "epoch": 0.6513492002569521, "grad_norm": 380.3363952636719, "learning_rate": 3.3741396599404353e-06, "loss": 16.805, "step": 322440 }, { "epoch": 0.6513694008896359, "grad_norm": 162.93756103515625, "learning_rate": 3.373809568082732e-06, "loss": 10.8471, "step": 322450 }, { "epoch": 0.6513896015223197, "grad_norm": 424.0399169921875, "learning_rate": 3.3734794841508838e-06, "loss": 17.1178, "step": 322460 }, { "epoch": 0.6514098021550035, "grad_norm": 168.81292724609375, "learning_rate": 3.3731494081465003e-06, "loss": 11.5668, "step": 322470 }, { "epoch": 0.6514300027876873, "grad_norm": 89.69908142089844, "learning_rate": 3.3728193400711884e-06, "loss": 10.4391, "step": 322480 }, { "epoch": 0.6514502034203711, "grad_norm": 0.0, "learning_rate": 3.3724892799265567e-06, "loss": 13.3689, "step": 322490 }, { "epoch": 0.6514704040530549, "grad_norm": 615.532470703125, "learning_rate": 3.372159227714218e-06, "loss": 20.0965, "step": 322500 }, { "epoch": 0.6514906046857387, "grad_norm": 119.39100646972656, "learning_rate": 3.371829183435775e-06, "loss": 23.8562, "step": 322510 }, { "epoch": 0.6515108053184225, "grad_norm": 246.71632385253906, "learning_rate": 3.3714991470928393e-06, "loss": 26.6279, "step": 322520 }, { "epoch": 0.6515310059511064, "grad_norm": 179.3936004638672, "learning_rate": 3.371169118687021e-06, "loss": 19.5887, "step": 322530 }, { "epoch": 0.6515512065837902, "grad_norm": 45.47096633911133, "learning_rate": 3.370839098219926e-06, "loss": 13.0715, "step": 322540 }, { "epoch": 0.651571407216474, "grad_norm": 278.6761169433594, "learning_rate": 3.3705090856931626e-06, "loss": 13.2687, "step": 322550 }, { "epoch": 0.6515916078491578, "grad_norm": 174.07553100585938, "learning_rate": 3.37017908110834e-06, "loss": 15.8201, "step": 322560 }, { "epoch": 0.6516118084818416, "grad_norm": 275.45318603515625, "learning_rate": 3.3698490844670693e-06, "loss": 11.6687, "step": 322570 }, { "epoch": 0.6516320091145255, "grad_norm": 266.3797607421875, "learning_rate": 3.3695190957709546e-06, "loss": 18.1456, "step": 322580 }, { "epoch": 0.6516522097472093, "grad_norm": 218.54225158691406, "learning_rate": 3.369189115021606e-06, "loss": 15.7091, "step": 322590 }, { "epoch": 0.6516724103798931, "grad_norm": 87.33358764648438, "learning_rate": 3.3688591422206333e-06, "loss": 16.8523, "step": 322600 }, { "epoch": 0.6516926110125769, "grad_norm": 148.19064331054688, "learning_rate": 3.3685291773696425e-06, "loss": 18.2298, "step": 322610 }, { "epoch": 0.6517128116452607, "grad_norm": 143.50933837890625, "learning_rate": 3.3681992204702425e-06, "loss": 12.0693, "step": 322620 }, { "epoch": 0.6517330122779446, "grad_norm": 121.80940246582031, "learning_rate": 3.3678692715240423e-06, "loss": 15.2746, "step": 322630 }, { "epoch": 0.6517532129106284, "grad_norm": 183.27291870117188, "learning_rate": 3.3675393305326487e-06, "loss": 11.4373, "step": 322640 }, { "epoch": 0.6517734135433122, "grad_norm": 758.2548217773438, "learning_rate": 3.367209397497669e-06, "loss": 21.9433, "step": 322650 }, { "epoch": 0.651793614175996, "grad_norm": 206.05531311035156, "learning_rate": 3.3668794724207153e-06, "loss": 17.3279, "step": 322660 }, { "epoch": 0.6518138148086798, "grad_norm": 95.45826721191406, "learning_rate": 3.3665495553033913e-06, "loss": 9.3516, "step": 322670 }, { "epoch": 0.6518340154413637, "grad_norm": 244.05368041992188, "learning_rate": 3.366219646147306e-06, "loss": 14.3465, "step": 322680 }, { "epoch": 0.6518542160740475, "grad_norm": 39.76646041870117, "learning_rate": 3.3658897449540707e-06, "loss": 17.9458, "step": 322690 }, { "epoch": 0.6518744167067313, "grad_norm": 110.48297882080078, "learning_rate": 3.3655598517252886e-06, "loss": 13.1673, "step": 322700 }, { "epoch": 0.6518946173394151, "grad_norm": 197.38754272460938, "learning_rate": 3.36522996646257e-06, "loss": 16.3914, "step": 322710 }, { "epoch": 0.6519148179720989, "grad_norm": 137.74668884277344, "learning_rate": 3.364900089167521e-06, "loss": 24.8601, "step": 322720 }, { "epoch": 0.6519350186047828, "grad_norm": 556.6971435546875, "learning_rate": 3.364570219841753e-06, "loss": 20.2775, "step": 322730 }, { "epoch": 0.6519552192374665, "grad_norm": 379.2054748535156, "learning_rate": 3.3642403584868694e-06, "loss": 23.0455, "step": 322740 }, { "epoch": 0.6519754198701503, "grad_norm": 481.7650451660156, "learning_rate": 3.3639105051044807e-06, "loss": 17.9298, "step": 322750 }, { "epoch": 0.6519956205028341, "grad_norm": 256.6677551269531, "learning_rate": 3.363580659696194e-06, "loss": 17.4863, "step": 322760 }, { "epoch": 0.6520158211355179, "grad_norm": 49.91913986206055, "learning_rate": 3.3632508222636163e-06, "loss": 18.1609, "step": 322770 }, { "epoch": 0.6520360217682017, "grad_norm": 293.44732666015625, "learning_rate": 3.362920992808356e-06, "loss": 12.8964, "step": 322780 }, { "epoch": 0.6520562224008856, "grad_norm": 173.57369995117188, "learning_rate": 3.3625911713320205e-06, "loss": 18.2581, "step": 322790 }, { "epoch": 0.6520764230335694, "grad_norm": 211.70343017578125, "learning_rate": 3.3622613578362162e-06, "loss": 14.6965, "step": 322800 }, { "epoch": 0.6520966236662532, "grad_norm": 394.8924865722656, "learning_rate": 3.3619315523225506e-06, "loss": 27.0308, "step": 322810 }, { "epoch": 0.652116824298937, "grad_norm": 237.7058868408203, "learning_rate": 3.361601754792635e-06, "loss": 19.2496, "step": 322820 }, { "epoch": 0.6521370249316208, "grad_norm": 120.5327377319336, "learning_rate": 3.3612719652480707e-06, "loss": 19.5002, "step": 322830 }, { "epoch": 0.6521572255643047, "grad_norm": 341.5913391113281, "learning_rate": 3.3609421836904688e-06, "loss": 20.662, "step": 322840 }, { "epoch": 0.6521774261969885, "grad_norm": 306.39447021484375, "learning_rate": 3.360612410121438e-06, "loss": 19.1723, "step": 322850 }, { "epoch": 0.6521976268296723, "grad_norm": 435.1361999511719, "learning_rate": 3.3602826445425805e-06, "loss": 29.9077, "step": 322860 }, { "epoch": 0.6522178274623561, "grad_norm": 212.20249938964844, "learning_rate": 3.3599528869555074e-06, "loss": 15.7556, "step": 322870 }, { "epoch": 0.65223802809504, "grad_norm": 134.6806640625, "learning_rate": 3.359623137361825e-06, "loss": 20.9005, "step": 322880 }, { "epoch": 0.6522582287277238, "grad_norm": 135.64891052246094, "learning_rate": 3.3592933957631424e-06, "loss": 36.3743, "step": 322890 }, { "epoch": 0.6522784293604076, "grad_norm": 5.713158130645752, "learning_rate": 3.358963662161062e-06, "loss": 11.4175, "step": 322900 }, { "epoch": 0.6522986299930914, "grad_norm": 304.3985900878906, "learning_rate": 3.358633936557195e-06, "loss": 22.2135, "step": 322910 }, { "epoch": 0.6523188306257752, "grad_norm": 224.18211364746094, "learning_rate": 3.358304218953148e-06, "loss": 25.6721, "step": 322920 }, { "epoch": 0.652339031258459, "grad_norm": 293.67486572265625, "learning_rate": 3.3579745093505256e-06, "loss": 22.6967, "step": 322930 }, { "epoch": 0.6523592318911429, "grad_norm": 129.72047424316406, "learning_rate": 3.3576448077509373e-06, "loss": 27.4594, "step": 322940 }, { "epoch": 0.6523794325238267, "grad_norm": 264.858642578125, "learning_rate": 3.357315114155989e-06, "loss": 20.9233, "step": 322950 }, { "epoch": 0.6523996331565105, "grad_norm": 403.5821533203125, "learning_rate": 3.356985428567287e-06, "loss": 36.5965, "step": 322960 }, { "epoch": 0.6524198337891943, "grad_norm": 656.4111938476562, "learning_rate": 3.3566557509864374e-06, "loss": 15.4534, "step": 322970 }, { "epoch": 0.6524400344218781, "grad_norm": 175.22967529296875, "learning_rate": 3.3563260814150512e-06, "loss": 19.7808, "step": 322980 }, { "epoch": 0.6524602350545619, "grad_norm": 382.2796630859375, "learning_rate": 3.3559964198547307e-06, "loss": 21.0566, "step": 322990 }, { "epoch": 0.6524804356872457, "grad_norm": 376.31439208984375, "learning_rate": 3.355666766307084e-06, "loss": 19.8586, "step": 323000 }, { "epoch": 0.6525006363199295, "grad_norm": 274.1259460449219, "learning_rate": 3.3553371207737183e-06, "loss": 13.7132, "step": 323010 }, { "epoch": 0.6525208369526133, "grad_norm": 200.595458984375, "learning_rate": 3.3550074832562417e-06, "loss": 20.6732, "step": 323020 }, { "epoch": 0.6525410375852971, "grad_norm": 616.9058837890625, "learning_rate": 3.3546778537562563e-06, "loss": 17.73, "step": 323030 }, { "epoch": 0.652561238217981, "grad_norm": 214.40284729003906, "learning_rate": 3.354348232275373e-06, "loss": 16.9974, "step": 323040 }, { "epoch": 0.6525814388506648, "grad_norm": 361.7709655761719, "learning_rate": 3.3540186188151976e-06, "loss": 30.9759, "step": 323050 }, { "epoch": 0.6526016394833486, "grad_norm": 337.248046875, "learning_rate": 3.3536890133773346e-06, "loss": 15.4961, "step": 323060 }, { "epoch": 0.6526218401160324, "grad_norm": 140.04551696777344, "learning_rate": 3.353359415963392e-06, "loss": 10.5733, "step": 323070 }, { "epoch": 0.6526420407487162, "grad_norm": 3.20036244392395, "learning_rate": 3.353029826574977e-06, "loss": 19.4072, "step": 323080 }, { "epoch": 0.6526622413814, "grad_norm": 392.67950439453125, "learning_rate": 3.352700245213693e-06, "loss": 18.8849, "step": 323090 }, { "epoch": 0.6526824420140839, "grad_norm": 69.2968521118164, "learning_rate": 3.352370671881148e-06, "loss": 11.9742, "step": 323100 }, { "epoch": 0.6527026426467677, "grad_norm": 214.62078857421875, "learning_rate": 3.3520411065789513e-06, "loss": 27.0314, "step": 323110 }, { "epoch": 0.6527228432794515, "grad_norm": 325.4024353027344, "learning_rate": 3.3517115493087036e-06, "loss": 21.3129, "step": 323120 }, { "epoch": 0.6527430439121353, "grad_norm": 322.307373046875, "learning_rate": 3.3513820000720145e-06, "loss": 24.7427, "step": 323130 }, { "epoch": 0.6527632445448192, "grad_norm": 287.1816711425781, "learning_rate": 3.3510524588704908e-06, "loss": 22.192, "step": 323140 }, { "epoch": 0.652783445177503, "grad_norm": 201.15943908691406, "learning_rate": 3.350722925705736e-06, "loss": 12.3711, "step": 323150 }, { "epoch": 0.6528036458101868, "grad_norm": 39.04935836791992, "learning_rate": 3.350393400579358e-06, "loss": 16.9133, "step": 323160 }, { "epoch": 0.6528238464428706, "grad_norm": 244.882568359375, "learning_rate": 3.350063883492962e-06, "loss": 17.7549, "step": 323170 }, { "epoch": 0.6528440470755544, "grad_norm": 0.0, "learning_rate": 3.349734374448157e-06, "loss": 10.4934, "step": 323180 }, { "epoch": 0.6528642477082383, "grad_norm": 563.5009155273438, "learning_rate": 3.3494048734465433e-06, "loss": 17.1558, "step": 323190 }, { "epoch": 0.6528844483409221, "grad_norm": 262.3085021972656, "learning_rate": 3.3490753804897315e-06, "loss": 14.0554, "step": 323200 }, { "epoch": 0.6529046489736059, "grad_norm": 205.68675231933594, "learning_rate": 3.3487458955793273e-06, "loss": 16.2493, "step": 323210 }, { "epoch": 0.6529248496062897, "grad_norm": 264.7431640625, "learning_rate": 3.3484164187169334e-06, "loss": 20.8426, "step": 323220 }, { "epoch": 0.6529450502389735, "grad_norm": 309.40838623046875, "learning_rate": 3.348086949904158e-06, "loss": 19.369, "step": 323230 }, { "epoch": 0.6529652508716574, "grad_norm": 10.928141593933105, "learning_rate": 3.347757489142608e-06, "loss": 21.4588, "step": 323240 }, { "epoch": 0.6529854515043411, "grad_norm": 241.86314392089844, "learning_rate": 3.347428036433886e-06, "loss": 10.9569, "step": 323250 }, { "epoch": 0.6530056521370249, "grad_norm": 334.70758056640625, "learning_rate": 3.3470985917795983e-06, "loss": 13.9922, "step": 323260 }, { "epoch": 0.6530258527697087, "grad_norm": 249.0175323486328, "learning_rate": 3.3467691551813547e-06, "loss": 16.9679, "step": 323270 }, { "epoch": 0.6530460534023925, "grad_norm": 197.69639587402344, "learning_rate": 3.3464397266407543e-06, "loss": 23.3112, "step": 323280 }, { "epoch": 0.6530662540350763, "grad_norm": 312.18829345703125, "learning_rate": 3.346110306159408e-06, "loss": 20.1349, "step": 323290 }, { "epoch": 0.6530864546677602, "grad_norm": 388.5511169433594, "learning_rate": 3.34578089373892e-06, "loss": 12.9114, "step": 323300 }, { "epoch": 0.653106655300444, "grad_norm": 6.705295562744141, "learning_rate": 3.3454514893808943e-06, "loss": 19.9051, "step": 323310 }, { "epoch": 0.6531268559331278, "grad_norm": 574.5878295898438, "learning_rate": 3.3451220930869377e-06, "loss": 14.485, "step": 323320 }, { "epoch": 0.6531470565658116, "grad_norm": 162.64161682128906, "learning_rate": 3.3447927048586538e-06, "loss": 15.9376, "step": 323330 }, { "epoch": 0.6531672571984954, "grad_norm": 318.2919616699219, "learning_rate": 3.3444633246976526e-06, "loss": 11.5188, "step": 323340 }, { "epoch": 0.6531874578311793, "grad_norm": 107.16033172607422, "learning_rate": 3.344133952605534e-06, "loss": 8.3864, "step": 323350 }, { "epoch": 0.6532076584638631, "grad_norm": 412.8953552246094, "learning_rate": 3.3438045885839053e-06, "loss": 17.2828, "step": 323360 }, { "epoch": 0.6532278590965469, "grad_norm": 66.33267974853516, "learning_rate": 3.3434752326343745e-06, "loss": 14.378, "step": 323370 }, { "epoch": 0.6532480597292307, "grad_norm": 128.6493682861328, "learning_rate": 3.343145884758543e-06, "loss": 21.2206, "step": 323380 }, { "epoch": 0.6532682603619145, "grad_norm": 651.96142578125, "learning_rate": 3.3428165449580174e-06, "loss": 14.0828, "step": 323390 }, { "epoch": 0.6532884609945984, "grad_norm": 219.1110076904297, "learning_rate": 3.3424872132344044e-06, "loss": 14.634, "step": 323400 }, { "epoch": 0.6533086616272822, "grad_norm": 115.49552917480469, "learning_rate": 3.3421578895893058e-06, "loss": 20.6429, "step": 323410 }, { "epoch": 0.653328862259966, "grad_norm": 211.14852905273438, "learning_rate": 3.3418285740243285e-06, "loss": 21.1357, "step": 323420 }, { "epoch": 0.6533490628926498, "grad_norm": 275.0884704589844, "learning_rate": 3.3414992665410806e-06, "loss": 16.6964, "step": 323430 }, { "epoch": 0.6533692635253336, "grad_norm": 36.277767181396484, "learning_rate": 3.34116996714116e-06, "loss": 12.615, "step": 323440 }, { "epoch": 0.6533894641580175, "grad_norm": 0.0, "learning_rate": 3.340840675826178e-06, "loss": 21.7612, "step": 323450 }, { "epoch": 0.6534096647907013, "grad_norm": 457.3116760253906, "learning_rate": 3.3405113925977383e-06, "loss": 24.0764, "step": 323460 }, { "epoch": 0.6534298654233851, "grad_norm": 170.75064086914062, "learning_rate": 3.340182117457443e-06, "loss": 19.7988, "step": 323470 }, { "epoch": 0.6534500660560689, "grad_norm": 230.8992462158203, "learning_rate": 3.3398528504068996e-06, "loss": 21.9843, "step": 323480 }, { "epoch": 0.6534702666887527, "grad_norm": 549.5276489257812, "learning_rate": 3.3395235914477104e-06, "loss": 33.4655, "step": 323490 }, { "epoch": 0.6534904673214365, "grad_norm": 159.0146026611328, "learning_rate": 3.339194340581485e-06, "loss": 25.9263, "step": 323500 }, { "epoch": 0.6535106679541203, "grad_norm": 94.80084228515625, "learning_rate": 3.3388650978098218e-06, "loss": 19.7699, "step": 323510 }, { "epoch": 0.6535308685868041, "grad_norm": 97.40782165527344, "learning_rate": 3.33853586313433e-06, "loss": 11.7635, "step": 323520 }, { "epoch": 0.6535510692194879, "grad_norm": 48.90718460083008, "learning_rate": 3.3382066365566133e-06, "loss": 11.767, "step": 323530 }, { "epoch": 0.6535712698521717, "grad_norm": 220.61534118652344, "learning_rate": 3.337877418078276e-06, "loss": 28.8489, "step": 323540 }, { "epoch": 0.6535914704848556, "grad_norm": 89.07369232177734, "learning_rate": 3.3375482077009213e-06, "loss": 23.6548, "step": 323550 }, { "epoch": 0.6536116711175394, "grad_norm": 313.00177001953125, "learning_rate": 3.3372190054261565e-06, "loss": 27.4134, "step": 323560 }, { "epoch": 0.6536318717502232, "grad_norm": 413.0423583984375, "learning_rate": 3.3368898112555843e-06, "loss": 21.0943, "step": 323570 }, { "epoch": 0.653652072382907, "grad_norm": 237.40223693847656, "learning_rate": 3.336560625190808e-06, "loss": 9.6915, "step": 323580 }, { "epoch": 0.6536722730155908, "grad_norm": 54.702293395996094, "learning_rate": 3.3362314472334353e-06, "loss": 13.0594, "step": 323590 }, { "epoch": 0.6536924736482747, "grad_norm": 306.16107177734375, "learning_rate": 3.3359022773850673e-06, "loss": 19.1068, "step": 323600 }, { "epoch": 0.6537126742809585, "grad_norm": 0.911095380783081, "learning_rate": 3.3355731156473105e-06, "loss": 16.7411, "step": 323610 }, { "epoch": 0.6537328749136423, "grad_norm": 86.71841430664062, "learning_rate": 3.335243962021768e-06, "loss": 20.8605, "step": 323620 }, { "epoch": 0.6537530755463261, "grad_norm": 292.8476257324219, "learning_rate": 3.334914816510046e-06, "loss": 17.4392, "step": 323630 }, { "epoch": 0.6537732761790099, "grad_norm": 630.5166625976562, "learning_rate": 3.3345856791137456e-06, "loss": 24.1279, "step": 323640 }, { "epoch": 0.6537934768116938, "grad_norm": 373.912841796875, "learning_rate": 3.334256549834472e-06, "loss": 18.4019, "step": 323650 }, { "epoch": 0.6538136774443776, "grad_norm": 356.7312927246094, "learning_rate": 3.333927428673832e-06, "loss": 18.8948, "step": 323660 }, { "epoch": 0.6538338780770614, "grad_norm": 41.01976013183594, "learning_rate": 3.333598315633426e-06, "loss": 10.5616, "step": 323670 }, { "epoch": 0.6538540787097452, "grad_norm": 211.14559936523438, "learning_rate": 3.3332692107148605e-06, "loss": 15.5211, "step": 323680 }, { "epoch": 0.653874279342429, "grad_norm": 105.25326538085938, "learning_rate": 3.3329401139197393e-06, "loss": 15.7409, "step": 323690 }, { "epoch": 0.6538944799751129, "grad_norm": 213.9519500732422, "learning_rate": 3.3326110252496652e-06, "loss": 14.8658, "step": 323700 }, { "epoch": 0.6539146806077967, "grad_norm": 179.0045928955078, "learning_rate": 3.3322819447062417e-06, "loss": 8.3534, "step": 323710 }, { "epoch": 0.6539348812404805, "grad_norm": 339.2384033203125, "learning_rate": 3.3319528722910767e-06, "loss": 28.0546, "step": 323720 }, { "epoch": 0.6539550818731643, "grad_norm": 455.1729736328125, "learning_rate": 3.3316238080057674e-06, "loss": 34.3696, "step": 323730 }, { "epoch": 0.6539752825058481, "grad_norm": 221.44630432128906, "learning_rate": 3.3312947518519228e-06, "loss": 17.1809, "step": 323740 }, { "epoch": 0.653995483138532, "grad_norm": 309.9968566894531, "learning_rate": 3.330965703831146e-06, "loss": 14.7928, "step": 323750 }, { "epoch": 0.6540156837712157, "grad_norm": 491.01611328125, "learning_rate": 3.3306366639450394e-06, "loss": 18.8377, "step": 323760 }, { "epoch": 0.6540358844038995, "grad_norm": 199.42874145507812, "learning_rate": 3.3303076321952066e-06, "loss": 13.8255, "step": 323770 }, { "epoch": 0.6540560850365833, "grad_norm": 406.9206848144531, "learning_rate": 3.329978608583252e-06, "loss": 12.3577, "step": 323780 }, { "epoch": 0.6540762856692671, "grad_norm": 151.03260803222656, "learning_rate": 3.329649593110781e-06, "loss": 11.9081, "step": 323790 }, { "epoch": 0.6540964863019509, "grad_norm": 270.6524963378906, "learning_rate": 3.3293205857793924e-06, "loss": 20.4379, "step": 323800 }, { "epoch": 0.6541166869346348, "grad_norm": 231.40188598632812, "learning_rate": 3.328991586590694e-06, "loss": 15.0756, "step": 323810 }, { "epoch": 0.6541368875673186, "grad_norm": 430.41937255859375, "learning_rate": 3.328662595546289e-06, "loss": 24.143, "step": 323820 }, { "epoch": 0.6541570882000024, "grad_norm": 365.3612976074219, "learning_rate": 3.3283336126477785e-06, "loss": 16.8604, "step": 323830 }, { "epoch": 0.6541772888326862, "grad_norm": 316.44805908203125, "learning_rate": 3.3280046378967673e-06, "loss": 18.2242, "step": 323840 }, { "epoch": 0.65419748946537, "grad_norm": 202.0590057373047, "learning_rate": 3.32767567129486e-06, "loss": 17.8001, "step": 323850 }, { "epoch": 0.6542176900980539, "grad_norm": 43.72981643676758, "learning_rate": 3.3273467128436575e-06, "loss": 9.0989, "step": 323860 }, { "epoch": 0.6542378907307377, "grad_norm": 250.11453247070312, "learning_rate": 3.3270177625447632e-06, "loss": 21.8141, "step": 323870 }, { "epoch": 0.6542580913634215, "grad_norm": 457.7359313964844, "learning_rate": 3.326688820399784e-06, "loss": 17.2255, "step": 323880 }, { "epoch": 0.6542782919961053, "grad_norm": 113.73314666748047, "learning_rate": 3.326359886410318e-06, "loss": 12.2093, "step": 323890 }, { "epoch": 0.6542984926287891, "grad_norm": 305.7065734863281, "learning_rate": 3.3260309605779717e-06, "loss": 21.0384, "step": 323900 }, { "epoch": 0.654318693261473, "grad_norm": 311.6148986816406, "learning_rate": 3.3257020429043485e-06, "loss": 14.813, "step": 323910 }, { "epoch": 0.6543388938941568, "grad_norm": 174.5467987060547, "learning_rate": 3.32537313339105e-06, "loss": 20.0505, "step": 323920 }, { "epoch": 0.6543590945268406, "grad_norm": 213.63119506835938, "learning_rate": 3.325044232039679e-06, "loss": 18.5961, "step": 323930 }, { "epoch": 0.6543792951595244, "grad_norm": 290.2833251953125, "learning_rate": 3.3247153388518387e-06, "loss": 28.0642, "step": 323940 }, { "epoch": 0.6543994957922082, "grad_norm": 402.0972900390625, "learning_rate": 3.3243864538291358e-06, "loss": 10.5388, "step": 323950 }, { "epoch": 0.6544196964248921, "grad_norm": 494.8009033203125, "learning_rate": 3.3240575769731662e-06, "loss": 29.7441, "step": 323960 }, { "epoch": 0.6544398970575759, "grad_norm": 301.2927551269531, "learning_rate": 3.3237287082855386e-06, "loss": 19.5243, "step": 323970 }, { "epoch": 0.6544600976902597, "grad_norm": 272.245849609375, "learning_rate": 3.3233998477678555e-06, "loss": 10.3614, "step": 323980 }, { "epoch": 0.6544802983229435, "grad_norm": 126.1307601928711, "learning_rate": 3.3230709954217156e-06, "loss": 14.6279, "step": 323990 }, { "epoch": 0.6545004989556273, "grad_norm": 225.13929748535156, "learning_rate": 3.322742151248726e-06, "loss": 25.5433, "step": 324000 }, { "epoch": 0.6545206995883112, "grad_norm": 207.26303100585938, "learning_rate": 3.3224133152504874e-06, "loss": 31.4215, "step": 324010 }, { "epoch": 0.6545409002209949, "grad_norm": 346.0664978027344, "learning_rate": 3.3220844874286017e-06, "loss": 15.5876, "step": 324020 }, { "epoch": 0.6545611008536787, "grad_norm": 475.8935852050781, "learning_rate": 3.321755667784673e-06, "loss": 20.083, "step": 324030 }, { "epoch": 0.6545813014863625, "grad_norm": 148.42385864257812, "learning_rate": 3.3214268563203056e-06, "loss": 18.6485, "step": 324040 }, { "epoch": 0.6546015021190463, "grad_norm": 0.0, "learning_rate": 3.3210980530370974e-06, "loss": 72.7635, "step": 324050 }, { "epoch": 0.6546217027517302, "grad_norm": 298.8637390136719, "learning_rate": 3.3207692579366548e-06, "loss": 10.3868, "step": 324060 }, { "epoch": 0.654641903384414, "grad_norm": 311.0837707519531, "learning_rate": 3.3204404710205786e-06, "loss": 14.2048, "step": 324070 }, { "epoch": 0.6546621040170978, "grad_norm": 356.5824890136719, "learning_rate": 3.3201116922904737e-06, "loss": 20.4218, "step": 324080 }, { "epoch": 0.6546823046497816, "grad_norm": 204.20864868164062, "learning_rate": 3.3197829217479396e-06, "loss": 11.3969, "step": 324090 }, { "epoch": 0.6547025052824654, "grad_norm": 263.9625549316406, "learning_rate": 3.319454159394578e-06, "loss": 10.9468, "step": 324100 }, { "epoch": 0.6547227059151493, "grad_norm": 208.89004516601562, "learning_rate": 3.3191254052319967e-06, "loss": 15.8986, "step": 324110 }, { "epoch": 0.6547429065478331, "grad_norm": 434.92279052734375, "learning_rate": 3.3187966592617905e-06, "loss": 18.4223, "step": 324120 }, { "epoch": 0.6547631071805169, "grad_norm": 237.6468963623047, "learning_rate": 3.318467921485567e-06, "loss": 22.26, "step": 324130 }, { "epoch": 0.6547833078132007, "grad_norm": 32.69999313354492, "learning_rate": 3.3181391919049277e-06, "loss": 22.4843, "step": 324140 }, { "epoch": 0.6548035084458845, "grad_norm": 384.46307373046875, "learning_rate": 3.317810470521473e-06, "loss": 11.6848, "step": 324150 }, { "epoch": 0.6548237090785684, "grad_norm": 168.52911376953125, "learning_rate": 3.3174817573368068e-06, "loss": 16.615, "step": 324160 }, { "epoch": 0.6548439097112522, "grad_norm": 330.1651916503906, "learning_rate": 3.317153052352531e-06, "loss": 15.1834, "step": 324170 }, { "epoch": 0.654864110343936, "grad_norm": 45.54159927368164, "learning_rate": 3.3168243555702455e-06, "loss": 12.9291, "step": 324180 }, { "epoch": 0.6548843109766198, "grad_norm": 44.321346282958984, "learning_rate": 3.316495666991554e-06, "loss": 12.3014, "step": 324190 }, { "epoch": 0.6549045116093036, "grad_norm": 203.79824829101562, "learning_rate": 3.31616698661806e-06, "loss": 15.981, "step": 324200 }, { "epoch": 0.6549247122419875, "grad_norm": 262.7236633300781, "learning_rate": 3.3158383144513618e-06, "loss": 6.2852, "step": 324210 }, { "epoch": 0.6549449128746713, "grad_norm": 225.0556182861328, "learning_rate": 3.315509650493065e-06, "loss": 9.561, "step": 324220 }, { "epoch": 0.6549651135073551, "grad_norm": 136.61697387695312, "learning_rate": 3.315180994744769e-06, "loss": 13.1737, "step": 324230 }, { "epoch": 0.6549853141400389, "grad_norm": 596.6488647460938, "learning_rate": 3.3148523472080773e-06, "loss": 16.1873, "step": 324240 }, { "epoch": 0.6550055147727227, "grad_norm": 286.2017822265625, "learning_rate": 3.3145237078845903e-06, "loss": 17.4893, "step": 324250 }, { "epoch": 0.6550257154054066, "grad_norm": 139.54600524902344, "learning_rate": 3.3141950767759096e-06, "loss": 14.5637, "step": 324260 }, { "epoch": 0.6550459160380903, "grad_norm": 334.0503234863281, "learning_rate": 3.3138664538836395e-06, "loss": 23.3504, "step": 324270 }, { "epoch": 0.6550661166707741, "grad_norm": 238.45306396484375, "learning_rate": 3.3135378392093788e-06, "loss": 14.3013, "step": 324280 }, { "epoch": 0.6550863173034579, "grad_norm": 296.9386291503906, "learning_rate": 3.3132092327547296e-06, "loss": 17.8876, "step": 324290 }, { "epoch": 0.6551065179361417, "grad_norm": 247.072021484375, "learning_rate": 3.312880634521295e-06, "loss": 22.6054, "step": 324300 }, { "epoch": 0.6551267185688255, "grad_norm": 11.488008499145508, "learning_rate": 3.3125520445106753e-06, "loss": 13.2484, "step": 324310 }, { "epoch": 0.6551469192015094, "grad_norm": 289.1090393066406, "learning_rate": 3.312223462724472e-06, "loss": 41.0566, "step": 324320 }, { "epoch": 0.6551671198341932, "grad_norm": 372.6353454589844, "learning_rate": 3.3118948891642875e-06, "loss": 15.3632, "step": 324330 }, { "epoch": 0.655187320466877, "grad_norm": 343.526123046875, "learning_rate": 3.311566323831721e-06, "loss": 17.5121, "step": 324340 }, { "epoch": 0.6552075210995608, "grad_norm": 188.16131591796875, "learning_rate": 3.3112377667283756e-06, "loss": 18.1935, "step": 324350 }, { "epoch": 0.6552277217322446, "grad_norm": 370.9345397949219, "learning_rate": 3.3109092178558546e-06, "loss": 20.8964, "step": 324360 }, { "epoch": 0.6552479223649285, "grad_norm": 397.97576904296875, "learning_rate": 3.3105806772157556e-06, "loss": 14.4062, "step": 324370 }, { "epoch": 0.6552681229976123, "grad_norm": 287.5545654296875, "learning_rate": 3.310252144809682e-06, "loss": 17.0462, "step": 324380 }, { "epoch": 0.6552883236302961, "grad_norm": 287.0768127441406, "learning_rate": 3.309923620639233e-06, "loss": 15.765, "step": 324390 }, { "epoch": 0.6553085242629799, "grad_norm": 273.1934509277344, "learning_rate": 3.3095951047060147e-06, "loss": 19.1174, "step": 324400 }, { "epoch": 0.6553287248956637, "grad_norm": 220.1702880859375, "learning_rate": 3.309266597011621e-06, "loss": 37.1042, "step": 324410 }, { "epoch": 0.6553489255283476, "grad_norm": 367.9901123046875, "learning_rate": 3.308938097557659e-06, "loss": 22.447, "step": 324420 }, { "epoch": 0.6553691261610314, "grad_norm": 316.996337890625, "learning_rate": 3.308609606345728e-06, "loss": 24.7443, "step": 324430 }, { "epoch": 0.6553893267937152, "grad_norm": 212.59063720703125, "learning_rate": 3.3082811233774277e-06, "loss": 15.1914, "step": 324440 }, { "epoch": 0.655409527426399, "grad_norm": 171.8090362548828, "learning_rate": 3.30795264865436e-06, "loss": 29.1355, "step": 324450 }, { "epoch": 0.6554297280590828, "grad_norm": 241.55224609375, "learning_rate": 3.307624182178127e-06, "loss": 16.7308, "step": 324460 }, { "epoch": 0.6554499286917667, "grad_norm": 210.81338500976562, "learning_rate": 3.3072957239503273e-06, "loss": 22.1579, "step": 324470 }, { "epoch": 0.6554701293244505, "grad_norm": 88.8290786743164, "learning_rate": 3.3069672739725616e-06, "loss": 34.33, "step": 324480 }, { "epoch": 0.6554903299571343, "grad_norm": 247.44424438476562, "learning_rate": 3.306638832246436e-06, "loss": 9.8465, "step": 324490 }, { "epoch": 0.6555105305898181, "grad_norm": 262.3728942871094, "learning_rate": 3.3063103987735433e-06, "loss": 11.7805, "step": 324500 }, { "epoch": 0.6555307312225019, "grad_norm": 356.46185302734375, "learning_rate": 3.30598197355549e-06, "loss": 23.0514, "step": 324510 }, { "epoch": 0.6555509318551858, "grad_norm": 317.2620849609375, "learning_rate": 3.3056535565938764e-06, "loss": 10.5034, "step": 324520 }, { "epoch": 0.6555711324878695, "grad_norm": 38.2398567199707, "learning_rate": 3.3053251478902996e-06, "loss": 17.7073, "step": 324530 }, { "epoch": 0.6555913331205533, "grad_norm": 208.78480529785156, "learning_rate": 3.3049967474463634e-06, "loss": 21.059, "step": 324540 }, { "epoch": 0.6556115337532371, "grad_norm": 343.25457763671875, "learning_rate": 3.3046683552636665e-06, "loss": 17.8103, "step": 324550 }, { "epoch": 0.6556317343859209, "grad_norm": 216.7662353515625, "learning_rate": 3.304339971343813e-06, "loss": 18.6192, "step": 324560 }, { "epoch": 0.6556519350186047, "grad_norm": 427.19512939453125, "learning_rate": 3.3040115956883984e-06, "loss": 21.3333, "step": 324570 }, { "epoch": 0.6556721356512886, "grad_norm": 711.686279296875, "learning_rate": 3.3036832282990263e-06, "loss": 25.2349, "step": 324580 }, { "epoch": 0.6556923362839724, "grad_norm": 173.58985900878906, "learning_rate": 3.3033548691772976e-06, "loss": 17.6516, "step": 324590 }, { "epoch": 0.6557125369166562, "grad_norm": 304.60833740234375, "learning_rate": 3.30302651832481e-06, "loss": 18.2354, "step": 324600 }, { "epoch": 0.65573273754934, "grad_norm": 517.9136962890625, "learning_rate": 3.302698175743165e-06, "loss": 26.6774, "step": 324610 }, { "epoch": 0.6557529381820238, "grad_norm": 228.74070739746094, "learning_rate": 3.3023698414339656e-06, "loss": 16.5512, "step": 324620 }, { "epoch": 0.6557731388147077, "grad_norm": 244.75994873046875, "learning_rate": 3.302041515398808e-06, "loss": 15.3053, "step": 324630 }, { "epoch": 0.6557933394473915, "grad_norm": 144.66360473632812, "learning_rate": 3.3017131976392926e-06, "loss": 12.7535, "step": 324640 }, { "epoch": 0.6558135400800753, "grad_norm": 385.5673522949219, "learning_rate": 3.3013848881570243e-06, "loss": 30.4632, "step": 324650 }, { "epoch": 0.6558337407127591, "grad_norm": 214.80625915527344, "learning_rate": 3.3010565869535976e-06, "loss": 28.1054, "step": 324660 }, { "epoch": 0.655853941345443, "grad_norm": 345.3190612792969, "learning_rate": 3.3007282940306155e-06, "loss": 23.4895, "step": 324670 }, { "epoch": 0.6558741419781268, "grad_norm": 591.991455078125, "learning_rate": 3.300400009389678e-06, "loss": 34.1445, "step": 324680 }, { "epoch": 0.6558943426108106, "grad_norm": 380.9379577636719, "learning_rate": 3.3000717330323857e-06, "loss": 16.3721, "step": 324690 }, { "epoch": 0.6559145432434944, "grad_norm": 471.51849365234375, "learning_rate": 3.2997434649603368e-06, "loss": 15.4707, "step": 324700 }, { "epoch": 0.6559347438761782, "grad_norm": 256.5867919921875, "learning_rate": 3.2994152051751305e-06, "loss": 33.0621, "step": 324710 }, { "epoch": 0.655954944508862, "grad_norm": 479.35345458984375, "learning_rate": 3.299086953678371e-06, "loss": 25.3935, "step": 324720 }, { "epoch": 0.6559751451415459, "grad_norm": 0.0, "learning_rate": 3.298758710471653e-06, "loss": 14.0681, "step": 324730 }, { "epoch": 0.6559953457742297, "grad_norm": 241.19390869140625, "learning_rate": 3.298430475556579e-06, "loss": 11.0385, "step": 324740 }, { "epoch": 0.6560155464069135, "grad_norm": 283.4515075683594, "learning_rate": 3.2981022489347503e-06, "loss": 13.9162, "step": 324750 }, { "epoch": 0.6560357470395973, "grad_norm": 317.4547119140625, "learning_rate": 3.297774030607763e-06, "loss": 10.7548, "step": 324760 }, { "epoch": 0.6560559476722811, "grad_norm": 273.15460205078125, "learning_rate": 3.2974458205772197e-06, "loss": 10.2955, "step": 324770 }, { "epoch": 0.6560761483049649, "grad_norm": 454.5055236816406, "learning_rate": 3.2971176188447196e-06, "loss": 15.4793, "step": 324780 }, { "epoch": 0.6560963489376487, "grad_norm": 94.57625579833984, "learning_rate": 3.2967894254118605e-06, "loss": 9.9015, "step": 324790 }, { "epoch": 0.6561165495703325, "grad_norm": 340.491455078125, "learning_rate": 3.2964612402802422e-06, "loss": 28.6349, "step": 324800 }, { "epoch": 0.6561367502030163, "grad_norm": 141.20443725585938, "learning_rate": 3.2961330634514676e-06, "loss": 17.2242, "step": 324810 }, { "epoch": 0.6561569508357001, "grad_norm": 459.1496887207031, "learning_rate": 3.2958048949271314e-06, "loss": 18.9374, "step": 324820 }, { "epoch": 0.656177151468384, "grad_norm": 577.8148193359375, "learning_rate": 3.2954767347088367e-06, "loss": 17.1285, "step": 324830 }, { "epoch": 0.6561973521010678, "grad_norm": 112.24983978271484, "learning_rate": 3.295148582798181e-06, "loss": 7.8313, "step": 324840 }, { "epoch": 0.6562175527337516, "grad_norm": 202.61277770996094, "learning_rate": 3.2948204391967657e-06, "loss": 16.2628, "step": 324850 }, { "epoch": 0.6562377533664354, "grad_norm": 29.069067001342773, "learning_rate": 3.294492303906188e-06, "loss": 24.1324, "step": 324860 }, { "epoch": 0.6562579539991192, "grad_norm": 470.06298828125, "learning_rate": 3.2941641769280464e-06, "loss": 13.7085, "step": 324870 }, { "epoch": 0.6562781546318031, "grad_norm": 157.38023376464844, "learning_rate": 3.293836058263945e-06, "loss": 19.67, "step": 324880 }, { "epoch": 0.6562983552644869, "grad_norm": 73.43772888183594, "learning_rate": 3.293507947915477e-06, "loss": 36.525, "step": 324890 }, { "epoch": 0.6563185558971707, "grad_norm": 324.37860107421875, "learning_rate": 3.293179845884245e-06, "loss": 32.0522, "step": 324900 }, { "epoch": 0.6563387565298545, "grad_norm": 479.7427978515625, "learning_rate": 3.2928517521718483e-06, "loss": 19.6705, "step": 324910 }, { "epoch": 0.6563589571625383, "grad_norm": 556.3734130859375, "learning_rate": 3.2925236667798843e-06, "loss": 29.4948, "step": 324920 }, { "epoch": 0.6563791577952222, "grad_norm": 197.95718383789062, "learning_rate": 3.2921955897099534e-06, "loss": 19.7293, "step": 324930 }, { "epoch": 0.656399358427906, "grad_norm": 330.6506042480469, "learning_rate": 3.2918675209636542e-06, "loss": 21.7396, "step": 324940 }, { "epoch": 0.6564195590605898, "grad_norm": 466.46160888671875, "learning_rate": 3.2915394605425836e-06, "loss": 14.4438, "step": 324950 }, { "epoch": 0.6564397596932736, "grad_norm": 261.424072265625, "learning_rate": 3.2912114084483437e-06, "loss": 20.4622, "step": 324960 }, { "epoch": 0.6564599603259574, "grad_norm": 203.6238555908203, "learning_rate": 3.290883364682533e-06, "loss": 12.3459, "step": 324970 }, { "epoch": 0.6564801609586413, "grad_norm": 111.94189453125, "learning_rate": 3.2905553292467487e-06, "loss": 13.8722, "step": 324980 }, { "epoch": 0.6565003615913251, "grad_norm": 318.91455078125, "learning_rate": 3.29022730214259e-06, "loss": 19.1081, "step": 324990 }, { "epoch": 0.6565205622240089, "grad_norm": 465.5526428222656, "learning_rate": 3.289899283371657e-06, "loss": 21.5725, "step": 325000 }, { "epoch": 0.6565407628566927, "grad_norm": 36.182857513427734, "learning_rate": 3.2895712729355477e-06, "loss": 24.4079, "step": 325010 }, { "epoch": 0.6565609634893765, "grad_norm": 202.29116821289062, "learning_rate": 3.2892432708358583e-06, "loss": 16.2554, "step": 325020 }, { "epoch": 0.6565811641220604, "grad_norm": 136.98094177246094, "learning_rate": 3.288915277074192e-06, "loss": 15.5304, "step": 325030 }, { "epoch": 0.6566013647547441, "grad_norm": 173.10157775878906, "learning_rate": 3.2885872916521445e-06, "loss": 18.4891, "step": 325040 }, { "epoch": 0.6566215653874279, "grad_norm": 163.14114379882812, "learning_rate": 3.2882593145713148e-06, "loss": 25.5887, "step": 325050 }, { "epoch": 0.6566417660201117, "grad_norm": 566.6318359375, "learning_rate": 3.2879313458333017e-06, "loss": 28.2239, "step": 325060 }, { "epoch": 0.6566619666527955, "grad_norm": 471.21197509765625, "learning_rate": 3.2876033854397037e-06, "loss": 13.1177, "step": 325070 }, { "epoch": 0.6566821672854793, "grad_norm": 214.6530303955078, "learning_rate": 3.287275433392119e-06, "loss": 11.005, "step": 325080 }, { "epoch": 0.6567023679181632, "grad_norm": 462.0964050292969, "learning_rate": 3.286947489692145e-06, "loss": 22.7775, "step": 325090 }, { "epoch": 0.656722568550847, "grad_norm": 352.9953308105469, "learning_rate": 3.2866195543413843e-06, "loss": 13.026, "step": 325100 }, { "epoch": 0.6567427691835308, "grad_norm": 420.5517883300781, "learning_rate": 3.2862916273414284e-06, "loss": 13.9931, "step": 325110 }, { "epoch": 0.6567629698162146, "grad_norm": 425.3837585449219, "learning_rate": 3.285963708693881e-06, "loss": 23.4562, "step": 325120 }, { "epoch": 0.6567831704488984, "grad_norm": 595.9458618164062, "learning_rate": 3.2856357984003382e-06, "loss": 12.4237, "step": 325130 }, { "epoch": 0.6568033710815823, "grad_norm": 745.153076171875, "learning_rate": 3.2853078964623995e-06, "loss": 19.2912, "step": 325140 }, { "epoch": 0.6568235717142661, "grad_norm": 154.6387481689453, "learning_rate": 3.2849800028816613e-06, "loss": 10.5305, "step": 325150 }, { "epoch": 0.6568437723469499, "grad_norm": 454.5603332519531, "learning_rate": 3.2846521176597217e-06, "loss": 26.4527, "step": 325160 }, { "epoch": 0.6568639729796337, "grad_norm": 3.6933395862579346, "learning_rate": 3.2843242407981823e-06, "loss": 17.045, "step": 325170 }, { "epoch": 0.6568841736123175, "grad_norm": 530.4935913085938, "learning_rate": 3.2839963722986356e-06, "loss": 20.4164, "step": 325180 }, { "epoch": 0.6569043742450014, "grad_norm": 254.14212036132812, "learning_rate": 3.283668512162684e-06, "loss": 17.5951, "step": 325190 }, { "epoch": 0.6569245748776852, "grad_norm": 303.5582580566406, "learning_rate": 3.2833406603919243e-06, "loss": 14.7522, "step": 325200 }, { "epoch": 0.656944775510369, "grad_norm": 315.11285400390625, "learning_rate": 3.2830128169879535e-06, "loss": 30.0942, "step": 325210 }, { "epoch": 0.6569649761430528, "grad_norm": 376.3750305175781, "learning_rate": 3.282684981952369e-06, "loss": 13.6722, "step": 325220 }, { "epoch": 0.6569851767757366, "grad_norm": 167.85760498046875, "learning_rate": 3.2823571552867717e-06, "loss": 16.644, "step": 325230 }, { "epoch": 0.6570053774084205, "grad_norm": 347.69476318359375, "learning_rate": 3.282029336992756e-06, "loss": 51.1261, "step": 325240 }, { "epoch": 0.6570255780411043, "grad_norm": 271.6500244140625, "learning_rate": 3.28170152707192e-06, "loss": 52.1869, "step": 325250 }, { "epoch": 0.6570457786737881, "grad_norm": 0.0, "learning_rate": 3.281373725525865e-06, "loss": 17.3325, "step": 325260 }, { "epoch": 0.6570659793064719, "grad_norm": 1.3119171857833862, "learning_rate": 3.2810459323561826e-06, "loss": 15.8116, "step": 325270 }, { "epoch": 0.6570861799391557, "grad_norm": 284.9716491699219, "learning_rate": 3.2807181475644755e-06, "loss": 30.6958, "step": 325280 }, { "epoch": 0.6571063805718395, "grad_norm": 431.0801696777344, "learning_rate": 3.28039037115234e-06, "loss": 22.6894, "step": 325290 }, { "epoch": 0.6571265812045233, "grad_norm": 251.68798828125, "learning_rate": 3.280062603121373e-06, "loss": 12.437, "step": 325300 }, { "epoch": 0.6571467818372071, "grad_norm": 103.37272644042969, "learning_rate": 3.2797348434731725e-06, "loss": 10.8606, "step": 325310 }, { "epoch": 0.6571669824698909, "grad_norm": 467.3813171386719, "learning_rate": 3.2794070922093347e-06, "loss": 13.4435, "step": 325320 }, { "epoch": 0.6571871831025747, "grad_norm": 79.60079193115234, "learning_rate": 3.2790793493314605e-06, "loss": 7.3849, "step": 325330 }, { "epoch": 0.6572073837352586, "grad_norm": 442.27001953125, "learning_rate": 3.2787516148411417e-06, "loss": 23.0764, "step": 325340 }, { "epoch": 0.6572275843679424, "grad_norm": 372.4307556152344, "learning_rate": 3.27842388873998e-06, "loss": 20.5486, "step": 325350 }, { "epoch": 0.6572477850006262, "grad_norm": 303.7315979003906, "learning_rate": 3.2780961710295727e-06, "loss": 21.2119, "step": 325360 }, { "epoch": 0.65726798563331, "grad_norm": 674.0523071289062, "learning_rate": 3.2777684617115145e-06, "loss": 33.8873, "step": 325370 }, { "epoch": 0.6572881862659938, "grad_norm": 0.6420018076896667, "learning_rate": 3.277440760787404e-06, "loss": 10.3838, "step": 325380 }, { "epoch": 0.6573083868986777, "grad_norm": 202.30039978027344, "learning_rate": 3.277113068258839e-06, "loss": 17.7272, "step": 325390 }, { "epoch": 0.6573285875313615, "grad_norm": 673.8109130859375, "learning_rate": 3.2767853841274154e-06, "loss": 19.2871, "step": 325400 }, { "epoch": 0.6573487881640453, "grad_norm": 492.9521484375, "learning_rate": 3.2764577083947303e-06, "loss": 22.3621, "step": 325410 }, { "epoch": 0.6573689887967291, "grad_norm": 278.39141845703125, "learning_rate": 3.2761300410623834e-06, "loss": 27.4161, "step": 325420 }, { "epoch": 0.6573891894294129, "grad_norm": 130.88331604003906, "learning_rate": 3.2758023821319673e-06, "loss": 29.0107, "step": 325430 }, { "epoch": 0.6574093900620968, "grad_norm": 373.3331604003906, "learning_rate": 3.2754747316050815e-06, "loss": 22.9643, "step": 325440 }, { "epoch": 0.6574295906947806, "grad_norm": 185.7371368408203, "learning_rate": 3.2751470894833236e-06, "loss": 12.2163, "step": 325450 }, { "epoch": 0.6574497913274644, "grad_norm": 239.1378631591797, "learning_rate": 3.27481945576829e-06, "loss": 22.7584, "step": 325460 }, { "epoch": 0.6574699919601482, "grad_norm": 98.81094360351562, "learning_rate": 3.2744918304615757e-06, "loss": 19.1879, "step": 325470 }, { "epoch": 0.657490192592832, "grad_norm": 270.07452392578125, "learning_rate": 3.2741642135647787e-06, "loss": 21.2678, "step": 325480 }, { "epoch": 0.6575103932255159, "grad_norm": 139.71871948242188, "learning_rate": 3.273836605079499e-06, "loss": 19.2434, "step": 325490 }, { "epoch": 0.6575305938581997, "grad_norm": 480.32598876953125, "learning_rate": 3.273509005007327e-06, "loss": 28.2172, "step": 325500 }, { "epoch": 0.6575507944908835, "grad_norm": 362.0687561035156, "learning_rate": 3.273181413349864e-06, "loss": 18.0052, "step": 325510 }, { "epoch": 0.6575709951235673, "grad_norm": 66.8957748413086, "learning_rate": 3.2728538301087066e-06, "loss": 17.9362, "step": 325520 }, { "epoch": 0.6575911957562511, "grad_norm": 385.7140808105469, "learning_rate": 3.2725262552854485e-06, "loss": 20.9619, "step": 325530 }, { "epoch": 0.657611396388935, "grad_norm": 126.38581848144531, "learning_rate": 3.272198688881688e-06, "loss": 15.072, "step": 325540 }, { "epoch": 0.6576315970216187, "grad_norm": 141.0269775390625, "learning_rate": 3.2718711308990226e-06, "loss": 11.0777, "step": 325550 }, { "epoch": 0.6576517976543025, "grad_norm": 0.0, "learning_rate": 3.271543581339047e-06, "loss": 12.217, "step": 325560 }, { "epoch": 0.6576719982869863, "grad_norm": 373.1741943359375, "learning_rate": 3.271216040203357e-06, "loss": 11.2479, "step": 325570 }, { "epoch": 0.6576921989196701, "grad_norm": 93.78208923339844, "learning_rate": 3.2708885074935515e-06, "loss": 19.0182, "step": 325580 }, { "epoch": 0.6577123995523539, "grad_norm": 155.4285888671875, "learning_rate": 3.270560983211227e-06, "loss": 13.8564, "step": 325590 }, { "epoch": 0.6577326001850378, "grad_norm": 382.2638244628906, "learning_rate": 3.2702334673579765e-06, "loss": 15.4268, "step": 325600 }, { "epoch": 0.6577528008177216, "grad_norm": 126.21075439453125, "learning_rate": 3.2699059599353987e-06, "loss": 19.6077, "step": 325610 }, { "epoch": 0.6577730014504054, "grad_norm": 171.00563049316406, "learning_rate": 3.2695784609450908e-06, "loss": 21.567, "step": 325620 }, { "epoch": 0.6577932020830892, "grad_norm": 288.177734375, "learning_rate": 3.2692509703886467e-06, "loss": 21.9067, "step": 325630 }, { "epoch": 0.657813402715773, "grad_norm": 293.68463134765625, "learning_rate": 3.2689234882676622e-06, "loss": 19.5552, "step": 325640 }, { "epoch": 0.6578336033484569, "grad_norm": 2.1527037620544434, "learning_rate": 3.268596014583737e-06, "loss": 25.0803, "step": 325650 }, { "epoch": 0.6578538039811407, "grad_norm": 14.163626670837402, "learning_rate": 3.2682685493384636e-06, "loss": 8.1648, "step": 325660 }, { "epoch": 0.6578740046138245, "grad_norm": 220.7692108154297, "learning_rate": 3.2679410925334394e-06, "loss": 10.6251, "step": 325670 }, { "epoch": 0.6578942052465083, "grad_norm": 361.1679992675781, "learning_rate": 3.267613644170261e-06, "loss": 27.944, "step": 325680 }, { "epoch": 0.6579144058791921, "grad_norm": 153.73065185546875, "learning_rate": 3.2672862042505227e-06, "loss": 16.2979, "step": 325690 }, { "epoch": 0.657934606511876, "grad_norm": 210.1148223876953, "learning_rate": 3.26695877277582e-06, "loss": 21.2036, "step": 325700 }, { "epoch": 0.6579548071445598, "grad_norm": 153.427490234375, "learning_rate": 3.266631349747753e-06, "loss": 14.3112, "step": 325710 }, { "epoch": 0.6579750077772436, "grad_norm": 203.58016967773438, "learning_rate": 3.266303935167912e-06, "loss": 15.0888, "step": 325720 }, { "epoch": 0.6579952084099274, "grad_norm": 413.0564880371094, "learning_rate": 3.2659765290378963e-06, "loss": 16.0376, "step": 325730 }, { "epoch": 0.6580154090426112, "grad_norm": 140.69017028808594, "learning_rate": 3.265649131359301e-06, "loss": 27.3984, "step": 325740 }, { "epoch": 0.6580356096752951, "grad_norm": 299.8586730957031, "learning_rate": 3.2653217421337213e-06, "loss": 11.0125, "step": 325750 }, { "epoch": 0.6580558103079789, "grad_norm": 584.1429443359375, "learning_rate": 3.264994361362753e-06, "loss": 13.3475, "step": 325760 }, { "epoch": 0.6580760109406627, "grad_norm": 96.61929321289062, "learning_rate": 3.26466698904799e-06, "loss": 12.359, "step": 325770 }, { "epoch": 0.6580962115733465, "grad_norm": 184.85232543945312, "learning_rate": 3.2643396251910338e-06, "loss": 13.8569, "step": 325780 }, { "epoch": 0.6581164122060303, "grad_norm": 431.3117980957031, "learning_rate": 3.2640122697934716e-06, "loss": 16.2702, "step": 325790 }, { "epoch": 0.6581366128387142, "grad_norm": 244.78298950195312, "learning_rate": 3.263684922856905e-06, "loss": 12.9071, "step": 325800 }, { "epoch": 0.6581568134713979, "grad_norm": 220.46090698242188, "learning_rate": 3.2633575843829278e-06, "loss": 15.3793, "step": 325810 }, { "epoch": 0.6581770141040817, "grad_norm": 342.6137390136719, "learning_rate": 3.2630302543731347e-06, "loss": 25.481, "step": 325820 }, { "epoch": 0.6581972147367655, "grad_norm": 331.4032897949219, "learning_rate": 3.262702932829121e-06, "loss": 11.5014, "step": 325830 }, { "epoch": 0.6582174153694493, "grad_norm": 723.5531005859375, "learning_rate": 3.262375619752484e-06, "loss": 14.2811, "step": 325840 }, { "epoch": 0.6582376160021332, "grad_norm": 490.7746887207031, "learning_rate": 3.262048315144816e-06, "loss": 19.302, "step": 325850 }, { "epoch": 0.658257816634817, "grad_norm": 82.62498474121094, "learning_rate": 3.2617210190077132e-06, "loss": 21.3958, "step": 325860 }, { "epoch": 0.6582780172675008, "grad_norm": 252.25057983398438, "learning_rate": 3.2613937313427735e-06, "loss": 14.9211, "step": 325870 }, { "epoch": 0.6582982179001846, "grad_norm": 39.40414810180664, "learning_rate": 3.2610664521515874e-06, "loss": 17.8797, "step": 325880 }, { "epoch": 0.6583184185328684, "grad_norm": 216.58355712890625, "learning_rate": 3.2607391814357537e-06, "loss": 14.9972, "step": 325890 }, { "epoch": 0.6583386191655523, "grad_norm": 202.657470703125, "learning_rate": 3.260411919196866e-06, "loss": 15.9348, "step": 325900 }, { "epoch": 0.6583588197982361, "grad_norm": 397.97186279296875, "learning_rate": 3.2600846654365202e-06, "loss": 13.3681, "step": 325910 }, { "epoch": 0.6583790204309199, "grad_norm": 145.8855438232422, "learning_rate": 3.2597574201563104e-06, "loss": 29.3084, "step": 325920 }, { "epoch": 0.6583992210636037, "grad_norm": 398.48468017578125, "learning_rate": 3.2594301833578307e-06, "loss": 17.574, "step": 325930 }, { "epoch": 0.6584194216962875, "grad_norm": 387.21697998046875, "learning_rate": 3.25910295504268e-06, "loss": 15.5125, "step": 325940 }, { "epoch": 0.6584396223289714, "grad_norm": 301.341796875, "learning_rate": 3.258775735212447e-06, "loss": 23.7251, "step": 325950 }, { "epoch": 0.6584598229616552, "grad_norm": 619.9978637695312, "learning_rate": 3.2584485238687318e-06, "loss": 21.0933, "step": 325960 }, { "epoch": 0.658480023594339, "grad_norm": 190.62985229492188, "learning_rate": 3.258121321013128e-06, "loss": 6.9802, "step": 325970 }, { "epoch": 0.6585002242270228, "grad_norm": 533.8370361328125, "learning_rate": 3.257794126647228e-06, "loss": 12.1284, "step": 325980 }, { "epoch": 0.6585204248597066, "grad_norm": 125.36014556884766, "learning_rate": 3.257466940772629e-06, "loss": 18.2319, "step": 325990 }, { "epoch": 0.6585406254923905, "grad_norm": 735.6994018554688, "learning_rate": 3.2571397633909252e-06, "loss": 20.9219, "step": 326000 }, { "epoch": 0.6585608261250743, "grad_norm": 33.67034149169922, "learning_rate": 3.2568125945037098e-06, "loss": 7.1089, "step": 326010 }, { "epoch": 0.6585810267577581, "grad_norm": 780.8348999023438, "learning_rate": 3.256485434112578e-06, "loss": 27.645, "step": 326020 }, { "epoch": 0.6586012273904419, "grad_norm": 536.1378784179688, "learning_rate": 3.2561582822191273e-06, "loss": 13.1114, "step": 326030 }, { "epoch": 0.6586214280231257, "grad_norm": 520.2908325195312, "learning_rate": 3.2558311388249465e-06, "loss": 16.2573, "step": 326040 }, { "epoch": 0.6586416286558096, "grad_norm": 311.37176513671875, "learning_rate": 3.2555040039316344e-06, "loss": 14.3754, "step": 326050 }, { "epoch": 0.6586618292884933, "grad_norm": 484.6363220214844, "learning_rate": 3.255176877540784e-06, "loss": 21.1377, "step": 326060 }, { "epoch": 0.6586820299211771, "grad_norm": 0.0, "learning_rate": 3.2548497596539907e-06, "loss": 12.2284, "step": 326070 }, { "epoch": 0.6587022305538609, "grad_norm": 518.36474609375, "learning_rate": 3.2545226502728477e-06, "loss": 22.9116, "step": 326080 }, { "epoch": 0.6587224311865447, "grad_norm": 270.0567321777344, "learning_rate": 3.254195549398948e-06, "loss": 12.4069, "step": 326090 }, { "epoch": 0.6587426318192285, "grad_norm": 321.67742919921875, "learning_rate": 3.2538684570338908e-06, "loss": 9.0036, "step": 326100 }, { "epoch": 0.6587628324519124, "grad_norm": 212.58126831054688, "learning_rate": 3.253541373179264e-06, "loss": 15.1088, "step": 326110 }, { "epoch": 0.6587830330845962, "grad_norm": 257.3951721191406, "learning_rate": 3.2532142978366654e-06, "loss": 32.6853, "step": 326120 }, { "epoch": 0.65880323371728, "grad_norm": 180.008544921875, "learning_rate": 3.252887231007689e-06, "loss": 23.3574, "step": 326130 }, { "epoch": 0.6588234343499638, "grad_norm": 247.4143524169922, "learning_rate": 3.2525601726939283e-06, "loss": 23.2259, "step": 326140 }, { "epoch": 0.6588436349826476, "grad_norm": 153.5231170654297, "learning_rate": 3.2522331228969774e-06, "loss": 22.2066, "step": 326150 }, { "epoch": 0.6588638356153315, "grad_norm": 222.22396850585938, "learning_rate": 3.2519060816184307e-06, "loss": 18.292, "step": 326160 }, { "epoch": 0.6588840362480153, "grad_norm": 333.21868896484375, "learning_rate": 3.251579048859881e-06, "loss": 17.979, "step": 326170 }, { "epoch": 0.6589042368806991, "grad_norm": 243.64358520507812, "learning_rate": 3.2512520246229217e-06, "loss": 25.9495, "step": 326180 }, { "epoch": 0.6589244375133829, "grad_norm": 517.3349609375, "learning_rate": 3.2509250089091494e-06, "loss": 22.6385, "step": 326190 }, { "epoch": 0.6589446381460667, "grad_norm": 47.990482330322266, "learning_rate": 3.2505980017201564e-06, "loss": 22.2003, "step": 326200 }, { "epoch": 0.6589648387787506, "grad_norm": 320.6095886230469, "learning_rate": 3.250271003057537e-06, "loss": 23.6228, "step": 326210 }, { "epoch": 0.6589850394114344, "grad_norm": 371.9893798828125, "learning_rate": 3.249944012922883e-06, "loss": 21.1938, "step": 326220 }, { "epoch": 0.6590052400441182, "grad_norm": 924.9978637695312, "learning_rate": 3.249617031317792e-06, "loss": 26.109, "step": 326230 }, { "epoch": 0.659025440676802, "grad_norm": 439.5033874511719, "learning_rate": 3.2492900582438537e-06, "loss": 30.7055, "step": 326240 }, { "epoch": 0.6590456413094858, "grad_norm": 255.3453826904297, "learning_rate": 3.248963093702663e-06, "loss": 17.2933, "step": 326250 }, { "epoch": 0.6590658419421697, "grad_norm": 172.68479919433594, "learning_rate": 3.248636137695815e-06, "loss": 8.3022, "step": 326260 }, { "epoch": 0.6590860425748535, "grad_norm": 86.05797576904297, "learning_rate": 3.2483091902249008e-06, "loss": 15.3602, "step": 326270 }, { "epoch": 0.6591062432075373, "grad_norm": 181.65016174316406, "learning_rate": 3.247982251291516e-06, "loss": 8.7742, "step": 326280 }, { "epoch": 0.6591264438402211, "grad_norm": 0.0, "learning_rate": 3.247655320897254e-06, "loss": 18.4672, "step": 326290 }, { "epoch": 0.6591466444729049, "grad_norm": 226.94357299804688, "learning_rate": 3.247328399043706e-06, "loss": 14.9022, "step": 326300 }, { "epoch": 0.6591668451055888, "grad_norm": 319.6770935058594, "learning_rate": 3.2470014857324673e-06, "loss": 25.9639, "step": 326310 }, { "epoch": 0.6591870457382725, "grad_norm": 335.3619079589844, "learning_rate": 3.2466745809651312e-06, "loss": 12.7606, "step": 326320 }, { "epoch": 0.6592072463709563, "grad_norm": 439.4655456542969, "learning_rate": 3.2463476847432883e-06, "loss": 18.3547, "step": 326330 }, { "epoch": 0.6592274470036401, "grad_norm": 177.63717651367188, "learning_rate": 3.2460207970685363e-06, "loss": 10.7974, "step": 326340 }, { "epoch": 0.6592476476363239, "grad_norm": 90.918212890625, "learning_rate": 3.245693917942465e-06, "loss": 12.2972, "step": 326350 }, { "epoch": 0.6592678482690078, "grad_norm": 193.43048095703125, "learning_rate": 3.245367047366671e-06, "loss": 24.8898, "step": 326360 }, { "epoch": 0.6592880489016916, "grad_norm": 232.1269073486328, "learning_rate": 3.2450401853427432e-06, "loss": 19.1005, "step": 326370 }, { "epoch": 0.6593082495343754, "grad_norm": 93.41244506835938, "learning_rate": 3.2447133318722756e-06, "loss": 22.3845, "step": 326380 }, { "epoch": 0.6593284501670592, "grad_norm": 221.07508850097656, "learning_rate": 3.2443864869568666e-06, "loss": 24.1764, "step": 326390 }, { "epoch": 0.659348650799743, "grad_norm": 103.7944107055664, "learning_rate": 3.2440596505981005e-06, "loss": 15.8362, "step": 326400 }, { "epoch": 0.6593688514324269, "grad_norm": 115.04316711425781, "learning_rate": 3.243732822797576e-06, "loss": 10.2934, "step": 326410 }, { "epoch": 0.6593890520651107, "grad_norm": 162.42124938964844, "learning_rate": 3.243406003556886e-06, "loss": 17.145, "step": 326420 }, { "epoch": 0.6594092526977945, "grad_norm": 194.11508178710938, "learning_rate": 3.2430791928776217e-06, "loss": 31.6078, "step": 326430 }, { "epoch": 0.6594294533304783, "grad_norm": 286.7826232910156, "learning_rate": 3.2427523907613755e-06, "loss": 20.9251, "step": 326440 }, { "epoch": 0.6594496539631621, "grad_norm": 48.10585021972656, "learning_rate": 3.242425597209742e-06, "loss": 23.1173, "step": 326450 }, { "epoch": 0.659469854595846, "grad_norm": 337.0145263671875, "learning_rate": 3.2420988122243123e-06, "loss": 15.7862, "step": 326460 }, { "epoch": 0.6594900552285298, "grad_norm": 258.88055419921875, "learning_rate": 3.2417720358066785e-06, "loss": 23.1719, "step": 326470 }, { "epoch": 0.6595102558612136, "grad_norm": 53.77909469604492, "learning_rate": 3.241445267958438e-06, "loss": 23.0353, "step": 326480 }, { "epoch": 0.6595304564938974, "grad_norm": 473.7021179199219, "learning_rate": 3.2411185086811763e-06, "loss": 18.8911, "step": 326490 }, { "epoch": 0.6595506571265812, "grad_norm": 210.07496643066406, "learning_rate": 3.2407917579764914e-06, "loss": 14.8715, "step": 326500 }, { "epoch": 0.659570857759265, "grad_norm": 178.1087188720703, "learning_rate": 3.2404650158459737e-06, "loss": 23.5613, "step": 326510 }, { "epoch": 0.6595910583919489, "grad_norm": 71.56072998046875, "learning_rate": 3.240138282291217e-06, "loss": 10.0993, "step": 326520 }, { "epoch": 0.6596112590246327, "grad_norm": 202.05714416503906, "learning_rate": 3.2398115573138123e-06, "loss": 22.8878, "step": 326530 }, { "epoch": 0.6596314596573165, "grad_norm": 392.0143127441406, "learning_rate": 3.2394848409153514e-06, "loss": 25.5134, "step": 326540 }, { "epoch": 0.6596516602900003, "grad_norm": 364.37945556640625, "learning_rate": 3.2391581330974307e-06, "loss": 26.3639, "step": 326550 }, { "epoch": 0.6596718609226842, "grad_norm": 219.4434356689453, "learning_rate": 3.238831433861637e-06, "loss": 13.1391, "step": 326560 }, { "epoch": 0.6596920615553679, "grad_norm": 251.89373779296875, "learning_rate": 3.2385047432095656e-06, "loss": 15.6009, "step": 326570 }, { "epoch": 0.6597122621880517, "grad_norm": 266.72564697265625, "learning_rate": 3.23817806114281e-06, "loss": 11.716, "step": 326580 }, { "epoch": 0.6597324628207355, "grad_norm": 117.76457214355469, "learning_rate": 3.23785138766296e-06, "loss": 16.2663, "step": 326590 }, { "epoch": 0.6597526634534193, "grad_norm": 269.0596923828125, "learning_rate": 3.2375247227716077e-06, "loss": 18.7865, "step": 326600 }, { "epoch": 0.6597728640861031, "grad_norm": 299.82415771484375, "learning_rate": 3.2371980664703486e-06, "loss": 25.7875, "step": 326610 }, { "epoch": 0.659793064718787, "grad_norm": 0.0, "learning_rate": 3.2368714187607696e-06, "loss": 12.2419, "step": 326620 }, { "epoch": 0.6598132653514708, "grad_norm": 239.4378662109375, "learning_rate": 3.236544779644466e-06, "loss": 10.2024, "step": 326630 }, { "epoch": 0.6598334659841546, "grad_norm": 291.7877197265625, "learning_rate": 3.2362181491230295e-06, "loss": 13.3036, "step": 326640 }, { "epoch": 0.6598536666168384, "grad_norm": 109.96804809570312, "learning_rate": 3.235891527198053e-06, "loss": 14.6738, "step": 326650 }, { "epoch": 0.6598738672495222, "grad_norm": 421.67803955078125, "learning_rate": 3.235564913871126e-06, "loss": 18.0582, "step": 326660 }, { "epoch": 0.6598940678822061, "grad_norm": 128.79254150390625, "learning_rate": 3.235238309143842e-06, "loss": 17.0872, "step": 326670 }, { "epoch": 0.6599142685148899, "grad_norm": 354.4855651855469, "learning_rate": 3.234911713017793e-06, "loss": 20.8762, "step": 326680 }, { "epoch": 0.6599344691475737, "grad_norm": 0.0, "learning_rate": 3.2345851254945695e-06, "loss": 8.3976, "step": 326690 }, { "epoch": 0.6599546697802575, "grad_norm": 2.3975155353546143, "learning_rate": 3.2342585465757625e-06, "loss": 4.9695, "step": 326700 }, { "epoch": 0.6599748704129413, "grad_norm": 242.79547119140625, "learning_rate": 3.2339319762629694e-06, "loss": 21.4148, "step": 326710 }, { "epoch": 0.6599950710456252, "grad_norm": 523.0574951171875, "learning_rate": 3.2336054145577735e-06, "loss": 14.6115, "step": 326720 }, { "epoch": 0.660015271678309, "grad_norm": 272.7829895019531, "learning_rate": 3.233278861461772e-06, "loss": 10.2054, "step": 326730 }, { "epoch": 0.6600354723109928, "grad_norm": 676.5116577148438, "learning_rate": 3.2329523169765566e-06, "loss": 39.6245, "step": 326740 }, { "epoch": 0.6600556729436766, "grad_norm": 240.65060424804688, "learning_rate": 3.2326257811037154e-06, "loss": 17.2182, "step": 326750 }, { "epoch": 0.6600758735763604, "grad_norm": 56.322227478027344, "learning_rate": 3.2322992538448418e-06, "loss": 25.4168, "step": 326760 }, { "epoch": 0.6600960742090443, "grad_norm": 258.57073974609375, "learning_rate": 3.2319727352015286e-06, "loss": 16.7178, "step": 326770 }, { "epoch": 0.6601162748417281, "grad_norm": 350.1269836425781, "learning_rate": 3.2316462251753646e-06, "loss": 17.8799, "step": 326780 }, { "epoch": 0.6601364754744119, "grad_norm": 92.82058715820312, "learning_rate": 3.2313197237679416e-06, "loss": 25.0099, "step": 326790 }, { "epoch": 0.6601566761070957, "grad_norm": 300.45367431640625, "learning_rate": 3.230993230980853e-06, "loss": 16.324, "step": 326800 }, { "epoch": 0.6601768767397795, "grad_norm": 441.9029235839844, "learning_rate": 3.2306667468156895e-06, "loss": 31.9588, "step": 326810 }, { "epoch": 0.6601970773724634, "grad_norm": 825.07958984375, "learning_rate": 3.2303402712740404e-06, "loss": 34.9184, "step": 326820 }, { "epoch": 0.6602172780051471, "grad_norm": 251.2619171142578, "learning_rate": 3.2300138043574992e-06, "loss": 15.6919, "step": 326830 }, { "epoch": 0.6602374786378309, "grad_norm": 338.3788146972656, "learning_rate": 3.2296873460676557e-06, "loss": 19.12, "step": 326840 }, { "epoch": 0.6602576792705147, "grad_norm": 12.16672420501709, "learning_rate": 3.229360896406102e-06, "loss": 10.7813, "step": 326850 }, { "epoch": 0.6602778799031985, "grad_norm": 146.46725463867188, "learning_rate": 3.229034455374426e-06, "loss": 17.0615, "step": 326860 }, { "epoch": 0.6602980805358823, "grad_norm": 215.05221557617188, "learning_rate": 3.2287080229742253e-06, "loss": 19.0325, "step": 326870 }, { "epoch": 0.6603182811685662, "grad_norm": 359.9397888183594, "learning_rate": 3.228381599207083e-06, "loss": 15.0846, "step": 326880 }, { "epoch": 0.66033848180125, "grad_norm": 473.69207763671875, "learning_rate": 3.2280551840745953e-06, "loss": 25.0703, "step": 326890 }, { "epoch": 0.6603586824339338, "grad_norm": 263.0552062988281, "learning_rate": 3.227728777578353e-06, "loss": 9.5725, "step": 326900 }, { "epoch": 0.6603788830666176, "grad_norm": 428.03985595703125, "learning_rate": 3.2274023797199446e-06, "loss": 34.6077, "step": 326910 }, { "epoch": 0.6603990836993014, "grad_norm": 275.6466064453125, "learning_rate": 3.227075990500962e-06, "loss": 10.7516, "step": 326920 }, { "epoch": 0.6604192843319853, "grad_norm": 564.428955078125, "learning_rate": 3.226749609922997e-06, "loss": 22.3673, "step": 326930 }, { "epoch": 0.6604394849646691, "grad_norm": 61.24484634399414, "learning_rate": 3.226423237987637e-06, "loss": 10.2674, "step": 326940 }, { "epoch": 0.6604596855973529, "grad_norm": 424.0412292480469, "learning_rate": 3.226096874696476e-06, "loss": 19.6093, "step": 326950 }, { "epoch": 0.6604798862300367, "grad_norm": 128.7362518310547, "learning_rate": 3.2257705200511035e-06, "loss": 8.9401, "step": 326960 }, { "epoch": 0.6605000868627205, "grad_norm": 249.3404998779297, "learning_rate": 3.2254441740531124e-06, "loss": 19.12, "step": 326970 }, { "epoch": 0.6605202874954044, "grad_norm": 370.0439453125, "learning_rate": 3.225117836704089e-06, "loss": 9.5898, "step": 326980 }, { "epoch": 0.6605404881280882, "grad_norm": 11.001893043518066, "learning_rate": 3.224791508005627e-06, "loss": 12.0435, "step": 326990 }, { "epoch": 0.660560688760772, "grad_norm": 194.4591064453125, "learning_rate": 3.224465187959316e-06, "loss": 29.441, "step": 327000 }, { "epoch": 0.6605808893934558, "grad_norm": 176.9925994873047, "learning_rate": 3.224138876566745e-06, "loss": 7.7294, "step": 327010 }, { "epoch": 0.6606010900261396, "grad_norm": 200.58013916015625, "learning_rate": 3.2238125738295063e-06, "loss": 9.6497, "step": 327020 }, { "epoch": 0.6606212906588235, "grad_norm": 287.3268127441406, "learning_rate": 3.2234862797491905e-06, "loss": 13.3691, "step": 327030 }, { "epoch": 0.6606414912915073, "grad_norm": 183.89501953125, "learning_rate": 3.2231599943273865e-06, "loss": 13.7846, "step": 327040 }, { "epoch": 0.6606616919241911, "grad_norm": 195.7413787841797, "learning_rate": 3.2228337175656856e-06, "loss": 32.1246, "step": 327050 }, { "epoch": 0.6606818925568749, "grad_norm": 172.09678649902344, "learning_rate": 3.222507449465678e-06, "loss": 15.3339, "step": 327060 }, { "epoch": 0.6607020931895587, "grad_norm": 341.8531799316406, "learning_rate": 3.2221811900289524e-06, "loss": 16.6524, "step": 327070 }, { "epoch": 0.6607222938222426, "grad_norm": 161.6706085205078, "learning_rate": 3.221854939257099e-06, "loss": 18.2873, "step": 327080 }, { "epoch": 0.6607424944549263, "grad_norm": 143.52655029296875, "learning_rate": 3.2215286971517123e-06, "loss": 14.6089, "step": 327090 }, { "epoch": 0.6607626950876101, "grad_norm": 497.94110107421875, "learning_rate": 3.2212024637143756e-06, "loss": 14.929, "step": 327100 }, { "epoch": 0.6607828957202939, "grad_norm": 193.99810791015625, "learning_rate": 3.220876238946684e-06, "loss": 17.4537, "step": 327110 }, { "epoch": 0.6608030963529777, "grad_norm": 83.85682678222656, "learning_rate": 3.2205500228502257e-06, "loss": 12.3017, "step": 327120 }, { "epoch": 0.6608232969856616, "grad_norm": 168.2505340576172, "learning_rate": 3.220223815426592e-06, "loss": 15.8399, "step": 327130 }, { "epoch": 0.6608434976183454, "grad_norm": 553.30029296875, "learning_rate": 3.21989761667737e-06, "loss": 18.5892, "step": 327140 }, { "epoch": 0.6608636982510292, "grad_norm": 110.0310287475586, "learning_rate": 3.21957142660415e-06, "loss": 22.5185, "step": 327150 }, { "epoch": 0.660883898883713, "grad_norm": 39.11213684082031, "learning_rate": 3.2192452452085265e-06, "loss": 17.6712, "step": 327160 }, { "epoch": 0.6609040995163968, "grad_norm": 164.17599487304688, "learning_rate": 3.218919072492082e-06, "loss": 23.7526, "step": 327170 }, { "epoch": 0.6609243001490807, "grad_norm": 111.22543334960938, "learning_rate": 3.2185929084564115e-06, "loss": 8.1492, "step": 327180 }, { "epoch": 0.6609445007817645, "grad_norm": 225.23658752441406, "learning_rate": 3.2182667531031044e-06, "loss": 15.2497, "step": 327190 }, { "epoch": 0.6609647014144483, "grad_norm": 235.8397216796875, "learning_rate": 3.217940606433747e-06, "loss": 18.9651, "step": 327200 }, { "epoch": 0.6609849020471321, "grad_norm": 231.15354919433594, "learning_rate": 3.2176144684499315e-06, "loss": 13.3758, "step": 327210 }, { "epoch": 0.6610051026798159, "grad_norm": 387.40655517578125, "learning_rate": 3.2172883391532484e-06, "loss": 13.0113, "step": 327220 }, { "epoch": 0.6610253033124998, "grad_norm": 233.1168670654297, "learning_rate": 3.216962218545284e-06, "loss": 14.1502, "step": 327230 }, { "epoch": 0.6610455039451836, "grad_norm": 474.86578369140625, "learning_rate": 3.2166361066276287e-06, "loss": 30.812, "step": 327240 }, { "epoch": 0.6610657045778674, "grad_norm": 284.93560791015625, "learning_rate": 3.2163100034018735e-06, "loss": 19.1191, "step": 327250 }, { "epoch": 0.6610859052105512, "grad_norm": 0.0, "learning_rate": 3.2159839088696088e-06, "loss": 14.5604, "step": 327260 }, { "epoch": 0.661106105843235, "grad_norm": 427.17974853515625, "learning_rate": 3.21565782303242e-06, "loss": 13.531, "step": 327270 }, { "epoch": 0.6611263064759189, "grad_norm": 100.81487274169922, "learning_rate": 3.2153317458918997e-06, "loss": 22.8328, "step": 327280 }, { "epoch": 0.6611465071086027, "grad_norm": 315.03594970703125, "learning_rate": 3.2150056774496363e-06, "loss": 19.3659, "step": 327290 }, { "epoch": 0.6611667077412865, "grad_norm": 382.0511779785156, "learning_rate": 3.2146796177072183e-06, "loss": 18.2029, "step": 327300 }, { "epoch": 0.6611869083739703, "grad_norm": 11.172293663024902, "learning_rate": 3.214353566666234e-06, "loss": 15.5864, "step": 327310 }, { "epoch": 0.6612071090066541, "grad_norm": 299.9978332519531, "learning_rate": 3.2140275243282765e-06, "loss": 17.9478, "step": 327320 }, { "epoch": 0.661227309639338, "grad_norm": 191.23272705078125, "learning_rate": 3.2137014906949295e-06, "loss": 18.8358, "step": 327330 }, { "epoch": 0.6612475102720217, "grad_norm": 330.0998229980469, "learning_rate": 3.2133754657677857e-06, "loss": 11.7128, "step": 327340 }, { "epoch": 0.6612677109047055, "grad_norm": 202.90525817871094, "learning_rate": 3.2130494495484345e-06, "loss": 22.7998, "step": 327350 }, { "epoch": 0.6612879115373893, "grad_norm": 355.19244384765625, "learning_rate": 3.2127234420384624e-06, "loss": 18.4039, "step": 327360 }, { "epoch": 0.6613081121700731, "grad_norm": 313.8470153808594, "learning_rate": 3.212397443239459e-06, "loss": 13.0787, "step": 327370 }, { "epoch": 0.661328312802757, "grad_norm": 214.9947509765625, "learning_rate": 3.212071453153015e-06, "loss": 24.2101, "step": 327380 }, { "epoch": 0.6613485134354408, "grad_norm": 360.4667053222656, "learning_rate": 3.2117454717807174e-06, "loss": 12.0551, "step": 327390 }, { "epoch": 0.6613687140681246, "grad_norm": 65.8126449584961, "learning_rate": 3.211419499124154e-06, "loss": 10.2824, "step": 327400 }, { "epoch": 0.6613889147008084, "grad_norm": 326.3389587402344, "learning_rate": 3.2110935351849158e-06, "loss": 17.3033, "step": 327410 }, { "epoch": 0.6614091153334922, "grad_norm": 163.67202758789062, "learning_rate": 3.2107675799645923e-06, "loss": 18.1344, "step": 327420 }, { "epoch": 0.661429315966176, "grad_norm": 562.8267822265625, "learning_rate": 3.210441633464769e-06, "loss": 17.3613, "step": 327430 }, { "epoch": 0.6614495165988599, "grad_norm": 332.34039306640625, "learning_rate": 3.2101156956870367e-06, "loss": 19.9791, "step": 327440 }, { "epoch": 0.6614697172315437, "grad_norm": 141.43856811523438, "learning_rate": 3.209789766632984e-06, "loss": 14.0907, "step": 327450 }, { "epoch": 0.6614899178642275, "grad_norm": 274.4481201171875, "learning_rate": 3.209463846304198e-06, "loss": 16.5337, "step": 327460 }, { "epoch": 0.6615101184969113, "grad_norm": 209.8513946533203, "learning_rate": 3.209137934702267e-06, "loss": 15.4026, "step": 327470 }, { "epoch": 0.6615303191295951, "grad_norm": 349.7495422363281, "learning_rate": 3.2088120318287843e-06, "loss": 11.5828, "step": 327480 }, { "epoch": 0.661550519762279, "grad_norm": 89.10489654541016, "learning_rate": 3.2084861376853304e-06, "loss": 23.0678, "step": 327490 }, { "epoch": 0.6615707203949628, "grad_norm": 401.8861083984375, "learning_rate": 3.2081602522734987e-06, "loss": 9.7407, "step": 327500 }, { "epoch": 0.6615909210276466, "grad_norm": 340.9719543457031, "learning_rate": 3.2078343755948783e-06, "loss": 17.8865, "step": 327510 }, { "epoch": 0.6616111216603304, "grad_norm": 1.0342674255371094, "learning_rate": 3.2075085076510548e-06, "loss": 16.5126, "step": 327520 }, { "epoch": 0.6616313222930142, "grad_norm": 344.8098449707031, "learning_rate": 3.207182648443617e-06, "loss": 11.8553, "step": 327530 }, { "epoch": 0.6616515229256981, "grad_norm": 128.34625244140625, "learning_rate": 3.206856797974155e-06, "loss": 12.8328, "step": 327540 }, { "epoch": 0.6616717235583819, "grad_norm": 13.705146789550781, "learning_rate": 3.2065309562442536e-06, "loss": 20.8026, "step": 327550 }, { "epoch": 0.6616919241910657, "grad_norm": 137.8618621826172, "learning_rate": 3.2062051232555024e-06, "loss": 10.9887, "step": 327560 }, { "epoch": 0.6617121248237495, "grad_norm": 385.4226989746094, "learning_rate": 3.205879299009491e-06, "loss": 16.0471, "step": 327570 }, { "epoch": 0.6617323254564333, "grad_norm": 367.6513366699219, "learning_rate": 3.2055534835078075e-06, "loss": 22.0955, "step": 327580 }, { "epoch": 0.6617525260891172, "grad_norm": 181.55325317382812, "learning_rate": 3.205227676752037e-06, "loss": 20.2743, "step": 327590 }, { "epoch": 0.6617727267218009, "grad_norm": 175.49917602539062, "learning_rate": 3.2049018787437693e-06, "loss": 10.8197, "step": 327600 }, { "epoch": 0.6617929273544847, "grad_norm": 374.81976318359375, "learning_rate": 3.2045760894845932e-06, "loss": 30.8354, "step": 327610 }, { "epoch": 0.6618131279871685, "grad_norm": 181.06578063964844, "learning_rate": 3.2042503089760934e-06, "loss": 25.7601, "step": 327620 }, { "epoch": 0.6618333286198523, "grad_norm": 108.80001831054688, "learning_rate": 3.2039245372198613e-06, "loss": 11.1302, "step": 327630 }, { "epoch": 0.6618535292525362, "grad_norm": 225.58242797851562, "learning_rate": 3.203598774217484e-06, "loss": 13.5496, "step": 327640 }, { "epoch": 0.66187372988522, "grad_norm": 153.93894958496094, "learning_rate": 3.2032730199705477e-06, "loss": 11.1781, "step": 327650 }, { "epoch": 0.6618939305179038, "grad_norm": 425.7506103515625, "learning_rate": 3.20294727448064e-06, "loss": 35.8962, "step": 327660 }, { "epoch": 0.6619141311505876, "grad_norm": 253.01351928710938, "learning_rate": 3.2026215377493507e-06, "loss": 24.0507, "step": 327670 }, { "epoch": 0.6619343317832714, "grad_norm": 196.513427734375, "learning_rate": 3.2022958097782646e-06, "loss": 36.8028, "step": 327680 }, { "epoch": 0.6619545324159553, "grad_norm": 686.6356811523438, "learning_rate": 3.20197009056897e-06, "loss": 21.4402, "step": 327690 }, { "epoch": 0.6619747330486391, "grad_norm": 487.2127990722656, "learning_rate": 3.201644380123056e-06, "loss": 19.8976, "step": 327700 }, { "epoch": 0.6619949336813229, "grad_norm": 238.9039764404297, "learning_rate": 3.201318678442111e-06, "loss": 35.8058, "step": 327710 }, { "epoch": 0.6620151343140067, "grad_norm": 225.4444122314453, "learning_rate": 3.2009929855277187e-06, "loss": 34.4569, "step": 327720 }, { "epoch": 0.6620353349466905, "grad_norm": 99.00267028808594, "learning_rate": 3.200667301381468e-06, "loss": 20.4052, "step": 327730 }, { "epoch": 0.6620555355793744, "grad_norm": 374.1791076660156, "learning_rate": 3.2003416260049493e-06, "loss": 20.204, "step": 327740 }, { "epoch": 0.6620757362120582, "grad_norm": 396.3586120605469, "learning_rate": 3.2000159593997447e-06, "loss": 20.7164, "step": 327750 }, { "epoch": 0.662095936844742, "grad_norm": 151.03948974609375, "learning_rate": 3.1996903015674434e-06, "loss": 10.998, "step": 327760 }, { "epoch": 0.6621161374774258, "grad_norm": 448.80084228515625, "learning_rate": 3.1993646525096368e-06, "loss": 14.9308, "step": 327770 }, { "epoch": 0.6621363381101096, "grad_norm": 252.24801635742188, "learning_rate": 3.1990390122279046e-06, "loss": 12.9454, "step": 327780 }, { "epoch": 0.6621565387427935, "grad_norm": 284.669189453125, "learning_rate": 3.198713380723839e-06, "loss": 9.1188, "step": 327790 }, { "epoch": 0.6621767393754773, "grad_norm": 150.355224609375, "learning_rate": 3.1983877579990276e-06, "loss": 22.5252, "step": 327800 }, { "epoch": 0.6621969400081611, "grad_norm": 338.27960205078125, "learning_rate": 3.198062144055054e-06, "loss": 10.9142, "step": 327810 }, { "epoch": 0.6622171406408449, "grad_norm": 682.7586059570312, "learning_rate": 3.1977365388935076e-06, "loss": 19.279, "step": 327820 }, { "epoch": 0.6622373412735287, "grad_norm": 78.7805404663086, "learning_rate": 3.1974109425159754e-06, "loss": 10.6139, "step": 327830 }, { "epoch": 0.6622575419062126, "grad_norm": 394.5622863769531, "learning_rate": 3.1970853549240425e-06, "loss": 17.5633, "step": 327840 }, { "epoch": 0.6622777425388963, "grad_norm": 371.48724365234375, "learning_rate": 3.196759776119296e-06, "loss": 33.9955, "step": 327850 }, { "epoch": 0.6622979431715801, "grad_norm": 195.62338256835938, "learning_rate": 3.1964342061033247e-06, "loss": 11.7567, "step": 327860 }, { "epoch": 0.6623181438042639, "grad_norm": 537.2147827148438, "learning_rate": 3.1961086448777157e-06, "loss": 21.3133, "step": 327870 }, { "epoch": 0.6623383444369477, "grad_norm": 16.89143180847168, "learning_rate": 3.1957830924440524e-06, "loss": 13.7263, "step": 327880 }, { "epoch": 0.6623585450696315, "grad_norm": 128.0252227783203, "learning_rate": 3.195457548803925e-06, "loss": 29.5408, "step": 327890 }, { "epoch": 0.6623787457023154, "grad_norm": 406.5066833496094, "learning_rate": 3.195132013958918e-06, "loss": 14.6329, "step": 327900 }, { "epoch": 0.6623989463349992, "grad_norm": 467.2767639160156, "learning_rate": 3.1948064879106187e-06, "loss": 14.9914, "step": 327910 }, { "epoch": 0.662419146967683, "grad_norm": 691.7493896484375, "learning_rate": 3.1944809706606123e-06, "loss": 34.614, "step": 327920 }, { "epoch": 0.6624393476003668, "grad_norm": 33.41098403930664, "learning_rate": 3.1941554622104897e-06, "loss": 16.2717, "step": 327930 }, { "epoch": 0.6624595482330506, "grad_norm": 435.9576110839844, "learning_rate": 3.1938299625618313e-06, "loss": 18.7853, "step": 327940 }, { "epoch": 0.6624797488657345, "grad_norm": 142.20916748046875, "learning_rate": 3.193504471716228e-06, "loss": 8.8489, "step": 327950 }, { "epoch": 0.6624999494984183, "grad_norm": 217.9789276123047, "learning_rate": 3.1931789896752654e-06, "loss": 15.5868, "step": 327960 }, { "epoch": 0.6625201501311021, "grad_norm": 783.4512329101562, "learning_rate": 3.192853516440528e-06, "loss": 22.5112, "step": 327970 }, { "epoch": 0.6625403507637859, "grad_norm": 318.3432312011719, "learning_rate": 3.192528052013604e-06, "loss": 11.8663, "step": 327980 }, { "epoch": 0.6625605513964697, "grad_norm": 242.25428771972656, "learning_rate": 3.1922025963960796e-06, "loss": 20.828, "step": 327990 }, { "epoch": 0.6625807520291536, "grad_norm": 280.2621154785156, "learning_rate": 3.1918771495895395e-06, "loss": 29.4464, "step": 328000 }, { "epoch": 0.6626009526618374, "grad_norm": 276.1973876953125, "learning_rate": 3.1915517115955704e-06, "loss": 16.4701, "step": 328010 }, { "epoch": 0.6626211532945212, "grad_norm": 256.4225769042969, "learning_rate": 3.1912262824157592e-06, "loss": 16.2036, "step": 328020 }, { "epoch": 0.662641353927205, "grad_norm": 98.25212860107422, "learning_rate": 3.1909008620516933e-06, "loss": 14.8008, "step": 328030 }, { "epoch": 0.6626615545598888, "grad_norm": 221.53724670410156, "learning_rate": 3.190575450504956e-06, "loss": 9.205, "step": 328040 }, { "epoch": 0.6626817551925727, "grad_norm": 423.1664733886719, "learning_rate": 3.190250047777134e-06, "loss": 28.2495, "step": 328050 }, { "epoch": 0.6627019558252565, "grad_norm": 154.92788696289062, "learning_rate": 3.1899246538698157e-06, "loss": 12.0119, "step": 328060 }, { "epoch": 0.6627221564579403, "grad_norm": 98.53589630126953, "learning_rate": 3.1895992687845836e-06, "loss": 11.4873, "step": 328070 }, { "epoch": 0.6627423570906241, "grad_norm": 547.826416015625, "learning_rate": 3.1892738925230236e-06, "loss": 17.4032, "step": 328080 }, { "epoch": 0.6627625577233079, "grad_norm": 44.38571548461914, "learning_rate": 3.188948525086727e-06, "loss": 20.8625, "step": 328090 }, { "epoch": 0.6627827583559918, "grad_norm": 213.43472290039062, "learning_rate": 3.188623166477272e-06, "loss": 17.5372, "step": 328100 }, { "epoch": 0.6628029589886755, "grad_norm": 163.25497436523438, "learning_rate": 3.188297816696249e-06, "loss": 25.3139, "step": 328110 }, { "epoch": 0.6628231596213593, "grad_norm": 116.71949768066406, "learning_rate": 3.187972475745244e-06, "loss": 17.6574, "step": 328120 }, { "epoch": 0.6628433602540431, "grad_norm": 115.16315460205078, "learning_rate": 3.1876471436258407e-06, "loss": 20.2535, "step": 328130 }, { "epoch": 0.6628635608867269, "grad_norm": 98.1766128540039, "learning_rate": 3.1873218203396246e-06, "loss": 24.0647, "step": 328140 }, { "epoch": 0.6628837615194108, "grad_norm": 102.27338409423828, "learning_rate": 3.1869965058881836e-06, "loss": 20.1172, "step": 328150 }, { "epoch": 0.6629039621520946, "grad_norm": 174.74249267578125, "learning_rate": 3.1866712002731004e-06, "loss": 16.2557, "step": 328160 }, { "epoch": 0.6629241627847784, "grad_norm": 325.7478942871094, "learning_rate": 3.186345903495961e-06, "loss": 10.9675, "step": 328170 }, { "epoch": 0.6629443634174622, "grad_norm": 190.34957885742188, "learning_rate": 3.1860206155583527e-06, "loss": 32.621, "step": 328180 }, { "epoch": 0.662964564050146, "grad_norm": 174.83380126953125, "learning_rate": 3.185695336461861e-06, "loss": 14.5338, "step": 328190 }, { "epoch": 0.6629847646828299, "grad_norm": 234.81553649902344, "learning_rate": 3.185370066208069e-06, "loss": 6.9583, "step": 328200 }, { "epoch": 0.6630049653155137, "grad_norm": 69.43287658691406, "learning_rate": 3.185044804798564e-06, "loss": 17.2796, "step": 328210 }, { "epoch": 0.6630251659481975, "grad_norm": 562.757568359375, "learning_rate": 3.1847195522349305e-06, "loss": 27.6862, "step": 328220 }, { "epoch": 0.6630453665808813, "grad_norm": 18.539575576782227, "learning_rate": 3.1843943085187527e-06, "loss": 17.3579, "step": 328230 }, { "epoch": 0.6630655672135651, "grad_norm": 91.31442260742188, "learning_rate": 3.1840690736516166e-06, "loss": 9.3702, "step": 328240 }, { "epoch": 0.663085767846249, "grad_norm": 277.3453369140625, "learning_rate": 3.183743847635109e-06, "loss": 15.1834, "step": 328250 }, { "epoch": 0.6631059684789328, "grad_norm": 14.979848861694336, "learning_rate": 3.1834186304708126e-06, "loss": 18.551, "step": 328260 }, { "epoch": 0.6631261691116166, "grad_norm": 61.85576248168945, "learning_rate": 3.183093422160314e-06, "loss": 12.8246, "step": 328270 }, { "epoch": 0.6631463697443004, "grad_norm": 218.59933471679688, "learning_rate": 3.182768222705198e-06, "loss": 12.6696, "step": 328280 }, { "epoch": 0.6631665703769842, "grad_norm": 46.90803146362305, "learning_rate": 3.182443032107049e-06, "loss": 16.5812, "step": 328290 }, { "epoch": 0.663186771009668, "grad_norm": 219.76593017578125, "learning_rate": 3.1821178503674515e-06, "loss": 11.0109, "step": 328300 }, { "epoch": 0.6632069716423519, "grad_norm": 290.1388854980469, "learning_rate": 3.1817926774879903e-06, "loss": 29.0246, "step": 328310 }, { "epoch": 0.6632271722750357, "grad_norm": 317.0000305175781, "learning_rate": 3.1814675134702534e-06, "loss": 19.3551, "step": 328320 }, { "epoch": 0.6632473729077195, "grad_norm": 368.84783935546875, "learning_rate": 3.181142358315822e-06, "loss": 19.7271, "step": 328330 }, { "epoch": 0.6632675735404033, "grad_norm": 629.0137329101562, "learning_rate": 3.1808172120262824e-06, "loss": 30.8046, "step": 328340 }, { "epoch": 0.6632877741730872, "grad_norm": 239.26258850097656, "learning_rate": 3.1804920746032197e-06, "loss": 25.2518, "step": 328350 }, { "epoch": 0.6633079748057709, "grad_norm": 242.98611450195312, "learning_rate": 3.1801669460482176e-06, "loss": 34.7126, "step": 328360 }, { "epoch": 0.6633281754384547, "grad_norm": 292.62481689453125, "learning_rate": 3.1798418263628595e-06, "loss": 21.8885, "step": 328370 }, { "epoch": 0.6633483760711385, "grad_norm": 355.77105712890625, "learning_rate": 3.179516715548735e-06, "loss": 19.9501, "step": 328380 }, { "epoch": 0.6633685767038223, "grad_norm": 205.372314453125, "learning_rate": 3.179191613607422e-06, "loss": 12.2449, "step": 328390 }, { "epoch": 0.6633887773365061, "grad_norm": 123.54949188232422, "learning_rate": 3.178866520540509e-06, "loss": 25.0976, "step": 328400 }, { "epoch": 0.66340897796919, "grad_norm": 25.5798282623291, "learning_rate": 3.1785414363495808e-06, "loss": 18.2194, "step": 328410 }, { "epoch": 0.6634291786018738, "grad_norm": 291.1321716308594, "learning_rate": 3.17821636103622e-06, "loss": 15.4615, "step": 328420 }, { "epoch": 0.6634493792345576, "grad_norm": 176.7991485595703, "learning_rate": 3.1778912946020114e-06, "loss": 32.0946, "step": 328430 }, { "epoch": 0.6634695798672414, "grad_norm": 426.8163146972656, "learning_rate": 3.1775662370485406e-06, "loss": 19.7823, "step": 328440 }, { "epoch": 0.6634897804999252, "grad_norm": 183.1822509765625, "learning_rate": 3.17724118837739e-06, "loss": 19.427, "step": 328450 }, { "epoch": 0.6635099811326091, "grad_norm": 135.07452392578125, "learning_rate": 3.1769161485901445e-06, "loss": 14.2264, "step": 328460 }, { "epoch": 0.6635301817652929, "grad_norm": 505.7695617675781, "learning_rate": 3.176591117688389e-06, "loss": 15.1813, "step": 328470 }, { "epoch": 0.6635503823979767, "grad_norm": 350.0630187988281, "learning_rate": 3.176266095673708e-06, "loss": 10.0568, "step": 328480 }, { "epoch": 0.6635705830306605, "grad_norm": 345.45660400390625, "learning_rate": 3.175941082547684e-06, "loss": 28.3207, "step": 328490 }, { "epoch": 0.6635907836633443, "grad_norm": 347.3276672363281, "learning_rate": 3.1756160783119015e-06, "loss": 19.0781, "step": 328500 }, { "epoch": 0.6636109842960282, "grad_norm": 191.92466735839844, "learning_rate": 3.175291082967947e-06, "loss": 29.0587, "step": 328510 }, { "epoch": 0.663631184928712, "grad_norm": 17.330217361450195, "learning_rate": 3.1749660965174007e-06, "loss": 15.0873, "step": 328520 }, { "epoch": 0.6636513855613958, "grad_norm": 1466.05126953125, "learning_rate": 3.1746411189618478e-06, "loss": 16.2506, "step": 328530 }, { "epoch": 0.6636715861940796, "grad_norm": 129.29611206054688, "learning_rate": 3.174316150302875e-06, "loss": 23.9151, "step": 328540 }, { "epoch": 0.6636917868267634, "grad_norm": 292.1615905761719, "learning_rate": 3.1739911905420617e-06, "loss": 25.0983, "step": 328550 }, { "epoch": 0.6637119874594473, "grad_norm": 361.19378662109375, "learning_rate": 3.1736662396809936e-06, "loss": 15.8222, "step": 328560 }, { "epoch": 0.6637321880921311, "grad_norm": 341.584716796875, "learning_rate": 3.173341297721257e-06, "loss": 12.7655, "step": 328570 }, { "epoch": 0.6637523887248149, "grad_norm": 101.7635726928711, "learning_rate": 3.1730163646644317e-06, "loss": 17.3331, "step": 328580 }, { "epoch": 0.6637725893574987, "grad_norm": 167.9801483154297, "learning_rate": 3.1726914405121034e-06, "loss": 6.8435, "step": 328590 }, { "epoch": 0.6637927899901825, "grad_norm": 211.2415008544922, "learning_rate": 3.1723665252658564e-06, "loss": 7.537, "step": 328600 }, { "epoch": 0.6638129906228664, "grad_norm": 635.2185668945312, "learning_rate": 3.172041618927272e-06, "loss": 25.6663, "step": 328610 }, { "epoch": 0.6638331912555501, "grad_norm": 469.54364013671875, "learning_rate": 3.171716721497934e-06, "loss": 12.983, "step": 328620 }, { "epoch": 0.6638533918882339, "grad_norm": 423.7010498046875, "learning_rate": 3.171391832979428e-06, "loss": 19.9065, "step": 328630 }, { "epoch": 0.6638735925209177, "grad_norm": 305.1524963378906, "learning_rate": 3.171066953373338e-06, "loss": 15.8969, "step": 328640 }, { "epoch": 0.6638937931536015, "grad_norm": 816.8417358398438, "learning_rate": 3.170742082681244e-06, "loss": 20.4286, "step": 328650 }, { "epoch": 0.6639139937862854, "grad_norm": 433.6932678222656, "learning_rate": 3.1704172209047324e-06, "loss": 10.186, "step": 328660 }, { "epoch": 0.6639341944189692, "grad_norm": 623.5365600585938, "learning_rate": 3.1700923680453855e-06, "loss": 20.6232, "step": 328670 }, { "epoch": 0.663954395051653, "grad_norm": 346.6846618652344, "learning_rate": 3.1697675241047852e-06, "loss": 16.0772, "step": 328680 }, { "epoch": 0.6639745956843368, "grad_norm": 155.77227783203125, "learning_rate": 3.1694426890845155e-06, "loss": 14.2962, "step": 328690 }, { "epoch": 0.6639947963170206, "grad_norm": 330.6006774902344, "learning_rate": 3.169117862986163e-06, "loss": 24.6247, "step": 328700 }, { "epoch": 0.6640149969497045, "grad_norm": 432.1536560058594, "learning_rate": 3.168793045811305e-06, "loss": 18.6168, "step": 328710 }, { "epoch": 0.6640351975823883, "grad_norm": 258.6457824707031, "learning_rate": 3.1684682375615283e-06, "loss": 12.7236, "step": 328720 }, { "epoch": 0.6640553982150721, "grad_norm": 401.7471618652344, "learning_rate": 3.168143438238417e-06, "loss": 30.0694, "step": 328730 }, { "epoch": 0.6640755988477559, "grad_norm": 398.5086975097656, "learning_rate": 3.1678186478435508e-06, "loss": 13.64, "step": 328740 }, { "epoch": 0.6640957994804397, "grad_norm": 101.19783782958984, "learning_rate": 3.167493866378514e-06, "loss": 10.1689, "step": 328750 }, { "epoch": 0.6641160001131236, "grad_norm": 243.6019744873047, "learning_rate": 3.1671690938448895e-06, "loss": 17.3492, "step": 328760 }, { "epoch": 0.6641362007458074, "grad_norm": 362.1288757324219, "learning_rate": 3.166844330244263e-06, "loss": 13.4636, "step": 328770 }, { "epoch": 0.6641564013784912, "grad_norm": 69.41883087158203, "learning_rate": 3.166519575578213e-06, "loss": 15.4202, "step": 328780 }, { "epoch": 0.664176602011175, "grad_norm": 181.4620361328125, "learning_rate": 3.1661948298483243e-06, "loss": 13.2216, "step": 328790 }, { "epoch": 0.6641968026438588, "grad_norm": 263.1123352050781, "learning_rate": 3.16587009305618e-06, "loss": 18.9125, "step": 328800 }, { "epoch": 0.6642170032765427, "grad_norm": 153.07974243164062, "learning_rate": 3.165545365203363e-06, "loss": 12.4234, "step": 328810 }, { "epoch": 0.6642372039092265, "grad_norm": 325.9620666503906, "learning_rate": 3.1652206462914542e-06, "loss": 20.599, "step": 328820 }, { "epoch": 0.6642574045419103, "grad_norm": 185.7403106689453, "learning_rate": 3.164895936322039e-06, "loss": 18.2439, "step": 328830 }, { "epoch": 0.6642776051745941, "grad_norm": 389.2598571777344, "learning_rate": 3.1645712352966967e-06, "loss": 19.838, "step": 328840 }, { "epoch": 0.6642978058072779, "grad_norm": 252.95315551757812, "learning_rate": 3.164246543217011e-06, "loss": 19.7948, "step": 328850 }, { "epoch": 0.6643180064399618, "grad_norm": 115.37670135498047, "learning_rate": 3.1639218600845673e-06, "loss": 5.3637, "step": 328860 }, { "epoch": 0.6643382070726456, "grad_norm": 239.08619689941406, "learning_rate": 3.1635971859009444e-06, "loss": 14.7204, "step": 328870 }, { "epoch": 0.6643584077053293, "grad_norm": 407.8770751953125, "learning_rate": 3.1632725206677264e-06, "loss": 11.223, "step": 328880 }, { "epoch": 0.6643786083380131, "grad_norm": 176.52923583984375, "learning_rate": 3.1629478643864963e-06, "loss": 15.2725, "step": 328890 }, { "epoch": 0.6643988089706969, "grad_norm": 336.4420166015625, "learning_rate": 3.1626232170588343e-06, "loss": 23.5987, "step": 328900 }, { "epoch": 0.6644190096033807, "grad_norm": 364.7860107421875, "learning_rate": 3.1622985786863236e-06, "loss": 28.8834, "step": 328910 }, { "epoch": 0.6644392102360646, "grad_norm": 42.82365798950195, "learning_rate": 3.1619739492705464e-06, "loss": 8.593, "step": 328920 }, { "epoch": 0.6644594108687484, "grad_norm": 44.66585922241211, "learning_rate": 3.1616493288130866e-06, "loss": 9.9886, "step": 328930 }, { "epoch": 0.6644796115014322, "grad_norm": 272.505859375, "learning_rate": 3.1613247173155247e-06, "loss": 18.5452, "step": 328940 }, { "epoch": 0.664499812134116, "grad_norm": 330.61322021484375, "learning_rate": 3.161000114779443e-06, "loss": 13.9614, "step": 328950 }, { "epoch": 0.6645200127667998, "grad_norm": 470.10638427734375, "learning_rate": 3.1606755212064246e-06, "loss": 17.2895, "step": 328960 }, { "epoch": 0.6645402133994837, "grad_norm": 460.28912353515625, "learning_rate": 3.1603509365980495e-06, "loss": 7.4429, "step": 328970 }, { "epoch": 0.6645604140321675, "grad_norm": 224.4679718017578, "learning_rate": 3.1600263609559005e-06, "loss": 10.7333, "step": 328980 }, { "epoch": 0.6645806146648513, "grad_norm": 132.04293823242188, "learning_rate": 3.159701794281561e-06, "loss": 14.2284, "step": 328990 }, { "epoch": 0.6646008152975351, "grad_norm": 555.3931884765625, "learning_rate": 3.1593772365766107e-06, "loss": 15.4211, "step": 329000 }, { "epoch": 0.6646210159302189, "grad_norm": 159.13580322265625, "learning_rate": 3.1590526878426326e-06, "loss": 22.0639, "step": 329010 }, { "epoch": 0.6646412165629028, "grad_norm": 373.58697509765625, "learning_rate": 3.1587281480812093e-06, "loss": 17.4181, "step": 329020 }, { "epoch": 0.6646614171955866, "grad_norm": 338.4696350097656, "learning_rate": 3.1584036172939213e-06, "loss": 29.631, "step": 329030 }, { "epoch": 0.6646816178282704, "grad_norm": 130.6535186767578, "learning_rate": 3.1580790954823505e-06, "loss": 11.8957, "step": 329040 }, { "epoch": 0.6647018184609542, "grad_norm": 434.26971435546875, "learning_rate": 3.157754582648079e-06, "loss": 19.9165, "step": 329050 }, { "epoch": 0.664722019093638, "grad_norm": 309.7406005859375, "learning_rate": 3.1574300787926883e-06, "loss": 11.8892, "step": 329060 }, { "epoch": 0.6647422197263219, "grad_norm": 301.3302917480469, "learning_rate": 3.1571055839177583e-06, "loss": 25.3925, "step": 329070 }, { "epoch": 0.6647624203590057, "grad_norm": 335.4089660644531, "learning_rate": 3.156781098024874e-06, "loss": 13.1523, "step": 329080 }, { "epoch": 0.6647826209916895, "grad_norm": 33.909812927246094, "learning_rate": 3.156456621115615e-06, "loss": 8.7046, "step": 329090 }, { "epoch": 0.6648028216243733, "grad_norm": 103.00626373291016, "learning_rate": 3.1561321531915622e-06, "loss": 18.4697, "step": 329100 }, { "epoch": 0.6648230222570571, "grad_norm": 45.780765533447266, "learning_rate": 3.155807694254298e-06, "loss": 21.9889, "step": 329110 }, { "epoch": 0.664843222889741, "grad_norm": 429.17718505859375, "learning_rate": 3.155483244305404e-06, "loss": 19.8272, "step": 329120 }, { "epoch": 0.6648634235224247, "grad_norm": 141.12945556640625, "learning_rate": 3.15515880334646e-06, "loss": 20.9811, "step": 329130 }, { "epoch": 0.6648836241551085, "grad_norm": 401.8570251464844, "learning_rate": 3.1548343713790474e-06, "loss": 13.814, "step": 329140 }, { "epoch": 0.6649038247877923, "grad_norm": 72.95146179199219, "learning_rate": 3.1545099484047514e-06, "loss": 9.2453, "step": 329150 }, { "epoch": 0.6649240254204761, "grad_norm": 225.23095703125, "learning_rate": 3.154185534425147e-06, "loss": 21.134, "step": 329160 }, { "epoch": 0.66494422605316, "grad_norm": 1317.222900390625, "learning_rate": 3.153861129441819e-06, "loss": 18.031, "step": 329170 }, { "epoch": 0.6649644266858438, "grad_norm": 202.93807983398438, "learning_rate": 3.1535367334563493e-06, "loss": 10.3172, "step": 329180 }, { "epoch": 0.6649846273185276, "grad_norm": 320.64837646484375, "learning_rate": 3.153212346470317e-06, "loss": 16.3942, "step": 329190 }, { "epoch": 0.6650048279512114, "grad_norm": 371.21270751953125, "learning_rate": 3.152887968485303e-06, "loss": 15.8807, "step": 329200 }, { "epoch": 0.6650250285838952, "grad_norm": 513.0034790039062, "learning_rate": 3.1525635995028884e-06, "loss": 15.6988, "step": 329210 }, { "epoch": 0.665045229216579, "grad_norm": 265.98583984375, "learning_rate": 3.1522392395246584e-06, "loss": 13.8008, "step": 329220 }, { "epoch": 0.6650654298492629, "grad_norm": 267.44720458984375, "learning_rate": 3.151914888552186e-06, "loss": 12.9357, "step": 329230 }, { "epoch": 0.6650856304819467, "grad_norm": 187.77993774414062, "learning_rate": 3.1515905465870576e-06, "loss": 17.8416, "step": 329240 }, { "epoch": 0.6651058311146305, "grad_norm": 118.65718078613281, "learning_rate": 3.151266213630854e-06, "loss": 22.3871, "step": 329250 }, { "epoch": 0.6651260317473143, "grad_norm": 419.2017822265625, "learning_rate": 3.150941889685154e-06, "loss": 23.6734, "step": 329260 }, { "epoch": 0.6651462323799981, "grad_norm": 264.1424560546875, "learning_rate": 3.1506175747515384e-06, "loss": 18.4256, "step": 329270 }, { "epoch": 0.665166433012682, "grad_norm": 536.053955078125, "learning_rate": 3.1502932688315897e-06, "loss": 22.1468, "step": 329280 }, { "epoch": 0.6651866336453658, "grad_norm": 327.3392333984375, "learning_rate": 3.1499689719268854e-06, "loss": 18.1499, "step": 329290 }, { "epoch": 0.6652068342780496, "grad_norm": 375.1274719238281, "learning_rate": 3.149644684039008e-06, "loss": 19.9864, "step": 329300 }, { "epoch": 0.6652270349107334, "grad_norm": 371.9472351074219, "learning_rate": 3.1493204051695407e-06, "loss": 10.9677, "step": 329310 }, { "epoch": 0.6652472355434172, "grad_norm": 463.853271484375, "learning_rate": 3.148996135320058e-06, "loss": 24.6589, "step": 329320 }, { "epoch": 0.6652674361761011, "grad_norm": 464.0895080566406, "learning_rate": 3.148671874492145e-06, "loss": 26.1861, "step": 329330 }, { "epoch": 0.6652876368087849, "grad_norm": 203.8291473388672, "learning_rate": 3.1483476226873822e-06, "loss": 14.7731, "step": 329340 }, { "epoch": 0.6653078374414687, "grad_norm": 269.4259338378906, "learning_rate": 3.1480233799073467e-06, "loss": 16.5227, "step": 329350 }, { "epoch": 0.6653280380741525, "grad_norm": 175.5221710205078, "learning_rate": 3.147699146153621e-06, "loss": 12.0148, "step": 329360 }, { "epoch": 0.6653482387068363, "grad_norm": 51.1705207824707, "learning_rate": 3.147374921427784e-06, "loss": 11.8172, "step": 329370 }, { "epoch": 0.6653684393395202, "grad_norm": 174.22669982910156, "learning_rate": 3.14705070573142e-06, "loss": 10.6218, "step": 329380 }, { "epoch": 0.6653886399722039, "grad_norm": 293.23675537109375, "learning_rate": 3.146726499066103e-06, "loss": 15.1288, "step": 329390 }, { "epoch": 0.6654088406048877, "grad_norm": 549.660888671875, "learning_rate": 3.1464023014334164e-06, "loss": 19.6882, "step": 329400 }, { "epoch": 0.6654290412375715, "grad_norm": 178.77833557128906, "learning_rate": 3.146078112834943e-06, "loss": 17.0214, "step": 329410 }, { "epoch": 0.6654492418702553, "grad_norm": 353.603759765625, "learning_rate": 3.1457539332722577e-06, "loss": 18.9934, "step": 329420 }, { "epoch": 0.6654694425029392, "grad_norm": 478.4796447753906, "learning_rate": 3.145429762746943e-06, "loss": 19.2612, "step": 329430 }, { "epoch": 0.665489643135623, "grad_norm": 125.69647979736328, "learning_rate": 3.1451056012605796e-06, "loss": 10.6766, "step": 329440 }, { "epoch": 0.6655098437683068, "grad_norm": 424.0111389160156, "learning_rate": 3.144781448814746e-06, "loss": 21.525, "step": 329450 }, { "epoch": 0.6655300444009906, "grad_norm": 417.8822326660156, "learning_rate": 3.1444573054110216e-06, "loss": 13.5652, "step": 329460 }, { "epoch": 0.6655502450336744, "grad_norm": 300.3384704589844, "learning_rate": 3.14413317105099e-06, "loss": 30.4885, "step": 329470 }, { "epoch": 0.6655704456663583, "grad_norm": 323.8936767578125, "learning_rate": 3.1438090457362253e-06, "loss": 22.0413, "step": 329480 }, { "epoch": 0.6655906462990421, "grad_norm": 697.3681640625, "learning_rate": 3.1434849294683113e-06, "loss": 14.8028, "step": 329490 }, { "epoch": 0.6656108469317259, "grad_norm": 165.16598510742188, "learning_rate": 3.1431608222488276e-06, "loss": 17.4015, "step": 329500 }, { "epoch": 0.6656310475644097, "grad_norm": 89.66180419921875, "learning_rate": 3.1428367240793513e-06, "loss": 19.0066, "step": 329510 }, { "epoch": 0.6656512481970935, "grad_norm": 269.5770263671875, "learning_rate": 3.1425126349614636e-06, "loss": 16.4889, "step": 329520 }, { "epoch": 0.6656714488297774, "grad_norm": 600.0156860351562, "learning_rate": 3.1421885548967436e-06, "loss": 22.6312, "step": 329530 }, { "epoch": 0.6656916494624612, "grad_norm": 291.55084228515625, "learning_rate": 3.141864483886774e-06, "loss": 13.4993, "step": 329540 }, { "epoch": 0.665711850095145, "grad_norm": 54.24449157714844, "learning_rate": 3.1415404219331287e-06, "loss": 19.5029, "step": 329550 }, { "epoch": 0.6657320507278288, "grad_norm": 1.5172042846679688, "learning_rate": 3.141216369037391e-06, "loss": 12.9723, "step": 329560 }, { "epoch": 0.6657522513605126, "grad_norm": 104.76483917236328, "learning_rate": 3.14089232520114e-06, "loss": 14.0206, "step": 329570 }, { "epoch": 0.6657724519931965, "grad_norm": 109.83952331542969, "learning_rate": 3.1405682904259534e-06, "loss": 18.305, "step": 329580 }, { "epoch": 0.6657926526258803, "grad_norm": 291.440185546875, "learning_rate": 3.1402442647134115e-06, "loss": 16.838, "step": 329590 }, { "epoch": 0.6658128532585641, "grad_norm": 207.01580810546875, "learning_rate": 3.139920248065095e-06, "loss": 20.0622, "step": 329600 }, { "epoch": 0.6658330538912479, "grad_norm": 283.66448974609375, "learning_rate": 3.1395962404825787e-06, "loss": 20.5891, "step": 329610 }, { "epoch": 0.6658532545239317, "grad_norm": 110.69061279296875, "learning_rate": 3.139272241967446e-06, "loss": 11.697, "step": 329620 }, { "epoch": 0.6658734551566156, "grad_norm": 207.20028686523438, "learning_rate": 3.1389482525212753e-06, "loss": 15.6329, "step": 329630 }, { "epoch": 0.6658936557892993, "grad_norm": 192.45013427734375, "learning_rate": 3.138624272145645e-06, "loss": 13.5762, "step": 329640 }, { "epoch": 0.6659138564219831, "grad_norm": 40.7459831237793, "learning_rate": 3.1383003008421336e-06, "loss": 24.5271, "step": 329650 }, { "epoch": 0.6659340570546669, "grad_norm": 288.3659973144531, "learning_rate": 3.137976338612322e-06, "loss": 10.9337, "step": 329660 }, { "epoch": 0.6659542576873507, "grad_norm": 322.1324768066406, "learning_rate": 3.1376523854577866e-06, "loss": 15.6807, "step": 329670 }, { "epoch": 0.6659744583200345, "grad_norm": 239.38699340820312, "learning_rate": 3.1373284413801075e-06, "loss": 21.0629, "step": 329680 }, { "epoch": 0.6659946589527184, "grad_norm": 197.8610382080078, "learning_rate": 3.137004506380864e-06, "loss": 16.7041, "step": 329690 }, { "epoch": 0.6660148595854022, "grad_norm": 546.2587280273438, "learning_rate": 3.1366805804616353e-06, "loss": 28.2993, "step": 329700 }, { "epoch": 0.666035060218086, "grad_norm": 83.45526123046875, "learning_rate": 3.1363566636239983e-06, "loss": 28.9386, "step": 329710 }, { "epoch": 0.6660552608507698, "grad_norm": 249.18356323242188, "learning_rate": 3.1360327558695336e-06, "loss": 9.8699, "step": 329720 }, { "epoch": 0.6660754614834536, "grad_norm": 321.2801818847656, "learning_rate": 3.1357088571998203e-06, "loss": 12.5479, "step": 329730 }, { "epoch": 0.6660956621161375, "grad_norm": 269.49188232421875, "learning_rate": 3.1353849676164344e-06, "loss": 13.4194, "step": 329740 }, { "epoch": 0.6661158627488213, "grad_norm": 366.74945068359375, "learning_rate": 3.1350610871209553e-06, "loss": 27.1737, "step": 329750 }, { "epoch": 0.6661360633815051, "grad_norm": 108.90770721435547, "learning_rate": 3.1347372157149647e-06, "loss": 16.094, "step": 329760 }, { "epoch": 0.6661562640141889, "grad_norm": 326.3755798339844, "learning_rate": 3.1344133534000364e-06, "loss": 18.6985, "step": 329770 }, { "epoch": 0.6661764646468727, "grad_norm": 346.4378356933594, "learning_rate": 3.1340895001777518e-06, "loss": 13.9988, "step": 329780 }, { "epoch": 0.6661966652795566, "grad_norm": 547.8566284179688, "learning_rate": 3.13376565604969e-06, "loss": 30.2986, "step": 329790 }, { "epoch": 0.6662168659122404, "grad_norm": 447.8743591308594, "learning_rate": 3.1334418210174268e-06, "loss": 21.7103, "step": 329800 }, { "epoch": 0.6662370665449242, "grad_norm": 394.9977111816406, "learning_rate": 3.1331179950825415e-06, "loss": 14.2382, "step": 329810 }, { "epoch": 0.666257267177608, "grad_norm": 738.65283203125, "learning_rate": 3.132794178246612e-06, "loss": 25.6669, "step": 329820 }, { "epoch": 0.6662774678102918, "grad_norm": 226.13914489746094, "learning_rate": 3.1324703705112203e-06, "loss": 14.885, "step": 329830 }, { "epoch": 0.6662976684429757, "grad_norm": 208.6623077392578, "learning_rate": 3.1321465718779375e-06, "loss": 15.6992, "step": 329840 }, { "epoch": 0.6663178690756595, "grad_norm": 201.5078125, "learning_rate": 3.1318227823483483e-06, "loss": 11.5514, "step": 329850 }, { "epoch": 0.6663380697083433, "grad_norm": 344.0553283691406, "learning_rate": 3.1314990019240283e-06, "loss": 11.1086, "step": 329860 }, { "epoch": 0.6663582703410271, "grad_norm": 427.4706115722656, "learning_rate": 3.1311752306065547e-06, "loss": 14.1368, "step": 329870 }, { "epoch": 0.666378470973711, "grad_norm": 176.13424682617188, "learning_rate": 3.1308514683975067e-06, "loss": 19.937, "step": 329880 }, { "epoch": 0.6663986716063948, "grad_norm": 341.6733093261719, "learning_rate": 3.1305277152984624e-06, "loss": 11.5544, "step": 329890 }, { "epoch": 0.6664188722390785, "grad_norm": 386.2061462402344, "learning_rate": 3.130203971310999e-06, "loss": 32.3193, "step": 329900 }, { "epoch": 0.6664390728717623, "grad_norm": 291.69354248046875, "learning_rate": 3.129880236436693e-06, "loss": 19.2647, "step": 329910 }, { "epoch": 0.6664592735044461, "grad_norm": 112.29643249511719, "learning_rate": 3.1295565106771275e-06, "loss": 19.2167, "step": 329920 }, { "epoch": 0.6664794741371299, "grad_norm": 238.74856567382812, "learning_rate": 3.129232794033873e-06, "loss": 16.5354, "step": 329930 }, { "epoch": 0.6664996747698138, "grad_norm": 338.0630187988281, "learning_rate": 3.1289090865085124e-06, "loss": 16.9998, "step": 329940 }, { "epoch": 0.6665198754024976, "grad_norm": 362.2852478027344, "learning_rate": 3.128585388102623e-06, "loss": 20.9235, "step": 329950 }, { "epoch": 0.6665400760351814, "grad_norm": 405.906005859375, "learning_rate": 3.1282616988177806e-06, "loss": 23.7336, "step": 329960 }, { "epoch": 0.6665602766678652, "grad_norm": 304.0990905761719, "learning_rate": 3.127938018655563e-06, "loss": 19.3071, "step": 329970 }, { "epoch": 0.666580477300549, "grad_norm": 259.2118225097656, "learning_rate": 3.127614347617548e-06, "loss": 8.7356, "step": 329980 }, { "epoch": 0.6666006779332329, "grad_norm": 187.90420532226562, "learning_rate": 3.1272906857053164e-06, "loss": 19.7438, "step": 329990 }, { "epoch": 0.6666208785659167, "grad_norm": 93.44966125488281, "learning_rate": 3.12696703292044e-06, "loss": 17.5113, "step": 330000 }, { "epoch": 0.6666410791986005, "grad_norm": 0.6111046075820923, "learning_rate": 3.1266433892645e-06, "loss": 13.0142, "step": 330010 }, { "epoch": 0.6666612798312843, "grad_norm": 862.162109375, "learning_rate": 3.126319754739074e-06, "loss": 33.2872, "step": 330020 }, { "epoch": 0.6666814804639681, "grad_norm": 336.31939697265625, "learning_rate": 3.1259961293457373e-06, "loss": 17.0852, "step": 330030 }, { "epoch": 0.666701681096652, "grad_norm": 416.84991455078125, "learning_rate": 3.1256725130860676e-06, "loss": 10.043, "step": 330040 }, { "epoch": 0.6667218817293358, "grad_norm": 306.9471740722656, "learning_rate": 3.1253489059616448e-06, "loss": 6.4227, "step": 330050 }, { "epoch": 0.6667420823620196, "grad_norm": 372.6961364746094, "learning_rate": 3.1250253079740423e-06, "loss": 12.9093, "step": 330060 }, { "epoch": 0.6667622829947034, "grad_norm": 102.91746520996094, "learning_rate": 3.124701719124838e-06, "loss": 17.2012, "step": 330070 }, { "epoch": 0.6667824836273872, "grad_norm": 311.12640380859375, "learning_rate": 3.1243781394156138e-06, "loss": 14.2873, "step": 330080 }, { "epoch": 0.666802684260071, "grad_norm": 273.5013732910156, "learning_rate": 3.1240545688479395e-06, "loss": 15.0098, "step": 330090 }, { "epoch": 0.6668228848927549, "grad_norm": 95.75477600097656, "learning_rate": 3.1237310074233964e-06, "loss": 16.6949, "step": 330100 }, { "epoch": 0.6668430855254387, "grad_norm": 437.8410949707031, "learning_rate": 3.1234074551435624e-06, "loss": 17.598, "step": 330110 }, { "epoch": 0.6668632861581225, "grad_norm": 304.8205871582031, "learning_rate": 3.1230839120100114e-06, "loss": 17.2939, "step": 330120 }, { "epoch": 0.6668834867908063, "grad_norm": 465.0726318359375, "learning_rate": 3.122760378024322e-06, "loss": 18.6992, "step": 330130 }, { "epoch": 0.6669036874234902, "grad_norm": 277.9466247558594, "learning_rate": 3.12243685318807e-06, "loss": 16.4442, "step": 330140 }, { "epoch": 0.666923888056174, "grad_norm": 233.32357788085938, "learning_rate": 3.1221133375028357e-06, "loss": 15.1137, "step": 330150 }, { "epoch": 0.6669440886888577, "grad_norm": 94.81449127197266, "learning_rate": 3.1217898309701903e-06, "loss": 15.4507, "step": 330160 }, { "epoch": 0.6669642893215415, "grad_norm": 627.6292114257812, "learning_rate": 3.121466333591715e-06, "loss": 18.662, "step": 330170 }, { "epoch": 0.6669844899542253, "grad_norm": 226.99801635742188, "learning_rate": 3.1211428453689853e-06, "loss": 19.6593, "step": 330180 }, { "epoch": 0.6670046905869091, "grad_norm": 588.7395629882812, "learning_rate": 3.120819366303577e-06, "loss": 33.6431, "step": 330190 }, { "epoch": 0.667024891219593, "grad_norm": 204.64736938476562, "learning_rate": 3.1204958963970666e-06, "loss": 18.1832, "step": 330200 }, { "epoch": 0.6670450918522768, "grad_norm": 352.9015197753906, "learning_rate": 3.1201724356510328e-06, "loss": 18.1837, "step": 330210 }, { "epoch": 0.6670652924849606, "grad_norm": 529.1224975585938, "learning_rate": 3.1198489840670494e-06, "loss": 18.6674, "step": 330220 }, { "epoch": 0.6670854931176444, "grad_norm": 189.75636291503906, "learning_rate": 3.119525541646693e-06, "loss": 10.4157, "step": 330230 }, { "epoch": 0.6671056937503282, "grad_norm": 247.81578063964844, "learning_rate": 3.1192021083915424e-06, "loss": 17.5992, "step": 330240 }, { "epoch": 0.6671258943830121, "grad_norm": 287.9820556640625, "learning_rate": 3.1188786843031727e-06, "loss": 23.6369, "step": 330250 }, { "epoch": 0.6671460950156959, "grad_norm": 172.9054412841797, "learning_rate": 3.1185552693831595e-06, "loss": 18.6106, "step": 330260 }, { "epoch": 0.6671662956483797, "grad_norm": 359.9023132324219, "learning_rate": 3.1182318636330798e-06, "loss": 19.6593, "step": 330270 }, { "epoch": 0.6671864962810635, "grad_norm": 14.932379722595215, "learning_rate": 3.1179084670545114e-06, "loss": 13.4993, "step": 330280 }, { "epoch": 0.6672066969137473, "grad_norm": 146.74087524414062, "learning_rate": 3.117585079649026e-06, "loss": 17.5585, "step": 330290 }, { "epoch": 0.6672268975464312, "grad_norm": 380.2978210449219, "learning_rate": 3.117261701418204e-06, "loss": 23.6818, "step": 330300 }, { "epoch": 0.667247098179115, "grad_norm": 143.3151092529297, "learning_rate": 3.1169383323636205e-06, "loss": 6.0168, "step": 330310 }, { "epoch": 0.6672672988117988, "grad_norm": 216.28244018554688, "learning_rate": 3.116614972486851e-06, "loss": 18.3759, "step": 330320 }, { "epoch": 0.6672874994444826, "grad_norm": 270.51129150390625, "learning_rate": 3.1162916217894713e-06, "loss": 21.1778, "step": 330330 }, { "epoch": 0.6673077000771664, "grad_norm": 224.6033172607422, "learning_rate": 3.115968280273059e-06, "loss": 27.8389, "step": 330340 }, { "epoch": 0.6673279007098503, "grad_norm": 180.8910369873047, "learning_rate": 3.1156449479391876e-06, "loss": 17.5037, "step": 330350 }, { "epoch": 0.6673481013425341, "grad_norm": 56.40607833862305, "learning_rate": 3.115321624789433e-06, "loss": 11.6342, "step": 330360 }, { "epoch": 0.6673683019752179, "grad_norm": 326.2639465332031, "learning_rate": 3.1149983108253754e-06, "loss": 12.1102, "step": 330370 }, { "epoch": 0.6673885026079017, "grad_norm": 175.93812561035156, "learning_rate": 3.1146750060485847e-06, "loss": 16.6841, "step": 330380 }, { "epoch": 0.6674087032405855, "grad_norm": 7.7756123542785645, "learning_rate": 3.1143517104606404e-06, "loss": 14.245, "step": 330390 }, { "epoch": 0.6674289038732694, "grad_norm": 201.9288787841797, "learning_rate": 3.114028424063118e-06, "loss": 11.6891, "step": 330400 }, { "epoch": 0.6674491045059531, "grad_norm": 279.9373474121094, "learning_rate": 3.1137051468575905e-06, "loss": 13.7667, "step": 330410 }, { "epoch": 0.6674693051386369, "grad_norm": 415.5817565917969, "learning_rate": 3.113381878845636e-06, "loss": 15.6253, "step": 330420 }, { "epoch": 0.6674895057713207, "grad_norm": 185.3625946044922, "learning_rate": 3.113058620028829e-06, "loss": 13.8818, "step": 330430 }, { "epoch": 0.6675097064040045, "grad_norm": 478.21588134765625, "learning_rate": 3.1127353704087477e-06, "loss": 17.9427, "step": 330440 }, { "epoch": 0.6675299070366884, "grad_norm": 138.28536987304688, "learning_rate": 3.1124121299869627e-06, "loss": 22.906, "step": 330450 }, { "epoch": 0.6675501076693722, "grad_norm": 596.7040405273438, "learning_rate": 3.112088898765052e-06, "loss": 24.7098, "step": 330460 }, { "epoch": 0.667570308302056, "grad_norm": 228.25437927246094, "learning_rate": 3.1117656767445936e-06, "loss": 21.0886, "step": 330470 }, { "epoch": 0.6675905089347398, "grad_norm": 341.1817321777344, "learning_rate": 3.1114424639271584e-06, "loss": 15.1444, "step": 330480 }, { "epoch": 0.6676107095674236, "grad_norm": 10.294116973876953, "learning_rate": 3.1111192603143235e-06, "loss": 13.3331, "step": 330490 }, { "epoch": 0.6676309102001075, "grad_norm": 369.7087707519531, "learning_rate": 3.110796065907665e-06, "loss": 19.8188, "step": 330500 }, { "epoch": 0.6676511108327913, "grad_norm": 356.1034240722656, "learning_rate": 3.110472880708757e-06, "loss": 16.1073, "step": 330510 }, { "epoch": 0.6676713114654751, "grad_norm": 25.542749404907227, "learning_rate": 3.110149704719174e-06, "loss": 15.1766, "step": 330520 }, { "epoch": 0.6676915120981589, "grad_norm": 100.43976593017578, "learning_rate": 3.1098265379404953e-06, "loss": 16.5437, "step": 330530 }, { "epoch": 0.6677117127308427, "grad_norm": 554.3204956054688, "learning_rate": 3.109503380374289e-06, "loss": 20.147, "step": 330540 }, { "epoch": 0.6677319133635266, "grad_norm": 364.3846130371094, "learning_rate": 3.109180232022135e-06, "loss": 20.0007, "step": 330550 }, { "epoch": 0.6677521139962104, "grad_norm": 150.53797912597656, "learning_rate": 3.1088570928856087e-06, "loss": 13.098, "step": 330560 }, { "epoch": 0.6677723146288942, "grad_norm": 89.09352111816406, "learning_rate": 3.1085339629662826e-06, "loss": 33.3035, "step": 330570 }, { "epoch": 0.667792515261578, "grad_norm": 498.39642333984375, "learning_rate": 3.1082108422657328e-06, "loss": 16.6546, "step": 330580 }, { "epoch": 0.6678127158942618, "grad_norm": 393.2024841308594, "learning_rate": 3.1078877307855325e-06, "loss": 8.432, "step": 330590 }, { "epoch": 0.6678329165269457, "grad_norm": 164.40597534179688, "learning_rate": 3.1075646285272608e-06, "loss": 27.7001, "step": 330600 }, { "epoch": 0.6678531171596295, "grad_norm": 254.5586700439453, "learning_rate": 3.1072415354924866e-06, "loss": 23.2955, "step": 330610 }, { "epoch": 0.6678733177923133, "grad_norm": 267.0837097167969, "learning_rate": 3.106918451682789e-06, "loss": 17.1097, "step": 330620 }, { "epoch": 0.6678935184249971, "grad_norm": 213.9842987060547, "learning_rate": 3.1065953770997416e-06, "loss": 20.1418, "step": 330630 }, { "epoch": 0.6679137190576809, "grad_norm": 187.84165954589844, "learning_rate": 3.106272311744918e-06, "loss": 12.2254, "step": 330640 }, { "epoch": 0.6679339196903648, "grad_norm": 294.5089416503906, "learning_rate": 3.1059492556198933e-06, "loss": 29.2558, "step": 330650 }, { "epoch": 0.6679541203230486, "grad_norm": 142.8616485595703, "learning_rate": 3.1056262087262432e-06, "loss": 21.543, "step": 330660 }, { "epoch": 0.6679743209557323, "grad_norm": 35.32051086425781, "learning_rate": 3.105303171065541e-06, "loss": 18.4599, "step": 330670 }, { "epoch": 0.6679945215884161, "grad_norm": 156.32113647460938, "learning_rate": 3.10498014263936e-06, "loss": 15.7582, "step": 330680 }, { "epoch": 0.6680147222210999, "grad_norm": 140.69869995117188, "learning_rate": 3.1046571234492782e-06, "loss": 15.5799, "step": 330690 }, { "epoch": 0.6680349228537837, "grad_norm": 16.674842834472656, "learning_rate": 3.1043341134968653e-06, "loss": 23.6184, "step": 330700 }, { "epoch": 0.6680551234864676, "grad_norm": 227.7049102783203, "learning_rate": 3.1040111127836994e-06, "loss": 33.7488, "step": 330710 }, { "epoch": 0.6680753241191514, "grad_norm": 273.486328125, "learning_rate": 3.1036881213113533e-06, "loss": 22.2706, "step": 330720 }, { "epoch": 0.6680955247518352, "grad_norm": 404.769287109375, "learning_rate": 3.1033651390814012e-06, "loss": 15.208, "step": 330730 }, { "epoch": 0.668115725384519, "grad_norm": 358.1103515625, "learning_rate": 3.103042166095417e-06, "loss": 20.7348, "step": 330740 }, { "epoch": 0.6681359260172028, "grad_norm": 179.81631469726562, "learning_rate": 3.102719202354974e-06, "loss": 14.1117, "step": 330750 }, { "epoch": 0.6681561266498867, "grad_norm": 924.6339111328125, "learning_rate": 3.102396247861651e-06, "loss": 29.2721, "step": 330760 }, { "epoch": 0.6681763272825705, "grad_norm": 539.9767456054688, "learning_rate": 3.102073302617015e-06, "loss": 12.1628, "step": 330770 }, { "epoch": 0.6681965279152543, "grad_norm": 3718.938720703125, "learning_rate": 3.101750366622645e-06, "loss": 27.0186, "step": 330780 }, { "epoch": 0.6682167285479381, "grad_norm": 491.4143371582031, "learning_rate": 3.101427439880115e-06, "loss": 18.5518, "step": 330790 }, { "epoch": 0.6682369291806219, "grad_norm": 367.20404052734375, "learning_rate": 3.1011045223909954e-06, "loss": 19.123, "step": 330800 }, { "epoch": 0.6682571298133058, "grad_norm": 317.1374206542969, "learning_rate": 3.1007816141568625e-06, "loss": 14.184, "step": 330810 }, { "epoch": 0.6682773304459896, "grad_norm": 526.2254028320312, "learning_rate": 3.1004587151792903e-06, "loss": 25.4683, "step": 330820 }, { "epoch": 0.6682975310786734, "grad_norm": 131.69354248046875, "learning_rate": 3.1001358254598512e-06, "loss": 12.1002, "step": 330830 }, { "epoch": 0.6683177317113572, "grad_norm": 265.9306945800781, "learning_rate": 3.0998129450001184e-06, "loss": 14.0064, "step": 330840 }, { "epoch": 0.668337932344041, "grad_norm": 239.4925994873047, "learning_rate": 3.0994900738016693e-06, "loss": 14.878, "step": 330850 }, { "epoch": 0.6683581329767249, "grad_norm": 350.140380859375, "learning_rate": 3.099167211866074e-06, "loss": 12.7391, "step": 330860 }, { "epoch": 0.6683783336094087, "grad_norm": 132.3457794189453, "learning_rate": 3.098844359194907e-06, "loss": 13.8073, "step": 330870 }, { "epoch": 0.6683985342420925, "grad_norm": 205.7363739013672, "learning_rate": 3.098521515789742e-06, "loss": 15.8544, "step": 330880 }, { "epoch": 0.6684187348747763, "grad_norm": 234.84852600097656, "learning_rate": 3.0981986816521536e-06, "loss": 19.6206, "step": 330890 }, { "epoch": 0.6684389355074601, "grad_norm": 507.22625732421875, "learning_rate": 3.097875856783713e-06, "loss": 25.8105, "step": 330900 }, { "epoch": 0.668459136140144, "grad_norm": 312.27618408203125, "learning_rate": 3.097553041185993e-06, "loss": 17.3199, "step": 330910 }, { "epoch": 0.6684793367728277, "grad_norm": 314.1047058105469, "learning_rate": 3.097230234860572e-06, "loss": 13.5322, "step": 330920 }, { "epoch": 0.6684995374055115, "grad_norm": 28.259309768676758, "learning_rate": 3.096907437809019e-06, "loss": 17.7154, "step": 330930 }, { "epoch": 0.6685197380381953, "grad_norm": 474.27978515625, "learning_rate": 3.096584650032908e-06, "loss": 30.235, "step": 330940 }, { "epoch": 0.6685399386708791, "grad_norm": 338.90167236328125, "learning_rate": 3.0962618715338135e-06, "loss": 15.824, "step": 330950 }, { "epoch": 0.668560139303563, "grad_norm": 332.6732177734375, "learning_rate": 3.095939102313307e-06, "loss": 19.0384, "step": 330960 }, { "epoch": 0.6685803399362468, "grad_norm": 143.47605895996094, "learning_rate": 3.0956163423729624e-06, "loss": 18.1524, "step": 330970 }, { "epoch": 0.6686005405689306, "grad_norm": 185.57691955566406, "learning_rate": 3.0952935917143533e-06, "loss": 15.944, "step": 330980 }, { "epoch": 0.6686207412016144, "grad_norm": 384.8312072753906, "learning_rate": 3.0949708503390507e-06, "loss": 15.5547, "step": 330990 }, { "epoch": 0.6686409418342982, "grad_norm": 234.44459533691406, "learning_rate": 3.09464811824863e-06, "loss": 11.069, "step": 331000 }, { "epoch": 0.668661142466982, "grad_norm": 117.56761169433594, "learning_rate": 3.094325395444664e-06, "loss": 10.1443, "step": 331010 }, { "epoch": 0.6686813430996659, "grad_norm": 301.17144775390625, "learning_rate": 3.094002681928724e-06, "loss": 16.4104, "step": 331020 }, { "epoch": 0.6687015437323497, "grad_norm": 441.94830322265625, "learning_rate": 3.093679977702384e-06, "loss": 24.3502, "step": 331030 }, { "epoch": 0.6687217443650335, "grad_norm": 245.93753051757812, "learning_rate": 3.0933572827672155e-06, "loss": 25.2998, "step": 331040 }, { "epoch": 0.6687419449977173, "grad_norm": 213.0681915283203, "learning_rate": 3.093034597124795e-06, "loss": 14.4417, "step": 331050 }, { "epoch": 0.6687621456304012, "grad_norm": 48.06312561035156, "learning_rate": 3.09271192077669e-06, "loss": 11.5683, "step": 331060 }, { "epoch": 0.668782346263085, "grad_norm": 377.4251403808594, "learning_rate": 3.092389253724476e-06, "loss": 9.651, "step": 331070 }, { "epoch": 0.6688025468957688, "grad_norm": 212.8891143798828, "learning_rate": 3.092066595969727e-06, "loss": 17.9551, "step": 331080 }, { "epoch": 0.6688227475284526, "grad_norm": 273.8845520019531, "learning_rate": 3.0917439475140133e-06, "loss": 16.3949, "step": 331090 }, { "epoch": 0.6688429481611364, "grad_norm": 274.19598388671875, "learning_rate": 3.0914213083589086e-06, "loss": 22.4552, "step": 331100 }, { "epoch": 0.6688631487938203, "grad_norm": 314.1948547363281, "learning_rate": 3.091098678505985e-06, "loss": 11.8473, "step": 331110 }, { "epoch": 0.6688833494265041, "grad_norm": 338.0782775878906, "learning_rate": 3.0907760579568135e-06, "loss": 26.9315, "step": 331120 }, { "epoch": 0.6689035500591879, "grad_norm": 521.5413818359375, "learning_rate": 3.0904534467129677e-06, "loss": 13.1927, "step": 331130 }, { "epoch": 0.6689237506918717, "grad_norm": 256.4883728027344, "learning_rate": 3.0901308447760236e-06, "loss": 8.9395, "step": 331140 }, { "epoch": 0.6689439513245555, "grad_norm": 87.1355209350586, "learning_rate": 3.0898082521475463e-06, "loss": 16.4136, "step": 331150 }, { "epoch": 0.6689641519572394, "grad_norm": 803.7642211914062, "learning_rate": 3.089485668829113e-06, "loss": 30.4588, "step": 331160 }, { "epoch": 0.6689843525899232, "grad_norm": 399.9955749511719, "learning_rate": 3.089163094822296e-06, "loss": 27.5492, "step": 331170 }, { "epoch": 0.6690045532226069, "grad_norm": 872.3381958007812, "learning_rate": 3.0888405301286662e-06, "loss": 22.2828, "step": 331180 }, { "epoch": 0.6690247538552907, "grad_norm": 45.302310943603516, "learning_rate": 3.088517974749795e-06, "loss": 7.9757, "step": 331190 }, { "epoch": 0.6690449544879745, "grad_norm": 189.54185485839844, "learning_rate": 3.088195428687254e-06, "loss": 16.7829, "step": 331200 }, { "epoch": 0.6690651551206583, "grad_norm": 202.18508911132812, "learning_rate": 3.0878728919426203e-06, "loss": 13.3182, "step": 331210 }, { "epoch": 0.6690853557533422, "grad_norm": 142.65182495117188, "learning_rate": 3.0875503645174586e-06, "loss": 21.926, "step": 331220 }, { "epoch": 0.669105556386026, "grad_norm": 390.1210632324219, "learning_rate": 3.087227846413346e-06, "loss": 18.2274, "step": 331230 }, { "epoch": 0.6691257570187098, "grad_norm": 316.0498046875, "learning_rate": 3.0869053376318538e-06, "loss": 20.5961, "step": 331240 }, { "epoch": 0.6691459576513936, "grad_norm": 215.49990844726562, "learning_rate": 3.0865828381745515e-06, "loss": 21.0117, "step": 331250 }, { "epoch": 0.6691661582840774, "grad_norm": 330.5447692871094, "learning_rate": 3.086260348043013e-06, "loss": 20.8303, "step": 331260 }, { "epoch": 0.6691863589167613, "grad_norm": 318.5399475097656, "learning_rate": 3.08593786723881e-06, "loss": 15.861, "step": 331270 }, { "epoch": 0.6692065595494451, "grad_norm": 436.04046630859375, "learning_rate": 3.0856153957635127e-06, "loss": 13.6951, "step": 331280 }, { "epoch": 0.6692267601821289, "grad_norm": 146.41897583007812, "learning_rate": 3.085292933618693e-06, "loss": 9.0057, "step": 331290 }, { "epoch": 0.6692469608148127, "grad_norm": 181.30020141601562, "learning_rate": 3.0849704808059266e-06, "loss": 17.9174, "step": 331300 }, { "epoch": 0.6692671614474965, "grad_norm": 262.21710205078125, "learning_rate": 3.0846480373267783e-06, "loss": 27.3874, "step": 331310 }, { "epoch": 0.6692873620801804, "grad_norm": 219.46510314941406, "learning_rate": 3.0843256031828245e-06, "loss": 10.8851, "step": 331320 }, { "epoch": 0.6693075627128642, "grad_norm": 684.2416381835938, "learning_rate": 3.0840031783756353e-06, "loss": 28.435, "step": 331330 }, { "epoch": 0.669327763345548, "grad_norm": 230.88534545898438, "learning_rate": 3.0836807629067828e-06, "loss": 13.2468, "step": 331340 }, { "epoch": 0.6693479639782318, "grad_norm": 437.8140563964844, "learning_rate": 3.083358356777837e-06, "loss": 22.1787, "step": 331350 }, { "epoch": 0.6693681646109156, "grad_norm": 161.6771240234375, "learning_rate": 3.083035959990369e-06, "loss": 8.6034, "step": 331360 }, { "epoch": 0.6693883652435995, "grad_norm": 247.14523315429688, "learning_rate": 3.082713572545955e-06, "loss": 15.9326, "step": 331370 }, { "epoch": 0.6694085658762833, "grad_norm": 203.67486572265625, "learning_rate": 3.0823911944461593e-06, "loss": 13.0478, "step": 331380 }, { "epoch": 0.6694287665089671, "grad_norm": 181.4001922607422, "learning_rate": 3.082068825692557e-06, "loss": 7.7641, "step": 331390 }, { "epoch": 0.6694489671416509, "grad_norm": 191.79580688476562, "learning_rate": 3.0817464662867192e-06, "loss": 20.1509, "step": 331400 }, { "epoch": 0.6694691677743347, "grad_norm": 213.02264404296875, "learning_rate": 3.081424116230216e-06, "loss": 25.6745, "step": 331410 }, { "epoch": 0.6694893684070186, "grad_norm": 199.1134033203125, "learning_rate": 3.0811017755246185e-06, "loss": 8.721, "step": 331420 }, { "epoch": 0.6695095690397023, "grad_norm": 30.609054565429688, "learning_rate": 3.0807794441715e-06, "loss": 16.8152, "step": 331430 }, { "epoch": 0.6695297696723861, "grad_norm": 275.2300720214844, "learning_rate": 3.080457122172429e-06, "loss": 10.0681, "step": 331440 }, { "epoch": 0.6695499703050699, "grad_norm": 32.076087951660156, "learning_rate": 3.080134809528975e-06, "loss": 20.8148, "step": 331450 }, { "epoch": 0.6695701709377537, "grad_norm": 0.0, "learning_rate": 3.0798125062427143e-06, "loss": 19.073, "step": 331460 }, { "epoch": 0.6695903715704375, "grad_norm": 166.53225708007812, "learning_rate": 3.079490212315212e-06, "loss": 21.7737, "step": 331470 }, { "epoch": 0.6696105722031214, "grad_norm": 311.2042236328125, "learning_rate": 3.0791679277480422e-06, "loss": 11.5151, "step": 331480 }, { "epoch": 0.6696307728358052, "grad_norm": 326.3489685058594, "learning_rate": 3.078845652542775e-06, "loss": 20.8001, "step": 331490 }, { "epoch": 0.669650973468489, "grad_norm": 306.3893127441406, "learning_rate": 3.078523386700982e-06, "loss": 22.2422, "step": 331500 }, { "epoch": 0.6696711741011728, "grad_norm": 458.99102783203125, "learning_rate": 3.0782011302242326e-06, "loss": 15.4389, "step": 331510 }, { "epoch": 0.6696913747338566, "grad_norm": 313.07781982421875, "learning_rate": 3.077878883114096e-06, "loss": 16.9378, "step": 331520 }, { "epoch": 0.6697115753665405, "grad_norm": 351.966064453125, "learning_rate": 3.0775566453721475e-06, "loss": 27.5594, "step": 331530 }, { "epoch": 0.6697317759992243, "grad_norm": 432.4601745605469, "learning_rate": 3.077234416999953e-06, "loss": 11.5481, "step": 331540 }, { "epoch": 0.6697519766319081, "grad_norm": 206.66233825683594, "learning_rate": 3.0769121979990845e-06, "loss": 20.3376, "step": 331550 }, { "epoch": 0.6697721772645919, "grad_norm": 687.8001098632812, "learning_rate": 3.0765899883711148e-06, "loss": 31.9339, "step": 331560 }, { "epoch": 0.6697923778972757, "grad_norm": 1346.5186767578125, "learning_rate": 3.0762677881176102e-06, "loss": 27.994, "step": 331570 }, { "epoch": 0.6698125785299596, "grad_norm": 220.98043823242188, "learning_rate": 3.075945597240143e-06, "loss": 23.4637, "step": 331580 }, { "epoch": 0.6698327791626434, "grad_norm": 278.98773193359375, "learning_rate": 3.0756234157402854e-06, "loss": 25.1914, "step": 331590 }, { "epoch": 0.6698529797953272, "grad_norm": 176.63973999023438, "learning_rate": 3.0753012436196033e-06, "loss": 29.1256, "step": 331600 }, { "epoch": 0.669873180428011, "grad_norm": 300.1166687011719, "learning_rate": 3.074979080879671e-06, "loss": 10.915, "step": 331610 }, { "epoch": 0.6698933810606948, "grad_norm": 261.4768371582031, "learning_rate": 3.074656927522058e-06, "loss": 26.0751, "step": 331620 }, { "epoch": 0.6699135816933787, "grad_norm": 277.7759094238281, "learning_rate": 3.0743347835483323e-06, "loss": 30.0766, "step": 331630 }, { "epoch": 0.6699337823260625, "grad_norm": 219.34262084960938, "learning_rate": 3.074012648960065e-06, "loss": 16.2877, "step": 331640 }, { "epoch": 0.6699539829587463, "grad_norm": 179.38046264648438, "learning_rate": 3.0736905237588275e-06, "loss": 16.0905, "step": 331650 }, { "epoch": 0.6699741835914301, "grad_norm": 275.59912109375, "learning_rate": 3.073368407946189e-06, "loss": 15.6146, "step": 331660 }, { "epoch": 0.669994384224114, "grad_norm": 58.24690246582031, "learning_rate": 3.0730463015237177e-06, "loss": 11.9256, "step": 331670 }, { "epoch": 0.6700145848567978, "grad_norm": 91.89273071289062, "learning_rate": 3.072724204492985e-06, "loss": 12.3811, "step": 331680 }, { "epoch": 0.6700347854894815, "grad_norm": 206.01744079589844, "learning_rate": 3.072402116855563e-06, "loss": 15.4052, "step": 331690 }, { "epoch": 0.6700549861221653, "grad_norm": 224.93360900878906, "learning_rate": 3.0720800386130176e-06, "loss": 19.2328, "step": 331700 }, { "epoch": 0.6700751867548491, "grad_norm": 48.017799377441406, "learning_rate": 3.071757969766921e-06, "loss": 16.6667, "step": 331710 }, { "epoch": 0.6700953873875329, "grad_norm": 452.0176086425781, "learning_rate": 3.0714359103188422e-06, "loss": 13.7728, "step": 331720 }, { "epoch": 0.6701155880202168, "grad_norm": 251.083984375, "learning_rate": 3.0711138602703505e-06, "loss": 17.1112, "step": 331730 }, { "epoch": 0.6701357886529006, "grad_norm": 359.21026611328125, "learning_rate": 3.0707918196230147e-06, "loss": 11.2905, "step": 331740 }, { "epoch": 0.6701559892855844, "grad_norm": 119.56275939941406, "learning_rate": 3.0704697883784083e-06, "loss": 18.997, "step": 331750 }, { "epoch": 0.6701761899182682, "grad_norm": 341.324951171875, "learning_rate": 3.0701477665380958e-06, "loss": 15.3908, "step": 331760 }, { "epoch": 0.670196390550952, "grad_norm": 222.26373291015625, "learning_rate": 3.069825754103649e-06, "loss": 17.9971, "step": 331770 }, { "epoch": 0.6702165911836359, "grad_norm": 272.79486083984375, "learning_rate": 3.0695037510766384e-06, "loss": 27.3258, "step": 331780 }, { "epoch": 0.6702367918163197, "grad_norm": 442.05096435546875, "learning_rate": 3.069181757458633e-06, "loss": 31.3219, "step": 331790 }, { "epoch": 0.6702569924490035, "grad_norm": 718.4354248046875, "learning_rate": 3.0688597732512004e-06, "loss": 24.0957, "step": 331800 }, { "epoch": 0.6702771930816873, "grad_norm": 423.1435241699219, "learning_rate": 3.0685377984559104e-06, "loss": 23.6844, "step": 331810 }, { "epoch": 0.6702973937143711, "grad_norm": 155.46742248535156, "learning_rate": 3.0682158330743363e-06, "loss": 17.1495, "step": 331820 }, { "epoch": 0.670317594347055, "grad_norm": 52.52433395385742, "learning_rate": 3.0678938771080395e-06, "loss": 24.5135, "step": 331830 }, { "epoch": 0.6703377949797388, "grad_norm": 301.4659118652344, "learning_rate": 3.067571930558596e-06, "loss": 36.0611, "step": 331840 }, { "epoch": 0.6703579956124226, "grad_norm": 181.6322784423828, "learning_rate": 3.0672499934275725e-06, "loss": 20.3914, "step": 331850 }, { "epoch": 0.6703781962451064, "grad_norm": 27.045597076416016, "learning_rate": 3.066928065716538e-06, "loss": 16.404, "step": 331860 }, { "epoch": 0.6703983968777902, "grad_norm": 252.5352325439453, "learning_rate": 3.066606147427061e-06, "loss": 18.6995, "step": 331870 }, { "epoch": 0.6704185975104741, "grad_norm": 211.10256958007812, "learning_rate": 3.066284238560713e-06, "loss": 13.0739, "step": 331880 }, { "epoch": 0.6704387981431579, "grad_norm": 261.5566101074219, "learning_rate": 3.0659623391190583e-06, "loss": 19.8656, "step": 331890 }, { "epoch": 0.6704589987758417, "grad_norm": 183.52748107910156, "learning_rate": 3.0656404491036696e-06, "loss": 15.3951, "step": 331900 }, { "epoch": 0.6704791994085255, "grad_norm": 549.4513549804688, "learning_rate": 3.0653185685161168e-06, "loss": 25.6294, "step": 331910 }, { "epoch": 0.6704994000412093, "grad_norm": 100.58812713623047, "learning_rate": 3.064996697357964e-06, "loss": 14.0695, "step": 331920 }, { "epoch": 0.6705196006738932, "grad_norm": 0.47702687978744507, "learning_rate": 3.0646748356307833e-06, "loss": 24.0718, "step": 331930 }, { "epoch": 0.670539801306577, "grad_norm": 200.68484497070312, "learning_rate": 3.0643529833361425e-06, "loss": 9.8391, "step": 331940 }, { "epoch": 0.6705600019392607, "grad_norm": 382.94122314453125, "learning_rate": 3.0640311404756116e-06, "loss": 31.7526, "step": 331950 }, { "epoch": 0.6705802025719445, "grad_norm": 0.0, "learning_rate": 3.063709307050757e-06, "loss": 6.295, "step": 331960 }, { "epoch": 0.6706004032046283, "grad_norm": 421.0754699707031, "learning_rate": 3.063387483063148e-06, "loss": 18.0826, "step": 331970 }, { "epoch": 0.6706206038373121, "grad_norm": 455.7759094238281, "learning_rate": 3.063065668514357e-06, "loss": 27.9424, "step": 331980 }, { "epoch": 0.670640804469996, "grad_norm": 59.10359191894531, "learning_rate": 3.0627438634059447e-06, "loss": 17.4189, "step": 331990 }, { "epoch": 0.6706610051026798, "grad_norm": 223.65179443359375, "learning_rate": 3.0624220677394854e-06, "loss": 15.53, "step": 332000 }, { "epoch": 0.6706812057353636, "grad_norm": 277.2689514160156, "learning_rate": 3.062100281516547e-06, "loss": 23.4384, "step": 332010 }, { "epoch": 0.6707014063680474, "grad_norm": 235.39572143554688, "learning_rate": 3.0617785047386953e-06, "loss": 13.0383, "step": 332020 }, { "epoch": 0.6707216070007312, "grad_norm": 364.91680908203125, "learning_rate": 3.0614567374075007e-06, "loss": 11.3449, "step": 332030 }, { "epoch": 0.6707418076334151, "grad_norm": 287.73370361328125, "learning_rate": 3.0611349795245317e-06, "loss": 22.1853, "step": 332040 }, { "epoch": 0.6707620082660989, "grad_norm": 220.0065460205078, "learning_rate": 3.060813231091354e-06, "loss": 8.4529, "step": 332050 }, { "epoch": 0.6707822088987827, "grad_norm": 515.931640625, "learning_rate": 3.0604914921095373e-06, "loss": 18.45, "step": 332060 }, { "epoch": 0.6708024095314665, "grad_norm": 460.24267578125, "learning_rate": 3.0601697625806527e-06, "loss": 27.9974, "step": 332070 }, { "epoch": 0.6708226101641503, "grad_norm": 322.69915771484375, "learning_rate": 3.0598480425062626e-06, "loss": 11.548, "step": 332080 }, { "epoch": 0.6708428107968342, "grad_norm": 210.76174926757812, "learning_rate": 3.0595263318879383e-06, "loss": 24.3369, "step": 332090 }, { "epoch": 0.670863011429518, "grad_norm": 174.38644409179688, "learning_rate": 3.059204630727247e-06, "loss": 18.1271, "step": 332100 }, { "epoch": 0.6708832120622018, "grad_norm": 171.72262573242188, "learning_rate": 3.0588829390257592e-06, "loss": 16.4482, "step": 332110 }, { "epoch": 0.6709034126948856, "grad_norm": 170.3275909423828, "learning_rate": 3.0585612567850385e-06, "loss": 14.4047, "step": 332120 }, { "epoch": 0.6709236133275694, "grad_norm": 203.87681579589844, "learning_rate": 3.0582395840066537e-06, "loss": 10.2488, "step": 332130 }, { "epoch": 0.6709438139602533, "grad_norm": 223.12313842773438, "learning_rate": 3.0579179206921773e-06, "loss": 27.8801, "step": 332140 }, { "epoch": 0.6709640145929371, "grad_norm": 259.4765625, "learning_rate": 3.0575962668431704e-06, "loss": 18.9796, "step": 332150 }, { "epoch": 0.6709842152256209, "grad_norm": 458.16595458984375, "learning_rate": 3.057274622461204e-06, "loss": 17.1179, "step": 332160 }, { "epoch": 0.6710044158583047, "grad_norm": 190.01815795898438, "learning_rate": 3.0569529875478465e-06, "loss": 10.2253, "step": 332170 }, { "epoch": 0.6710246164909885, "grad_norm": 50.003536224365234, "learning_rate": 3.0566313621046638e-06, "loss": 19.0613, "step": 332180 }, { "epoch": 0.6710448171236724, "grad_norm": 213.7586212158203, "learning_rate": 3.0563097461332246e-06, "loss": 10.5072, "step": 332190 }, { "epoch": 0.6710650177563561, "grad_norm": 271.9057922363281, "learning_rate": 3.0559881396350967e-06, "loss": 35.236, "step": 332200 }, { "epoch": 0.6710852183890399, "grad_norm": 386.05206298828125, "learning_rate": 3.0556665426118456e-06, "loss": 24.5937, "step": 332210 }, { "epoch": 0.6711054190217237, "grad_norm": 300.0298767089844, "learning_rate": 3.055344955065039e-06, "loss": 17.7199, "step": 332220 }, { "epoch": 0.6711256196544075, "grad_norm": 450.6624755859375, "learning_rate": 3.0550233769962468e-06, "loss": 17.4476, "step": 332230 }, { "epoch": 0.6711458202870914, "grad_norm": 318.8321228027344, "learning_rate": 3.0547018084070344e-06, "loss": 15.4875, "step": 332240 }, { "epoch": 0.6711660209197752, "grad_norm": 207.26226806640625, "learning_rate": 3.0543802492989693e-06, "loss": 23.1521, "step": 332250 }, { "epoch": 0.671186221552459, "grad_norm": 222.77835083007812, "learning_rate": 3.054058699673619e-06, "loss": 9.1562, "step": 332260 }, { "epoch": 0.6712064221851428, "grad_norm": 170.42669677734375, "learning_rate": 3.0537371595325515e-06, "loss": 12.2088, "step": 332270 }, { "epoch": 0.6712266228178266, "grad_norm": 419.1339416503906, "learning_rate": 3.0534156288773307e-06, "loss": 13.9474, "step": 332280 }, { "epoch": 0.6712468234505105, "grad_norm": 131.76470947265625, "learning_rate": 3.0530941077095273e-06, "loss": 27.2033, "step": 332290 }, { "epoch": 0.6712670240831943, "grad_norm": 451.1739501953125, "learning_rate": 3.0527725960307083e-06, "loss": 12.5233, "step": 332300 }, { "epoch": 0.6712872247158781, "grad_norm": 180.2779083251953, "learning_rate": 3.0524510938424377e-06, "loss": 15.3138, "step": 332310 }, { "epoch": 0.6713074253485619, "grad_norm": 415.981689453125, "learning_rate": 3.0521296011462852e-06, "loss": 15.3159, "step": 332320 }, { "epoch": 0.6713276259812457, "grad_norm": 269.37030029296875, "learning_rate": 3.0518081179438173e-06, "loss": 9.4502, "step": 332330 }, { "epoch": 0.6713478266139296, "grad_norm": 187.88238525390625, "learning_rate": 3.0514866442365998e-06, "loss": 21.815, "step": 332340 }, { "epoch": 0.6713680272466134, "grad_norm": 173.30953979492188, "learning_rate": 3.0511651800261987e-06, "loss": 22.3439, "step": 332350 }, { "epoch": 0.6713882278792972, "grad_norm": 58.64520263671875, "learning_rate": 3.0508437253141855e-06, "loss": 14.9716, "step": 332360 }, { "epoch": 0.671408428511981, "grad_norm": 405.302734375, "learning_rate": 3.0505222801021193e-06, "loss": 22.312, "step": 332370 }, { "epoch": 0.6714286291446648, "grad_norm": 284.43121337890625, "learning_rate": 3.0502008443915733e-06, "loss": 16.5736, "step": 332380 }, { "epoch": 0.6714488297773487, "grad_norm": 988.026611328125, "learning_rate": 3.049879418184112e-06, "loss": 23.978, "step": 332390 }, { "epoch": 0.6714690304100325, "grad_norm": 312.5244140625, "learning_rate": 3.049558001481302e-06, "loss": 18.5936, "step": 332400 }, { "epoch": 0.6714892310427163, "grad_norm": 368.8445739746094, "learning_rate": 3.0492365942847097e-06, "loss": 22.0872, "step": 332410 }, { "epoch": 0.6715094316754001, "grad_norm": 16.96695327758789, "learning_rate": 3.0489151965958998e-06, "loss": 10.0879, "step": 332420 }, { "epoch": 0.6715296323080839, "grad_norm": 259.5863342285156, "learning_rate": 3.0485938084164445e-06, "loss": 22.3564, "step": 332430 }, { "epoch": 0.6715498329407678, "grad_norm": 206.19822692871094, "learning_rate": 3.048272429747903e-06, "loss": 18.0177, "step": 332440 }, { "epoch": 0.6715700335734516, "grad_norm": 185.2842559814453, "learning_rate": 3.047951060591845e-06, "loss": 14.124, "step": 332450 }, { "epoch": 0.6715902342061353, "grad_norm": 501.524658203125, "learning_rate": 3.047629700949839e-06, "loss": 17.2793, "step": 332460 }, { "epoch": 0.6716104348388191, "grad_norm": 289.24578857421875, "learning_rate": 3.0473083508234474e-06, "loss": 20.8321, "step": 332470 }, { "epoch": 0.6716306354715029, "grad_norm": 227.49168395996094, "learning_rate": 3.0469870102142387e-06, "loss": 15.5785, "step": 332480 }, { "epoch": 0.6716508361041867, "grad_norm": 225.64573669433594, "learning_rate": 3.046665679123778e-06, "loss": 17.7805, "step": 332490 }, { "epoch": 0.6716710367368706, "grad_norm": 236.8920440673828, "learning_rate": 3.0463443575536324e-06, "loss": 22.3429, "step": 332500 }, { "epoch": 0.6716912373695544, "grad_norm": 440.2791442871094, "learning_rate": 3.046023045505366e-06, "loss": 12.7643, "step": 332510 }, { "epoch": 0.6717114380022382, "grad_norm": 483.2056579589844, "learning_rate": 3.045701742980549e-06, "loss": 31.9693, "step": 332520 }, { "epoch": 0.671731638634922, "grad_norm": 194.8018341064453, "learning_rate": 3.0453804499807416e-06, "loss": 13.2133, "step": 332530 }, { "epoch": 0.6717518392676058, "grad_norm": 274.1737976074219, "learning_rate": 3.045059166507515e-06, "loss": 17.2357, "step": 332540 }, { "epoch": 0.6717720399002897, "grad_norm": 186.00022888183594, "learning_rate": 3.0447378925624316e-06, "loss": 29.1649, "step": 332550 }, { "epoch": 0.6717922405329735, "grad_norm": 138.18545532226562, "learning_rate": 3.04441662814706e-06, "loss": 14.8852, "step": 332560 }, { "epoch": 0.6718124411656573, "grad_norm": 107.45458984375, "learning_rate": 3.044095373262963e-06, "loss": 20.0961, "step": 332570 }, { "epoch": 0.6718326417983411, "grad_norm": 155.92657470703125, "learning_rate": 3.043774127911707e-06, "loss": 19.4465, "step": 332580 }, { "epoch": 0.6718528424310249, "grad_norm": 138.7040557861328, "learning_rate": 3.0434528920948626e-06, "loss": 15.1347, "step": 332590 }, { "epoch": 0.6718730430637088, "grad_norm": 156.09307861328125, "learning_rate": 3.043131665813988e-06, "loss": 15.4319, "step": 332600 }, { "epoch": 0.6718932436963926, "grad_norm": 551.9111938476562, "learning_rate": 3.0428104490706536e-06, "loss": 18.3667, "step": 332610 }, { "epoch": 0.6719134443290764, "grad_norm": 29.304195404052734, "learning_rate": 3.0424892418664244e-06, "loss": 5.4627, "step": 332620 }, { "epoch": 0.6719336449617602, "grad_norm": 123.6796875, "learning_rate": 3.0421680442028644e-06, "loss": 11.9492, "step": 332630 }, { "epoch": 0.671953845594444, "grad_norm": 370.7620849609375, "learning_rate": 3.0418468560815396e-06, "loss": 15.9277, "step": 332640 }, { "epoch": 0.6719740462271279, "grad_norm": 0.0, "learning_rate": 3.041525677504017e-06, "loss": 12.2666, "step": 332650 }, { "epoch": 0.6719942468598117, "grad_norm": 343.7939147949219, "learning_rate": 3.0412045084718597e-06, "loss": 12.2568, "step": 332660 }, { "epoch": 0.6720144474924955, "grad_norm": 537.5650634765625, "learning_rate": 3.0408833489866326e-06, "loss": 20.992, "step": 332670 }, { "epoch": 0.6720346481251793, "grad_norm": 359.6012878417969, "learning_rate": 3.040562199049906e-06, "loss": 24.1977, "step": 332680 }, { "epoch": 0.6720548487578631, "grad_norm": 316.32476806640625, "learning_rate": 3.040241058663238e-06, "loss": 18.1037, "step": 332690 }, { "epoch": 0.672075049390547, "grad_norm": 760.1456298828125, "learning_rate": 3.0399199278281986e-06, "loss": 24.6471, "step": 332700 }, { "epoch": 0.6720952500232307, "grad_norm": 56.18035888671875, "learning_rate": 3.039598806546352e-06, "loss": 11.5116, "step": 332710 }, { "epoch": 0.6721154506559145, "grad_norm": 1.2302381992340088, "learning_rate": 3.039277694819264e-06, "loss": 13.1916, "step": 332720 }, { "epoch": 0.6721356512885983, "grad_norm": 389.9693603515625, "learning_rate": 3.0389565926484974e-06, "loss": 14.9929, "step": 332730 }, { "epoch": 0.6721558519212821, "grad_norm": 133.5906524658203, "learning_rate": 3.038635500035618e-06, "loss": 18.0681, "step": 332740 }, { "epoch": 0.672176052553966, "grad_norm": 153.5557403564453, "learning_rate": 3.0383144169821944e-06, "loss": 24.7632, "step": 332750 }, { "epoch": 0.6721962531866498, "grad_norm": 1895.0758056640625, "learning_rate": 3.0379933434897846e-06, "loss": 44.9274, "step": 332760 }, { "epoch": 0.6722164538193336, "grad_norm": 198.71966552734375, "learning_rate": 3.0376722795599587e-06, "loss": 11.8911, "step": 332770 }, { "epoch": 0.6722366544520174, "grad_norm": 113.03650665283203, "learning_rate": 3.0373512251942817e-06, "loss": 14.0951, "step": 332780 }, { "epoch": 0.6722568550847012, "grad_norm": 4.049925327301025, "learning_rate": 3.0370301803943147e-06, "loss": 17.4549, "step": 332790 }, { "epoch": 0.672277055717385, "grad_norm": 327.6578674316406, "learning_rate": 3.0367091451616254e-06, "loss": 14.3203, "step": 332800 }, { "epoch": 0.6722972563500689, "grad_norm": 346.0973205566406, "learning_rate": 3.0363881194977784e-06, "loss": 22.1619, "step": 332810 }, { "epoch": 0.6723174569827527, "grad_norm": 29.40131187438965, "learning_rate": 3.0360671034043365e-06, "loss": 7.8458, "step": 332820 }, { "epoch": 0.6723376576154365, "grad_norm": 168.28353881835938, "learning_rate": 3.035746096882864e-06, "loss": 12.7973, "step": 332830 }, { "epoch": 0.6723578582481203, "grad_norm": 228.4603729248047, "learning_rate": 3.0354250999349277e-06, "loss": 23.6359, "step": 332840 }, { "epoch": 0.6723780588808042, "grad_norm": 315.78863525390625, "learning_rate": 3.035104112562093e-06, "loss": 15.1958, "step": 332850 }, { "epoch": 0.672398259513488, "grad_norm": 434.2249450683594, "learning_rate": 3.034783134765921e-06, "loss": 28.8911, "step": 332860 }, { "epoch": 0.6724184601461718, "grad_norm": 527.519775390625, "learning_rate": 3.034462166547978e-06, "loss": 23.1367, "step": 332870 }, { "epoch": 0.6724386607788556, "grad_norm": 292.14013671875, "learning_rate": 3.0341412079098285e-06, "loss": 19.1296, "step": 332880 }, { "epoch": 0.6724588614115394, "grad_norm": 257.3221740722656, "learning_rate": 3.033820258853035e-06, "loss": 19.6835, "step": 332890 }, { "epoch": 0.6724790620442233, "grad_norm": 149.30613708496094, "learning_rate": 3.033499319379163e-06, "loss": 18.3438, "step": 332900 }, { "epoch": 0.6724992626769071, "grad_norm": 245.29299926757812, "learning_rate": 3.033178389489779e-06, "loss": 10.3864, "step": 332910 }, { "epoch": 0.6725194633095909, "grad_norm": 361.60418701171875, "learning_rate": 3.0328574691864434e-06, "loss": 20.7258, "step": 332920 }, { "epoch": 0.6725396639422747, "grad_norm": 151.79356384277344, "learning_rate": 3.0325365584707216e-06, "loss": 14.7982, "step": 332930 }, { "epoch": 0.6725598645749585, "grad_norm": 189.50558471679688, "learning_rate": 3.03221565734418e-06, "loss": 9.0093, "step": 332940 }, { "epoch": 0.6725800652076424, "grad_norm": 359.44854736328125, "learning_rate": 3.0318947658083787e-06, "loss": 13.7786, "step": 332950 }, { "epoch": 0.6726002658403262, "grad_norm": 297.88836669921875, "learning_rate": 3.031573883864882e-06, "loss": 7.8709, "step": 332960 }, { "epoch": 0.6726204664730099, "grad_norm": 0.0, "learning_rate": 3.03125301151526e-06, "loss": 14.3071, "step": 332970 }, { "epoch": 0.6726406671056937, "grad_norm": 214.26348876953125, "learning_rate": 3.0309321487610682e-06, "loss": 16.2189, "step": 332980 }, { "epoch": 0.6726608677383775, "grad_norm": 0.0, "learning_rate": 3.030611295603876e-06, "loss": 14.4701, "step": 332990 }, { "epoch": 0.6726810683710613, "grad_norm": 510.4120178222656, "learning_rate": 3.030290452045245e-06, "loss": 17.7457, "step": 333000 }, { "epoch": 0.6727012690037452, "grad_norm": 280.46966552734375, "learning_rate": 3.02996961808674e-06, "loss": 21.9193, "step": 333010 }, { "epoch": 0.672721469636429, "grad_norm": 229.24493408203125, "learning_rate": 3.0296487937299235e-06, "loss": 11.9222, "step": 333020 }, { "epoch": 0.6727416702691128, "grad_norm": 229.23916625976562, "learning_rate": 3.0293279789763584e-06, "loss": 19.3467, "step": 333030 }, { "epoch": 0.6727618709017966, "grad_norm": 293.7392883300781, "learning_rate": 3.0290071738276132e-06, "loss": 20.4984, "step": 333040 }, { "epoch": 0.6727820715344804, "grad_norm": 568.3408203125, "learning_rate": 3.028686378285245e-06, "loss": 20.6434, "step": 333050 }, { "epoch": 0.6728022721671643, "grad_norm": 288.4912109375, "learning_rate": 3.0283655923508214e-06, "loss": 24.4921, "step": 333060 }, { "epoch": 0.6728224727998481, "grad_norm": 0.0, "learning_rate": 3.0280448160259057e-06, "loss": 24.9482, "step": 333070 }, { "epoch": 0.6728426734325319, "grad_norm": 139.5825958251953, "learning_rate": 3.027724049312059e-06, "loss": 17.2446, "step": 333080 }, { "epoch": 0.6728628740652157, "grad_norm": 237.73155212402344, "learning_rate": 3.0274032922108465e-06, "loss": 15.7391, "step": 333090 }, { "epoch": 0.6728830746978995, "grad_norm": 349.579833984375, "learning_rate": 3.0270825447238316e-06, "loss": 18.9809, "step": 333100 }, { "epoch": 0.6729032753305834, "grad_norm": 323.3945617675781, "learning_rate": 3.0267618068525773e-06, "loss": 15.4135, "step": 333110 }, { "epoch": 0.6729234759632672, "grad_norm": 344.6173400878906, "learning_rate": 3.0264410785986444e-06, "loss": 20.2486, "step": 333120 }, { "epoch": 0.672943676595951, "grad_norm": 429.5448913574219, "learning_rate": 3.026120359963602e-06, "loss": 12.5403, "step": 333130 }, { "epoch": 0.6729638772286348, "grad_norm": 461.1971740722656, "learning_rate": 3.025799650949006e-06, "loss": 14.139, "step": 333140 }, { "epoch": 0.6729840778613186, "grad_norm": 172.7980194091797, "learning_rate": 3.025478951556424e-06, "loss": 13.1029, "step": 333150 }, { "epoch": 0.6730042784940025, "grad_norm": 450.5623779296875, "learning_rate": 3.0251582617874187e-06, "loss": 19.7937, "step": 333160 }, { "epoch": 0.6730244791266863, "grad_norm": 235.656005859375, "learning_rate": 3.0248375816435537e-06, "loss": 24.5668, "step": 333170 }, { "epoch": 0.6730446797593701, "grad_norm": 234.2226104736328, "learning_rate": 3.024516911126389e-06, "loss": 12.365, "step": 333180 }, { "epoch": 0.6730648803920539, "grad_norm": 383.22607421875, "learning_rate": 3.024196250237489e-06, "loss": 20.3241, "step": 333190 }, { "epoch": 0.6730850810247377, "grad_norm": 138.4535369873047, "learning_rate": 3.023875598978419e-06, "loss": 18.2311, "step": 333200 }, { "epoch": 0.6731052816574216, "grad_norm": 286.28411865234375, "learning_rate": 3.0235549573507374e-06, "loss": 29.8482, "step": 333210 }, { "epoch": 0.6731254822901054, "grad_norm": 411.116943359375, "learning_rate": 3.0232343253560097e-06, "loss": 15.7359, "step": 333220 }, { "epoch": 0.6731456829227891, "grad_norm": 174.0987548828125, "learning_rate": 3.0229137029957993e-06, "loss": 19.315, "step": 333230 }, { "epoch": 0.6731658835554729, "grad_norm": 28.78957176208496, "learning_rate": 3.022593090271667e-06, "loss": 12.1534, "step": 333240 }, { "epoch": 0.6731860841881567, "grad_norm": 193.32630920410156, "learning_rate": 3.022272487185176e-06, "loss": 13.2468, "step": 333250 }, { "epoch": 0.6732062848208406, "grad_norm": 236.72549438476562, "learning_rate": 3.02195189373789e-06, "loss": 20.0106, "step": 333260 }, { "epoch": 0.6732264854535244, "grad_norm": 251.849365234375, "learning_rate": 3.0216313099313694e-06, "loss": 7.7679, "step": 333270 }, { "epoch": 0.6732466860862082, "grad_norm": 260.9183349609375, "learning_rate": 3.0213107357671767e-06, "loss": 5.8174, "step": 333280 }, { "epoch": 0.673266886718892, "grad_norm": 416.3025207519531, "learning_rate": 3.020990171246879e-06, "loss": 15.3497, "step": 333290 }, { "epoch": 0.6732870873515758, "grad_norm": 90.58746337890625, "learning_rate": 3.0206696163720317e-06, "loss": 21.8824, "step": 333300 }, { "epoch": 0.6733072879842597, "grad_norm": 192.61141967773438, "learning_rate": 3.0203490711442018e-06, "loss": 16.8602, "step": 333310 }, { "epoch": 0.6733274886169435, "grad_norm": 376.946533203125, "learning_rate": 3.0200285355649504e-06, "loss": 18.6512, "step": 333320 }, { "epoch": 0.6733476892496273, "grad_norm": 106.3067855834961, "learning_rate": 3.019708009635841e-06, "loss": 23.1537, "step": 333330 }, { "epoch": 0.6733678898823111, "grad_norm": 248.8255157470703, "learning_rate": 3.019387493358434e-06, "loss": 14.271, "step": 333340 }, { "epoch": 0.6733880905149949, "grad_norm": 188.04515075683594, "learning_rate": 3.0190669867342902e-06, "loss": 16.5977, "step": 333350 }, { "epoch": 0.6734082911476788, "grad_norm": 363.1977844238281, "learning_rate": 3.0187464897649774e-06, "loss": 23.6264, "step": 333360 }, { "epoch": 0.6734284917803626, "grad_norm": 248.37371826171875, "learning_rate": 3.0184260024520508e-06, "loss": 16.7568, "step": 333370 }, { "epoch": 0.6734486924130464, "grad_norm": 213.97256469726562, "learning_rate": 3.018105524797076e-06, "loss": 27.6112, "step": 333380 }, { "epoch": 0.6734688930457302, "grad_norm": 139.2911376953125, "learning_rate": 3.017785056801615e-06, "loss": 24.084, "step": 333390 }, { "epoch": 0.673489093678414, "grad_norm": 360.55548095703125, "learning_rate": 3.0174645984672298e-06, "loss": 13.2899, "step": 333400 }, { "epoch": 0.6735092943110979, "grad_norm": 356.72039794921875, "learning_rate": 3.0171441497954807e-06, "loss": 10.5405, "step": 333410 }, { "epoch": 0.6735294949437817, "grad_norm": 444.1822509765625, "learning_rate": 3.0168237107879315e-06, "loss": 20.2657, "step": 333420 }, { "epoch": 0.6735496955764655, "grad_norm": 323.69281005859375, "learning_rate": 3.0165032814461426e-06, "loss": 27.457, "step": 333430 }, { "epoch": 0.6735698962091493, "grad_norm": 344.68408203125, "learning_rate": 3.016182861771675e-06, "loss": 31.2071, "step": 333440 }, { "epoch": 0.6735900968418331, "grad_norm": 285.93145751953125, "learning_rate": 3.015862451766092e-06, "loss": 20.6553, "step": 333450 }, { "epoch": 0.673610297474517, "grad_norm": 394.1928405761719, "learning_rate": 3.0155420514309563e-06, "loss": 39.827, "step": 333460 }, { "epoch": 0.6736304981072008, "grad_norm": 199.3101348876953, "learning_rate": 3.0152216607678262e-06, "loss": 16.1378, "step": 333470 }, { "epoch": 0.6736506987398845, "grad_norm": 466.6239013671875, "learning_rate": 3.0149012797782655e-06, "loss": 28.3533, "step": 333480 }, { "epoch": 0.6736708993725683, "grad_norm": 112.45263671875, "learning_rate": 3.0145809084638366e-06, "loss": 22.5187, "step": 333490 }, { "epoch": 0.6736911000052521, "grad_norm": 221.2534942626953, "learning_rate": 3.0142605468260976e-06, "loss": 35.7436, "step": 333500 }, { "epoch": 0.6737113006379359, "grad_norm": 196.92904663085938, "learning_rate": 3.0139401948666112e-06, "loss": 13.4609, "step": 333510 }, { "epoch": 0.6737315012706198, "grad_norm": 155.88671875, "learning_rate": 3.013619852586942e-06, "loss": 27.1207, "step": 333520 }, { "epoch": 0.6737517019033036, "grad_norm": 54.74507522583008, "learning_rate": 3.013299519988647e-06, "loss": 14.7579, "step": 333530 }, { "epoch": 0.6737719025359874, "grad_norm": 9.543238639831543, "learning_rate": 3.0129791970732904e-06, "loss": 9.9017, "step": 333540 }, { "epoch": 0.6737921031686712, "grad_norm": 284.885009765625, "learning_rate": 3.012658883842432e-06, "loss": 12.1271, "step": 333550 }, { "epoch": 0.673812303801355, "grad_norm": 15.25284194946289, "learning_rate": 3.0123385802976323e-06, "loss": 15.8716, "step": 333560 }, { "epoch": 0.6738325044340389, "grad_norm": 140.10400390625, "learning_rate": 3.0120182864404535e-06, "loss": 15.0286, "step": 333570 }, { "epoch": 0.6738527050667227, "grad_norm": 643.2660522460938, "learning_rate": 3.0116980022724575e-06, "loss": 15.7307, "step": 333580 }, { "epoch": 0.6738729056994065, "grad_norm": 600.9986572265625, "learning_rate": 3.011377727795202e-06, "loss": 22.1904, "step": 333590 }, { "epoch": 0.6738931063320903, "grad_norm": 5.184057235717773, "learning_rate": 3.011057463010252e-06, "loss": 12.9883, "step": 333600 }, { "epoch": 0.6739133069647741, "grad_norm": 18.739248275756836, "learning_rate": 3.0107372079191656e-06, "loss": 10.7025, "step": 333610 }, { "epoch": 0.673933507597458, "grad_norm": 504.1667785644531, "learning_rate": 3.010416962523507e-06, "loss": 19.2096, "step": 333620 }, { "epoch": 0.6739537082301418, "grad_norm": 344.1811828613281, "learning_rate": 3.0100967268248326e-06, "loss": 12.2017, "step": 333630 }, { "epoch": 0.6739739088628256, "grad_norm": 310.1883544921875, "learning_rate": 3.009776500824706e-06, "loss": 8.6039, "step": 333640 }, { "epoch": 0.6739941094955094, "grad_norm": 167.34413146972656, "learning_rate": 3.009456284524688e-06, "loss": 13.9664, "step": 333650 }, { "epoch": 0.6740143101281932, "grad_norm": 270.0379333496094, "learning_rate": 3.0091360779263373e-06, "loss": 30.4156, "step": 333660 }, { "epoch": 0.6740345107608771, "grad_norm": 270.7711486816406, "learning_rate": 3.0088158810312163e-06, "loss": 16.2952, "step": 333670 }, { "epoch": 0.6740547113935609, "grad_norm": 228.63400268554688, "learning_rate": 3.0084956938408873e-06, "loss": 13.3605, "step": 333680 }, { "epoch": 0.6740749120262447, "grad_norm": 371.4848327636719, "learning_rate": 3.0081755163569066e-06, "loss": 21.6191, "step": 333690 }, { "epoch": 0.6740951126589285, "grad_norm": 227.57423400878906, "learning_rate": 3.007855348580837e-06, "loss": 24.9421, "step": 333700 }, { "epoch": 0.6741153132916123, "grad_norm": 238.7169647216797, "learning_rate": 3.0075351905142404e-06, "loss": 26.662, "step": 333710 }, { "epoch": 0.6741355139242962, "grad_norm": 435.3594970703125, "learning_rate": 3.0072150421586737e-06, "loss": 24.4591, "step": 333720 }, { "epoch": 0.67415571455698, "grad_norm": 272.6650085449219, "learning_rate": 3.0068949035156984e-06, "loss": 13.8834, "step": 333730 }, { "epoch": 0.6741759151896637, "grad_norm": 417.0477294921875, "learning_rate": 3.006574774586879e-06, "loss": 18.0192, "step": 333740 }, { "epoch": 0.6741961158223475, "grad_norm": 259.11151123046875, "learning_rate": 3.0062546553737692e-06, "loss": 9.2082, "step": 333750 }, { "epoch": 0.6742163164550313, "grad_norm": 131.14370727539062, "learning_rate": 3.0059345458779337e-06, "loss": 15.1615, "step": 333760 }, { "epoch": 0.6742365170877151, "grad_norm": 327.9660949707031, "learning_rate": 3.0056144461009313e-06, "loss": 12.2816, "step": 333770 }, { "epoch": 0.674256717720399, "grad_norm": 427.3758850097656, "learning_rate": 3.005294356044323e-06, "loss": 17.1479, "step": 333780 }, { "epoch": 0.6742769183530828, "grad_norm": 30.397554397583008, "learning_rate": 3.0049742757096666e-06, "loss": 12.0004, "step": 333790 }, { "epoch": 0.6742971189857666, "grad_norm": 344.6239929199219, "learning_rate": 3.004654205098524e-06, "loss": 17.6553, "step": 333800 }, { "epoch": 0.6743173196184504, "grad_norm": 233.8643341064453, "learning_rate": 3.004334144212456e-06, "loss": 12.9875, "step": 333810 }, { "epoch": 0.6743375202511342, "grad_norm": 400.61761474609375, "learning_rate": 3.0040140930530198e-06, "loss": 20.4, "step": 333820 }, { "epoch": 0.6743577208838181, "grad_norm": 7.976931571960449, "learning_rate": 3.003694051621777e-06, "loss": 14.5721, "step": 333830 }, { "epoch": 0.6743779215165019, "grad_norm": 202.4419708251953, "learning_rate": 3.003374019920289e-06, "loss": 11.2411, "step": 333840 }, { "epoch": 0.6743981221491857, "grad_norm": 298.5362243652344, "learning_rate": 3.003053997950112e-06, "loss": 12.0924, "step": 333850 }, { "epoch": 0.6744183227818695, "grad_norm": 706.1337890625, "learning_rate": 3.002733985712808e-06, "loss": 16.9609, "step": 333860 }, { "epoch": 0.6744385234145533, "grad_norm": 300.58477783203125, "learning_rate": 3.0024139832099374e-06, "loss": 13.9438, "step": 333870 }, { "epoch": 0.6744587240472372, "grad_norm": 226.91845703125, "learning_rate": 3.002093990443058e-06, "loss": 8.5824, "step": 333880 }, { "epoch": 0.674478924679921, "grad_norm": 102.35189819335938, "learning_rate": 3.001774007413729e-06, "loss": 9.4686, "step": 333890 }, { "epoch": 0.6744991253126048, "grad_norm": 187.64259338378906, "learning_rate": 3.001454034123512e-06, "loss": 18.6417, "step": 333900 }, { "epoch": 0.6745193259452886, "grad_norm": 217.66893005371094, "learning_rate": 3.0011340705739665e-06, "loss": 22.6871, "step": 333910 }, { "epoch": 0.6745395265779724, "grad_norm": 363.0769348144531, "learning_rate": 3.0008141167666505e-06, "loss": 17.7864, "step": 333920 }, { "epoch": 0.6745597272106563, "grad_norm": 44.632755279541016, "learning_rate": 3.0004941727031233e-06, "loss": 26.3007, "step": 333930 }, { "epoch": 0.6745799278433401, "grad_norm": 510.9098815917969, "learning_rate": 3.0001742383849464e-06, "loss": 28.2598, "step": 333940 }, { "epoch": 0.6746001284760239, "grad_norm": 287.8717346191406, "learning_rate": 2.9998543138136773e-06, "loss": 14.4654, "step": 333950 }, { "epoch": 0.6746203291087077, "grad_norm": 132.33734130859375, "learning_rate": 2.9995343989908743e-06, "loss": 17.6363, "step": 333960 }, { "epoch": 0.6746405297413915, "grad_norm": 264.1392822265625, "learning_rate": 2.9992144939181007e-06, "loss": 13.3397, "step": 333970 }, { "epoch": 0.6746607303740754, "grad_norm": 320.266845703125, "learning_rate": 2.99889459859691e-06, "loss": 11.1795, "step": 333980 }, { "epoch": 0.6746809310067591, "grad_norm": 231.57948303222656, "learning_rate": 2.9985747130288657e-06, "loss": 15.3678, "step": 333990 }, { "epoch": 0.6747011316394429, "grad_norm": 384.5777282714844, "learning_rate": 2.9982548372155264e-06, "loss": 19.7484, "step": 334000 }, { "epoch": 0.6747213322721267, "grad_norm": 317.07171630859375, "learning_rate": 2.9979349711584494e-06, "loss": 18.2188, "step": 334010 }, { "epoch": 0.6747415329048105, "grad_norm": 174.42295837402344, "learning_rate": 2.9976151148591937e-06, "loss": 28.9059, "step": 334020 }, { "epoch": 0.6747617335374944, "grad_norm": 513.528076171875, "learning_rate": 2.9972952683193207e-06, "loss": 29.6054, "step": 334030 }, { "epoch": 0.6747819341701782, "grad_norm": 701.4049072265625, "learning_rate": 2.9969754315403865e-06, "loss": 23.0436, "step": 334040 }, { "epoch": 0.674802134802862, "grad_norm": 210.93896484375, "learning_rate": 2.9966556045239504e-06, "loss": 14.8437, "step": 334050 }, { "epoch": 0.6748223354355458, "grad_norm": 395.0344543457031, "learning_rate": 2.9963357872715727e-06, "loss": 14.0737, "step": 334060 }, { "epoch": 0.6748425360682296, "grad_norm": 403.160400390625, "learning_rate": 2.9960159797848123e-06, "loss": 24.7082, "step": 334070 }, { "epoch": 0.6748627367009135, "grad_norm": 28.574522018432617, "learning_rate": 2.9956961820652265e-06, "loss": 33.9797, "step": 334080 }, { "epoch": 0.6748829373335973, "grad_norm": 261.02685546875, "learning_rate": 2.995376394114374e-06, "loss": 11.3927, "step": 334090 }, { "epoch": 0.6749031379662811, "grad_norm": 100.39602661132812, "learning_rate": 2.9950566159338146e-06, "loss": 8.1176, "step": 334100 }, { "epoch": 0.6749233385989649, "grad_norm": 79.23101806640625, "learning_rate": 2.9947368475251048e-06, "loss": 13.5636, "step": 334110 }, { "epoch": 0.6749435392316487, "grad_norm": 188.2363739013672, "learning_rate": 2.9944170888898037e-06, "loss": 9.9348, "step": 334120 }, { "epoch": 0.6749637398643326, "grad_norm": 232.61624145507812, "learning_rate": 2.994097340029474e-06, "loss": 23.9825, "step": 334130 }, { "epoch": 0.6749839404970164, "grad_norm": 346.6346740722656, "learning_rate": 2.9937776009456675e-06, "loss": 13.4831, "step": 334140 }, { "epoch": 0.6750041411297002, "grad_norm": 335.8316955566406, "learning_rate": 2.9934578716399465e-06, "loss": 20.5093, "step": 334150 }, { "epoch": 0.675024341762384, "grad_norm": 520.9103393554688, "learning_rate": 2.9931381521138693e-06, "loss": 19.4972, "step": 334160 }, { "epoch": 0.6750445423950678, "grad_norm": 718.1553955078125, "learning_rate": 2.9928184423689923e-06, "loss": 30.1972, "step": 334170 }, { "epoch": 0.6750647430277517, "grad_norm": 305.7046813964844, "learning_rate": 2.992498742406875e-06, "loss": 28.1806, "step": 334180 }, { "epoch": 0.6750849436604355, "grad_norm": 437.6705627441406, "learning_rate": 2.992179052229076e-06, "loss": 15.4074, "step": 334190 }, { "epoch": 0.6751051442931193, "grad_norm": 2.6685197353363037, "learning_rate": 2.991859371837151e-06, "loss": 13.1452, "step": 334200 }, { "epoch": 0.6751253449258031, "grad_norm": 346.10308837890625, "learning_rate": 2.991539701232661e-06, "loss": 11.801, "step": 334210 }, { "epoch": 0.6751455455584869, "grad_norm": 119.46017456054688, "learning_rate": 2.991220040417162e-06, "loss": 26.7348, "step": 334220 }, { "epoch": 0.6751657461911708, "grad_norm": 432.4471435546875, "learning_rate": 2.990900389392215e-06, "loss": 25.3948, "step": 334230 }, { "epoch": 0.6751859468238546, "grad_norm": 157.8863983154297, "learning_rate": 2.9905807481593746e-06, "loss": 7.5649, "step": 334240 }, { "epoch": 0.6752061474565383, "grad_norm": 331.9554138183594, "learning_rate": 2.9902611167202e-06, "loss": 28.8898, "step": 334250 }, { "epoch": 0.6752263480892221, "grad_norm": 154.85861206054688, "learning_rate": 2.9899414950762497e-06, "loss": 13.6966, "step": 334260 }, { "epoch": 0.6752465487219059, "grad_norm": 629.69091796875, "learning_rate": 2.9896218832290784e-06, "loss": 20.178, "step": 334270 }, { "epoch": 0.6752667493545897, "grad_norm": 206.41444396972656, "learning_rate": 2.9893022811802474e-06, "loss": 13.4715, "step": 334280 }, { "epoch": 0.6752869499872736, "grad_norm": 47.90966033935547, "learning_rate": 2.9889826889313144e-06, "loss": 14.2568, "step": 334290 }, { "epoch": 0.6753071506199574, "grad_norm": 521.7230224609375, "learning_rate": 2.9886631064838355e-06, "loss": 16.3984, "step": 334300 }, { "epoch": 0.6753273512526412, "grad_norm": 276.8358154296875, "learning_rate": 2.9883435338393674e-06, "loss": 10.5313, "step": 334310 }, { "epoch": 0.675347551885325, "grad_norm": 14.009960174560547, "learning_rate": 2.988023970999471e-06, "loss": 10.6222, "step": 334320 }, { "epoch": 0.6753677525180088, "grad_norm": 679.9617309570312, "learning_rate": 2.9877044179657e-06, "loss": 15.722, "step": 334330 }, { "epoch": 0.6753879531506927, "grad_norm": 376.03643798828125, "learning_rate": 2.9873848747396135e-06, "loss": 12.879, "step": 334340 }, { "epoch": 0.6754081537833765, "grad_norm": 615.2562866210938, "learning_rate": 2.9870653413227692e-06, "loss": 27.8578, "step": 334350 }, { "epoch": 0.6754283544160603, "grad_norm": 129.52642822265625, "learning_rate": 2.986745817716725e-06, "loss": 18.8399, "step": 334360 }, { "epoch": 0.6754485550487441, "grad_norm": 147.2682647705078, "learning_rate": 2.9864263039230378e-06, "loss": 15.6483, "step": 334370 }, { "epoch": 0.675468755681428, "grad_norm": 285.31781005859375, "learning_rate": 2.9861067999432634e-06, "loss": 16.2043, "step": 334380 }, { "epoch": 0.6754889563141118, "grad_norm": 0.0, "learning_rate": 2.9857873057789623e-06, "loss": 25.2436, "step": 334390 }, { "epoch": 0.6755091569467956, "grad_norm": 452.0600280761719, "learning_rate": 2.9854678214316875e-06, "loss": 27.2671, "step": 334400 }, { "epoch": 0.6755293575794794, "grad_norm": 257.0804138183594, "learning_rate": 2.9851483469029975e-06, "loss": 22.988, "step": 334410 }, { "epoch": 0.6755495582121632, "grad_norm": 282.0709228515625, "learning_rate": 2.984828882194453e-06, "loss": 11.2098, "step": 334420 }, { "epoch": 0.675569758844847, "grad_norm": 40.5400505065918, "learning_rate": 2.984509427307606e-06, "loss": 15.7022, "step": 334430 }, { "epoch": 0.6755899594775309, "grad_norm": 207.8923797607422, "learning_rate": 2.984189982244016e-06, "loss": 26.8828, "step": 334440 }, { "epoch": 0.6756101601102147, "grad_norm": 591.5491943359375, "learning_rate": 2.9838705470052397e-06, "loss": 21.185, "step": 334450 }, { "epoch": 0.6756303607428985, "grad_norm": 205.2107696533203, "learning_rate": 2.983551121592834e-06, "loss": 11.653, "step": 334460 }, { "epoch": 0.6756505613755823, "grad_norm": 342.103515625, "learning_rate": 2.983231706008355e-06, "loss": 17.4676, "step": 334470 }, { "epoch": 0.6756707620082661, "grad_norm": 0.0, "learning_rate": 2.982912300253361e-06, "loss": 30.1744, "step": 334480 }, { "epoch": 0.67569096264095, "grad_norm": 272.14044189453125, "learning_rate": 2.982592904329407e-06, "loss": 9.6941, "step": 334490 }, { "epoch": 0.6757111632736337, "grad_norm": 222.7139434814453, "learning_rate": 2.98227351823805e-06, "loss": 15.5204, "step": 334500 }, { "epoch": 0.6757313639063175, "grad_norm": 522.0514526367188, "learning_rate": 2.981954141980847e-06, "loss": 28.3308, "step": 334510 }, { "epoch": 0.6757515645390013, "grad_norm": 186.33221435546875, "learning_rate": 2.981634775559357e-06, "loss": 20.1568, "step": 334520 }, { "epoch": 0.6757717651716851, "grad_norm": 233.86412048339844, "learning_rate": 2.9813154189751327e-06, "loss": 15.2201, "step": 334530 }, { "epoch": 0.675791965804369, "grad_norm": 208.9385223388672, "learning_rate": 2.980996072229732e-06, "loss": 22.414, "step": 334540 }, { "epoch": 0.6758121664370528, "grad_norm": 295.3261413574219, "learning_rate": 2.9806767353247127e-06, "loss": 8.0875, "step": 334550 }, { "epoch": 0.6758323670697366, "grad_norm": 429.62750244140625, "learning_rate": 2.9803574082616294e-06, "loss": 19.2018, "step": 334560 }, { "epoch": 0.6758525677024204, "grad_norm": 292.5521545410156, "learning_rate": 2.980038091042038e-06, "loss": 13.8187, "step": 334570 }, { "epoch": 0.6758727683351042, "grad_norm": 382.871826171875, "learning_rate": 2.979718783667499e-06, "loss": 19.0677, "step": 334580 }, { "epoch": 0.675892968967788, "grad_norm": 28.216249465942383, "learning_rate": 2.9793994861395625e-06, "loss": 14.5465, "step": 334590 }, { "epoch": 0.6759131696004719, "grad_norm": 423.2978820800781, "learning_rate": 2.9790801984597885e-06, "loss": 14.6192, "step": 334600 }, { "epoch": 0.6759333702331557, "grad_norm": 305.8994445800781, "learning_rate": 2.978760920629734e-06, "loss": 25.4267, "step": 334610 }, { "epoch": 0.6759535708658395, "grad_norm": 165.6741485595703, "learning_rate": 2.9784416526509525e-06, "loss": 13.6677, "step": 334620 }, { "epoch": 0.6759737714985233, "grad_norm": 8.597569465637207, "learning_rate": 2.978122394525001e-06, "loss": 13.9246, "step": 334630 }, { "epoch": 0.6759939721312072, "grad_norm": 339.7349548339844, "learning_rate": 2.977803146253437e-06, "loss": 17.522, "step": 334640 }, { "epoch": 0.676014172763891, "grad_norm": 123.51504516601562, "learning_rate": 2.977483907837814e-06, "loss": 19.0131, "step": 334650 }, { "epoch": 0.6760343733965748, "grad_norm": 191.90304565429688, "learning_rate": 2.977164679279688e-06, "loss": 10.874, "step": 334660 }, { "epoch": 0.6760545740292586, "grad_norm": 311.2796630859375, "learning_rate": 2.9768454605806176e-06, "loss": 13.0502, "step": 334670 }, { "epoch": 0.6760747746619424, "grad_norm": 24.22745132446289, "learning_rate": 2.976526251742158e-06, "loss": 13.9182, "step": 334680 }, { "epoch": 0.6760949752946263, "grad_norm": 341.2733154296875, "learning_rate": 2.9762070527658628e-06, "loss": 24.9714, "step": 334690 }, { "epoch": 0.6761151759273101, "grad_norm": 184.78457641601562, "learning_rate": 2.9758878636532884e-06, "loss": 17.3146, "step": 334700 }, { "epoch": 0.6761353765599939, "grad_norm": 416.2509765625, "learning_rate": 2.975568684405993e-06, "loss": 22.7253, "step": 334710 }, { "epoch": 0.6761555771926777, "grad_norm": 475.3142395019531, "learning_rate": 2.9752495150255284e-06, "loss": 14.0495, "step": 334720 }, { "epoch": 0.6761757778253615, "grad_norm": 239.0454559326172, "learning_rate": 2.9749303555134512e-06, "loss": 32.6133, "step": 334730 }, { "epoch": 0.6761959784580454, "grad_norm": 0.0, "learning_rate": 2.9746112058713218e-06, "loss": 13.8926, "step": 334740 }, { "epoch": 0.6762161790907292, "grad_norm": 188.58450317382812, "learning_rate": 2.974292066100688e-06, "loss": 22.3667, "step": 334750 }, { "epoch": 0.6762363797234129, "grad_norm": 50.23929977416992, "learning_rate": 2.97397293620311e-06, "loss": 16.845, "step": 334760 }, { "epoch": 0.6762565803560967, "grad_norm": 324.8430480957031, "learning_rate": 2.9736538161801433e-06, "loss": 23.2228, "step": 334770 }, { "epoch": 0.6762767809887805, "grad_norm": 274.1354675292969, "learning_rate": 2.9733347060333408e-06, "loss": 16.5989, "step": 334780 }, { "epoch": 0.6762969816214643, "grad_norm": 261.1979675292969, "learning_rate": 2.9730156057642595e-06, "loss": 23.3204, "step": 334790 }, { "epoch": 0.6763171822541482, "grad_norm": 448.7723388671875, "learning_rate": 2.972696515374455e-06, "loss": 17.1181, "step": 334800 }, { "epoch": 0.676337382886832, "grad_norm": 410.4703674316406, "learning_rate": 2.972377434865481e-06, "loss": 17.0161, "step": 334810 }, { "epoch": 0.6763575835195158, "grad_norm": 122.3060302734375, "learning_rate": 2.972058364238892e-06, "loss": 15.7067, "step": 334820 }, { "epoch": 0.6763777841521996, "grad_norm": 271.7814025878906, "learning_rate": 2.9717393034962468e-06, "loss": 18.4608, "step": 334830 }, { "epoch": 0.6763979847848834, "grad_norm": 382.2372741699219, "learning_rate": 2.9714202526390985e-06, "loss": 13.3691, "step": 334840 }, { "epoch": 0.6764181854175673, "grad_norm": 157.1560516357422, "learning_rate": 2.9711012116690007e-06, "loss": 17.8249, "step": 334850 }, { "epoch": 0.6764383860502511, "grad_norm": 353.357177734375, "learning_rate": 2.97078218058751e-06, "loss": 11.3617, "step": 334860 }, { "epoch": 0.6764585866829349, "grad_norm": 606.1605834960938, "learning_rate": 2.9704631593961815e-06, "loss": 21.0824, "step": 334870 }, { "epoch": 0.6764787873156187, "grad_norm": 277.4498596191406, "learning_rate": 2.9701441480965683e-06, "loss": 23.3938, "step": 334880 }, { "epoch": 0.6764989879483025, "grad_norm": 366.080810546875, "learning_rate": 2.969825146690225e-06, "loss": 39.7464, "step": 334890 }, { "epoch": 0.6765191885809864, "grad_norm": 429.1801452636719, "learning_rate": 2.969506155178711e-06, "loss": 21.6854, "step": 334900 }, { "epoch": 0.6765393892136702, "grad_norm": 394.3572082519531, "learning_rate": 2.9691871735635753e-06, "loss": 26.4113, "step": 334910 }, { "epoch": 0.676559589846354, "grad_norm": 442.2204284667969, "learning_rate": 2.9688682018463755e-06, "loss": 22.6967, "step": 334920 }, { "epoch": 0.6765797904790378, "grad_norm": 109.42274475097656, "learning_rate": 2.968549240028667e-06, "loss": 7.5151, "step": 334930 }, { "epoch": 0.6765999911117216, "grad_norm": 24.885469436645508, "learning_rate": 2.968230288112002e-06, "loss": 10.362, "step": 334940 }, { "epoch": 0.6766201917444055, "grad_norm": 610.4267578125, "learning_rate": 2.9679113460979347e-06, "loss": 19.931, "step": 334950 }, { "epoch": 0.6766403923770893, "grad_norm": 249.1737518310547, "learning_rate": 2.967592413988023e-06, "loss": 23.8852, "step": 334960 }, { "epoch": 0.6766605930097731, "grad_norm": 539.6043701171875, "learning_rate": 2.9672734917838198e-06, "loss": 27.9937, "step": 334970 }, { "epoch": 0.6766807936424569, "grad_norm": 51.20063781738281, "learning_rate": 2.9669545794868777e-06, "loss": 7.8013, "step": 334980 }, { "epoch": 0.6767009942751407, "grad_norm": 131.95687866210938, "learning_rate": 2.9666356770987524e-06, "loss": 19.267, "step": 334990 }, { "epoch": 0.6767211949078246, "grad_norm": 98.27149200439453, "learning_rate": 2.966316784621e-06, "loss": 26.0336, "step": 335000 }, { "epoch": 0.6767413955405084, "grad_norm": 272.7865295410156, "learning_rate": 2.965997902055171e-06, "loss": 21.7937, "step": 335010 }, { "epoch": 0.6767615961731921, "grad_norm": 466.4196472167969, "learning_rate": 2.9656790294028216e-06, "loss": 12.675, "step": 335020 }, { "epoch": 0.6767817968058759, "grad_norm": 239.3995819091797, "learning_rate": 2.965360166665508e-06, "loss": 13.9852, "step": 335030 }, { "epoch": 0.6768019974385597, "grad_norm": 234.71551513671875, "learning_rate": 2.9650413138447797e-06, "loss": 23.1658, "step": 335040 }, { "epoch": 0.6768221980712436, "grad_norm": 37.0322380065918, "learning_rate": 2.964722470942194e-06, "loss": 16.2047, "step": 335050 }, { "epoch": 0.6768423987039274, "grad_norm": 495.7079162597656, "learning_rate": 2.964403637959305e-06, "loss": 23.6381, "step": 335060 }, { "epoch": 0.6768625993366112, "grad_norm": 260.9351806640625, "learning_rate": 2.9640848148976655e-06, "loss": 13.934, "step": 335070 }, { "epoch": 0.676882799969295, "grad_norm": 332.4140319824219, "learning_rate": 2.963766001758829e-06, "loss": 13.4507, "step": 335080 }, { "epoch": 0.6769030006019788, "grad_norm": 12.574808120727539, "learning_rate": 2.96344719854435e-06, "loss": 8.9157, "step": 335090 }, { "epoch": 0.6769232012346627, "grad_norm": 353.19061279296875, "learning_rate": 2.963128405255783e-06, "loss": 24.036, "step": 335100 }, { "epoch": 0.6769434018673465, "grad_norm": 273.3915710449219, "learning_rate": 2.96280962189468e-06, "loss": 15.1003, "step": 335110 }, { "epoch": 0.6769636025000303, "grad_norm": 151.5830078125, "learning_rate": 2.962490848462596e-06, "loss": 8.5987, "step": 335120 }, { "epoch": 0.6769838031327141, "grad_norm": 67.70381164550781, "learning_rate": 2.9621720849610857e-06, "loss": 13.0091, "step": 335130 }, { "epoch": 0.6770040037653979, "grad_norm": 227.26345825195312, "learning_rate": 2.961853331391701e-06, "loss": 35.5315, "step": 335140 }, { "epoch": 0.6770242043980818, "grad_norm": 144.87222290039062, "learning_rate": 2.9615345877559953e-06, "loss": 11.4046, "step": 335150 }, { "epoch": 0.6770444050307656, "grad_norm": 176.42308044433594, "learning_rate": 2.9612158540555245e-06, "loss": 18.6313, "step": 335160 }, { "epoch": 0.6770646056634494, "grad_norm": 47.78717803955078, "learning_rate": 2.9608971302918387e-06, "loss": 15.9485, "step": 335170 }, { "epoch": 0.6770848062961332, "grad_norm": 90.01335906982422, "learning_rate": 2.9605784164664925e-06, "loss": 12.9272, "step": 335180 }, { "epoch": 0.677105006928817, "grad_norm": 682.9651489257812, "learning_rate": 2.960259712581043e-06, "loss": 25.1159, "step": 335190 }, { "epoch": 0.6771252075615009, "grad_norm": 315.4779968261719, "learning_rate": 2.9599410186370363e-06, "loss": 27.4732, "step": 335200 }, { "epoch": 0.6771454081941847, "grad_norm": 490.7204284667969, "learning_rate": 2.959622334636031e-06, "loss": 16.3713, "step": 335210 }, { "epoch": 0.6771656088268685, "grad_norm": 244.84397888183594, "learning_rate": 2.95930366057958e-06, "loss": 19.5893, "step": 335220 }, { "epoch": 0.6771858094595523, "grad_norm": 538.170166015625, "learning_rate": 2.9589849964692352e-06, "loss": 12.9429, "step": 335230 }, { "epoch": 0.6772060100922361, "grad_norm": 233.0391387939453, "learning_rate": 2.9586663423065487e-06, "loss": 17.384, "step": 335240 }, { "epoch": 0.67722621072492, "grad_norm": 188.72152709960938, "learning_rate": 2.9583476980930768e-06, "loss": 13.7159, "step": 335250 }, { "epoch": 0.6772464113576038, "grad_norm": 398.97247314453125, "learning_rate": 2.9580290638303692e-06, "loss": 16.8337, "step": 335260 }, { "epoch": 0.6772666119902875, "grad_norm": 296.93353271484375, "learning_rate": 2.9577104395199795e-06, "loss": 10.1107, "step": 335270 }, { "epoch": 0.6772868126229713, "grad_norm": 474.47747802734375, "learning_rate": 2.9573918251634627e-06, "loss": 18.468, "step": 335280 }, { "epoch": 0.6773070132556551, "grad_norm": 301.44049072265625, "learning_rate": 2.957073220762371e-06, "loss": 19.0423, "step": 335290 }, { "epoch": 0.6773272138883389, "grad_norm": 411.773193359375, "learning_rate": 2.9567546263182554e-06, "loss": 19.4522, "step": 335300 }, { "epoch": 0.6773474145210228, "grad_norm": 354.2513732910156, "learning_rate": 2.9564360418326698e-06, "loss": 11.544, "step": 335310 }, { "epoch": 0.6773676151537066, "grad_norm": 393.323974609375, "learning_rate": 2.956117467307169e-06, "loss": 18.9092, "step": 335320 }, { "epoch": 0.6773878157863904, "grad_norm": 373.2540588378906, "learning_rate": 2.955798902743302e-06, "loss": 22.6163, "step": 335330 }, { "epoch": 0.6774080164190742, "grad_norm": 309.7513732910156, "learning_rate": 2.9554803481426223e-06, "loss": 7.6129, "step": 335340 }, { "epoch": 0.677428217051758, "grad_norm": 271.4678039550781, "learning_rate": 2.9551618035066863e-06, "loss": 19.9346, "step": 335350 }, { "epoch": 0.6774484176844419, "grad_norm": 292.7792663574219, "learning_rate": 2.954843268837041e-06, "loss": 19.9284, "step": 335360 }, { "epoch": 0.6774686183171257, "grad_norm": 265.24505615234375, "learning_rate": 2.954524744135243e-06, "loss": 24.5112, "step": 335370 }, { "epoch": 0.6774888189498095, "grad_norm": 102.1655044555664, "learning_rate": 2.954206229402844e-06, "loss": 13.1835, "step": 335380 }, { "epoch": 0.6775090195824933, "grad_norm": 210.73858642578125, "learning_rate": 2.9538877246413943e-06, "loss": 20.412, "step": 335390 }, { "epoch": 0.6775292202151771, "grad_norm": 137.14471435546875, "learning_rate": 2.9535692298524477e-06, "loss": 17.1019, "step": 335400 }, { "epoch": 0.677549420847861, "grad_norm": 436.45977783203125, "learning_rate": 2.953250745037556e-06, "loss": 16.4012, "step": 335410 }, { "epoch": 0.6775696214805448, "grad_norm": 271.258544921875, "learning_rate": 2.9529322701982744e-06, "loss": 11.0468, "step": 335420 }, { "epoch": 0.6775898221132286, "grad_norm": 224.8548583984375, "learning_rate": 2.9526138053361496e-06, "loss": 16.0006, "step": 335430 }, { "epoch": 0.6776100227459124, "grad_norm": 72.34127807617188, "learning_rate": 2.952295350452738e-06, "loss": 25.2652, "step": 335440 }, { "epoch": 0.6776302233785962, "grad_norm": 487.3221740722656, "learning_rate": 2.9519769055495917e-06, "loss": 39.2748, "step": 335450 }, { "epoch": 0.6776504240112801, "grad_norm": 467.5898742675781, "learning_rate": 2.9516584706282604e-06, "loss": 26.0418, "step": 335460 }, { "epoch": 0.6776706246439639, "grad_norm": 141.44091796875, "learning_rate": 2.9513400456902975e-06, "loss": 26.9904, "step": 335470 }, { "epoch": 0.6776908252766477, "grad_norm": 531.0645751953125, "learning_rate": 2.951021630737255e-06, "loss": 25.6533, "step": 335480 }, { "epoch": 0.6777110259093315, "grad_norm": 238.02101135253906, "learning_rate": 2.950703225770684e-06, "loss": 14.7189, "step": 335490 }, { "epoch": 0.6777312265420153, "grad_norm": 177.22532653808594, "learning_rate": 2.9503848307921363e-06, "loss": 14.1931, "step": 335500 }, { "epoch": 0.6777514271746992, "grad_norm": 344.86334228515625, "learning_rate": 2.9500664458031656e-06, "loss": 16.4789, "step": 335510 }, { "epoch": 0.677771627807383, "grad_norm": 473.8887634277344, "learning_rate": 2.949748070805322e-06, "loss": 26.7647, "step": 335520 }, { "epoch": 0.6777918284400667, "grad_norm": 128.1311492919922, "learning_rate": 2.9494297058001575e-06, "loss": 17.0805, "step": 335530 }, { "epoch": 0.6778120290727505, "grad_norm": 201.39273071289062, "learning_rate": 2.949111350789225e-06, "loss": 30.4145, "step": 335540 }, { "epoch": 0.6778322297054343, "grad_norm": 31.96744155883789, "learning_rate": 2.948793005774074e-06, "loss": 17.2311, "step": 335550 }, { "epoch": 0.6778524303381182, "grad_norm": 608.8126831054688, "learning_rate": 2.9484746707562573e-06, "loss": 19.1241, "step": 335560 }, { "epoch": 0.677872630970802, "grad_norm": 147.7123565673828, "learning_rate": 2.9481563457373247e-06, "loss": 8.0438, "step": 335570 }, { "epoch": 0.6778928316034858, "grad_norm": 242.12118530273438, "learning_rate": 2.9478380307188316e-06, "loss": 8.2122, "step": 335580 }, { "epoch": 0.6779130322361696, "grad_norm": 113.20966339111328, "learning_rate": 2.947519725702326e-06, "loss": 17.3038, "step": 335590 }, { "epoch": 0.6779332328688534, "grad_norm": 41.793975830078125, "learning_rate": 2.9472014306893605e-06, "loss": 17.677, "step": 335600 }, { "epoch": 0.6779534335015373, "grad_norm": 126.19673919677734, "learning_rate": 2.946883145681486e-06, "loss": 16.5543, "step": 335610 }, { "epoch": 0.6779736341342211, "grad_norm": 121.84140014648438, "learning_rate": 2.946564870680255e-06, "loss": 23.0243, "step": 335620 }, { "epoch": 0.6779938347669049, "grad_norm": 522.8529663085938, "learning_rate": 2.946246605687215e-06, "loss": 16.9214, "step": 335630 }, { "epoch": 0.6780140353995887, "grad_norm": 310.84246826171875, "learning_rate": 2.945928350703924e-06, "loss": 19.9505, "step": 335640 }, { "epoch": 0.6780342360322725, "grad_norm": 26.59088706970215, "learning_rate": 2.9456101057319266e-06, "loss": 14.7345, "step": 335650 }, { "epoch": 0.6780544366649564, "grad_norm": 172.07818603515625, "learning_rate": 2.945291870772776e-06, "loss": 19.1953, "step": 335660 }, { "epoch": 0.6780746372976402, "grad_norm": 292.6214294433594, "learning_rate": 2.9449736458280253e-06, "loss": 25.915, "step": 335670 }, { "epoch": 0.678094837930324, "grad_norm": 0.0, "learning_rate": 2.9446554308992227e-06, "loss": 9.6047, "step": 335680 }, { "epoch": 0.6781150385630078, "grad_norm": 307.8442687988281, "learning_rate": 2.94433722598792e-06, "loss": 23.0081, "step": 335690 }, { "epoch": 0.6781352391956916, "grad_norm": 163.4735565185547, "learning_rate": 2.94401903109567e-06, "loss": 21.701, "step": 335700 }, { "epoch": 0.6781554398283755, "grad_norm": 257.3848876953125, "learning_rate": 2.94370084622402e-06, "loss": 19.059, "step": 335710 }, { "epoch": 0.6781756404610593, "grad_norm": 726.9503173828125, "learning_rate": 2.943382671374523e-06, "loss": 28.3533, "step": 335720 }, { "epoch": 0.6781958410937431, "grad_norm": 261.3546142578125, "learning_rate": 2.9430645065487296e-06, "loss": 10.326, "step": 335730 }, { "epoch": 0.6782160417264269, "grad_norm": 63.78850173950195, "learning_rate": 2.9427463517481913e-06, "loss": 12.8562, "step": 335740 }, { "epoch": 0.6782362423591107, "grad_norm": 122.61705780029297, "learning_rate": 2.9424282069744564e-06, "loss": 8.556, "step": 335750 }, { "epoch": 0.6782564429917946, "grad_norm": 171.08038330078125, "learning_rate": 2.9421100722290774e-06, "loss": 23.1368, "step": 335760 }, { "epoch": 0.6782766436244784, "grad_norm": 67.91187286376953, "learning_rate": 2.9417919475136053e-06, "loss": 11.8939, "step": 335770 }, { "epoch": 0.6782968442571621, "grad_norm": 235.80157470703125, "learning_rate": 2.9414738328295884e-06, "loss": 10.7207, "step": 335780 }, { "epoch": 0.6783170448898459, "grad_norm": 368.4795227050781, "learning_rate": 2.9411557281785772e-06, "loss": 14.8314, "step": 335790 }, { "epoch": 0.6783372455225297, "grad_norm": 268.9847106933594, "learning_rate": 2.940837633562127e-06, "loss": 13.3034, "step": 335800 }, { "epoch": 0.6783574461552135, "grad_norm": 381.00604248046875, "learning_rate": 2.9405195489817805e-06, "loss": 25.5786, "step": 335810 }, { "epoch": 0.6783776467878974, "grad_norm": 419.58673095703125, "learning_rate": 2.9402014744390937e-06, "loss": 22.3636, "step": 335820 }, { "epoch": 0.6783978474205812, "grad_norm": 489.3803405761719, "learning_rate": 2.9398834099356155e-06, "loss": 19.4189, "step": 335830 }, { "epoch": 0.678418048053265, "grad_norm": 246.4154052734375, "learning_rate": 2.9395653554728955e-06, "loss": 23.7629, "step": 335840 }, { "epoch": 0.6784382486859488, "grad_norm": 396.004150390625, "learning_rate": 2.9392473110524834e-06, "loss": 21.9475, "step": 335850 }, { "epoch": 0.6784584493186326, "grad_norm": 304.5555725097656, "learning_rate": 2.9389292766759313e-06, "loss": 22.2852, "step": 335860 }, { "epoch": 0.6784786499513165, "grad_norm": 84.48942565917969, "learning_rate": 2.9386112523447863e-06, "loss": 9.2009, "step": 335870 }, { "epoch": 0.6784988505840003, "grad_norm": 309.5727233886719, "learning_rate": 2.9382932380606e-06, "loss": 13.4771, "step": 335880 }, { "epoch": 0.6785190512166841, "grad_norm": 368.2673034667969, "learning_rate": 2.9379752338249223e-06, "loss": 18.6957, "step": 335890 }, { "epoch": 0.6785392518493679, "grad_norm": 4.455811977386475, "learning_rate": 2.9376572396393047e-06, "loss": 18.6829, "step": 335900 }, { "epoch": 0.6785594524820517, "grad_norm": 401.92657470703125, "learning_rate": 2.937339255505295e-06, "loss": 8.5755, "step": 335910 }, { "epoch": 0.6785796531147356, "grad_norm": 457.0036926269531, "learning_rate": 2.9370212814244436e-06, "loss": 15.458, "step": 335920 }, { "epoch": 0.6785998537474194, "grad_norm": 39.46978759765625, "learning_rate": 2.9367033173983006e-06, "loss": 18.9986, "step": 335930 }, { "epoch": 0.6786200543801032, "grad_norm": 152.43338012695312, "learning_rate": 2.9363853634284143e-06, "loss": 12.2813, "step": 335940 }, { "epoch": 0.678640255012787, "grad_norm": 503.3367004394531, "learning_rate": 2.9360674195163354e-06, "loss": 31.0365, "step": 335950 }, { "epoch": 0.6786604556454708, "grad_norm": 421.3941650390625, "learning_rate": 2.935749485663616e-06, "loss": 27.3865, "step": 335960 }, { "epoch": 0.6786806562781547, "grad_norm": 198.50814819335938, "learning_rate": 2.9354315618718005e-06, "loss": 19.4412, "step": 335970 }, { "epoch": 0.6787008569108385, "grad_norm": 1.3054885864257812, "learning_rate": 2.9351136481424413e-06, "loss": 6.9169, "step": 335980 }, { "epoch": 0.6787210575435223, "grad_norm": 294.55194091796875, "learning_rate": 2.93479574447709e-06, "loss": 16.8749, "step": 335990 }, { "epoch": 0.6787412581762061, "grad_norm": 729.118408203125, "learning_rate": 2.934477850877292e-06, "loss": 33.6749, "step": 336000 }, { "epoch": 0.6787614588088899, "grad_norm": 210.07481384277344, "learning_rate": 2.9341599673445988e-06, "loss": 20.1822, "step": 336010 }, { "epoch": 0.6787816594415738, "grad_norm": 362.321044921875, "learning_rate": 2.933842093880558e-06, "loss": 34.5612, "step": 336020 }, { "epoch": 0.6788018600742576, "grad_norm": 193.6448974609375, "learning_rate": 2.9335242304867233e-06, "loss": 16.0499, "step": 336030 }, { "epoch": 0.6788220607069413, "grad_norm": 172.9117889404297, "learning_rate": 2.933206377164638e-06, "loss": 11.6066, "step": 336040 }, { "epoch": 0.6788422613396251, "grad_norm": 224.3739013671875, "learning_rate": 2.9328885339158554e-06, "loss": 27.4052, "step": 336050 }, { "epoch": 0.6788624619723089, "grad_norm": 61.55097961425781, "learning_rate": 2.9325707007419235e-06, "loss": 15.0616, "step": 336060 }, { "epoch": 0.6788826626049927, "grad_norm": 0.0, "learning_rate": 2.9322528776443917e-06, "loss": 16.0783, "step": 336070 }, { "epoch": 0.6789028632376766, "grad_norm": 200.20626831054688, "learning_rate": 2.9319350646248075e-06, "loss": 12.3466, "step": 336080 }, { "epoch": 0.6789230638703604, "grad_norm": 60.26162338256836, "learning_rate": 2.931617261684722e-06, "loss": 12.4856, "step": 336090 }, { "epoch": 0.6789432645030442, "grad_norm": 235.81759643554688, "learning_rate": 2.931299468825682e-06, "loss": 16.3048, "step": 336100 }, { "epoch": 0.678963465135728, "grad_norm": 322.51629638671875, "learning_rate": 2.930981686049237e-06, "loss": 27.785, "step": 336110 }, { "epoch": 0.6789836657684118, "grad_norm": 77.17876434326172, "learning_rate": 2.9306639133569393e-06, "loss": 25.292, "step": 336120 }, { "epoch": 0.6790038664010957, "grad_norm": 321.0661315917969, "learning_rate": 2.930346150750332e-06, "loss": 23.3647, "step": 336130 }, { "epoch": 0.6790240670337795, "grad_norm": 164.3569793701172, "learning_rate": 2.930028398230966e-06, "loss": 16.2935, "step": 336140 }, { "epoch": 0.6790442676664633, "grad_norm": 99.91030883789062, "learning_rate": 2.929710655800393e-06, "loss": 18.0698, "step": 336150 }, { "epoch": 0.6790644682991471, "grad_norm": 513.3517456054688, "learning_rate": 2.929392923460158e-06, "loss": 19.3383, "step": 336160 }, { "epoch": 0.679084668931831, "grad_norm": 278.4613037109375, "learning_rate": 2.9290752012118105e-06, "loss": 25.6326, "step": 336170 }, { "epoch": 0.6791048695645148, "grad_norm": 300.3846435546875, "learning_rate": 2.9287574890568982e-06, "loss": 19.5321, "step": 336180 }, { "epoch": 0.6791250701971986, "grad_norm": 152.0339813232422, "learning_rate": 2.928439786996973e-06, "loss": 15.0994, "step": 336190 }, { "epoch": 0.6791452708298824, "grad_norm": 484.6660461425781, "learning_rate": 2.92812209503358e-06, "loss": 40.7918, "step": 336200 }, { "epoch": 0.6791654714625662, "grad_norm": 175.43467712402344, "learning_rate": 2.927804413168268e-06, "loss": 22.205, "step": 336210 }, { "epoch": 0.67918567209525, "grad_norm": 147.68663024902344, "learning_rate": 2.9274867414025876e-06, "loss": 13.7179, "step": 336220 }, { "epoch": 0.6792058727279339, "grad_norm": 386.96502685546875, "learning_rate": 2.927169079738084e-06, "loss": 14.704, "step": 336230 }, { "epoch": 0.6792260733606177, "grad_norm": 402.2447204589844, "learning_rate": 2.9268514281763072e-06, "loss": 26.4416, "step": 336240 }, { "epoch": 0.6792462739933015, "grad_norm": 297.23284912109375, "learning_rate": 2.926533786718806e-06, "loss": 21.168, "step": 336250 }, { "epoch": 0.6792664746259853, "grad_norm": 443.52752685546875, "learning_rate": 2.926216155367126e-06, "loss": 14.0488, "step": 336260 }, { "epoch": 0.6792866752586691, "grad_norm": 238.8980255126953, "learning_rate": 2.9258985341228174e-06, "loss": 19.3176, "step": 336270 }, { "epoch": 0.679306875891353, "grad_norm": 166.70726013183594, "learning_rate": 2.9255809229874287e-06, "loss": 21.269, "step": 336280 }, { "epoch": 0.6793270765240368, "grad_norm": 613.914306640625, "learning_rate": 2.9252633219625073e-06, "loss": 17.0876, "step": 336290 }, { "epoch": 0.6793472771567205, "grad_norm": 489.29052734375, "learning_rate": 2.9249457310495994e-06, "loss": 12.22, "step": 336300 }, { "epoch": 0.6793674777894043, "grad_norm": 365.3419494628906, "learning_rate": 2.924628150250256e-06, "loss": 21.2881, "step": 336310 }, { "epoch": 0.6793876784220881, "grad_norm": 265.9783630371094, "learning_rate": 2.924310579566022e-06, "loss": 9.6641, "step": 336320 }, { "epoch": 0.679407879054772, "grad_norm": 220.37794494628906, "learning_rate": 2.9239930189984458e-06, "loss": 16.313, "step": 336330 }, { "epoch": 0.6794280796874558, "grad_norm": 355.0464782714844, "learning_rate": 2.9236754685490764e-06, "loss": 19.7515, "step": 336340 }, { "epoch": 0.6794482803201396, "grad_norm": 182.87744140625, "learning_rate": 2.9233579282194617e-06, "loss": 14.7637, "step": 336350 }, { "epoch": 0.6794684809528234, "grad_norm": 208.55581665039062, "learning_rate": 2.9230403980111482e-06, "loss": 16.4723, "step": 336360 }, { "epoch": 0.6794886815855072, "grad_norm": 397.05084228515625, "learning_rate": 2.922722877925683e-06, "loss": 17.2, "step": 336370 }, { "epoch": 0.6795088822181911, "grad_norm": 118.7071304321289, "learning_rate": 2.922405367964617e-06, "loss": 17.6289, "step": 336380 }, { "epoch": 0.6795290828508749, "grad_norm": 304.11639404296875, "learning_rate": 2.9220878681294935e-06, "loss": 13.6217, "step": 336390 }, { "epoch": 0.6795492834835587, "grad_norm": 247.48855590820312, "learning_rate": 2.921770378421861e-06, "loss": 15.8468, "step": 336400 }, { "epoch": 0.6795694841162425, "grad_norm": 326.9728698730469, "learning_rate": 2.92145289884327e-06, "loss": 21.1412, "step": 336410 }, { "epoch": 0.6795896847489263, "grad_norm": 366.3603210449219, "learning_rate": 2.9211354293952632e-06, "loss": 21.8225, "step": 336420 }, { "epoch": 0.6796098853816102, "grad_norm": 452.29058837890625, "learning_rate": 2.9208179700793905e-06, "loss": 26.805, "step": 336430 }, { "epoch": 0.679630086014294, "grad_norm": 199.14080810546875, "learning_rate": 2.9205005208972e-06, "loss": 11.3173, "step": 336440 }, { "epoch": 0.6796502866469778, "grad_norm": 337.0888671875, "learning_rate": 2.920183081850237e-06, "loss": 18.5599, "step": 336450 }, { "epoch": 0.6796704872796616, "grad_norm": 361.8758544921875, "learning_rate": 2.919865652940049e-06, "loss": 11.1404, "step": 336460 }, { "epoch": 0.6796906879123454, "grad_norm": 290.538330078125, "learning_rate": 2.919548234168183e-06, "loss": 15.0119, "step": 336470 }, { "epoch": 0.6797108885450293, "grad_norm": 249.3459930419922, "learning_rate": 2.9192308255361895e-06, "loss": 15.7843, "step": 336480 }, { "epoch": 0.6797310891777131, "grad_norm": 178.51451110839844, "learning_rate": 2.918913427045609e-06, "loss": 16.2926, "step": 336490 }, { "epoch": 0.6797512898103969, "grad_norm": 660.9615478515625, "learning_rate": 2.918596038697995e-06, "loss": 25.6947, "step": 336500 }, { "epoch": 0.6797714904430807, "grad_norm": 368.5863952636719, "learning_rate": 2.918278660494891e-06, "loss": 11.3112, "step": 336510 }, { "epoch": 0.6797916910757645, "grad_norm": 380.2193603515625, "learning_rate": 2.917961292437842e-06, "loss": 19.8173, "step": 336520 }, { "epoch": 0.6798118917084484, "grad_norm": 381.1828308105469, "learning_rate": 2.917643934528398e-06, "loss": 26.5868, "step": 336530 }, { "epoch": 0.6798320923411322, "grad_norm": 333.8994445800781, "learning_rate": 2.917326586768106e-06, "loss": 16.1523, "step": 336540 }, { "epoch": 0.6798522929738159, "grad_norm": 251.45159912109375, "learning_rate": 2.9170092491585122e-06, "loss": 19.9458, "step": 336550 }, { "epoch": 0.6798724936064997, "grad_norm": 183.83004760742188, "learning_rate": 2.91669192170116e-06, "loss": 10.0276, "step": 336560 }, { "epoch": 0.6798926942391835, "grad_norm": 250.56103515625, "learning_rate": 2.9163746043976014e-06, "loss": 12.3138, "step": 336570 }, { "epoch": 0.6799128948718673, "grad_norm": 187.8640594482422, "learning_rate": 2.91605729724938e-06, "loss": 18.5117, "step": 336580 }, { "epoch": 0.6799330955045512, "grad_norm": 39.37859344482422, "learning_rate": 2.9157400002580407e-06, "loss": 18.4926, "step": 336590 }, { "epoch": 0.679953296137235, "grad_norm": 154.15272521972656, "learning_rate": 2.915422713425134e-06, "loss": 20.5154, "step": 336600 }, { "epoch": 0.6799734967699188, "grad_norm": 229.36044311523438, "learning_rate": 2.9151054367522013e-06, "loss": 13.0612, "step": 336610 }, { "epoch": 0.6799936974026026, "grad_norm": 36.189308166503906, "learning_rate": 2.914788170240795e-06, "loss": 12.1213, "step": 336620 }, { "epoch": 0.6800138980352864, "grad_norm": 289.9989318847656, "learning_rate": 2.9144709138924556e-06, "loss": 16.0764, "step": 336630 }, { "epoch": 0.6800340986679703, "grad_norm": 255.24679565429688, "learning_rate": 2.9141536677087346e-06, "loss": 16.6553, "step": 336640 }, { "epoch": 0.6800542993006541, "grad_norm": 431.2161865234375, "learning_rate": 2.9138364316911747e-06, "loss": 12.1475, "step": 336650 }, { "epoch": 0.6800744999333379, "grad_norm": 173.5029754638672, "learning_rate": 2.9135192058413212e-06, "loss": 38.0555, "step": 336660 }, { "epoch": 0.6800947005660217, "grad_norm": 212.28530883789062, "learning_rate": 2.9132019901607246e-06, "loss": 20.8181, "step": 336670 }, { "epoch": 0.6801149011987055, "grad_norm": 188.73883056640625, "learning_rate": 2.912884784650926e-06, "loss": 26.7896, "step": 336680 }, { "epoch": 0.6801351018313894, "grad_norm": 931.8991088867188, "learning_rate": 2.9125675893134762e-06, "loss": 31.2758, "step": 336690 }, { "epoch": 0.6801553024640732, "grad_norm": 317.1272277832031, "learning_rate": 2.912250404149918e-06, "loss": 33.6222, "step": 336700 }, { "epoch": 0.680175503096757, "grad_norm": 478.4288024902344, "learning_rate": 2.9119332291617974e-06, "loss": 21.0125, "step": 336710 }, { "epoch": 0.6801957037294408, "grad_norm": 349.8617858886719, "learning_rate": 2.911616064350662e-06, "loss": 25.466, "step": 336720 }, { "epoch": 0.6802159043621246, "grad_norm": 95.23271179199219, "learning_rate": 2.9112989097180567e-06, "loss": 17.582, "step": 336730 }, { "epoch": 0.6802361049948085, "grad_norm": 832.002197265625, "learning_rate": 2.9109817652655253e-06, "loss": 16.8009, "step": 336740 }, { "epoch": 0.6802563056274923, "grad_norm": 308.3057556152344, "learning_rate": 2.910664630994615e-06, "loss": 19.3256, "step": 336750 }, { "epoch": 0.6802765062601761, "grad_norm": 185.88406372070312, "learning_rate": 2.9103475069068763e-06, "loss": 12.436, "step": 336760 }, { "epoch": 0.6802967068928599, "grad_norm": 104.121337890625, "learning_rate": 2.910030393003847e-06, "loss": 20.7507, "step": 336770 }, { "epoch": 0.6803169075255437, "grad_norm": 295.08697509765625, "learning_rate": 2.9097132892870757e-06, "loss": 14.2439, "step": 336780 }, { "epoch": 0.6803371081582276, "grad_norm": 311.9501647949219, "learning_rate": 2.9093961957581096e-06, "loss": 17.362, "step": 336790 }, { "epoch": 0.6803573087909114, "grad_norm": 272.4087829589844, "learning_rate": 2.9090791124184934e-06, "loss": 10.1153, "step": 336800 }, { "epoch": 0.6803775094235951, "grad_norm": 213.18466186523438, "learning_rate": 2.9087620392697703e-06, "loss": 9.2944, "step": 336810 }, { "epoch": 0.6803977100562789, "grad_norm": 255.3109893798828, "learning_rate": 2.908444976313487e-06, "loss": 17.9244, "step": 336820 }, { "epoch": 0.6804179106889627, "grad_norm": 115.09384155273438, "learning_rate": 2.908127923551194e-06, "loss": 11.8238, "step": 336830 }, { "epoch": 0.6804381113216466, "grad_norm": 964.028564453125, "learning_rate": 2.9078108809844264e-06, "loss": 21.2664, "step": 336840 }, { "epoch": 0.6804583119543304, "grad_norm": 213.50389099121094, "learning_rate": 2.9074938486147357e-06, "loss": 20.7351, "step": 336850 }, { "epoch": 0.6804785125870142, "grad_norm": 260.22900390625, "learning_rate": 2.9071768264436685e-06, "loss": 13.3025, "step": 336860 }, { "epoch": 0.680498713219698, "grad_norm": 338.62225341796875, "learning_rate": 2.9068598144727666e-06, "loss": 22.6616, "step": 336870 }, { "epoch": 0.6805189138523818, "grad_norm": 369.41766357421875, "learning_rate": 2.9065428127035743e-06, "loss": 33.4414, "step": 336880 }, { "epoch": 0.6805391144850657, "grad_norm": 301.8964538574219, "learning_rate": 2.9062258211376414e-06, "loss": 17.6325, "step": 336890 }, { "epoch": 0.6805593151177495, "grad_norm": 249.56863403320312, "learning_rate": 2.905908839776509e-06, "loss": 36.5252, "step": 336900 }, { "epoch": 0.6805795157504333, "grad_norm": 249.23077392578125, "learning_rate": 2.9055918686217212e-06, "loss": 11.195, "step": 336910 }, { "epoch": 0.6805997163831171, "grad_norm": 231.7198486328125, "learning_rate": 2.9052749076748266e-06, "loss": 33.6404, "step": 336920 }, { "epoch": 0.6806199170158009, "grad_norm": 372.733642578125, "learning_rate": 2.904957956937366e-06, "loss": 23.1114, "step": 336930 }, { "epoch": 0.6806401176484848, "grad_norm": 361.156982421875, "learning_rate": 2.9046410164108883e-06, "loss": 15.1005, "step": 336940 }, { "epoch": 0.6806603182811686, "grad_norm": 222.05404663085938, "learning_rate": 2.9043240860969342e-06, "loss": 19.5661, "step": 336950 }, { "epoch": 0.6806805189138524, "grad_norm": 112.5550537109375, "learning_rate": 2.904007165997052e-06, "loss": 5.7794, "step": 336960 }, { "epoch": 0.6807007195465362, "grad_norm": 472.69091796875, "learning_rate": 2.903690256112785e-06, "loss": 14.3773, "step": 336970 }, { "epoch": 0.68072092017922, "grad_norm": 406.447021484375, "learning_rate": 2.9033733564456756e-06, "loss": 14.0932, "step": 336980 }, { "epoch": 0.6807411208119039, "grad_norm": 239.74978637695312, "learning_rate": 2.9030564669972717e-06, "loss": 8.4182, "step": 336990 }, { "epoch": 0.6807613214445877, "grad_norm": 0.0, "learning_rate": 2.9027395877691143e-06, "loss": 25.1309, "step": 337000 }, { "epoch": 0.6807815220772715, "grad_norm": 302.2629699707031, "learning_rate": 2.902422718762752e-06, "loss": 21.483, "step": 337010 }, { "epoch": 0.6808017227099553, "grad_norm": 366.3304748535156, "learning_rate": 2.902105859979727e-06, "loss": 14.2281, "step": 337020 }, { "epoch": 0.6808219233426391, "grad_norm": 173.06027221679688, "learning_rate": 2.9017890114215814e-06, "loss": 12.9961, "step": 337030 }, { "epoch": 0.680842123975323, "grad_norm": 361.0361022949219, "learning_rate": 2.9014721730898637e-06, "loss": 22.7924, "step": 337040 }, { "epoch": 0.6808623246080068, "grad_norm": 685.5557861328125, "learning_rate": 2.9011553449861163e-06, "loss": 22.774, "step": 337050 }, { "epoch": 0.6808825252406905, "grad_norm": 181.66285705566406, "learning_rate": 2.900838527111881e-06, "loss": 21.4096, "step": 337060 }, { "epoch": 0.6809027258733743, "grad_norm": 375.5563049316406, "learning_rate": 2.900521719468704e-06, "loss": 20.4382, "step": 337070 }, { "epoch": 0.6809229265060581, "grad_norm": 108.95927429199219, "learning_rate": 2.900204922058132e-06, "loss": 13.1826, "step": 337080 }, { "epoch": 0.6809431271387419, "grad_norm": 481.4999694824219, "learning_rate": 2.899888134881706e-06, "loss": 21.2447, "step": 337090 }, { "epoch": 0.6809633277714258, "grad_norm": 285.0144958496094, "learning_rate": 2.899571357940969e-06, "loss": 15.8772, "step": 337100 }, { "epoch": 0.6809835284041096, "grad_norm": 248.12042236328125, "learning_rate": 2.8992545912374683e-06, "loss": 23.6694, "step": 337110 }, { "epoch": 0.6810037290367934, "grad_norm": 336.8533020019531, "learning_rate": 2.8989378347727453e-06, "loss": 16.0839, "step": 337120 }, { "epoch": 0.6810239296694772, "grad_norm": 218.89495849609375, "learning_rate": 2.8986210885483436e-06, "loss": 10.9906, "step": 337130 }, { "epoch": 0.681044130302161, "grad_norm": 240.262939453125, "learning_rate": 2.8983043525658065e-06, "loss": 23.5942, "step": 337140 }, { "epoch": 0.6810643309348449, "grad_norm": 146.49398803710938, "learning_rate": 2.897987626826682e-06, "loss": 17.6763, "step": 337150 }, { "epoch": 0.6810845315675287, "grad_norm": 121.29399108886719, "learning_rate": 2.8976709113325107e-06, "loss": 19.1087, "step": 337160 }, { "epoch": 0.6811047322002125, "grad_norm": 124.50176239013672, "learning_rate": 2.8973542060848347e-06, "loss": 12.7464, "step": 337170 }, { "epoch": 0.6811249328328963, "grad_norm": 458.1210021972656, "learning_rate": 2.897037511085201e-06, "loss": 22.866, "step": 337180 }, { "epoch": 0.6811451334655801, "grad_norm": 126.03080749511719, "learning_rate": 2.896720826335151e-06, "loss": 13.8534, "step": 337190 }, { "epoch": 0.681165334098264, "grad_norm": 537.2393798828125, "learning_rate": 2.896404151836227e-06, "loss": 21.5943, "step": 337200 }, { "epoch": 0.6811855347309478, "grad_norm": 159.58612060546875, "learning_rate": 2.896087487589976e-06, "loss": 13.7876, "step": 337210 }, { "epoch": 0.6812057353636316, "grad_norm": 218.7309112548828, "learning_rate": 2.8957708335979373e-06, "loss": 17.2851, "step": 337220 }, { "epoch": 0.6812259359963154, "grad_norm": 499.6978454589844, "learning_rate": 2.8954541898616585e-06, "loss": 20.5221, "step": 337230 }, { "epoch": 0.6812461366289992, "grad_norm": 263.2171936035156, "learning_rate": 2.895137556382679e-06, "loss": 16.6844, "step": 337240 }, { "epoch": 0.6812663372616831, "grad_norm": 282.1981201171875, "learning_rate": 2.8948209331625454e-06, "loss": 14.1329, "step": 337250 }, { "epoch": 0.6812865378943669, "grad_norm": 66.52291870117188, "learning_rate": 2.8945043202027987e-06, "loss": 23.6377, "step": 337260 }, { "epoch": 0.6813067385270507, "grad_norm": 182.78829956054688, "learning_rate": 2.8941877175049815e-06, "loss": 14.6913, "step": 337270 }, { "epoch": 0.6813269391597345, "grad_norm": 189.55577087402344, "learning_rate": 2.8938711250706397e-06, "loss": 14.6228, "step": 337280 }, { "epoch": 0.6813471397924183, "grad_norm": 203.63307189941406, "learning_rate": 2.8935545429013123e-06, "loss": 26.891, "step": 337290 }, { "epoch": 0.6813673404251022, "grad_norm": 253.27078247070312, "learning_rate": 2.893237970998547e-06, "loss": 12.0616, "step": 337300 }, { "epoch": 0.681387541057786, "grad_norm": 247.86729431152344, "learning_rate": 2.892921409363884e-06, "loss": 22.4096, "step": 337310 }, { "epoch": 0.6814077416904697, "grad_norm": 347.1783447265625, "learning_rate": 2.8926048579988647e-06, "loss": 19.8026, "step": 337320 }, { "epoch": 0.6814279423231535, "grad_norm": 143.5614471435547, "learning_rate": 2.8922883169050354e-06, "loss": 16.0684, "step": 337330 }, { "epoch": 0.6814481429558373, "grad_norm": 112.46509552001953, "learning_rate": 2.891971786083938e-06, "loss": 18.1797, "step": 337340 }, { "epoch": 0.6814683435885212, "grad_norm": 421.9353332519531, "learning_rate": 2.8916552655371117e-06, "loss": 23.4069, "step": 337350 }, { "epoch": 0.681488544221205, "grad_norm": 520.8557739257812, "learning_rate": 2.891338755266102e-06, "loss": 20.361, "step": 337360 }, { "epoch": 0.6815087448538888, "grad_norm": 387.4977111816406, "learning_rate": 2.8910222552724552e-06, "loss": 14.4099, "step": 337370 }, { "epoch": 0.6815289454865726, "grad_norm": 182.18067932128906, "learning_rate": 2.8907057655577066e-06, "loss": 8.8123, "step": 337380 }, { "epoch": 0.6815491461192564, "grad_norm": 395.0787353515625, "learning_rate": 2.8903892861234023e-06, "loss": 16.3045, "step": 337390 }, { "epoch": 0.6815693467519403, "grad_norm": 213.30841064453125, "learning_rate": 2.8900728169710866e-06, "loss": 4.0944, "step": 337400 }, { "epoch": 0.6815895473846241, "grad_norm": 1825.2296142578125, "learning_rate": 2.8897563581023e-06, "loss": 44.5224, "step": 337410 }, { "epoch": 0.6816097480173079, "grad_norm": 127.44966888427734, "learning_rate": 2.8894399095185833e-06, "loss": 18.6846, "step": 337420 }, { "epoch": 0.6816299486499917, "grad_norm": 407.293212890625, "learning_rate": 2.8891234712214798e-06, "loss": 39.2627, "step": 337430 }, { "epoch": 0.6816501492826755, "grad_norm": 312.8309631347656, "learning_rate": 2.888807043212537e-06, "loss": 17.3076, "step": 337440 }, { "epoch": 0.6816703499153594, "grad_norm": 149.46005249023438, "learning_rate": 2.888490625493289e-06, "loss": 11.2893, "step": 337450 }, { "epoch": 0.6816905505480432, "grad_norm": 114.9730453491211, "learning_rate": 2.8881742180652813e-06, "loss": 14.163, "step": 337460 }, { "epoch": 0.681710751180727, "grad_norm": 467.4602966308594, "learning_rate": 2.8878578209300576e-06, "loss": 18.3262, "step": 337470 }, { "epoch": 0.6817309518134108, "grad_norm": 237.85484313964844, "learning_rate": 2.8875414340891596e-06, "loss": 14.8854, "step": 337480 }, { "epoch": 0.6817511524460946, "grad_norm": 57.29453659057617, "learning_rate": 2.887225057544126e-06, "loss": 19.7379, "step": 337490 }, { "epoch": 0.6817713530787785, "grad_norm": 767.3331909179688, "learning_rate": 2.886908691296504e-06, "loss": 10.2886, "step": 337500 }, { "epoch": 0.6817915537114623, "grad_norm": 170.16807556152344, "learning_rate": 2.886592335347832e-06, "loss": 11.7349, "step": 337510 }, { "epoch": 0.6818117543441461, "grad_norm": 251.73239135742188, "learning_rate": 2.8862759896996507e-06, "loss": 21.3601, "step": 337520 }, { "epoch": 0.6818319549768299, "grad_norm": 312.6261901855469, "learning_rate": 2.885959654353504e-06, "loss": 12.8986, "step": 337530 }, { "epoch": 0.6818521556095137, "grad_norm": 231.30508422851562, "learning_rate": 2.8856433293109355e-06, "loss": 22.0849, "step": 337540 }, { "epoch": 0.6818723562421976, "grad_norm": 461.048583984375, "learning_rate": 2.8853270145734846e-06, "loss": 20.471, "step": 337550 }, { "epoch": 0.6818925568748814, "grad_norm": 0.0, "learning_rate": 2.8850107101426916e-06, "loss": 13.9587, "step": 337560 }, { "epoch": 0.6819127575075651, "grad_norm": 225.0816650390625, "learning_rate": 2.884694416020102e-06, "loss": 23.4605, "step": 337570 }, { "epoch": 0.6819329581402489, "grad_norm": 374.3337097167969, "learning_rate": 2.884378132207255e-06, "loss": 15.3794, "step": 337580 }, { "epoch": 0.6819531587729327, "grad_norm": 223.9132843017578, "learning_rate": 2.884061858705691e-06, "loss": 11.8631, "step": 337590 }, { "epoch": 0.6819733594056165, "grad_norm": 185.85818481445312, "learning_rate": 2.8837455955169547e-06, "loss": 16.2787, "step": 337600 }, { "epoch": 0.6819935600383004, "grad_norm": 7.504227161407471, "learning_rate": 2.883429342642583e-06, "loss": 19.3667, "step": 337610 }, { "epoch": 0.6820137606709842, "grad_norm": 427.7292175292969, "learning_rate": 2.8831131000841227e-06, "loss": 17.8733, "step": 337620 }, { "epoch": 0.682033961303668, "grad_norm": 20.55327033996582, "learning_rate": 2.882796867843112e-06, "loss": 15.8398, "step": 337630 }, { "epoch": 0.6820541619363518, "grad_norm": 404.1729736328125, "learning_rate": 2.8824806459210907e-06, "loss": 16.39, "step": 337640 }, { "epoch": 0.6820743625690356, "grad_norm": 402.6095275878906, "learning_rate": 2.8821644343196042e-06, "loss": 19.1106, "step": 337650 }, { "epoch": 0.6820945632017195, "grad_norm": 64.06078338623047, "learning_rate": 2.881848233040191e-06, "loss": 24.3614, "step": 337660 }, { "epoch": 0.6821147638344033, "grad_norm": 200.83689880371094, "learning_rate": 2.8815320420843906e-06, "loss": 16.2936, "step": 337670 }, { "epoch": 0.6821349644670871, "grad_norm": 352.10546875, "learning_rate": 2.8812158614537465e-06, "loss": 18.3353, "step": 337680 }, { "epoch": 0.6821551650997709, "grad_norm": 291.7635498046875, "learning_rate": 2.8808996911498012e-06, "loss": 8.4016, "step": 337690 }, { "epoch": 0.6821753657324547, "grad_norm": 340.2513427734375, "learning_rate": 2.8805835311740933e-06, "loss": 15.8309, "step": 337700 }, { "epoch": 0.6821955663651386, "grad_norm": 646.6484375, "learning_rate": 2.8802673815281622e-06, "loss": 11.1653, "step": 337710 }, { "epoch": 0.6822157669978224, "grad_norm": 348.8888244628906, "learning_rate": 2.8799512422135534e-06, "loss": 18.1211, "step": 337720 }, { "epoch": 0.6822359676305062, "grad_norm": 277.909423828125, "learning_rate": 2.8796351132318047e-06, "loss": 13.4967, "step": 337730 }, { "epoch": 0.68225616826319, "grad_norm": 457.6668701171875, "learning_rate": 2.879318994584456e-06, "loss": 11.1521, "step": 337740 }, { "epoch": 0.6822763688958738, "grad_norm": 155.47250366210938, "learning_rate": 2.8790028862730487e-06, "loss": 18.1624, "step": 337750 }, { "epoch": 0.6822965695285577, "grad_norm": 353.048583984375, "learning_rate": 2.8786867882991276e-06, "loss": 18.4389, "step": 337760 }, { "epoch": 0.6823167701612415, "grad_norm": 902.6870727539062, "learning_rate": 2.8783707006642266e-06, "loss": 18.6763, "step": 337770 }, { "epoch": 0.6823369707939253, "grad_norm": 533.7274780273438, "learning_rate": 2.878054623369889e-06, "loss": 16.0041, "step": 337780 }, { "epoch": 0.6823571714266091, "grad_norm": 586.1728515625, "learning_rate": 2.8777385564176585e-06, "loss": 10.1919, "step": 337790 }, { "epoch": 0.6823773720592929, "grad_norm": 216.79629516601562, "learning_rate": 2.877422499809072e-06, "loss": 20.6243, "step": 337800 }, { "epoch": 0.6823975726919768, "grad_norm": 262.71734619140625, "learning_rate": 2.8771064535456696e-06, "loss": 20.3543, "step": 337810 }, { "epoch": 0.6824177733246606, "grad_norm": 518.3030395507812, "learning_rate": 2.876790417628994e-06, "loss": 20.0688, "step": 337820 }, { "epoch": 0.6824379739573443, "grad_norm": 1.1225916147232056, "learning_rate": 2.8764743920605855e-06, "loss": 18.0719, "step": 337830 }, { "epoch": 0.6824581745900281, "grad_norm": 393.7471923828125, "learning_rate": 2.87615837684198e-06, "loss": 14.8554, "step": 337840 }, { "epoch": 0.6824783752227119, "grad_norm": 176.36019897460938, "learning_rate": 2.875842371974722e-06, "loss": 11.7576, "step": 337850 }, { "epoch": 0.6824985758553958, "grad_norm": 249.6238555908203, "learning_rate": 2.875526377460352e-06, "loss": 20.9444, "step": 337860 }, { "epoch": 0.6825187764880796, "grad_norm": 433.8548889160156, "learning_rate": 2.8752103933004095e-06, "loss": 22.9013, "step": 337870 }, { "epoch": 0.6825389771207634, "grad_norm": 54.54343032836914, "learning_rate": 2.874894419496431e-06, "loss": 9.3707, "step": 337880 }, { "epoch": 0.6825591777534472, "grad_norm": 567.9315185546875, "learning_rate": 2.874578456049961e-06, "loss": 23.3074, "step": 337890 }, { "epoch": 0.682579378386131, "grad_norm": 94.39813232421875, "learning_rate": 2.874262502962537e-06, "loss": 15.7049, "step": 337900 }, { "epoch": 0.6825995790188149, "grad_norm": 421.9408264160156, "learning_rate": 2.8739465602357014e-06, "loss": 9.7967, "step": 337910 }, { "epoch": 0.6826197796514987, "grad_norm": 324.3566589355469, "learning_rate": 2.8736306278709923e-06, "loss": 17.9908, "step": 337920 }, { "epoch": 0.6826399802841825, "grad_norm": 621.4271240234375, "learning_rate": 2.8733147058699473e-06, "loss": 16.5175, "step": 337930 }, { "epoch": 0.6826601809168663, "grad_norm": 96.36259460449219, "learning_rate": 2.872998794234111e-06, "loss": 15.1206, "step": 337940 }, { "epoch": 0.6826803815495501, "grad_norm": 476.7060852050781, "learning_rate": 2.8726828929650206e-06, "loss": 18.5454, "step": 337950 }, { "epoch": 0.682700582182234, "grad_norm": 59.466426849365234, "learning_rate": 2.8723670020642137e-06, "loss": 21.3155, "step": 337960 }, { "epoch": 0.6827207828149178, "grad_norm": 360.0834045410156, "learning_rate": 2.8720511215332314e-06, "loss": 17.1238, "step": 337970 }, { "epoch": 0.6827409834476016, "grad_norm": 0.0, "learning_rate": 2.871735251373616e-06, "loss": 13.3316, "step": 337980 }, { "epoch": 0.6827611840802854, "grad_norm": 274.7656555175781, "learning_rate": 2.871419391586906e-06, "loss": 18.9901, "step": 337990 }, { "epoch": 0.6827813847129692, "grad_norm": 448.28436279296875, "learning_rate": 2.871103542174637e-06, "loss": 19.7402, "step": 338000 }, { "epoch": 0.682801585345653, "grad_norm": 344.9901123046875, "learning_rate": 2.8707877031383523e-06, "loss": 20.014, "step": 338010 }, { "epoch": 0.6828217859783369, "grad_norm": 287.8844299316406, "learning_rate": 2.870471874479591e-06, "loss": 15.2984, "step": 338020 }, { "epoch": 0.6828419866110207, "grad_norm": 1984.8511962890625, "learning_rate": 2.870156056199889e-06, "loss": 28.2589, "step": 338030 }, { "epoch": 0.6828621872437045, "grad_norm": 384.9743347167969, "learning_rate": 2.8698402483007885e-06, "loss": 23.5043, "step": 338040 }, { "epoch": 0.6828823878763883, "grad_norm": 62.157958984375, "learning_rate": 2.8695244507838326e-06, "loss": 17.2494, "step": 338050 }, { "epoch": 0.6829025885090722, "grad_norm": 286.4145202636719, "learning_rate": 2.869208663650551e-06, "loss": 16.9402, "step": 338060 }, { "epoch": 0.682922789141756, "grad_norm": 296.9025573730469, "learning_rate": 2.868892886902489e-06, "loss": 17.5553, "step": 338070 }, { "epoch": 0.6829429897744398, "grad_norm": 238.07823181152344, "learning_rate": 2.8685771205411862e-06, "loss": 18.5039, "step": 338080 }, { "epoch": 0.6829631904071235, "grad_norm": 346.1646423339844, "learning_rate": 2.86826136456818e-06, "loss": 23.8138, "step": 338090 }, { "epoch": 0.6829833910398073, "grad_norm": 427.39593505859375, "learning_rate": 2.8679456189850076e-06, "loss": 20.6147, "step": 338100 }, { "epoch": 0.6830035916724911, "grad_norm": 427.6315002441406, "learning_rate": 2.8676298837932116e-06, "loss": 27.7349, "step": 338110 }, { "epoch": 0.683023792305175, "grad_norm": 845.6320190429688, "learning_rate": 2.8673141589943285e-06, "loss": 16.4816, "step": 338120 }, { "epoch": 0.6830439929378588, "grad_norm": 78.56681823730469, "learning_rate": 2.866998444589896e-06, "loss": 29.6357, "step": 338130 }, { "epoch": 0.6830641935705426, "grad_norm": 54.13490295410156, "learning_rate": 2.8666827405814535e-06, "loss": 7.9405, "step": 338140 }, { "epoch": 0.6830843942032264, "grad_norm": 411.3318786621094, "learning_rate": 2.8663670469705434e-06, "loss": 19.6019, "step": 338150 }, { "epoch": 0.6831045948359102, "grad_norm": 235.86595153808594, "learning_rate": 2.8660513637587016e-06, "loss": 16.7559, "step": 338160 }, { "epoch": 0.6831247954685941, "grad_norm": 566.3510131835938, "learning_rate": 2.8657356909474644e-06, "loss": 16.5376, "step": 338170 }, { "epoch": 0.6831449961012779, "grad_norm": 263.1259460449219, "learning_rate": 2.865420028538375e-06, "loss": 14.0904, "step": 338180 }, { "epoch": 0.6831651967339617, "grad_norm": 310.77984619140625, "learning_rate": 2.865104376532969e-06, "loss": 22.3565, "step": 338190 }, { "epoch": 0.6831853973666455, "grad_norm": 320.32855224609375, "learning_rate": 2.864788734932783e-06, "loss": 8.6116, "step": 338200 }, { "epoch": 0.6832055979993293, "grad_norm": 151.59494018554688, "learning_rate": 2.8644731037393605e-06, "loss": 11.9854, "step": 338210 }, { "epoch": 0.6832257986320132, "grad_norm": 1714.510009765625, "learning_rate": 2.864157482954234e-06, "loss": 20.8913, "step": 338220 }, { "epoch": 0.683245999264697, "grad_norm": 109.7911605834961, "learning_rate": 2.863841872578948e-06, "loss": 19.8624, "step": 338230 }, { "epoch": 0.6832661998973808, "grad_norm": 190.69773864746094, "learning_rate": 2.863526272615037e-06, "loss": 15.7261, "step": 338240 }, { "epoch": 0.6832864005300646, "grad_norm": 176.0821533203125, "learning_rate": 2.863210683064038e-06, "loss": 11.5691, "step": 338250 }, { "epoch": 0.6833066011627484, "grad_norm": 96.04032897949219, "learning_rate": 2.8628951039274933e-06, "loss": 41.0111, "step": 338260 }, { "epoch": 0.6833268017954323, "grad_norm": 335.63897705078125, "learning_rate": 2.8625795352069385e-06, "loss": 26.6477, "step": 338270 }, { "epoch": 0.6833470024281161, "grad_norm": 121.83785247802734, "learning_rate": 2.8622639769039094e-06, "loss": 17.7441, "step": 338280 }, { "epoch": 0.6833672030607999, "grad_norm": 153.68785095214844, "learning_rate": 2.861948429019947e-06, "loss": 14.4136, "step": 338290 }, { "epoch": 0.6833874036934837, "grad_norm": 22.147253036499023, "learning_rate": 2.8616328915565907e-06, "loss": 8.0332, "step": 338300 }, { "epoch": 0.6834076043261675, "grad_norm": 18.95619010925293, "learning_rate": 2.861317364515376e-06, "loss": 10.1848, "step": 338310 }, { "epoch": 0.6834278049588514, "grad_norm": 53.55995178222656, "learning_rate": 2.8610018478978387e-06, "loss": 17.1014, "step": 338320 }, { "epoch": 0.6834480055915352, "grad_norm": 336.89349365234375, "learning_rate": 2.8606863417055216e-06, "loss": 13.2474, "step": 338330 }, { "epoch": 0.6834682062242189, "grad_norm": 306.18133544921875, "learning_rate": 2.8603708459399594e-06, "loss": 11.2188, "step": 338340 }, { "epoch": 0.6834884068569027, "grad_norm": 8.513721466064453, "learning_rate": 2.8600553606026883e-06, "loss": 19.5594, "step": 338350 }, { "epoch": 0.6835086074895865, "grad_norm": 243.7360076904297, "learning_rate": 2.8597398856952473e-06, "loss": 11.7994, "step": 338360 }, { "epoch": 0.6835288081222703, "grad_norm": 70.63182067871094, "learning_rate": 2.8594244212191796e-06, "loss": 15.6911, "step": 338370 }, { "epoch": 0.6835490087549542, "grad_norm": 487.42401123046875, "learning_rate": 2.859108967176013e-06, "loss": 20.1221, "step": 338380 }, { "epoch": 0.683569209387638, "grad_norm": 392.18634033203125, "learning_rate": 2.8587935235672897e-06, "loss": 7.058, "step": 338390 }, { "epoch": 0.6835894100203218, "grad_norm": 355.30548095703125, "learning_rate": 2.858478090394549e-06, "loss": 14.0506, "step": 338400 }, { "epoch": 0.6836096106530056, "grad_norm": 126.27824401855469, "learning_rate": 2.8581626676593264e-06, "loss": 14.3339, "step": 338410 }, { "epoch": 0.6836298112856894, "grad_norm": 257.8165588378906, "learning_rate": 2.8578472553631577e-06, "loss": 33.9921, "step": 338420 }, { "epoch": 0.6836500119183733, "grad_norm": 306.0044250488281, "learning_rate": 2.857531853507583e-06, "loss": 18.524, "step": 338430 }, { "epoch": 0.6836702125510571, "grad_norm": 276.1943054199219, "learning_rate": 2.8572164620941385e-06, "loss": 20.5038, "step": 338440 }, { "epoch": 0.6836904131837409, "grad_norm": 285.0834045410156, "learning_rate": 2.856901081124359e-06, "loss": 17.165, "step": 338450 }, { "epoch": 0.6837106138164247, "grad_norm": 84.69071197509766, "learning_rate": 2.856585710599784e-06, "loss": 20.893, "step": 338460 }, { "epoch": 0.6837308144491085, "grad_norm": 54.28360366821289, "learning_rate": 2.8562703505219513e-06, "loss": 19.0804, "step": 338470 }, { "epoch": 0.6837510150817924, "grad_norm": 509.4893798828125, "learning_rate": 2.855955000892398e-06, "loss": 45.3353, "step": 338480 }, { "epoch": 0.6837712157144762, "grad_norm": 389.2441711425781, "learning_rate": 2.8556396617126575e-06, "loss": 10.6489, "step": 338490 }, { "epoch": 0.68379141634716, "grad_norm": 224.62164306640625, "learning_rate": 2.8553243329842715e-06, "loss": 30.6537, "step": 338500 }, { "epoch": 0.6838116169798438, "grad_norm": 281.3319396972656, "learning_rate": 2.855009014708774e-06, "loss": 19.751, "step": 338510 }, { "epoch": 0.6838318176125276, "grad_norm": 149.65371704101562, "learning_rate": 2.8546937068877013e-06, "loss": 26.6973, "step": 338520 }, { "epoch": 0.6838520182452115, "grad_norm": 229.4172821044922, "learning_rate": 2.8543784095225923e-06, "loss": 12.1654, "step": 338530 }, { "epoch": 0.6838722188778953, "grad_norm": 298.12158203125, "learning_rate": 2.8540631226149813e-06, "loss": 23.4672, "step": 338540 }, { "epoch": 0.6838924195105791, "grad_norm": 10.520437240600586, "learning_rate": 2.853747846166408e-06, "loss": 6.13, "step": 338550 }, { "epoch": 0.6839126201432629, "grad_norm": 406.66302490234375, "learning_rate": 2.853432580178408e-06, "loss": 15.4185, "step": 338560 }, { "epoch": 0.6839328207759467, "grad_norm": 289.27642822265625, "learning_rate": 2.8531173246525144e-06, "loss": 23.0656, "step": 338570 }, { "epoch": 0.6839530214086306, "grad_norm": 1008.6165771484375, "learning_rate": 2.8528020795902672e-06, "loss": 13.0402, "step": 338580 }, { "epoch": 0.6839732220413144, "grad_norm": 318.14617919921875, "learning_rate": 2.852486844993204e-06, "loss": 14.2301, "step": 338590 }, { "epoch": 0.6839934226739981, "grad_norm": 329.3001708984375, "learning_rate": 2.8521716208628597e-06, "loss": 15.1885, "step": 338600 }, { "epoch": 0.6840136233066819, "grad_norm": 248.4498291015625, "learning_rate": 2.851856407200768e-06, "loss": 11.6, "step": 338610 }, { "epoch": 0.6840338239393657, "grad_norm": 548.3978271484375, "learning_rate": 2.85154120400847e-06, "loss": 22.296, "step": 338620 }, { "epoch": 0.6840540245720496, "grad_norm": 145.66209411621094, "learning_rate": 2.8512260112874994e-06, "loss": 15.2133, "step": 338630 }, { "epoch": 0.6840742252047334, "grad_norm": 120.56087493896484, "learning_rate": 2.8509108290393907e-06, "loss": 20.0328, "step": 338640 }, { "epoch": 0.6840944258374172, "grad_norm": 141.44293212890625, "learning_rate": 2.850595657265682e-06, "loss": 8.3148, "step": 338650 }, { "epoch": 0.684114626470101, "grad_norm": 459.21844482421875, "learning_rate": 2.850280495967913e-06, "loss": 16.808, "step": 338660 }, { "epoch": 0.6841348271027848, "grad_norm": 277.9759826660156, "learning_rate": 2.8499653451476126e-06, "loss": 17.8072, "step": 338670 }, { "epoch": 0.6841550277354687, "grad_norm": 273.11181640625, "learning_rate": 2.84965020480632e-06, "loss": 19.0548, "step": 338680 }, { "epoch": 0.6841752283681525, "grad_norm": 181.99716186523438, "learning_rate": 2.8493350749455737e-06, "loss": 35.4504, "step": 338690 }, { "epoch": 0.6841954290008363, "grad_norm": 117.74273681640625, "learning_rate": 2.849019955566908e-06, "loss": 17.9569, "step": 338700 }, { "epoch": 0.6842156296335201, "grad_norm": 299.0187072753906, "learning_rate": 2.848704846671856e-06, "loss": 20.8207, "step": 338710 }, { "epoch": 0.6842358302662039, "grad_norm": 237.4781494140625, "learning_rate": 2.8483897482619566e-06, "loss": 26.1763, "step": 338720 }, { "epoch": 0.6842560308988878, "grad_norm": 159.9395294189453, "learning_rate": 2.8480746603387453e-06, "loss": 18.4871, "step": 338730 }, { "epoch": 0.6842762315315716, "grad_norm": 514.5390625, "learning_rate": 2.847759582903755e-06, "loss": 19.7355, "step": 338740 }, { "epoch": 0.6842964321642554, "grad_norm": 285.404052734375, "learning_rate": 2.8474445159585235e-06, "loss": 10.7217, "step": 338750 }, { "epoch": 0.6843166327969392, "grad_norm": 249.74851989746094, "learning_rate": 2.8471294595045886e-06, "loss": 10.8245, "step": 338760 }, { "epoch": 0.684336833429623, "grad_norm": 410.8199462890625, "learning_rate": 2.8468144135434827e-06, "loss": 23.5627, "step": 338770 }, { "epoch": 0.6843570340623069, "grad_norm": 313.645263671875, "learning_rate": 2.8464993780767414e-06, "loss": 31.2936, "step": 338780 }, { "epoch": 0.6843772346949907, "grad_norm": 326.6199645996094, "learning_rate": 2.846184353105902e-06, "loss": 26.6149, "step": 338790 }, { "epoch": 0.6843974353276745, "grad_norm": 226.2424774169922, "learning_rate": 2.8458693386325e-06, "loss": 11.9223, "step": 338800 }, { "epoch": 0.6844176359603583, "grad_norm": 145.3861846923828, "learning_rate": 2.845554334658066e-06, "loss": 24.3292, "step": 338810 }, { "epoch": 0.6844378365930421, "grad_norm": 171.6868133544922, "learning_rate": 2.8452393411841418e-06, "loss": 37.8657, "step": 338820 }, { "epoch": 0.684458037225726, "grad_norm": 57.693992614746094, "learning_rate": 2.8449243582122577e-06, "loss": 17.2697, "step": 338830 }, { "epoch": 0.6844782378584098, "grad_norm": 310.4919738769531, "learning_rate": 2.8446093857439527e-06, "loss": 17.5195, "step": 338840 }, { "epoch": 0.6844984384910935, "grad_norm": 424.4579772949219, "learning_rate": 2.84429442378076e-06, "loss": 16.2504, "step": 338850 }, { "epoch": 0.6845186391237773, "grad_norm": 223.93661499023438, "learning_rate": 2.843979472324213e-06, "loss": 40.176, "step": 338860 }, { "epoch": 0.6845388397564611, "grad_norm": 202.6236114501953, "learning_rate": 2.843664531375851e-06, "loss": 8.1196, "step": 338870 }, { "epoch": 0.684559040389145, "grad_norm": 177.59127807617188, "learning_rate": 2.8433496009372063e-06, "loss": 11.6913, "step": 338880 }, { "epoch": 0.6845792410218288, "grad_norm": 194.08518981933594, "learning_rate": 2.843034681009812e-06, "loss": 19.0148, "step": 338890 }, { "epoch": 0.6845994416545126, "grad_norm": 127.3999252319336, "learning_rate": 2.8427197715952047e-06, "loss": 14.0098, "step": 338900 }, { "epoch": 0.6846196422871964, "grad_norm": 313.1684265136719, "learning_rate": 2.8424048726949215e-06, "loss": 14.6083, "step": 338910 }, { "epoch": 0.6846398429198802, "grad_norm": 223.4031524658203, "learning_rate": 2.842089984310496e-06, "loss": 10.5082, "step": 338920 }, { "epoch": 0.684660043552564, "grad_norm": 304.5423583984375, "learning_rate": 2.8417751064434596e-06, "loss": 15.9512, "step": 338930 }, { "epoch": 0.6846802441852479, "grad_norm": 185.18467712402344, "learning_rate": 2.8414602390953515e-06, "loss": 12.8051, "step": 338940 }, { "epoch": 0.6847004448179317, "grad_norm": 373.9627380371094, "learning_rate": 2.841145382267705e-06, "loss": 14.0647, "step": 338950 }, { "epoch": 0.6847206454506155, "grad_norm": 514.2100219726562, "learning_rate": 2.840830535962052e-06, "loss": 22.39, "step": 338960 }, { "epoch": 0.6847408460832993, "grad_norm": 87.05348205566406, "learning_rate": 2.840515700179929e-06, "loss": 16.3721, "step": 338970 }, { "epoch": 0.6847610467159831, "grad_norm": 238.4167938232422, "learning_rate": 2.8402008749228737e-06, "loss": 41.9951, "step": 338980 }, { "epoch": 0.684781247348667, "grad_norm": 205.0364532470703, "learning_rate": 2.8398860601924145e-06, "loss": 9.9563, "step": 338990 }, { "epoch": 0.6848014479813508, "grad_norm": 190.67532348632812, "learning_rate": 2.839571255990088e-06, "loss": 8.6869, "step": 339000 }, { "epoch": 0.6848216486140346, "grad_norm": 203.41053771972656, "learning_rate": 2.8392564623174314e-06, "loss": 20.405, "step": 339010 }, { "epoch": 0.6848418492467184, "grad_norm": 219.16696166992188, "learning_rate": 2.838941679175977e-06, "loss": 25.6745, "step": 339020 }, { "epoch": 0.6848620498794022, "grad_norm": 317.7598571777344, "learning_rate": 2.838626906567257e-06, "loss": 18.2399, "step": 339030 }, { "epoch": 0.6848822505120861, "grad_norm": 198.759765625, "learning_rate": 2.8383121444928063e-06, "loss": 11.1251, "step": 339040 }, { "epoch": 0.6849024511447699, "grad_norm": 415.86895751953125, "learning_rate": 2.837997392954165e-06, "loss": 16.222, "step": 339050 }, { "epoch": 0.6849226517774537, "grad_norm": 323.9634094238281, "learning_rate": 2.8376826519528572e-06, "loss": 12.5985, "step": 339060 }, { "epoch": 0.6849428524101375, "grad_norm": 302.14556884765625, "learning_rate": 2.8373679214904225e-06, "loss": 29.9102, "step": 339070 }, { "epoch": 0.6849630530428213, "grad_norm": 0.0, "learning_rate": 2.837053201568396e-06, "loss": 14.9801, "step": 339080 }, { "epoch": 0.6849832536755052, "grad_norm": 323.6229553222656, "learning_rate": 2.83673849218831e-06, "loss": 18.053, "step": 339090 }, { "epoch": 0.685003454308189, "grad_norm": 210.7010955810547, "learning_rate": 2.8364237933516964e-06, "loss": 12.6758, "step": 339100 }, { "epoch": 0.6850236549408727, "grad_norm": 432.0805358886719, "learning_rate": 2.836109105060093e-06, "loss": 19.2762, "step": 339110 }, { "epoch": 0.6850438555735565, "grad_norm": 50.94422912597656, "learning_rate": 2.8357944273150304e-06, "loss": 14.2385, "step": 339120 }, { "epoch": 0.6850640562062403, "grad_norm": 196.6386260986328, "learning_rate": 2.835479760118042e-06, "loss": 15.8631, "step": 339130 }, { "epoch": 0.6850842568389242, "grad_norm": 374.4574890136719, "learning_rate": 2.835165103470665e-06, "loss": 12.7144, "step": 339140 }, { "epoch": 0.685104457471608, "grad_norm": 150.11158752441406, "learning_rate": 2.8348504573744283e-06, "loss": 8.1272, "step": 339150 }, { "epoch": 0.6851246581042918, "grad_norm": 312.1383361816406, "learning_rate": 2.83453582183087e-06, "loss": 16.6361, "step": 339160 }, { "epoch": 0.6851448587369756, "grad_norm": 31.36189842224121, "learning_rate": 2.834221196841521e-06, "loss": 15.4187, "step": 339170 }, { "epoch": 0.6851650593696594, "grad_norm": 280.8974914550781, "learning_rate": 2.8339065824079137e-06, "loss": 13.3992, "step": 339180 }, { "epoch": 0.6851852600023433, "grad_norm": 8.085240364074707, "learning_rate": 2.8335919785315854e-06, "loss": 12.2096, "step": 339190 }, { "epoch": 0.6852054606350271, "grad_norm": 114.46437072753906, "learning_rate": 2.8332773852140644e-06, "loss": 12.708, "step": 339200 }, { "epoch": 0.6852256612677109, "grad_norm": 265.13116455078125, "learning_rate": 2.832962802456889e-06, "loss": 12.6, "step": 339210 }, { "epoch": 0.6852458619003947, "grad_norm": 209.31825256347656, "learning_rate": 2.8326482302615875e-06, "loss": 17.8758, "step": 339220 }, { "epoch": 0.6852660625330785, "grad_norm": 129.11228942871094, "learning_rate": 2.832333668629698e-06, "loss": 13.1146, "step": 339230 }, { "epoch": 0.6852862631657624, "grad_norm": 135.97164916992188, "learning_rate": 2.8320191175627517e-06, "loss": 13.9375, "step": 339240 }, { "epoch": 0.6853064637984462, "grad_norm": 411.833740234375, "learning_rate": 2.8317045770622784e-06, "loss": 20.5868, "step": 339250 }, { "epoch": 0.68532666443113, "grad_norm": 208.72357177734375, "learning_rate": 2.831390047129815e-06, "loss": 26.8529, "step": 339260 }, { "epoch": 0.6853468650638138, "grad_norm": 173.89260864257812, "learning_rate": 2.8310755277668966e-06, "loss": 17.9189, "step": 339270 }, { "epoch": 0.6853670656964976, "grad_norm": 125.65071105957031, "learning_rate": 2.8307610189750496e-06, "loss": 15.7048, "step": 339280 }, { "epoch": 0.6853872663291815, "grad_norm": 352.8778991699219, "learning_rate": 2.8304465207558103e-06, "loss": 21.5474, "step": 339290 }, { "epoch": 0.6854074669618653, "grad_norm": 377.7938537597656, "learning_rate": 2.830132033110713e-06, "loss": 21.0909, "step": 339300 }, { "epoch": 0.6854276675945491, "grad_norm": 314.4924011230469, "learning_rate": 2.829817556041289e-06, "loss": 12.7041, "step": 339310 }, { "epoch": 0.6854478682272329, "grad_norm": 514.864501953125, "learning_rate": 2.82950308954907e-06, "loss": 24.3611, "step": 339320 }, { "epoch": 0.6854680688599167, "grad_norm": 421.17535400390625, "learning_rate": 2.829188633635591e-06, "loss": 14.0805, "step": 339330 }, { "epoch": 0.6854882694926006, "grad_norm": 398.4611511230469, "learning_rate": 2.828874188302383e-06, "loss": 19.574, "step": 339340 }, { "epoch": 0.6855084701252844, "grad_norm": 229.5233154296875, "learning_rate": 2.8285597535509775e-06, "loss": 25.7486, "step": 339350 }, { "epoch": 0.6855286707579682, "grad_norm": 87.54146575927734, "learning_rate": 2.8282453293829083e-06, "loss": 11.795, "step": 339360 }, { "epoch": 0.6855488713906519, "grad_norm": 934.9027099609375, "learning_rate": 2.8279309157997093e-06, "loss": 29.5969, "step": 339370 }, { "epoch": 0.6855690720233357, "grad_norm": 474.83758544921875, "learning_rate": 2.827616512802912e-06, "loss": 13.5883, "step": 339380 }, { "epoch": 0.6855892726560195, "grad_norm": 317.31536865234375, "learning_rate": 2.8273021203940466e-06, "loss": 20.6695, "step": 339390 }, { "epoch": 0.6856094732887034, "grad_norm": 491.9213562011719, "learning_rate": 2.826987738574649e-06, "loss": 14.8892, "step": 339400 }, { "epoch": 0.6856296739213872, "grad_norm": 313.0912170410156, "learning_rate": 2.8266733673462497e-06, "loss": 17.0046, "step": 339410 }, { "epoch": 0.685649874554071, "grad_norm": 9.914559364318848, "learning_rate": 2.8263590067103785e-06, "loss": 24.5255, "step": 339420 }, { "epoch": 0.6856700751867548, "grad_norm": 208.270751953125, "learning_rate": 2.8260446566685723e-06, "loss": 14.9028, "step": 339430 }, { "epoch": 0.6856902758194386, "grad_norm": 498.1207580566406, "learning_rate": 2.825730317222358e-06, "loss": 15.6302, "step": 339440 }, { "epoch": 0.6857104764521225, "grad_norm": 259.03045654296875, "learning_rate": 2.8254159883732735e-06, "loss": 13.9694, "step": 339450 }, { "epoch": 0.6857306770848063, "grad_norm": 372.5885925292969, "learning_rate": 2.8251016701228475e-06, "loss": 16.3373, "step": 339460 }, { "epoch": 0.6857508777174901, "grad_norm": 202.61692810058594, "learning_rate": 2.82478736247261e-06, "loss": 19.6051, "step": 339470 }, { "epoch": 0.6857710783501739, "grad_norm": 195.3619384765625, "learning_rate": 2.824473065424096e-06, "loss": 25.686, "step": 339480 }, { "epoch": 0.6857912789828577, "grad_norm": 59.01321029663086, "learning_rate": 2.824158778978838e-06, "loss": 25.8505, "step": 339490 }, { "epoch": 0.6858114796155416, "grad_norm": 138.91726684570312, "learning_rate": 2.8238445031383634e-06, "loss": 17.5435, "step": 339500 }, { "epoch": 0.6858316802482254, "grad_norm": 216.23086547851562, "learning_rate": 2.823530237904207e-06, "loss": 22.8989, "step": 339510 }, { "epoch": 0.6858518808809092, "grad_norm": 341.4381103515625, "learning_rate": 2.8232159832779018e-06, "loss": 16.6567, "step": 339520 }, { "epoch": 0.685872081513593, "grad_norm": 299.5925598144531, "learning_rate": 2.8229017392609782e-06, "loss": 16.5859, "step": 339530 }, { "epoch": 0.6858922821462768, "grad_norm": 289.94622802734375, "learning_rate": 2.8225875058549656e-06, "loss": 10.6966, "step": 339540 }, { "epoch": 0.6859124827789607, "grad_norm": 223.54388427734375, "learning_rate": 2.8222732830613995e-06, "loss": 12.4171, "step": 339550 }, { "epoch": 0.6859326834116445, "grad_norm": 438.7062683105469, "learning_rate": 2.821959070881809e-06, "loss": 12.2003, "step": 339560 }, { "epoch": 0.6859528840443283, "grad_norm": 109.360595703125, "learning_rate": 2.821644869317724e-06, "loss": 15.0864, "step": 339570 }, { "epoch": 0.6859730846770121, "grad_norm": 706.90966796875, "learning_rate": 2.8213306783706774e-06, "loss": 26.0983, "step": 339580 }, { "epoch": 0.6859932853096959, "grad_norm": 226.94696044921875, "learning_rate": 2.821016498042205e-06, "loss": 15.9367, "step": 339590 }, { "epoch": 0.6860134859423798, "grad_norm": 42.13834762573242, "learning_rate": 2.8207023283338304e-06, "loss": 21.0953, "step": 339600 }, { "epoch": 0.6860336865750636, "grad_norm": 147.52127075195312, "learning_rate": 2.820388169247088e-06, "loss": 9.54, "step": 339610 }, { "epoch": 0.6860538872077473, "grad_norm": 408.3017883300781, "learning_rate": 2.820074020783511e-06, "loss": 14.3117, "step": 339620 }, { "epoch": 0.6860740878404311, "grad_norm": 58.523048400878906, "learning_rate": 2.8197598829446294e-06, "loss": 13.0691, "step": 339630 }, { "epoch": 0.6860942884731149, "grad_norm": 88.6180191040039, "learning_rate": 2.819445755731971e-06, "loss": 11.194, "step": 339640 }, { "epoch": 0.6861144891057988, "grad_norm": 232.06723022460938, "learning_rate": 2.8191316391470703e-06, "loss": 15.3004, "step": 339650 }, { "epoch": 0.6861346897384826, "grad_norm": 556.272705078125, "learning_rate": 2.8188175331914608e-06, "loss": 30.7243, "step": 339660 }, { "epoch": 0.6861548903711664, "grad_norm": 167.112060546875, "learning_rate": 2.8185034378666666e-06, "loss": 47.7307, "step": 339670 }, { "epoch": 0.6861750910038502, "grad_norm": 223.5148468017578, "learning_rate": 2.818189353174221e-06, "loss": 10.7417, "step": 339680 }, { "epoch": 0.686195291636534, "grad_norm": 207.59060668945312, "learning_rate": 2.8178752791156593e-06, "loss": 15.3082, "step": 339690 }, { "epoch": 0.6862154922692179, "grad_norm": 264.2839050292969, "learning_rate": 2.8175612156925082e-06, "loss": 22.6776, "step": 339700 }, { "epoch": 0.6862356929019017, "grad_norm": 290.211669921875, "learning_rate": 2.817247162906297e-06, "loss": 11.4765, "step": 339710 }, { "epoch": 0.6862558935345855, "grad_norm": 340.8152770996094, "learning_rate": 2.8169331207585603e-06, "loss": 17.8053, "step": 339720 }, { "epoch": 0.6862760941672693, "grad_norm": 340.97991943359375, "learning_rate": 2.816619089250827e-06, "loss": 20.6355, "step": 339730 }, { "epoch": 0.6862962947999531, "grad_norm": 415.151123046875, "learning_rate": 2.8163050683846256e-06, "loss": 19.8931, "step": 339740 }, { "epoch": 0.686316495432637, "grad_norm": 144.4873504638672, "learning_rate": 2.8159910581614904e-06, "loss": 18.6926, "step": 339750 }, { "epoch": 0.6863366960653208, "grad_norm": 568.6773071289062, "learning_rate": 2.8156770585829475e-06, "loss": 22.8486, "step": 339760 }, { "epoch": 0.6863568966980046, "grad_norm": 363.1664733886719, "learning_rate": 2.815363069650532e-06, "loss": 9.0003, "step": 339770 }, { "epoch": 0.6863770973306884, "grad_norm": 419.908447265625, "learning_rate": 2.8150490913657713e-06, "loss": 22.3024, "step": 339780 }, { "epoch": 0.6863972979633722, "grad_norm": 331.9164123535156, "learning_rate": 2.8147351237301957e-06, "loss": 22.4161, "step": 339790 }, { "epoch": 0.686417498596056, "grad_norm": 396.203125, "learning_rate": 2.814421166745337e-06, "loss": 15.8826, "step": 339800 }, { "epoch": 0.6864376992287399, "grad_norm": 124.1891860961914, "learning_rate": 2.814107220412723e-06, "loss": 22.3197, "step": 339810 }, { "epoch": 0.6864578998614237, "grad_norm": 33.2392692565918, "learning_rate": 2.8137932847338866e-06, "loss": 7.9459, "step": 339820 }, { "epoch": 0.6864781004941075, "grad_norm": 294.9792785644531, "learning_rate": 2.813479359710355e-06, "loss": 14.1076, "step": 339830 }, { "epoch": 0.6864983011267913, "grad_norm": 330.5185546875, "learning_rate": 2.813165445343662e-06, "loss": 25.5452, "step": 339840 }, { "epoch": 0.6865185017594752, "grad_norm": 395.9746398925781, "learning_rate": 2.8128515416353345e-06, "loss": 23.7983, "step": 339850 }, { "epoch": 0.686538702392159, "grad_norm": 625.8849487304688, "learning_rate": 2.8125376485869023e-06, "loss": 17.5696, "step": 339860 }, { "epoch": 0.6865589030248428, "grad_norm": 173.73037719726562, "learning_rate": 2.812223766199898e-06, "loss": 18.5959, "step": 339870 }, { "epoch": 0.6865791036575265, "grad_norm": 171.08273315429688, "learning_rate": 2.8119098944758494e-06, "loss": 11.126, "step": 339880 }, { "epoch": 0.6865993042902103, "grad_norm": 368.9931945800781, "learning_rate": 2.811596033416285e-06, "loss": 11.4446, "step": 339890 }, { "epoch": 0.6866195049228941, "grad_norm": 168.0408172607422, "learning_rate": 2.811282183022736e-06, "loss": 19.7777, "step": 339900 }, { "epoch": 0.686639705555578, "grad_norm": 314.8152770996094, "learning_rate": 2.8109683432967346e-06, "loss": 29.7186, "step": 339910 }, { "epoch": 0.6866599061882618, "grad_norm": 387.86187744140625, "learning_rate": 2.8106545142398073e-06, "loss": 33.1137, "step": 339920 }, { "epoch": 0.6866801068209456, "grad_norm": 106.47785186767578, "learning_rate": 2.810340695853483e-06, "loss": 13.4603, "step": 339930 }, { "epoch": 0.6867003074536294, "grad_norm": 127.79004669189453, "learning_rate": 2.810026888139294e-06, "loss": 12.033, "step": 339940 }, { "epoch": 0.6867205080863132, "grad_norm": 144.76902770996094, "learning_rate": 2.809713091098768e-06, "loss": 29.4184, "step": 339950 }, { "epoch": 0.6867407087189971, "grad_norm": 373.94921875, "learning_rate": 2.8093993047334333e-06, "loss": 14.7346, "step": 339960 }, { "epoch": 0.6867609093516809, "grad_norm": 141.96832275390625, "learning_rate": 2.809085529044821e-06, "loss": 23.0856, "step": 339970 }, { "epoch": 0.6867811099843647, "grad_norm": 616.0906982421875, "learning_rate": 2.808771764034462e-06, "loss": 12.1633, "step": 339980 }, { "epoch": 0.6868013106170485, "grad_norm": 19.50041389465332, "learning_rate": 2.8084580097038834e-06, "loss": 18.9119, "step": 339990 }, { "epoch": 0.6868215112497323, "grad_norm": 9.78260326385498, "learning_rate": 2.8081442660546126e-06, "loss": 15.1902, "step": 340000 }, { "epoch": 0.6868417118824162, "grad_norm": 246.78842163085938, "learning_rate": 2.8078305330881826e-06, "loss": 25.347, "step": 340010 }, { "epoch": 0.6868619125151, "grad_norm": 269.1762390136719, "learning_rate": 2.8075168108061213e-06, "loss": 14.6162, "step": 340020 }, { "epoch": 0.6868821131477838, "grad_norm": 345.9813537597656, "learning_rate": 2.8072030992099552e-06, "loss": 15.4148, "step": 340030 }, { "epoch": 0.6869023137804676, "grad_norm": 748.6514282226562, "learning_rate": 2.806889398301217e-06, "loss": 26.6646, "step": 340040 }, { "epoch": 0.6869225144131514, "grad_norm": 275.6638488769531, "learning_rate": 2.8065757080814315e-06, "loss": 14.4994, "step": 340050 }, { "epoch": 0.6869427150458353, "grad_norm": 196.80052185058594, "learning_rate": 2.8062620285521325e-06, "loss": 17.4936, "step": 340060 }, { "epoch": 0.6869629156785191, "grad_norm": 402.67266845703125, "learning_rate": 2.8059483597148457e-06, "loss": 16.3139, "step": 340070 }, { "epoch": 0.6869831163112029, "grad_norm": 479.2933044433594, "learning_rate": 2.8056347015710987e-06, "loss": 24.0266, "step": 340080 }, { "epoch": 0.6870033169438867, "grad_norm": 335.16510009765625, "learning_rate": 2.805321054122424e-06, "loss": 26.7442, "step": 340090 }, { "epoch": 0.6870235175765705, "grad_norm": 135.63587951660156, "learning_rate": 2.805007417370347e-06, "loss": 11.6539, "step": 340100 }, { "epoch": 0.6870437182092544, "grad_norm": 304.1763610839844, "learning_rate": 2.804693791316399e-06, "loss": 17.7206, "step": 340110 }, { "epoch": 0.6870639188419382, "grad_norm": 216.4729766845703, "learning_rate": 2.8043801759621053e-06, "loss": 16.3793, "step": 340120 }, { "epoch": 0.6870841194746219, "grad_norm": 1090.58056640625, "learning_rate": 2.804066571308998e-06, "loss": 20.4393, "step": 340130 }, { "epoch": 0.6871043201073057, "grad_norm": 293.1880798339844, "learning_rate": 2.8037529773586047e-06, "loss": 16.1893, "step": 340140 }, { "epoch": 0.6871245207399895, "grad_norm": 264.0450134277344, "learning_rate": 2.8034393941124505e-06, "loss": 11.2531, "step": 340150 }, { "epoch": 0.6871447213726734, "grad_norm": 23.019031524658203, "learning_rate": 2.803125821572068e-06, "loss": 18.1912, "step": 340160 }, { "epoch": 0.6871649220053572, "grad_norm": 67.81073760986328, "learning_rate": 2.802812259738984e-06, "loss": 16.3042, "step": 340170 }, { "epoch": 0.687185122638041, "grad_norm": 1.3191018104553223, "learning_rate": 2.8024987086147247e-06, "loss": 15.1347, "step": 340180 }, { "epoch": 0.6872053232707248, "grad_norm": 263.4264831542969, "learning_rate": 2.8021851682008205e-06, "loss": 21.1191, "step": 340190 }, { "epoch": 0.6872255239034086, "grad_norm": 427.7806701660156, "learning_rate": 2.8018716384988034e-06, "loss": 13.2316, "step": 340200 }, { "epoch": 0.6872457245360925, "grad_norm": 338.65081787109375, "learning_rate": 2.8015581195101927e-06, "loss": 24.9303, "step": 340210 }, { "epoch": 0.6872659251687763, "grad_norm": 209.65223693847656, "learning_rate": 2.801244611236521e-06, "loss": 10.7763, "step": 340220 }, { "epoch": 0.6872861258014601, "grad_norm": 91.00494384765625, "learning_rate": 2.800931113679318e-06, "loss": 13.4644, "step": 340230 }, { "epoch": 0.6873063264341439, "grad_norm": 299.9567565917969, "learning_rate": 2.8006176268401107e-06, "loss": 15.7832, "step": 340240 }, { "epoch": 0.6873265270668277, "grad_norm": 559.4628295898438, "learning_rate": 2.800304150720424e-06, "loss": 17.1158, "step": 340250 }, { "epoch": 0.6873467276995116, "grad_norm": 274.56134033203125, "learning_rate": 2.7999906853217885e-06, "loss": 22.0728, "step": 340260 }, { "epoch": 0.6873669283321954, "grad_norm": 210.6419219970703, "learning_rate": 2.7996772306457354e-06, "loss": 12.9209, "step": 340270 }, { "epoch": 0.6873871289648792, "grad_norm": 300.4085388183594, "learning_rate": 2.799363786693785e-06, "loss": 34.8599, "step": 340280 }, { "epoch": 0.687407329597563, "grad_norm": 277.74017333984375, "learning_rate": 2.7990503534674684e-06, "loss": 16.9735, "step": 340290 }, { "epoch": 0.6874275302302468, "grad_norm": 306.0054931640625, "learning_rate": 2.798736930968315e-06, "loss": 19.7926, "step": 340300 }, { "epoch": 0.6874477308629307, "grad_norm": 162.1536407470703, "learning_rate": 2.798423519197851e-06, "loss": 12.5338, "step": 340310 }, { "epoch": 0.6874679314956145, "grad_norm": 17.60430908203125, "learning_rate": 2.798110118157602e-06, "loss": 17.2728, "step": 340320 }, { "epoch": 0.6874881321282983, "grad_norm": 293.7109069824219, "learning_rate": 2.797796727849099e-06, "loss": 14.5096, "step": 340330 }, { "epoch": 0.6875083327609821, "grad_norm": 7.160959243774414, "learning_rate": 2.7974833482738674e-06, "loss": 10.1006, "step": 340340 }, { "epoch": 0.6875285333936659, "grad_norm": 259.8037109375, "learning_rate": 2.7971699794334332e-06, "loss": 24.6745, "step": 340350 }, { "epoch": 0.6875487340263498, "grad_norm": 352.8831787109375, "learning_rate": 2.7968566213293276e-06, "loss": 26.1246, "step": 340360 }, { "epoch": 0.6875689346590336, "grad_norm": 259.9747619628906, "learning_rate": 2.796543273963073e-06, "loss": 15.3169, "step": 340370 }, { "epoch": 0.6875891352917174, "grad_norm": 510.27716064453125, "learning_rate": 2.796229937336202e-06, "loss": 26.9268, "step": 340380 }, { "epoch": 0.6876093359244011, "grad_norm": 197.9273223876953, "learning_rate": 2.795916611450238e-06, "loss": 12.3185, "step": 340390 }, { "epoch": 0.6876295365570849, "grad_norm": 141.8062744140625, "learning_rate": 2.795603296306708e-06, "loss": 15.2211, "step": 340400 }, { "epoch": 0.6876497371897687, "grad_norm": 82.5296859741211, "learning_rate": 2.7952899919071417e-06, "loss": 25.8656, "step": 340410 }, { "epoch": 0.6876699378224526, "grad_norm": 271.29571533203125, "learning_rate": 2.7949766982530624e-06, "loss": 9.0762, "step": 340420 }, { "epoch": 0.6876901384551364, "grad_norm": 419.59063720703125, "learning_rate": 2.7946634153460016e-06, "loss": 18.7889, "step": 340430 }, { "epoch": 0.6877103390878202, "grad_norm": 301.0346984863281, "learning_rate": 2.794350143187482e-06, "loss": 23.3221, "step": 340440 }, { "epoch": 0.687730539720504, "grad_norm": 493.9353332519531, "learning_rate": 2.794036881779034e-06, "loss": 22.6649, "step": 340450 }, { "epoch": 0.6877507403531878, "grad_norm": 248.61807250976562, "learning_rate": 2.7937236311221827e-06, "loss": 19.1316, "step": 340460 }, { "epoch": 0.6877709409858717, "grad_norm": 491.20648193359375, "learning_rate": 2.793410391218453e-06, "loss": 22.3717, "step": 340470 }, { "epoch": 0.6877911416185555, "grad_norm": 135.40472412109375, "learning_rate": 2.7930971620693746e-06, "loss": 14.5522, "step": 340480 }, { "epoch": 0.6878113422512393, "grad_norm": 284.3876647949219, "learning_rate": 2.792783943676474e-06, "loss": 21.7135, "step": 340490 }, { "epoch": 0.6878315428839231, "grad_norm": 135.27139282226562, "learning_rate": 2.7924707360412743e-06, "loss": 22.78, "step": 340500 }, { "epoch": 0.6878517435166069, "grad_norm": 829.8208618164062, "learning_rate": 2.7921575391653048e-06, "loss": 27.8397, "step": 340510 }, { "epoch": 0.6878719441492908, "grad_norm": 6173.36083984375, "learning_rate": 2.791844353050094e-06, "loss": 57.659, "step": 340520 }, { "epoch": 0.6878921447819746, "grad_norm": 530.3958129882812, "learning_rate": 2.7915311776971655e-06, "loss": 15.6974, "step": 340530 }, { "epoch": 0.6879123454146584, "grad_norm": 322.80291748046875, "learning_rate": 2.7912180131080434e-06, "loss": 19.7468, "step": 340540 }, { "epoch": 0.6879325460473422, "grad_norm": 489.6497497558594, "learning_rate": 2.7909048592842602e-06, "loss": 17.3272, "step": 340550 }, { "epoch": 0.687952746680026, "grad_norm": 386.1725769042969, "learning_rate": 2.7905917162273377e-06, "loss": 12.0283, "step": 340560 }, { "epoch": 0.6879729473127099, "grad_norm": 27.580801010131836, "learning_rate": 2.790278583938802e-06, "loss": 9.537, "step": 340570 }, { "epoch": 0.6879931479453937, "grad_norm": 30.262462615966797, "learning_rate": 2.78996546242018e-06, "loss": 13.5578, "step": 340580 }, { "epoch": 0.6880133485780775, "grad_norm": 192.761962890625, "learning_rate": 2.7896523516730005e-06, "loss": 10.3436, "step": 340590 }, { "epoch": 0.6880335492107613, "grad_norm": 343.3981018066406, "learning_rate": 2.7893392516987873e-06, "loss": 11.4366, "step": 340600 }, { "epoch": 0.6880537498434451, "grad_norm": 429.6634826660156, "learning_rate": 2.7890261624990643e-06, "loss": 9.5331, "step": 340610 }, { "epoch": 0.688073950476129, "grad_norm": 294.7521057128906, "learning_rate": 2.788713084075362e-06, "loss": 12.402, "step": 340620 }, { "epoch": 0.6880941511088128, "grad_norm": 483.3129577636719, "learning_rate": 2.7884000164292034e-06, "loss": 12.084, "step": 340630 }, { "epoch": 0.6881143517414965, "grad_norm": 212.83811950683594, "learning_rate": 2.7880869595621134e-06, "loss": 18.6208, "step": 340640 }, { "epoch": 0.6881345523741803, "grad_norm": 194.93597412109375, "learning_rate": 2.787773913475621e-06, "loss": 9.7537, "step": 340650 }, { "epoch": 0.6881547530068641, "grad_norm": 228.0302276611328, "learning_rate": 2.7874608781712486e-06, "loss": 23.0566, "step": 340660 }, { "epoch": 0.688174953639548, "grad_norm": 602.1693725585938, "learning_rate": 2.7871478536505253e-06, "loss": 27.3996, "step": 340670 }, { "epoch": 0.6881951542722318, "grad_norm": 294.9978332519531, "learning_rate": 2.7868348399149747e-06, "loss": 23.4534, "step": 340680 }, { "epoch": 0.6882153549049156, "grad_norm": 148.57330322265625, "learning_rate": 2.786521836966121e-06, "loss": 19.2814, "step": 340690 }, { "epoch": 0.6882355555375994, "grad_norm": 186.44735717773438, "learning_rate": 2.7862088448054936e-06, "loss": 18.1762, "step": 340700 }, { "epoch": 0.6882557561702832, "grad_norm": 60.999366760253906, "learning_rate": 2.7858958634346132e-06, "loss": 16.5648, "step": 340710 }, { "epoch": 0.688275956802967, "grad_norm": 181.47361755371094, "learning_rate": 2.78558289285501e-06, "loss": 12.396, "step": 340720 }, { "epoch": 0.6882961574356509, "grad_norm": 923.2548828125, "learning_rate": 2.7852699330682056e-06, "loss": 16.8675, "step": 340730 }, { "epoch": 0.6883163580683347, "grad_norm": 303.1159973144531, "learning_rate": 2.7849569840757284e-06, "loss": 22.5029, "step": 340740 }, { "epoch": 0.6883365587010185, "grad_norm": 210.6165771484375, "learning_rate": 2.7846440458791024e-06, "loss": 22.205, "step": 340750 }, { "epoch": 0.6883567593337023, "grad_norm": 213.90054321289062, "learning_rate": 2.784331118479851e-06, "loss": 16.1712, "step": 340760 }, { "epoch": 0.6883769599663861, "grad_norm": 209.6103057861328, "learning_rate": 2.7840182018795025e-06, "loss": 10.9281, "step": 340770 }, { "epoch": 0.68839716059907, "grad_norm": 893.9132080078125, "learning_rate": 2.7837052960795807e-06, "loss": 26.5943, "step": 340780 }, { "epoch": 0.6884173612317538, "grad_norm": 169.06317138671875, "learning_rate": 2.783392401081609e-06, "loss": 17.5806, "step": 340790 }, { "epoch": 0.6884375618644376, "grad_norm": 303.6059265136719, "learning_rate": 2.7830795168871127e-06, "loss": 14.8916, "step": 340800 }, { "epoch": 0.6884577624971214, "grad_norm": 169.93405151367188, "learning_rate": 2.782766643497623e-06, "loss": 12.5936, "step": 340810 }, { "epoch": 0.6884779631298052, "grad_norm": 425.8782043457031, "learning_rate": 2.7824537809146555e-06, "loss": 15.654, "step": 340820 }, { "epoch": 0.6884981637624891, "grad_norm": 184.14013671875, "learning_rate": 2.7821409291397394e-06, "loss": 15.5141, "step": 340830 }, { "epoch": 0.6885183643951729, "grad_norm": 441.58184814453125, "learning_rate": 2.7818280881744007e-06, "loss": 20.81, "step": 340840 }, { "epoch": 0.6885385650278567, "grad_norm": 355.8289489746094, "learning_rate": 2.7815152580201637e-06, "loss": 19.2715, "step": 340850 }, { "epoch": 0.6885587656605405, "grad_norm": 63.84025573730469, "learning_rate": 2.7812024386785495e-06, "loss": 15.7396, "step": 340860 }, { "epoch": 0.6885789662932243, "grad_norm": 35.13255310058594, "learning_rate": 2.7808896301510867e-06, "loss": 13.2578, "step": 340870 }, { "epoch": 0.6885991669259082, "grad_norm": 789.5782470703125, "learning_rate": 2.7805768324393017e-06, "loss": 24.5819, "step": 340880 }, { "epoch": 0.688619367558592, "grad_norm": 28.122760772705078, "learning_rate": 2.7802640455447123e-06, "loss": 9.1132, "step": 340890 }, { "epoch": 0.6886395681912757, "grad_norm": 255.1892852783203, "learning_rate": 2.779951269468847e-06, "loss": 11.7301, "step": 340900 }, { "epoch": 0.6886597688239595, "grad_norm": 170.89015197753906, "learning_rate": 2.779638504213231e-06, "loss": 9.4827, "step": 340910 }, { "epoch": 0.6886799694566433, "grad_norm": 22.775035858154297, "learning_rate": 2.7793257497793892e-06, "loss": 11.2175, "step": 340920 }, { "epoch": 0.6887001700893272, "grad_norm": 292.677490234375, "learning_rate": 2.7790130061688416e-06, "loss": 12.2483, "step": 340930 }, { "epoch": 0.688720370722011, "grad_norm": 276.52117919921875, "learning_rate": 2.7787002733831166e-06, "loss": 18.2052, "step": 340940 }, { "epoch": 0.6887405713546948, "grad_norm": 137.6195526123047, "learning_rate": 2.7783875514237373e-06, "loss": 14.0167, "step": 340950 }, { "epoch": 0.6887607719873786, "grad_norm": 55.18342208862305, "learning_rate": 2.7780748402922263e-06, "loss": 9.3758, "step": 340960 }, { "epoch": 0.6887809726200624, "grad_norm": 312.9708251953125, "learning_rate": 2.77776213999011e-06, "loss": 24.9783, "step": 340970 }, { "epoch": 0.6888011732527463, "grad_norm": 46.63174819946289, "learning_rate": 2.777449450518911e-06, "loss": 14.5769, "step": 340980 }, { "epoch": 0.6888213738854301, "grad_norm": 3.8797006607055664, "learning_rate": 2.7771367718801546e-06, "loss": 10.2696, "step": 340990 }, { "epoch": 0.6888415745181139, "grad_norm": 583.0717163085938, "learning_rate": 2.776824104075364e-06, "loss": 25.7372, "step": 341000 }, { "epoch": 0.6888617751507977, "grad_norm": 0.0, "learning_rate": 2.776511447106062e-06, "loss": 4.5691, "step": 341010 }, { "epoch": 0.6888819757834815, "grad_norm": 416.3438415527344, "learning_rate": 2.7761988009737746e-06, "loss": 18.1692, "step": 341020 }, { "epoch": 0.6889021764161654, "grad_norm": 84.36569213867188, "learning_rate": 2.7758861656800227e-06, "loss": 10.1953, "step": 341030 }, { "epoch": 0.6889223770488492, "grad_norm": 283.19720458984375, "learning_rate": 2.775573541226334e-06, "loss": 14.4463, "step": 341040 }, { "epoch": 0.688942577681533, "grad_norm": 318.64886474609375, "learning_rate": 2.7752609276142282e-06, "loss": 16.6373, "step": 341050 }, { "epoch": 0.6889627783142168, "grad_norm": 208.3494873046875, "learning_rate": 2.7749483248452324e-06, "loss": 14.839, "step": 341060 }, { "epoch": 0.6889829789469006, "grad_norm": 180.2059783935547, "learning_rate": 2.7746357329208693e-06, "loss": 9.0927, "step": 341070 }, { "epoch": 0.6890031795795845, "grad_norm": 82.21674346923828, "learning_rate": 2.7743231518426594e-06, "loss": 25.1301, "step": 341080 }, { "epoch": 0.6890233802122683, "grad_norm": 264.3471984863281, "learning_rate": 2.7740105816121306e-06, "loss": 19.2503, "step": 341090 }, { "epoch": 0.6890435808449521, "grad_norm": 177.59988403320312, "learning_rate": 2.7736980222308042e-06, "loss": 15.8212, "step": 341100 }, { "epoch": 0.6890637814776359, "grad_norm": 0.0, "learning_rate": 2.773385473700201e-06, "loss": 23.2588, "step": 341110 }, { "epoch": 0.6890839821103197, "grad_norm": 260.74603271484375, "learning_rate": 2.7730729360218478e-06, "loss": 19.9589, "step": 341120 }, { "epoch": 0.6891041827430036, "grad_norm": 155.02992248535156, "learning_rate": 2.7727604091972687e-06, "loss": 11.0473, "step": 341130 }, { "epoch": 0.6891243833756874, "grad_norm": 206.4021759033203, "learning_rate": 2.772447893227985e-06, "loss": 14.4769, "step": 341140 }, { "epoch": 0.6891445840083712, "grad_norm": 285.6437072753906, "learning_rate": 2.772135388115519e-06, "loss": 14.7653, "step": 341150 }, { "epoch": 0.6891647846410549, "grad_norm": 274.6597595214844, "learning_rate": 2.7718228938613955e-06, "loss": 8.2765, "step": 341160 }, { "epoch": 0.6891849852737387, "grad_norm": 248.26171875, "learning_rate": 2.7715104104671377e-06, "loss": 21.5744, "step": 341170 }, { "epoch": 0.6892051859064225, "grad_norm": 174.59637451171875, "learning_rate": 2.7711979379342658e-06, "loss": 21.8561, "step": 341180 }, { "epoch": 0.6892253865391064, "grad_norm": 461.5825500488281, "learning_rate": 2.7708854762643055e-06, "loss": 23.8102, "step": 341190 }, { "epoch": 0.6892455871717902, "grad_norm": 318.005615234375, "learning_rate": 2.7705730254587802e-06, "loss": 35.0691, "step": 341200 }, { "epoch": 0.689265787804474, "grad_norm": 31.433475494384766, "learning_rate": 2.770260585519212e-06, "loss": 15.677, "step": 341210 }, { "epoch": 0.6892859884371578, "grad_norm": 168.8619384765625, "learning_rate": 2.769948156447121e-06, "loss": 12.831, "step": 341220 }, { "epoch": 0.6893061890698416, "grad_norm": 441.2991027832031, "learning_rate": 2.7696357382440344e-06, "loss": 18.5693, "step": 341230 }, { "epoch": 0.6893263897025255, "grad_norm": 188.9196014404297, "learning_rate": 2.769323330911472e-06, "loss": 16.6694, "step": 341240 }, { "epoch": 0.6893465903352093, "grad_norm": 436.6351318359375, "learning_rate": 2.7690109344509563e-06, "loss": 13.7446, "step": 341250 }, { "epoch": 0.6893667909678931, "grad_norm": 410.3186340332031, "learning_rate": 2.768698548864012e-06, "loss": 12.834, "step": 341260 }, { "epoch": 0.6893869916005769, "grad_norm": 0.0, "learning_rate": 2.768386174152159e-06, "loss": 16.0137, "step": 341270 }, { "epoch": 0.6894071922332607, "grad_norm": 302.7804870605469, "learning_rate": 2.7680738103169223e-06, "loss": 23.9539, "step": 341280 }, { "epoch": 0.6894273928659446, "grad_norm": 189.09552001953125, "learning_rate": 2.7677614573598232e-06, "loss": 19.8784, "step": 341290 }, { "epoch": 0.6894475934986284, "grad_norm": 103.01606750488281, "learning_rate": 2.7674491152823825e-06, "loss": 12.8659, "step": 341300 }, { "epoch": 0.6894677941313122, "grad_norm": 225.367431640625, "learning_rate": 2.7671367840861256e-06, "loss": 13.1195, "step": 341310 }, { "epoch": 0.689487994763996, "grad_norm": 260.82989501953125, "learning_rate": 2.766824463772572e-06, "loss": 48.0352, "step": 341320 }, { "epoch": 0.6895081953966798, "grad_norm": 185.74270629882812, "learning_rate": 2.766512154343246e-06, "loss": 16.0125, "step": 341330 }, { "epoch": 0.6895283960293637, "grad_norm": 55.44172668457031, "learning_rate": 2.766199855799667e-06, "loss": 10.5074, "step": 341340 }, { "epoch": 0.6895485966620475, "grad_norm": 421.7201843261719, "learning_rate": 2.765887568143362e-06, "loss": 20.1338, "step": 341350 }, { "epoch": 0.6895687972947313, "grad_norm": 432.34063720703125, "learning_rate": 2.7655752913758494e-06, "loss": 23.0531, "step": 341360 }, { "epoch": 0.6895889979274151, "grad_norm": 242.3428497314453, "learning_rate": 2.76526302549865e-06, "loss": 21.3343, "step": 341370 }, { "epoch": 0.689609198560099, "grad_norm": 126.8756332397461, "learning_rate": 2.7649507705132894e-06, "loss": 14.3651, "step": 341380 }, { "epoch": 0.6896293991927828, "grad_norm": 316.8723449707031, "learning_rate": 2.764638526421287e-06, "loss": 15.0102, "step": 341390 }, { "epoch": 0.6896495998254666, "grad_norm": 227.7664794921875, "learning_rate": 2.7643262932241642e-06, "loss": 15.8536, "step": 341400 }, { "epoch": 0.6896698004581503, "grad_norm": 336.2080383300781, "learning_rate": 2.7640140709234444e-06, "loss": 19.3386, "step": 341410 }, { "epoch": 0.6896900010908341, "grad_norm": 229.47789001464844, "learning_rate": 2.763701859520652e-06, "loss": 20.1041, "step": 341420 }, { "epoch": 0.6897102017235179, "grad_norm": 209.94549560546875, "learning_rate": 2.7633896590173014e-06, "loss": 12.1767, "step": 341430 }, { "epoch": 0.6897304023562018, "grad_norm": 3.371816396713257, "learning_rate": 2.763077469414919e-06, "loss": 13.3829, "step": 341440 }, { "epoch": 0.6897506029888856, "grad_norm": 157.7925567626953, "learning_rate": 2.7627652907150272e-06, "loss": 17.0306, "step": 341450 }, { "epoch": 0.6897708036215694, "grad_norm": 341.9382019042969, "learning_rate": 2.7624531229191453e-06, "loss": 14.4435, "step": 341460 }, { "epoch": 0.6897910042542532, "grad_norm": 147.28314208984375, "learning_rate": 2.7621409660287944e-06, "loss": 17.5242, "step": 341470 }, { "epoch": 0.689811204886937, "grad_norm": 522.7060546875, "learning_rate": 2.7618288200454966e-06, "loss": 18.4653, "step": 341480 }, { "epoch": 0.6898314055196209, "grad_norm": 444.67919921875, "learning_rate": 2.7615166849707786e-06, "loss": 18.711, "step": 341490 }, { "epoch": 0.6898516061523047, "grad_norm": 549.37646484375, "learning_rate": 2.761204560806152e-06, "loss": 29.2091, "step": 341500 }, { "epoch": 0.6898718067849885, "grad_norm": 0.0, "learning_rate": 2.760892447553143e-06, "loss": 9.6891, "step": 341510 }, { "epoch": 0.6898920074176723, "grad_norm": 194.48849487304688, "learning_rate": 2.7605803452132753e-06, "loss": 15.7047, "step": 341520 }, { "epoch": 0.6899122080503561, "grad_norm": 86.70940399169922, "learning_rate": 2.7602682537880663e-06, "loss": 9.3729, "step": 341530 }, { "epoch": 0.68993240868304, "grad_norm": 394.42193603515625, "learning_rate": 2.7599561732790364e-06, "loss": 12.3349, "step": 341540 }, { "epoch": 0.6899526093157238, "grad_norm": 259.52423095703125, "learning_rate": 2.759644103687711e-06, "loss": 20.2, "step": 341550 }, { "epoch": 0.6899728099484076, "grad_norm": 0.0, "learning_rate": 2.759332045015608e-06, "loss": 16.2145, "step": 341560 }, { "epoch": 0.6899930105810914, "grad_norm": 237.67442321777344, "learning_rate": 2.759019997264247e-06, "loss": 15.8497, "step": 341570 }, { "epoch": 0.6900132112137752, "grad_norm": 768.954833984375, "learning_rate": 2.758707960435153e-06, "loss": 26.1659, "step": 341580 }, { "epoch": 0.690033411846459, "grad_norm": 135.27297973632812, "learning_rate": 2.7583959345298416e-06, "loss": 23.0318, "step": 341590 }, { "epoch": 0.6900536124791429, "grad_norm": 224.9412841796875, "learning_rate": 2.7580839195498397e-06, "loss": 18.6575, "step": 341600 }, { "epoch": 0.6900738131118267, "grad_norm": 277.32647705078125, "learning_rate": 2.757771915496662e-06, "loss": 27.8354, "step": 341610 }, { "epoch": 0.6900940137445105, "grad_norm": 367.2697448730469, "learning_rate": 2.7574599223718347e-06, "loss": 13.7816, "step": 341620 }, { "epoch": 0.6901142143771943, "grad_norm": 389.6089172363281, "learning_rate": 2.7571479401768754e-06, "loss": 25.425, "step": 341630 }, { "epoch": 0.6901344150098782, "grad_norm": 119.62403869628906, "learning_rate": 2.7568359689133027e-06, "loss": 14.9218, "step": 341640 }, { "epoch": 0.690154615642562, "grad_norm": 450.5560302734375, "learning_rate": 2.7565240085826423e-06, "loss": 16.8854, "step": 341650 }, { "epoch": 0.6901748162752458, "grad_norm": 341.7157897949219, "learning_rate": 2.7562120591864093e-06, "loss": 13.8976, "step": 341660 }, { "epoch": 0.6901950169079295, "grad_norm": 160.52012634277344, "learning_rate": 2.755900120726128e-06, "loss": 11.3964, "step": 341670 }, { "epoch": 0.6902152175406133, "grad_norm": 78.66749572753906, "learning_rate": 2.7555881932033186e-06, "loss": 18.9098, "step": 341680 }, { "epoch": 0.6902354181732971, "grad_norm": 307.0433044433594, "learning_rate": 2.7552762766194975e-06, "loss": 8.6636, "step": 341690 }, { "epoch": 0.690255618805981, "grad_norm": 116.51161193847656, "learning_rate": 2.75496437097619e-06, "loss": 17.212, "step": 341700 }, { "epoch": 0.6902758194386648, "grad_norm": 256.6884460449219, "learning_rate": 2.7546524762749126e-06, "loss": 23.3818, "step": 341710 }, { "epoch": 0.6902960200713486, "grad_norm": 445.5699768066406, "learning_rate": 2.7543405925171855e-06, "loss": 22.4573, "step": 341720 }, { "epoch": 0.6903162207040324, "grad_norm": 450.0277099609375, "learning_rate": 2.75402871970453e-06, "loss": 22.7461, "step": 341730 }, { "epoch": 0.6903364213367162, "grad_norm": 204.2549285888672, "learning_rate": 2.7537168578384703e-06, "loss": 19.0349, "step": 341740 }, { "epoch": 0.6903566219694001, "grad_norm": 4.9850993156433105, "learning_rate": 2.753405006920518e-06, "loss": 25.3727, "step": 341750 }, { "epoch": 0.6903768226020839, "grad_norm": 112.3517837524414, "learning_rate": 2.7530931669521975e-06, "loss": 23.3093, "step": 341760 }, { "epoch": 0.6903970232347677, "grad_norm": 124.19197082519531, "learning_rate": 2.75278133793503e-06, "loss": 7.9564, "step": 341770 }, { "epoch": 0.6904172238674515, "grad_norm": 649.01220703125, "learning_rate": 2.752469519870534e-06, "loss": 26.4093, "step": 341780 }, { "epoch": 0.6904374245001353, "grad_norm": 196.8349609375, "learning_rate": 2.752157712760226e-06, "loss": 7.5557, "step": 341790 }, { "epoch": 0.6904576251328192, "grad_norm": 293.317626953125, "learning_rate": 2.75184591660563e-06, "loss": 15.3541, "step": 341800 }, { "epoch": 0.690477825765503, "grad_norm": 191.6157989501953, "learning_rate": 2.7515341314082657e-06, "loss": 23.9486, "step": 341810 }, { "epoch": 0.6904980263981868, "grad_norm": 432.789306640625, "learning_rate": 2.7512223571696515e-06, "loss": 19.4279, "step": 341820 }, { "epoch": 0.6905182270308706, "grad_norm": 230.3594970703125, "learning_rate": 2.750910593891305e-06, "loss": 18.3598, "step": 341830 }, { "epoch": 0.6905384276635544, "grad_norm": 131.36083984375, "learning_rate": 2.7505988415747486e-06, "loss": 11.9883, "step": 341840 }, { "epoch": 0.6905586282962383, "grad_norm": 901.9329223632812, "learning_rate": 2.7502871002215016e-06, "loss": 31.5508, "step": 341850 }, { "epoch": 0.6905788289289221, "grad_norm": 1231.3544921875, "learning_rate": 2.74997536983308e-06, "loss": 33.2476, "step": 341860 }, { "epoch": 0.6905990295616059, "grad_norm": 10.289811134338379, "learning_rate": 2.7496636504110077e-06, "loss": 19.3662, "step": 341870 }, { "epoch": 0.6906192301942897, "grad_norm": 173.14378356933594, "learning_rate": 2.7493519419567995e-06, "loss": 15.0695, "step": 341880 }, { "epoch": 0.6906394308269735, "grad_norm": 263.6166687011719, "learning_rate": 2.749040244471979e-06, "loss": 26.1531, "step": 341890 }, { "epoch": 0.6906596314596574, "grad_norm": 301.2082824707031, "learning_rate": 2.7487285579580635e-06, "loss": 29.3147, "step": 341900 }, { "epoch": 0.6906798320923412, "grad_norm": 421.2867126464844, "learning_rate": 2.7484168824165702e-06, "loss": 20.4113, "step": 341910 }, { "epoch": 0.6907000327250249, "grad_norm": 243.79605102539062, "learning_rate": 2.748105217849022e-06, "loss": 19.2626, "step": 341920 }, { "epoch": 0.6907202333577087, "grad_norm": 345.16552734375, "learning_rate": 2.747793564256933e-06, "loss": 11.9275, "step": 341930 }, { "epoch": 0.6907404339903925, "grad_norm": 338.89984130859375, "learning_rate": 2.747481921641828e-06, "loss": 12.8213, "step": 341940 }, { "epoch": 0.6907606346230764, "grad_norm": 142.1809539794922, "learning_rate": 2.7471702900052204e-06, "loss": 9.9142, "step": 341950 }, { "epoch": 0.6907808352557602, "grad_norm": 190.35525512695312, "learning_rate": 2.746858669348634e-06, "loss": 12.1233, "step": 341960 }, { "epoch": 0.690801035888444, "grad_norm": 79.70649719238281, "learning_rate": 2.7465470596735843e-06, "loss": 15.5349, "step": 341970 }, { "epoch": 0.6908212365211278, "grad_norm": 13.29676342010498, "learning_rate": 2.74623546098159e-06, "loss": 8.6642, "step": 341980 }, { "epoch": 0.6908414371538116, "grad_norm": 140.2567596435547, "learning_rate": 2.745923873274172e-06, "loss": 22.1159, "step": 341990 }, { "epoch": 0.6908616377864955, "grad_norm": 253.65176391601562, "learning_rate": 2.7456122965528475e-06, "loss": 12.9124, "step": 342000 }, { "epoch": 0.6908818384191793, "grad_norm": 132.8623504638672, "learning_rate": 2.745300730819134e-06, "loss": 18.9456, "step": 342010 }, { "epoch": 0.6909020390518631, "grad_norm": 265.0022888183594, "learning_rate": 2.7449891760745504e-06, "loss": 23.2335, "step": 342020 }, { "epoch": 0.6909222396845469, "grad_norm": 108.9799575805664, "learning_rate": 2.744677632320621e-06, "loss": 12.1314, "step": 342030 }, { "epoch": 0.6909424403172307, "grad_norm": 380.326904296875, "learning_rate": 2.744366099558855e-06, "loss": 23.194, "step": 342040 }, { "epoch": 0.6909626409499146, "grad_norm": 84.37869262695312, "learning_rate": 2.7440545777907747e-06, "loss": 21.711, "step": 342050 }, { "epoch": 0.6909828415825984, "grad_norm": 1182.5955810546875, "learning_rate": 2.743743067017901e-06, "loss": 36.9169, "step": 342060 }, { "epoch": 0.6910030422152822, "grad_norm": 184.29852294921875, "learning_rate": 2.7434315672417493e-06, "loss": 11.7107, "step": 342070 }, { "epoch": 0.691023242847966, "grad_norm": 318.0179748535156, "learning_rate": 2.743120078463837e-06, "loss": 15.526, "step": 342080 }, { "epoch": 0.6910434434806498, "grad_norm": 270.5400695800781, "learning_rate": 2.7428086006856843e-06, "loss": 14.5027, "step": 342090 }, { "epoch": 0.6910636441133337, "grad_norm": 0.0, "learning_rate": 2.742497133908812e-06, "loss": 13.528, "step": 342100 }, { "epoch": 0.6910838447460175, "grad_norm": 241.1555633544922, "learning_rate": 2.7421856781347313e-06, "loss": 14.072, "step": 342110 }, { "epoch": 0.6911040453787013, "grad_norm": 481.85693359375, "learning_rate": 2.7418742333649628e-06, "loss": 12.3749, "step": 342120 }, { "epoch": 0.6911242460113851, "grad_norm": 56.153568267822266, "learning_rate": 2.7415627996010287e-06, "loss": 16.9773, "step": 342130 }, { "epoch": 0.6911444466440689, "grad_norm": 281.88775634765625, "learning_rate": 2.741251376844443e-06, "loss": 30.5029, "step": 342140 }, { "epoch": 0.6911646472767528, "grad_norm": 187.47096252441406, "learning_rate": 2.7409399650967217e-06, "loss": 15.0932, "step": 342150 }, { "epoch": 0.6911848479094366, "grad_norm": 245.1338653564453, "learning_rate": 2.7406285643593875e-06, "loss": 22.5773, "step": 342160 }, { "epoch": 0.6912050485421204, "grad_norm": 252.22482299804688, "learning_rate": 2.740317174633955e-06, "loss": 11.2142, "step": 342170 }, { "epoch": 0.6912252491748041, "grad_norm": 302.16412353515625, "learning_rate": 2.7400057959219416e-06, "loss": 14.72, "step": 342180 }, { "epoch": 0.6912454498074879, "grad_norm": 420.3443298339844, "learning_rate": 2.7396944282248672e-06, "loss": 16.3042, "step": 342190 }, { "epoch": 0.6912656504401717, "grad_norm": 375.9707336425781, "learning_rate": 2.739383071544246e-06, "loss": 16.9783, "step": 342200 }, { "epoch": 0.6912858510728556, "grad_norm": 293.03814697265625, "learning_rate": 2.7390717258816003e-06, "loss": 29.6855, "step": 342210 }, { "epoch": 0.6913060517055394, "grad_norm": 211.92660522460938, "learning_rate": 2.738760391238442e-06, "loss": 19.3171, "step": 342220 }, { "epoch": 0.6913262523382232, "grad_norm": 15.83637809753418, "learning_rate": 2.7384490676162932e-06, "loss": 16.9785, "step": 342230 }, { "epoch": 0.691346452970907, "grad_norm": 32.71183776855469, "learning_rate": 2.73813775501667e-06, "loss": 12.3276, "step": 342240 }, { "epoch": 0.6913666536035908, "grad_norm": 230.49520874023438, "learning_rate": 2.7378264534410865e-06, "loss": 20.489, "step": 342250 }, { "epoch": 0.6913868542362747, "grad_norm": 71.25537109375, "learning_rate": 2.7375151628910645e-06, "loss": 26.4358, "step": 342260 }, { "epoch": 0.6914070548689585, "grad_norm": 328.27960205078125, "learning_rate": 2.7372038833681176e-06, "loss": 11.9042, "step": 342270 }, { "epoch": 0.6914272555016423, "grad_norm": 205.02017211914062, "learning_rate": 2.7368926148737663e-06, "loss": 18.6762, "step": 342280 }, { "epoch": 0.6914474561343261, "grad_norm": 263.44818115234375, "learning_rate": 2.736581357409526e-06, "loss": 26.4917, "step": 342290 }, { "epoch": 0.6914676567670099, "grad_norm": 67.21831512451172, "learning_rate": 2.736270110976912e-06, "loss": 9.691, "step": 342300 }, { "epoch": 0.6914878573996938, "grad_norm": 133.0511474609375, "learning_rate": 2.7359588755774437e-06, "loss": 24.1192, "step": 342310 }, { "epoch": 0.6915080580323776, "grad_norm": 544.6162719726562, "learning_rate": 2.7356476512126386e-06, "loss": 22.6008, "step": 342320 }, { "epoch": 0.6915282586650614, "grad_norm": 199.18919372558594, "learning_rate": 2.7353364378840096e-06, "loss": 18.3942, "step": 342330 }, { "epoch": 0.6915484592977452, "grad_norm": 372.781005859375, "learning_rate": 2.735025235593076e-06, "loss": 22.3254, "step": 342340 }, { "epoch": 0.691568659930429, "grad_norm": 336.9535827636719, "learning_rate": 2.734714044341359e-06, "loss": 24.4345, "step": 342350 }, { "epoch": 0.6915888605631129, "grad_norm": 254.72177124023438, "learning_rate": 2.7344028641303667e-06, "loss": 11.2093, "step": 342360 }, { "epoch": 0.6916090611957967, "grad_norm": 208.11685180664062, "learning_rate": 2.7340916949616204e-06, "loss": 48.7933, "step": 342370 }, { "epoch": 0.6916292618284805, "grad_norm": 469.31951904296875, "learning_rate": 2.733780536836638e-06, "loss": 17.6818, "step": 342380 }, { "epoch": 0.6916494624611643, "grad_norm": 237.2303466796875, "learning_rate": 2.733469389756934e-06, "loss": 15.7424, "step": 342390 }, { "epoch": 0.6916696630938481, "grad_norm": 320.9919738769531, "learning_rate": 2.7331582537240243e-06, "loss": 20.0666, "step": 342400 }, { "epoch": 0.691689863726532, "grad_norm": 474.9415283203125, "learning_rate": 2.7328471287394265e-06, "loss": 22.3151, "step": 342410 }, { "epoch": 0.6917100643592158, "grad_norm": 505.9415588378906, "learning_rate": 2.7325360148046598e-06, "loss": 24.9847, "step": 342420 }, { "epoch": 0.6917302649918996, "grad_norm": 383.67584228515625, "learning_rate": 2.732224911921234e-06, "loss": 9.1689, "step": 342430 }, { "epoch": 0.6917504656245833, "grad_norm": 149.74673461914062, "learning_rate": 2.731913820090669e-06, "loss": 23.7714, "step": 342440 }, { "epoch": 0.6917706662572671, "grad_norm": 244.2144317626953, "learning_rate": 2.7316027393144827e-06, "loss": 30.3117, "step": 342450 }, { "epoch": 0.691790866889951, "grad_norm": 84.88583374023438, "learning_rate": 2.73129166959419e-06, "loss": 11.7452, "step": 342460 }, { "epoch": 0.6918110675226348, "grad_norm": 451.3070373535156, "learning_rate": 2.730980610931304e-06, "loss": 16.1434, "step": 342470 }, { "epoch": 0.6918312681553186, "grad_norm": 252.49594116210938, "learning_rate": 2.7306695633273454e-06, "loss": 10.2724, "step": 342480 }, { "epoch": 0.6918514687880024, "grad_norm": 66.78912353515625, "learning_rate": 2.7303585267838263e-06, "loss": 16.1678, "step": 342490 }, { "epoch": 0.6918716694206862, "grad_norm": 346.90985107421875, "learning_rate": 2.7300475013022666e-06, "loss": 20.1226, "step": 342500 }, { "epoch": 0.69189187005337, "grad_norm": 323.6413879394531, "learning_rate": 2.7297364868841803e-06, "loss": 15.6195, "step": 342510 }, { "epoch": 0.6919120706860539, "grad_norm": 303.3016357421875, "learning_rate": 2.72942548353108e-06, "loss": 10.8086, "step": 342520 }, { "epoch": 0.6919322713187377, "grad_norm": 197.99685668945312, "learning_rate": 2.729114491244487e-06, "loss": 10.726, "step": 342530 }, { "epoch": 0.6919524719514215, "grad_norm": 303.71832275390625, "learning_rate": 2.728803510025913e-06, "loss": 18.5186, "step": 342540 }, { "epoch": 0.6919726725841053, "grad_norm": 323.1642150878906, "learning_rate": 2.728492539876877e-06, "loss": 14.0504, "step": 342550 }, { "epoch": 0.6919928732167892, "grad_norm": 361.6695861816406, "learning_rate": 2.728181580798891e-06, "loss": 11.3928, "step": 342560 }, { "epoch": 0.692013073849473, "grad_norm": 172.9194793701172, "learning_rate": 2.727870632793474e-06, "loss": 12.9563, "step": 342570 }, { "epoch": 0.6920332744821568, "grad_norm": 342.8000183105469, "learning_rate": 2.7275596958621397e-06, "loss": 28.0032, "step": 342580 }, { "epoch": 0.6920534751148406, "grad_norm": 520.6215209960938, "learning_rate": 2.7272487700064027e-06, "loss": 23.6152, "step": 342590 }, { "epoch": 0.6920736757475244, "grad_norm": 215.4933624267578, "learning_rate": 2.726937855227781e-06, "loss": 33.8782, "step": 342600 }, { "epoch": 0.6920938763802083, "grad_norm": 350.8011169433594, "learning_rate": 2.7266269515277887e-06, "loss": 13.9652, "step": 342610 }, { "epoch": 0.6921140770128921, "grad_norm": 238.95143127441406, "learning_rate": 2.7263160589079385e-06, "loss": 33.8557, "step": 342620 }, { "epoch": 0.6921342776455759, "grad_norm": 253.28363037109375, "learning_rate": 2.726005177369749e-06, "loss": 19.1592, "step": 342630 }, { "epoch": 0.6921544782782597, "grad_norm": 205.84927368164062, "learning_rate": 2.7256943069147375e-06, "loss": 4.1795, "step": 342640 }, { "epoch": 0.6921746789109435, "grad_norm": 438.2761535644531, "learning_rate": 2.7253834475444126e-06, "loss": 20.4323, "step": 342650 }, { "epoch": 0.6921948795436274, "grad_norm": 300.16741943359375, "learning_rate": 2.7250725992602926e-06, "loss": 13.0166, "step": 342660 }, { "epoch": 0.6922150801763112, "grad_norm": 363.2713317871094, "learning_rate": 2.724761762063895e-06, "loss": 12.1546, "step": 342670 }, { "epoch": 0.692235280808995, "grad_norm": 537.4882202148438, "learning_rate": 2.724450935956733e-06, "loss": 17.7037, "step": 342680 }, { "epoch": 0.6922554814416787, "grad_norm": 568.9134521484375, "learning_rate": 2.7241401209403185e-06, "loss": 20.5459, "step": 342690 }, { "epoch": 0.6922756820743625, "grad_norm": 188.62692260742188, "learning_rate": 2.723829317016169e-06, "loss": 13.1501, "step": 342700 }, { "epoch": 0.6922958827070463, "grad_norm": 314.7463073730469, "learning_rate": 2.723518524185804e-06, "loss": 27.495, "step": 342710 }, { "epoch": 0.6923160833397302, "grad_norm": 74.24058532714844, "learning_rate": 2.723207742450729e-06, "loss": 6.0177, "step": 342720 }, { "epoch": 0.692336283972414, "grad_norm": 632.0563354492188, "learning_rate": 2.7228969718124638e-06, "loss": 25.347, "step": 342730 }, { "epoch": 0.6923564846050978, "grad_norm": 180.76133728027344, "learning_rate": 2.7225862122725243e-06, "loss": 26.9047, "step": 342740 }, { "epoch": 0.6923766852377816, "grad_norm": 255.9215545654297, "learning_rate": 2.7222754638324235e-06, "loss": 17.3395, "step": 342750 }, { "epoch": 0.6923968858704654, "grad_norm": 385.28289794921875, "learning_rate": 2.7219647264936733e-06, "loss": 26.2885, "step": 342760 }, { "epoch": 0.6924170865031493, "grad_norm": 10.766157150268555, "learning_rate": 2.7216540002577933e-06, "loss": 19.8219, "step": 342770 }, { "epoch": 0.6924372871358331, "grad_norm": 7.469274997711182, "learning_rate": 2.7213432851262955e-06, "loss": 11.1095, "step": 342780 }, { "epoch": 0.6924574877685169, "grad_norm": 270.940673828125, "learning_rate": 2.721032581100692e-06, "loss": 13.2375, "step": 342790 }, { "epoch": 0.6924776884012007, "grad_norm": 347.6497802734375, "learning_rate": 2.7207218881825016e-06, "loss": 18.0023, "step": 342800 }, { "epoch": 0.6924978890338845, "grad_norm": 513.4388427734375, "learning_rate": 2.7204112063732337e-06, "loss": 10.9845, "step": 342810 }, { "epoch": 0.6925180896665684, "grad_norm": 150.4086456298828, "learning_rate": 2.720100535674407e-06, "loss": 7.4495, "step": 342820 }, { "epoch": 0.6925382902992522, "grad_norm": 299.72186279296875, "learning_rate": 2.719789876087532e-06, "loss": 23.0661, "step": 342830 }, { "epoch": 0.692558490931936, "grad_norm": 1.9504543542861938, "learning_rate": 2.719479227614127e-06, "loss": 11.1104, "step": 342840 }, { "epoch": 0.6925786915646198, "grad_norm": 361.8651428222656, "learning_rate": 2.719168590255703e-06, "loss": 19.6274, "step": 342850 }, { "epoch": 0.6925988921973036, "grad_norm": 147.86993408203125, "learning_rate": 2.718857964013773e-06, "loss": 22.4746, "step": 342860 }, { "epoch": 0.6926190928299875, "grad_norm": 250.3414764404297, "learning_rate": 2.7185473488898544e-06, "loss": 34.7419, "step": 342870 }, { "epoch": 0.6926392934626713, "grad_norm": 327.48492431640625, "learning_rate": 2.7182367448854573e-06, "loss": 14.6069, "step": 342880 }, { "epoch": 0.6926594940953551, "grad_norm": 229.16285705566406, "learning_rate": 2.7179261520021005e-06, "loss": 12.6503, "step": 342890 }, { "epoch": 0.6926796947280389, "grad_norm": 16.580154418945312, "learning_rate": 2.717615570241294e-06, "loss": 12.138, "step": 342900 }, { "epoch": 0.6926998953607227, "grad_norm": 76.9366455078125, "learning_rate": 2.71730499960455e-06, "loss": 12.6384, "step": 342910 }, { "epoch": 0.6927200959934066, "grad_norm": 348.6324768066406, "learning_rate": 2.7169944400933872e-06, "loss": 12.1295, "step": 342920 }, { "epoch": 0.6927402966260904, "grad_norm": 140.69085693359375, "learning_rate": 2.7166838917093163e-06, "loss": 21.397, "step": 342930 }, { "epoch": 0.6927604972587742, "grad_norm": 238.13778686523438, "learning_rate": 2.7163733544538496e-06, "loss": 20.2771, "step": 342940 }, { "epoch": 0.6927806978914579, "grad_norm": 86.5898666381836, "learning_rate": 2.716062828328502e-06, "loss": 9.2241, "step": 342950 }, { "epoch": 0.6928008985241417, "grad_norm": 141.1080780029297, "learning_rate": 2.7157523133347906e-06, "loss": 11.0812, "step": 342960 }, { "epoch": 0.6928210991568255, "grad_norm": 192.6549530029297, "learning_rate": 2.7154418094742222e-06, "loss": 6.9085, "step": 342970 }, { "epoch": 0.6928412997895094, "grad_norm": 102.81583404541016, "learning_rate": 2.7151313167483127e-06, "loss": 21.3678, "step": 342980 }, { "epoch": 0.6928615004221932, "grad_norm": 327.85955810546875, "learning_rate": 2.714820835158579e-06, "loss": 21.9639, "step": 342990 }, { "epoch": 0.692881701054877, "grad_norm": 127.41747283935547, "learning_rate": 2.714510364706531e-06, "loss": 15.1918, "step": 343000 }, { "epoch": 0.6929019016875608, "grad_norm": 246.952392578125, "learning_rate": 2.7141999053936795e-06, "loss": 19.6271, "step": 343010 }, { "epoch": 0.6929221023202446, "grad_norm": 356.96771240234375, "learning_rate": 2.713889457221541e-06, "loss": 23.4931, "step": 343020 }, { "epoch": 0.6929423029529285, "grad_norm": 116.05514526367188, "learning_rate": 2.713579020191632e-06, "loss": 20.6633, "step": 343030 }, { "epoch": 0.6929625035856123, "grad_norm": 188.68154907226562, "learning_rate": 2.713268594305458e-06, "loss": 22.4994, "step": 343040 }, { "epoch": 0.6929827042182961, "grad_norm": 296.92608642578125, "learning_rate": 2.712958179564535e-06, "loss": 14.0727, "step": 343050 }, { "epoch": 0.6930029048509799, "grad_norm": 308.0699157714844, "learning_rate": 2.7126477759703786e-06, "loss": 14.6162, "step": 343060 }, { "epoch": 0.6930231054836637, "grad_norm": 200.09530639648438, "learning_rate": 2.7123373835244994e-06, "loss": 16.0952, "step": 343070 }, { "epoch": 0.6930433061163476, "grad_norm": 762.2777709960938, "learning_rate": 2.712027002228408e-06, "loss": 30.2562, "step": 343080 }, { "epoch": 0.6930635067490314, "grad_norm": 339.58514404296875, "learning_rate": 2.711716632083622e-06, "loss": 10.7224, "step": 343090 }, { "epoch": 0.6930837073817152, "grad_norm": 99.20783233642578, "learning_rate": 2.7114062730916513e-06, "loss": 14.7129, "step": 343100 }, { "epoch": 0.693103908014399, "grad_norm": 401.3277282714844, "learning_rate": 2.711095925254007e-06, "loss": 21.2819, "step": 343110 }, { "epoch": 0.6931241086470828, "grad_norm": 832.515625, "learning_rate": 2.7107855885722036e-06, "loss": 15.9415, "step": 343120 }, { "epoch": 0.6931443092797667, "grad_norm": 77.59825134277344, "learning_rate": 2.710475263047756e-06, "loss": 15.6931, "step": 343130 }, { "epoch": 0.6931645099124505, "grad_norm": 276.7977294921875, "learning_rate": 2.7101649486821735e-06, "loss": 19.8327, "step": 343140 }, { "epoch": 0.6931847105451343, "grad_norm": 128.58102416992188, "learning_rate": 2.7098546454769683e-06, "loss": 10.7453, "step": 343150 }, { "epoch": 0.6932049111778181, "grad_norm": 261.4659118652344, "learning_rate": 2.7095443534336545e-06, "loss": 12.9575, "step": 343160 }, { "epoch": 0.693225111810502, "grad_norm": 162.5779266357422, "learning_rate": 2.7092340725537446e-06, "loss": 18.2003, "step": 343170 }, { "epoch": 0.6932453124431858, "grad_norm": 739.537109375, "learning_rate": 2.708923802838748e-06, "loss": 23.1864, "step": 343180 }, { "epoch": 0.6932655130758696, "grad_norm": 207.94837951660156, "learning_rate": 2.7086135442901806e-06, "loss": 15.1758, "step": 343190 }, { "epoch": 0.6932857137085533, "grad_norm": 462.3773498535156, "learning_rate": 2.708303296909551e-06, "loss": 24.2764, "step": 343200 }, { "epoch": 0.6933059143412371, "grad_norm": 325.4755859375, "learning_rate": 2.7079930606983753e-06, "loss": 13.1309, "step": 343210 }, { "epoch": 0.6933261149739209, "grad_norm": 133.61415100097656, "learning_rate": 2.707682835658163e-06, "loss": 19.9787, "step": 343220 }, { "epoch": 0.6933463156066048, "grad_norm": 276.5166015625, "learning_rate": 2.7073726217904247e-06, "loss": 19.1519, "step": 343230 }, { "epoch": 0.6933665162392886, "grad_norm": 175.46463012695312, "learning_rate": 2.7070624190966744e-06, "loss": 14.8242, "step": 343240 }, { "epoch": 0.6933867168719724, "grad_norm": 126.52288055419922, "learning_rate": 2.7067522275784275e-06, "loss": 13.6673, "step": 343250 }, { "epoch": 0.6934069175046562, "grad_norm": 269.3690185546875, "learning_rate": 2.7064420472371876e-06, "loss": 11.4177, "step": 343260 }, { "epoch": 0.69342711813734, "grad_norm": 337.2025451660156, "learning_rate": 2.706131878074472e-06, "loss": 13.5493, "step": 343270 }, { "epoch": 0.6934473187700239, "grad_norm": 130.85826110839844, "learning_rate": 2.7058217200917934e-06, "loss": 13.5878, "step": 343280 }, { "epoch": 0.6934675194027077, "grad_norm": 373.82318115234375, "learning_rate": 2.705511573290661e-06, "loss": 17.3691, "step": 343290 }, { "epoch": 0.6934877200353915, "grad_norm": 212.09457397460938, "learning_rate": 2.705201437672585e-06, "loss": 14.4979, "step": 343300 }, { "epoch": 0.6935079206680753, "grad_norm": 489.9969177246094, "learning_rate": 2.7048913132390787e-06, "loss": 25.5774, "step": 343310 }, { "epoch": 0.6935281213007591, "grad_norm": 393.2461242675781, "learning_rate": 2.7045811999916583e-06, "loss": 23.6755, "step": 343320 }, { "epoch": 0.693548321933443, "grad_norm": 232.14187622070312, "learning_rate": 2.7042710979318266e-06, "loss": 13.1532, "step": 343330 }, { "epoch": 0.6935685225661268, "grad_norm": 211.55636596679688, "learning_rate": 2.703961007061099e-06, "loss": 12.1626, "step": 343340 }, { "epoch": 0.6935887231988106, "grad_norm": 96.13494873046875, "learning_rate": 2.7036509273809887e-06, "loss": 12.8028, "step": 343350 }, { "epoch": 0.6936089238314944, "grad_norm": 205.5885467529297, "learning_rate": 2.7033408588930054e-06, "loss": 21.7739, "step": 343360 }, { "epoch": 0.6936291244641782, "grad_norm": 320.22064208984375, "learning_rate": 2.7030308015986583e-06, "loss": 19.8798, "step": 343370 }, { "epoch": 0.6936493250968621, "grad_norm": 239.6497344970703, "learning_rate": 2.7027207554994627e-06, "loss": 24.2973, "step": 343380 }, { "epoch": 0.6936695257295459, "grad_norm": 162.96957397460938, "learning_rate": 2.702410720596927e-06, "loss": 9.7987, "step": 343390 }, { "epoch": 0.6936897263622297, "grad_norm": 125.40640258789062, "learning_rate": 2.7021006968925613e-06, "loss": 16.118, "step": 343400 }, { "epoch": 0.6937099269949135, "grad_norm": 22.81417465209961, "learning_rate": 2.7017906843878795e-06, "loss": 17.1652, "step": 343410 }, { "epoch": 0.6937301276275973, "grad_norm": 135.7185821533203, "learning_rate": 2.7014806830843897e-06, "loss": 13.2401, "step": 343420 }, { "epoch": 0.6937503282602812, "grad_norm": 218.56695556640625, "learning_rate": 2.7011706929836056e-06, "loss": 20.1292, "step": 343430 }, { "epoch": 0.693770528892965, "grad_norm": 423.072509765625, "learning_rate": 2.700860714087036e-06, "loss": 11.4804, "step": 343440 }, { "epoch": 0.6937907295256488, "grad_norm": 83.82049560546875, "learning_rate": 2.700550746396193e-06, "loss": 16.3022, "step": 343450 }, { "epoch": 0.6938109301583325, "grad_norm": 226.42047119140625, "learning_rate": 2.7002407899125883e-06, "loss": 14.825, "step": 343460 }, { "epoch": 0.6938311307910163, "grad_norm": 395.5869140625, "learning_rate": 2.699930844637728e-06, "loss": 24.5972, "step": 343470 }, { "epoch": 0.6938513314237001, "grad_norm": 368.0987243652344, "learning_rate": 2.6996209105731273e-06, "loss": 23.0788, "step": 343480 }, { "epoch": 0.693871532056384, "grad_norm": 372.47235107421875, "learning_rate": 2.6993109877202942e-06, "loss": 7.374, "step": 343490 }, { "epoch": 0.6938917326890678, "grad_norm": 351.5311279296875, "learning_rate": 2.699001076080742e-06, "loss": 24.8169, "step": 343500 }, { "epoch": 0.6939119333217516, "grad_norm": 383.5364685058594, "learning_rate": 2.6986911756559795e-06, "loss": 21.905, "step": 343510 }, { "epoch": 0.6939321339544354, "grad_norm": 634.5228271484375, "learning_rate": 2.698381286447516e-06, "loss": 29.0457, "step": 343520 }, { "epoch": 0.6939523345871192, "grad_norm": 358.40289306640625, "learning_rate": 2.698071408456864e-06, "loss": 21.2191, "step": 343530 }, { "epoch": 0.6939725352198031, "grad_norm": 333.7301330566406, "learning_rate": 2.6977615416855325e-06, "loss": 14.2159, "step": 343540 }, { "epoch": 0.6939927358524869, "grad_norm": 294.4639892578125, "learning_rate": 2.697451686135031e-06, "loss": 19.7787, "step": 343550 }, { "epoch": 0.6940129364851707, "grad_norm": 27.5255126953125, "learning_rate": 2.6971418418068696e-06, "loss": 22.7074, "step": 343560 }, { "epoch": 0.6940331371178545, "grad_norm": 315.99560546875, "learning_rate": 2.696832008702564e-06, "loss": 23.2883, "step": 343570 }, { "epoch": 0.6940533377505383, "grad_norm": 342.694580078125, "learning_rate": 2.6965221868236156e-06, "loss": 28.4695, "step": 343580 }, { "epoch": 0.6940735383832222, "grad_norm": 189.20652770996094, "learning_rate": 2.6962123761715395e-06, "loss": 14.239, "step": 343590 }, { "epoch": 0.694093739015906, "grad_norm": 219.95936584472656, "learning_rate": 2.6959025767478466e-06, "loss": 13.8209, "step": 343600 }, { "epoch": 0.6941139396485898, "grad_norm": 216.48301696777344, "learning_rate": 2.6955927885540444e-06, "loss": 11.7367, "step": 343610 }, { "epoch": 0.6941341402812736, "grad_norm": 488.8514709472656, "learning_rate": 2.6952830115916417e-06, "loss": 16.9852, "step": 343620 }, { "epoch": 0.6941543409139574, "grad_norm": 287.40008544921875, "learning_rate": 2.6949732458621502e-06, "loss": 10.8809, "step": 343630 }, { "epoch": 0.6941745415466413, "grad_norm": 247.60888671875, "learning_rate": 2.694663491367084e-06, "loss": 19.4117, "step": 343640 }, { "epoch": 0.6941947421793251, "grad_norm": 470.2730407714844, "learning_rate": 2.694353748107944e-06, "loss": 27.9732, "step": 343650 }, { "epoch": 0.6942149428120089, "grad_norm": 108.20856475830078, "learning_rate": 2.694044016086244e-06, "loss": 12.7168, "step": 343660 }, { "epoch": 0.6942351434446927, "grad_norm": 131.0286102294922, "learning_rate": 2.6937342953034963e-06, "loss": 16.7073, "step": 343670 }, { "epoch": 0.6942553440773765, "grad_norm": 323.0220642089844, "learning_rate": 2.6934245857612074e-06, "loss": 21.9992, "step": 343680 }, { "epoch": 0.6942755447100604, "grad_norm": 289.8670349121094, "learning_rate": 2.6931148874608854e-06, "loss": 21.3318, "step": 343690 }, { "epoch": 0.6942957453427442, "grad_norm": 399.7106628417969, "learning_rate": 2.692805200404044e-06, "loss": 15.0985, "step": 343700 }, { "epoch": 0.6943159459754279, "grad_norm": 433.7299499511719, "learning_rate": 2.69249552459219e-06, "loss": 34.2574, "step": 343710 }, { "epoch": 0.6943361466081117, "grad_norm": 257.5885009765625, "learning_rate": 2.6921858600268304e-06, "loss": 14.7948, "step": 343720 }, { "epoch": 0.6943563472407955, "grad_norm": 344.57293701171875, "learning_rate": 2.6918762067094776e-06, "loss": 18.7397, "step": 343730 }, { "epoch": 0.6943765478734794, "grad_norm": 274.9375915527344, "learning_rate": 2.6915665646416423e-06, "loss": 21.5214, "step": 343740 }, { "epoch": 0.6943967485061632, "grad_norm": 0.0, "learning_rate": 2.6912569338248317e-06, "loss": 6.8166, "step": 343750 }, { "epoch": 0.694416949138847, "grad_norm": 196.8888702392578, "learning_rate": 2.6909473142605522e-06, "loss": 13.2893, "step": 343760 }, { "epoch": 0.6944371497715308, "grad_norm": 302.96240234375, "learning_rate": 2.6906377059503176e-06, "loss": 20.6133, "step": 343770 }, { "epoch": 0.6944573504042146, "grad_norm": 284.4427185058594, "learning_rate": 2.690328108895635e-06, "loss": 19.0261, "step": 343780 }, { "epoch": 0.6944775510368985, "grad_norm": 61.584999084472656, "learning_rate": 2.6900185230980115e-06, "loss": 17.5418, "step": 343790 }, { "epoch": 0.6944977516695823, "grad_norm": 262.02471923828125, "learning_rate": 2.6897089485589584e-06, "loss": 17.1455, "step": 343800 }, { "epoch": 0.6945179523022661, "grad_norm": 369.0280456542969, "learning_rate": 2.6893993852799827e-06, "loss": 25.124, "step": 343810 }, { "epoch": 0.6945381529349499, "grad_norm": 269.75311279296875, "learning_rate": 2.689089833262595e-06, "loss": 13.7746, "step": 343820 }, { "epoch": 0.6945583535676337, "grad_norm": 145.6909942626953, "learning_rate": 2.688780292508304e-06, "loss": 12.4405, "step": 343830 }, { "epoch": 0.6945785542003176, "grad_norm": 346.66400146484375, "learning_rate": 2.6884707630186158e-06, "loss": 35.364, "step": 343840 }, { "epoch": 0.6945987548330014, "grad_norm": 459.71209716796875, "learning_rate": 2.6881612447950425e-06, "loss": 12.7529, "step": 343850 }, { "epoch": 0.6946189554656852, "grad_norm": 287.1859130859375, "learning_rate": 2.6878517378390906e-06, "loss": 17.674, "step": 343860 }, { "epoch": 0.694639156098369, "grad_norm": 359.2180480957031, "learning_rate": 2.6875422421522667e-06, "loss": 13.0261, "step": 343870 }, { "epoch": 0.6946593567310528, "grad_norm": 178.345703125, "learning_rate": 2.6872327577360822e-06, "loss": 13.9737, "step": 343880 }, { "epoch": 0.6946795573637367, "grad_norm": 304.72540283203125, "learning_rate": 2.6869232845920466e-06, "loss": 18.1446, "step": 343890 }, { "epoch": 0.6946997579964205, "grad_norm": 192.28604125976562, "learning_rate": 2.686613822721666e-06, "loss": 28.3725, "step": 343900 }, { "epoch": 0.6947199586291043, "grad_norm": 229.0576629638672, "learning_rate": 2.686304372126447e-06, "loss": 15.2163, "step": 343910 }, { "epoch": 0.6947401592617881, "grad_norm": 299.8327331542969, "learning_rate": 2.6859949328079005e-06, "loss": 19.1392, "step": 343920 }, { "epoch": 0.6947603598944719, "grad_norm": 64.99732971191406, "learning_rate": 2.685685504767537e-06, "loss": 13.3539, "step": 343930 }, { "epoch": 0.6947805605271558, "grad_norm": 564.297607421875, "learning_rate": 2.6853760880068587e-06, "loss": 35.8961, "step": 343940 }, { "epoch": 0.6948007611598396, "grad_norm": 328.0784606933594, "learning_rate": 2.6850666825273762e-06, "loss": 14.608, "step": 343950 }, { "epoch": 0.6948209617925234, "grad_norm": 138.04087829589844, "learning_rate": 2.6847572883305993e-06, "loss": 19.8922, "step": 343960 }, { "epoch": 0.6948411624252071, "grad_norm": 44.134037017822266, "learning_rate": 2.6844479054180354e-06, "loss": 15.9002, "step": 343970 }, { "epoch": 0.6948613630578909, "grad_norm": 374.0654296875, "learning_rate": 2.6841385337911895e-06, "loss": 16.0402, "step": 343980 }, { "epoch": 0.6948815636905747, "grad_norm": 627.4871826171875, "learning_rate": 2.683829173451573e-06, "loss": 25.8222, "step": 343990 }, { "epoch": 0.6949017643232586, "grad_norm": 219.1624298095703, "learning_rate": 2.683519824400693e-06, "loss": 20.2074, "step": 344000 }, { "epoch": 0.6949219649559424, "grad_norm": 207.17225646972656, "learning_rate": 2.683210486640054e-06, "loss": 8.2709, "step": 344010 }, { "epoch": 0.6949421655886262, "grad_norm": 250.61923217773438, "learning_rate": 2.682901160171168e-06, "loss": 16.4194, "step": 344020 }, { "epoch": 0.69496236622131, "grad_norm": 157.60777282714844, "learning_rate": 2.6825918449955386e-06, "loss": 14.3761, "step": 344030 }, { "epoch": 0.6949825668539938, "grad_norm": 267.9568786621094, "learning_rate": 2.682282541114678e-06, "loss": 10.8626, "step": 344040 }, { "epoch": 0.6950027674866777, "grad_norm": 197.6457977294922, "learning_rate": 2.6819732485300887e-06, "loss": 19.3097, "step": 344050 }, { "epoch": 0.6950229681193615, "grad_norm": 143.51956176757812, "learning_rate": 2.6816639672432826e-06, "loss": 13.2991, "step": 344060 }, { "epoch": 0.6950431687520453, "grad_norm": 383.8836975097656, "learning_rate": 2.681354697255765e-06, "loss": 13.3057, "step": 344070 }, { "epoch": 0.6950633693847291, "grad_norm": 258.791015625, "learning_rate": 2.681045438569042e-06, "loss": 19.8061, "step": 344080 }, { "epoch": 0.6950835700174129, "grad_norm": 89.33431243896484, "learning_rate": 2.680736191184624e-06, "loss": 18.5694, "step": 344090 }, { "epoch": 0.6951037706500968, "grad_norm": 197.98373413085938, "learning_rate": 2.680426955104014e-06, "loss": 9.0865, "step": 344100 }, { "epoch": 0.6951239712827806, "grad_norm": 220.17433166503906, "learning_rate": 2.6801177303287247e-06, "loss": 14.7511, "step": 344110 }, { "epoch": 0.6951441719154644, "grad_norm": 226.1342010498047, "learning_rate": 2.6798085168602595e-06, "loss": 21.9007, "step": 344120 }, { "epoch": 0.6951643725481482, "grad_norm": 271.6692810058594, "learning_rate": 2.6794993147001246e-06, "loss": 20.9934, "step": 344130 }, { "epoch": 0.695184573180832, "grad_norm": 192.8466339111328, "learning_rate": 2.6791901238498302e-06, "loss": 11.1333, "step": 344140 }, { "epoch": 0.6952047738135159, "grad_norm": 186.29881286621094, "learning_rate": 2.678880944310882e-06, "loss": 20.9091, "step": 344150 }, { "epoch": 0.6952249744461997, "grad_norm": 0.0, "learning_rate": 2.678571776084784e-06, "loss": 11.4033, "step": 344160 }, { "epoch": 0.6952451750788835, "grad_norm": 207.1684112548828, "learning_rate": 2.6782626191730466e-06, "loss": 24.123, "step": 344170 }, { "epoch": 0.6952653757115673, "grad_norm": 1005.1881713867188, "learning_rate": 2.677953473577177e-06, "loss": 17.5204, "step": 344180 }, { "epoch": 0.6952855763442511, "grad_norm": 1043.0443115234375, "learning_rate": 2.6776443392986808e-06, "loss": 36.2505, "step": 344190 }, { "epoch": 0.695305776976935, "grad_norm": 332.6207275390625, "learning_rate": 2.677335216339062e-06, "loss": 17.4547, "step": 344200 }, { "epoch": 0.6953259776096188, "grad_norm": 388.066650390625, "learning_rate": 2.6770261046998315e-06, "loss": 17.4636, "step": 344210 }, { "epoch": 0.6953461782423026, "grad_norm": 186.64410400390625, "learning_rate": 2.6767170043824942e-06, "loss": 23.638, "step": 344220 }, { "epoch": 0.6953663788749863, "grad_norm": 42.887420654296875, "learning_rate": 2.6764079153885547e-06, "loss": 13.6926, "step": 344230 }, { "epoch": 0.6953865795076701, "grad_norm": 301.3786926269531, "learning_rate": 2.6760988377195206e-06, "loss": 10.0114, "step": 344240 }, { "epoch": 0.695406780140354, "grad_norm": 337.5215148925781, "learning_rate": 2.675789771376904e-06, "loss": 25.9722, "step": 344250 }, { "epoch": 0.6954269807730378, "grad_norm": 410.2828369140625, "learning_rate": 2.6754807163622014e-06, "loss": 15.8466, "step": 344260 }, { "epoch": 0.6954471814057216, "grad_norm": 48.2470703125, "learning_rate": 2.6751716726769237e-06, "loss": 14.1513, "step": 344270 }, { "epoch": 0.6954673820384054, "grad_norm": 239.15049743652344, "learning_rate": 2.67486264032258e-06, "loss": 24.6195, "step": 344280 }, { "epoch": 0.6954875826710892, "grad_norm": 247.29421997070312, "learning_rate": 2.674553619300673e-06, "loss": 23.5757, "step": 344290 }, { "epoch": 0.695507783303773, "grad_norm": 205.9752197265625, "learning_rate": 2.6742446096127086e-06, "loss": 12.2952, "step": 344300 }, { "epoch": 0.6955279839364569, "grad_norm": 309.6461181640625, "learning_rate": 2.673935611260195e-06, "loss": 14.5948, "step": 344310 }, { "epoch": 0.6955481845691407, "grad_norm": 343.7037353515625, "learning_rate": 2.6736266242446372e-06, "loss": 9.265, "step": 344320 }, { "epoch": 0.6955683852018245, "grad_norm": 165.88909912109375, "learning_rate": 2.6733176485675396e-06, "loss": 16.6943, "step": 344330 }, { "epoch": 0.6955885858345083, "grad_norm": 727.321533203125, "learning_rate": 2.6730086842304093e-06, "loss": 17.7172, "step": 344340 }, { "epoch": 0.6956087864671922, "grad_norm": 34.70158767700195, "learning_rate": 2.6726997312347546e-06, "loss": 20.0248, "step": 344350 }, { "epoch": 0.695628987099876, "grad_norm": 86.23654174804688, "learning_rate": 2.672390789582079e-06, "loss": 19.6538, "step": 344360 }, { "epoch": 0.6956491877325598, "grad_norm": 257.8867492675781, "learning_rate": 2.672081859273886e-06, "loss": 22.2662, "step": 344370 }, { "epoch": 0.6956693883652436, "grad_norm": 94.520751953125, "learning_rate": 2.6717729403116866e-06, "loss": 7.3663, "step": 344380 }, { "epoch": 0.6956895889979274, "grad_norm": 333.9255065917969, "learning_rate": 2.671464032696982e-06, "loss": 24.8913, "step": 344390 }, { "epoch": 0.6957097896306113, "grad_norm": 841.7655029296875, "learning_rate": 2.671155136431279e-06, "loss": 27.3469, "step": 344400 }, { "epoch": 0.6957299902632951, "grad_norm": 453.4447937011719, "learning_rate": 2.6708462515160845e-06, "loss": 27.4163, "step": 344410 }, { "epoch": 0.6957501908959789, "grad_norm": 460.78875732421875, "learning_rate": 2.670537377952901e-06, "loss": 16.3187, "step": 344420 }, { "epoch": 0.6957703915286627, "grad_norm": 11.942303657531738, "learning_rate": 2.670228515743238e-06, "loss": 13.549, "step": 344430 }, { "epoch": 0.6957905921613465, "grad_norm": 376.7830810546875, "learning_rate": 2.6699196648885984e-06, "loss": 24.8201, "step": 344440 }, { "epoch": 0.6958107927940304, "grad_norm": 121.2013168334961, "learning_rate": 2.6696108253904856e-06, "loss": 16.5796, "step": 344450 }, { "epoch": 0.6958309934267142, "grad_norm": 60.11631774902344, "learning_rate": 2.669301997250409e-06, "loss": 23.5999, "step": 344460 }, { "epoch": 0.695851194059398, "grad_norm": 156.37088012695312, "learning_rate": 2.668993180469872e-06, "loss": 12.6075, "step": 344470 }, { "epoch": 0.6958713946920817, "grad_norm": 239.71775817871094, "learning_rate": 2.668684375050378e-06, "loss": 9.9218, "step": 344480 }, { "epoch": 0.6958915953247655, "grad_norm": 149.97901916503906, "learning_rate": 2.668375580993433e-06, "loss": 13.9268, "step": 344490 }, { "epoch": 0.6959117959574493, "grad_norm": 421.1890869140625, "learning_rate": 2.6680667983005446e-06, "loss": 14.0084, "step": 344500 }, { "epoch": 0.6959319965901332, "grad_norm": 375.1965026855469, "learning_rate": 2.667758026973216e-06, "loss": 21.391, "step": 344510 }, { "epoch": 0.695952197222817, "grad_norm": 267.7335510253906, "learning_rate": 2.667449267012949e-06, "loss": 20.4071, "step": 344520 }, { "epoch": 0.6959723978555008, "grad_norm": 76.54393768310547, "learning_rate": 2.667140518421255e-06, "loss": 12.2195, "step": 344530 }, { "epoch": 0.6959925984881846, "grad_norm": 441.9671936035156, "learning_rate": 2.6668317811996342e-06, "loss": 16.4225, "step": 344540 }, { "epoch": 0.6960127991208684, "grad_norm": 192.47715759277344, "learning_rate": 2.66652305534959e-06, "loss": 22.3227, "step": 344550 }, { "epoch": 0.6960329997535523, "grad_norm": 50.54800796508789, "learning_rate": 2.6662143408726306e-06, "loss": 18.2847, "step": 344560 }, { "epoch": 0.6960532003862361, "grad_norm": 140.75784301757812, "learning_rate": 2.6659056377702606e-06, "loss": 16.3975, "step": 344570 }, { "epoch": 0.6960734010189199, "grad_norm": 437.0973815917969, "learning_rate": 2.6655969460439835e-06, "loss": 25.4025, "step": 344580 }, { "epoch": 0.6960936016516037, "grad_norm": 685.494140625, "learning_rate": 2.6652882656953016e-06, "loss": 27.5215, "step": 344590 }, { "epoch": 0.6961138022842875, "grad_norm": 153.52780151367188, "learning_rate": 2.6649795967257243e-06, "loss": 18.1376, "step": 344600 }, { "epoch": 0.6961340029169714, "grad_norm": 229.4714813232422, "learning_rate": 2.6646709391367524e-06, "loss": 16.3532, "step": 344610 }, { "epoch": 0.6961542035496552, "grad_norm": 401.6508483886719, "learning_rate": 2.6643622929298896e-06, "loss": 29.4087, "step": 344620 }, { "epoch": 0.696174404182339, "grad_norm": 347.1051025390625, "learning_rate": 2.6640536581066434e-06, "loss": 21.0328, "step": 344630 }, { "epoch": 0.6961946048150228, "grad_norm": 391.7100830078125, "learning_rate": 2.6637450346685145e-06, "loss": 21.3655, "step": 344640 }, { "epoch": 0.6962148054477066, "grad_norm": 152.13804626464844, "learning_rate": 2.6634364226170105e-06, "loss": 7.0684, "step": 344650 }, { "epoch": 0.6962350060803905, "grad_norm": 242.8523406982422, "learning_rate": 2.663127821953633e-06, "loss": 9.1683, "step": 344660 }, { "epoch": 0.6962552067130743, "grad_norm": 160.66146850585938, "learning_rate": 2.6628192326798884e-06, "loss": 16.738, "step": 344670 }, { "epoch": 0.6962754073457581, "grad_norm": 205.23162841796875, "learning_rate": 2.662510654797279e-06, "loss": 5.989, "step": 344680 }, { "epoch": 0.6962956079784419, "grad_norm": 271.1011657714844, "learning_rate": 2.662202088307308e-06, "loss": 15.596, "step": 344690 }, { "epoch": 0.6963158086111257, "grad_norm": 422.9322814941406, "learning_rate": 2.661893533211482e-06, "loss": 22.2214, "step": 344700 }, { "epoch": 0.6963360092438096, "grad_norm": 585.9033203125, "learning_rate": 2.6615849895113014e-06, "loss": 17.8715, "step": 344710 }, { "epoch": 0.6963562098764934, "grad_norm": 36.622249603271484, "learning_rate": 2.661276457208274e-06, "loss": 25.9618, "step": 344720 }, { "epoch": 0.6963764105091772, "grad_norm": 44.508888244628906, "learning_rate": 2.660967936303902e-06, "loss": 8.4224, "step": 344730 }, { "epoch": 0.6963966111418609, "grad_norm": 0.0, "learning_rate": 2.6606594267996853e-06, "loss": 15.994, "step": 344740 }, { "epoch": 0.6964168117745447, "grad_norm": 465.09454345703125, "learning_rate": 2.6603509286971342e-06, "loss": 16.1805, "step": 344750 }, { "epoch": 0.6964370124072286, "grad_norm": 353.5810241699219, "learning_rate": 2.660042441997748e-06, "loss": 17.7305, "step": 344760 }, { "epoch": 0.6964572130399124, "grad_norm": 186.38682556152344, "learning_rate": 2.6597339667030296e-06, "loss": 16.7407, "step": 344770 }, { "epoch": 0.6964774136725962, "grad_norm": 552.1394653320312, "learning_rate": 2.659425502814484e-06, "loss": 16.6637, "step": 344780 }, { "epoch": 0.69649761430528, "grad_norm": 130.45643615722656, "learning_rate": 2.659117050333616e-06, "loss": 21.8957, "step": 344790 }, { "epoch": 0.6965178149379638, "grad_norm": 137.75352478027344, "learning_rate": 2.658808609261928e-06, "loss": 9.4482, "step": 344800 }, { "epoch": 0.6965380155706477, "grad_norm": 257.7386474609375, "learning_rate": 2.658500179600921e-06, "loss": 18.3375, "step": 344810 }, { "epoch": 0.6965582162033315, "grad_norm": 81.74398803710938, "learning_rate": 2.6581917613521026e-06, "loss": 13.4884, "step": 344820 }, { "epoch": 0.6965784168360153, "grad_norm": 293.43682861328125, "learning_rate": 2.657883354516973e-06, "loss": 10.8181, "step": 344830 }, { "epoch": 0.6965986174686991, "grad_norm": 292.984375, "learning_rate": 2.6575749590970336e-06, "loss": 13.7714, "step": 344840 }, { "epoch": 0.6966188181013829, "grad_norm": 320.69287109375, "learning_rate": 2.6572665750937898e-06, "loss": 17.0815, "step": 344850 }, { "epoch": 0.6966390187340668, "grad_norm": 252.84384155273438, "learning_rate": 2.656958202508749e-06, "loss": 23.546, "step": 344860 }, { "epoch": 0.6966592193667506, "grad_norm": 303.4278259277344, "learning_rate": 2.656649841343406e-06, "loss": 14.7255, "step": 344870 }, { "epoch": 0.6966794199994344, "grad_norm": 223.25927734375, "learning_rate": 2.656341491599267e-06, "loss": 10.6432, "step": 344880 }, { "epoch": 0.6966996206321182, "grad_norm": 127.36978912353516, "learning_rate": 2.6560331532778373e-06, "loss": 12.2615, "step": 344890 }, { "epoch": 0.696719821264802, "grad_norm": 381.87347412109375, "learning_rate": 2.6557248263806175e-06, "loss": 17.7136, "step": 344900 }, { "epoch": 0.6967400218974859, "grad_norm": 311.8560485839844, "learning_rate": 2.655416510909109e-06, "loss": 6.2437, "step": 344910 }, { "epoch": 0.6967602225301697, "grad_norm": 296.4872131347656, "learning_rate": 2.655108206864818e-06, "loss": 14.7941, "step": 344920 }, { "epoch": 0.6967804231628535, "grad_norm": 277.21636962890625, "learning_rate": 2.654799914249245e-06, "loss": 13.6128, "step": 344930 }, { "epoch": 0.6968006237955373, "grad_norm": 132.92312622070312, "learning_rate": 2.65449163306389e-06, "loss": 14.5502, "step": 344940 }, { "epoch": 0.6968208244282211, "grad_norm": 16.54475212097168, "learning_rate": 2.65418336331026e-06, "loss": 22.0164, "step": 344950 }, { "epoch": 0.696841025060905, "grad_norm": 319.759765625, "learning_rate": 2.653875104989857e-06, "loss": 16.6792, "step": 344960 }, { "epoch": 0.6968612256935888, "grad_norm": 209.79656982421875, "learning_rate": 2.653566858104182e-06, "loss": 12.5109, "step": 344970 }, { "epoch": 0.6968814263262726, "grad_norm": 333.3680725097656, "learning_rate": 2.6532586226547354e-06, "loss": 19.2069, "step": 344980 }, { "epoch": 0.6969016269589563, "grad_norm": 243.13088989257812, "learning_rate": 2.652950398643024e-06, "loss": 14.5807, "step": 344990 }, { "epoch": 0.6969218275916401, "grad_norm": 270.1582946777344, "learning_rate": 2.6526421860705474e-06, "loss": 27.6865, "step": 345000 }, { "epoch": 0.6969420282243239, "grad_norm": 389.8337097167969, "learning_rate": 2.6523339849388065e-06, "loss": 23.3782, "step": 345010 }, { "epoch": 0.6969622288570078, "grad_norm": 327.0071105957031, "learning_rate": 2.6520257952493066e-06, "loss": 19.0053, "step": 345020 }, { "epoch": 0.6969824294896916, "grad_norm": 23.788143157958984, "learning_rate": 2.6517176170035463e-06, "loss": 12.7242, "step": 345030 }, { "epoch": 0.6970026301223754, "grad_norm": 384.8385925292969, "learning_rate": 2.651409450203032e-06, "loss": 12.1121, "step": 345040 }, { "epoch": 0.6970228307550592, "grad_norm": 409.720703125, "learning_rate": 2.6511012948492625e-06, "loss": 19.5513, "step": 345050 }, { "epoch": 0.697043031387743, "grad_norm": 304.9658508300781, "learning_rate": 2.650793150943739e-06, "loss": 19.6173, "step": 345060 }, { "epoch": 0.6970632320204269, "grad_norm": 575.296630859375, "learning_rate": 2.650485018487966e-06, "loss": 14.4055, "step": 345070 }, { "epoch": 0.6970834326531107, "grad_norm": 394.16192626953125, "learning_rate": 2.6501768974834453e-06, "loss": 18.9299, "step": 345080 }, { "epoch": 0.6971036332857945, "grad_norm": 248.3156280517578, "learning_rate": 2.649868787931674e-06, "loss": 11.3435, "step": 345090 }, { "epoch": 0.6971238339184783, "grad_norm": 314.5496520996094, "learning_rate": 2.649560689834158e-06, "loss": 33.3921, "step": 345100 }, { "epoch": 0.6971440345511621, "grad_norm": 344.4354553222656, "learning_rate": 2.6492526031924005e-06, "loss": 36.9348, "step": 345110 }, { "epoch": 0.697164235183846, "grad_norm": 217.18212890625, "learning_rate": 2.6489445280078998e-06, "loss": 8.0162, "step": 345120 }, { "epoch": 0.6971844358165298, "grad_norm": 178.1165008544922, "learning_rate": 2.6486364642821565e-06, "loss": 21.29, "step": 345130 }, { "epoch": 0.6972046364492136, "grad_norm": 71.80766296386719, "learning_rate": 2.6483284120166762e-06, "loss": 7.8408, "step": 345140 }, { "epoch": 0.6972248370818974, "grad_norm": 198.8499755859375, "learning_rate": 2.6480203712129583e-06, "loss": 6.7553, "step": 345150 }, { "epoch": 0.6972450377145812, "grad_norm": 414.4875793457031, "learning_rate": 2.647712341872501e-06, "loss": 26.4482, "step": 345160 }, { "epoch": 0.6972652383472651, "grad_norm": 426.157958984375, "learning_rate": 2.647404323996809e-06, "loss": 20.2683, "step": 345170 }, { "epoch": 0.6972854389799489, "grad_norm": 151.0731201171875, "learning_rate": 2.647096317587385e-06, "loss": 10.9334, "step": 345180 }, { "epoch": 0.6973056396126327, "grad_norm": 287.5469665527344, "learning_rate": 2.646788322645728e-06, "loss": 14.2373, "step": 345190 }, { "epoch": 0.6973258402453165, "grad_norm": 229.46047973632812, "learning_rate": 2.646480339173337e-06, "loss": 18.5093, "step": 345200 }, { "epoch": 0.6973460408780003, "grad_norm": 359.3277282714844, "learning_rate": 2.6461723671717177e-06, "loss": 18.314, "step": 345210 }, { "epoch": 0.6973662415106842, "grad_norm": 0.0, "learning_rate": 2.645864406642369e-06, "loss": 18.8599, "step": 345220 }, { "epoch": 0.697386442143368, "grad_norm": 116.42794799804688, "learning_rate": 2.6455564575867893e-06, "loss": 23.7611, "step": 345230 }, { "epoch": 0.6974066427760518, "grad_norm": 220.0534210205078, "learning_rate": 2.645248520006482e-06, "loss": 16.6516, "step": 345240 }, { "epoch": 0.6974268434087355, "grad_norm": 327.06903076171875, "learning_rate": 2.64494059390295e-06, "loss": 29.9014, "step": 345250 }, { "epoch": 0.6974470440414193, "grad_norm": 379.2898254394531, "learning_rate": 2.644632679277692e-06, "loss": 14.2125, "step": 345260 }, { "epoch": 0.6974672446741031, "grad_norm": 797.7236938476562, "learning_rate": 2.644324776132206e-06, "loss": 27.2397, "step": 345270 }, { "epoch": 0.697487445306787, "grad_norm": 380.19683837890625, "learning_rate": 2.6440168844679983e-06, "loss": 20.5129, "step": 345280 }, { "epoch": 0.6975076459394708, "grad_norm": 635.1932983398438, "learning_rate": 2.6437090042865655e-06, "loss": 14.0589, "step": 345290 }, { "epoch": 0.6975278465721546, "grad_norm": 131.4951629638672, "learning_rate": 2.6434011355894074e-06, "loss": 14.794, "step": 345300 }, { "epoch": 0.6975480472048384, "grad_norm": 408.5314636230469, "learning_rate": 2.643093278378029e-06, "loss": 15.4484, "step": 345310 }, { "epoch": 0.6975682478375222, "grad_norm": 120.25297546386719, "learning_rate": 2.642785432653926e-06, "loss": 11.4847, "step": 345320 }, { "epoch": 0.6975884484702061, "grad_norm": 360.92596435546875, "learning_rate": 2.6424775984186024e-06, "loss": 16.1386, "step": 345330 }, { "epoch": 0.6976086491028899, "grad_norm": 216.2003173828125, "learning_rate": 2.6421697756735577e-06, "loss": 23.4902, "step": 345340 }, { "epoch": 0.6976288497355737, "grad_norm": 308.5124206542969, "learning_rate": 2.641861964420289e-06, "loss": 11.1765, "step": 345350 }, { "epoch": 0.6976490503682575, "grad_norm": 144.05426025390625, "learning_rate": 2.641554164660301e-06, "loss": 16.4104, "step": 345360 }, { "epoch": 0.6976692510009413, "grad_norm": 105.9632797241211, "learning_rate": 2.6412463763950925e-06, "loss": 11.1716, "step": 345370 }, { "epoch": 0.6976894516336252, "grad_norm": 51.65191650390625, "learning_rate": 2.6409385996261606e-06, "loss": 14.5467, "step": 345380 }, { "epoch": 0.697709652266309, "grad_norm": 272.7182922363281, "learning_rate": 2.640630834355008e-06, "loss": 15.4342, "step": 345390 }, { "epoch": 0.6977298528989928, "grad_norm": 339.2647705078125, "learning_rate": 2.640323080583137e-06, "loss": 22.2667, "step": 345400 }, { "epoch": 0.6977500535316766, "grad_norm": 308.88128662109375, "learning_rate": 2.640015338312044e-06, "loss": 14.5979, "step": 345410 }, { "epoch": 0.6977702541643604, "grad_norm": 318.2347412109375, "learning_rate": 2.6397076075432294e-06, "loss": 12.6226, "step": 345420 }, { "epoch": 0.6977904547970443, "grad_norm": 367.200439453125, "learning_rate": 2.6393998882781945e-06, "loss": 16.9682, "step": 345430 }, { "epoch": 0.6978106554297281, "grad_norm": 528.543701171875, "learning_rate": 2.6390921805184387e-06, "loss": 29.5176, "step": 345440 }, { "epoch": 0.6978308560624119, "grad_norm": 112.80250549316406, "learning_rate": 2.63878448426546e-06, "loss": 13.877, "step": 345450 }, { "epoch": 0.6978510566950957, "grad_norm": 21.603837966918945, "learning_rate": 2.6384767995207584e-06, "loss": 18.3603, "step": 345460 }, { "epoch": 0.6978712573277795, "grad_norm": 181.7675018310547, "learning_rate": 2.6381691262858385e-06, "loss": 10.1129, "step": 345470 }, { "epoch": 0.6978914579604634, "grad_norm": 143.9803924560547, "learning_rate": 2.6378614645621916e-06, "loss": 26.3889, "step": 345480 }, { "epoch": 0.6979116585931472, "grad_norm": 140.01226806640625, "learning_rate": 2.6375538143513225e-06, "loss": 9.031, "step": 345490 }, { "epoch": 0.697931859225831, "grad_norm": 190.9214630126953, "learning_rate": 2.637246175654731e-06, "loss": 9.3296, "step": 345500 }, { "epoch": 0.6979520598585147, "grad_norm": 140.49229431152344, "learning_rate": 2.6369385484739143e-06, "loss": 24.2084, "step": 345510 }, { "epoch": 0.6979722604911985, "grad_norm": 266.3796691894531, "learning_rate": 2.636630932810371e-06, "loss": 18.8799, "step": 345520 }, { "epoch": 0.6979924611238824, "grad_norm": 416.7222900390625, "learning_rate": 2.6363233286656044e-06, "loss": 16.8718, "step": 345530 }, { "epoch": 0.6980126617565662, "grad_norm": 411.9473571777344, "learning_rate": 2.636015736041111e-06, "loss": 19.8759, "step": 345540 }, { "epoch": 0.69803286238925, "grad_norm": 252.95343017578125, "learning_rate": 2.6357081549383877e-06, "loss": 14.2049, "step": 345550 }, { "epoch": 0.6980530630219338, "grad_norm": 518.8953247070312, "learning_rate": 2.635400585358937e-06, "loss": 21.4068, "step": 345560 }, { "epoch": 0.6980732636546176, "grad_norm": 343.766357421875, "learning_rate": 2.6350930273042587e-06, "loss": 16.7277, "step": 345570 }, { "epoch": 0.6980934642873015, "grad_norm": 406.9803771972656, "learning_rate": 2.63478548077585e-06, "loss": 15.6907, "step": 345580 }, { "epoch": 0.6981136649199853, "grad_norm": 306.5556945800781, "learning_rate": 2.634477945775208e-06, "loss": 9.6094, "step": 345590 }, { "epoch": 0.6981338655526691, "grad_norm": 279.78424072265625, "learning_rate": 2.634170422303835e-06, "loss": 14.6132, "step": 345600 }, { "epoch": 0.6981540661853529, "grad_norm": 230.0062255859375, "learning_rate": 2.633862910363229e-06, "loss": 22.0503, "step": 345610 }, { "epoch": 0.6981742668180367, "grad_norm": 184.11976623535156, "learning_rate": 2.6335554099548865e-06, "loss": 10.1526, "step": 345620 }, { "epoch": 0.6981944674507206, "grad_norm": 309.5509948730469, "learning_rate": 2.63324792108031e-06, "loss": 27.751, "step": 345630 }, { "epoch": 0.6982146680834044, "grad_norm": 0.0, "learning_rate": 2.6329404437409934e-06, "loss": 16.5663, "step": 345640 }, { "epoch": 0.6982348687160882, "grad_norm": 6.145326614379883, "learning_rate": 2.6326329779384397e-06, "loss": 12.3875, "step": 345650 }, { "epoch": 0.698255069348772, "grad_norm": 182.503173828125, "learning_rate": 2.6323255236741465e-06, "loss": 13.6935, "step": 345660 }, { "epoch": 0.6982752699814558, "grad_norm": 285.671630859375, "learning_rate": 2.63201808094961e-06, "loss": 26.0428, "step": 345670 }, { "epoch": 0.6982954706141397, "grad_norm": 265.443359375, "learning_rate": 2.6317106497663316e-06, "loss": 6.9872, "step": 345680 }, { "epoch": 0.6983156712468235, "grad_norm": 113.62970733642578, "learning_rate": 2.6314032301258072e-06, "loss": 27.4334, "step": 345690 }, { "epoch": 0.6983358718795073, "grad_norm": 252.06092834472656, "learning_rate": 2.6310958220295356e-06, "loss": 15.0635, "step": 345700 }, { "epoch": 0.6983560725121911, "grad_norm": 211.58103942871094, "learning_rate": 2.630788425479015e-06, "loss": 17.2726, "step": 345710 }, { "epoch": 0.6983762731448749, "grad_norm": 460.66827392578125, "learning_rate": 2.6304810404757465e-06, "loss": 23.3982, "step": 345720 }, { "epoch": 0.6983964737775588, "grad_norm": 441.8160095214844, "learning_rate": 2.6301736670212263e-06, "loss": 20.0738, "step": 345730 }, { "epoch": 0.6984166744102426, "grad_norm": 77.04154205322266, "learning_rate": 2.62986630511695e-06, "loss": 23.5868, "step": 345740 }, { "epoch": 0.6984368750429264, "grad_norm": 572.130615234375, "learning_rate": 2.6295589547644195e-06, "loss": 31.2209, "step": 345750 }, { "epoch": 0.6984570756756101, "grad_norm": 73.70989990234375, "learning_rate": 2.6292516159651317e-06, "loss": 31.9652, "step": 345760 }, { "epoch": 0.6984772763082939, "grad_norm": 284.5567626953125, "learning_rate": 2.628944288720582e-06, "loss": 21.235, "step": 345770 }, { "epoch": 0.6984974769409777, "grad_norm": 535.0555419921875, "learning_rate": 2.6286369730322693e-06, "loss": 22.4997, "step": 345780 }, { "epoch": 0.6985176775736616, "grad_norm": 209.2477569580078, "learning_rate": 2.6283296689016953e-06, "loss": 30.3591, "step": 345790 }, { "epoch": 0.6985378782063454, "grad_norm": 424.6797790527344, "learning_rate": 2.6280223763303546e-06, "loss": 13.2859, "step": 345800 }, { "epoch": 0.6985580788390292, "grad_norm": 214.94456481933594, "learning_rate": 2.6277150953197427e-06, "loss": 11.0492, "step": 345810 }, { "epoch": 0.698578279471713, "grad_norm": 184.83187866210938, "learning_rate": 2.6274078258713626e-06, "loss": 12.1451, "step": 345820 }, { "epoch": 0.6985984801043968, "grad_norm": 459.0548095703125, "learning_rate": 2.627100567986709e-06, "loss": 26.5543, "step": 345830 }, { "epoch": 0.6986186807370807, "grad_norm": 379.66143798828125, "learning_rate": 2.626793321667277e-06, "loss": 35.0773, "step": 345840 }, { "epoch": 0.6986388813697645, "grad_norm": 0.0, "learning_rate": 2.626486086914566e-06, "loss": 18.732, "step": 345850 }, { "epoch": 0.6986590820024483, "grad_norm": 557.0204467773438, "learning_rate": 2.626178863730077e-06, "loss": 28.3154, "step": 345860 }, { "epoch": 0.6986792826351321, "grad_norm": 167.0358123779297, "learning_rate": 2.6258716521153034e-06, "loss": 11.2493, "step": 345870 }, { "epoch": 0.698699483267816, "grad_norm": 177.48049926757812, "learning_rate": 2.6255644520717417e-06, "loss": 18.7716, "step": 345880 }, { "epoch": 0.6987196839004998, "grad_norm": 304.3970642089844, "learning_rate": 2.6252572636008934e-06, "loss": 27.9613, "step": 345890 }, { "epoch": 0.6987398845331836, "grad_norm": 522.2423706054688, "learning_rate": 2.6249500867042523e-06, "loss": 14.7751, "step": 345900 }, { "epoch": 0.6987600851658674, "grad_norm": 493.5438537597656, "learning_rate": 2.6246429213833146e-06, "loss": 15.1731, "step": 345910 }, { "epoch": 0.6987802857985512, "grad_norm": 139.34776306152344, "learning_rate": 2.624335767639582e-06, "loss": 12.5572, "step": 345920 }, { "epoch": 0.698800486431235, "grad_norm": 11.340527534484863, "learning_rate": 2.624028625474546e-06, "loss": 17.0386, "step": 345930 }, { "epoch": 0.6988206870639189, "grad_norm": 256.91748046875, "learning_rate": 2.6237214948897084e-06, "loss": 11.481, "step": 345940 }, { "epoch": 0.6988408876966027, "grad_norm": 89.09701538085938, "learning_rate": 2.623414375886564e-06, "loss": 17.1179, "step": 345950 }, { "epoch": 0.6988610883292865, "grad_norm": 512.7328491210938, "learning_rate": 2.623107268466608e-06, "loss": 26.0922, "step": 345960 }, { "epoch": 0.6988812889619703, "grad_norm": 447.1874694824219, "learning_rate": 2.6228001726313406e-06, "loss": 14.7329, "step": 345970 }, { "epoch": 0.6989014895946541, "grad_norm": 310.8492736816406, "learning_rate": 2.622493088382257e-06, "loss": 36.2294, "step": 345980 }, { "epoch": 0.698921690227338, "grad_norm": 205.91094970703125, "learning_rate": 2.6221860157208516e-06, "loss": 30.7016, "step": 345990 }, { "epoch": 0.6989418908600218, "grad_norm": 198.66073608398438, "learning_rate": 2.6218789546486235e-06, "loss": 14.1518, "step": 346000 }, { "epoch": 0.6989620914927056, "grad_norm": 0.0, "learning_rate": 2.6215719051670705e-06, "loss": 8.559, "step": 346010 }, { "epoch": 0.6989822921253893, "grad_norm": 393.9818115234375, "learning_rate": 2.6212648672776874e-06, "loss": 13.2495, "step": 346020 }, { "epoch": 0.6990024927580731, "grad_norm": 81.71190643310547, "learning_rate": 2.620957840981969e-06, "loss": 13.8032, "step": 346030 }, { "epoch": 0.699022693390757, "grad_norm": 475.8874206542969, "learning_rate": 2.6206508262814164e-06, "loss": 14.9215, "step": 346040 }, { "epoch": 0.6990428940234408, "grad_norm": 67.189453125, "learning_rate": 2.6203438231775224e-06, "loss": 10.968, "step": 346050 }, { "epoch": 0.6990630946561246, "grad_norm": 124.3048095703125, "learning_rate": 2.6200368316717816e-06, "loss": 16.5743, "step": 346060 }, { "epoch": 0.6990832952888084, "grad_norm": 228.88531494140625, "learning_rate": 2.6197298517656933e-06, "loss": 13.1439, "step": 346070 }, { "epoch": 0.6991034959214922, "grad_norm": 312.7608947753906, "learning_rate": 2.6194228834607567e-06, "loss": 41.2349, "step": 346080 }, { "epoch": 0.699123696554176, "grad_norm": 231.67665100097656, "learning_rate": 2.6191159267584604e-06, "loss": 17.8209, "step": 346090 }, { "epoch": 0.6991438971868599, "grad_norm": 163.4546661376953, "learning_rate": 2.618808981660304e-06, "loss": 23.6889, "step": 346100 }, { "epoch": 0.6991640978195437, "grad_norm": 170.64263916015625, "learning_rate": 2.618502048167786e-06, "loss": 12.5805, "step": 346110 }, { "epoch": 0.6991842984522275, "grad_norm": 203.02117919921875, "learning_rate": 2.6181951262824e-06, "loss": 21.51, "step": 346120 }, { "epoch": 0.6992044990849113, "grad_norm": 153.3561553955078, "learning_rate": 2.617888216005641e-06, "loss": 22.4079, "step": 346130 }, { "epoch": 0.6992246997175952, "grad_norm": 318.71331787109375, "learning_rate": 2.6175813173390063e-06, "loss": 31.6518, "step": 346140 }, { "epoch": 0.699244900350279, "grad_norm": 168.7410430908203, "learning_rate": 2.6172744302839925e-06, "loss": 21.737, "step": 346150 }, { "epoch": 0.6992651009829628, "grad_norm": 271.9602355957031, "learning_rate": 2.616967554842092e-06, "loss": 19.6237, "step": 346160 }, { "epoch": 0.6992853016156466, "grad_norm": 184.9193878173828, "learning_rate": 2.6166606910148024e-06, "loss": 13.2054, "step": 346170 }, { "epoch": 0.6993055022483304, "grad_norm": 99.36495971679688, "learning_rate": 2.6163538388036213e-06, "loss": 22.4093, "step": 346180 }, { "epoch": 0.6993257028810143, "grad_norm": 243.88763427734375, "learning_rate": 2.6160469982100426e-06, "loss": 11.5003, "step": 346190 }, { "epoch": 0.6993459035136981, "grad_norm": 173.6141815185547, "learning_rate": 2.61574016923556e-06, "loss": 13.4351, "step": 346200 }, { "epoch": 0.6993661041463819, "grad_norm": 1368.3143310546875, "learning_rate": 2.6154333518816727e-06, "loss": 31.9259, "step": 346210 }, { "epoch": 0.6993863047790657, "grad_norm": 199.296630859375, "learning_rate": 2.6151265461498737e-06, "loss": 11.7958, "step": 346220 }, { "epoch": 0.6994065054117495, "grad_norm": 216.81198120117188, "learning_rate": 2.6148197520416567e-06, "loss": 30.9193, "step": 346230 }, { "epoch": 0.6994267060444334, "grad_norm": 286.1857604980469, "learning_rate": 2.6145129695585213e-06, "loss": 11.4904, "step": 346240 }, { "epoch": 0.6994469066771172, "grad_norm": 294.7663879394531, "learning_rate": 2.614206198701958e-06, "loss": 15.3433, "step": 346250 }, { "epoch": 0.699467107309801, "grad_norm": 45.54020309448242, "learning_rate": 2.6138994394734663e-06, "loss": 7.8238, "step": 346260 }, { "epoch": 0.6994873079424847, "grad_norm": 400.48370361328125, "learning_rate": 2.613592691874539e-06, "loss": 15.1277, "step": 346270 }, { "epoch": 0.6995075085751685, "grad_norm": 62.3965950012207, "learning_rate": 2.6132859559066704e-06, "loss": 10.4325, "step": 346280 }, { "epoch": 0.6995277092078523, "grad_norm": 291.0249938964844, "learning_rate": 2.6129792315713576e-06, "loss": 12.7414, "step": 346290 }, { "epoch": 0.6995479098405362, "grad_norm": 911.003662109375, "learning_rate": 2.612672518870093e-06, "loss": 37.6752, "step": 346300 }, { "epoch": 0.69956811047322, "grad_norm": 513.9570922851562, "learning_rate": 2.6123658178043753e-06, "loss": 15.7684, "step": 346310 }, { "epoch": 0.6995883111059038, "grad_norm": 135.79627990722656, "learning_rate": 2.6120591283756946e-06, "loss": 29.7545, "step": 346320 }, { "epoch": 0.6996085117385876, "grad_norm": 615.8648681640625, "learning_rate": 2.6117524505855507e-06, "loss": 25.3289, "step": 346330 }, { "epoch": 0.6996287123712714, "grad_norm": 203.61375427246094, "learning_rate": 2.611445784435435e-06, "loss": 13.8239, "step": 346340 }, { "epoch": 0.6996489130039553, "grad_norm": 411.6136779785156, "learning_rate": 2.6111391299268406e-06, "loss": 12.9615, "step": 346350 }, { "epoch": 0.6996691136366391, "grad_norm": 89.21186065673828, "learning_rate": 2.6108324870612674e-06, "loss": 13.316, "step": 346360 }, { "epoch": 0.6996893142693229, "grad_norm": 162.12142944335938, "learning_rate": 2.610525855840206e-06, "loss": 11.1959, "step": 346370 }, { "epoch": 0.6997095149020067, "grad_norm": 207.82821655273438, "learning_rate": 2.61021923626515e-06, "loss": 32.6829, "step": 346380 }, { "epoch": 0.6997297155346905, "grad_norm": 311.7283020019531, "learning_rate": 2.609912628337596e-06, "loss": 17.2984, "step": 346390 }, { "epoch": 0.6997499161673744, "grad_norm": 15.112031936645508, "learning_rate": 2.6096060320590393e-06, "loss": 3.1982, "step": 346400 }, { "epoch": 0.6997701168000582, "grad_norm": 179.36631774902344, "learning_rate": 2.609299447430973e-06, "loss": 15.4749, "step": 346410 }, { "epoch": 0.699790317432742, "grad_norm": 265.3296813964844, "learning_rate": 2.60899287445489e-06, "loss": 16.2185, "step": 346420 }, { "epoch": 0.6998105180654258, "grad_norm": 30.505084991455078, "learning_rate": 2.608686313132287e-06, "loss": 15.5216, "step": 346430 }, { "epoch": 0.6998307186981096, "grad_norm": 84.05213928222656, "learning_rate": 2.6083797634646567e-06, "loss": 13.3066, "step": 346440 }, { "epoch": 0.6998509193307935, "grad_norm": 228.66018676757812, "learning_rate": 2.608073225453492e-06, "loss": 16.3103, "step": 346450 }, { "epoch": 0.6998711199634773, "grad_norm": 38.11866760253906, "learning_rate": 2.607766699100288e-06, "loss": 21.8206, "step": 346460 }, { "epoch": 0.6998913205961611, "grad_norm": 178.9840850830078, "learning_rate": 2.6074601844065407e-06, "loss": 12.8891, "step": 346470 }, { "epoch": 0.6999115212288449, "grad_norm": 151.69993591308594, "learning_rate": 2.607153681373743e-06, "loss": 22.8289, "step": 346480 }, { "epoch": 0.6999317218615287, "grad_norm": 239.28439331054688, "learning_rate": 2.6068471900033852e-06, "loss": 24.5404, "step": 346490 }, { "epoch": 0.6999519224942126, "grad_norm": 110.54222106933594, "learning_rate": 2.6065407102969664e-06, "loss": 17.8867, "step": 346500 }, { "epoch": 0.6999721231268964, "grad_norm": 213.84764099121094, "learning_rate": 2.6062342422559776e-06, "loss": 12.9955, "step": 346510 }, { "epoch": 0.6999923237595802, "grad_norm": 240.2434539794922, "learning_rate": 2.605927785881911e-06, "loss": 17.3904, "step": 346520 }, { "epoch": 0.7000125243922639, "grad_norm": 410.007080078125, "learning_rate": 2.6056213411762645e-06, "loss": 18.0369, "step": 346530 }, { "epoch": 0.7000327250249477, "grad_norm": 300.08575439453125, "learning_rate": 2.6053149081405267e-06, "loss": 13.9884, "step": 346540 }, { "epoch": 0.7000529256576316, "grad_norm": 343.0250244140625, "learning_rate": 2.6050084867761953e-06, "loss": 18.6676, "step": 346550 }, { "epoch": 0.7000731262903154, "grad_norm": 642.0438842773438, "learning_rate": 2.6047020770847618e-06, "loss": 33.2815, "step": 346560 }, { "epoch": 0.7000933269229992, "grad_norm": 179.66151428222656, "learning_rate": 2.6043956790677195e-06, "loss": 20.354, "step": 346570 }, { "epoch": 0.700113527555683, "grad_norm": 366.6961364746094, "learning_rate": 2.6040892927265627e-06, "loss": 19.6986, "step": 346580 }, { "epoch": 0.7001337281883668, "grad_norm": 108.36640167236328, "learning_rate": 2.603782918062784e-06, "loss": 17.3592, "step": 346590 }, { "epoch": 0.7001539288210507, "grad_norm": 107.12744140625, "learning_rate": 2.6034765550778753e-06, "loss": 15.9785, "step": 346600 }, { "epoch": 0.7001741294537345, "grad_norm": 178.5842742919922, "learning_rate": 2.603170203773331e-06, "loss": 18.7062, "step": 346610 }, { "epoch": 0.7001943300864183, "grad_norm": 410.6225891113281, "learning_rate": 2.6028638641506464e-06, "loss": 12.0936, "step": 346620 }, { "epoch": 0.7002145307191021, "grad_norm": 188.04368591308594, "learning_rate": 2.602557536211313e-06, "loss": 26.4735, "step": 346630 }, { "epoch": 0.7002347313517859, "grad_norm": 315.7301940917969, "learning_rate": 2.6022512199568205e-06, "loss": 18.1058, "step": 346640 }, { "epoch": 0.7002549319844698, "grad_norm": 64.2870864868164, "learning_rate": 2.601944915388668e-06, "loss": 22.1386, "step": 346650 }, { "epoch": 0.7002751326171536, "grad_norm": 580.0894165039062, "learning_rate": 2.6016386225083438e-06, "loss": 18.1165, "step": 346660 }, { "epoch": 0.7002953332498374, "grad_norm": 433.09381103515625, "learning_rate": 2.6013323413173408e-06, "loss": 18.8425, "step": 346670 }, { "epoch": 0.7003155338825212, "grad_norm": 211.10374450683594, "learning_rate": 2.601026071817153e-06, "loss": 21.9885, "step": 346680 }, { "epoch": 0.700335734515205, "grad_norm": 120.1490707397461, "learning_rate": 2.600719814009277e-06, "loss": 20.2726, "step": 346690 }, { "epoch": 0.7003559351478889, "grad_norm": 223.96144104003906, "learning_rate": 2.600413567895198e-06, "loss": 13.5989, "step": 346700 }, { "epoch": 0.7003761357805727, "grad_norm": 386.4174499511719, "learning_rate": 2.6001073334764117e-06, "loss": 8.9478, "step": 346710 }, { "epoch": 0.7003963364132565, "grad_norm": 151.5372314453125, "learning_rate": 2.5998011107544134e-06, "loss": 10.5415, "step": 346720 }, { "epoch": 0.7004165370459403, "grad_norm": 260.2442321777344, "learning_rate": 2.5994948997306935e-06, "loss": 33.8876, "step": 346730 }, { "epoch": 0.7004367376786241, "grad_norm": 359.6511535644531, "learning_rate": 2.599188700406743e-06, "loss": 19.618, "step": 346740 }, { "epoch": 0.700456938311308, "grad_norm": 236.60992431640625, "learning_rate": 2.5988825127840547e-06, "loss": 14.4536, "step": 346750 }, { "epoch": 0.7004771389439918, "grad_norm": 336.78302001953125, "learning_rate": 2.5985763368641253e-06, "loss": 15.1754, "step": 346760 }, { "epoch": 0.7004973395766756, "grad_norm": 222.2974853515625, "learning_rate": 2.5982701726484405e-06, "loss": 29.3119, "step": 346770 }, { "epoch": 0.7005175402093593, "grad_norm": 229.99041748046875, "learning_rate": 2.5979640201384953e-06, "loss": 24.4565, "step": 346780 }, { "epoch": 0.7005377408420431, "grad_norm": 308.305419921875, "learning_rate": 2.597657879335784e-06, "loss": 25.3269, "step": 346790 }, { "epoch": 0.7005579414747269, "grad_norm": 187.6962890625, "learning_rate": 2.5973517502417966e-06, "loss": 13.9503, "step": 346800 }, { "epoch": 0.7005781421074108, "grad_norm": 117.80341339111328, "learning_rate": 2.597045632858024e-06, "loss": 9.5614, "step": 346810 }, { "epoch": 0.7005983427400946, "grad_norm": 395.6351623535156, "learning_rate": 2.5967395271859614e-06, "loss": 24.4477, "step": 346820 }, { "epoch": 0.7006185433727784, "grad_norm": 74.99068450927734, "learning_rate": 2.596433433227099e-06, "loss": 7.4345, "step": 346830 }, { "epoch": 0.7006387440054622, "grad_norm": 241.41452026367188, "learning_rate": 2.596127350982926e-06, "loss": 5.2495, "step": 346840 }, { "epoch": 0.700658944638146, "grad_norm": 398.7728576660156, "learning_rate": 2.5958212804549387e-06, "loss": 17.5931, "step": 346850 }, { "epoch": 0.7006791452708299, "grad_norm": 186.95957946777344, "learning_rate": 2.5955152216446255e-06, "loss": 16.0516, "step": 346860 }, { "epoch": 0.7006993459035137, "grad_norm": 149.8652801513672, "learning_rate": 2.595209174553481e-06, "loss": 12.2977, "step": 346870 }, { "epoch": 0.7007195465361975, "grad_norm": 68.26073455810547, "learning_rate": 2.594903139182996e-06, "loss": 16.3132, "step": 346880 }, { "epoch": 0.7007397471688813, "grad_norm": 307.28228759765625, "learning_rate": 2.594597115534658e-06, "loss": 18.0545, "step": 346890 }, { "epoch": 0.7007599478015651, "grad_norm": 899.593994140625, "learning_rate": 2.5942911036099657e-06, "loss": 25.3155, "step": 346900 }, { "epoch": 0.700780148434249, "grad_norm": 283.0326232910156, "learning_rate": 2.5939851034104035e-06, "loss": 9.075, "step": 346910 }, { "epoch": 0.7008003490669328, "grad_norm": 383.098388671875, "learning_rate": 2.5936791149374686e-06, "loss": 13.7009, "step": 346920 }, { "epoch": 0.7008205496996166, "grad_norm": 201.9897918701172, "learning_rate": 2.5933731381926473e-06, "loss": 21.0716, "step": 346930 }, { "epoch": 0.7008407503323004, "grad_norm": 182.5355224609375, "learning_rate": 2.593067173177436e-06, "loss": 44.2455, "step": 346940 }, { "epoch": 0.7008609509649842, "grad_norm": 43.81498336791992, "learning_rate": 2.5927612198933237e-06, "loss": 21.2114, "step": 346950 }, { "epoch": 0.7008811515976681, "grad_norm": 239.87637329101562, "learning_rate": 2.592455278341799e-06, "loss": 20.4829, "step": 346960 }, { "epoch": 0.7009013522303519, "grad_norm": 42.89900588989258, "learning_rate": 2.5921493485243566e-06, "loss": 7.6063, "step": 346970 }, { "epoch": 0.7009215528630357, "grad_norm": 167.0325927734375, "learning_rate": 2.5918434304424867e-06, "loss": 17.6791, "step": 346980 }, { "epoch": 0.7009417534957195, "grad_norm": 404.3670349121094, "learning_rate": 2.591537524097678e-06, "loss": 18.269, "step": 346990 }, { "epoch": 0.7009619541284033, "grad_norm": 153.3993682861328, "learning_rate": 2.5912316294914232e-06, "loss": 11.0305, "step": 347000 }, { "epoch": 0.7009821547610872, "grad_norm": 344.1334533691406, "learning_rate": 2.590925746625217e-06, "loss": 16.6333, "step": 347010 }, { "epoch": 0.701002355393771, "grad_norm": 264.6736755371094, "learning_rate": 2.590619875500543e-06, "loss": 11.8515, "step": 347020 }, { "epoch": 0.7010225560264548, "grad_norm": 424.8674011230469, "learning_rate": 2.590314016118895e-06, "loss": 16.5479, "step": 347030 }, { "epoch": 0.7010427566591385, "grad_norm": 91.76130676269531, "learning_rate": 2.5900081684817667e-06, "loss": 20.8188, "step": 347040 }, { "epoch": 0.7010629572918223, "grad_norm": 29.77527618408203, "learning_rate": 2.5897023325906458e-06, "loss": 21.2911, "step": 347050 }, { "epoch": 0.7010831579245062, "grad_norm": 266.32928466796875, "learning_rate": 2.589396508447022e-06, "loss": 15.9362, "step": 347060 }, { "epoch": 0.70110335855719, "grad_norm": 366.26776123046875, "learning_rate": 2.5890906960523865e-06, "loss": 12.9929, "step": 347070 }, { "epoch": 0.7011235591898738, "grad_norm": 197.54962158203125, "learning_rate": 2.588784895408235e-06, "loss": 16.9128, "step": 347080 }, { "epoch": 0.7011437598225576, "grad_norm": 299.3553771972656, "learning_rate": 2.58847910651605e-06, "loss": 11.6385, "step": 347090 }, { "epoch": 0.7011639604552414, "grad_norm": 1461.1033935546875, "learning_rate": 2.588173329377324e-06, "loss": 31.4373, "step": 347100 }, { "epoch": 0.7011841610879253, "grad_norm": 624.1131591796875, "learning_rate": 2.587867563993552e-06, "loss": 12.5539, "step": 347110 }, { "epoch": 0.7012043617206091, "grad_norm": 70.44783020019531, "learning_rate": 2.5875618103662204e-06, "loss": 11.4005, "step": 347120 }, { "epoch": 0.7012245623532929, "grad_norm": 30.635406494140625, "learning_rate": 2.5872560684968175e-06, "loss": 12.2721, "step": 347130 }, { "epoch": 0.7012447629859767, "grad_norm": 165.531982421875, "learning_rate": 2.5869503383868387e-06, "loss": 42.9217, "step": 347140 }, { "epoch": 0.7012649636186605, "grad_norm": 242.2230682373047, "learning_rate": 2.5866446200377688e-06, "loss": 14.3591, "step": 347150 }, { "epoch": 0.7012851642513444, "grad_norm": 55.6727180480957, "learning_rate": 2.5863389134511024e-06, "loss": 9.5087, "step": 347160 }, { "epoch": 0.7013053648840282, "grad_norm": 0.0, "learning_rate": 2.5860332186283277e-06, "loss": 9.6281, "step": 347170 }, { "epoch": 0.701325565516712, "grad_norm": 206.20286560058594, "learning_rate": 2.5857275355709317e-06, "loss": 10.2114, "step": 347180 }, { "epoch": 0.7013457661493958, "grad_norm": 97.25345611572266, "learning_rate": 2.585421864280409e-06, "loss": 20.1403, "step": 347190 }, { "epoch": 0.7013659667820796, "grad_norm": 2362.9794921875, "learning_rate": 2.5851162047582477e-06, "loss": 24.9064, "step": 347200 }, { "epoch": 0.7013861674147635, "grad_norm": 233.4389190673828, "learning_rate": 2.5848105570059346e-06, "loss": 16.4369, "step": 347210 }, { "epoch": 0.7014063680474473, "grad_norm": 187.66217041015625, "learning_rate": 2.584504921024963e-06, "loss": 17.3993, "step": 347220 }, { "epoch": 0.7014265686801311, "grad_norm": 260.5898742675781, "learning_rate": 2.5841992968168224e-06, "loss": 10.7409, "step": 347230 }, { "epoch": 0.7014467693128149, "grad_norm": 475.7524108886719, "learning_rate": 2.5838936843830015e-06, "loss": 13.3148, "step": 347240 }, { "epoch": 0.7014669699454987, "grad_norm": 236.5043487548828, "learning_rate": 2.5835880837249884e-06, "loss": 23.1501, "step": 347250 }, { "epoch": 0.7014871705781826, "grad_norm": 336.2888488769531, "learning_rate": 2.5832824948442747e-06, "loss": 14.7097, "step": 347260 }, { "epoch": 0.7015073712108664, "grad_norm": 265.3834228515625, "learning_rate": 2.5829769177423504e-06, "loss": 26.8732, "step": 347270 }, { "epoch": 0.7015275718435502, "grad_norm": 217.67015075683594, "learning_rate": 2.5826713524207e-06, "loss": 7.8048, "step": 347280 }, { "epoch": 0.701547772476234, "grad_norm": 150.7046661376953, "learning_rate": 2.5823657988808176e-06, "loss": 12.092, "step": 347290 }, { "epoch": 0.7015679731089177, "grad_norm": 451.7633972167969, "learning_rate": 2.582060257124195e-06, "loss": 20.0696, "step": 347300 }, { "epoch": 0.7015881737416015, "grad_norm": 416.1869201660156, "learning_rate": 2.5817547271523124e-06, "loss": 12.2106, "step": 347310 }, { "epoch": 0.7016083743742854, "grad_norm": 216.8610382080078, "learning_rate": 2.5814492089666642e-06, "loss": 17.1354, "step": 347320 }, { "epoch": 0.7016285750069692, "grad_norm": 363.6202392578125, "learning_rate": 2.581143702568742e-06, "loss": 27.8853, "step": 347330 }, { "epoch": 0.701648775639653, "grad_norm": 234.42169189453125, "learning_rate": 2.5808382079600315e-06, "loss": 13.7998, "step": 347340 }, { "epoch": 0.7016689762723368, "grad_norm": 384.5220642089844, "learning_rate": 2.5805327251420205e-06, "loss": 11.8911, "step": 347350 }, { "epoch": 0.7016891769050206, "grad_norm": 0.0, "learning_rate": 2.580227254116199e-06, "loss": 14.7541, "step": 347360 }, { "epoch": 0.7017093775377045, "grad_norm": 114.2410888671875, "learning_rate": 2.5799217948840603e-06, "loss": 21.1221, "step": 347370 }, { "epoch": 0.7017295781703883, "grad_norm": 370.33416748046875, "learning_rate": 2.579616347447086e-06, "loss": 23.2368, "step": 347380 }, { "epoch": 0.7017497788030721, "grad_norm": 316.9123840332031, "learning_rate": 2.579310911806768e-06, "loss": 20.4362, "step": 347390 }, { "epoch": 0.7017699794357559, "grad_norm": 254.54913330078125, "learning_rate": 2.5790054879645964e-06, "loss": 12.6677, "step": 347400 }, { "epoch": 0.7017901800684397, "grad_norm": 325.73223876953125, "learning_rate": 2.5787000759220592e-06, "loss": 13.0118, "step": 347410 }, { "epoch": 0.7018103807011236, "grad_norm": 368.7377624511719, "learning_rate": 2.578394675680641e-06, "loss": 20.1089, "step": 347420 }, { "epoch": 0.7018305813338074, "grad_norm": 229.97528076171875, "learning_rate": 2.578089287241836e-06, "loss": 17.208, "step": 347430 }, { "epoch": 0.7018507819664912, "grad_norm": 310.5882873535156, "learning_rate": 2.5777839106071308e-06, "loss": 17.0378, "step": 347440 }, { "epoch": 0.701870982599175, "grad_norm": 106.28009796142578, "learning_rate": 2.5774785457780107e-06, "loss": 13.4259, "step": 347450 }, { "epoch": 0.7018911832318588, "grad_norm": 125.17448425292969, "learning_rate": 2.577173192755968e-06, "loss": 13.3793, "step": 347460 }, { "epoch": 0.7019113838645427, "grad_norm": 402.678466796875, "learning_rate": 2.576867851542487e-06, "loss": 18.375, "step": 347470 }, { "epoch": 0.7019315844972265, "grad_norm": 305.856689453125, "learning_rate": 2.576562522139061e-06, "loss": 16.159, "step": 347480 }, { "epoch": 0.7019517851299103, "grad_norm": 81.94393157958984, "learning_rate": 2.5762572045471744e-06, "loss": 18.4199, "step": 347490 }, { "epoch": 0.7019719857625941, "grad_norm": 254.60360717773438, "learning_rate": 2.5759518987683154e-06, "loss": 18.9999, "step": 347500 }, { "epoch": 0.7019921863952779, "grad_norm": 360.7272033691406, "learning_rate": 2.575646604803974e-06, "loss": 38.4894, "step": 347510 }, { "epoch": 0.7020123870279618, "grad_norm": 623.376220703125, "learning_rate": 2.5753413226556356e-06, "loss": 19.2507, "step": 347520 }, { "epoch": 0.7020325876606456, "grad_norm": 425.13116455078125, "learning_rate": 2.575036052324791e-06, "loss": 15.2803, "step": 347530 }, { "epoch": 0.7020527882933294, "grad_norm": 411.73846435546875, "learning_rate": 2.5747307938129245e-06, "loss": 16.1834, "step": 347540 }, { "epoch": 0.7020729889260131, "grad_norm": 513.9067993164062, "learning_rate": 2.5744255471215284e-06, "loss": 20.065, "step": 347550 }, { "epoch": 0.7020931895586969, "grad_norm": 332.93658447265625, "learning_rate": 2.5741203122520876e-06, "loss": 17.0064, "step": 347560 }, { "epoch": 0.7021133901913807, "grad_norm": 214.276123046875, "learning_rate": 2.573815089206089e-06, "loss": 28.5199, "step": 347570 }, { "epoch": 0.7021335908240646, "grad_norm": 216.5589599609375, "learning_rate": 2.573509877985022e-06, "loss": 18.0489, "step": 347580 }, { "epoch": 0.7021537914567484, "grad_norm": 197.97312927246094, "learning_rate": 2.5732046785903744e-06, "loss": 15.1222, "step": 347590 }, { "epoch": 0.7021739920894322, "grad_norm": 0.0, "learning_rate": 2.5728994910236304e-06, "loss": 15.9878, "step": 347600 }, { "epoch": 0.702194192722116, "grad_norm": 144.37887573242188, "learning_rate": 2.572594315286281e-06, "loss": 23.5872, "step": 347610 }, { "epoch": 0.7022143933547998, "grad_norm": 216.16163635253906, "learning_rate": 2.5722891513798156e-06, "loss": 17.0219, "step": 347620 }, { "epoch": 0.7022345939874837, "grad_norm": 193.36175537109375, "learning_rate": 2.5719839993057143e-06, "loss": 23.2335, "step": 347630 }, { "epoch": 0.7022547946201675, "grad_norm": 123.34534454345703, "learning_rate": 2.571678859065469e-06, "loss": 34.7214, "step": 347640 }, { "epoch": 0.7022749952528513, "grad_norm": 367.65093994140625, "learning_rate": 2.571373730660568e-06, "loss": 22.8132, "step": 347650 }, { "epoch": 0.7022951958855351, "grad_norm": 384.6547546386719, "learning_rate": 2.571068614092497e-06, "loss": 15.2494, "step": 347660 }, { "epoch": 0.702315396518219, "grad_norm": 293.819580078125, "learning_rate": 2.5707635093627415e-06, "loss": 33.5734, "step": 347670 }, { "epoch": 0.7023355971509028, "grad_norm": 193.62088012695312, "learning_rate": 2.5704584164727898e-06, "loss": 26.7251, "step": 347680 }, { "epoch": 0.7023557977835866, "grad_norm": 12.735861778259277, "learning_rate": 2.5701533354241325e-06, "loss": 14.2314, "step": 347690 }, { "epoch": 0.7023759984162704, "grad_norm": 137.93109130859375, "learning_rate": 2.5698482662182494e-06, "loss": 17.8916, "step": 347700 }, { "epoch": 0.7023961990489542, "grad_norm": 7.778996467590332, "learning_rate": 2.5695432088566313e-06, "loss": 9.7263, "step": 347710 }, { "epoch": 0.702416399681638, "grad_norm": 229.75839233398438, "learning_rate": 2.5692381633407672e-06, "loss": 17.3874, "step": 347720 }, { "epoch": 0.7024366003143219, "grad_norm": 298.7098388671875, "learning_rate": 2.568933129672141e-06, "loss": 19.8187, "step": 347730 }, { "epoch": 0.7024568009470057, "grad_norm": 318.75079345703125, "learning_rate": 2.568628107852238e-06, "loss": 21.584, "step": 347740 }, { "epoch": 0.7024770015796895, "grad_norm": 484.76519775390625, "learning_rate": 2.568323097882548e-06, "loss": 33.7262, "step": 347750 }, { "epoch": 0.7024972022123733, "grad_norm": 293.33746337890625, "learning_rate": 2.5680180997645577e-06, "loss": 10.2868, "step": 347760 }, { "epoch": 0.7025174028450571, "grad_norm": 394.8092956542969, "learning_rate": 2.567713113499749e-06, "loss": 26.1994, "step": 347770 }, { "epoch": 0.702537603477741, "grad_norm": 223.03599548339844, "learning_rate": 2.5674081390896146e-06, "loss": 14.0151, "step": 347780 }, { "epoch": 0.7025578041104248, "grad_norm": 51.88089370727539, "learning_rate": 2.567103176535635e-06, "loss": 10.4355, "step": 347790 }, { "epoch": 0.7025780047431086, "grad_norm": 249.95745849609375, "learning_rate": 2.5667982258393016e-06, "loss": 25.1647, "step": 347800 }, { "epoch": 0.7025982053757923, "grad_norm": 131.9540252685547, "learning_rate": 2.5664932870020966e-06, "loss": 13.8622, "step": 347810 }, { "epoch": 0.7026184060084761, "grad_norm": 266.95556640625, "learning_rate": 2.5661883600255107e-06, "loss": 10.2427, "step": 347820 }, { "epoch": 0.70263860664116, "grad_norm": 330.9670104980469, "learning_rate": 2.565883444911025e-06, "loss": 21.0824, "step": 347830 }, { "epoch": 0.7026588072738438, "grad_norm": 325.3670654296875, "learning_rate": 2.5655785416601297e-06, "loss": 19.2591, "step": 347840 }, { "epoch": 0.7026790079065276, "grad_norm": 642.6085815429688, "learning_rate": 2.5652736502743105e-06, "loss": 22.1796, "step": 347850 }, { "epoch": 0.7026992085392114, "grad_norm": 361.10455322265625, "learning_rate": 2.56496877075505e-06, "loss": 15.0355, "step": 347860 }, { "epoch": 0.7027194091718952, "grad_norm": 1.8259851932525635, "learning_rate": 2.564663903103838e-06, "loss": 11.7165, "step": 347870 }, { "epoch": 0.7027396098045791, "grad_norm": 1.4690319299697876, "learning_rate": 2.564359047322158e-06, "loss": 7.2652, "step": 347880 }, { "epoch": 0.7027598104372629, "grad_norm": 311.776123046875, "learning_rate": 2.5640542034114955e-06, "loss": 14.195, "step": 347890 }, { "epoch": 0.7027800110699467, "grad_norm": 222.57626342773438, "learning_rate": 2.5637493713733376e-06, "loss": 18.8325, "step": 347900 }, { "epoch": 0.7028002117026305, "grad_norm": 104.86906433105469, "learning_rate": 2.5634445512091733e-06, "loss": 11.2025, "step": 347910 }, { "epoch": 0.7028204123353143, "grad_norm": 62.60617446899414, "learning_rate": 2.563139742920481e-06, "loss": 14.1305, "step": 347920 }, { "epoch": 0.7028406129679982, "grad_norm": 145.1639862060547, "learning_rate": 2.5628349465087498e-06, "loss": 17.9204, "step": 347930 }, { "epoch": 0.702860813600682, "grad_norm": 685.5185546875, "learning_rate": 2.5625301619754678e-06, "loss": 37.5169, "step": 347940 }, { "epoch": 0.7028810142333658, "grad_norm": 309.2491455078125, "learning_rate": 2.5622253893221176e-06, "loss": 19.8371, "step": 347950 }, { "epoch": 0.7029012148660496, "grad_norm": 156.4233856201172, "learning_rate": 2.561920628550184e-06, "loss": 17.2896, "step": 347960 }, { "epoch": 0.7029214154987334, "grad_norm": 77.4073715209961, "learning_rate": 2.5616158796611527e-06, "loss": 15.2854, "step": 347970 }, { "epoch": 0.7029416161314173, "grad_norm": 361.83917236328125, "learning_rate": 2.5613111426565144e-06, "loss": 20.8415, "step": 347980 }, { "epoch": 0.7029618167641011, "grad_norm": 260.6099548339844, "learning_rate": 2.5610064175377456e-06, "loss": 18.1782, "step": 347990 }, { "epoch": 0.7029820173967849, "grad_norm": 271.6692810058594, "learning_rate": 2.560701704306336e-06, "loss": 18.1363, "step": 348000 }, { "epoch": 0.7030022180294687, "grad_norm": 194.40420532226562, "learning_rate": 2.5603970029637727e-06, "loss": 12.7686, "step": 348010 }, { "epoch": 0.7030224186621525, "grad_norm": 308.1908874511719, "learning_rate": 2.5600923135115374e-06, "loss": 33.9205, "step": 348020 }, { "epoch": 0.7030426192948364, "grad_norm": 304.5624694824219, "learning_rate": 2.5597876359511153e-06, "loss": 21.7723, "step": 348030 }, { "epoch": 0.7030628199275202, "grad_norm": 203.70851135253906, "learning_rate": 2.5594829702839937e-06, "loss": 17.2871, "step": 348040 }, { "epoch": 0.703083020560204, "grad_norm": 126.14859771728516, "learning_rate": 2.5591783165116563e-06, "loss": 12.3823, "step": 348050 }, { "epoch": 0.7031032211928877, "grad_norm": 85.2481689453125, "learning_rate": 2.5588736746355858e-06, "loss": 20.1589, "step": 348060 }, { "epoch": 0.7031234218255715, "grad_norm": 94.36994934082031, "learning_rate": 2.5585690446572708e-06, "loss": 24.6662, "step": 348070 }, { "epoch": 0.7031436224582553, "grad_norm": 448.2389221191406, "learning_rate": 2.558264426578192e-06, "loss": 19.3974, "step": 348080 }, { "epoch": 0.7031638230909392, "grad_norm": 479.63232421875, "learning_rate": 2.557959820399839e-06, "loss": 16.3215, "step": 348090 }, { "epoch": 0.703184023723623, "grad_norm": 236.3374786376953, "learning_rate": 2.557655226123693e-06, "loss": 12.6287, "step": 348100 }, { "epoch": 0.7032042243563068, "grad_norm": 381.2812805175781, "learning_rate": 2.5573506437512374e-06, "loss": 34.7496, "step": 348110 }, { "epoch": 0.7032244249889906, "grad_norm": 646.8052978515625, "learning_rate": 2.55704607328396e-06, "loss": 21.5273, "step": 348120 }, { "epoch": 0.7032446256216744, "grad_norm": 357.189697265625, "learning_rate": 2.556741514723342e-06, "loss": 24.6834, "step": 348130 }, { "epoch": 0.7032648262543583, "grad_norm": 11.752696990966797, "learning_rate": 2.556436968070872e-06, "loss": 16.7813, "step": 348140 }, { "epoch": 0.7032850268870421, "grad_norm": 423.0076904296875, "learning_rate": 2.55613243332803e-06, "loss": 17.3934, "step": 348150 }, { "epoch": 0.7033052275197259, "grad_norm": 168.2597198486328, "learning_rate": 2.5558279104963037e-06, "loss": 15.0412, "step": 348160 }, { "epoch": 0.7033254281524097, "grad_norm": 392.1938171386719, "learning_rate": 2.5555233995771757e-06, "loss": 17.8499, "step": 348170 }, { "epoch": 0.7033456287850935, "grad_norm": 317.8109130859375, "learning_rate": 2.555218900572128e-06, "loss": 10.4028, "step": 348180 }, { "epoch": 0.7033658294177774, "grad_norm": 8.336645126342773, "learning_rate": 2.5549144134826487e-06, "loss": 13.0217, "step": 348190 }, { "epoch": 0.7033860300504612, "grad_norm": 581.3880615234375, "learning_rate": 2.5546099383102206e-06, "loss": 13.4373, "step": 348200 }, { "epoch": 0.703406230683145, "grad_norm": 413.43511962890625, "learning_rate": 2.5543054750563246e-06, "loss": 11.8371, "step": 348210 }, { "epoch": 0.7034264313158288, "grad_norm": 29.742794036865234, "learning_rate": 2.5540010237224476e-06, "loss": 11.8604, "step": 348220 }, { "epoch": 0.7034466319485126, "grad_norm": 457.5856628417969, "learning_rate": 2.5536965843100764e-06, "loss": 19.7211, "step": 348230 }, { "epoch": 0.7034668325811965, "grad_norm": 438.69024658203125, "learning_rate": 2.5533921568206876e-06, "loss": 12.3323, "step": 348240 }, { "epoch": 0.7034870332138803, "grad_norm": 250.9090576171875, "learning_rate": 2.5530877412557684e-06, "loss": 13.0639, "step": 348250 }, { "epoch": 0.7035072338465641, "grad_norm": 307.6768798828125, "learning_rate": 2.5527833376168055e-06, "loss": 11.0638, "step": 348260 }, { "epoch": 0.7035274344792479, "grad_norm": 271.70074462890625, "learning_rate": 2.552478945905279e-06, "loss": 13.125, "step": 348270 }, { "epoch": 0.7035476351119317, "grad_norm": 381.0058898925781, "learning_rate": 2.5521745661226717e-06, "loss": 15.1313, "step": 348280 }, { "epoch": 0.7035678357446156, "grad_norm": 277.0640869140625, "learning_rate": 2.5518701982704684e-06, "loss": 12.1382, "step": 348290 }, { "epoch": 0.7035880363772994, "grad_norm": 331.2669677734375, "learning_rate": 2.5515658423501573e-06, "loss": 13.3111, "step": 348300 }, { "epoch": 0.7036082370099832, "grad_norm": 63.776275634765625, "learning_rate": 2.551261498363213e-06, "loss": 20.4549, "step": 348310 }, { "epoch": 0.7036284376426669, "grad_norm": 192.5443572998047, "learning_rate": 2.5509571663111233e-06, "loss": 17.8504, "step": 348320 }, { "epoch": 0.7036486382753507, "grad_norm": 296.7326965332031, "learning_rate": 2.5506528461953726e-06, "loss": 5.9315, "step": 348330 }, { "epoch": 0.7036688389080346, "grad_norm": 33.18772888183594, "learning_rate": 2.5503485380174443e-06, "loss": 27.3068, "step": 348340 }, { "epoch": 0.7036890395407184, "grad_norm": 276.4188537597656, "learning_rate": 2.5500442417788176e-06, "loss": 15.9432, "step": 348350 }, { "epoch": 0.7037092401734022, "grad_norm": 233.8216094970703, "learning_rate": 2.549739957480979e-06, "loss": 25.8944, "step": 348360 }, { "epoch": 0.703729440806086, "grad_norm": 94.33599853515625, "learning_rate": 2.549435685125412e-06, "loss": 12.554, "step": 348370 }, { "epoch": 0.7037496414387698, "grad_norm": 24.560529708862305, "learning_rate": 2.5491314247135955e-06, "loss": 19.2767, "step": 348380 }, { "epoch": 0.7037698420714537, "grad_norm": 179.38047790527344, "learning_rate": 2.5488271762470172e-06, "loss": 37.901, "step": 348390 }, { "epoch": 0.7037900427041375, "grad_norm": 252.54075622558594, "learning_rate": 2.5485229397271567e-06, "loss": 19.1394, "step": 348400 }, { "epoch": 0.7038102433368213, "grad_norm": 441.7610778808594, "learning_rate": 2.5482187151554994e-06, "loss": 18.99, "step": 348410 }, { "epoch": 0.7038304439695051, "grad_norm": 235.57687377929688, "learning_rate": 2.547914502533525e-06, "loss": 26.2501, "step": 348420 }, { "epoch": 0.7038506446021889, "grad_norm": 312.2933044433594, "learning_rate": 2.5476103018627195e-06, "loss": 16.7905, "step": 348430 }, { "epoch": 0.7038708452348728, "grad_norm": 190.17523193359375, "learning_rate": 2.547306113144564e-06, "loss": 23.4348, "step": 348440 }, { "epoch": 0.7038910458675566, "grad_norm": 690.16357421875, "learning_rate": 2.54700193638054e-06, "loss": 25.717, "step": 348450 }, { "epoch": 0.7039112465002404, "grad_norm": 69.60706329345703, "learning_rate": 2.546697771572132e-06, "loss": 16.8264, "step": 348460 }, { "epoch": 0.7039314471329242, "grad_norm": 353.5439147949219, "learning_rate": 2.5463936187208198e-06, "loss": 21.916, "step": 348470 }, { "epoch": 0.703951647765608, "grad_norm": 228.9514617919922, "learning_rate": 2.54608947782809e-06, "loss": 17.7854, "step": 348480 }, { "epoch": 0.7039718483982919, "grad_norm": 239.3609619140625, "learning_rate": 2.5457853488954214e-06, "loss": 20.1393, "step": 348490 }, { "epoch": 0.7039920490309757, "grad_norm": 289.32232666015625, "learning_rate": 2.545481231924296e-06, "loss": 11.2856, "step": 348500 }, { "epoch": 0.7040122496636595, "grad_norm": 245.21517944335938, "learning_rate": 2.5451771269161996e-06, "loss": 21.6539, "step": 348510 }, { "epoch": 0.7040324502963433, "grad_norm": 363.141357421875, "learning_rate": 2.544873033872611e-06, "loss": 20.9977, "step": 348520 }, { "epoch": 0.7040526509290271, "grad_norm": 265.5771789550781, "learning_rate": 2.5445689527950135e-06, "loss": 23.5311, "step": 348530 }, { "epoch": 0.704072851561711, "grad_norm": 306.8912353515625, "learning_rate": 2.5442648836848877e-06, "loss": 17.6952, "step": 348540 }, { "epoch": 0.7040930521943948, "grad_norm": 110.81255340576172, "learning_rate": 2.5439608265437186e-06, "loss": 13.2758, "step": 348550 }, { "epoch": 0.7041132528270786, "grad_norm": 464.3107604980469, "learning_rate": 2.5436567813729877e-06, "loss": 13.9884, "step": 348560 }, { "epoch": 0.7041334534597624, "grad_norm": 443.259765625, "learning_rate": 2.543352748174173e-06, "loss": 16.8096, "step": 348570 }, { "epoch": 0.7041536540924461, "grad_norm": 398.48187255859375, "learning_rate": 2.54304872694876e-06, "loss": 18.2448, "step": 348580 }, { "epoch": 0.7041738547251299, "grad_norm": 218.93748474121094, "learning_rate": 2.5427447176982323e-06, "loss": 23.884, "step": 348590 }, { "epoch": 0.7041940553578138, "grad_norm": 383.3680419921875, "learning_rate": 2.5424407204240653e-06, "loss": 22.0538, "step": 348600 }, { "epoch": 0.7042142559904976, "grad_norm": 418.7093200683594, "learning_rate": 2.542136735127744e-06, "loss": 11.6536, "step": 348610 }, { "epoch": 0.7042344566231814, "grad_norm": 167.60394287109375, "learning_rate": 2.541832761810753e-06, "loss": 30.6999, "step": 348620 }, { "epoch": 0.7042546572558652, "grad_norm": 367.192138671875, "learning_rate": 2.5415288004745697e-06, "loss": 10.4374, "step": 348630 }, { "epoch": 0.704274857888549, "grad_norm": 334.8513488769531, "learning_rate": 2.541224851120676e-06, "loss": 14.0388, "step": 348640 }, { "epoch": 0.7042950585212329, "grad_norm": 18.695825576782227, "learning_rate": 2.540920913750555e-06, "loss": 12.2868, "step": 348650 }, { "epoch": 0.7043152591539167, "grad_norm": 527.21484375, "learning_rate": 2.5406169883656883e-06, "loss": 25.4894, "step": 348660 }, { "epoch": 0.7043354597866005, "grad_norm": 130.0140380859375, "learning_rate": 2.5403130749675537e-06, "loss": 11.7902, "step": 348670 }, { "epoch": 0.7043556604192843, "grad_norm": 82.9089126586914, "learning_rate": 2.540009173557637e-06, "loss": 15.5937, "step": 348680 }, { "epoch": 0.7043758610519681, "grad_norm": 13.149439811706543, "learning_rate": 2.5397052841374147e-06, "loss": 10.2368, "step": 348690 }, { "epoch": 0.704396061684652, "grad_norm": 717.584228515625, "learning_rate": 2.539401406708373e-06, "loss": 26.4888, "step": 348700 }, { "epoch": 0.7044162623173358, "grad_norm": 671.0147705078125, "learning_rate": 2.5390975412719897e-06, "loss": 16.7704, "step": 348710 }, { "epoch": 0.7044364629500196, "grad_norm": 175.2325897216797, "learning_rate": 2.5387936878297452e-06, "loss": 44.3192, "step": 348720 }, { "epoch": 0.7044566635827034, "grad_norm": 145.66966247558594, "learning_rate": 2.5384898463831237e-06, "loss": 17.3699, "step": 348730 }, { "epoch": 0.7044768642153872, "grad_norm": 326.99774169921875, "learning_rate": 2.538186016933602e-06, "loss": 24.1674, "step": 348740 }, { "epoch": 0.7044970648480711, "grad_norm": 351.0392150878906, "learning_rate": 2.5378821994826654e-06, "loss": 16.1929, "step": 348750 }, { "epoch": 0.7045172654807549, "grad_norm": 677.080078125, "learning_rate": 2.53757839403179e-06, "loss": 23.2367, "step": 348760 }, { "epoch": 0.7045374661134387, "grad_norm": 317.0199890136719, "learning_rate": 2.5372746005824605e-06, "loss": 13.6504, "step": 348770 }, { "epoch": 0.7045576667461225, "grad_norm": 525.6347045898438, "learning_rate": 2.5369708191361565e-06, "loss": 14.1305, "step": 348780 }, { "epoch": 0.7045778673788063, "grad_norm": 75.94295501708984, "learning_rate": 2.5366670496943557e-06, "loss": 28.292, "step": 348790 }, { "epoch": 0.7045980680114902, "grad_norm": 171.15785217285156, "learning_rate": 2.536363292258543e-06, "loss": 12.4337, "step": 348800 }, { "epoch": 0.704618268644174, "grad_norm": 208.4932403564453, "learning_rate": 2.5360595468301966e-06, "loss": 12.6965, "step": 348810 }, { "epoch": 0.7046384692768578, "grad_norm": 16.81102752685547, "learning_rate": 2.5357558134107958e-06, "loss": 13.7253, "step": 348820 }, { "epoch": 0.7046586699095415, "grad_norm": 326.5244140625, "learning_rate": 2.5354520920018215e-06, "loss": 17.5987, "step": 348830 }, { "epoch": 0.7046788705422253, "grad_norm": 318.1314697265625, "learning_rate": 2.5351483826047595e-06, "loss": 15.8454, "step": 348840 }, { "epoch": 0.7046990711749092, "grad_norm": 368.0579833984375, "learning_rate": 2.5348446852210807e-06, "loss": 24.6372, "step": 348850 }, { "epoch": 0.704719271807593, "grad_norm": 25.999401092529297, "learning_rate": 2.5345409998522704e-06, "loss": 10.1987, "step": 348860 }, { "epoch": 0.7047394724402768, "grad_norm": 347.935546875, "learning_rate": 2.53423732649981e-06, "loss": 30.2983, "step": 348870 }, { "epoch": 0.7047596730729606, "grad_norm": 279.5258483886719, "learning_rate": 2.533933665165178e-06, "loss": 13.249, "step": 348880 }, { "epoch": 0.7047798737056444, "grad_norm": 271.9861755371094, "learning_rate": 2.5336300158498518e-06, "loss": 22.856, "step": 348890 }, { "epoch": 0.7048000743383283, "grad_norm": 487.5909118652344, "learning_rate": 2.533326378555314e-06, "loss": 20.3653, "step": 348900 }, { "epoch": 0.7048202749710121, "grad_norm": 147.26593017578125, "learning_rate": 2.5330227532830483e-06, "loss": 14.679, "step": 348910 }, { "epoch": 0.7048404756036959, "grad_norm": 266.7790832519531, "learning_rate": 2.5327191400345262e-06, "loss": 13.9846, "step": 348920 }, { "epoch": 0.7048606762363797, "grad_norm": 533.738037109375, "learning_rate": 2.5324155388112326e-06, "loss": 34.004, "step": 348930 }, { "epoch": 0.7048808768690635, "grad_norm": 649.9088134765625, "learning_rate": 2.5321119496146472e-06, "loss": 22.6454, "step": 348940 }, { "epoch": 0.7049010775017474, "grad_norm": 1158.45947265625, "learning_rate": 2.5318083724462495e-06, "loss": 27.357, "step": 348950 }, { "epoch": 0.7049212781344312, "grad_norm": 297.6795349121094, "learning_rate": 2.5315048073075166e-06, "loss": 15.1442, "step": 348960 }, { "epoch": 0.704941478767115, "grad_norm": 280.863037109375, "learning_rate": 2.531201254199932e-06, "loss": 17.5355, "step": 348970 }, { "epoch": 0.7049616793997988, "grad_norm": 285.72564697265625, "learning_rate": 2.5308977131249724e-06, "loss": 18.6862, "step": 348980 }, { "epoch": 0.7049818800324826, "grad_norm": 611.4855346679688, "learning_rate": 2.5305941840841163e-06, "loss": 22.074, "step": 348990 }, { "epoch": 0.7050020806651665, "grad_norm": 166.6228790283203, "learning_rate": 2.5302906670788463e-06, "loss": 16.0506, "step": 349000 }, { "epoch": 0.7050222812978503, "grad_norm": 207.1941680908203, "learning_rate": 2.5299871621106387e-06, "loss": 17.6623, "step": 349010 }, { "epoch": 0.7050424819305341, "grad_norm": 112.44235229492188, "learning_rate": 2.529683669180976e-06, "loss": 14.0018, "step": 349020 }, { "epoch": 0.7050626825632179, "grad_norm": 175.87213134765625, "learning_rate": 2.5293801882913327e-06, "loss": 12.3447, "step": 349030 }, { "epoch": 0.7050828831959017, "grad_norm": 377.23162841796875, "learning_rate": 2.529076719443193e-06, "loss": 10.8825, "step": 349040 }, { "epoch": 0.7051030838285856, "grad_norm": 205.1239776611328, "learning_rate": 2.528773262638034e-06, "loss": 8.6319, "step": 349050 }, { "epoch": 0.7051232844612694, "grad_norm": 430.6372375488281, "learning_rate": 2.5284698178773327e-06, "loss": 20.1784, "step": 349060 }, { "epoch": 0.7051434850939532, "grad_norm": 275.3390197753906, "learning_rate": 2.5281663851625703e-06, "loss": 21.284, "step": 349070 }, { "epoch": 0.705163685726637, "grad_norm": 374.3188171386719, "learning_rate": 2.5278629644952245e-06, "loss": 18.0172, "step": 349080 }, { "epoch": 0.7051838863593207, "grad_norm": 266.5592041015625, "learning_rate": 2.527559555876776e-06, "loss": 16.569, "step": 349090 }, { "epoch": 0.7052040869920045, "grad_norm": 186.26742553710938, "learning_rate": 2.527256159308703e-06, "loss": 16.9592, "step": 349100 }, { "epoch": 0.7052242876246884, "grad_norm": 351.2922058105469, "learning_rate": 2.5269527747924816e-06, "loss": 39.1314, "step": 349110 }, { "epoch": 0.7052444882573722, "grad_norm": 125.51981353759766, "learning_rate": 2.526649402329594e-06, "loss": 19.1425, "step": 349120 }, { "epoch": 0.705264688890056, "grad_norm": 46.11008071899414, "learning_rate": 2.526346041921518e-06, "loss": 19.0613, "step": 349130 }, { "epoch": 0.7052848895227398, "grad_norm": 298.0798034667969, "learning_rate": 2.5260426935697286e-06, "loss": 23.2404, "step": 349140 }, { "epoch": 0.7053050901554236, "grad_norm": 357.057861328125, "learning_rate": 2.5257393572757073e-06, "loss": 19.0543, "step": 349150 }, { "epoch": 0.7053252907881075, "grad_norm": 197.87393188476562, "learning_rate": 2.5254360330409343e-06, "loss": 14.7655, "step": 349160 }, { "epoch": 0.7053454914207913, "grad_norm": 138.6476593017578, "learning_rate": 2.5251327208668856e-06, "loss": 16.2939, "step": 349170 }, { "epoch": 0.7053656920534751, "grad_norm": 657.3358154296875, "learning_rate": 2.5248294207550383e-06, "loss": 17.5319, "step": 349180 }, { "epoch": 0.7053858926861589, "grad_norm": 509.403564453125, "learning_rate": 2.5245261327068736e-06, "loss": 17.3076, "step": 349190 }, { "epoch": 0.7054060933188427, "grad_norm": 273.9102478027344, "learning_rate": 2.524222856723869e-06, "loss": 11.2148, "step": 349200 }, { "epoch": 0.7054262939515266, "grad_norm": 603.2604370117188, "learning_rate": 2.5239195928075e-06, "loss": 17.3059, "step": 349210 }, { "epoch": 0.7054464945842104, "grad_norm": 146.45236206054688, "learning_rate": 2.5236163409592464e-06, "loss": 21.4402, "step": 349220 }, { "epoch": 0.7054666952168942, "grad_norm": 219.3915557861328, "learning_rate": 2.523313101180588e-06, "loss": 19.5302, "step": 349230 }, { "epoch": 0.705486895849578, "grad_norm": 58.90530014038086, "learning_rate": 2.5230098734730014e-06, "loss": 6.9375, "step": 349240 }, { "epoch": 0.7055070964822618, "grad_norm": 413.8292541503906, "learning_rate": 2.5227066578379624e-06, "loss": 18.3508, "step": 349250 }, { "epoch": 0.7055272971149457, "grad_norm": 314.6954345703125, "learning_rate": 2.522403454276952e-06, "loss": 14.3125, "step": 349260 }, { "epoch": 0.7055474977476295, "grad_norm": 174.3856658935547, "learning_rate": 2.522100262791447e-06, "loss": 15.7679, "step": 349270 }, { "epoch": 0.7055676983803133, "grad_norm": 265.6531066894531, "learning_rate": 2.521797083382923e-06, "loss": 23.4776, "step": 349280 }, { "epoch": 0.7055878990129971, "grad_norm": 341.89642333984375, "learning_rate": 2.521493916052862e-06, "loss": 32.6622, "step": 349290 }, { "epoch": 0.7056080996456809, "grad_norm": 312.4505615234375, "learning_rate": 2.5211907608027366e-06, "loss": 22.1106, "step": 349300 }, { "epoch": 0.7056283002783648, "grad_norm": 10.99889087677002, "learning_rate": 2.5208876176340285e-06, "loss": 18.6878, "step": 349310 }, { "epoch": 0.7056485009110486, "grad_norm": 108.87661743164062, "learning_rate": 2.5205844865482115e-06, "loss": 11.2447, "step": 349320 }, { "epoch": 0.7056687015437324, "grad_norm": 225.00823974609375, "learning_rate": 2.5202813675467675e-06, "loss": 47.1723, "step": 349330 }, { "epoch": 0.7056889021764161, "grad_norm": 490.76947021484375, "learning_rate": 2.5199782606311708e-06, "loss": 28.6436, "step": 349340 }, { "epoch": 0.7057091028090999, "grad_norm": 160.7596435546875, "learning_rate": 2.5196751658028972e-06, "loss": 17.2862, "step": 349350 }, { "epoch": 0.7057293034417838, "grad_norm": 73.6541976928711, "learning_rate": 2.5193720830634284e-06, "loss": 18.9523, "step": 349360 }, { "epoch": 0.7057495040744676, "grad_norm": 39.35419464111328, "learning_rate": 2.5190690124142368e-06, "loss": 8.5508, "step": 349370 }, { "epoch": 0.7057697047071514, "grad_norm": 209.56333923339844, "learning_rate": 2.5187659538568043e-06, "loss": 12.7346, "step": 349380 }, { "epoch": 0.7057899053398352, "grad_norm": 189.8730926513672, "learning_rate": 2.518462907392606e-06, "loss": 16.1758, "step": 349390 }, { "epoch": 0.705810105972519, "grad_norm": 532.4152221679688, "learning_rate": 2.518159873023116e-06, "loss": 11.6738, "step": 349400 }, { "epoch": 0.7058303066052029, "grad_norm": 18.742412567138672, "learning_rate": 2.5178568507498156e-06, "loss": 22.5311, "step": 349410 }, { "epoch": 0.7058505072378867, "grad_norm": 476.9742431640625, "learning_rate": 2.51755384057418e-06, "loss": 23.8515, "step": 349420 }, { "epoch": 0.7058707078705705, "grad_norm": 170.4061737060547, "learning_rate": 2.5172508424976837e-06, "loss": 30.3736, "step": 349430 }, { "epoch": 0.7058909085032543, "grad_norm": 251.6022186279297, "learning_rate": 2.5169478565218065e-06, "loss": 12.5531, "step": 349440 }, { "epoch": 0.7059111091359381, "grad_norm": 281.6571044921875, "learning_rate": 2.5166448826480274e-06, "loss": 9.7164, "step": 349450 }, { "epoch": 0.705931309768622, "grad_norm": 274.7341613769531, "learning_rate": 2.516341920877816e-06, "loss": 17.2851, "step": 349460 }, { "epoch": 0.7059515104013058, "grad_norm": 431.5691833496094, "learning_rate": 2.5160389712126537e-06, "loss": 28.6025, "step": 349470 }, { "epoch": 0.7059717110339896, "grad_norm": 260.8550720214844, "learning_rate": 2.5157360336540175e-06, "loss": 17.3071, "step": 349480 }, { "epoch": 0.7059919116666734, "grad_norm": 467.966796875, "learning_rate": 2.5154331082033823e-06, "loss": 12.2936, "step": 349490 }, { "epoch": 0.7060121122993572, "grad_norm": 155.87904357910156, "learning_rate": 2.5151301948622235e-06, "loss": 15.5602, "step": 349500 }, { "epoch": 0.706032312932041, "grad_norm": 445.4872131347656, "learning_rate": 2.5148272936320186e-06, "loss": 15.7395, "step": 349510 }, { "epoch": 0.7060525135647249, "grad_norm": 120.43058776855469, "learning_rate": 2.514524404514248e-06, "loss": 7.1809, "step": 349520 }, { "epoch": 0.7060727141974087, "grad_norm": 5053.6513671875, "learning_rate": 2.51422152751038e-06, "loss": 18.8541, "step": 349530 }, { "epoch": 0.7060929148300925, "grad_norm": 59.72167205810547, "learning_rate": 2.513918662621894e-06, "loss": 24.5071, "step": 349540 }, { "epoch": 0.7061131154627763, "grad_norm": 705.0901489257812, "learning_rate": 2.51361580985027e-06, "loss": 18.196, "step": 349550 }, { "epoch": 0.7061333160954602, "grad_norm": 450.8055725097656, "learning_rate": 2.5133129691969806e-06, "loss": 13.7009, "step": 349560 }, { "epoch": 0.706153516728144, "grad_norm": 667.2728271484375, "learning_rate": 2.5130101406635e-06, "loss": 22.9818, "step": 349570 }, { "epoch": 0.7061737173608278, "grad_norm": 80.49085235595703, "learning_rate": 2.5127073242513083e-06, "loss": 36.1174, "step": 349580 }, { "epoch": 0.7061939179935116, "grad_norm": 155.21636962890625, "learning_rate": 2.5124045199618795e-06, "loss": 19.2898, "step": 349590 }, { "epoch": 0.7062141186261953, "grad_norm": 26.85996437072754, "learning_rate": 2.5121017277966875e-06, "loss": 17.7773, "step": 349600 }, { "epoch": 0.7062343192588791, "grad_norm": 351.96783447265625, "learning_rate": 2.5117989477572126e-06, "loss": 13.4482, "step": 349610 }, { "epoch": 0.706254519891563, "grad_norm": 245.1033935546875, "learning_rate": 2.5114961798449245e-06, "loss": 12.531, "step": 349620 }, { "epoch": 0.7062747205242468, "grad_norm": 321.2890625, "learning_rate": 2.511193424061305e-06, "loss": 14.2802, "step": 349630 }, { "epoch": 0.7062949211569306, "grad_norm": 513.55078125, "learning_rate": 2.510890680407825e-06, "loss": 15.2627, "step": 349640 }, { "epoch": 0.7063151217896144, "grad_norm": 184.0408172607422, "learning_rate": 2.5105879488859635e-06, "loss": 11.4659, "step": 349650 }, { "epoch": 0.7063353224222982, "grad_norm": 222.261474609375, "learning_rate": 2.510285229497195e-06, "loss": 14.198, "step": 349660 }, { "epoch": 0.7063555230549821, "grad_norm": 305.46942138671875, "learning_rate": 2.5099825222429918e-06, "loss": 11.9718, "step": 349670 }, { "epoch": 0.7063757236876659, "grad_norm": 307.5295715332031, "learning_rate": 2.5096798271248337e-06, "loss": 13.1814, "step": 349680 }, { "epoch": 0.7063959243203497, "grad_norm": 120.30352783203125, "learning_rate": 2.5093771441441923e-06, "loss": 9.0062, "step": 349690 }, { "epoch": 0.7064161249530335, "grad_norm": 165.01031494140625, "learning_rate": 2.509074473302546e-06, "loss": 7.8089, "step": 349700 }, { "epoch": 0.7064363255857173, "grad_norm": 446.050537109375, "learning_rate": 2.5087718146013697e-06, "loss": 19.3071, "step": 349710 }, { "epoch": 0.7064565262184012, "grad_norm": 233.69146728515625, "learning_rate": 2.5084691680421346e-06, "loss": 26.0207, "step": 349720 }, { "epoch": 0.706476726851085, "grad_norm": 156.0323486328125, "learning_rate": 2.508166533626321e-06, "loss": 9.8543, "step": 349730 }, { "epoch": 0.7064969274837688, "grad_norm": 145.5287628173828, "learning_rate": 2.5078639113554017e-06, "loss": 17.0833, "step": 349740 }, { "epoch": 0.7065171281164526, "grad_norm": 363.6911315917969, "learning_rate": 2.507561301230849e-06, "loss": 33.3618, "step": 349750 }, { "epoch": 0.7065373287491364, "grad_norm": 22.19895362854004, "learning_rate": 2.5072587032541407e-06, "loss": 25.3804, "step": 349760 }, { "epoch": 0.7065575293818203, "grad_norm": 580.9697875976562, "learning_rate": 2.5069561174267524e-06, "loss": 22.8463, "step": 349770 }, { "epoch": 0.7065777300145041, "grad_norm": 295.9018249511719, "learning_rate": 2.5066535437501584e-06, "loss": 14.8996, "step": 349780 }, { "epoch": 0.7065979306471879, "grad_norm": 155.03756713867188, "learning_rate": 2.50635098222583e-06, "loss": 16.4644, "step": 349790 }, { "epoch": 0.7066181312798717, "grad_norm": 381.5072937011719, "learning_rate": 2.506048432855247e-06, "loss": 25.8488, "step": 349800 }, { "epoch": 0.7066383319125555, "grad_norm": 177.57391357421875, "learning_rate": 2.5057458956398806e-06, "loss": 17.2102, "step": 349810 }, { "epoch": 0.7066585325452394, "grad_norm": 0.0, "learning_rate": 2.5054433705812054e-06, "loss": 11.8921, "step": 349820 }, { "epoch": 0.7066787331779232, "grad_norm": 198.71238708496094, "learning_rate": 2.505140857680696e-06, "loss": 27.3686, "step": 349830 }, { "epoch": 0.706698933810607, "grad_norm": 250.73497009277344, "learning_rate": 2.504838356939829e-06, "loss": 14.0587, "step": 349840 }, { "epoch": 0.7067191344432907, "grad_norm": 656.5536499023438, "learning_rate": 2.504535868360078e-06, "loss": 24.7947, "step": 349850 }, { "epoch": 0.7067393350759745, "grad_norm": 191.30416870117188, "learning_rate": 2.504233391942914e-06, "loss": 17.4928, "step": 349860 }, { "epoch": 0.7067595357086583, "grad_norm": 170.77069091796875, "learning_rate": 2.503930927689816e-06, "loss": 17.2683, "step": 349870 }, { "epoch": 0.7067797363413422, "grad_norm": 334.8910217285156, "learning_rate": 2.503628475602256e-06, "loss": 18.8883, "step": 349880 }, { "epoch": 0.706799936974026, "grad_norm": 260.25311279296875, "learning_rate": 2.5033260356817056e-06, "loss": 19.4103, "step": 349890 }, { "epoch": 0.7068201376067098, "grad_norm": 270.9356689453125, "learning_rate": 2.5030236079296443e-06, "loss": 13.6365, "step": 349900 }, { "epoch": 0.7068403382393936, "grad_norm": 573.7694091796875, "learning_rate": 2.50272119234754e-06, "loss": 20.3914, "step": 349910 }, { "epoch": 0.7068605388720774, "grad_norm": 133.8550262451172, "learning_rate": 2.502418788936872e-06, "loss": 13.1934, "step": 349920 }, { "epoch": 0.7068807395047613, "grad_norm": 90.46905517578125, "learning_rate": 2.5021163976991103e-06, "loss": 9.7101, "step": 349930 }, { "epoch": 0.7069009401374451, "grad_norm": 439.9556884765625, "learning_rate": 2.501814018635732e-06, "loss": 25.0004, "step": 349940 }, { "epoch": 0.7069211407701289, "grad_norm": 365.26690673828125, "learning_rate": 2.5015116517482097e-06, "loss": 19.7331, "step": 349950 }, { "epoch": 0.7069413414028127, "grad_norm": 528.6624755859375, "learning_rate": 2.501209297038014e-06, "loss": 21.2626, "step": 349960 }, { "epoch": 0.7069615420354965, "grad_norm": 136.1773681640625, "learning_rate": 2.500906954506623e-06, "loss": 9.9798, "step": 349970 }, { "epoch": 0.7069817426681804, "grad_norm": 205.59205627441406, "learning_rate": 2.5006046241555073e-06, "loss": 8.0601, "step": 349980 }, { "epoch": 0.7070019433008642, "grad_norm": 386.6914978027344, "learning_rate": 2.500302305986142e-06, "loss": 26.4879, "step": 349990 }, { "epoch": 0.707022143933548, "grad_norm": 285.6610412597656, "learning_rate": 2.5000000000000015e-06, "loss": 26.2824, "step": 350000 }, { "epoch": 0.7070423445662318, "grad_norm": 118.21389770507812, "learning_rate": 2.499697706198555e-06, "loss": 16.7968, "step": 350010 }, { "epoch": 0.7070625451989156, "grad_norm": 319.5116882324219, "learning_rate": 2.499395424583281e-06, "loss": 16.3366, "step": 350020 }, { "epoch": 0.7070827458315995, "grad_norm": 146.75320434570312, "learning_rate": 2.49909315515565e-06, "loss": 13.8728, "step": 350030 }, { "epoch": 0.7071029464642833, "grad_norm": 246.2157440185547, "learning_rate": 2.498790897917134e-06, "loss": 37.2531, "step": 350040 }, { "epoch": 0.7071231470969671, "grad_norm": 211.33740234375, "learning_rate": 2.4984886528692076e-06, "loss": 18.3498, "step": 350050 }, { "epoch": 0.7071433477296509, "grad_norm": 409.8561096191406, "learning_rate": 2.4981864200133483e-06, "loss": 15.0073, "step": 350060 }, { "epoch": 0.7071635483623347, "grad_norm": 369.26611328125, "learning_rate": 2.4978841993510213e-06, "loss": 18.8529, "step": 350070 }, { "epoch": 0.7071837489950186, "grad_norm": 139.4558563232422, "learning_rate": 2.4975819908837024e-06, "loss": 13.9905, "step": 350080 }, { "epoch": 0.7072039496277024, "grad_norm": 171.58377075195312, "learning_rate": 2.497279794612868e-06, "loss": 17.0017, "step": 350090 }, { "epoch": 0.7072241502603862, "grad_norm": 401.6951904296875, "learning_rate": 2.496977610539988e-06, "loss": 28.4515, "step": 350100 }, { "epoch": 0.7072443508930699, "grad_norm": 725.1420288085938, "learning_rate": 2.496675438666534e-06, "loss": 17.1079, "step": 350110 }, { "epoch": 0.7072645515257537, "grad_norm": 190.858642578125, "learning_rate": 2.49637327899398e-06, "loss": 43.4194, "step": 350120 }, { "epoch": 0.7072847521584376, "grad_norm": 745.1611938476562, "learning_rate": 2.4960711315238027e-06, "loss": 20.261, "step": 350130 }, { "epoch": 0.7073049527911214, "grad_norm": 563.7824096679688, "learning_rate": 2.495768996257467e-06, "loss": 26.0083, "step": 350140 }, { "epoch": 0.7073251534238052, "grad_norm": 345.357421875, "learning_rate": 2.49546687319645e-06, "loss": 17.7123, "step": 350150 }, { "epoch": 0.707345354056489, "grad_norm": 201.03018188476562, "learning_rate": 2.4951647623422256e-06, "loss": 27.8287, "step": 350160 }, { "epoch": 0.7073655546891728, "grad_norm": 383.0355224609375, "learning_rate": 2.4948626636962643e-06, "loss": 18.5252, "step": 350170 }, { "epoch": 0.7073857553218567, "grad_norm": 314.98089599609375, "learning_rate": 2.4945605772600364e-06, "loss": 19.0957, "step": 350180 }, { "epoch": 0.7074059559545405, "grad_norm": 169.2843017578125, "learning_rate": 2.494258503035018e-06, "loss": 14.889, "step": 350190 }, { "epoch": 0.7074261565872243, "grad_norm": 335.96142578125, "learning_rate": 2.49395644102268e-06, "loss": 19.2191, "step": 350200 }, { "epoch": 0.7074463572199081, "grad_norm": 339.69384765625, "learning_rate": 2.493654391224493e-06, "loss": 37.6358, "step": 350210 }, { "epoch": 0.7074665578525919, "grad_norm": 257.7926330566406, "learning_rate": 2.493352353641932e-06, "loss": 18.7103, "step": 350220 }, { "epoch": 0.7074867584852758, "grad_norm": 76.00088500976562, "learning_rate": 2.4930503282764658e-06, "loss": 15.1341, "step": 350230 }, { "epoch": 0.7075069591179596, "grad_norm": 299.51617431640625, "learning_rate": 2.49274831512957e-06, "loss": 13.8161, "step": 350240 }, { "epoch": 0.7075271597506434, "grad_norm": 197.71124267578125, "learning_rate": 2.492446314202713e-06, "loss": 9.4883, "step": 350250 }, { "epoch": 0.7075473603833272, "grad_norm": 252.1544952392578, "learning_rate": 2.4921443254973705e-06, "loss": 10.7743, "step": 350260 }, { "epoch": 0.707567561016011, "grad_norm": 76.52326202392578, "learning_rate": 2.491842349015012e-06, "loss": 13.8154, "step": 350270 }, { "epoch": 0.7075877616486949, "grad_norm": 559.22119140625, "learning_rate": 2.4915403847571083e-06, "loss": 14.136, "step": 350280 }, { "epoch": 0.7076079622813787, "grad_norm": 9.5958890914917, "learning_rate": 2.4912384327251344e-06, "loss": 10.7637, "step": 350290 }, { "epoch": 0.7076281629140625, "grad_norm": 7.723456859588623, "learning_rate": 2.4909364929205575e-06, "loss": 18.8013, "step": 350300 }, { "epoch": 0.7076483635467463, "grad_norm": 338.4454650878906, "learning_rate": 2.4906345653448545e-06, "loss": 20.4869, "step": 350310 }, { "epoch": 0.7076685641794301, "grad_norm": 229.25643920898438, "learning_rate": 2.490332649999494e-06, "loss": 15.195, "step": 350320 }, { "epoch": 0.707688764812114, "grad_norm": 179.3012237548828, "learning_rate": 2.490030746885946e-06, "loss": 23.9183, "step": 350330 }, { "epoch": 0.7077089654447978, "grad_norm": 208.29281616210938, "learning_rate": 2.4897288560056854e-06, "loss": 10.0416, "step": 350340 }, { "epoch": 0.7077291660774816, "grad_norm": 117.67689514160156, "learning_rate": 2.489426977360182e-06, "loss": 18.4149, "step": 350350 }, { "epoch": 0.7077493667101654, "grad_norm": 218.89202880859375, "learning_rate": 2.4891251109509053e-06, "loss": 15.8845, "step": 350360 }, { "epoch": 0.7077695673428491, "grad_norm": 57.19772720336914, "learning_rate": 2.488823256779328e-06, "loss": 7.6531, "step": 350370 }, { "epoch": 0.707789767975533, "grad_norm": 30.725284576416016, "learning_rate": 2.488521414846923e-06, "loss": 13.0164, "step": 350380 }, { "epoch": 0.7078099686082168, "grad_norm": 92.16533660888672, "learning_rate": 2.488219585155161e-06, "loss": 16.7374, "step": 350390 }, { "epoch": 0.7078301692409006, "grad_norm": 600.9649658203125, "learning_rate": 2.48791776770551e-06, "loss": 23.0557, "step": 350400 }, { "epoch": 0.7078503698735844, "grad_norm": 220.83905029296875, "learning_rate": 2.4876159624994443e-06, "loss": 17.5036, "step": 350410 }, { "epoch": 0.7078705705062682, "grad_norm": 295.2484436035156, "learning_rate": 2.4873141695384346e-06, "loss": 19.7002, "step": 350420 }, { "epoch": 0.707890771138952, "grad_norm": 356.18426513671875, "learning_rate": 2.4870123888239485e-06, "loss": 16.3784, "step": 350430 }, { "epoch": 0.7079109717716359, "grad_norm": 165.99801635742188, "learning_rate": 2.486710620357459e-06, "loss": 12.1482, "step": 350440 }, { "epoch": 0.7079311724043197, "grad_norm": 226.3193359375, "learning_rate": 2.4864088641404398e-06, "loss": 14.3498, "step": 350450 }, { "epoch": 0.7079513730370035, "grad_norm": 268.6060485839844, "learning_rate": 2.4861071201743586e-06, "loss": 11.4052, "step": 350460 }, { "epoch": 0.7079715736696873, "grad_norm": 292.2978210449219, "learning_rate": 2.4858053884606843e-06, "loss": 19.6125, "step": 350470 }, { "epoch": 0.7079917743023711, "grad_norm": 143.71823120117188, "learning_rate": 2.4855036690008918e-06, "loss": 6.0524, "step": 350480 }, { "epoch": 0.708011974935055, "grad_norm": 350.1965637207031, "learning_rate": 2.485201961796449e-06, "loss": 18.3651, "step": 350490 }, { "epoch": 0.7080321755677388, "grad_norm": 191.3245391845703, "learning_rate": 2.484900266848825e-06, "loss": 20.3575, "step": 350500 }, { "epoch": 0.7080523762004226, "grad_norm": 118.70997619628906, "learning_rate": 2.484598584159494e-06, "loss": 8.6528, "step": 350510 }, { "epoch": 0.7080725768331064, "grad_norm": 252.58786010742188, "learning_rate": 2.484296913729923e-06, "loss": 11.6546, "step": 350520 }, { "epoch": 0.7080927774657902, "grad_norm": 315.3838806152344, "learning_rate": 2.4839952555615846e-06, "loss": 15.3217, "step": 350530 }, { "epoch": 0.7081129780984741, "grad_norm": 121.49755859375, "learning_rate": 2.483693609655947e-06, "loss": 13.6987, "step": 350540 }, { "epoch": 0.7081331787311579, "grad_norm": 305.43157958984375, "learning_rate": 2.4833919760144838e-06, "loss": 30.1544, "step": 350550 }, { "epoch": 0.7081533793638417, "grad_norm": 175.07762145996094, "learning_rate": 2.483090354638662e-06, "loss": 12.3006, "step": 350560 }, { "epoch": 0.7081735799965255, "grad_norm": 323.2102355957031, "learning_rate": 2.4827887455299516e-06, "loss": 12.8012, "step": 350570 }, { "epoch": 0.7081937806292093, "grad_norm": 234.5757598876953, "learning_rate": 2.4824871486898244e-06, "loss": 24.4312, "step": 350580 }, { "epoch": 0.7082139812618932, "grad_norm": 206.27162170410156, "learning_rate": 2.4821855641197483e-06, "loss": 32.4019, "step": 350590 }, { "epoch": 0.708234181894577, "grad_norm": 265.391845703125, "learning_rate": 2.4818839918211963e-06, "loss": 15.8074, "step": 350600 }, { "epoch": 0.7082543825272608, "grad_norm": 126.33395385742188, "learning_rate": 2.4815824317956363e-06, "loss": 18.0399, "step": 350610 }, { "epoch": 0.7082745831599445, "grad_norm": 2.7335045337677, "learning_rate": 2.4812808840445357e-06, "loss": 17.1516, "step": 350620 }, { "epoch": 0.7082947837926283, "grad_norm": 89.18004608154297, "learning_rate": 2.480979348569369e-06, "loss": 20.2761, "step": 350630 }, { "epoch": 0.7083149844253122, "grad_norm": 335.4591979980469, "learning_rate": 2.480677825371603e-06, "loss": 19.4986, "step": 350640 }, { "epoch": 0.708335185057996, "grad_norm": 91.69691467285156, "learning_rate": 2.480376314452706e-06, "loss": 10.4895, "step": 350650 }, { "epoch": 0.7083553856906798, "grad_norm": 240.77105712890625, "learning_rate": 2.48007481581415e-06, "loss": 29.8018, "step": 350660 }, { "epoch": 0.7083755863233636, "grad_norm": 240.066650390625, "learning_rate": 2.479773329457406e-06, "loss": 22.4243, "step": 350670 }, { "epoch": 0.7083957869560474, "grad_norm": 59.86264419555664, "learning_rate": 2.4794718553839387e-06, "loss": 27.5235, "step": 350680 }, { "epoch": 0.7084159875887313, "grad_norm": 331.40423583984375, "learning_rate": 2.4791703935952193e-06, "loss": 16.6865, "step": 350690 }, { "epoch": 0.7084361882214151, "grad_norm": 448.7181396484375, "learning_rate": 2.4788689440927193e-06, "loss": 10.3491, "step": 350700 }, { "epoch": 0.7084563888540989, "grad_norm": 363.33319091796875, "learning_rate": 2.478567506877907e-06, "loss": 10.7199, "step": 350710 }, { "epoch": 0.7084765894867827, "grad_norm": 497.0529479980469, "learning_rate": 2.478266081952248e-06, "loss": 9.0395, "step": 350720 }, { "epoch": 0.7084967901194665, "grad_norm": 330.35235595703125, "learning_rate": 2.477964669317215e-06, "loss": 16.2397, "step": 350730 }, { "epoch": 0.7085169907521504, "grad_norm": 304.560302734375, "learning_rate": 2.4776632689742803e-06, "loss": 9.8854, "step": 350740 }, { "epoch": 0.7085371913848342, "grad_norm": 341.1455078125, "learning_rate": 2.4773618809249045e-06, "loss": 26.3387, "step": 350750 }, { "epoch": 0.708557392017518, "grad_norm": 348.2223205566406, "learning_rate": 2.477060505170561e-06, "loss": 33.963, "step": 350760 }, { "epoch": 0.7085775926502018, "grad_norm": 64.66560363769531, "learning_rate": 2.4767591417127207e-06, "loss": 19.0017, "step": 350770 }, { "epoch": 0.7085977932828856, "grad_norm": 298.3678283691406, "learning_rate": 2.4764577905528503e-06, "loss": 27.6841, "step": 350780 }, { "epoch": 0.7086179939155695, "grad_norm": 309.9102783203125, "learning_rate": 2.476156451692416e-06, "loss": 28.5848, "step": 350790 }, { "epoch": 0.7086381945482533, "grad_norm": 1253.6309814453125, "learning_rate": 2.4758551251328923e-06, "loss": 20.6656, "step": 350800 }, { "epoch": 0.7086583951809371, "grad_norm": 83.83746337890625, "learning_rate": 2.4755538108757436e-06, "loss": 22.3287, "step": 350810 }, { "epoch": 0.7086785958136209, "grad_norm": 337.9351501464844, "learning_rate": 2.475252508922438e-06, "loss": 13.1038, "step": 350820 }, { "epoch": 0.7086987964463047, "grad_norm": 281.2683410644531, "learning_rate": 2.4749512192744473e-06, "loss": 9.4089, "step": 350830 }, { "epoch": 0.7087189970789886, "grad_norm": 158.22508239746094, "learning_rate": 2.474649941933236e-06, "loss": 16.4156, "step": 350840 }, { "epoch": 0.7087391977116724, "grad_norm": 262.76507568359375, "learning_rate": 2.4743486769002767e-06, "loss": 14.0356, "step": 350850 }, { "epoch": 0.7087593983443562, "grad_norm": 435.8996887207031, "learning_rate": 2.4740474241770333e-06, "loss": 14.359, "step": 350860 }, { "epoch": 0.70877959897704, "grad_norm": 59.79398727416992, "learning_rate": 2.4737461837649784e-06, "loss": 17.0244, "step": 350870 }, { "epoch": 0.7087997996097237, "grad_norm": 235.62188720703125, "learning_rate": 2.4734449556655786e-06, "loss": 14.4613, "step": 350880 }, { "epoch": 0.7088200002424075, "grad_norm": 154.51319885253906, "learning_rate": 2.4731437398802998e-06, "loss": 13.2924, "step": 350890 }, { "epoch": 0.7088402008750914, "grad_norm": 76.861572265625, "learning_rate": 2.4728425364106136e-06, "loss": 23.0162, "step": 350900 }, { "epoch": 0.7088604015077752, "grad_norm": 176.60244750976562, "learning_rate": 2.472541345257984e-06, "loss": 13.0463, "step": 350910 }, { "epoch": 0.708880602140459, "grad_norm": 237.8243408203125, "learning_rate": 2.4722401664238837e-06, "loss": 14.3805, "step": 350920 }, { "epoch": 0.7089008027731428, "grad_norm": 372.865478515625, "learning_rate": 2.4719389999097787e-06, "loss": 11.7265, "step": 350930 }, { "epoch": 0.7089210034058266, "grad_norm": 169.18214416503906, "learning_rate": 2.471637845717134e-06, "loss": 17.0091, "step": 350940 }, { "epoch": 0.7089412040385105, "grad_norm": 146.45516967773438, "learning_rate": 2.471336703847422e-06, "loss": 9.6476, "step": 350950 }, { "epoch": 0.7089614046711943, "grad_norm": 83.6109619140625, "learning_rate": 2.4710355743021077e-06, "loss": 17.1232, "step": 350960 }, { "epoch": 0.7089816053038781, "grad_norm": 446.2963562011719, "learning_rate": 2.4707344570826576e-06, "loss": 23.5262, "step": 350970 }, { "epoch": 0.7090018059365619, "grad_norm": 157.11439514160156, "learning_rate": 2.470433352190541e-06, "loss": 15.9184, "step": 350980 }, { "epoch": 0.7090220065692457, "grad_norm": 1028.93798828125, "learning_rate": 2.470132259627227e-06, "loss": 33.5038, "step": 350990 }, { "epoch": 0.7090422072019296, "grad_norm": 240.01573181152344, "learning_rate": 2.469831179394182e-06, "loss": 19.5688, "step": 351000 }, { "epoch": 0.7090624078346134, "grad_norm": 336.3686828613281, "learning_rate": 2.469530111492871e-06, "loss": 14.1886, "step": 351010 }, { "epoch": 0.7090826084672972, "grad_norm": 385.7926025390625, "learning_rate": 2.4692290559247652e-06, "loss": 13.9835, "step": 351020 }, { "epoch": 0.709102809099981, "grad_norm": 297.9305725097656, "learning_rate": 2.4689280126913302e-06, "loss": 15.2822, "step": 351030 }, { "epoch": 0.7091230097326648, "grad_norm": 160.84756469726562, "learning_rate": 2.4686269817940306e-06, "loss": 11.1708, "step": 351040 }, { "epoch": 0.7091432103653487, "grad_norm": 214.4310760498047, "learning_rate": 2.4683259632343363e-06, "loss": 21.7615, "step": 351050 }, { "epoch": 0.7091634109980325, "grad_norm": 112.11554718017578, "learning_rate": 2.4680249570137166e-06, "loss": 22.6386, "step": 351060 }, { "epoch": 0.7091836116307163, "grad_norm": 119.98857879638672, "learning_rate": 2.467723963133636e-06, "loss": 19.1286, "step": 351070 }, { "epoch": 0.7092038122634001, "grad_norm": 135.04200744628906, "learning_rate": 2.4674229815955596e-06, "loss": 10.0188, "step": 351080 }, { "epoch": 0.7092240128960839, "grad_norm": 398.4815979003906, "learning_rate": 2.467122012400958e-06, "loss": 36.1733, "step": 351090 }, { "epoch": 0.7092442135287678, "grad_norm": 332.0245666503906, "learning_rate": 2.4668210555512974e-06, "loss": 14.6752, "step": 351100 }, { "epoch": 0.7092644141614516, "grad_norm": 124.1589126586914, "learning_rate": 2.466520111048041e-06, "loss": 15.0318, "step": 351110 }, { "epoch": 0.7092846147941354, "grad_norm": 126.54069519042969, "learning_rate": 2.46621917889266e-06, "loss": 14.4044, "step": 351120 }, { "epoch": 0.7093048154268191, "grad_norm": 697.0535278320312, "learning_rate": 2.4659182590866183e-06, "loss": 23.2431, "step": 351130 }, { "epoch": 0.7093250160595029, "grad_norm": 330.3814697265625, "learning_rate": 2.4656173516313852e-06, "loss": 13.7731, "step": 351140 }, { "epoch": 0.7093452166921868, "grad_norm": 601.4826049804688, "learning_rate": 2.465316456528424e-06, "loss": 24.579, "step": 351150 }, { "epoch": 0.7093654173248706, "grad_norm": 242.60794067382812, "learning_rate": 2.465015573779205e-06, "loss": 31.9337, "step": 351160 }, { "epoch": 0.7093856179575544, "grad_norm": 194.93441772460938, "learning_rate": 2.464714703385192e-06, "loss": 10.5911, "step": 351170 }, { "epoch": 0.7094058185902382, "grad_norm": 262.32672119140625, "learning_rate": 2.4644138453478504e-06, "loss": 17.5492, "step": 351180 }, { "epoch": 0.709426019222922, "grad_norm": 118.04582214355469, "learning_rate": 2.464112999668651e-06, "loss": 15.1708, "step": 351190 }, { "epoch": 0.7094462198556059, "grad_norm": 184.2508087158203, "learning_rate": 2.4638121663490546e-06, "loss": 20.1951, "step": 351200 }, { "epoch": 0.7094664204882897, "grad_norm": 275.2691955566406, "learning_rate": 2.463511345390532e-06, "loss": 9.4905, "step": 351210 }, { "epoch": 0.7094866211209735, "grad_norm": 257.0596923828125, "learning_rate": 2.463210536794547e-06, "loss": 14.1999, "step": 351220 }, { "epoch": 0.7095068217536573, "grad_norm": 279.8252868652344, "learning_rate": 2.4629097405625645e-06, "loss": 19.346, "step": 351230 }, { "epoch": 0.7095270223863411, "grad_norm": 328.4770812988281, "learning_rate": 2.4626089566960546e-06, "loss": 14.7055, "step": 351240 }, { "epoch": 0.709547223019025, "grad_norm": 306.1147766113281, "learning_rate": 2.462308185196481e-06, "loss": 13.662, "step": 351250 }, { "epoch": 0.7095674236517088, "grad_norm": 195.86851501464844, "learning_rate": 2.462007426065307e-06, "loss": 18.9942, "step": 351260 }, { "epoch": 0.7095876242843926, "grad_norm": 244.86041259765625, "learning_rate": 2.4617066793040012e-06, "loss": 22.5407, "step": 351270 }, { "epoch": 0.7096078249170764, "grad_norm": 346.0923767089844, "learning_rate": 2.461405944914033e-06, "loss": 18.9949, "step": 351280 }, { "epoch": 0.7096280255497602, "grad_norm": 26.868728637695312, "learning_rate": 2.4611052228968606e-06, "loss": 11.7181, "step": 351290 }, { "epoch": 0.709648226182444, "grad_norm": 295.7042236328125, "learning_rate": 2.4608045132539536e-06, "loss": 20.9081, "step": 351300 }, { "epoch": 0.7096684268151279, "grad_norm": 537.2589721679688, "learning_rate": 2.460503815986779e-06, "loss": 17.2755, "step": 351310 }, { "epoch": 0.7096886274478117, "grad_norm": 319.1598205566406, "learning_rate": 2.4602031310968013e-06, "loss": 13.3103, "step": 351320 }, { "epoch": 0.7097088280804955, "grad_norm": 243.71324157714844, "learning_rate": 2.459902458585483e-06, "loss": 9.0223, "step": 351330 }, { "epoch": 0.7097290287131793, "grad_norm": 60.524600982666016, "learning_rate": 2.459601798454292e-06, "loss": 16.6258, "step": 351340 }, { "epoch": 0.7097492293458632, "grad_norm": 214.17135620117188, "learning_rate": 2.4593011507046976e-06, "loss": 27.9575, "step": 351350 }, { "epoch": 0.709769429978547, "grad_norm": 83.36215209960938, "learning_rate": 2.459000515338158e-06, "loss": 20.3355, "step": 351360 }, { "epoch": 0.7097896306112308, "grad_norm": 326.882568359375, "learning_rate": 2.4586998923561412e-06, "loss": 16.9598, "step": 351370 }, { "epoch": 0.7098098312439146, "grad_norm": 358.0440979003906, "learning_rate": 2.458399281760115e-06, "loss": 14.9201, "step": 351380 }, { "epoch": 0.7098300318765983, "grad_norm": 687.9678955078125, "learning_rate": 2.4580986835515423e-06, "loss": 22.7137, "step": 351390 }, { "epoch": 0.7098502325092821, "grad_norm": 194.2820587158203, "learning_rate": 2.4577980977318866e-06, "loss": 15.4186, "step": 351400 }, { "epoch": 0.709870433141966, "grad_norm": 20.19388771057129, "learning_rate": 2.457497524302616e-06, "loss": 16.6103, "step": 351410 }, { "epoch": 0.7098906337746498, "grad_norm": 268.2904052734375, "learning_rate": 2.457196963265195e-06, "loss": 11.607, "step": 351420 }, { "epoch": 0.7099108344073336, "grad_norm": 509.0209655761719, "learning_rate": 2.456896414621085e-06, "loss": 25.703, "step": 351430 }, { "epoch": 0.7099310350400174, "grad_norm": 68.9505844116211, "learning_rate": 2.4565958783717534e-06, "loss": 18.8714, "step": 351440 }, { "epoch": 0.7099512356727012, "grad_norm": 301.3570251464844, "learning_rate": 2.4562953545186675e-06, "loss": 9.9792, "step": 351450 }, { "epoch": 0.7099714363053851, "grad_norm": 307.7007141113281, "learning_rate": 2.455994843063289e-06, "loss": 21.6206, "step": 351460 }, { "epoch": 0.7099916369380689, "grad_norm": 235.3525390625, "learning_rate": 2.455694344007082e-06, "loss": 32.6422, "step": 351470 }, { "epoch": 0.7100118375707527, "grad_norm": 255.0052032470703, "learning_rate": 2.455393857351513e-06, "loss": 16.5655, "step": 351480 }, { "epoch": 0.7100320382034365, "grad_norm": 192.5288543701172, "learning_rate": 2.455093383098046e-06, "loss": 19.1789, "step": 351490 }, { "epoch": 0.7100522388361203, "grad_norm": 717.632568359375, "learning_rate": 2.4547929212481436e-06, "loss": 32.0171, "step": 351500 }, { "epoch": 0.7100724394688042, "grad_norm": 405.9718017578125, "learning_rate": 2.454492471803274e-06, "loss": 38.777, "step": 351510 }, { "epoch": 0.710092640101488, "grad_norm": 94.05525970458984, "learning_rate": 2.454192034764897e-06, "loss": 10.9168, "step": 351520 }, { "epoch": 0.7101128407341718, "grad_norm": 262.3117980957031, "learning_rate": 2.4538916101344806e-06, "loss": 22.0907, "step": 351530 }, { "epoch": 0.7101330413668556, "grad_norm": 521.1732788085938, "learning_rate": 2.4535911979134884e-06, "loss": 16.8042, "step": 351540 }, { "epoch": 0.7101532419995394, "grad_norm": 154.8276824951172, "learning_rate": 2.4532907981033822e-06, "loss": 10.4961, "step": 351550 }, { "epoch": 0.7101734426322233, "grad_norm": 0.0, "learning_rate": 2.452990410705629e-06, "loss": 15.0326, "step": 351560 }, { "epoch": 0.7101936432649071, "grad_norm": 460.53656005859375, "learning_rate": 2.452690035721692e-06, "loss": 13.94, "step": 351570 }, { "epoch": 0.7102138438975909, "grad_norm": 104.2068099975586, "learning_rate": 2.4523896731530327e-06, "loss": 12.6456, "step": 351580 }, { "epoch": 0.7102340445302747, "grad_norm": 265.8614501953125, "learning_rate": 2.4520893230011174e-06, "loss": 23.0547, "step": 351590 }, { "epoch": 0.7102542451629585, "grad_norm": 188.75746154785156, "learning_rate": 2.4517889852674114e-06, "loss": 15.5079, "step": 351600 }, { "epoch": 0.7102744457956424, "grad_norm": 389.2986755371094, "learning_rate": 2.4514886599533773e-06, "loss": 20.0783, "step": 351610 }, { "epoch": 0.7102946464283262, "grad_norm": 120.8795394897461, "learning_rate": 2.4511883470604757e-06, "loss": 17.6397, "step": 351620 }, { "epoch": 0.71031484706101, "grad_norm": 63.12157440185547, "learning_rate": 2.450888046590175e-06, "loss": 15.2393, "step": 351630 }, { "epoch": 0.7103350476936938, "grad_norm": 121.38809204101562, "learning_rate": 2.4505877585439376e-06, "loss": 16.1526, "step": 351640 }, { "epoch": 0.7103552483263775, "grad_norm": 783.6080322265625, "learning_rate": 2.4502874829232238e-06, "loss": 22.3579, "step": 351650 }, { "epoch": 0.7103754489590614, "grad_norm": 318.6197814941406, "learning_rate": 2.4499872197294992e-06, "loss": 14.048, "step": 351660 }, { "epoch": 0.7103956495917452, "grad_norm": 293.58447265625, "learning_rate": 2.449686968964232e-06, "loss": 19.4337, "step": 351670 }, { "epoch": 0.710415850224429, "grad_norm": 424.3562316894531, "learning_rate": 2.4493867306288772e-06, "loss": 22.6119, "step": 351680 }, { "epoch": 0.7104360508571128, "grad_norm": 287.13360595703125, "learning_rate": 2.449086504724902e-06, "loss": 45.1938, "step": 351690 }, { "epoch": 0.7104562514897966, "grad_norm": 425.26654052734375, "learning_rate": 2.448786291253772e-06, "loss": 24.882, "step": 351700 }, { "epoch": 0.7104764521224805, "grad_norm": 193.7147979736328, "learning_rate": 2.4484860902169477e-06, "loss": 25.5574, "step": 351710 }, { "epoch": 0.7104966527551643, "grad_norm": 253.30288696289062, "learning_rate": 2.4481859016158913e-06, "loss": 16.423, "step": 351720 }, { "epoch": 0.7105168533878481, "grad_norm": 314.57940673828125, "learning_rate": 2.4478857254520688e-06, "loss": 17.3227, "step": 351730 }, { "epoch": 0.7105370540205319, "grad_norm": 594.4584350585938, "learning_rate": 2.44758556172694e-06, "loss": 27.0756, "step": 351740 }, { "epoch": 0.7105572546532157, "grad_norm": 393.97711181640625, "learning_rate": 2.4472854104419717e-06, "loss": 19.3182, "step": 351750 }, { "epoch": 0.7105774552858996, "grad_norm": 103.99803924560547, "learning_rate": 2.4469852715986232e-06, "loss": 15.8749, "step": 351760 }, { "epoch": 0.7105976559185834, "grad_norm": 292.9165954589844, "learning_rate": 2.44668514519836e-06, "loss": 12.4438, "step": 351770 }, { "epoch": 0.7106178565512672, "grad_norm": 251.22280883789062, "learning_rate": 2.446385031242644e-06, "loss": 13.655, "step": 351780 }, { "epoch": 0.710638057183951, "grad_norm": 425.0433044433594, "learning_rate": 2.4460849297329355e-06, "loss": 16.6338, "step": 351790 }, { "epoch": 0.7106582578166348, "grad_norm": 483.10821533203125, "learning_rate": 2.4457848406707014e-06, "loss": 18.4825, "step": 351800 }, { "epoch": 0.7106784584493187, "grad_norm": 249.06735229492188, "learning_rate": 2.4454847640574004e-06, "loss": 10.263, "step": 351810 }, { "epoch": 0.7106986590820025, "grad_norm": 390.7125549316406, "learning_rate": 2.4451846998944985e-06, "loss": 13.1548, "step": 351820 }, { "epoch": 0.7107188597146863, "grad_norm": 229.27676391601562, "learning_rate": 2.4448846481834566e-06, "loss": 15.3595, "step": 351830 }, { "epoch": 0.7107390603473701, "grad_norm": 414.14178466796875, "learning_rate": 2.4445846089257354e-06, "loss": 16.436, "step": 351840 }, { "epoch": 0.7107592609800539, "grad_norm": 35.286102294921875, "learning_rate": 2.4442845821228005e-06, "loss": 11.7502, "step": 351850 }, { "epoch": 0.7107794616127378, "grad_norm": 242.687744140625, "learning_rate": 2.4439845677761124e-06, "loss": 16.4773, "step": 351860 }, { "epoch": 0.7107996622454216, "grad_norm": 248.52188110351562, "learning_rate": 2.4436845658871317e-06, "loss": 14.9384, "step": 351870 }, { "epoch": 0.7108198628781054, "grad_norm": 411.87994384765625, "learning_rate": 2.4433845764573225e-06, "loss": 22.9784, "step": 351880 }, { "epoch": 0.7108400635107892, "grad_norm": 310.3200988769531, "learning_rate": 2.4430845994881507e-06, "loss": 30.7847, "step": 351890 }, { "epoch": 0.7108602641434729, "grad_norm": 538.3544921875, "learning_rate": 2.442784634981071e-06, "loss": 19.931, "step": 351900 }, { "epoch": 0.7108804647761567, "grad_norm": 116.541748046875, "learning_rate": 2.442484682937548e-06, "loss": 39.5629, "step": 351910 }, { "epoch": 0.7109006654088406, "grad_norm": 158.8994903564453, "learning_rate": 2.4421847433590466e-06, "loss": 11.3997, "step": 351920 }, { "epoch": 0.7109208660415244, "grad_norm": 456.78289794921875, "learning_rate": 2.4418848162470273e-06, "loss": 20.9704, "step": 351930 }, { "epoch": 0.7109410666742082, "grad_norm": 235.18605041503906, "learning_rate": 2.441584901602948e-06, "loss": 12.0296, "step": 351940 }, { "epoch": 0.710961267306892, "grad_norm": 216.93711853027344, "learning_rate": 2.4412849994282744e-06, "loss": 10.7923, "step": 351950 }, { "epoch": 0.7109814679395758, "grad_norm": 274.1366882324219, "learning_rate": 2.4409851097244708e-06, "loss": 21.8105, "step": 351960 }, { "epoch": 0.7110016685722597, "grad_norm": 348.54193115234375, "learning_rate": 2.440685232492992e-06, "loss": 14.3724, "step": 351970 }, { "epoch": 0.7110218692049435, "grad_norm": 200.27676391601562, "learning_rate": 2.440385367735303e-06, "loss": 22.5995, "step": 351980 }, { "epoch": 0.7110420698376273, "grad_norm": 204.17494201660156, "learning_rate": 2.440085515452867e-06, "loss": 20.1661, "step": 351990 }, { "epoch": 0.7110622704703111, "grad_norm": 0.0, "learning_rate": 2.4397856756471435e-06, "loss": 31.145, "step": 352000 }, { "epoch": 0.7110824711029949, "grad_norm": 868.4529418945312, "learning_rate": 2.4394858483195923e-06, "loss": 29.7094, "step": 352010 }, { "epoch": 0.7111026717356788, "grad_norm": 220.2210235595703, "learning_rate": 2.4391860334716783e-06, "loss": 12.8077, "step": 352020 }, { "epoch": 0.7111228723683626, "grad_norm": 121.30075073242188, "learning_rate": 2.438886231104861e-06, "loss": 20.5221, "step": 352030 }, { "epoch": 0.7111430730010464, "grad_norm": 337.8889465332031, "learning_rate": 2.4385864412206e-06, "loss": 26.9225, "step": 352040 }, { "epoch": 0.7111632736337302, "grad_norm": 86.79560089111328, "learning_rate": 2.4382866638203578e-06, "loss": 14.8003, "step": 352050 }, { "epoch": 0.711183474266414, "grad_norm": 19.37428092956543, "learning_rate": 2.4379868989055976e-06, "loss": 18.108, "step": 352060 }, { "epoch": 0.7112036748990979, "grad_norm": 173.17977905273438, "learning_rate": 2.4376871464777792e-06, "loss": 10.5876, "step": 352070 }, { "epoch": 0.7112238755317817, "grad_norm": 181.65447998046875, "learning_rate": 2.43738740653836e-06, "loss": 18.6692, "step": 352080 }, { "epoch": 0.7112440761644655, "grad_norm": 123.4256362915039, "learning_rate": 2.437087679088806e-06, "loss": 13.498, "step": 352090 }, { "epoch": 0.7112642767971493, "grad_norm": 159.5789794921875, "learning_rate": 2.4367879641305757e-06, "loss": 17.5504, "step": 352100 }, { "epoch": 0.7112844774298331, "grad_norm": 324.58990478515625, "learning_rate": 2.4364882616651288e-06, "loss": 14.5119, "step": 352110 }, { "epoch": 0.711304678062517, "grad_norm": 267.72344970703125, "learning_rate": 2.436188571693928e-06, "loss": 18.6343, "step": 352120 }, { "epoch": 0.7113248786952008, "grad_norm": 105.14009094238281, "learning_rate": 2.4358888942184324e-06, "loss": 15.8817, "step": 352130 }, { "epoch": 0.7113450793278846, "grad_norm": 14.768479347229004, "learning_rate": 2.4355892292401044e-06, "loss": 20.5033, "step": 352140 }, { "epoch": 0.7113652799605684, "grad_norm": 181.22152709960938, "learning_rate": 2.4352895767604036e-06, "loss": 15.0534, "step": 352150 }, { "epoch": 0.7113854805932521, "grad_norm": 0.0, "learning_rate": 2.4349899367807885e-06, "loss": 13.0101, "step": 352160 }, { "epoch": 0.711405681225936, "grad_norm": 591.1431884765625, "learning_rate": 2.4346903093027237e-06, "loss": 23.4942, "step": 352170 }, { "epoch": 0.7114258818586198, "grad_norm": 225.46389770507812, "learning_rate": 2.434390694327666e-06, "loss": 22.0566, "step": 352180 }, { "epoch": 0.7114460824913036, "grad_norm": 238.13368225097656, "learning_rate": 2.434091091857076e-06, "loss": 19.2214, "step": 352190 }, { "epoch": 0.7114662831239874, "grad_norm": 93.51820373535156, "learning_rate": 2.4337915018924147e-06, "loss": 14.3301, "step": 352200 }, { "epoch": 0.7114864837566712, "grad_norm": 360.79974365234375, "learning_rate": 2.433491924435144e-06, "loss": 15.5983, "step": 352210 }, { "epoch": 0.711506684389355, "grad_norm": 85.74699401855469, "learning_rate": 2.433192359486723e-06, "loss": 13.8914, "step": 352220 }, { "epoch": 0.7115268850220389, "grad_norm": 355.16168212890625, "learning_rate": 2.4328928070486086e-06, "loss": 17.9465, "step": 352230 }, { "epoch": 0.7115470856547227, "grad_norm": 146.91104125976562, "learning_rate": 2.432593267122265e-06, "loss": 11.7181, "step": 352240 }, { "epoch": 0.7115672862874065, "grad_norm": 141.04425048828125, "learning_rate": 2.432293739709151e-06, "loss": 15.6023, "step": 352250 }, { "epoch": 0.7115874869200903, "grad_norm": 214.25721740722656, "learning_rate": 2.4319942248107236e-06, "loss": 38.2805, "step": 352260 }, { "epoch": 0.7116076875527741, "grad_norm": 384.62664794921875, "learning_rate": 2.4316947224284454e-06, "loss": 14.0202, "step": 352270 }, { "epoch": 0.711627888185458, "grad_norm": 231.8857879638672, "learning_rate": 2.431395232563779e-06, "loss": 10.1359, "step": 352280 }, { "epoch": 0.7116480888181418, "grad_norm": 408.15289306640625, "learning_rate": 2.431095755218177e-06, "loss": 13.1414, "step": 352290 }, { "epoch": 0.7116682894508256, "grad_norm": 103.86375427246094, "learning_rate": 2.4307962903931025e-06, "loss": 8.8187, "step": 352300 }, { "epoch": 0.7116884900835094, "grad_norm": 206.07855224609375, "learning_rate": 2.430496838090017e-06, "loss": 28.359, "step": 352310 }, { "epoch": 0.7117086907161932, "grad_norm": 332.19793701171875, "learning_rate": 2.4301973983103793e-06, "loss": 11.5104, "step": 352320 }, { "epoch": 0.7117288913488771, "grad_norm": 189.5696563720703, "learning_rate": 2.429897971055645e-06, "loss": 17.8832, "step": 352330 }, { "epoch": 0.7117490919815609, "grad_norm": 376.2629089355469, "learning_rate": 2.4295985563272785e-06, "loss": 16.8055, "step": 352340 }, { "epoch": 0.7117692926142447, "grad_norm": 366.5769348144531, "learning_rate": 2.4292991541267368e-06, "loss": 19.7703, "step": 352350 }, { "epoch": 0.7117894932469285, "grad_norm": 435.5685729980469, "learning_rate": 2.4289997644554775e-06, "loss": 45.3416, "step": 352360 }, { "epoch": 0.7118096938796123, "grad_norm": 749.5015258789062, "learning_rate": 2.428700387314961e-06, "loss": 12.4316, "step": 352370 }, { "epoch": 0.7118298945122962, "grad_norm": 193.76766967773438, "learning_rate": 2.4284010227066495e-06, "loss": 18.7707, "step": 352380 }, { "epoch": 0.71185009514498, "grad_norm": 314.447509765625, "learning_rate": 2.4281016706319992e-06, "loss": 20.7724, "step": 352390 }, { "epoch": 0.7118702957776638, "grad_norm": 346.3161315917969, "learning_rate": 2.4278023310924676e-06, "loss": 14.3975, "step": 352400 }, { "epoch": 0.7118904964103475, "grad_norm": 521.2062377929688, "learning_rate": 2.4275030040895178e-06, "loss": 10.4058, "step": 352410 }, { "epoch": 0.7119106970430313, "grad_norm": 535.8650512695312, "learning_rate": 2.4272036896246054e-06, "loss": 27.2397, "step": 352420 }, { "epoch": 0.7119308976757152, "grad_norm": 337.22283935546875, "learning_rate": 2.4269043876991888e-06, "loss": 18.4178, "step": 352430 }, { "epoch": 0.711951098308399, "grad_norm": 452.8020324707031, "learning_rate": 2.4266050983147298e-06, "loss": 20.9573, "step": 352440 }, { "epoch": 0.7119712989410828, "grad_norm": 385.9393615722656, "learning_rate": 2.4263058214726844e-06, "loss": 12.9102, "step": 352450 }, { "epoch": 0.7119914995737666, "grad_norm": 343.26129150390625, "learning_rate": 2.426006557174513e-06, "loss": 17.3087, "step": 352460 }, { "epoch": 0.7120117002064504, "grad_norm": 99.2619857788086, "learning_rate": 2.425707305421674e-06, "loss": 16.9882, "step": 352470 }, { "epoch": 0.7120319008391343, "grad_norm": 267.15179443359375, "learning_rate": 2.425408066215623e-06, "loss": 15.49, "step": 352480 }, { "epoch": 0.7120521014718181, "grad_norm": 130.62176513671875, "learning_rate": 2.4251088395578214e-06, "loss": 27.495, "step": 352490 }, { "epoch": 0.7120723021045019, "grad_norm": 531.9510498046875, "learning_rate": 2.424809625449729e-06, "loss": 29.3416, "step": 352500 }, { "epoch": 0.7120925027371857, "grad_norm": 161.21682739257812, "learning_rate": 2.424510423892802e-06, "loss": 11.3117, "step": 352510 }, { "epoch": 0.7121127033698695, "grad_norm": 120.74435424804688, "learning_rate": 2.424211234888497e-06, "loss": 13.8729, "step": 352520 }, { "epoch": 0.7121329040025534, "grad_norm": 607.02490234375, "learning_rate": 2.4239120584382757e-06, "loss": 20.9928, "step": 352530 }, { "epoch": 0.7121531046352372, "grad_norm": 195.99024963378906, "learning_rate": 2.4236128945435944e-06, "loss": 17.6383, "step": 352540 }, { "epoch": 0.712173305267921, "grad_norm": 683.2008056640625, "learning_rate": 2.42331374320591e-06, "loss": 26.9866, "step": 352550 }, { "epoch": 0.7121935059006048, "grad_norm": 814.26611328125, "learning_rate": 2.423014604426682e-06, "loss": 23.4956, "step": 352560 }, { "epoch": 0.7122137065332886, "grad_norm": 121.1082534790039, "learning_rate": 2.4227154782073716e-06, "loss": 15.2093, "step": 352570 }, { "epoch": 0.7122339071659725, "grad_norm": 106.73023986816406, "learning_rate": 2.422416364549429e-06, "loss": 13.9653, "step": 352580 }, { "epoch": 0.7122541077986563, "grad_norm": 20.475276947021484, "learning_rate": 2.4221172634543177e-06, "loss": 13.5268, "step": 352590 }, { "epoch": 0.7122743084313401, "grad_norm": 170.05555725097656, "learning_rate": 2.4218181749234954e-06, "loss": 18.5094, "step": 352600 }, { "epoch": 0.7122945090640239, "grad_norm": 328.6314392089844, "learning_rate": 2.4215190989584187e-06, "loss": 15.7557, "step": 352610 }, { "epoch": 0.7123147096967077, "grad_norm": 181.9149932861328, "learning_rate": 2.4212200355605433e-06, "loss": 32.7081, "step": 352620 }, { "epoch": 0.7123349103293916, "grad_norm": 271.09918212890625, "learning_rate": 2.4209209847313302e-06, "loss": 10.3416, "step": 352630 }, { "epoch": 0.7123551109620754, "grad_norm": 291.9587707519531, "learning_rate": 2.4206219464722356e-06, "loss": 15.0117, "step": 352640 }, { "epoch": 0.7123753115947592, "grad_norm": 226.0620880126953, "learning_rate": 2.4203229207847155e-06, "loss": 12.5378, "step": 352650 }, { "epoch": 0.712395512227443, "grad_norm": 647.5647583007812, "learning_rate": 2.420023907670228e-06, "loss": 30.5374, "step": 352660 }, { "epoch": 0.7124157128601267, "grad_norm": 126.0721664428711, "learning_rate": 2.419724907130233e-06, "loss": 17.6807, "step": 352670 }, { "epoch": 0.7124359134928105, "grad_norm": 198.65089416503906, "learning_rate": 2.4194259191661864e-06, "loss": 20.8414, "step": 352680 }, { "epoch": 0.7124561141254944, "grad_norm": 606.13916015625, "learning_rate": 2.419126943779543e-06, "loss": 25.7228, "step": 352690 }, { "epoch": 0.7124763147581782, "grad_norm": 223.71522521972656, "learning_rate": 2.418827980971763e-06, "loss": 19.6184, "step": 352700 }, { "epoch": 0.712496515390862, "grad_norm": 179.10682678222656, "learning_rate": 2.4185290307443025e-06, "loss": 20.5874, "step": 352710 }, { "epoch": 0.7125167160235458, "grad_norm": 409.8794860839844, "learning_rate": 2.418230093098617e-06, "loss": 21.023, "step": 352720 }, { "epoch": 0.7125369166562296, "grad_norm": 0.0, "learning_rate": 2.417931168036166e-06, "loss": 21.3731, "step": 352730 }, { "epoch": 0.7125571172889135, "grad_norm": 8.283265113830566, "learning_rate": 2.417632255558404e-06, "loss": 10.8767, "step": 352740 }, { "epoch": 0.7125773179215973, "grad_norm": 20.391284942626953, "learning_rate": 2.4173333556667912e-06, "loss": 26.7526, "step": 352750 }, { "epoch": 0.7125975185542811, "grad_norm": 350.8326721191406, "learning_rate": 2.417034468362782e-06, "loss": 21.704, "step": 352760 }, { "epoch": 0.7126177191869649, "grad_norm": 162.24313354492188, "learning_rate": 2.416735593647832e-06, "loss": 16.9836, "step": 352770 }, { "epoch": 0.7126379198196487, "grad_norm": 136.50164794921875, "learning_rate": 2.416436731523401e-06, "loss": 13.4317, "step": 352780 }, { "epoch": 0.7126581204523326, "grad_norm": 478.86480712890625, "learning_rate": 2.4161378819909444e-06, "loss": 24.2575, "step": 352790 }, { "epoch": 0.7126783210850164, "grad_norm": 247.8748016357422, "learning_rate": 2.415839045051916e-06, "loss": 18.7655, "step": 352800 }, { "epoch": 0.7126985217177002, "grad_norm": 74.93177032470703, "learning_rate": 2.415540220707775e-06, "loss": 19.3253, "step": 352810 }, { "epoch": 0.712718722350384, "grad_norm": 165.69171142578125, "learning_rate": 2.4152414089599798e-06, "loss": 16.428, "step": 352820 }, { "epoch": 0.7127389229830678, "grad_norm": 116.23603057861328, "learning_rate": 2.4149426098099836e-06, "loss": 15.582, "step": 352830 }, { "epoch": 0.7127591236157517, "grad_norm": 156.26605224609375, "learning_rate": 2.4146438232592425e-06, "loss": 13.093, "step": 352840 }, { "epoch": 0.7127793242484355, "grad_norm": 237.94102478027344, "learning_rate": 2.4143450493092146e-06, "loss": 10.7996, "step": 352850 }, { "epoch": 0.7127995248811193, "grad_norm": 243.3196563720703, "learning_rate": 2.414046287961356e-06, "loss": 23.141, "step": 352860 }, { "epoch": 0.7128197255138031, "grad_norm": 2.4362449645996094, "learning_rate": 2.4137475392171204e-06, "loss": 14.1768, "step": 352870 }, { "epoch": 0.712839926146487, "grad_norm": 158.68258666992188, "learning_rate": 2.4134488030779657e-06, "loss": 13.5108, "step": 352880 }, { "epoch": 0.7128601267791708, "grad_norm": 667.369384765625, "learning_rate": 2.4131500795453515e-06, "loss": 25.143, "step": 352890 }, { "epoch": 0.7128803274118546, "grad_norm": 165.81906127929688, "learning_rate": 2.412851368620726e-06, "loss": 20.2163, "step": 352900 }, { "epoch": 0.7129005280445384, "grad_norm": 143.31089782714844, "learning_rate": 2.41255267030555e-06, "loss": 18.6422, "step": 352910 }, { "epoch": 0.7129207286772221, "grad_norm": 398.47430419921875, "learning_rate": 2.412253984601279e-06, "loss": 23.7065, "step": 352920 }, { "epoch": 0.7129409293099059, "grad_norm": 289.5444641113281, "learning_rate": 2.411955311509369e-06, "loss": 32.2902, "step": 352930 }, { "epoch": 0.7129611299425898, "grad_norm": 0.0, "learning_rate": 2.4116566510312734e-06, "loss": 19.2282, "step": 352940 }, { "epoch": 0.7129813305752736, "grad_norm": 185.40342712402344, "learning_rate": 2.4113580031684487e-06, "loss": 18.6474, "step": 352950 }, { "epoch": 0.7130015312079574, "grad_norm": 179.93614196777344, "learning_rate": 2.4110593679223547e-06, "loss": 19.8821, "step": 352960 }, { "epoch": 0.7130217318406412, "grad_norm": 278.1278381347656, "learning_rate": 2.41076074529444e-06, "loss": 17.7081, "step": 352970 }, { "epoch": 0.713041932473325, "grad_norm": 330.80712890625, "learning_rate": 2.4104621352861633e-06, "loss": 19.2912, "step": 352980 }, { "epoch": 0.7130621331060089, "grad_norm": 192.93603515625, "learning_rate": 2.4101635378989823e-06, "loss": 20.1075, "step": 352990 }, { "epoch": 0.7130823337386927, "grad_norm": 312.5501403808594, "learning_rate": 2.40986495313435e-06, "loss": 18.6931, "step": 353000 }, { "epoch": 0.7131025343713765, "grad_norm": 222.98081970214844, "learning_rate": 2.4095663809937198e-06, "loss": 16.6207, "step": 353010 }, { "epoch": 0.7131227350040603, "grad_norm": 157.7907257080078, "learning_rate": 2.4092678214785508e-06, "loss": 7.3208, "step": 353020 }, { "epoch": 0.7131429356367441, "grad_norm": 378.01678466796875, "learning_rate": 2.408969274590296e-06, "loss": 18.1997, "step": 353030 }, { "epoch": 0.713163136269428, "grad_norm": 283.3150939941406, "learning_rate": 2.408670740330409e-06, "loss": 13.6088, "step": 353040 }, { "epoch": 0.7131833369021118, "grad_norm": 490.0519714355469, "learning_rate": 2.4083722187003483e-06, "loss": 24.7449, "step": 353050 }, { "epoch": 0.7132035375347956, "grad_norm": 224.01846313476562, "learning_rate": 2.408073709701565e-06, "loss": 32.4008, "step": 353060 }, { "epoch": 0.7132237381674794, "grad_norm": 298.1646423339844, "learning_rate": 2.407775213335518e-06, "loss": 12.3091, "step": 353070 }, { "epoch": 0.7132439388001632, "grad_norm": 243.4937744140625, "learning_rate": 2.407476729603661e-06, "loss": 18.7442, "step": 353080 }, { "epoch": 0.713264139432847, "grad_norm": 199.86663818359375, "learning_rate": 2.4071782585074453e-06, "loss": 17.7693, "step": 353090 }, { "epoch": 0.7132843400655309, "grad_norm": 232.44381713867188, "learning_rate": 2.4068798000483306e-06, "loss": 27.4549, "step": 353100 }, { "epoch": 0.7133045406982147, "grad_norm": 447.2940368652344, "learning_rate": 2.406581354227767e-06, "loss": 25.9322, "step": 353110 }, { "epoch": 0.7133247413308985, "grad_norm": 399.7952575683594, "learning_rate": 2.406282921047213e-06, "loss": 23.0577, "step": 353120 }, { "epoch": 0.7133449419635823, "grad_norm": 315.1277770996094, "learning_rate": 2.40598450050812e-06, "loss": 19.056, "step": 353130 }, { "epoch": 0.7133651425962662, "grad_norm": 0.0, "learning_rate": 2.405686092611946e-06, "loss": 7.0096, "step": 353140 }, { "epoch": 0.71338534322895, "grad_norm": 194.9683074951172, "learning_rate": 2.405387697360143e-06, "loss": 26.2746, "step": 353150 }, { "epoch": 0.7134055438616338, "grad_norm": 0.0, "learning_rate": 2.4050893147541643e-06, "loss": 11.177, "step": 353160 }, { "epoch": 0.7134257444943176, "grad_norm": 266.8296813964844, "learning_rate": 2.4047909447954647e-06, "loss": 12.4989, "step": 353170 }, { "epoch": 0.7134459451270013, "grad_norm": 228.2368927001953, "learning_rate": 2.4044925874855035e-06, "loss": 22.139, "step": 353180 }, { "epoch": 0.7134661457596851, "grad_norm": 37.13764953613281, "learning_rate": 2.404194242825727e-06, "loss": 9.4032, "step": 353190 }, { "epoch": 0.713486346392369, "grad_norm": 84.45889282226562, "learning_rate": 2.403895910817593e-06, "loss": 19.8702, "step": 353200 }, { "epoch": 0.7135065470250528, "grad_norm": 611.88671875, "learning_rate": 2.403597591462557e-06, "loss": 13.5691, "step": 353210 }, { "epoch": 0.7135267476577366, "grad_norm": 128.01402282714844, "learning_rate": 2.403299284762071e-06, "loss": 13.4262, "step": 353220 }, { "epoch": 0.7135469482904204, "grad_norm": 251.7296142578125, "learning_rate": 2.403000990717588e-06, "loss": 13.2859, "step": 353230 }, { "epoch": 0.7135671489231042, "grad_norm": 181.200439453125, "learning_rate": 2.4027027093305655e-06, "loss": 15.8237, "step": 353240 }, { "epoch": 0.7135873495557881, "grad_norm": 111.572021484375, "learning_rate": 2.402404440602455e-06, "loss": 13.522, "step": 353250 }, { "epoch": 0.7136075501884719, "grad_norm": 122.22949981689453, "learning_rate": 2.4021061845347076e-06, "loss": 27.5464, "step": 353260 }, { "epoch": 0.7136277508211557, "grad_norm": 489.8574523925781, "learning_rate": 2.40180794112878e-06, "loss": 25.6534, "step": 353270 }, { "epoch": 0.7136479514538395, "grad_norm": 298.7007141113281, "learning_rate": 2.401509710386127e-06, "loss": 13.4128, "step": 353280 }, { "epoch": 0.7136681520865233, "grad_norm": 63.383277893066406, "learning_rate": 2.4012114923082007e-06, "loss": 15.283, "step": 353290 }, { "epoch": 0.7136883527192072, "grad_norm": 115.21827697753906, "learning_rate": 2.4009132868964525e-06, "loss": 12.473, "step": 353300 }, { "epoch": 0.713708553351891, "grad_norm": 373.3875732421875, "learning_rate": 2.400615094152339e-06, "loss": 16.8818, "step": 353310 }, { "epoch": 0.7137287539845748, "grad_norm": 130.26870727539062, "learning_rate": 2.4003169140773132e-06, "loss": 20.0722, "step": 353320 }, { "epoch": 0.7137489546172586, "grad_norm": 460.4097900390625, "learning_rate": 2.4000187466728253e-06, "loss": 26.2353, "step": 353330 }, { "epoch": 0.7137691552499424, "grad_norm": 242.73875427246094, "learning_rate": 2.3997205919403323e-06, "loss": 12.4362, "step": 353340 }, { "epoch": 0.7137893558826263, "grad_norm": 236.0693359375, "learning_rate": 2.399422449881284e-06, "loss": 13.6348, "step": 353350 }, { "epoch": 0.7138095565153101, "grad_norm": 273.381591796875, "learning_rate": 2.399124320497137e-06, "loss": 26.1795, "step": 353360 }, { "epoch": 0.7138297571479939, "grad_norm": 256.4305114746094, "learning_rate": 2.398826203789343e-06, "loss": 19.5674, "step": 353370 }, { "epoch": 0.7138499577806777, "grad_norm": 17.331628799438477, "learning_rate": 2.3985280997593523e-06, "loss": 11.8567, "step": 353380 }, { "epoch": 0.7138701584133615, "grad_norm": 200.576416015625, "learning_rate": 2.3982300084086224e-06, "loss": 14.0443, "step": 353390 }, { "epoch": 0.7138903590460454, "grad_norm": 410.2752990722656, "learning_rate": 2.3979319297386035e-06, "loss": 22.605, "step": 353400 }, { "epoch": 0.7139105596787292, "grad_norm": 91.58063507080078, "learning_rate": 2.397633863750747e-06, "loss": 19.7899, "step": 353410 }, { "epoch": 0.713930760311413, "grad_norm": 534.0927124023438, "learning_rate": 2.397335810446508e-06, "loss": 18.4944, "step": 353420 }, { "epoch": 0.7139509609440968, "grad_norm": 141.9009552001953, "learning_rate": 2.3970377698273396e-06, "loss": 58.6307, "step": 353430 }, { "epoch": 0.7139711615767805, "grad_norm": 232.02023315429688, "learning_rate": 2.3967397418946937e-06, "loss": 12.7046, "step": 353440 }, { "epoch": 0.7139913622094644, "grad_norm": 329.62420654296875, "learning_rate": 2.396441726650021e-06, "loss": 10.9314, "step": 353450 }, { "epoch": 0.7140115628421482, "grad_norm": 129.09683227539062, "learning_rate": 2.396143724094777e-06, "loss": 10.95, "step": 353460 }, { "epoch": 0.714031763474832, "grad_norm": 398.72314453125, "learning_rate": 2.395845734230413e-06, "loss": 10.2976, "step": 353470 }, { "epoch": 0.7140519641075158, "grad_norm": 159.85179138183594, "learning_rate": 2.395547757058379e-06, "loss": 21.9988, "step": 353480 }, { "epoch": 0.7140721647401996, "grad_norm": 195.1329345703125, "learning_rate": 2.395249792580129e-06, "loss": 15.038, "step": 353490 }, { "epoch": 0.7140923653728835, "grad_norm": 177.12173461914062, "learning_rate": 2.39495184079712e-06, "loss": 16.9907, "step": 353500 }, { "epoch": 0.7141125660055673, "grad_norm": 142.09645080566406, "learning_rate": 2.3946539017107963e-06, "loss": 54.1735, "step": 353510 }, { "epoch": 0.7141327666382511, "grad_norm": 4.593077182769775, "learning_rate": 2.3943559753226124e-06, "loss": 24.3707, "step": 353520 }, { "epoch": 0.7141529672709349, "grad_norm": 42.968753814697266, "learning_rate": 2.3940580616340244e-06, "loss": 22.6724, "step": 353530 }, { "epoch": 0.7141731679036187, "grad_norm": 450.1331481933594, "learning_rate": 2.3937601606464807e-06, "loss": 21.5994, "step": 353540 }, { "epoch": 0.7141933685363026, "grad_norm": 146.7470703125, "learning_rate": 2.393462272361432e-06, "loss": 16.0, "step": 353550 }, { "epoch": 0.7142135691689864, "grad_norm": 84.10382080078125, "learning_rate": 2.393164396780332e-06, "loss": 17.509, "step": 353560 }, { "epoch": 0.7142337698016702, "grad_norm": 346.3281555175781, "learning_rate": 2.3928665339046363e-06, "loss": 9.8881, "step": 353570 }, { "epoch": 0.714253970434354, "grad_norm": 419.9554138183594, "learning_rate": 2.3925686837357898e-06, "loss": 11.5624, "step": 353580 }, { "epoch": 0.7142741710670378, "grad_norm": 25.687410354614258, "learning_rate": 2.3922708462752466e-06, "loss": 12.8815, "step": 353590 }, { "epoch": 0.7142943716997217, "grad_norm": 201.30165100097656, "learning_rate": 2.391973021524461e-06, "loss": 13.2541, "step": 353600 }, { "epoch": 0.7143145723324055, "grad_norm": 393.03350830078125, "learning_rate": 2.391675209484883e-06, "loss": 16.8847, "step": 353610 }, { "epoch": 0.7143347729650893, "grad_norm": 20.35003662109375, "learning_rate": 2.391377410157961e-06, "loss": 20.3793, "step": 353620 }, { "epoch": 0.7143549735977731, "grad_norm": 121.69145965576172, "learning_rate": 2.391079623545152e-06, "loss": 13.4734, "step": 353630 }, { "epoch": 0.7143751742304569, "grad_norm": 356.5768127441406, "learning_rate": 2.390781849647904e-06, "loss": 19.6873, "step": 353640 }, { "epoch": 0.7143953748631408, "grad_norm": 110.36219024658203, "learning_rate": 2.3904840884676665e-06, "loss": 14.8299, "step": 353650 }, { "epoch": 0.7144155754958246, "grad_norm": 605.5739135742188, "learning_rate": 2.3901863400058954e-06, "loss": 9.8424, "step": 353660 }, { "epoch": 0.7144357761285084, "grad_norm": 82.11370849609375, "learning_rate": 2.389888604264038e-06, "loss": 11.5088, "step": 353670 }, { "epoch": 0.7144559767611922, "grad_norm": 252.95205688476562, "learning_rate": 2.389590881243548e-06, "loss": 11.2178, "step": 353680 }, { "epoch": 0.7144761773938759, "grad_norm": 313.48077392578125, "learning_rate": 2.389293170945876e-06, "loss": 22.7313, "step": 353690 }, { "epoch": 0.7144963780265597, "grad_norm": 420.6854248046875, "learning_rate": 2.3889954733724708e-06, "loss": 22.449, "step": 353700 }, { "epoch": 0.7145165786592436, "grad_norm": 384.7112121582031, "learning_rate": 2.3886977885247866e-06, "loss": 32.8997, "step": 353710 }, { "epoch": 0.7145367792919274, "grad_norm": 448.9643249511719, "learning_rate": 2.388400116404271e-06, "loss": 20.9246, "step": 353720 }, { "epoch": 0.7145569799246112, "grad_norm": 96.74007415771484, "learning_rate": 2.3881024570123777e-06, "loss": 18.0719, "step": 353730 }, { "epoch": 0.714577180557295, "grad_norm": 542.7215576171875, "learning_rate": 2.387804810350555e-06, "loss": 7.6773, "step": 353740 }, { "epoch": 0.7145973811899788, "grad_norm": 348.0303955078125, "learning_rate": 2.387507176420256e-06, "loss": 18.5705, "step": 353750 }, { "epoch": 0.7146175818226627, "grad_norm": 186.53369140625, "learning_rate": 2.387209555222931e-06, "loss": 34.6282, "step": 353760 }, { "epoch": 0.7146377824553465, "grad_norm": 275.4837951660156, "learning_rate": 2.3869119467600273e-06, "loss": 22.5404, "step": 353770 }, { "epoch": 0.7146579830880303, "grad_norm": 294.6781311035156, "learning_rate": 2.3866143510329998e-06, "loss": 10.6335, "step": 353780 }, { "epoch": 0.7146781837207141, "grad_norm": 295.5734558105469, "learning_rate": 2.3863167680432975e-06, "loss": 23.86, "step": 353790 }, { "epoch": 0.7146983843533979, "grad_norm": 271.676025390625, "learning_rate": 2.3860191977923673e-06, "loss": 21.1659, "step": 353800 }, { "epoch": 0.7147185849860818, "grad_norm": 431.0244445800781, "learning_rate": 2.3857216402816635e-06, "loss": 17.8049, "step": 353810 }, { "epoch": 0.7147387856187656, "grad_norm": 380.23919677734375, "learning_rate": 2.385424095512637e-06, "loss": 12.2795, "step": 353820 }, { "epoch": 0.7147589862514494, "grad_norm": 188.64822387695312, "learning_rate": 2.3851265634867358e-06, "loss": 12.785, "step": 353830 }, { "epoch": 0.7147791868841332, "grad_norm": 469.4999084472656, "learning_rate": 2.3848290442054096e-06, "loss": 31.6223, "step": 353840 }, { "epoch": 0.714799387516817, "grad_norm": 30.78319549560547, "learning_rate": 2.3845315376701112e-06, "loss": 6.9123, "step": 353850 }, { "epoch": 0.7148195881495009, "grad_norm": 167.8297576904297, "learning_rate": 2.384234043882288e-06, "loss": 16.876, "step": 353860 }, { "epoch": 0.7148397887821847, "grad_norm": 48.25760269165039, "learning_rate": 2.38393656284339e-06, "loss": 15.2047, "step": 353870 }, { "epoch": 0.7148599894148685, "grad_norm": 311.8377380371094, "learning_rate": 2.3836390945548672e-06, "loss": 24.598, "step": 353880 }, { "epoch": 0.7148801900475523, "grad_norm": 200.6234588623047, "learning_rate": 2.3833416390181723e-06, "loss": 10.2543, "step": 353890 }, { "epoch": 0.7149003906802361, "grad_norm": 89.95429229736328, "learning_rate": 2.3830441962347528e-06, "loss": 29.0288, "step": 353900 }, { "epoch": 0.71492059131292, "grad_norm": 0.0, "learning_rate": 2.3827467662060565e-06, "loss": 15.5198, "step": 353910 }, { "epoch": 0.7149407919456038, "grad_norm": 387.5794677734375, "learning_rate": 2.382449348933537e-06, "loss": 10.4855, "step": 353920 }, { "epoch": 0.7149609925782876, "grad_norm": 4.072193145751953, "learning_rate": 2.382151944418642e-06, "loss": 14.5278, "step": 353930 }, { "epoch": 0.7149811932109714, "grad_norm": 184.029296875, "learning_rate": 2.381854552662819e-06, "loss": 15.7296, "step": 353940 }, { "epoch": 0.7150013938436551, "grad_norm": 147.13211059570312, "learning_rate": 2.3815571736675214e-06, "loss": 14.2783, "step": 353950 }, { "epoch": 0.715021594476339, "grad_norm": 268.27899169921875, "learning_rate": 2.381259807434194e-06, "loss": 13.8663, "step": 353960 }, { "epoch": 0.7150417951090228, "grad_norm": 230.50526428222656, "learning_rate": 2.3809624539642913e-06, "loss": 32.817, "step": 353970 }, { "epoch": 0.7150619957417066, "grad_norm": 39.21169662475586, "learning_rate": 2.3806651132592597e-06, "loss": 12.96, "step": 353980 }, { "epoch": 0.7150821963743904, "grad_norm": 212.66748046875, "learning_rate": 2.3803677853205465e-06, "loss": 15.8933, "step": 353990 }, { "epoch": 0.7151023970070742, "grad_norm": 7129.67724609375, "learning_rate": 2.380070470149605e-06, "loss": 29.04, "step": 354000 }, { "epoch": 0.715122597639758, "grad_norm": 416.3603210449219, "learning_rate": 2.3797731677478808e-06, "loss": 30.2236, "step": 354010 }, { "epoch": 0.7151427982724419, "grad_norm": 593.6258544921875, "learning_rate": 2.379475878116826e-06, "loss": 18.8144, "step": 354020 }, { "epoch": 0.7151629989051257, "grad_norm": 156.53395080566406, "learning_rate": 2.379178601257886e-06, "loss": 17.3862, "step": 354030 }, { "epoch": 0.7151831995378095, "grad_norm": 11.683676719665527, "learning_rate": 2.3788813371725133e-06, "loss": 15.8112, "step": 354040 }, { "epoch": 0.7152034001704933, "grad_norm": 392.6048278808594, "learning_rate": 2.3785840858621556e-06, "loss": 18.0596, "step": 354050 }, { "epoch": 0.7152236008031772, "grad_norm": 1.5868643522262573, "learning_rate": 2.3782868473282587e-06, "loss": 25.5911, "step": 354060 }, { "epoch": 0.715243801435861, "grad_norm": 279.7609558105469, "learning_rate": 2.3779896215722765e-06, "loss": 15.5919, "step": 354070 }, { "epoch": 0.7152640020685448, "grad_norm": 130.58641052246094, "learning_rate": 2.3776924085956536e-06, "loss": 17.1513, "step": 354080 }, { "epoch": 0.7152842027012286, "grad_norm": 302.2381896972656, "learning_rate": 2.3773952083998392e-06, "loss": 17.5509, "step": 354090 }, { "epoch": 0.7153044033339124, "grad_norm": 76.13607025146484, "learning_rate": 2.3770980209862814e-06, "loss": 24.9492, "step": 354100 }, { "epoch": 0.7153246039665963, "grad_norm": 283.043212890625, "learning_rate": 2.376800846356434e-06, "loss": 19.7922, "step": 354110 }, { "epoch": 0.7153448045992801, "grad_norm": 247.35804748535156, "learning_rate": 2.3765036845117373e-06, "loss": 24.1628, "step": 354120 }, { "epoch": 0.7153650052319639, "grad_norm": 131.46121215820312, "learning_rate": 2.3762065354536436e-06, "loss": 5.7815, "step": 354130 }, { "epoch": 0.7153852058646477, "grad_norm": 414.3635559082031, "learning_rate": 2.375909399183603e-06, "loss": 21.67, "step": 354140 }, { "epoch": 0.7154054064973315, "grad_norm": 282.8853454589844, "learning_rate": 2.3756122757030614e-06, "loss": 17.049, "step": 354150 }, { "epoch": 0.7154256071300154, "grad_norm": 0.0, "learning_rate": 2.3753151650134655e-06, "loss": 11.8311, "step": 354160 }, { "epoch": 0.7154458077626992, "grad_norm": 184.10940551757812, "learning_rate": 2.3750180671162656e-06, "loss": 11.2087, "step": 354170 }, { "epoch": 0.715466008395383, "grad_norm": 956.0592651367188, "learning_rate": 2.3747209820129117e-06, "loss": 25.2183, "step": 354180 }, { "epoch": 0.7154862090280668, "grad_norm": 765.4486083984375, "learning_rate": 2.3744239097048465e-06, "loss": 26.1804, "step": 354190 }, { "epoch": 0.7155064096607505, "grad_norm": 0.0, "learning_rate": 2.3741268501935212e-06, "loss": 14.3389, "step": 354200 }, { "epoch": 0.7155266102934343, "grad_norm": 350.7500305175781, "learning_rate": 2.373829803480384e-06, "loss": 10.7627, "step": 354210 }, { "epoch": 0.7155468109261182, "grad_norm": 12.2223482131958, "learning_rate": 2.3735327695668823e-06, "loss": 14.2389, "step": 354220 }, { "epoch": 0.715567011558802, "grad_norm": 275.4072570800781, "learning_rate": 2.3732357484544616e-06, "loss": 14.3166, "step": 354230 }, { "epoch": 0.7155872121914858, "grad_norm": 288.8053894042969, "learning_rate": 2.372938740144573e-06, "loss": 20.9849, "step": 354240 }, { "epoch": 0.7156074128241696, "grad_norm": 139.51295471191406, "learning_rate": 2.372641744638662e-06, "loss": 14.3773, "step": 354250 }, { "epoch": 0.7156276134568534, "grad_norm": 424.760009765625, "learning_rate": 2.3723447619381756e-06, "loss": 22.5918, "step": 354260 }, { "epoch": 0.7156478140895373, "grad_norm": 354.06146240234375, "learning_rate": 2.3720477920445633e-06, "loss": 11.3565, "step": 354270 }, { "epoch": 0.7156680147222211, "grad_norm": 403.6581726074219, "learning_rate": 2.3717508349592695e-06, "loss": 19.9509, "step": 354280 }, { "epoch": 0.7156882153549049, "grad_norm": 237.10166931152344, "learning_rate": 2.3714538906837452e-06, "loss": 25.5127, "step": 354290 }, { "epoch": 0.7157084159875887, "grad_norm": 330.4504089355469, "learning_rate": 2.3711569592194363e-06, "loss": 25.4154, "step": 354300 }, { "epoch": 0.7157286166202725, "grad_norm": 196.739501953125, "learning_rate": 2.370860040567787e-06, "loss": 15.2483, "step": 354310 }, { "epoch": 0.7157488172529564, "grad_norm": 101.1941909790039, "learning_rate": 2.3705631347302492e-06, "loss": 14.3821, "step": 354320 }, { "epoch": 0.7157690178856402, "grad_norm": 342.1519775390625, "learning_rate": 2.3702662417082655e-06, "loss": 12.7249, "step": 354330 }, { "epoch": 0.715789218518324, "grad_norm": 502.88250732421875, "learning_rate": 2.369969361503288e-06, "loss": 11.5893, "step": 354340 }, { "epoch": 0.7158094191510078, "grad_norm": 245.72726440429688, "learning_rate": 2.3696724941167583e-06, "loss": 13.7673, "step": 354350 }, { "epoch": 0.7158296197836916, "grad_norm": 469.6466064453125, "learning_rate": 2.369375639550127e-06, "loss": 15.7236, "step": 354360 }, { "epoch": 0.7158498204163755, "grad_norm": 0.0, "learning_rate": 2.369078797804841e-06, "loss": 31.5405, "step": 354370 }, { "epoch": 0.7158700210490593, "grad_norm": 195.83689880371094, "learning_rate": 2.368781968882343e-06, "loss": 23.0507, "step": 354380 }, { "epoch": 0.7158902216817431, "grad_norm": 201.952880859375, "learning_rate": 2.368485152784086e-06, "loss": 11.6569, "step": 354390 }, { "epoch": 0.7159104223144269, "grad_norm": 327.7806396484375, "learning_rate": 2.3681883495115114e-06, "loss": 16.7715, "step": 354400 }, { "epoch": 0.7159306229471107, "grad_norm": 305.88665771484375, "learning_rate": 2.3678915590660667e-06, "loss": 13.3868, "step": 354410 }, { "epoch": 0.7159508235797946, "grad_norm": 306.1297607421875, "learning_rate": 2.367594781449199e-06, "loss": 17.1757, "step": 354420 }, { "epoch": 0.7159710242124784, "grad_norm": 356.8263244628906, "learning_rate": 2.367298016662357e-06, "loss": 6.446, "step": 354430 }, { "epoch": 0.7159912248451622, "grad_norm": 237.99049377441406, "learning_rate": 2.3670012647069852e-06, "loss": 17.4464, "step": 354440 }, { "epoch": 0.716011425477846, "grad_norm": 180.61436462402344, "learning_rate": 2.3667045255845276e-06, "loss": 17.1062, "step": 354450 }, { "epoch": 0.7160316261105297, "grad_norm": 365.78271484375, "learning_rate": 2.3664077992964356e-06, "loss": 12.166, "step": 354460 }, { "epoch": 0.7160518267432135, "grad_norm": 172.00302124023438, "learning_rate": 2.3661110858441517e-06, "loss": 10.5201, "step": 354470 }, { "epoch": 0.7160720273758974, "grad_norm": 486.7910461425781, "learning_rate": 2.3658143852291214e-06, "loss": 15.8174, "step": 354480 }, { "epoch": 0.7160922280085812, "grad_norm": 267.6249694824219, "learning_rate": 2.3655176974527922e-06, "loss": 15.3962, "step": 354490 }, { "epoch": 0.716112428641265, "grad_norm": 337.75634765625, "learning_rate": 2.3652210225166122e-06, "loss": 12.4524, "step": 354500 }, { "epoch": 0.7161326292739488, "grad_norm": 215.66354370117188, "learning_rate": 2.364924360422025e-06, "loss": 21.7918, "step": 354510 }, { "epoch": 0.7161528299066326, "grad_norm": 161.68557739257812, "learning_rate": 2.3646277111704756e-06, "loss": 16.5264, "step": 354520 }, { "epoch": 0.7161730305393165, "grad_norm": 213.16802978515625, "learning_rate": 2.364331074763413e-06, "loss": 15.8733, "step": 354530 }, { "epoch": 0.7161932311720003, "grad_norm": 276.3595275878906, "learning_rate": 2.3640344512022807e-06, "loss": 12.7766, "step": 354540 }, { "epoch": 0.7162134318046841, "grad_norm": 463.7532043457031, "learning_rate": 2.3637378404885224e-06, "loss": 15.9682, "step": 354550 }, { "epoch": 0.7162336324373679, "grad_norm": 147.43666076660156, "learning_rate": 2.3634412426235886e-06, "loss": 17.8192, "step": 354560 }, { "epoch": 0.7162538330700517, "grad_norm": 145.8763885498047, "learning_rate": 2.3631446576089205e-06, "loss": 22.4045, "step": 354570 }, { "epoch": 0.7162740337027356, "grad_norm": 417.7742614746094, "learning_rate": 2.362848085445968e-06, "loss": 17.0611, "step": 354580 }, { "epoch": 0.7162942343354194, "grad_norm": 252.11483764648438, "learning_rate": 2.362551526136173e-06, "loss": 17.7715, "step": 354590 }, { "epoch": 0.7163144349681032, "grad_norm": 89.77654266357422, "learning_rate": 2.3622549796809807e-06, "loss": 20.279, "step": 354600 }, { "epoch": 0.716334635600787, "grad_norm": 1.7783671617507935, "learning_rate": 2.3619584460818397e-06, "loss": 14.7522, "step": 354610 }, { "epoch": 0.7163548362334708, "grad_norm": 206.63169860839844, "learning_rate": 2.3616619253401913e-06, "loss": 7.678, "step": 354620 }, { "epoch": 0.7163750368661547, "grad_norm": 89.13861846923828, "learning_rate": 2.361365417457484e-06, "loss": 14.6398, "step": 354630 }, { "epoch": 0.7163952374988385, "grad_norm": 201.0657196044922, "learning_rate": 2.36106892243516e-06, "loss": 14.6554, "step": 354640 }, { "epoch": 0.7164154381315223, "grad_norm": 465.0264892578125, "learning_rate": 2.3607724402746685e-06, "loss": 12.0094, "step": 354650 }, { "epoch": 0.7164356387642061, "grad_norm": 405.1797790527344, "learning_rate": 2.3604759709774514e-06, "loss": 32.2766, "step": 354660 }, { "epoch": 0.71645583939689, "grad_norm": 294.2959289550781, "learning_rate": 2.3601795145449525e-06, "loss": 14.9119, "step": 354670 }, { "epoch": 0.7164760400295738, "grad_norm": 8.618574142456055, "learning_rate": 2.3598830709786206e-06, "loss": 19.0408, "step": 354680 }, { "epoch": 0.7164962406622576, "grad_norm": 104.5858383178711, "learning_rate": 2.3595866402798983e-06, "loss": 15.4574, "step": 354690 }, { "epoch": 0.7165164412949414, "grad_norm": 605.813720703125, "learning_rate": 2.3592902224502284e-06, "loss": 12.4017, "step": 354700 }, { "epoch": 0.7165366419276252, "grad_norm": 172.08010864257812, "learning_rate": 2.3589938174910577e-06, "loss": 19.7606, "step": 354710 }, { "epoch": 0.7165568425603089, "grad_norm": 356.94781494140625, "learning_rate": 2.3586974254038347e-06, "loss": 15.1911, "step": 354720 }, { "epoch": 0.7165770431929928, "grad_norm": 162.90879821777344, "learning_rate": 2.3584010461899966e-06, "loss": 14.6527, "step": 354730 }, { "epoch": 0.7165972438256766, "grad_norm": 232.06663513183594, "learning_rate": 2.358104679850991e-06, "loss": 11.0932, "step": 354740 }, { "epoch": 0.7166174444583604, "grad_norm": 273.7134094238281, "learning_rate": 2.357808326388265e-06, "loss": 19.5308, "step": 354750 }, { "epoch": 0.7166376450910442, "grad_norm": 104.89708709716797, "learning_rate": 2.3575119858032604e-06, "loss": 32.7084, "step": 354760 }, { "epoch": 0.716657845723728, "grad_norm": 245.3611602783203, "learning_rate": 2.3572156580974205e-06, "loss": 16.8508, "step": 354770 }, { "epoch": 0.7166780463564119, "grad_norm": 807.2177734375, "learning_rate": 2.3569193432721904e-06, "loss": 17.3702, "step": 354780 }, { "epoch": 0.7166982469890957, "grad_norm": 431.61376953125, "learning_rate": 2.3566230413290186e-06, "loss": 24.9729, "step": 354790 }, { "epoch": 0.7167184476217795, "grad_norm": 234.71368408203125, "learning_rate": 2.356326752269342e-06, "loss": 14.2825, "step": 354800 }, { "epoch": 0.7167386482544633, "grad_norm": 100.16107177734375, "learning_rate": 2.356030476094608e-06, "loss": 22.3937, "step": 354810 }, { "epoch": 0.7167588488871471, "grad_norm": 332.8794860839844, "learning_rate": 2.355734212806263e-06, "loss": 18.6385, "step": 354820 }, { "epoch": 0.716779049519831, "grad_norm": 311.058837890625, "learning_rate": 2.3554379624057485e-06, "loss": 37.4643, "step": 354830 }, { "epoch": 0.7167992501525148, "grad_norm": 502.5896911621094, "learning_rate": 2.355141724894507e-06, "loss": 18.2773, "step": 354840 }, { "epoch": 0.7168194507851986, "grad_norm": 194.74864196777344, "learning_rate": 2.354845500273985e-06, "loss": 21.1159, "step": 354850 }, { "epoch": 0.7168396514178824, "grad_norm": 296.8035583496094, "learning_rate": 2.354549288545626e-06, "loss": 18.1475, "step": 354860 }, { "epoch": 0.7168598520505662, "grad_norm": 336.57342529296875, "learning_rate": 2.354253089710871e-06, "loss": 28.6001, "step": 354870 }, { "epoch": 0.7168800526832501, "grad_norm": 142.26649475097656, "learning_rate": 2.3539569037711675e-06, "loss": 9.7356, "step": 354880 }, { "epoch": 0.7169002533159339, "grad_norm": 319.9284362792969, "learning_rate": 2.3536607307279546e-06, "loss": 6.011, "step": 354890 }, { "epoch": 0.7169204539486177, "grad_norm": 580.8311157226562, "learning_rate": 2.353364570582681e-06, "loss": 25.0288, "step": 354900 }, { "epoch": 0.7169406545813015, "grad_norm": 57.09507369995117, "learning_rate": 2.353068423336787e-06, "loss": 12.4054, "step": 354910 }, { "epoch": 0.7169608552139853, "grad_norm": 468.3412170410156, "learning_rate": 2.3527722889917147e-06, "loss": 20.8453, "step": 354920 }, { "epoch": 0.7169810558466692, "grad_norm": 198.58851623535156, "learning_rate": 2.352476167548911e-06, "loss": 10.5139, "step": 354930 }, { "epoch": 0.717001256479353, "grad_norm": 13.878609657287598, "learning_rate": 2.3521800590098153e-06, "loss": 11.1506, "step": 354940 }, { "epoch": 0.7170214571120368, "grad_norm": 122.72370147705078, "learning_rate": 2.351883963375875e-06, "loss": 24.3835, "step": 354950 }, { "epoch": 0.7170416577447206, "grad_norm": 190.20407104492188, "learning_rate": 2.3515878806485292e-06, "loss": 29.9604, "step": 354960 }, { "epoch": 0.7170618583774043, "grad_norm": 543.7489624023438, "learning_rate": 2.351291810829225e-06, "loss": 17.3431, "step": 354970 }, { "epoch": 0.7170820590100881, "grad_norm": 7.788311958312988, "learning_rate": 2.3509957539194028e-06, "loss": 16.1919, "step": 354980 }, { "epoch": 0.717102259642772, "grad_norm": 64.21256256103516, "learning_rate": 2.350699709920504e-06, "loss": 17.0085, "step": 354990 }, { "epoch": 0.7171224602754558, "grad_norm": 169.13629150390625, "learning_rate": 2.3504036788339763e-06, "loss": 17.5118, "step": 355000 }, { "epoch": 0.7171426609081396, "grad_norm": 170.46426391601562, "learning_rate": 2.3501076606612587e-06, "loss": 16.1402, "step": 355010 }, { "epoch": 0.7171628615408234, "grad_norm": 236.36224365234375, "learning_rate": 2.3498116554037937e-06, "loss": 27.8285, "step": 355020 }, { "epoch": 0.7171830621735072, "grad_norm": 143.0253448486328, "learning_rate": 2.349515663063025e-06, "loss": 11.6968, "step": 355030 }, { "epoch": 0.7172032628061911, "grad_norm": 137.09854125976562, "learning_rate": 2.3492196836403974e-06, "loss": 15.3658, "step": 355040 }, { "epoch": 0.7172234634388749, "grad_norm": 196.21775817871094, "learning_rate": 2.348923717137352e-06, "loss": 19.283, "step": 355050 }, { "epoch": 0.7172436640715587, "grad_norm": 301.66864013671875, "learning_rate": 2.3486277635553282e-06, "loss": 17.3696, "step": 355060 }, { "epoch": 0.7172638647042425, "grad_norm": 404.5790100097656, "learning_rate": 2.3483318228957734e-06, "loss": 13.8187, "step": 355070 }, { "epoch": 0.7172840653369263, "grad_norm": 65.60882568359375, "learning_rate": 2.348035895160128e-06, "loss": 10.0269, "step": 355080 }, { "epoch": 0.7173042659696102, "grad_norm": 71.39765930175781, "learning_rate": 2.347739980349831e-06, "loss": 13.9046, "step": 355090 }, { "epoch": 0.717324466602294, "grad_norm": 594.5914916992188, "learning_rate": 2.3474440784663287e-06, "loss": 22.3832, "step": 355100 }, { "epoch": 0.7173446672349778, "grad_norm": 160.2620849609375, "learning_rate": 2.3471481895110643e-06, "loss": 17.3095, "step": 355110 }, { "epoch": 0.7173648678676616, "grad_norm": 322.81036376953125, "learning_rate": 2.346852313485477e-06, "loss": 44.3455, "step": 355120 }, { "epoch": 0.7173850685003454, "grad_norm": 419.8510437011719, "learning_rate": 2.346556450391009e-06, "loss": 17.1731, "step": 355130 }, { "epoch": 0.7174052691330293, "grad_norm": 326.14898681640625, "learning_rate": 2.346260600229104e-06, "loss": 17.7804, "step": 355140 }, { "epoch": 0.7174254697657131, "grad_norm": 167.5782928466797, "learning_rate": 2.3459647630012026e-06, "loss": 19.9829, "step": 355150 }, { "epoch": 0.7174456703983969, "grad_norm": 32.91395950317383, "learning_rate": 2.345668938708746e-06, "loss": 16.4312, "step": 355160 }, { "epoch": 0.7174658710310807, "grad_norm": 99.42413330078125, "learning_rate": 2.345373127353179e-06, "loss": 8.9055, "step": 355170 }, { "epoch": 0.7174860716637645, "grad_norm": 364.1810302734375, "learning_rate": 2.345077328935939e-06, "loss": 24.6566, "step": 355180 }, { "epoch": 0.7175062722964484, "grad_norm": 177.75204467773438, "learning_rate": 2.344781543458472e-06, "loss": 19.8081, "step": 355190 }, { "epoch": 0.7175264729291322, "grad_norm": 38.1491813659668, "learning_rate": 2.344485770922218e-06, "loss": 14.8056, "step": 355200 }, { "epoch": 0.717546673561816, "grad_norm": 0.0, "learning_rate": 2.3441900113286164e-06, "loss": 24.4775, "step": 355210 }, { "epoch": 0.7175668741944998, "grad_norm": 318.7830810546875, "learning_rate": 2.3438942646791125e-06, "loss": 22.2196, "step": 355220 }, { "epoch": 0.7175870748271835, "grad_norm": 102.6142807006836, "learning_rate": 2.3435985309751436e-06, "loss": 28.2446, "step": 355230 }, { "epoch": 0.7176072754598674, "grad_norm": 137.196533203125, "learning_rate": 2.3433028102181553e-06, "loss": 25.7067, "step": 355240 }, { "epoch": 0.7176274760925512, "grad_norm": 543.185791015625, "learning_rate": 2.3430071024095853e-06, "loss": 25.6877, "step": 355250 }, { "epoch": 0.717647676725235, "grad_norm": 257.4171447753906, "learning_rate": 2.3427114075508776e-06, "loss": 18.9436, "step": 355260 }, { "epoch": 0.7176678773579188, "grad_norm": 327.5001525878906, "learning_rate": 2.342415725643473e-06, "loss": 17.9442, "step": 355270 }, { "epoch": 0.7176880779906026, "grad_norm": 230.5413818359375, "learning_rate": 2.3421200566888096e-06, "loss": 12.1092, "step": 355280 }, { "epoch": 0.7177082786232865, "grad_norm": 142.1453399658203, "learning_rate": 2.341824400688333e-06, "loss": 17.2307, "step": 355290 }, { "epoch": 0.7177284792559703, "grad_norm": 400.1680908203125, "learning_rate": 2.3415287576434807e-06, "loss": 18.6483, "step": 355300 }, { "epoch": 0.7177486798886541, "grad_norm": 327.1651306152344, "learning_rate": 2.3412331275556936e-06, "loss": 36.0024, "step": 355310 }, { "epoch": 0.7177688805213379, "grad_norm": 190.54571533203125, "learning_rate": 2.3409375104264144e-06, "loss": 36.4655, "step": 355320 }, { "epoch": 0.7177890811540217, "grad_norm": 157.03826904296875, "learning_rate": 2.340641906257086e-06, "loss": 15.2952, "step": 355330 }, { "epoch": 0.7178092817867056, "grad_norm": 415.89532470703125, "learning_rate": 2.3403463150491434e-06, "loss": 14.4724, "step": 355340 }, { "epoch": 0.7178294824193894, "grad_norm": 514.6676025390625, "learning_rate": 2.34005073680403e-06, "loss": 17.0091, "step": 355350 }, { "epoch": 0.7178496830520732, "grad_norm": 280.961181640625, "learning_rate": 2.3397551715231887e-06, "loss": 19.1272, "step": 355360 }, { "epoch": 0.717869883684757, "grad_norm": 864.1436767578125, "learning_rate": 2.3394596192080575e-06, "loss": 21.7884, "step": 355370 }, { "epoch": 0.7178900843174408, "grad_norm": 400.3459167480469, "learning_rate": 2.3391640798600762e-06, "loss": 20.2554, "step": 355380 }, { "epoch": 0.7179102849501247, "grad_norm": 381.305908203125, "learning_rate": 2.3388685534806865e-06, "loss": 19.4034, "step": 355390 }, { "epoch": 0.7179304855828085, "grad_norm": 204.8538818359375, "learning_rate": 2.338573040071332e-06, "loss": 19.2713, "step": 355400 }, { "epoch": 0.7179506862154923, "grad_norm": 340.0127258300781, "learning_rate": 2.338277539633446e-06, "loss": 18.7788, "step": 355410 }, { "epoch": 0.7179708868481761, "grad_norm": 165.15855407714844, "learning_rate": 2.3379820521684727e-06, "loss": 23.9126, "step": 355420 }, { "epoch": 0.7179910874808599, "grad_norm": 224.77195739746094, "learning_rate": 2.337686577677854e-06, "loss": 12.2019, "step": 355430 }, { "epoch": 0.7180112881135438, "grad_norm": 692.0313110351562, "learning_rate": 2.3373911161630274e-06, "loss": 26.3247, "step": 355440 }, { "epoch": 0.7180314887462276, "grad_norm": 270.647216796875, "learning_rate": 2.3370956676254327e-06, "loss": 26.7632, "step": 355450 }, { "epoch": 0.7180516893789114, "grad_norm": 228.72059631347656, "learning_rate": 2.3368002320665118e-06, "loss": 15.5518, "step": 355460 }, { "epoch": 0.7180718900115952, "grad_norm": 639.113037109375, "learning_rate": 2.3365048094877033e-06, "loss": 26.6053, "step": 355470 }, { "epoch": 0.7180920906442789, "grad_norm": 800.0162353515625, "learning_rate": 2.336209399890446e-06, "loss": 29.9338, "step": 355480 }, { "epoch": 0.7181122912769627, "grad_norm": 104.81077575683594, "learning_rate": 2.3359140032761827e-06, "loss": 15.7749, "step": 355490 }, { "epoch": 0.7181324919096466, "grad_norm": 349.87005615234375, "learning_rate": 2.3356186196463497e-06, "loss": 22.9356, "step": 355500 }, { "epoch": 0.7181526925423304, "grad_norm": 530.3905639648438, "learning_rate": 2.335323249002391e-06, "loss": 15.7555, "step": 355510 }, { "epoch": 0.7181728931750142, "grad_norm": 488.87469482421875, "learning_rate": 2.3350278913457404e-06, "loss": 15.9624, "step": 355520 }, { "epoch": 0.718193093807698, "grad_norm": 381.0428161621094, "learning_rate": 2.334732546677843e-06, "loss": 19.2444, "step": 355530 }, { "epoch": 0.7182132944403818, "grad_norm": 271.8262939453125, "learning_rate": 2.334437215000136e-06, "loss": 9.1133, "step": 355540 }, { "epoch": 0.7182334950730657, "grad_norm": 876.6840209960938, "learning_rate": 2.334141896314057e-06, "loss": 13.2906, "step": 355550 }, { "epoch": 0.7182536957057495, "grad_norm": 410.1135559082031, "learning_rate": 2.333846590621049e-06, "loss": 27.2027, "step": 355560 }, { "epoch": 0.7182738963384333, "grad_norm": 40.40521240234375, "learning_rate": 2.333551297922547e-06, "loss": 21.0658, "step": 355570 }, { "epoch": 0.7182940969711171, "grad_norm": 258.37713623046875, "learning_rate": 2.333256018219995e-06, "loss": 12.5867, "step": 355580 }, { "epoch": 0.7183142976038009, "grad_norm": 2.117507219314575, "learning_rate": 2.3329607515148287e-06, "loss": 27.9023, "step": 355590 }, { "epoch": 0.7183344982364848, "grad_norm": 242.10740661621094, "learning_rate": 2.3326654978084872e-06, "loss": 19.9327, "step": 355600 }, { "epoch": 0.7183546988691686, "grad_norm": 29.936824798583984, "learning_rate": 2.3323702571024125e-06, "loss": 14.2646, "step": 355610 }, { "epoch": 0.7183748995018524, "grad_norm": 299.51361083984375, "learning_rate": 2.3320750293980416e-06, "loss": 18.7445, "step": 355620 }, { "epoch": 0.7183951001345362, "grad_norm": 551.1920166015625, "learning_rate": 2.3317798146968113e-06, "loss": 22.4605, "step": 355630 }, { "epoch": 0.71841530076722, "grad_norm": 209.5087127685547, "learning_rate": 2.3314846130001622e-06, "loss": 13.3467, "step": 355640 }, { "epoch": 0.7184355013999039, "grad_norm": 314.8741149902344, "learning_rate": 2.3311894243095363e-06, "loss": 14.4373, "step": 355650 }, { "epoch": 0.7184557020325877, "grad_norm": 229.1568145751953, "learning_rate": 2.3308942486263685e-06, "loss": 17.6382, "step": 355660 }, { "epoch": 0.7184759026652715, "grad_norm": 90.40312194824219, "learning_rate": 2.3305990859520974e-06, "loss": 11.6542, "step": 355670 }, { "epoch": 0.7184961032979553, "grad_norm": 154.19496154785156, "learning_rate": 2.3303039362881634e-06, "loss": 18.7662, "step": 355680 }, { "epoch": 0.7185163039306391, "grad_norm": 262.4820251464844, "learning_rate": 2.3300087996360053e-06, "loss": 21.7533, "step": 355690 }, { "epoch": 0.718536504563323, "grad_norm": 556.9293823242188, "learning_rate": 2.329713675997058e-06, "loss": 19.841, "step": 355700 }, { "epoch": 0.7185567051960068, "grad_norm": 359.2424621582031, "learning_rate": 2.3294185653727623e-06, "loss": 30.0503, "step": 355710 }, { "epoch": 0.7185769058286906, "grad_norm": 125.88587188720703, "learning_rate": 2.329123467764559e-06, "loss": 7.1373, "step": 355720 }, { "epoch": 0.7185971064613744, "grad_norm": 345.49530029296875, "learning_rate": 2.3288283831738834e-06, "loss": 24.5578, "step": 355730 }, { "epoch": 0.7186173070940581, "grad_norm": 494.84930419921875, "learning_rate": 2.328533311602173e-06, "loss": 24.5741, "step": 355740 }, { "epoch": 0.718637507726742, "grad_norm": 76.14154815673828, "learning_rate": 2.3282382530508683e-06, "loss": 18.5573, "step": 355750 }, { "epoch": 0.7186577083594258, "grad_norm": 36.37162780761719, "learning_rate": 2.327943207521407e-06, "loss": 17.8213, "step": 355760 }, { "epoch": 0.7186779089921096, "grad_norm": 235.41998291015625, "learning_rate": 2.3276481750152245e-06, "loss": 12.3597, "step": 355770 }, { "epoch": 0.7186981096247934, "grad_norm": 226.21112060546875, "learning_rate": 2.3273531555337624e-06, "loss": 17.2089, "step": 355780 }, { "epoch": 0.7187183102574772, "grad_norm": 456.3151550292969, "learning_rate": 2.327058149078455e-06, "loss": 20.9525, "step": 355790 }, { "epoch": 0.718738510890161, "grad_norm": 142.62860107421875, "learning_rate": 2.3267631556507443e-06, "loss": 23.3455, "step": 355800 }, { "epoch": 0.7187587115228449, "grad_norm": 238.02566528320312, "learning_rate": 2.326468175252065e-06, "loss": 18.0613, "step": 355810 }, { "epoch": 0.7187789121555287, "grad_norm": 227.1192626953125, "learning_rate": 2.326173207883854e-06, "loss": 20.6126, "step": 355820 }, { "epoch": 0.7187991127882125, "grad_norm": 510.2124328613281, "learning_rate": 2.325878253547552e-06, "loss": 21.3357, "step": 355830 }, { "epoch": 0.7188193134208963, "grad_norm": 263.8762512207031, "learning_rate": 2.3255833122445937e-06, "loss": 20.2004, "step": 355840 }, { "epoch": 0.7188395140535802, "grad_norm": 422.0729675292969, "learning_rate": 2.32528838397642e-06, "loss": 22.8262, "step": 355850 }, { "epoch": 0.718859714686264, "grad_norm": 315.4471740722656, "learning_rate": 2.3249934687444642e-06, "loss": 20.9653, "step": 355860 }, { "epoch": 0.7188799153189478, "grad_norm": 189.99371337890625, "learning_rate": 2.3246985665501674e-06, "loss": 22.349, "step": 355870 }, { "epoch": 0.7189001159516316, "grad_norm": 385.8592224121094, "learning_rate": 2.3244036773949658e-06, "loss": 18.8033, "step": 355880 }, { "epoch": 0.7189203165843154, "grad_norm": 334.801025390625, "learning_rate": 2.3241088012802938e-06, "loss": 15.2655, "step": 355890 }, { "epoch": 0.7189405172169993, "grad_norm": 410.3312683105469, "learning_rate": 2.323813938207593e-06, "loss": 14.3538, "step": 355900 }, { "epoch": 0.7189607178496831, "grad_norm": 140.47235107421875, "learning_rate": 2.323519088178299e-06, "loss": 11.6279, "step": 355910 }, { "epoch": 0.7189809184823669, "grad_norm": 390.5113220214844, "learning_rate": 2.3232242511938452e-06, "loss": 19.8344, "step": 355920 }, { "epoch": 0.7190011191150507, "grad_norm": 2609.6708984375, "learning_rate": 2.322929427255673e-06, "loss": 49.692, "step": 355930 }, { "epoch": 0.7190213197477345, "grad_norm": 41.339778900146484, "learning_rate": 2.3226346163652213e-06, "loss": 9.447, "step": 355940 }, { "epoch": 0.7190415203804184, "grad_norm": 259.9169921875, "learning_rate": 2.32233981852392e-06, "loss": 18.7707, "step": 355950 }, { "epoch": 0.7190617210131022, "grad_norm": 14.54073429107666, "learning_rate": 2.3220450337332097e-06, "loss": 13.8515, "step": 355960 }, { "epoch": 0.719081921645786, "grad_norm": 32.974342346191406, "learning_rate": 2.321750261994529e-06, "loss": 14.8644, "step": 355970 }, { "epoch": 0.7191021222784698, "grad_norm": 403.5469665527344, "learning_rate": 2.321455503309313e-06, "loss": 11.0765, "step": 355980 }, { "epoch": 0.7191223229111535, "grad_norm": 68.36952209472656, "learning_rate": 2.3211607576789958e-06, "loss": 15.0253, "step": 355990 }, { "epoch": 0.7191425235438373, "grad_norm": 104.4713134765625, "learning_rate": 2.320866025105016e-06, "loss": 19.6905, "step": 356000 }, { "epoch": 0.7191627241765212, "grad_norm": 317.15625, "learning_rate": 2.320571305588814e-06, "loss": 22.8881, "step": 356010 }, { "epoch": 0.719182924809205, "grad_norm": 1018.44091796875, "learning_rate": 2.3202765991318195e-06, "loss": 12.6107, "step": 356020 }, { "epoch": 0.7192031254418888, "grad_norm": 12.036026954650879, "learning_rate": 2.3199819057354712e-06, "loss": 24.2663, "step": 356030 }, { "epoch": 0.7192233260745726, "grad_norm": 263.9620056152344, "learning_rate": 2.3196872254012075e-06, "loss": 30.5985, "step": 356040 }, { "epoch": 0.7192435267072564, "grad_norm": 348.8106689453125, "learning_rate": 2.319392558130464e-06, "loss": 24.131, "step": 356050 }, { "epoch": 0.7192637273399403, "grad_norm": 284.87884521484375, "learning_rate": 2.3190979039246738e-06, "loss": 25.1836, "step": 356060 }, { "epoch": 0.7192839279726241, "grad_norm": 196.61846923828125, "learning_rate": 2.318803262785277e-06, "loss": 22.8368, "step": 356070 }, { "epoch": 0.7193041286053079, "grad_norm": 238.06700134277344, "learning_rate": 2.318508634713708e-06, "loss": 16.3864, "step": 356080 }, { "epoch": 0.7193243292379917, "grad_norm": 680.2268676757812, "learning_rate": 2.318214019711401e-06, "loss": 22.7759, "step": 356090 }, { "epoch": 0.7193445298706755, "grad_norm": 509.99310302734375, "learning_rate": 2.3179194177797954e-06, "loss": 12.396, "step": 356100 }, { "epoch": 0.7193647305033594, "grad_norm": 295.4245910644531, "learning_rate": 2.3176248289203237e-06, "loss": 20.8813, "step": 356110 }, { "epoch": 0.7193849311360432, "grad_norm": 316.9297790527344, "learning_rate": 2.3173302531344243e-06, "loss": 16.3086, "step": 356120 }, { "epoch": 0.719405131768727, "grad_norm": 395.9407958984375, "learning_rate": 2.317035690423531e-06, "loss": 19.1438, "step": 356130 }, { "epoch": 0.7194253324014108, "grad_norm": 155.4447021484375, "learning_rate": 2.3167411407890814e-06, "loss": 23.5283, "step": 356140 }, { "epoch": 0.7194455330340946, "grad_norm": 412.4838562011719, "learning_rate": 2.3164466042325106e-06, "loss": 15.8925, "step": 356150 }, { "epoch": 0.7194657336667785, "grad_norm": 201.9062042236328, "learning_rate": 2.316152080755251e-06, "loss": 21.3347, "step": 356160 }, { "epoch": 0.7194859342994623, "grad_norm": 20.51656723022461, "learning_rate": 2.315857570358743e-06, "loss": 16.2291, "step": 356170 }, { "epoch": 0.7195061349321461, "grad_norm": 234.8279266357422, "learning_rate": 2.3155630730444185e-06, "loss": 12.0799, "step": 356180 }, { "epoch": 0.7195263355648299, "grad_norm": 312.28240966796875, "learning_rate": 2.315268588813715e-06, "loss": 11.6614, "step": 356190 }, { "epoch": 0.7195465361975137, "grad_norm": 0.8734074831008911, "learning_rate": 2.3149741176680666e-06, "loss": 9.7195, "step": 356200 }, { "epoch": 0.7195667368301976, "grad_norm": 170.59036254882812, "learning_rate": 2.314679659608907e-06, "loss": 34.5746, "step": 356210 }, { "epoch": 0.7195869374628814, "grad_norm": 244.38009643554688, "learning_rate": 2.3143852146376754e-06, "loss": 29.7209, "step": 356220 }, { "epoch": 0.7196071380955652, "grad_norm": 703.2997436523438, "learning_rate": 2.314090782755804e-06, "loss": 15.7573, "step": 356230 }, { "epoch": 0.719627338728249, "grad_norm": 644.5662841796875, "learning_rate": 2.313796363964727e-06, "loss": 38.2722, "step": 356240 }, { "epoch": 0.7196475393609327, "grad_norm": 591.7333374023438, "learning_rate": 2.3135019582658803e-06, "loss": 21.339, "step": 356250 }, { "epoch": 0.7196677399936166, "grad_norm": 243.1295623779297, "learning_rate": 2.3132075656607034e-06, "loss": 10.279, "step": 356260 }, { "epoch": 0.7196879406263004, "grad_norm": 103.611572265625, "learning_rate": 2.3129131861506225e-06, "loss": 24.62, "step": 356270 }, { "epoch": 0.7197081412589842, "grad_norm": 328.0987548828125, "learning_rate": 2.3126188197370773e-06, "loss": 18.2817, "step": 356280 }, { "epoch": 0.719728341891668, "grad_norm": 273.7485656738281, "learning_rate": 2.312324466421504e-06, "loss": 20.5311, "step": 356290 }, { "epoch": 0.7197485425243518, "grad_norm": 338.8016357421875, "learning_rate": 2.312030126205335e-06, "loss": 8.4978, "step": 356300 }, { "epoch": 0.7197687431570357, "grad_norm": 298.8153381347656, "learning_rate": 2.3117357990900034e-06, "loss": 24.1243, "step": 356310 }, { "epoch": 0.7197889437897195, "grad_norm": 95.53570556640625, "learning_rate": 2.3114414850769458e-06, "loss": 14.4008, "step": 356320 }, { "epoch": 0.7198091444224033, "grad_norm": 143.20953369140625, "learning_rate": 2.3111471841675993e-06, "loss": 19.1132, "step": 356330 }, { "epoch": 0.7198293450550871, "grad_norm": 427.0244140625, "learning_rate": 2.310852896363392e-06, "loss": 14.0407, "step": 356340 }, { "epoch": 0.7198495456877709, "grad_norm": 415.5598449707031, "learning_rate": 2.3105586216657616e-06, "loss": 26.9595, "step": 356350 }, { "epoch": 0.7198697463204548, "grad_norm": 366.85296630859375, "learning_rate": 2.3102643600761445e-06, "loss": 35.1391, "step": 356360 }, { "epoch": 0.7198899469531386, "grad_norm": 101.57357788085938, "learning_rate": 2.3099701115959715e-06, "loss": 10.3231, "step": 356370 }, { "epoch": 0.7199101475858224, "grad_norm": 150.08840942382812, "learning_rate": 2.309675876226677e-06, "loss": 11.2345, "step": 356380 }, { "epoch": 0.7199303482185062, "grad_norm": 274.6263122558594, "learning_rate": 2.309381653969698e-06, "loss": 29.2011, "step": 356390 }, { "epoch": 0.71995054885119, "grad_norm": 120.92787170410156, "learning_rate": 2.309087444826464e-06, "loss": 10.1864, "step": 356400 }, { "epoch": 0.7199707494838739, "grad_norm": 407.27337646484375, "learning_rate": 2.308793248798414e-06, "loss": 19.4523, "step": 356410 }, { "epoch": 0.7199909501165577, "grad_norm": 304.27410888671875, "learning_rate": 2.308499065886978e-06, "loss": 24.299, "step": 356420 }, { "epoch": 0.7200111507492415, "grad_norm": 195.33270263671875, "learning_rate": 2.3082048960935905e-06, "loss": 9.7156, "step": 356430 }, { "epoch": 0.7200313513819253, "grad_norm": 313.06610107421875, "learning_rate": 2.3079107394196875e-06, "loss": 8.2572, "step": 356440 }, { "epoch": 0.7200515520146091, "grad_norm": 128.92010498046875, "learning_rate": 2.3076165958666992e-06, "loss": 10.423, "step": 356450 }, { "epoch": 0.720071752647293, "grad_norm": 256.7684631347656, "learning_rate": 2.3073224654360627e-06, "loss": 19.4092, "step": 356460 }, { "epoch": 0.7200919532799768, "grad_norm": 158.2165069580078, "learning_rate": 2.3070283481292077e-06, "loss": 13.9447, "step": 356470 }, { "epoch": 0.7201121539126606, "grad_norm": 366.3782958984375, "learning_rate": 2.306734243947572e-06, "loss": 25.5881, "step": 356480 }, { "epoch": 0.7201323545453444, "grad_norm": 237.52078247070312, "learning_rate": 2.3064401528925872e-06, "loss": 14.946, "step": 356490 }, { "epoch": 0.7201525551780282, "grad_norm": 107.8667984008789, "learning_rate": 2.3061460749656844e-06, "loss": 12.5879, "step": 356500 }, { "epoch": 0.7201727558107119, "grad_norm": 60.819034576416016, "learning_rate": 2.3058520101683012e-06, "loss": 19.821, "step": 356510 }, { "epoch": 0.7201929564433958, "grad_norm": 368.0421447753906, "learning_rate": 2.3055579585018685e-06, "loss": 18.6961, "step": 356520 }, { "epoch": 0.7202131570760796, "grad_norm": 199.55715942382812, "learning_rate": 2.3052639199678167e-06, "loss": 13.0571, "step": 356530 }, { "epoch": 0.7202333577087634, "grad_norm": 397.94793701171875, "learning_rate": 2.3049698945675826e-06, "loss": 14.2328, "step": 356540 }, { "epoch": 0.7202535583414472, "grad_norm": 52.55872344970703, "learning_rate": 2.3046758823026018e-06, "loss": 13.1545, "step": 356550 }, { "epoch": 0.720273758974131, "grad_norm": 344.5338134765625, "learning_rate": 2.3043818831743003e-06, "loss": 12.0586, "step": 356560 }, { "epoch": 0.7202939596068149, "grad_norm": 408.9903869628906, "learning_rate": 2.304087897184114e-06, "loss": 25.4751, "step": 356570 }, { "epoch": 0.7203141602394987, "grad_norm": 6.053652763366699, "learning_rate": 2.303793924333479e-06, "loss": 24.0125, "step": 356580 }, { "epoch": 0.7203343608721825, "grad_norm": 314.3376159667969, "learning_rate": 2.303499964623825e-06, "loss": 15.138, "step": 356590 }, { "epoch": 0.7203545615048663, "grad_norm": 235.4812774658203, "learning_rate": 2.303206018056583e-06, "loss": 13.1227, "step": 356600 }, { "epoch": 0.7203747621375501, "grad_norm": 337.00640869140625, "learning_rate": 2.3029120846331883e-06, "loss": 17.2821, "step": 356610 }, { "epoch": 0.720394962770234, "grad_norm": 40.25985336303711, "learning_rate": 2.3026181643550767e-06, "loss": 25.2829, "step": 356620 }, { "epoch": 0.7204151634029178, "grad_norm": 256.7934265136719, "learning_rate": 2.3023242572236728e-06, "loss": 21.2834, "step": 356630 }, { "epoch": 0.7204353640356016, "grad_norm": 37.199493408203125, "learning_rate": 2.3020303632404132e-06, "loss": 13.9729, "step": 356640 }, { "epoch": 0.7204555646682854, "grad_norm": 1094.634521484375, "learning_rate": 2.301736482406733e-06, "loss": 26.9079, "step": 356650 }, { "epoch": 0.7204757653009692, "grad_norm": 513.7996826171875, "learning_rate": 2.3014426147240614e-06, "loss": 12.1929, "step": 356660 }, { "epoch": 0.7204959659336531, "grad_norm": 293.4723205566406, "learning_rate": 2.3011487601938292e-06, "loss": 15.7683, "step": 356670 }, { "epoch": 0.7205161665663369, "grad_norm": 163.67262268066406, "learning_rate": 2.3008549188174728e-06, "loss": 17.3491, "step": 356680 }, { "epoch": 0.7205363671990207, "grad_norm": 187.08697509765625, "learning_rate": 2.300561090596422e-06, "loss": 13.4877, "step": 356690 }, { "epoch": 0.7205565678317045, "grad_norm": 214.4397735595703, "learning_rate": 2.3002672755321076e-06, "loss": 15.3683, "step": 356700 }, { "epoch": 0.7205767684643883, "grad_norm": 267.14471435546875, "learning_rate": 2.2999734736259644e-06, "loss": 12.2271, "step": 356710 }, { "epoch": 0.7205969690970722, "grad_norm": 264.2240905761719, "learning_rate": 2.299679684879421e-06, "loss": 30.0065, "step": 356720 }, { "epoch": 0.720617169729756, "grad_norm": 200.52139282226562, "learning_rate": 2.2993859092939136e-06, "loss": 12.759, "step": 356730 }, { "epoch": 0.7206373703624398, "grad_norm": 522.8004760742188, "learning_rate": 2.29909214687087e-06, "loss": 17.2499, "step": 356740 }, { "epoch": 0.7206575709951236, "grad_norm": 353.4561462402344, "learning_rate": 2.298798397611725e-06, "loss": 19.4124, "step": 356750 }, { "epoch": 0.7206777716278073, "grad_norm": 7.217230319976807, "learning_rate": 2.2985046615179098e-06, "loss": 4.3016, "step": 356760 }, { "epoch": 0.7206979722604911, "grad_norm": 179.41905212402344, "learning_rate": 2.2982109385908524e-06, "loss": 14.8691, "step": 356770 }, { "epoch": 0.720718172893175, "grad_norm": 222.937744140625, "learning_rate": 2.29791722883199e-06, "loss": 16.0485, "step": 356780 }, { "epoch": 0.7207383735258588, "grad_norm": 334.99066162109375, "learning_rate": 2.2976235322427487e-06, "loss": 42.0555, "step": 356790 }, { "epoch": 0.7207585741585426, "grad_norm": 174.93997192382812, "learning_rate": 2.297329848824565e-06, "loss": 9.1868, "step": 356800 }, { "epoch": 0.7207787747912264, "grad_norm": 194.0845947265625, "learning_rate": 2.2970361785788673e-06, "loss": 19.9425, "step": 356810 }, { "epoch": 0.7207989754239102, "grad_norm": 134.17715454101562, "learning_rate": 2.296742521507086e-06, "loss": 9.7291, "step": 356820 }, { "epoch": 0.7208191760565941, "grad_norm": 534.063232421875, "learning_rate": 2.296448877610655e-06, "loss": 16.5421, "step": 356830 }, { "epoch": 0.7208393766892779, "grad_norm": 171.25244140625, "learning_rate": 2.2961552468910048e-06, "loss": 17.6212, "step": 356840 }, { "epoch": 0.7208595773219617, "grad_norm": 152.2783203125, "learning_rate": 2.295861629349564e-06, "loss": 15.0755, "step": 356850 }, { "epoch": 0.7208797779546455, "grad_norm": 536.8139038085938, "learning_rate": 2.295568024987766e-06, "loss": 34.1357, "step": 356860 }, { "epoch": 0.7208999785873293, "grad_norm": 371.63909912109375, "learning_rate": 2.2952744338070447e-06, "loss": 16.7012, "step": 356870 }, { "epoch": 0.7209201792200132, "grad_norm": 563.446044921875, "learning_rate": 2.2949808558088243e-06, "loss": 21.8304, "step": 356880 }, { "epoch": 0.720940379852697, "grad_norm": 118.25423431396484, "learning_rate": 2.2946872909945387e-06, "loss": 80.0601, "step": 356890 }, { "epoch": 0.7209605804853808, "grad_norm": 104.51567077636719, "learning_rate": 2.294393739365621e-06, "loss": 29.6952, "step": 356900 }, { "epoch": 0.7209807811180646, "grad_norm": 150.73060607910156, "learning_rate": 2.294100200923501e-06, "loss": 21.8762, "step": 356910 }, { "epoch": 0.7210009817507484, "grad_norm": 331.5848388671875, "learning_rate": 2.293806675669606e-06, "loss": 19.1218, "step": 356920 }, { "epoch": 0.7210211823834323, "grad_norm": 115.88788604736328, "learning_rate": 2.2935131636053687e-06, "loss": 12.245, "step": 356930 }, { "epoch": 0.7210413830161161, "grad_norm": 84.21212005615234, "learning_rate": 2.293219664732224e-06, "loss": 16.9377, "step": 356940 }, { "epoch": 0.7210615836487999, "grad_norm": 117.29344940185547, "learning_rate": 2.2929261790515944e-06, "loss": 16.7247, "step": 356950 }, { "epoch": 0.7210817842814837, "grad_norm": 238.5404510498047, "learning_rate": 2.2926327065649144e-06, "loss": 10.0556, "step": 356960 }, { "epoch": 0.7211019849141675, "grad_norm": 1458.4068603515625, "learning_rate": 2.292339247273617e-06, "loss": 34.5001, "step": 356970 }, { "epoch": 0.7211221855468514, "grad_norm": 94.7646255493164, "learning_rate": 2.2920458011791287e-06, "loss": 5.7025, "step": 356980 }, { "epoch": 0.7211423861795352, "grad_norm": 227.38778686523438, "learning_rate": 2.291752368282879e-06, "loss": 21.5338, "step": 356990 }, { "epoch": 0.721162586812219, "grad_norm": 241.6106719970703, "learning_rate": 2.2914589485863015e-06, "loss": 27.8222, "step": 357000 }, { "epoch": 0.7211827874449028, "grad_norm": 394.4272766113281, "learning_rate": 2.2911655420908247e-06, "loss": 20.3868, "step": 357010 }, { "epoch": 0.7212029880775865, "grad_norm": 182.5919952392578, "learning_rate": 2.290872148797877e-06, "loss": 18.9027, "step": 357020 }, { "epoch": 0.7212231887102704, "grad_norm": 204.43896484375, "learning_rate": 2.2905787687088914e-06, "loss": 13.5134, "step": 357030 }, { "epoch": 0.7212433893429542, "grad_norm": 431.2008361816406, "learning_rate": 2.2902854018252945e-06, "loss": 16.1125, "step": 357040 }, { "epoch": 0.721263589975638, "grad_norm": 361.4604797363281, "learning_rate": 2.2899920481485192e-06, "loss": 26.3758, "step": 357050 }, { "epoch": 0.7212837906083218, "grad_norm": 618.9961547851562, "learning_rate": 2.2896987076799933e-06, "loss": 22.6974, "step": 357060 }, { "epoch": 0.7213039912410056, "grad_norm": 257.6907958984375, "learning_rate": 2.289405380421148e-06, "loss": 20.3775, "step": 357070 }, { "epoch": 0.7213241918736895, "grad_norm": 278.0897521972656, "learning_rate": 2.289112066373411e-06, "loss": 13.9974, "step": 357080 }, { "epoch": 0.7213443925063733, "grad_norm": 276.6914367675781, "learning_rate": 2.2888187655382143e-06, "loss": 10.2706, "step": 357090 }, { "epoch": 0.7213645931390571, "grad_norm": 377.61602783203125, "learning_rate": 2.288525477916986e-06, "loss": 15.6032, "step": 357100 }, { "epoch": 0.7213847937717409, "grad_norm": 483.26263427734375, "learning_rate": 2.2882322035111543e-06, "loss": 13.3426, "step": 357110 }, { "epoch": 0.7214049944044247, "grad_norm": 564.2094116210938, "learning_rate": 2.2879389423221514e-06, "loss": 16.1819, "step": 357120 }, { "epoch": 0.7214251950371086, "grad_norm": 316.120361328125, "learning_rate": 2.287645694351406e-06, "loss": 28.6746, "step": 357130 }, { "epoch": 0.7214453956697924, "grad_norm": 290.21392822265625, "learning_rate": 2.287352459600344e-06, "loss": 9.7208, "step": 357140 }, { "epoch": 0.7214655963024762, "grad_norm": 339.1097717285156, "learning_rate": 2.287059238070397e-06, "loss": 12.8544, "step": 357150 }, { "epoch": 0.72148579693516, "grad_norm": 3.0314667224884033, "learning_rate": 2.2867660297629977e-06, "loss": 14.2467, "step": 357160 }, { "epoch": 0.7215059975678438, "grad_norm": 54.69688034057617, "learning_rate": 2.2864728346795686e-06, "loss": 13.4124, "step": 357170 }, { "epoch": 0.7215261982005277, "grad_norm": 184.17947387695312, "learning_rate": 2.286179652821542e-06, "loss": 23.8344, "step": 357180 }, { "epoch": 0.7215463988332115, "grad_norm": 409.2073974609375, "learning_rate": 2.285886484190348e-06, "loss": 20.9984, "step": 357190 }, { "epoch": 0.7215665994658953, "grad_norm": 17.507165908813477, "learning_rate": 2.285593328787414e-06, "loss": 16.3525, "step": 357200 }, { "epoch": 0.7215868000985791, "grad_norm": 177.25448608398438, "learning_rate": 2.285300186614167e-06, "loss": 13.5711, "step": 357210 }, { "epoch": 0.7216070007312629, "grad_norm": 365.2554626464844, "learning_rate": 2.285007057672038e-06, "loss": 18.6931, "step": 357220 }, { "epoch": 0.7216272013639468, "grad_norm": 239.2264862060547, "learning_rate": 2.2847139419624585e-06, "loss": 20.3205, "step": 357230 }, { "epoch": 0.7216474019966306, "grad_norm": 137.36053466796875, "learning_rate": 2.2844208394868504e-06, "loss": 13.1116, "step": 357240 }, { "epoch": 0.7216676026293144, "grad_norm": 475.0229187011719, "learning_rate": 2.284127750246646e-06, "loss": 13.9642, "step": 357250 }, { "epoch": 0.7216878032619982, "grad_norm": 336.03277587890625, "learning_rate": 2.2838346742432753e-06, "loss": 24.3037, "step": 357260 }, { "epoch": 0.7217080038946819, "grad_norm": 252.24588012695312, "learning_rate": 2.283541611478165e-06, "loss": 7.7413, "step": 357270 }, { "epoch": 0.7217282045273657, "grad_norm": 206.38668823242188, "learning_rate": 2.2832485619527417e-06, "loss": 13.8762, "step": 357280 }, { "epoch": 0.7217484051600496, "grad_norm": 487.6889953613281, "learning_rate": 2.2829555256684372e-06, "loss": 22.7098, "step": 357290 }, { "epoch": 0.7217686057927334, "grad_norm": 300.626953125, "learning_rate": 2.282662502626678e-06, "loss": 13.1918, "step": 357300 }, { "epoch": 0.7217888064254172, "grad_norm": 169.10415649414062, "learning_rate": 2.2823694928288897e-06, "loss": 17.0417, "step": 357310 }, { "epoch": 0.721809007058101, "grad_norm": 218.5157470703125, "learning_rate": 2.282076496276506e-06, "loss": 12.2256, "step": 357320 }, { "epoch": 0.7218292076907848, "grad_norm": 508.5581970214844, "learning_rate": 2.2817835129709486e-06, "loss": 13.9216, "step": 357330 }, { "epoch": 0.7218494083234687, "grad_norm": 379.67974853515625, "learning_rate": 2.2814905429136515e-06, "loss": 16.1941, "step": 357340 }, { "epoch": 0.7218696089561525, "grad_norm": 220.76409912109375, "learning_rate": 2.281197586106037e-06, "loss": 33.5465, "step": 357350 }, { "epoch": 0.7218898095888363, "grad_norm": 195.1992645263672, "learning_rate": 2.2809046425495386e-06, "loss": 22.3215, "step": 357360 }, { "epoch": 0.7219100102215201, "grad_norm": 98.57269287109375, "learning_rate": 2.2806117122455806e-06, "loss": 12.7751, "step": 357370 }, { "epoch": 0.721930210854204, "grad_norm": 178.23912048339844, "learning_rate": 2.280318795195589e-06, "loss": 23.0532, "step": 357380 }, { "epoch": 0.7219504114868878, "grad_norm": 576.5174560546875, "learning_rate": 2.2800258914009966e-06, "loss": 24.273, "step": 357390 }, { "epoch": 0.7219706121195716, "grad_norm": 146.30963134765625, "learning_rate": 2.2797330008632255e-06, "loss": 9.3606, "step": 357400 }, { "epoch": 0.7219908127522554, "grad_norm": 17.506610870361328, "learning_rate": 2.2794401235837083e-06, "loss": 11.6243, "step": 357410 }, { "epoch": 0.7220110133849392, "grad_norm": 207.46640014648438, "learning_rate": 2.2791472595638693e-06, "loss": 19.4371, "step": 357420 }, { "epoch": 0.722031214017623, "grad_norm": 190.5224151611328, "learning_rate": 2.278854408805135e-06, "loss": 13.9949, "step": 357430 }, { "epoch": 0.7220514146503069, "grad_norm": 284.9352111816406, "learning_rate": 2.2785615713089363e-06, "loss": 21.7524, "step": 357440 }, { "epoch": 0.7220716152829907, "grad_norm": 289.9217834472656, "learning_rate": 2.2782687470766985e-06, "loss": 19.7832, "step": 357450 }, { "epoch": 0.7220918159156745, "grad_norm": 384.205810546875, "learning_rate": 2.277975936109846e-06, "loss": 15.4378, "step": 357460 }, { "epoch": 0.7221120165483583, "grad_norm": 172.30145263671875, "learning_rate": 2.2776831384098096e-06, "loss": 15.0402, "step": 357470 }, { "epoch": 0.7221322171810421, "grad_norm": 76.46100616455078, "learning_rate": 2.277390353978019e-06, "loss": 13.4709, "step": 357480 }, { "epoch": 0.722152417813726, "grad_norm": 258.3019714355469, "learning_rate": 2.2770975828158936e-06, "loss": 18.0509, "step": 357490 }, { "epoch": 0.7221726184464098, "grad_norm": 182.73471069335938, "learning_rate": 2.2768048249248648e-06, "loss": 9.1743, "step": 357500 }, { "epoch": 0.7221928190790936, "grad_norm": 145.87364196777344, "learning_rate": 2.27651208030636e-06, "loss": 20.1633, "step": 357510 }, { "epoch": 0.7222130197117774, "grad_norm": 344.2505187988281, "learning_rate": 2.2762193489618057e-06, "loss": 26.6334, "step": 357520 }, { "epoch": 0.7222332203444611, "grad_norm": 160.7799530029297, "learning_rate": 2.2759266308926257e-06, "loss": 14.6848, "step": 357530 }, { "epoch": 0.722253420977145, "grad_norm": 220.37721252441406, "learning_rate": 2.275633926100249e-06, "loss": 17.4599, "step": 357540 }, { "epoch": 0.7222736216098288, "grad_norm": 3313.99169921875, "learning_rate": 2.2753412345861065e-06, "loss": 34.1575, "step": 357550 }, { "epoch": 0.7222938222425126, "grad_norm": 163.14268493652344, "learning_rate": 2.2750485563516154e-06, "loss": 20.2219, "step": 357560 }, { "epoch": 0.7223140228751964, "grad_norm": 125.36463165283203, "learning_rate": 2.2747558913982084e-06, "loss": 22.8801, "step": 357570 }, { "epoch": 0.7223342235078802, "grad_norm": 1687.087158203125, "learning_rate": 2.2744632397273113e-06, "loss": 36.2431, "step": 357580 }, { "epoch": 0.722354424140564, "grad_norm": 304.7818298339844, "learning_rate": 2.2741706013403507e-06, "loss": 17.3731, "step": 357590 }, { "epoch": 0.7223746247732479, "grad_norm": 15.944353103637695, "learning_rate": 2.27387797623875e-06, "loss": 12.2739, "step": 357600 }, { "epoch": 0.7223948254059317, "grad_norm": 317.0122375488281, "learning_rate": 2.273585364423939e-06, "loss": 19.7602, "step": 357610 }, { "epoch": 0.7224150260386155, "grad_norm": 307.92913818359375, "learning_rate": 2.2732927658973427e-06, "loss": 24.5959, "step": 357620 }, { "epoch": 0.7224352266712993, "grad_norm": 0.0, "learning_rate": 2.273000180660384e-06, "loss": 23.0615, "step": 357630 }, { "epoch": 0.7224554273039832, "grad_norm": 224.8555450439453, "learning_rate": 2.272707608714493e-06, "loss": 32.384, "step": 357640 }, { "epoch": 0.722475627936667, "grad_norm": 0.6432770490646362, "learning_rate": 2.2724150500610946e-06, "loss": 26.7456, "step": 357650 }, { "epoch": 0.7224958285693508, "grad_norm": 191.19839477539062, "learning_rate": 2.2721225047016153e-06, "loss": 13.2417, "step": 357660 }, { "epoch": 0.7225160292020346, "grad_norm": 87.68319702148438, "learning_rate": 2.2718299726374786e-06, "loss": 11.88, "step": 357670 }, { "epoch": 0.7225362298347184, "grad_norm": 146.5957794189453, "learning_rate": 2.271537453870113e-06, "loss": 16.4589, "step": 357680 }, { "epoch": 0.7225564304674023, "grad_norm": 398.52655029296875, "learning_rate": 2.271244948400943e-06, "loss": 18.951, "step": 357690 }, { "epoch": 0.7225766311000861, "grad_norm": 249.3287353515625, "learning_rate": 2.2709524562313923e-06, "loss": 27.8872, "step": 357700 }, { "epoch": 0.7225968317327699, "grad_norm": 429.16278076171875, "learning_rate": 2.2706599773628906e-06, "loss": 14.3575, "step": 357710 }, { "epoch": 0.7226170323654537, "grad_norm": 499.9886474609375, "learning_rate": 2.270367511796859e-06, "loss": 21.3194, "step": 357720 }, { "epoch": 0.7226372329981375, "grad_norm": 303.24530029296875, "learning_rate": 2.2700750595347263e-06, "loss": 15.246, "step": 357730 }, { "epoch": 0.7226574336308214, "grad_norm": 359.04150390625, "learning_rate": 2.2697826205779178e-06, "loss": 9.3299, "step": 357740 }, { "epoch": 0.7226776342635052, "grad_norm": 84.22815704345703, "learning_rate": 2.2694901949278554e-06, "loss": 10.002, "step": 357750 }, { "epoch": 0.722697834896189, "grad_norm": 198.41868591308594, "learning_rate": 2.269197782585968e-06, "loss": 10.8646, "step": 357760 }, { "epoch": 0.7227180355288728, "grad_norm": 196.36647033691406, "learning_rate": 2.26890538355368e-06, "loss": 15.8983, "step": 357770 }, { "epoch": 0.7227382361615566, "grad_norm": 230.56851196289062, "learning_rate": 2.2686129978324134e-06, "loss": 12.6932, "step": 357780 }, { "epoch": 0.7227584367942403, "grad_norm": 176.10511779785156, "learning_rate": 2.2683206254235962e-06, "loss": 13.6545, "step": 357790 }, { "epoch": 0.7227786374269242, "grad_norm": 220.7939910888672, "learning_rate": 2.268028266328655e-06, "loss": 13.7173, "step": 357800 }, { "epoch": 0.722798838059608, "grad_norm": 67.04261016845703, "learning_rate": 2.2677359205490122e-06, "loss": 20.7847, "step": 357810 }, { "epoch": 0.7228190386922918, "grad_norm": 538.314453125, "learning_rate": 2.267443588086092e-06, "loss": 12.6392, "step": 357820 }, { "epoch": 0.7228392393249756, "grad_norm": 8.557598114013672, "learning_rate": 2.26715126894132e-06, "loss": 13.3107, "step": 357830 }, { "epoch": 0.7228594399576594, "grad_norm": 266.5348815917969, "learning_rate": 2.2668589631161246e-06, "loss": 19.9006, "step": 357840 }, { "epoch": 0.7228796405903433, "grad_norm": 185.941162109375, "learning_rate": 2.2665666706119237e-06, "loss": 19.985, "step": 357850 }, { "epoch": 0.7228998412230271, "grad_norm": 254.8385772705078, "learning_rate": 2.2662743914301455e-06, "loss": 13.5491, "step": 357860 }, { "epoch": 0.7229200418557109, "grad_norm": 379.8462219238281, "learning_rate": 2.265982125572216e-06, "loss": 22.9443, "step": 357870 }, { "epoch": 0.7229402424883947, "grad_norm": 421.5854187011719, "learning_rate": 2.2656898730395575e-06, "loss": 30.2651, "step": 357880 }, { "epoch": 0.7229604431210785, "grad_norm": 157.99099731445312, "learning_rate": 2.2653976338335936e-06, "loss": 17.5238, "step": 357890 }, { "epoch": 0.7229806437537624, "grad_norm": 537.8452758789062, "learning_rate": 2.265105407955752e-06, "loss": 45.8691, "step": 357900 }, { "epoch": 0.7230008443864462, "grad_norm": 53.6563606262207, "learning_rate": 2.2648131954074546e-06, "loss": 29.1586, "step": 357910 }, { "epoch": 0.72302104501913, "grad_norm": 178.46717834472656, "learning_rate": 2.264520996190124e-06, "loss": 12.2131, "step": 357920 }, { "epoch": 0.7230412456518138, "grad_norm": 299.25506591796875, "learning_rate": 2.264228810305189e-06, "loss": 7.6888, "step": 357930 }, { "epoch": 0.7230614462844976, "grad_norm": 305.2010498046875, "learning_rate": 2.2639366377540684e-06, "loss": 16.024, "step": 357940 }, { "epoch": 0.7230816469171815, "grad_norm": 354.305419921875, "learning_rate": 2.263644478538191e-06, "loss": 17.6694, "step": 357950 }, { "epoch": 0.7231018475498653, "grad_norm": 469.99688720703125, "learning_rate": 2.263352332658976e-06, "loss": 21.0649, "step": 357960 }, { "epoch": 0.7231220481825491, "grad_norm": 116.97770690917969, "learning_rate": 2.2630602001178524e-06, "loss": 11.7081, "step": 357970 }, { "epoch": 0.7231422488152329, "grad_norm": 144.48252868652344, "learning_rate": 2.262768080916241e-06, "loss": 17.4906, "step": 357980 }, { "epoch": 0.7231624494479167, "grad_norm": 432.1307678222656, "learning_rate": 2.2624759750555642e-06, "loss": 14.5641, "step": 357990 }, { "epoch": 0.7231826500806006, "grad_norm": 196.40982055664062, "learning_rate": 2.2621838825372496e-06, "loss": 9.9661, "step": 358000 }, { "epoch": 0.7232028507132844, "grad_norm": 329.9579772949219, "learning_rate": 2.2618918033627168e-06, "loss": 23.7206, "step": 358010 }, { "epoch": 0.7232230513459682, "grad_norm": 152.0462188720703, "learning_rate": 2.2615997375333926e-06, "loss": 8.7909, "step": 358020 }, { "epoch": 0.723243251978652, "grad_norm": 163.75411987304688, "learning_rate": 2.2613076850506997e-06, "loss": 16.9223, "step": 358030 }, { "epoch": 0.7232634526113357, "grad_norm": 107.2142333984375, "learning_rate": 2.261015645916059e-06, "loss": 11.9921, "step": 358040 }, { "epoch": 0.7232836532440196, "grad_norm": 639.236328125, "learning_rate": 2.2607236201308974e-06, "loss": 26.2776, "step": 358050 }, { "epoch": 0.7233038538767034, "grad_norm": 245.37562561035156, "learning_rate": 2.260431607696637e-06, "loss": 16.495, "step": 358060 }, { "epoch": 0.7233240545093872, "grad_norm": 290.36376953125, "learning_rate": 2.260139608614699e-06, "loss": 12.7392, "step": 358070 }, { "epoch": 0.723344255142071, "grad_norm": 376.6602478027344, "learning_rate": 2.2598476228865078e-06, "loss": 18.1509, "step": 358080 }, { "epoch": 0.7233644557747548, "grad_norm": 134.86099243164062, "learning_rate": 2.2595556505134885e-06, "loss": 11.6993, "step": 358090 }, { "epoch": 0.7233846564074387, "grad_norm": 246.8845672607422, "learning_rate": 2.2592636914970633e-06, "loss": 14.23, "step": 358100 }, { "epoch": 0.7234048570401225, "grad_norm": 101.54085540771484, "learning_rate": 2.258971745838652e-06, "loss": 8.6617, "step": 358110 }, { "epoch": 0.7234250576728063, "grad_norm": 202.16807556152344, "learning_rate": 2.2586798135396824e-06, "loss": 13.9432, "step": 358120 }, { "epoch": 0.7234452583054901, "grad_norm": 86.7498550415039, "learning_rate": 2.258387894601575e-06, "loss": 8.7832, "step": 358130 }, { "epoch": 0.7234654589381739, "grad_norm": 155.08056640625, "learning_rate": 2.2580959890257496e-06, "loss": 14.2589, "step": 358140 }, { "epoch": 0.7234856595708578, "grad_norm": 482.8001708984375, "learning_rate": 2.2578040968136326e-06, "loss": 18.5724, "step": 358150 }, { "epoch": 0.7235058602035416, "grad_norm": 174.7190704345703, "learning_rate": 2.25751221796665e-06, "loss": 22.7118, "step": 358160 }, { "epoch": 0.7235260608362254, "grad_norm": 101.64354705810547, "learning_rate": 2.257220352486216e-06, "loss": 17.6635, "step": 358170 }, { "epoch": 0.7235462614689092, "grad_norm": 540.1415405273438, "learning_rate": 2.2569285003737567e-06, "loss": 15.851, "step": 358180 }, { "epoch": 0.723566462101593, "grad_norm": 250.85702514648438, "learning_rate": 2.256636661630698e-06, "loss": 9.8908, "step": 358190 }, { "epoch": 0.7235866627342769, "grad_norm": 281.3968811035156, "learning_rate": 2.256344836258459e-06, "loss": 18.2179, "step": 358200 }, { "epoch": 0.7236068633669607, "grad_norm": 403.9873352050781, "learning_rate": 2.2560530242584604e-06, "loss": 12.9648, "step": 358210 }, { "epoch": 0.7236270639996445, "grad_norm": 490.8674621582031, "learning_rate": 2.255761225632129e-06, "loss": 28.1752, "step": 358220 }, { "epoch": 0.7236472646323283, "grad_norm": 224.1666259765625, "learning_rate": 2.255469440380885e-06, "loss": 20.1477, "step": 358230 }, { "epoch": 0.7236674652650121, "grad_norm": 99.33848571777344, "learning_rate": 2.255177668506147e-06, "loss": 10.1643, "step": 358240 }, { "epoch": 0.723687665897696, "grad_norm": 36.391273498535156, "learning_rate": 2.254885910009341e-06, "loss": 10.233, "step": 358250 }, { "epoch": 0.7237078665303798, "grad_norm": 518.534423828125, "learning_rate": 2.2545941648918897e-06, "loss": 19.6563, "step": 358260 }, { "epoch": 0.7237280671630636, "grad_norm": 56.04056167602539, "learning_rate": 2.2543024331552133e-06, "loss": 20.3348, "step": 358270 }, { "epoch": 0.7237482677957474, "grad_norm": 158.05213928222656, "learning_rate": 2.2540107148007316e-06, "loss": 17.9473, "step": 358280 }, { "epoch": 0.7237684684284312, "grad_norm": 54.571067810058594, "learning_rate": 2.253719009829871e-06, "loss": 9.1222, "step": 358290 }, { "epoch": 0.7237886690611149, "grad_norm": 109.19054412841797, "learning_rate": 2.2534273182440515e-06, "loss": 11.8646, "step": 358300 }, { "epoch": 0.7238088696937988, "grad_norm": 40.6551628112793, "learning_rate": 2.2531356400446913e-06, "loss": 11.0887, "step": 358310 }, { "epoch": 0.7238290703264826, "grad_norm": 32.4726448059082, "learning_rate": 2.252843975233217e-06, "loss": 13.5501, "step": 358320 }, { "epoch": 0.7238492709591664, "grad_norm": 265.2007751464844, "learning_rate": 2.2525523238110465e-06, "loss": 17.8188, "step": 358330 }, { "epoch": 0.7238694715918502, "grad_norm": 286.80010986328125, "learning_rate": 2.2522606857796036e-06, "loss": 14.2289, "step": 358340 }, { "epoch": 0.723889672224534, "grad_norm": 1331.595458984375, "learning_rate": 2.25196906114031e-06, "loss": 15.4297, "step": 358350 }, { "epoch": 0.7239098728572179, "grad_norm": 267.7685241699219, "learning_rate": 2.251677449894583e-06, "loss": 21.5479, "step": 358360 }, { "epoch": 0.7239300734899017, "grad_norm": 12.107305526733398, "learning_rate": 2.2513858520438497e-06, "loss": 11.2891, "step": 358370 }, { "epoch": 0.7239502741225855, "grad_norm": 267.81005859375, "learning_rate": 2.2510942675895277e-06, "loss": 14.4994, "step": 358380 }, { "epoch": 0.7239704747552693, "grad_norm": 518.7481689453125, "learning_rate": 2.250802696533037e-06, "loss": 25.238, "step": 358390 }, { "epoch": 0.7239906753879531, "grad_norm": 316.18701171875, "learning_rate": 2.250511138875801e-06, "loss": 7.6155, "step": 358400 }, { "epoch": 0.724010876020637, "grad_norm": 395.47625732421875, "learning_rate": 2.250219594619242e-06, "loss": 13.7433, "step": 358410 }, { "epoch": 0.7240310766533208, "grad_norm": 141.8294219970703, "learning_rate": 2.2499280637647785e-06, "loss": 14.2265, "step": 358420 }, { "epoch": 0.7240512772860046, "grad_norm": 230.67630004882812, "learning_rate": 2.249636546313831e-06, "loss": 18.1114, "step": 358430 }, { "epoch": 0.7240714779186884, "grad_norm": 81.55047607421875, "learning_rate": 2.2493450422678224e-06, "loss": 25.4966, "step": 358440 }, { "epoch": 0.7240916785513722, "grad_norm": 352.6331787109375, "learning_rate": 2.249053551628173e-06, "loss": 20.3758, "step": 358450 }, { "epoch": 0.7241118791840561, "grad_norm": 233.20077514648438, "learning_rate": 2.248762074396301e-06, "loss": 24.4504, "step": 358460 }, { "epoch": 0.7241320798167399, "grad_norm": 381.57452392578125, "learning_rate": 2.2484706105736294e-06, "loss": 12.6143, "step": 358470 }, { "epoch": 0.7241522804494237, "grad_norm": 425.6722106933594, "learning_rate": 2.2481791601615797e-06, "loss": 14.72, "step": 358480 }, { "epoch": 0.7241724810821075, "grad_norm": 449.74481201171875, "learning_rate": 2.247887723161571e-06, "loss": 16.7269, "step": 358490 }, { "epoch": 0.7241926817147913, "grad_norm": 619.2308349609375, "learning_rate": 2.2475962995750224e-06, "loss": 18.0948, "step": 358500 }, { "epoch": 0.7242128823474752, "grad_norm": 134.9829559326172, "learning_rate": 2.2473048894033566e-06, "loss": 8.1842, "step": 358510 }, { "epoch": 0.724233082980159, "grad_norm": 536.5662231445312, "learning_rate": 2.247013492647994e-06, "loss": 18.0933, "step": 358520 }, { "epoch": 0.7242532836128428, "grad_norm": 245.47134399414062, "learning_rate": 2.246722109310351e-06, "loss": 17.8211, "step": 358530 }, { "epoch": 0.7242734842455266, "grad_norm": 366.67205810546875, "learning_rate": 2.2464307393918523e-06, "loss": 26.8166, "step": 358540 }, { "epoch": 0.7242936848782103, "grad_norm": 265.507080078125, "learning_rate": 2.246139382893915e-06, "loss": 22.8495, "step": 358550 }, { "epoch": 0.7243138855108942, "grad_norm": 360.2337951660156, "learning_rate": 2.2458480398179615e-06, "loss": 37.8161, "step": 358560 }, { "epoch": 0.724334086143578, "grad_norm": 293.1464538574219, "learning_rate": 2.245556710165409e-06, "loss": 14.4267, "step": 358570 }, { "epoch": 0.7243542867762618, "grad_norm": 532.0188598632812, "learning_rate": 2.245265393937681e-06, "loss": 22.676, "step": 358580 }, { "epoch": 0.7243744874089456, "grad_norm": 384.99798583984375, "learning_rate": 2.2449740911361955e-06, "loss": 19.6062, "step": 358590 }, { "epoch": 0.7243946880416294, "grad_norm": 251.82794189453125, "learning_rate": 2.24468280176237e-06, "loss": 11.62, "step": 358600 }, { "epoch": 0.7244148886743133, "grad_norm": 394.1304626464844, "learning_rate": 2.2443915258176283e-06, "loss": 14.7176, "step": 358610 }, { "epoch": 0.7244350893069971, "grad_norm": 18.310619354248047, "learning_rate": 2.2441002633033865e-06, "loss": 12.7041, "step": 358620 }, { "epoch": 0.7244552899396809, "grad_norm": 320.12420654296875, "learning_rate": 2.243809014221068e-06, "loss": 19.9087, "step": 358630 }, { "epoch": 0.7244754905723647, "grad_norm": 3.3724231719970703, "learning_rate": 2.243517778572089e-06, "loss": 28.3331, "step": 358640 }, { "epoch": 0.7244956912050485, "grad_norm": 186.55284118652344, "learning_rate": 2.2432265563578686e-06, "loss": 7.2752, "step": 358650 }, { "epoch": 0.7245158918377324, "grad_norm": 169.7702178955078, "learning_rate": 2.2429353475798298e-06, "loss": 7.2917, "step": 358660 }, { "epoch": 0.7245360924704162, "grad_norm": 311.3572998046875, "learning_rate": 2.2426441522393893e-06, "loss": 15.5679, "step": 358670 }, { "epoch": 0.7245562931031, "grad_norm": 269.85247802734375, "learning_rate": 2.2423529703379646e-06, "loss": 23.8924, "step": 358680 }, { "epoch": 0.7245764937357838, "grad_norm": 22.668136596679688, "learning_rate": 2.242061801876978e-06, "loss": 9.7468, "step": 358690 }, { "epoch": 0.7245966943684676, "grad_norm": 159.8321533203125, "learning_rate": 2.2417706468578495e-06, "loss": 18.1073, "step": 358700 }, { "epoch": 0.7246168950011515, "grad_norm": 483.7013244628906, "learning_rate": 2.2414795052819956e-06, "loss": 32.5769, "step": 358710 }, { "epoch": 0.7246370956338353, "grad_norm": 352.2583312988281, "learning_rate": 2.241188377150834e-06, "loss": 23.1521, "step": 358720 }, { "epoch": 0.7246572962665191, "grad_norm": 86.16938018798828, "learning_rate": 2.240897262465788e-06, "loss": 24.9257, "step": 358730 }, { "epoch": 0.7246774968992029, "grad_norm": 7.6055908203125, "learning_rate": 2.240606161228274e-06, "loss": 15.0282, "step": 358740 }, { "epoch": 0.7246976975318867, "grad_norm": 316.9888000488281, "learning_rate": 2.2403150734397095e-06, "loss": 14.8761, "step": 358750 }, { "epoch": 0.7247178981645706, "grad_norm": 214.02569580078125, "learning_rate": 2.2400239991015144e-06, "loss": 14.7237, "step": 358760 }, { "epoch": 0.7247380987972544, "grad_norm": 109.19200134277344, "learning_rate": 2.239732938215111e-06, "loss": 10.2398, "step": 358770 }, { "epoch": 0.7247582994299382, "grad_norm": 465.0061950683594, "learning_rate": 2.239441890781911e-06, "loss": 14.3741, "step": 358780 }, { "epoch": 0.724778500062622, "grad_norm": 315.34808349609375, "learning_rate": 2.239150856803336e-06, "loss": 11.0552, "step": 358790 }, { "epoch": 0.7247987006953058, "grad_norm": 384.1463623046875, "learning_rate": 2.2388598362808074e-06, "loss": 26.1565, "step": 358800 }, { "epoch": 0.7248189013279895, "grad_norm": 181.23043823242188, "learning_rate": 2.2385688292157405e-06, "loss": 10.3301, "step": 358810 }, { "epoch": 0.7248391019606734, "grad_norm": 140.66348266601562, "learning_rate": 2.2382778356095524e-06, "loss": 15.0977, "step": 358820 }, { "epoch": 0.7248593025933572, "grad_norm": 168.4095001220703, "learning_rate": 2.2379868554636653e-06, "loss": 16.1471, "step": 358830 }, { "epoch": 0.724879503226041, "grad_norm": 144.32022094726562, "learning_rate": 2.2376958887794953e-06, "loss": 16.6979, "step": 358840 }, { "epoch": 0.7248997038587248, "grad_norm": 263.641845703125, "learning_rate": 2.2374049355584583e-06, "loss": 9.4825, "step": 358850 }, { "epoch": 0.7249199044914086, "grad_norm": 302.3390197753906, "learning_rate": 2.237113995801975e-06, "loss": 38.5727, "step": 358860 }, { "epoch": 0.7249401051240925, "grad_norm": 7.518458366394043, "learning_rate": 2.2368230695114644e-06, "loss": 29.6606, "step": 358870 }, { "epoch": 0.7249603057567763, "grad_norm": 17.497325897216797, "learning_rate": 2.2365321566883437e-06, "loss": 29.5275, "step": 358880 }, { "epoch": 0.7249805063894601, "grad_norm": 478.6875305175781, "learning_rate": 2.2362412573340274e-06, "loss": 21.7222, "step": 358890 }, { "epoch": 0.7250007070221439, "grad_norm": 13.899215698242188, "learning_rate": 2.235950371449938e-06, "loss": 13.4242, "step": 358900 }, { "epoch": 0.7250209076548277, "grad_norm": 379.40570068359375, "learning_rate": 2.235659499037492e-06, "loss": 13.6615, "step": 358910 }, { "epoch": 0.7250411082875116, "grad_norm": 141.05178833007812, "learning_rate": 2.2353686400981038e-06, "loss": 19.5369, "step": 358920 }, { "epoch": 0.7250613089201954, "grad_norm": 322.4009704589844, "learning_rate": 2.235077794633196e-06, "loss": 14.4313, "step": 358930 }, { "epoch": 0.7250815095528792, "grad_norm": 336.5608215332031, "learning_rate": 2.234786962644181e-06, "loss": 14.9405, "step": 358940 }, { "epoch": 0.725101710185563, "grad_norm": 523.0360717773438, "learning_rate": 2.2344961441324814e-06, "loss": 19.9993, "step": 358950 }, { "epoch": 0.7251219108182468, "grad_norm": 21.858196258544922, "learning_rate": 2.2342053390995117e-06, "loss": 12.5908, "step": 358960 }, { "epoch": 0.7251421114509307, "grad_norm": 21.843706130981445, "learning_rate": 2.2339145475466885e-06, "loss": 13.9439, "step": 358970 }, { "epoch": 0.7251623120836145, "grad_norm": 208.50442504882812, "learning_rate": 2.2336237694754314e-06, "loss": 24.7206, "step": 358980 }, { "epoch": 0.7251825127162983, "grad_norm": 181.96279907226562, "learning_rate": 2.233333004887157e-06, "loss": 14.2335, "step": 358990 }, { "epoch": 0.7252027133489821, "grad_norm": 48.79985809326172, "learning_rate": 2.23304225378328e-06, "loss": 10.1609, "step": 359000 }, { "epoch": 0.7252229139816659, "grad_norm": 374.2315368652344, "learning_rate": 2.2327515161652196e-06, "loss": 14.258, "step": 359010 }, { "epoch": 0.7252431146143498, "grad_norm": 211.30712890625, "learning_rate": 2.232460792034395e-06, "loss": 16.5922, "step": 359020 }, { "epoch": 0.7252633152470336, "grad_norm": 366.4339904785156, "learning_rate": 2.2321700813922205e-06, "loss": 13.0304, "step": 359030 }, { "epoch": 0.7252835158797174, "grad_norm": 10.903585433959961, "learning_rate": 2.231879384240111e-06, "loss": 25.1773, "step": 359040 }, { "epoch": 0.7253037165124012, "grad_norm": 70.9896011352539, "learning_rate": 2.231588700579488e-06, "loss": 6.9916, "step": 359050 }, { "epoch": 0.7253239171450849, "grad_norm": 318.41094970703125, "learning_rate": 2.2312980304117656e-06, "loss": 13.2802, "step": 359060 }, { "epoch": 0.7253441177777687, "grad_norm": 3.024660587310791, "learning_rate": 2.2310073737383593e-06, "loss": 15.7357, "step": 359070 }, { "epoch": 0.7253643184104526, "grad_norm": 199.41903686523438, "learning_rate": 2.230716730560687e-06, "loss": 17.8427, "step": 359080 }, { "epoch": 0.7253845190431364, "grad_norm": 104.36315155029297, "learning_rate": 2.230426100880167e-06, "loss": 16.4016, "step": 359090 }, { "epoch": 0.7254047196758202, "grad_norm": 98.3814697265625, "learning_rate": 2.2301354846982148e-06, "loss": 15.3243, "step": 359100 }, { "epoch": 0.725424920308504, "grad_norm": 50.13833236694336, "learning_rate": 2.2298448820162438e-06, "loss": 11.3162, "step": 359110 }, { "epoch": 0.7254451209411878, "grad_norm": 109.08189392089844, "learning_rate": 2.2295542928356755e-06, "loss": 19.3582, "step": 359120 }, { "epoch": 0.7254653215738717, "grad_norm": 272.020263671875, "learning_rate": 2.229263717157923e-06, "loss": 10.6712, "step": 359130 }, { "epoch": 0.7254855222065555, "grad_norm": 289.6346435546875, "learning_rate": 2.2289731549844018e-06, "loss": 18.0658, "step": 359140 }, { "epoch": 0.7255057228392393, "grad_norm": 411.3837585449219, "learning_rate": 2.228682606316529e-06, "loss": 18.0259, "step": 359150 }, { "epoch": 0.7255259234719231, "grad_norm": 193.7371368408203, "learning_rate": 2.2283920711557226e-06, "loss": 17.7662, "step": 359160 }, { "epoch": 0.725546124104607, "grad_norm": 500.78741455078125, "learning_rate": 2.2281015495033975e-06, "loss": 16.2329, "step": 359170 }, { "epoch": 0.7255663247372908, "grad_norm": 491.4415588378906, "learning_rate": 2.227811041360967e-06, "loss": 26.3721, "step": 359180 }, { "epoch": 0.7255865253699746, "grad_norm": 278.33935546875, "learning_rate": 2.2275205467298515e-06, "loss": 22.8303, "step": 359190 }, { "epoch": 0.7256067260026584, "grad_norm": 334.75543212890625, "learning_rate": 2.2272300656114648e-06, "loss": 15.975, "step": 359200 }, { "epoch": 0.7256269266353422, "grad_norm": 277.4870300292969, "learning_rate": 2.2269395980072206e-06, "loss": 23.0086, "step": 359210 }, { "epoch": 0.725647127268026, "grad_norm": 512.50537109375, "learning_rate": 2.226649143918538e-06, "loss": 17.1548, "step": 359220 }, { "epoch": 0.7256673279007099, "grad_norm": 1228.03662109375, "learning_rate": 2.2263587033468293e-06, "loss": 19.0559, "step": 359230 }, { "epoch": 0.7256875285333937, "grad_norm": 239.3020477294922, "learning_rate": 2.2260682762935137e-06, "loss": 14.9295, "step": 359240 }, { "epoch": 0.7257077291660775, "grad_norm": 664.602294921875, "learning_rate": 2.2257778627600044e-06, "loss": 19.8334, "step": 359250 }, { "epoch": 0.7257279297987613, "grad_norm": 227.204345703125, "learning_rate": 2.2254874627477164e-06, "loss": 13.6155, "step": 359260 }, { "epoch": 0.7257481304314451, "grad_norm": 243.64254760742188, "learning_rate": 2.2251970762580675e-06, "loss": 13.0671, "step": 359270 }, { "epoch": 0.725768331064129, "grad_norm": 18.833589553833008, "learning_rate": 2.2249067032924715e-06, "loss": 18.9543, "step": 359280 }, { "epoch": 0.7257885316968128, "grad_norm": 101.23211669921875, "learning_rate": 2.2246163438523417e-06, "loss": 16.9889, "step": 359290 }, { "epoch": 0.7258087323294966, "grad_norm": 237.12060546875, "learning_rate": 2.224325997939095e-06, "loss": 11.6359, "step": 359300 }, { "epoch": 0.7258289329621804, "grad_norm": 237.32522583007812, "learning_rate": 2.2240356655541488e-06, "loss": 18.2871, "step": 359310 }, { "epoch": 0.7258491335948641, "grad_norm": 19.09532928466797, "learning_rate": 2.223745346698917e-06, "loss": 10.5122, "step": 359320 }, { "epoch": 0.725869334227548, "grad_norm": 225.32827758789062, "learning_rate": 2.2234550413748106e-06, "loss": 16.6745, "step": 359330 }, { "epoch": 0.7258895348602318, "grad_norm": 899.34033203125, "learning_rate": 2.2231647495832496e-06, "loss": 19.2738, "step": 359340 }, { "epoch": 0.7259097354929156, "grad_norm": 10.996953964233398, "learning_rate": 2.222874471325647e-06, "loss": 14.401, "step": 359350 }, { "epoch": 0.7259299361255994, "grad_norm": 194.322509765625, "learning_rate": 2.222584206603416e-06, "loss": 15.9225, "step": 359360 }, { "epoch": 0.7259501367582832, "grad_norm": 198.6556396484375, "learning_rate": 2.222293955417972e-06, "loss": 16.709, "step": 359370 }, { "epoch": 0.7259703373909671, "grad_norm": 215.99627685546875, "learning_rate": 2.2220037177707342e-06, "loss": 18.2352, "step": 359380 }, { "epoch": 0.7259905380236509, "grad_norm": 143.17730712890625, "learning_rate": 2.2217134936631095e-06, "loss": 9.9757, "step": 359390 }, { "epoch": 0.7260107386563347, "grad_norm": 271.55865478515625, "learning_rate": 2.221423283096517e-06, "loss": 13.1041, "step": 359400 }, { "epoch": 0.7260309392890185, "grad_norm": 485.2309265136719, "learning_rate": 2.221133086072372e-06, "loss": 22.931, "step": 359410 }, { "epoch": 0.7260511399217023, "grad_norm": 239.90501403808594, "learning_rate": 2.220842902592087e-06, "loss": 14.1043, "step": 359420 }, { "epoch": 0.7260713405543862, "grad_norm": 19.61916732788086, "learning_rate": 2.220552732657075e-06, "loss": 17.413, "step": 359430 }, { "epoch": 0.72609154118707, "grad_norm": 503.21221923828125, "learning_rate": 2.2202625762687533e-06, "loss": 13.4475, "step": 359440 }, { "epoch": 0.7261117418197538, "grad_norm": 149.0402374267578, "learning_rate": 2.219972433428535e-06, "loss": 14.2539, "step": 359450 }, { "epoch": 0.7261319424524376, "grad_norm": 107.77302551269531, "learning_rate": 2.2196823041378325e-06, "loss": 12.6686, "step": 359460 }, { "epoch": 0.7261521430851214, "grad_norm": 110.4867172241211, "learning_rate": 2.21939218839806e-06, "loss": 7.4569, "step": 359470 }, { "epoch": 0.7261723437178053, "grad_norm": 210.55955505371094, "learning_rate": 2.2191020862106353e-06, "loss": 16.6358, "step": 359480 }, { "epoch": 0.7261925443504891, "grad_norm": 183.5537872314453, "learning_rate": 2.21881199757697e-06, "loss": 23.8701, "step": 359490 }, { "epoch": 0.7262127449831729, "grad_norm": 203.51669311523438, "learning_rate": 2.218521922498476e-06, "loss": 21.5479, "step": 359500 }, { "epoch": 0.7262329456158567, "grad_norm": 130.28781127929688, "learning_rate": 2.2182318609765703e-06, "loss": 15.4903, "step": 359510 }, { "epoch": 0.7262531462485405, "grad_norm": 231.99510192871094, "learning_rate": 2.217941813012665e-06, "loss": 32.2102, "step": 359520 }, { "epoch": 0.7262733468812244, "grad_norm": 52.69635772705078, "learning_rate": 2.217651778608172e-06, "loss": 12.51, "step": 359530 }, { "epoch": 0.7262935475139082, "grad_norm": 316.3515319824219, "learning_rate": 2.217361757764509e-06, "loss": 7.0852, "step": 359540 }, { "epoch": 0.726313748146592, "grad_norm": 300.9638977050781, "learning_rate": 2.217071750483085e-06, "loss": 11.419, "step": 359550 }, { "epoch": 0.7263339487792758, "grad_norm": 302.1546936035156, "learning_rate": 2.2167817567653176e-06, "loss": 15.4344, "step": 359560 }, { "epoch": 0.7263541494119596, "grad_norm": 498.55426025390625, "learning_rate": 2.216491776612619e-06, "loss": 27.9888, "step": 359570 }, { "epoch": 0.7263743500446433, "grad_norm": 93.27960205078125, "learning_rate": 2.2162018100263995e-06, "loss": 8.6425, "step": 359580 }, { "epoch": 0.7263945506773272, "grad_norm": 55.91289138793945, "learning_rate": 2.215911857008077e-06, "loss": 12.0494, "step": 359590 }, { "epoch": 0.726414751310011, "grad_norm": 466.63409423828125, "learning_rate": 2.2156219175590623e-06, "loss": 21.6595, "step": 359600 }, { "epoch": 0.7264349519426948, "grad_norm": 237.260498046875, "learning_rate": 2.215331991680766e-06, "loss": 11.7843, "step": 359610 }, { "epoch": 0.7264551525753786, "grad_norm": 230.4551239013672, "learning_rate": 2.215042079374605e-06, "loss": 11.8585, "step": 359620 }, { "epoch": 0.7264753532080624, "grad_norm": 179.90518188476562, "learning_rate": 2.214752180641992e-06, "loss": 16.9804, "step": 359630 }, { "epoch": 0.7264955538407463, "grad_norm": 397.14501953125, "learning_rate": 2.2144622954843396e-06, "loss": 22.2605, "step": 359640 }, { "epoch": 0.7265157544734301, "grad_norm": 214.158935546875, "learning_rate": 2.214172423903058e-06, "loss": 20.3478, "step": 359650 }, { "epoch": 0.7265359551061139, "grad_norm": 263.8233642578125, "learning_rate": 2.2138825658995645e-06, "loss": 20.7943, "step": 359660 }, { "epoch": 0.7265561557387977, "grad_norm": 0.0, "learning_rate": 2.213592721475269e-06, "loss": 14.1737, "step": 359670 }, { "epoch": 0.7265763563714815, "grad_norm": 227.76454162597656, "learning_rate": 2.213302890631583e-06, "loss": 15.7395, "step": 359680 }, { "epoch": 0.7265965570041654, "grad_norm": 480.5167236328125, "learning_rate": 2.2130130733699206e-06, "loss": 21.9331, "step": 359690 }, { "epoch": 0.7266167576368492, "grad_norm": 411.7996826171875, "learning_rate": 2.212723269691697e-06, "loss": 23.5108, "step": 359700 }, { "epoch": 0.726636958269533, "grad_norm": 320.2870178222656, "learning_rate": 2.212433479598321e-06, "loss": 9.6063, "step": 359710 }, { "epoch": 0.7266571589022168, "grad_norm": 437.1916198730469, "learning_rate": 2.2121437030912045e-06, "loss": 16.5147, "step": 359720 }, { "epoch": 0.7266773595349006, "grad_norm": 236.51560974121094, "learning_rate": 2.2118539401717636e-06, "loss": 19.2751, "step": 359730 }, { "epoch": 0.7266975601675845, "grad_norm": 360.0168151855469, "learning_rate": 2.2115641908414087e-06, "loss": 22.9462, "step": 359740 }, { "epoch": 0.7267177608002683, "grad_norm": 392.27294921875, "learning_rate": 2.2112744551015496e-06, "loss": 13.8659, "step": 359750 }, { "epoch": 0.7267379614329521, "grad_norm": 3.9393136501312256, "learning_rate": 2.2109847329536005e-06, "loss": 10.3314, "step": 359760 }, { "epoch": 0.7267581620656359, "grad_norm": 217.58135986328125, "learning_rate": 2.2106950243989754e-06, "loss": 13.0819, "step": 359770 }, { "epoch": 0.7267783626983197, "grad_norm": 254.57762145996094, "learning_rate": 2.2104053294390847e-06, "loss": 22.2647, "step": 359780 }, { "epoch": 0.7267985633310036, "grad_norm": 77.20474243164062, "learning_rate": 2.210115648075338e-06, "loss": 13.1966, "step": 359790 }, { "epoch": 0.7268187639636874, "grad_norm": 403.8537902832031, "learning_rate": 2.209825980309151e-06, "loss": 19.9957, "step": 359800 }, { "epoch": 0.7268389645963712, "grad_norm": 327.754638671875, "learning_rate": 2.209536326141934e-06, "loss": 13.9549, "step": 359810 }, { "epoch": 0.726859165229055, "grad_norm": 608.7437744140625, "learning_rate": 2.2092466855750966e-06, "loss": 22.9616, "step": 359820 }, { "epoch": 0.7268793658617387, "grad_norm": 209.6382293701172, "learning_rate": 2.2089570586100545e-06, "loss": 14.3323, "step": 359830 }, { "epoch": 0.7268995664944226, "grad_norm": 240.98770141601562, "learning_rate": 2.208667445248215e-06, "loss": 10.8374, "step": 359840 }, { "epoch": 0.7269197671271064, "grad_norm": 412.45654296875, "learning_rate": 2.208377845490994e-06, "loss": 29.3666, "step": 359850 }, { "epoch": 0.7269399677597902, "grad_norm": 294.53717041015625, "learning_rate": 2.2080882593398e-06, "loss": 16.3301, "step": 359860 }, { "epoch": 0.726960168392474, "grad_norm": 147.00233459472656, "learning_rate": 2.2077986867960436e-06, "loss": 17.4383, "step": 359870 }, { "epoch": 0.7269803690251578, "grad_norm": 207.1067657470703, "learning_rate": 2.20750912786114e-06, "loss": 16.0649, "step": 359880 }, { "epoch": 0.7270005696578417, "grad_norm": 275.49993896484375, "learning_rate": 2.2072195825364983e-06, "loss": 24.1891, "step": 359890 }, { "epoch": 0.7270207702905255, "grad_norm": 250.59954833984375, "learning_rate": 2.2069300508235273e-06, "loss": 15.5027, "step": 359900 }, { "epoch": 0.7270409709232093, "grad_norm": 156.43067932128906, "learning_rate": 2.2066405327236413e-06, "loss": 11.5165, "step": 359910 }, { "epoch": 0.7270611715558931, "grad_norm": 196.00477600097656, "learning_rate": 2.2063510282382517e-06, "loss": 20.6583, "step": 359920 }, { "epoch": 0.7270813721885769, "grad_norm": 193.53799438476562, "learning_rate": 2.206061537368768e-06, "loss": 16.0899, "step": 359930 }, { "epoch": 0.7271015728212608, "grad_norm": 210.4393310546875, "learning_rate": 2.2057720601166004e-06, "loss": 16.4804, "step": 359940 }, { "epoch": 0.7271217734539446, "grad_norm": 241.4913330078125, "learning_rate": 2.2054825964831627e-06, "loss": 32.3542, "step": 359950 }, { "epoch": 0.7271419740866284, "grad_norm": 246.5613250732422, "learning_rate": 2.2051931464698636e-06, "loss": 20.4897, "step": 359960 }, { "epoch": 0.7271621747193122, "grad_norm": 232.8644256591797, "learning_rate": 2.2049037100781125e-06, "loss": 9.3791, "step": 359970 }, { "epoch": 0.727182375351996, "grad_norm": 102.3315658569336, "learning_rate": 2.204614287309321e-06, "loss": 16.4243, "step": 359980 }, { "epoch": 0.7272025759846799, "grad_norm": 428.9281005859375, "learning_rate": 2.204324878164905e-06, "loss": 17.5576, "step": 359990 }, { "epoch": 0.7272227766173637, "grad_norm": 359.7954406738281, "learning_rate": 2.204035482646267e-06, "loss": 14.9906, "step": 360000 }, { "epoch": 0.7272429772500475, "grad_norm": 369.4302062988281, "learning_rate": 2.20374610075482e-06, "loss": 26.2978, "step": 360010 }, { "epoch": 0.7272631778827313, "grad_norm": 262.4725646972656, "learning_rate": 2.2034567324919774e-06, "loss": 6.7904, "step": 360020 }, { "epoch": 0.7272833785154151, "grad_norm": 480.4075622558594, "learning_rate": 2.2031673778591477e-06, "loss": 20.425, "step": 360030 }, { "epoch": 0.727303579148099, "grad_norm": 127.73979187011719, "learning_rate": 2.2028780368577395e-06, "loss": 22.3044, "step": 360040 }, { "epoch": 0.7273237797807828, "grad_norm": 23.006282806396484, "learning_rate": 2.2025887094891657e-06, "loss": 19.9379, "step": 360050 }, { "epoch": 0.7273439804134666, "grad_norm": 253.8053741455078, "learning_rate": 2.202299395754836e-06, "loss": 13.749, "step": 360060 }, { "epoch": 0.7273641810461504, "grad_norm": 278.5460205078125, "learning_rate": 2.2020100956561576e-06, "loss": 17.6705, "step": 360070 }, { "epoch": 0.7273843816788342, "grad_norm": 169.5330047607422, "learning_rate": 2.201720809194542e-06, "loss": 13.9643, "step": 360080 }, { "epoch": 0.7274045823115179, "grad_norm": 229.326904296875, "learning_rate": 2.201431536371402e-06, "loss": 14.8426, "step": 360090 }, { "epoch": 0.7274247829442018, "grad_norm": 316.36004638671875, "learning_rate": 2.201142277188146e-06, "loss": 22.1122, "step": 360100 }, { "epoch": 0.7274449835768856, "grad_norm": 335.8208923339844, "learning_rate": 2.20085303164618e-06, "loss": 34.1879, "step": 360110 }, { "epoch": 0.7274651842095694, "grad_norm": 438.3086853027344, "learning_rate": 2.2005637997469194e-06, "loss": 22.1837, "step": 360120 }, { "epoch": 0.7274853848422532, "grad_norm": 444.8335266113281, "learning_rate": 2.2002745814917716e-06, "loss": 23.8976, "step": 360130 }, { "epoch": 0.727505585474937, "grad_norm": 366.5465087890625, "learning_rate": 2.1999853768821433e-06, "loss": 16.5816, "step": 360140 }, { "epoch": 0.7275257861076209, "grad_norm": 540.4655151367188, "learning_rate": 2.1996961859194487e-06, "loss": 13.8446, "step": 360150 }, { "epoch": 0.7275459867403047, "grad_norm": 458.49664306640625, "learning_rate": 2.1994070086050937e-06, "loss": 17.3515, "step": 360160 }, { "epoch": 0.7275661873729885, "grad_norm": 497.86810302734375, "learning_rate": 2.199117844940491e-06, "loss": 12.8746, "step": 360170 }, { "epoch": 0.7275863880056723, "grad_norm": 28.408031463623047, "learning_rate": 2.198828694927048e-06, "loss": 17.8337, "step": 360180 }, { "epoch": 0.7276065886383561, "grad_norm": 232.59349060058594, "learning_rate": 2.198539558566173e-06, "loss": 28.6549, "step": 360190 }, { "epoch": 0.72762678927104, "grad_norm": 227.22988891601562, "learning_rate": 2.1982504358592777e-06, "loss": 28.3088, "step": 360200 }, { "epoch": 0.7276469899037238, "grad_norm": 385.22894287109375, "learning_rate": 2.1979613268077684e-06, "loss": 11.8383, "step": 360210 }, { "epoch": 0.7276671905364076, "grad_norm": 47.670433044433594, "learning_rate": 2.1976722314130576e-06, "loss": 11.9218, "step": 360220 }, { "epoch": 0.7276873911690914, "grad_norm": 263.89013671875, "learning_rate": 2.1973831496765503e-06, "loss": 16.4215, "step": 360230 }, { "epoch": 0.7277075918017752, "grad_norm": 549.816650390625, "learning_rate": 2.1970940815996592e-06, "loss": 18.3059, "step": 360240 }, { "epoch": 0.7277277924344591, "grad_norm": 237.64840698242188, "learning_rate": 2.1968050271837926e-06, "loss": 15.3328, "step": 360250 }, { "epoch": 0.7277479930671429, "grad_norm": 347.87786865234375, "learning_rate": 2.196515986430356e-06, "loss": 33.7964, "step": 360260 }, { "epoch": 0.7277681936998267, "grad_norm": 93.40373992919922, "learning_rate": 2.196226959340762e-06, "loss": 7.252, "step": 360270 }, { "epoch": 0.7277883943325105, "grad_norm": 278.6224670410156, "learning_rate": 2.195937945916418e-06, "loss": 17.3044, "step": 360280 }, { "epoch": 0.7278085949651943, "grad_norm": 0.0, "learning_rate": 2.1956489461587307e-06, "loss": 15.7651, "step": 360290 }, { "epoch": 0.7278287955978782, "grad_norm": 336.8443603515625, "learning_rate": 2.19535996006911e-06, "loss": 21.3633, "step": 360300 }, { "epoch": 0.727848996230562, "grad_norm": 303.0080871582031, "learning_rate": 2.195070987648966e-06, "loss": 23.3836, "step": 360310 }, { "epoch": 0.7278691968632458, "grad_norm": 122.2686538696289, "learning_rate": 2.1947820288997067e-06, "loss": 9.5106, "step": 360320 }, { "epoch": 0.7278893974959296, "grad_norm": 304.7421875, "learning_rate": 2.1944930838227374e-06, "loss": 11.9995, "step": 360330 }, { "epoch": 0.7279095981286133, "grad_norm": 297.1445617675781, "learning_rate": 2.1942041524194705e-06, "loss": 19.2026, "step": 360340 }, { "epoch": 0.7279297987612972, "grad_norm": 160.26055908203125, "learning_rate": 2.193915234691312e-06, "loss": 11.7346, "step": 360350 }, { "epoch": 0.727949999393981, "grad_norm": 118.5428466796875, "learning_rate": 2.1936263306396688e-06, "loss": 16.0866, "step": 360360 }, { "epoch": 0.7279702000266648, "grad_norm": 1.3943226337432861, "learning_rate": 2.1933374402659502e-06, "loss": 26.9275, "step": 360370 }, { "epoch": 0.7279904006593486, "grad_norm": 278.4662170410156, "learning_rate": 2.1930485635715665e-06, "loss": 18.5338, "step": 360380 }, { "epoch": 0.7280106012920324, "grad_norm": 216.89944458007812, "learning_rate": 2.1927597005579236e-06, "loss": 19.9683, "step": 360390 }, { "epoch": 0.7280308019247163, "grad_norm": 1167.5286865234375, "learning_rate": 2.192470851226428e-06, "loss": 20.1165, "step": 360400 }, { "epoch": 0.7280510025574001, "grad_norm": 334.60546875, "learning_rate": 2.19218201557849e-06, "loss": 13.0403, "step": 360410 }, { "epoch": 0.7280712031900839, "grad_norm": 472.5481872558594, "learning_rate": 2.1918931936155167e-06, "loss": 21.5773, "step": 360420 }, { "epoch": 0.7280914038227677, "grad_norm": 379.5268859863281, "learning_rate": 2.191604385338914e-06, "loss": 18.5379, "step": 360430 }, { "epoch": 0.7281116044554515, "grad_norm": 6.738478660583496, "learning_rate": 2.1913155907500923e-06, "loss": 17.2367, "step": 360440 }, { "epoch": 0.7281318050881354, "grad_norm": 50.494903564453125, "learning_rate": 2.1910268098504562e-06, "loss": 8.5026, "step": 360450 }, { "epoch": 0.7281520057208192, "grad_norm": 172.7053985595703, "learning_rate": 2.190738042641416e-06, "loss": 9.4632, "step": 360460 }, { "epoch": 0.728172206353503, "grad_norm": 127.6079330444336, "learning_rate": 2.1904492891243785e-06, "loss": 27.0927, "step": 360470 }, { "epoch": 0.7281924069861868, "grad_norm": 399.0224914550781, "learning_rate": 2.190160549300748e-06, "loss": 23.0292, "step": 360480 }, { "epoch": 0.7282126076188706, "grad_norm": 194.73509216308594, "learning_rate": 2.189871823171936e-06, "loss": 11.0908, "step": 360490 }, { "epoch": 0.7282328082515545, "grad_norm": 206.2386474609375, "learning_rate": 2.1895831107393485e-06, "loss": 15.9138, "step": 360500 }, { "epoch": 0.7282530088842383, "grad_norm": 268.6193542480469, "learning_rate": 2.18929441200439e-06, "loss": 24.8321, "step": 360510 }, { "epoch": 0.7282732095169221, "grad_norm": 134.07916259765625, "learning_rate": 2.1890057269684695e-06, "loss": 21.9515, "step": 360520 }, { "epoch": 0.7282934101496059, "grad_norm": 27.34345245361328, "learning_rate": 2.1887170556329962e-06, "loss": 9.3634, "step": 360530 }, { "epoch": 0.7283136107822897, "grad_norm": 132.28298950195312, "learning_rate": 2.188428397999375e-06, "loss": 12.6064, "step": 360540 }, { "epoch": 0.7283338114149736, "grad_norm": 11.157598495483398, "learning_rate": 2.1881397540690106e-06, "loss": 8.4423, "step": 360550 }, { "epoch": 0.7283540120476574, "grad_norm": 504.2037048339844, "learning_rate": 2.187851123843314e-06, "loss": 20.3765, "step": 360560 }, { "epoch": 0.7283742126803412, "grad_norm": 24.038522720336914, "learning_rate": 2.18756250732369e-06, "loss": 13.8281, "step": 360570 }, { "epoch": 0.728394413313025, "grad_norm": 247.77316284179688, "learning_rate": 2.187273904511544e-06, "loss": 15.4947, "step": 360580 }, { "epoch": 0.7284146139457088, "grad_norm": 337.4891052246094, "learning_rate": 2.1869853154082828e-06, "loss": 12.1233, "step": 360590 }, { "epoch": 0.7284348145783925, "grad_norm": 191.09283447265625, "learning_rate": 2.1866967400153184e-06, "loss": 13.6891, "step": 360600 }, { "epoch": 0.7284550152110764, "grad_norm": 451.8763732910156, "learning_rate": 2.1864081783340484e-06, "loss": 16.725, "step": 360610 }, { "epoch": 0.7284752158437602, "grad_norm": 457.778564453125, "learning_rate": 2.1861196303658843e-06, "loss": 13.4116, "step": 360620 }, { "epoch": 0.728495416476444, "grad_norm": 235.13636779785156, "learning_rate": 2.1858310961122336e-06, "loss": 10.5218, "step": 360630 }, { "epoch": 0.7285156171091278, "grad_norm": 352.1008605957031, "learning_rate": 2.185542575574501e-06, "loss": 18.2875, "step": 360640 }, { "epoch": 0.7285358177418116, "grad_norm": 211.91964721679688, "learning_rate": 2.18525406875409e-06, "loss": 14.316, "step": 360650 }, { "epoch": 0.7285560183744955, "grad_norm": 201.18612670898438, "learning_rate": 2.184965575652412e-06, "loss": 12.1263, "step": 360660 }, { "epoch": 0.7285762190071793, "grad_norm": 459.8050842285156, "learning_rate": 2.18467709627087e-06, "loss": 15.4893, "step": 360670 }, { "epoch": 0.7285964196398631, "grad_norm": 10.585278511047363, "learning_rate": 2.1843886306108686e-06, "loss": 9.7937, "step": 360680 }, { "epoch": 0.7286166202725469, "grad_norm": 222.33837890625, "learning_rate": 2.184100178673815e-06, "loss": 7.9741, "step": 360690 }, { "epoch": 0.7286368209052307, "grad_norm": 220.4395294189453, "learning_rate": 2.183811740461118e-06, "loss": 29.3216, "step": 360700 }, { "epoch": 0.7286570215379146, "grad_norm": 316.5571594238281, "learning_rate": 2.183523315974181e-06, "loss": 18.2769, "step": 360710 }, { "epoch": 0.7286772221705984, "grad_norm": 235.94573974609375, "learning_rate": 2.183234905214408e-06, "loss": 24.1429, "step": 360720 }, { "epoch": 0.7286974228032822, "grad_norm": 288.64288330078125, "learning_rate": 2.182946508183208e-06, "loss": 31.4966, "step": 360730 }, { "epoch": 0.728717623435966, "grad_norm": 310.3736267089844, "learning_rate": 2.182658124881985e-06, "loss": 24.9094, "step": 360740 }, { "epoch": 0.7287378240686498, "grad_norm": 339.79766845703125, "learning_rate": 2.1823697553121432e-06, "loss": 16.3953, "step": 360750 }, { "epoch": 0.7287580247013337, "grad_norm": 176.7882843017578, "learning_rate": 2.1820813994750904e-06, "loss": 16.9962, "step": 360760 }, { "epoch": 0.7287782253340175, "grad_norm": 181.03692626953125, "learning_rate": 2.18179305737223e-06, "loss": 12.8856, "step": 360770 }, { "epoch": 0.7287984259667013, "grad_norm": 538.189697265625, "learning_rate": 2.1815047290049707e-06, "loss": 16.2669, "step": 360780 }, { "epoch": 0.7288186265993851, "grad_norm": 31.636962890625, "learning_rate": 2.1812164143747143e-06, "loss": 11.9591, "step": 360790 }, { "epoch": 0.7288388272320689, "grad_norm": 189.7004852294922, "learning_rate": 2.1809281134828663e-06, "loss": 17.8196, "step": 360800 }, { "epoch": 0.7288590278647528, "grad_norm": 104.81151580810547, "learning_rate": 2.1806398263308343e-06, "loss": 14.3664, "step": 360810 }, { "epoch": 0.7288792284974366, "grad_norm": 298.4812316894531, "learning_rate": 2.1803515529200204e-06, "loss": 24.2691, "step": 360820 }, { "epoch": 0.7288994291301204, "grad_norm": 193.35939025878906, "learning_rate": 2.1800632932518325e-06, "loss": 18.1362, "step": 360830 }, { "epoch": 0.7289196297628042, "grad_norm": 717.67626953125, "learning_rate": 2.179775047327672e-06, "loss": 23.1739, "step": 360840 }, { "epoch": 0.728939830395488, "grad_norm": 265.8473815917969, "learning_rate": 2.179486815148948e-06, "loss": 12.9999, "step": 360850 }, { "epoch": 0.7289600310281718, "grad_norm": 485.35101318359375, "learning_rate": 2.179198596717063e-06, "loss": 26.817, "step": 360860 }, { "epoch": 0.7289802316608556, "grad_norm": 734.91162109375, "learning_rate": 2.1789103920334205e-06, "loss": 28.2984, "step": 360870 }, { "epoch": 0.7290004322935394, "grad_norm": 126.39995574951172, "learning_rate": 2.178622201099428e-06, "loss": 35.4762, "step": 360880 }, { "epoch": 0.7290206329262232, "grad_norm": 588.4315795898438, "learning_rate": 2.178334023916489e-06, "loss": 28.1081, "step": 360890 }, { "epoch": 0.729040833558907, "grad_norm": 246.72479248046875, "learning_rate": 2.1780458604860056e-06, "loss": 13.9081, "step": 360900 }, { "epoch": 0.7290610341915909, "grad_norm": 374.6429443359375, "learning_rate": 2.1777577108093843e-06, "loss": 13.6366, "step": 360910 }, { "epoch": 0.7290812348242747, "grad_norm": 226.692626953125, "learning_rate": 2.177469574888034e-06, "loss": 17.6876, "step": 360920 }, { "epoch": 0.7291014354569585, "grad_norm": 100.94586181640625, "learning_rate": 2.17718145272335e-06, "loss": 16.7502, "step": 360930 }, { "epoch": 0.7291216360896423, "grad_norm": 238.69143676757812, "learning_rate": 2.1768933443167423e-06, "loss": 23.7208, "step": 360940 }, { "epoch": 0.7291418367223261, "grad_norm": 212.9239044189453, "learning_rate": 2.1766052496696155e-06, "loss": 28.3068, "step": 360950 }, { "epoch": 0.72916203735501, "grad_norm": 253.48846435546875, "learning_rate": 2.176317168783372e-06, "loss": 11.3916, "step": 360960 }, { "epoch": 0.7291822379876938, "grad_norm": 59.06787872314453, "learning_rate": 2.1760291016594143e-06, "loss": 8.7546, "step": 360970 }, { "epoch": 0.7292024386203776, "grad_norm": 258.9161071777344, "learning_rate": 2.1757410482991488e-06, "loss": 12.1634, "step": 360980 }, { "epoch": 0.7292226392530614, "grad_norm": 84.30406188964844, "learning_rate": 2.17545300870398e-06, "loss": 20.7575, "step": 360990 }, { "epoch": 0.7292428398857452, "grad_norm": 89.87386322021484, "learning_rate": 2.175164982875311e-06, "loss": 12.4138, "step": 361000 }, { "epoch": 0.729263040518429, "grad_norm": 256.37677001953125, "learning_rate": 2.1748769708145435e-06, "loss": 14.852, "step": 361010 }, { "epoch": 0.7292832411511129, "grad_norm": 232.28858947753906, "learning_rate": 2.1745889725230845e-06, "loss": 12.3563, "step": 361020 }, { "epoch": 0.7293034417837967, "grad_norm": 253.85897827148438, "learning_rate": 2.1743009880023364e-06, "loss": 14.2716, "step": 361030 }, { "epoch": 0.7293236424164805, "grad_norm": 185.53709411621094, "learning_rate": 2.174013017253701e-06, "loss": 15.8146, "step": 361040 }, { "epoch": 0.7293438430491643, "grad_norm": 1444.5364990234375, "learning_rate": 2.173725060278585e-06, "loss": 16.2077, "step": 361050 }, { "epoch": 0.7293640436818482, "grad_norm": 159.49757385253906, "learning_rate": 2.1734371170783888e-06, "loss": 15.2322, "step": 361060 }, { "epoch": 0.729384244314532, "grad_norm": 380.75384521484375, "learning_rate": 2.173149187654518e-06, "loss": 17.2736, "step": 361070 }, { "epoch": 0.7294044449472158, "grad_norm": 311.32806396484375, "learning_rate": 2.1728612720083764e-06, "loss": 23.7503, "step": 361080 }, { "epoch": 0.7294246455798996, "grad_norm": 424.0375061035156, "learning_rate": 2.172573370141364e-06, "loss": 17.5566, "step": 361090 }, { "epoch": 0.7294448462125834, "grad_norm": 350.7486572265625, "learning_rate": 2.1722854820548873e-06, "loss": 17.0999, "step": 361100 }, { "epoch": 0.7294650468452671, "grad_norm": 120.17621612548828, "learning_rate": 2.1719976077503484e-06, "loss": 15.8821, "step": 361110 }, { "epoch": 0.729485247477951, "grad_norm": 69.36054229736328, "learning_rate": 2.171709747229149e-06, "loss": 15.7934, "step": 361120 }, { "epoch": 0.7295054481106348, "grad_norm": 261.7652282714844, "learning_rate": 2.1714219004926923e-06, "loss": 13.5016, "step": 361130 }, { "epoch": 0.7295256487433186, "grad_norm": 202.45272827148438, "learning_rate": 2.1711340675423847e-06, "loss": 13.8777, "step": 361140 }, { "epoch": 0.7295458493760024, "grad_norm": 443.10491943359375, "learning_rate": 2.1708462483796263e-06, "loss": 17.5336, "step": 361150 }, { "epoch": 0.7295660500086862, "grad_norm": 23.073869705200195, "learning_rate": 2.170558443005818e-06, "loss": 26.7115, "step": 361160 }, { "epoch": 0.7295862506413701, "grad_norm": 215.3922882080078, "learning_rate": 2.170270651422367e-06, "loss": 10.8931, "step": 361170 }, { "epoch": 0.7296064512740539, "grad_norm": 256.45416259765625, "learning_rate": 2.1699828736306736e-06, "loss": 22.3653, "step": 361180 }, { "epoch": 0.7296266519067377, "grad_norm": 233.01913452148438, "learning_rate": 2.1696951096321383e-06, "loss": 18.4842, "step": 361190 }, { "epoch": 0.7296468525394215, "grad_norm": 339.0245666503906, "learning_rate": 2.1694073594281663e-06, "loss": 39.6248, "step": 361200 }, { "epoch": 0.7296670531721053, "grad_norm": 76.8597412109375, "learning_rate": 2.1691196230201626e-06, "loss": 11.8955, "step": 361210 }, { "epoch": 0.7296872538047892, "grad_norm": 8.179326057434082, "learning_rate": 2.168831900409523e-06, "loss": 12.7853, "step": 361220 }, { "epoch": 0.729707454437473, "grad_norm": 242.32781982421875, "learning_rate": 2.1685441915976537e-06, "loss": 21.2555, "step": 361230 }, { "epoch": 0.7297276550701568, "grad_norm": 148.67555236816406, "learning_rate": 2.168256496585958e-06, "loss": 14.1514, "step": 361240 }, { "epoch": 0.7297478557028406, "grad_norm": 358.68707275390625, "learning_rate": 2.1679688153758373e-06, "loss": 16.4668, "step": 361250 }, { "epoch": 0.7297680563355244, "grad_norm": 486.2264099121094, "learning_rate": 2.1676811479686905e-06, "loss": 10.5952, "step": 361260 }, { "epoch": 0.7297882569682083, "grad_norm": 579.328369140625, "learning_rate": 2.1673934943659226e-06, "loss": 26.9122, "step": 361270 }, { "epoch": 0.7298084576008921, "grad_norm": 1396.4542236328125, "learning_rate": 2.1671058545689387e-06, "loss": 39.5685, "step": 361280 }, { "epoch": 0.7298286582335759, "grad_norm": 458.4530944824219, "learning_rate": 2.166818228579134e-06, "loss": 27.3363, "step": 361290 }, { "epoch": 0.7298488588662597, "grad_norm": 223.0692901611328, "learning_rate": 2.1665306163979132e-06, "loss": 6.7999, "step": 361300 }, { "epoch": 0.7298690594989435, "grad_norm": 552.4453735351562, "learning_rate": 2.1662430180266808e-06, "loss": 13.6605, "step": 361310 }, { "epoch": 0.7298892601316274, "grad_norm": 261.2221374511719, "learning_rate": 2.1659554334668364e-06, "loss": 26.5074, "step": 361320 }, { "epoch": 0.7299094607643112, "grad_norm": 195.18429565429688, "learning_rate": 2.1656678627197793e-06, "loss": 7.4277, "step": 361330 }, { "epoch": 0.729929661396995, "grad_norm": 26.890743255615234, "learning_rate": 2.165380305786915e-06, "loss": 24.8765, "step": 361340 }, { "epoch": 0.7299498620296788, "grad_norm": 1167.6458740234375, "learning_rate": 2.165092762669643e-06, "loss": 27.1901, "step": 361350 }, { "epoch": 0.7299700626623626, "grad_norm": 312.314697265625, "learning_rate": 2.164805233369364e-06, "loss": 18.0086, "step": 361360 }, { "epoch": 0.7299902632950463, "grad_norm": 383.10479736328125, "learning_rate": 2.1645177178874817e-06, "loss": 11.2114, "step": 361370 }, { "epoch": 0.7300104639277302, "grad_norm": 10.094188690185547, "learning_rate": 2.164230216225395e-06, "loss": 25.3006, "step": 361380 }, { "epoch": 0.730030664560414, "grad_norm": 281.0256042480469, "learning_rate": 2.163942728384507e-06, "loss": 14.7171, "step": 361390 }, { "epoch": 0.7300508651930978, "grad_norm": 65.37433624267578, "learning_rate": 2.1636552543662187e-06, "loss": 12.3877, "step": 361400 }, { "epoch": 0.7300710658257816, "grad_norm": 252.3701934814453, "learning_rate": 2.163367794171929e-06, "loss": 13.2518, "step": 361410 }, { "epoch": 0.7300912664584654, "grad_norm": 367.776123046875, "learning_rate": 2.1630803478030428e-06, "loss": 17.5288, "step": 361420 }, { "epoch": 0.7301114670911493, "grad_norm": 204.08401489257812, "learning_rate": 2.162792915260956e-06, "loss": 10.4009, "step": 361430 }, { "epoch": 0.7301316677238331, "grad_norm": 44.90028762817383, "learning_rate": 2.1625054965470754e-06, "loss": 20.1912, "step": 361440 }, { "epoch": 0.7301518683565169, "grad_norm": 235.499267578125, "learning_rate": 2.1622180916627964e-06, "loss": 10.8041, "step": 361450 }, { "epoch": 0.7301720689892007, "grad_norm": 170.93577575683594, "learning_rate": 2.161930700609524e-06, "loss": 21.3566, "step": 361460 }, { "epoch": 0.7301922696218845, "grad_norm": 38.35749053955078, "learning_rate": 2.1616433233886576e-06, "loss": 29.6056, "step": 361470 }, { "epoch": 0.7302124702545684, "grad_norm": 329.5901794433594, "learning_rate": 2.1613559600015955e-06, "loss": 15.695, "step": 361480 }, { "epoch": 0.7302326708872522, "grad_norm": 97.2346420288086, "learning_rate": 2.1610686104497413e-06, "loss": 15.3789, "step": 361490 }, { "epoch": 0.730252871519936, "grad_norm": 370.47650146484375, "learning_rate": 2.1607812747344955e-06, "loss": 20.5215, "step": 361500 }, { "epoch": 0.7302730721526198, "grad_norm": 130.2599334716797, "learning_rate": 2.160493952857255e-06, "loss": 9.5733, "step": 361510 }, { "epoch": 0.7302932727853036, "grad_norm": 509.4718322753906, "learning_rate": 2.160206644819422e-06, "loss": 22.5773, "step": 361520 }, { "epoch": 0.7303134734179875, "grad_norm": 58.12928771972656, "learning_rate": 2.159919350622402e-06, "loss": 9.2884, "step": 361530 }, { "epoch": 0.7303336740506713, "grad_norm": 289.2933349609375, "learning_rate": 2.1596320702675867e-06, "loss": 9.0009, "step": 361540 }, { "epoch": 0.7303538746833551, "grad_norm": 254.37644958496094, "learning_rate": 2.1593448037563795e-06, "loss": 26.6712, "step": 361550 }, { "epoch": 0.7303740753160389, "grad_norm": 359.59222412109375, "learning_rate": 2.159057551090184e-06, "loss": 21.5467, "step": 361560 }, { "epoch": 0.7303942759487227, "grad_norm": 398.8002624511719, "learning_rate": 2.158770312270397e-06, "loss": 24.5981, "step": 361570 }, { "epoch": 0.7304144765814066, "grad_norm": 79.03711700439453, "learning_rate": 2.158483087298417e-06, "loss": 14.7203, "step": 361580 }, { "epoch": 0.7304346772140904, "grad_norm": 287.1849365234375, "learning_rate": 2.158195876175646e-06, "loss": 22.3018, "step": 361590 }, { "epoch": 0.7304548778467742, "grad_norm": 272.5879211425781, "learning_rate": 2.157908678903487e-06, "loss": 15.5893, "step": 361600 }, { "epoch": 0.730475078479458, "grad_norm": 131.74618530273438, "learning_rate": 2.157621495483333e-06, "loss": 12.8334, "step": 361610 }, { "epoch": 0.7304952791121417, "grad_norm": 329.29547119140625, "learning_rate": 2.157334325916587e-06, "loss": 19.4448, "step": 361620 }, { "epoch": 0.7305154797448256, "grad_norm": 0.0, "learning_rate": 2.1570471702046504e-06, "loss": 10.5664, "step": 361630 }, { "epoch": 0.7305356803775094, "grad_norm": 123.33502960205078, "learning_rate": 2.1567600283489213e-06, "loss": 15.8539, "step": 361640 }, { "epoch": 0.7305558810101932, "grad_norm": 188.6826629638672, "learning_rate": 2.1564729003507974e-06, "loss": 17.862, "step": 361650 }, { "epoch": 0.730576081642877, "grad_norm": 218.0082550048828, "learning_rate": 2.156185786211681e-06, "loss": 22.03, "step": 361660 }, { "epoch": 0.7305962822755608, "grad_norm": 172.41810607910156, "learning_rate": 2.15589868593297e-06, "loss": 19.7883, "step": 361670 }, { "epoch": 0.7306164829082447, "grad_norm": 313.0063781738281, "learning_rate": 2.1556115995160624e-06, "loss": 15.4494, "step": 361680 }, { "epoch": 0.7306366835409285, "grad_norm": 398.39642333984375, "learning_rate": 2.155324526962361e-06, "loss": 26.9387, "step": 361690 }, { "epoch": 0.7306568841736123, "grad_norm": 164.1744384765625, "learning_rate": 2.1550374682732605e-06, "loss": 29.137, "step": 361700 }, { "epoch": 0.7306770848062961, "grad_norm": 89.40987396240234, "learning_rate": 2.154750423450165e-06, "loss": 14.6746, "step": 361710 }, { "epoch": 0.7306972854389799, "grad_norm": 173.64126586914062, "learning_rate": 2.154463392494468e-06, "loss": 10.6505, "step": 361720 }, { "epoch": 0.7307174860716638, "grad_norm": 256.40386962890625, "learning_rate": 2.1541763754075732e-06, "loss": 18.6754, "step": 361730 }, { "epoch": 0.7307376867043476, "grad_norm": 65.70005798339844, "learning_rate": 2.1538893721908766e-06, "loss": 11.3698, "step": 361740 }, { "epoch": 0.7307578873370314, "grad_norm": 288.4854431152344, "learning_rate": 2.1536023828457793e-06, "loss": 13.3614, "step": 361750 }, { "epoch": 0.7307780879697152, "grad_norm": 269.83782958984375, "learning_rate": 2.153315407373679e-06, "loss": 19.9484, "step": 361760 }, { "epoch": 0.730798288602399, "grad_norm": 577.5112915039062, "learning_rate": 2.153028445775972e-06, "loss": 33.5486, "step": 361770 }, { "epoch": 0.7308184892350829, "grad_norm": 280.5733642578125, "learning_rate": 2.1527414980540607e-06, "loss": 24.447, "step": 361780 }, { "epoch": 0.7308386898677667, "grad_norm": 151.4690399169922, "learning_rate": 2.1524545642093426e-06, "loss": 40.2282, "step": 361790 }, { "epoch": 0.7308588905004505, "grad_norm": 537.35888671875, "learning_rate": 2.152167644243213e-06, "loss": 15.5832, "step": 361800 }, { "epoch": 0.7308790911331343, "grad_norm": 317.5626525878906, "learning_rate": 2.1518807381570737e-06, "loss": 10.4389, "step": 361810 }, { "epoch": 0.7308992917658181, "grad_norm": 208.39234924316406, "learning_rate": 2.1515938459523254e-06, "loss": 15.2609, "step": 361820 }, { "epoch": 0.730919492398502, "grad_norm": 120.46800231933594, "learning_rate": 2.15130696763036e-06, "loss": 9.1421, "step": 361830 }, { "epoch": 0.7309396930311858, "grad_norm": 145.6685028076172, "learning_rate": 2.151020103192579e-06, "loss": 8.7755, "step": 361840 }, { "epoch": 0.7309598936638696, "grad_norm": 0.0, "learning_rate": 2.1507332526403814e-06, "loss": 27.3811, "step": 361850 }, { "epoch": 0.7309800942965534, "grad_norm": 150.5175018310547, "learning_rate": 2.1504464159751646e-06, "loss": 9.9701, "step": 361860 }, { "epoch": 0.7310002949292372, "grad_norm": 435.3010559082031, "learning_rate": 2.1501595931983256e-06, "loss": 11.6997, "step": 361870 }, { "epoch": 0.731020495561921, "grad_norm": 422.4320068359375, "learning_rate": 2.149872784311262e-06, "loss": 17.2753, "step": 361880 }, { "epoch": 0.7310406961946048, "grad_norm": 357.7993469238281, "learning_rate": 2.149585989315377e-06, "loss": 22.5001, "step": 361890 }, { "epoch": 0.7310608968272886, "grad_norm": 143.29721069335938, "learning_rate": 2.14929920821206e-06, "loss": 31.2977, "step": 361900 }, { "epoch": 0.7310810974599724, "grad_norm": 373.5242919921875, "learning_rate": 2.1490124410027137e-06, "loss": 21.857, "step": 361910 }, { "epoch": 0.7311012980926562, "grad_norm": 492.150390625, "learning_rate": 2.1487256876887356e-06, "loss": 19.6206, "step": 361920 }, { "epoch": 0.73112149872534, "grad_norm": 216.5948486328125, "learning_rate": 2.148438948271524e-06, "loss": 12.4254, "step": 361930 }, { "epoch": 0.7311416993580239, "grad_norm": 310.24310302734375, "learning_rate": 2.1481522227524725e-06, "loss": 11.7539, "step": 361940 }, { "epoch": 0.7311618999907077, "grad_norm": 294.27496337890625, "learning_rate": 2.147865511132983e-06, "loss": 19.9996, "step": 361950 }, { "epoch": 0.7311821006233915, "grad_norm": 279.9197998046875, "learning_rate": 2.1475788134144516e-06, "loss": 12.9746, "step": 361960 }, { "epoch": 0.7312023012560753, "grad_norm": 493.5824279785156, "learning_rate": 2.147292129598273e-06, "loss": 14.7243, "step": 361970 }, { "epoch": 0.7312225018887591, "grad_norm": 116.81647491455078, "learning_rate": 2.147005459685848e-06, "loss": 16.1664, "step": 361980 }, { "epoch": 0.731242702521443, "grad_norm": 237.32005310058594, "learning_rate": 2.1467188036785706e-06, "loss": 12.6283, "step": 361990 }, { "epoch": 0.7312629031541268, "grad_norm": 31.52461051940918, "learning_rate": 2.146432161577842e-06, "loss": 9.5775, "step": 362000 }, { "epoch": 0.7312831037868106, "grad_norm": 303.5574951171875, "learning_rate": 2.146145533385057e-06, "loss": 12.1719, "step": 362010 }, { "epoch": 0.7313033044194944, "grad_norm": 293.03265380859375, "learning_rate": 2.1458589191016103e-06, "loss": 41.0131, "step": 362020 }, { "epoch": 0.7313235050521782, "grad_norm": 248.96372985839844, "learning_rate": 2.1455723187289028e-06, "loss": 17.443, "step": 362030 }, { "epoch": 0.7313437056848621, "grad_norm": 200.71824645996094, "learning_rate": 2.1452857322683285e-06, "loss": 12.7661, "step": 362040 }, { "epoch": 0.7313639063175459, "grad_norm": 0.0, "learning_rate": 2.1449991597212865e-06, "loss": 30.969, "step": 362050 }, { "epoch": 0.7313841069502297, "grad_norm": 40.37230682373047, "learning_rate": 2.1447126010891704e-06, "loss": 14.9572, "step": 362060 }, { "epoch": 0.7314043075829135, "grad_norm": 122.91261291503906, "learning_rate": 2.144426056373381e-06, "loss": 14.1695, "step": 362070 }, { "epoch": 0.7314245082155973, "grad_norm": 286.9078063964844, "learning_rate": 2.144139525575313e-06, "loss": 22.4025, "step": 362080 }, { "epoch": 0.7314447088482812, "grad_norm": 78.23506927490234, "learning_rate": 2.14385300869636e-06, "loss": 22.9852, "step": 362090 }, { "epoch": 0.731464909480965, "grad_norm": 299.5748291015625, "learning_rate": 2.1435665057379233e-06, "loss": 24.0319, "step": 362100 }, { "epoch": 0.7314851101136488, "grad_norm": 8.459238052368164, "learning_rate": 2.143280016701397e-06, "loss": 18.2483, "step": 362110 }, { "epoch": 0.7315053107463326, "grad_norm": 410.2729187011719, "learning_rate": 2.1429935415881753e-06, "loss": 24.2836, "step": 362120 }, { "epoch": 0.7315255113790163, "grad_norm": 83.34759521484375, "learning_rate": 2.1427070803996565e-06, "loss": 7.631, "step": 362130 }, { "epoch": 0.7315457120117002, "grad_norm": 254.82315063476562, "learning_rate": 2.142420633137241e-06, "loss": 21.0717, "step": 362140 }, { "epoch": 0.731565912644384, "grad_norm": 231.8488311767578, "learning_rate": 2.1421341998023167e-06, "loss": 14.0261, "step": 362150 }, { "epoch": 0.7315861132770678, "grad_norm": 184.5997772216797, "learning_rate": 2.141847780396284e-06, "loss": 8.1853, "step": 362160 }, { "epoch": 0.7316063139097516, "grad_norm": 942.6822509765625, "learning_rate": 2.14156137492054e-06, "loss": 18.0977, "step": 362170 }, { "epoch": 0.7316265145424354, "grad_norm": 152.9513397216797, "learning_rate": 2.141274983376479e-06, "loss": 8.9364, "step": 362180 }, { "epoch": 0.7316467151751193, "grad_norm": 296.63934326171875, "learning_rate": 2.1409886057654963e-06, "loss": 15.6173, "step": 362190 }, { "epoch": 0.7316669158078031, "grad_norm": 6394.7861328125, "learning_rate": 2.140702242088987e-06, "loss": 30.6304, "step": 362200 }, { "epoch": 0.7316871164404869, "grad_norm": 182.7490997314453, "learning_rate": 2.1404158923483524e-06, "loss": 22.6901, "step": 362210 }, { "epoch": 0.7317073170731707, "grad_norm": 771.7197265625, "learning_rate": 2.1401295565449803e-06, "loss": 27.4147, "step": 362220 }, { "epoch": 0.7317275177058545, "grad_norm": 247.33331298828125, "learning_rate": 2.13984323468027e-06, "loss": 28.8346, "step": 362230 }, { "epoch": 0.7317477183385384, "grad_norm": 11.089387893676758, "learning_rate": 2.1395569267556187e-06, "loss": 23.5769, "step": 362240 }, { "epoch": 0.7317679189712222, "grad_norm": 162.6513671875, "learning_rate": 2.13927063277242e-06, "loss": 18.0349, "step": 362250 }, { "epoch": 0.731788119603906, "grad_norm": 106.21112823486328, "learning_rate": 2.1389843527320675e-06, "loss": 9.247, "step": 362260 }, { "epoch": 0.7318083202365898, "grad_norm": 308.93218994140625, "learning_rate": 2.1386980866359595e-06, "loss": 20.4615, "step": 362270 }, { "epoch": 0.7318285208692736, "grad_norm": 260.24688720703125, "learning_rate": 2.1384118344854906e-06, "loss": 14.3164, "step": 362280 }, { "epoch": 0.7318487215019575, "grad_norm": 349.673095703125, "learning_rate": 2.1381255962820535e-06, "loss": 12.933, "step": 362290 }, { "epoch": 0.7318689221346413, "grad_norm": 636.61572265625, "learning_rate": 2.137839372027047e-06, "loss": 19.5609, "step": 362300 }, { "epoch": 0.7318891227673251, "grad_norm": 549.0619506835938, "learning_rate": 2.137553161721862e-06, "loss": 26.6231, "step": 362310 }, { "epoch": 0.7319093234000089, "grad_norm": 286.256591796875, "learning_rate": 2.137266965367898e-06, "loss": 11.1451, "step": 362320 }, { "epoch": 0.7319295240326927, "grad_norm": 293.4443359375, "learning_rate": 2.1369807829665455e-06, "loss": 16.0919, "step": 362330 }, { "epoch": 0.7319497246653766, "grad_norm": 530.5147705078125, "learning_rate": 2.136694614519203e-06, "loss": 19.3131, "step": 362340 }, { "epoch": 0.7319699252980604, "grad_norm": 248.47470092773438, "learning_rate": 2.1364084600272645e-06, "loss": 8.3872, "step": 362350 }, { "epoch": 0.7319901259307442, "grad_norm": 180.2115020751953, "learning_rate": 2.1361223194921214e-06, "loss": 32.2915, "step": 362360 }, { "epoch": 0.732010326563428, "grad_norm": 230.42027282714844, "learning_rate": 2.135836192915173e-06, "loss": 21.2841, "step": 362370 }, { "epoch": 0.7320305271961118, "grad_norm": 81.7883071899414, "learning_rate": 2.1355500802978093e-06, "loss": 18.5862, "step": 362380 }, { "epoch": 0.7320507278287955, "grad_norm": 210.7587432861328, "learning_rate": 2.135263981641429e-06, "loss": 17.6742, "step": 362390 }, { "epoch": 0.7320709284614794, "grad_norm": 92.9783935546875, "learning_rate": 2.134977896947425e-06, "loss": 9.9971, "step": 362400 }, { "epoch": 0.7320911290941632, "grad_norm": 259.857177734375, "learning_rate": 2.134691826217189e-06, "loss": 26.6176, "step": 362410 }, { "epoch": 0.732111329726847, "grad_norm": 206.9111785888672, "learning_rate": 2.1344057694521177e-06, "loss": 6.766, "step": 362420 }, { "epoch": 0.7321315303595308, "grad_norm": 126.73641967773438, "learning_rate": 2.1341197266536085e-06, "loss": 22.2745, "step": 362430 }, { "epoch": 0.7321517309922146, "grad_norm": 398.0264892578125, "learning_rate": 2.1338336978230487e-06, "loss": 19.8499, "step": 362440 }, { "epoch": 0.7321719316248985, "grad_norm": 59.321468353271484, "learning_rate": 2.1335476829618364e-06, "loss": 15.8949, "step": 362450 }, { "epoch": 0.7321921322575823, "grad_norm": 43.930755615234375, "learning_rate": 2.133261682071366e-06, "loss": 16.9702, "step": 362460 }, { "epoch": 0.7322123328902661, "grad_norm": 165.3673095703125, "learning_rate": 2.1329756951530307e-06, "loss": 9.669, "step": 362470 }, { "epoch": 0.7322325335229499, "grad_norm": 396.2498474121094, "learning_rate": 2.132689722208223e-06, "loss": 18.3295, "step": 362480 }, { "epoch": 0.7322527341556337, "grad_norm": 353.7226867675781, "learning_rate": 2.132403763238337e-06, "loss": 8.7233, "step": 362490 }, { "epoch": 0.7322729347883176, "grad_norm": 45.07123947143555, "learning_rate": 2.132117818244771e-06, "loss": 21.3078, "step": 362500 }, { "epoch": 0.7322931354210014, "grad_norm": 159.62062072753906, "learning_rate": 2.1318318872289117e-06, "loss": 26.5322, "step": 362510 }, { "epoch": 0.7323133360536852, "grad_norm": 129.12539672851562, "learning_rate": 2.1315459701921553e-06, "loss": 12.4635, "step": 362520 }, { "epoch": 0.732333536686369, "grad_norm": 207.158203125, "learning_rate": 2.1312600671358983e-06, "loss": 22.6551, "step": 362530 }, { "epoch": 0.7323537373190528, "grad_norm": 241.94337463378906, "learning_rate": 2.1309741780615316e-06, "loss": 16.8303, "step": 362540 }, { "epoch": 0.7323739379517367, "grad_norm": 337.6984558105469, "learning_rate": 2.1306883029704472e-06, "loss": 12.6057, "step": 362550 }, { "epoch": 0.7323941385844205, "grad_norm": 115.45652770996094, "learning_rate": 2.130402441864041e-06, "loss": 8.4147, "step": 362560 }, { "epoch": 0.7324143392171043, "grad_norm": 850.3384399414062, "learning_rate": 2.1301165947437064e-06, "loss": 28.0071, "step": 362570 }, { "epoch": 0.7324345398497881, "grad_norm": 185.47665405273438, "learning_rate": 2.129830761610833e-06, "loss": 27.8709, "step": 362580 }, { "epoch": 0.7324547404824719, "grad_norm": 198.30885314941406, "learning_rate": 2.1295449424668184e-06, "loss": 23.3307, "step": 362590 }, { "epoch": 0.7324749411151558, "grad_norm": 583.9700317382812, "learning_rate": 2.1292591373130515e-06, "loss": 16.7103, "step": 362600 }, { "epoch": 0.7324951417478396, "grad_norm": 252.70068359375, "learning_rate": 2.1289733461509294e-06, "loss": 24.5339, "step": 362610 }, { "epoch": 0.7325153423805234, "grad_norm": 324.47332763671875, "learning_rate": 2.128687568981843e-06, "loss": 18.0378, "step": 362620 }, { "epoch": 0.7325355430132072, "grad_norm": 499.86566162109375, "learning_rate": 2.1284018058071833e-06, "loss": 20.3023, "step": 362630 }, { "epoch": 0.732555743645891, "grad_norm": 174.8582305908203, "learning_rate": 2.1281160566283466e-06, "loss": 25.8857, "step": 362640 }, { "epoch": 0.7325759442785748, "grad_norm": 208.74159240722656, "learning_rate": 2.127830321446722e-06, "loss": 7.5548, "step": 362650 }, { "epoch": 0.7325961449112586, "grad_norm": 563.8468017578125, "learning_rate": 2.1275446002637063e-06, "loss": 26.3315, "step": 362660 }, { "epoch": 0.7326163455439424, "grad_norm": 314.295654296875, "learning_rate": 2.127258893080688e-06, "loss": 15.2458, "step": 362670 }, { "epoch": 0.7326365461766262, "grad_norm": 316.4440612792969, "learning_rate": 2.126973199899063e-06, "loss": 13.5014, "step": 362680 }, { "epoch": 0.73265674680931, "grad_norm": 268.5365905761719, "learning_rate": 2.126687520720222e-06, "loss": 11.3732, "step": 362690 }, { "epoch": 0.7326769474419939, "grad_norm": 252.9990692138672, "learning_rate": 2.1264018555455563e-06, "loss": 27.6556, "step": 362700 }, { "epoch": 0.7326971480746777, "grad_norm": 223.3609161376953, "learning_rate": 2.1261162043764606e-06, "loss": 19.5327, "step": 362710 }, { "epoch": 0.7327173487073615, "grad_norm": 422.05133056640625, "learning_rate": 2.1258305672143265e-06, "loss": 23.6622, "step": 362720 }, { "epoch": 0.7327375493400453, "grad_norm": 296.18450927734375, "learning_rate": 2.1255449440605436e-06, "loss": 17.5998, "step": 362730 }, { "epoch": 0.7327577499727291, "grad_norm": 336.5849609375, "learning_rate": 2.1252593349165056e-06, "loss": 14.0286, "step": 362740 }, { "epoch": 0.732777950605413, "grad_norm": 280.03936767578125, "learning_rate": 2.124973739783609e-06, "loss": 5.9345, "step": 362750 }, { "epoch": 0.7327981512380968, "grad_norm": 231.70654296875, "learning_rate": 2.1246881586632384e-06, "loss": 27.7851, "step": 362760 }, { "epoch": 0.7328183518707806, "grad_norm": 172.58609008789062, "learning_rate": 2.1244025915567883e-06, "loss": 16.8072, "step": 362770 }, { "epoch": 0.7328385525034644, "grad_norm": 174.11318969726562, "learning_rate": 2.1241170384656533e-06, "loss": 10.0172, "step": 362780 }, { "epoch": 0.7328587531361482, "grad_norm": 126.89042663574219, "learning_rate": 2.123831499391223e-06, "loss": 32.3933, "step": 362790 }, { "epoch": 0.732878953768832, "grad_norm": 215.4153289794922, "learning_rate": 2.1235459743348874e-06, "loss": 11.8307, "step": 362800 }, { "epoch": 0.7328991544015159, "grad_norm": 227.22021484375, "learning_rate": 2.12326046329804e-06, "loss": 8.6214, "step": 362810 }, { "epoch": 0.7329193550341997, "grad_norm": 456.304931640625, "learning_rate": 2.1229749662820754e-06, "loss": 28.5015, "step": 362820 }, { "epoch": 0.7329395556668835, "grad_norm": 620.4779663085938, "learning_rate": 2.122689483288379e-06, "loss": 21.3732, "step": 362830 }, { "epoch": 0.7329597562995673, "grad_norm": 323.3889465332031, "learning_rate": 2.1224040143183444e-06, "loss": 10.264, "step": 362840 }, { "epoch": 0.7329799569322512, "grad_norm": 468.9705505371094, "learning_rate": 2.122118559373366e-06, "loss": 18.8075, "step": 362850 }, { "epoch": 0.733000157564935, "grad_norm": 134.55491638183594, "learning_rate": 2.121833118454832e-06, "loss": 17.3802, "step": 362860 }, { "epoch": 0.7330203581976188, "grad_norm": 490.9466247558594, "learning_rate": 2.1215476915641327e-06, "loss": 27.196, "step": 362870 }, { "epoch": 0.7330405588303026, "grad_norm": 188.0200958251953, "learning_rate": 2.1212622787026626e-06, "loss": 13.0218, "step": 362880 }, { "epoch": 0.7330607594629864, "grad_norm": 64.50437927246094, "learning_rate": 2.120976879871811e-06, "loss": 8.7682, "step": 362890 }, { "epoch": 0.7330809600956701, "grad_norm": 288.52581787109375, "learning_rate": 2.1206914950729673e-06, "loss": 15.0355, "step": 362900 }, { "epoch": 0.733101160728354, "grad_norm": 330.55780029296875, "learning_rate": 2.1204061243075257e-06, "loss": 15.0016, "step": 362910 }, { "epoch": 0.7331213613610378, "grad_norm": 156.34127807617188, "learning_rate": 2.1201207675768738e-06, "loss": 13.5698, "step": 362920 }, { "epoch": 0.7331415619937216, "grad_norm": 318.50238037109375, "learning_rate": 2.1198354248824057e-06, "loss": 16.4368, "step": 362930 }, { "epoch": 0.7331617626264054, "grad_norm": 185.44338989257812, "learning_rate": 2.1195500962255084e-06, "loss": 11.8513, "step": 362940 }, { "epoch": 0.7331819632590892, "grad_norm": 118.72286224365234, "learning_rate": 2.119264781607577e-06, "loss": 15.6635, "step": 362950 }, { "epoch": 0.7332021638917731, "grad_norm": 259.85369873046875, "learning_rate": 2.118979481029999e-06, "loss": 12.5439, "step": 362960 }, { "epoch": 0.7332223645244569, "grad_norm": 1.688622236251831, "learning_rate": 2.118694194494164e-06, "loss": 17.8337, "step": 362970 }, { "epoch": 0.7332425651571407, "grad_norm": 436.18499755859375, "learning_rate": 2.1184089220014657e-06, "loss": 24.8784, "step": 362980 }, { "epoch": 0.7332627657898245, "grad_norm": 103.55606842041016, "learning_rate": 2.1181236635532913e-06, "loss": 17.5159, "step": 362990 }, { "epoch": 0.7332829664225083, "grad_norm": 224.92689514160156, "learning_rate": 2.1178384191510344e-06, "loss": 17.3668, "step": 363000 }, { "epoch": 0.7333031670551922, "grad_norm": 400.4418640136719, "learning_rate": 2.1175531887960834e-06, "loss": 12.1203, "step": 363010 }, { "epoch": 0.733323367687876, "grad_norm": 234.56947326660156, "learning_rate": 2.1172679724898264e-06, "loss": 18.8262, "step": 363020 }, { "epoch": 0.7333435683205598, "grad_norm": 218.43556213378906, "learning_rate": 2.116982770233658e-06, "loss": 15.6948, "step": 363030 }, { "epoch": 0.7333637689532436, "grad_norm": 16.350723266601562, "learning_rate": 2.116697582028966e-06, "loss": 15.5294, "step": 363040 }, { "epoch": 0.7333839695859274, "grad_norm": 353.0278625488281, "learning_rate": 2.116412407877138e-06, "loss": 21.1523, "step": 363050 }, { "epoch": 0.7334041702186113, "grad_norm": 486.548828125, "learning_rate": 2.116127247779566e-06, "loss": 15.8035, "step": 363060 }, { "epoch": 0.7334243708512951, "grad_norm": 22.93790054321289, "learning_rate": 2.1158421017376423e-06, "loss": 29.5757, "step": 363070 }, { "epoch": 0.7334445714839789, "grad_norm": 400.7310485839844, "learning_rate": 2.1155569697527546e-06, "loss": 20.6992, "step": 363080 }, { "epoch": 0.7334647721166627, "grad_norm": 176.71839904785156, "learning_rate": 2.1152718518262903e-06, "loss": 20.0847, "step": 363090 }, { "epoch": 0.7334849727493465, "grad_norm": 206.02749633789062, "learning_rate": 2.114986747959643e-06, "loss": 16.9637, "step": 363100 }, { "epoch": 0.7335051733820304, "grad_norm": 257.4684143066406, "learning_rate": 2.1147016581542e-06, "loss": 16.0752, "step": 363110 }, { "epoch": 0.7335253740147142, "grad_norm": 31.302799224853516, "learning_rate": 2.11441658241135e-06, "loss": 18.852, "step": 363120 }, { "epoch": 0.733545574647398, "grad_norm": 119.71320343017578, "learning_rate": 2.114131520732483e-06, "loss": 16.5166, "step": 363130 }, { "epoch": 0.7335657752800818, "grad_norm": 356.7124328613281, "learning_rate": 2.113846473118991e-06, "loss": 8.1063, "step": 363140 }, { "epoch": 0.7335859759127656, "grad_norm": 262.5131530761719, "learning_rate": 2.1135614395722613e-06, "loss": 26.3426, "step": 363150 }, { "epoch": 0.7336061765454494, "grad_norm": 307.5450744628906, "learning_rate": 2.113276420093681e-06, "loss": 17.8047, "step": 363160 }, { "epoch": 0.7336263771781332, "grad_norm": 295.5090026855469, "learning_rate": 2.1129914146846435e-06, "loss": 28.4694, "step": 363170 }, { "epoch": 0.733646577810817, "grad_norm": 421.9253845214844, "learning_rate": 2.1127064233465354e-06, "loss": 23.5918, "step": 363180 }, { "epoch": 0.7336667784435008, "grad_norm": 274.78900146484375, "learning_rate": 2.1124214460807446e-06, "loss": 22.883, "step": 363190 }, { "epoch": 0.7336869790761846, "grad_norm": 287.8680114746094, "learning_rate": 2.112136482888663e-06, "loss": 31.8865, "step": 363200 }, { "epoch": 0.7337071797088685, "grad_norm": 177.9224853515625, "learning_rate": 2.111851533771676e-06, "loss": 13.0556, "step": 363210 }, { "epoch": 0.7337273803415523, "grad_norm": 303.4087829589844, "learning_rate": 2.111566598731176e-06, "loss": 13.7889, "step": 363220 }, { "epoch": 0.7337475809742361, "grad_norm": 252.21031188964844, "learning_rate": 2.1112816777685506e-06, "loss": 20.7715, "step": 363230 }, { "epoch": 0.7337677816069199, "grad_norm": 262.19415283203125, "learning_rate": 2.110996770885186e-06, "loss": 14.6008, "step": 363240 }, { "epoch": 0.7337879822396037, "grad_norm": 0.4300570785999298, "learning_rate": 2.1107118780824744e-06, "loss": 20.9008, "step": 363250 }, { "epoch": 0.7338081828722876, "grad_norm": 0.0, "learning_rate": 2.1104269993618008e-06, "loss": 16.0882, "step": 363260 }, { "epoch": 0.7338283835049714, "grad_norm": 369.2259521484375, "learning_rate": 2.1101421347245576e-06, "loss": 23.5525, "step": 363270 }, { "epoch": 0.7338485841376552, "grad_norm": 0.0, "learning_rate": 2.109857284172129e-06, "loss": 16.0468, "step": 363280 }, { "epoch": 0.733868784770339, "grad_norm": 329.10968017578125, "learning_rate": 2.1095724477059077e-06, "loss": 13.1567, "step": 363290 }, { "epoch": 0.7338889854030228, "grad_norm": 279.489990234375, "learning_rate": 2.1092876253272793e-06, "loss": 15.5828, "step": 363300 }, { "epoch": 0.7339091860357067, "grad_norm": 224.955810546875, "learning_rate": 2.1090028170376307e-06, "loss": 8.995, "step": 363310 }, { "epoch": 0.7339293866683905, "grad_norm": 109.019775390625, "learning_rate": 2.1087180228383536e-06, "loss": 15.9683, "step": 363320 }, { "epoch": 0.7339495873010743, "grad_norm": 507.80914306640625, "learning_rate": 2.108433242730834e-06, "loss": 16.1979, "step": 363330 }, { "epoch": 0.7339697879337581, "grad_norm": 74.98799896240234, "learning_rate": 2.1081484767164584e-06, "loss": 11.4906, "step": 363340 }, { "epoch": 0.7339899885664419, "grad_norm": 79.34088134765625, "learning_rate": 2.1078637247966166e-06, "loss": 13.0096, "step": 363350 }, { "epoch": 0.7340101891991258, "grad_norm": 92.39824676513672, "learning_rate": 2.1075789869726998e-06, "loss": 18.2189, "step": 363360 }, { "epoch": 0.7340303898318096, "grad_norm": 323.61932373046875, "learning_rate": 2.1072942632460887e-06, "loss": 12.9048, "step": 363370 }, { "epoch": 0.7340505904644934, "grad_norm": 432.76800537109375, "learning_rate": 2.107009553618174e-06, "loss": 11.6334, "step": 363380 }, { "epoch": 0.7340707910971772, "grad_norm": 403.7558288574219, "learning_rate": 2.106724858090346e-06, "loss": 19.7182, "step": 363390 }, { "epoch": 0.734090991729861, "grad_norm": 224.74990844726562, "learning_rate": 2.10644017666399e-06, "loss": 14.0076, "step": 363400 }, { "epoch": 0.7341111923625447, "grad_norm": 495.9862976074219, "learning_rate": 2.1061555093404917e-06, "loss": 7.0589, "step": 363410 }, { "epoch": 0.7341313929952286, "grad_norm": 128.88540649414062, "learning_rate": 2.105870856121241e-06, "loss": 13.5996, "step": 363420 }, { "epoch": 0.7341515936279124, "grad_norm": 271.5550842285156, "learning_rate": 2.105586217007628e-06, "loss": 13.2975, "step": 363430 }, { "epoch": 0.7341717942605962, "grad_norm": 168.48757934570312, "learning_rate": 2.1053015920010328e-06, "loss": 6.412, "step": 363440 }, { "epoch": 0.73419199489328, "grad_norm": 410.2112731933594, "learning_rate": 2.105016981102847e-06, "loss": 10.4793, "step": 363450 }, { "epoch": 0.7342121955259638, "grad_norm": 464.45391845703125, "learning_rate": 2.104732384314459e-06, "loss": 12.1352, "step": 363460 }, { "epoch": 0.7342323961586477, "grad_norm": 351.6531677246094, "learning_rate": 2.1044478016372544e-06, "loss": 23.2848, "step": 363470 }, { "epoch": 0.7342525967913315, "grad_norm": 869.0027465820312, "learning_rate": 2.104163233072618e-06, "loss": 24.9643, "step": 363480 }, { "epoch": 0.7342727974240153, "grad_norm": 215.43370056152344, "learning_rate": 2.1038786786219405e-06, "loss": 9.2253, "step": 363490 }, { "epoch": 0.7342929980566991, "grad_norm": 160.44802856445312, "learning_rate": 2.103594138286607e-06, "loss": 18.253, "step": 363500 }, { "epoch": 0.7343131986893829, "grad_norm": 158.06399536132812, "learning_rate": 2.103309612068003e-06, "loss": 9.7817, "step": 363510 }, { "epoch": 0.7343333993220668, "grad_norm": 200.13926696777344, "learning_rate": 2.1030250999675184e-06, "loss": 9.2126, "step": 363520 }, { "epoch": 0.7343535999547506, "grad_norm": 169.5173797607422, "learning_rate": 2.102740601986536e-06, "loss": 12.8211, "step": 363530 }, { "epoch": 0.7343738005874344, "grad_norm": 127.53115844726562, "learning_rate": 2.1024561181264464e-06, "loss": 22.431, "step": 363540 }, { "epoch": 0.7343940012201182, "grad_norm": 303.04705810546875, "learning_rate": 2.1021716483886323e-06, "loss": 18.4178, "step": 363550 }, { "epoch": 0.734414201852802, "grad_norm": 273.5811462402344, "learning_rate": 2.1018871927744844e-06, "loss": 10.8444, "step": 363560 }, { "epoch": 0.7344344024854859, "grad_norm": 372.3602600097656, "learning_rate": 2.1016027512853864e-06, "loss": 12.382, "step": 363570 }, { "epoch": 0.7344546031181697, "grad_norm": 84.6185073852539, "learning_rate": 2.101318323922723e-06, "loss": 16.617, "step": 363580 }, { "epoch": 0.7344748037508535, "grad_norm": 90.39535522460938, "learning_rate": 2.1010339106878846e-06, "loss": 24.6151, "step": 363590 }, { "epoch": 0.7344950043835373, "grad_norm": 14.250065803527832, "learning_rate": 2.100749511582254e-06, "loss": 12.8602, "step": 363600 }, { "epoch": 0.7345152050162211, "grad_norm": 275.46661376953125, "learning_rate": 2.10046512660722e-06, "loss": 19.9954, "step": 363610 }, { "epoch": 0.734535405648905, "grad_norm": 230.6047821044922, "learning_rate": 2.1001807557641673e-06, "loss": 18.245, "step": 363620 }, { "epoch": 0.7345556062815888, "grad_norm": 641.57470703125, "learning_rate": 2.09989639905448e-06, "loss": 17.1894, "step": 363630 }, { "epoch": 0.7345758069142726, "grad_norm": 327.9907531738281, "learning_rate": 2.099612056479548e-06, "loss": 13.878, "step": 363640 }, { "epoch": 0.7345960075469564, "grad_norm": 95.35296630859375, "learning_rate": 2.0993277280407547e-06, "loss": 11.8236, "step": 363650 }, { "epoch": 0.7346162081796402, "grad_norm": 104.40717315673828, "learning_rate": 2.099043413739485e-06, "loss": 19.7897, "step": 363660 }, { "epoch": 0.734636408812324, "grad_norm": 204.8915557861328, "learning_rate": 2.098759113577125e-06, "loss": 22.5227, "step": 363670 }, { "epoch": 0.7346566094450078, "grad_norm": 103.42743682861328, "learning_rate": 2.098474827555064e-06, "loss": 32.608, "step": 363680 }, { "epoch": 0.7346768100776916, "grad_norm": 0.5974836945533752, "learning_rate": 2.098190555674684e-06, "loss": 20.0239, "step": 363690 }, { "epoch": 0.7346970107103754, "grad_norm": 28.006046295166016, "learning_rate": 2.09790629793737e-06, "loss": 11.7999, "step": 363700 }, { "epoch": 0.7347172113430592, "grad_norm": 236.2725067138672, "learning_rate": 2.09762205434451e-06, "loss": 20.0753, "step": 363710 }, { "epoch": 0.734737411975743, "grad_norm": 225.6661376953125, "learning_rate": 2.0973378248974884e-06, "loss": 14.4168, "step": 363720 }, { "epoch": 0.7347576126084269, "grad_norm": 276.6294860839844, "learning_rate": 2.0970536095976884e-06, "loss": 25.1084, "step": 363730 }, { "epoch": 0.7347778132411107, "grad_norm": 77.72364044189453, "learning_rate": 2.0967694084464973e-06, "loss": 25.3937, "step": 363740 }, { "epoch": 0.7347980138737945, "grad_norm": 357.6109619140625, "learning_rate": 2.096485221445301e-06, "loss": 29.3122, "step": 363750 }, { "epoch": 0.7348182145064783, "grad_norm": 314.7205505371094, "learning_rate": 2.0962010485954844e-06, "loss": 13.8269, "step": 363760 }, { "epoch": 0.7348384151391621, "grad_norm": 323.6448059082031, "learning_rate": 2.0959168898984295e-06, "loss": 11.5564, "step": 363770 }, { "epoch": 0.734858615771846, "grad_norm": 251.83470153808594, "learning_rate": 2.095632745355525e-06, "loss": 22.5384, "step": 363780 }, { "epoch": 0.7348788164045298, "grad_norm": 237.3880157470703, "learning_rate": 2.0953486149681553e-06, "loss": 15.3943, "step": 363790 }, { "epoch": 0.7348990170372136, "grad_norm": 364.8495788574219, "learning_rate": 2.095064498737701e-06, "loss": 20.8282, "step": 363800 }, { "epoch": 0.7349192176698974, "grad_norm": 301.7351989746094, "learning_rate": 2.0947803966655526e-06, "loss": 17.9668, "step": 363810 }, { "epoch": 0.7349394183025812, "grad_norm": 273.9213562011719, "learning_rate": 2.09449630875309e-06, "loss": 13.6403, "step": 363820 }, { "epoch": 0.7349596189352651, "grad_norm": 110.57875061035156, "learning_rate": 2.0942122350017023e-06, "loss": 9.3886, "step": 363830 }, { "epoch": 0.7349798195679489, "grad_norm": 394.31396484375, "learning_rate": 2.0939281754127695e-06, "loss": 27.6192, "step": 363840 }, { "epoch": 0.7350000202006327, "grad_norm": 192.35055541992188, "learning_rate": 2.0936441299876803e-06, "loss": 12.1223, "step": 363850 }, { "epoch": 0.7350202208333165, "grad_norm": 370.5030212402344, "learning_rate": 2.093360098727817e-06, "loss": 12.2906, "step": 363860 }, { "epoch": 0.7350404214660003, "grad_norm": 431.5680236816406, "learning_rate": 2.0930760816345626e-06, "loss": 19.443, "step": 363870 }, { "epoch": 0.7350606220986842, "grad_norm": 419.0373229980469, "learning_rate": 2.092792078709304e-06, "loss": 21.6547, "step": 363880 }, { "epoch": 0.735080822731368, "grad_norm": 21.656082153320312, "learning_rate": 2.0925080899534227e-06, "loss": 12.1773, "step": 363890 }, { "epoch": 0.7351010233640518, "grad_norm": 366.78076171875, "learning_rate": 2.0922241153683064e-06, "loss": 13.6152, "step": 363900 }, { "epoch": 0.7351212239967356, "grad_norm": 374.6824035644531, "learning_rate": 2.091940154955337e-06, "loss": 14.2582, "step": 363910 }, { "epoch": 0.7351414246294194, "grad_norm": 1.460685133934021, "learning_rate": 2.0916562087158964e-06, "loss": 14.9178, "step": 363920 }, { "epoch": 0.7351616252621032, "grad_norm": 274.4546203613281, "learning_rate": 2.091372276651373e-06, "loss": 31.4811, "step": 363930 }, { "epoch": 0.735181825894787, "grad_norm": 345.71624755859375, "learning_rate": 2.0910883587631476e-06, "loss": 11.8139, "step": 363940 }, { "epoch": 0.7352020265274708, "grad_norm": 342.5632019042969, "learning_rate": 2.0908044550526034e-06, "loss": 9.2752, "step": 363950 }, { "epoch": 0.7352222271601546, "grad_norm": 115.6194839477539, "learning_rate": 2.0905205655211257e-06, "loss": 18.5525, "step": 363960 }, { "epoch": 0.7352424277928384, "grad_norm": 480.8943176269531, "learning_rate": 2.090236690170101e-06, "loss": 21.0887, "step": 363970 }, { "epoch": 0.7352626284255223, "grad_norm": 217.8319549560547, "learning_rate": 2.0899528290009065e-06, "loss": 19.1683, "step": 363980 }, { "epoch": 0.7352828290582061, "grad_norm": 246.22958374023438, "learning_rate": 2.089668982014929e-06, "loss": 15.2767, "step": 363990 }, { "epoch": 0.7353030296908899, "grad_norm": 379.083984375, "learning_rate": 2.0893851492135536e-06, "loss": 21.7924, "step": 364000 }, { "epoch": 0.7353232303235737, "grad_norm": 192.2853240966797, "learning_rate": 2.0891013305981615e-06, "loss": 16.7719, "step": 364010 }, { "epoch": 0.7353434309562575, "grad_norm": 415.3433532714844, "learning_rate": 2.0888175261701355e-06, "loss": 21.436, "step": 364020 }, { "epoch": 0.7353636315889414, "grad_norm": 330.7675476074219, "learning_rate": 2.0885337359308592e-06, "loss": 28.9374, "step": 364030 }, { "epoch": 0.7353838322216252, "grad_norm": 105.7321548461914, "learning_rate": 2.08824995988172e-06, "loss": 7.156, "step": 364040 }, { "epoch": 0.735404032854309, "grad_norm": 164.71913146972656, "learning_rate": 2.087966198024094e-06, "loss": 20.2629, "step": 364050 }, { "epoch": 0.7354242334869928, "grad_norm": 52.61669158935547, "learning_rate": 2.0876824503593673e-06, "loss": 21.2754, "step": 364060 }, { "epoch": 0.7354444341196766, "grad_norm": 495.74639892578125, "learning_rate": 2.087398716888925e-06, "loss": 22.3426, "step": 364070 }, { "epoch": 0.7354646347523605, "grad_norm": 199.39761352539062, "learning_rate": 2.0871149976141484e-06, "loss": 12.4136, "step": 364080 }, { "epoch": 0.7354848353850443, "grad_norm": 51.174617767333984, "learning_rate": 2.086831292536418e-06, "loss": 8.277, "step": 364090 }, { "epoch": 0.7355050360177281, "grad_norm": 366.7145080566406, "learning_rate": 2.0865476016571206e-06, "loss": 14.1236, "step": 364100 }, { "epoch": 0.7355252366504119, "grad_norm": 291.0434875488281, "learning_rate": 2.0862639249776364e-06, "loss": 16.7987, "step": 364110 }, { "epoch": 0.7355454372830957, "grad_norm": 827.0059204101562, "learning_rate": 2.085980262499347e-06, "loss": 33.8915, "step": 364120 }, { "epoch": 0.7355656379157796, "grad_norm": 416.3851623535156, "learning_rate": 2.085696614223638e-06, "loss": 12.6877, "step": 364130 }, { "epoch": 0.7355858385484634, "grad_norm": 295.455078125, "learning_rate": 2.085412980151888e-06, "loss": 11.8272, "step": 364140 }, { "epoch": 0.7356060391811472, "grad_norm": 241.25743103027344, "learning_rate": 2.085129360285484e-06, "loss": 12.7497, "step": 364150 }, { "epoch": 0.735626239813831, "grad_norm": 127.84748077392578, "learning_rate": 2.0848457546258037e-06, "loss": 19.1434, "step": 364160 }, { "epoch": 0.7356464404465148, "grad_norm": 757.2649536132812, "learning_rate": 2.0845621631742342e-06, "loss": 42.4509, "step": 364170 }, { "epoch": 0.7356666410791985, "grad_norm": 95.27188110351562, "learning_rate": 2.0842785859321545e-06, "loss": 15.7337, "step": 364180 }, { "epoch": 0.7356868417118824, "grad_norm": 307.6221008300781, "learning_rate": 2.083995022900946e-06, "loss": 13.6713, "step": 364190 }, { "epoch": 0.7357070423445662, "grad_norm": 264.21405029296875, "learning_rate": 2.083711474081993e-06, "loss": 13.5628, "step": 364200 }, { "epoch": 0.73572724297725, "grad_norm": 439.1298828125, "learning_rate": 2.0834279394766755e-06, "loss": 18.8438, "step": 364210 }, { "epoch": 0.7357474436099338, "grad_norm": 343.0540466308594, "learning_rate": 2.083144419086378e-06, "loss": 23.0455, "step": 364220 }, { "epoch": 0.7357676442426176, "grad_norm": 304.3514099121094, "learning_rate": 2.082860912912481e-06, "loss": 14.0903, "step": 364230 }, { "epoch": 0.7357878448753015, "grad_norm": 464.1375427246094, "learning_rate": 2.082577420956364e-06, "loss": 9.8185, "step": 364240 }, { "epoch": 0.7358080455079853, "grad_norm": 458.04852294921875, "learning_rate": 2.0822939432194134e-06, "loss": 15.2923, "step": 364250 }, { "epoch": 0.7358282461406691, "grad_norm": 173.16824340820312, "learning_rate": 2.082010479703008e-06, "loss": 11.654, "step": 364260 }, { "epoch": 0.7358484467733529, "grad_norm": 280.2939147949219, "learning_rate": 2.0817270304085273e-06, "loss": 8.5364, "step": 364270 }, { "epoch": 0.7358686474060367, "grad_norm": 408.34405517578125, "learning_rate": 2.0814435953373554e-06, "loss": 39.7129, "step": 364280 }, { "epoch": 0.7358888480387206, "grad_norm": 11.529012680053711, "learning_rate": 2.081160174490875e-06, "loss": 20.7997, "step": 364290 }, { "epoch": 0.7359090486714044, "grad_norm": 207.01719665527344, "learning_rate": 2.080876767870466e-06, "loss": 18.9748, "step": 364300 }, { "epoch": 0.7359292493040882, "grad_norm": 0.0, "learning_rate": 2.0805933754775083e-06, "loss": 15.9359, "step": 364310 }, { "epoch": 0.735949449936772, "grad_norm": 335.0271911621094, "learning_rate": 2.0803099973133856e-06, "loss": 10.8069, "step": 364320 }, { "epoch": 0.7359696505694558, "grad_norm": 461.7209777832031, "learning_rate": 2.080026633379478e-06, "loss": 25.7399, "step": 364330 }, { "epoch": 0.7359898512021397, "grad_norm": 51.753822326660156, "learning_rate": 2.079743283677165e-06, "loss": 13.3638, "step": 364340 }, { "epoch": 0.7360100518348235, "grad_norm": 141.8705291748047, "learning_rate": 2.079459948207828e-06, "loss": 14.9442, "step": 364350 }, { "epoch": 0.7360302524675073, "grad_norm": 121.82787322998047, "learning_rate": 2.079176626972852e-06, "loss": 16.6707, "step": 364360 }, { "epoch": 0.7360504531001911, "grad_norm": 0.0, "learning_rate": 2.0788933199736145e-06, "loss": 9.5453, "step": 364370 }, { "epoch": 0.736070653732875, "grad_norm": 261.0813903808594, "learning_rate": 2.0786100272114943e-06, "loss": 13.5053, "step": 364380 }, { "epoch": 0.7360908543655588, "grad_norm": 213.71841430664062, "learning_rate": 2.0783267486878773e-06, "loss": 15.1412, "step": 364390 }, { "epoch": 0.7361110549982426, "grad_norm": 1.229562759399414, "learning_rate": 2.07804348440414e-06, "loss": 10.6317, "step": 364400 }, { "epoch": 0.7361312556309264, "grad_norm": 287.5855712890625, "learning_rate": 2.077760234361664e-06, "loss": 11.9581, "step": 364410 }, { "epoch": 0.7361514562636102, "grad_norm": 369.441650390625, "learning_rate": 2.0774769985618317e-06, "loss": 14.6438, "step": 364420 }, { "epoch": 0.736171656896294, "grad_norm": 111.44657897949219, "learning_rate": 2.07719377700602e-06, "loss": 33.0962, "step": 364430 }, { "epoch": 0.7361918575289778, "grad_norm": 169.14227294921875, "learning_rate": 2.0769105696956128e-06, "loss": 22.2605, "step": 364440 }, { "epoch": 0.7362120581616616, "grad_norm": 147.81724548339844, "learning_rate": 2.0766273766319873e-06, "loss": 17.0881, "step": 364450 }, { "epoch": 0.7362322587943454, "grad_norm": 447.33837890625, "learning_rate": 2.076344197816527e-06, "loss": 25.9864, "step": 364460 }, { "epoch": 0.7362524594270292, "grad_norm": 345.8941345214844, "learning_rate": 2.076061033250611e-06, "loss": 29.266, "step": 364470 }, { "epoch": 0.736272660059713, "grad_norm": 942.658935546875, "learning_rate": 2.0757778829356175e-06, "loss": 13.9456, "step": 364480 }, { "epoch": 0.7362928606923969, "grad_norm": 261.2853698730469, "learning_rate": 2.0754947468729285e-06, "loss": 12.507, "step": 364490 }, { "epoch": 0.7363130613250807, "grad_norm": 330.0860900878906, "learning_rate": 2.075211625063923e-06, "loss": 18.9869, "step": 364500 }, { "epoch": 0.7363332619577645, "grad_norm": 282.0302734375, "learning_rate": 2.074928517509982e-06, "loss": 22.9876, "step": 364510 }, { "epoch": 0.7363534625904483, "grad_norm": 175.9064178466797, "learning_rate": 2.0746454242124846e-06, "loss": 11.8251, "step": 364520 }, { "epoch": 0.7363736632231321, "grad_norm": 495.2672119140625, "learning_rate": 2.0743623451728096e-06, "loss": 14.4239, "step": 364530 }, { "epoch": 0.736393863855816, "grad_norm": 156.2136688232422, "learning_rate": 2.074079280392339e-06, "loss": 17.7734, "step": 364540 }, { "epoch": 0.7364140644884998, "grad_norm": 581.2864379882812, "learning_rate": 2.0737962298724513e-06, "loss": 23.4664, "step": 364550 }, { "epoch": 0.7364342651211836, "grad_norm": 382.4385070800781, "learning_rate": 2.0735131936145237e-06, "loss": 9.7517, "step": 364560 }, { "epoch": 0.7364544657538674, "grad_norm": 185.64642333984375, "learning_rate": 2.073230171619938e-06, "loss": 20.1574, "step": 364570 }, { "epoch": 0.7364746663865512, "grad_norm": 281.55084228515625, "learning_rate": 2.0729471638900772e-06, "loss": 27.5317, "step": 364580 }, { "epoch": 0.736494867019235, "grad_norm": 171.21524047851562, "learning_rate": 2.0726641704263133e-06, "loss": 27.4874, "step": 364590 }, { "epoch": 0.7365150676519189, "grad_norm": 401.3856506347656, "learning_rate": 2.0723811912300295e-06, "loss": 11.1647, "step": 364600 }, { "epoch": 0.7365352682846027, "grad_norm": 309.9267883300781, "learning_rate": 2.072098226302606e-06, "loss": 14.5185, "step": 364610 }, { "epoch": 0.7365554689172865, "grad_norm": 130.75787353515625, "learning_rate": 2.0718152756454207e-06, "loss": 21.4094, "step": 364620 }, { "epoch": 0.7365756695499703, "grad_norm": 230.09799194335938, "learning_rate": 2.071532339259851e-06, "loss": 23.0809, "step": 364630 }, { "epoch": 0.7365958701826542, "grad_norm": 314.9452819824219, "learning_rate": 2.0712494171472776e-06, "loss": 16.7451, "step": 364640 }, { "epoch": 0.736616070815338, "grad_norm": 290.4595947265625, "learning_rate": 2.0709665093090824e-06, "loss": 12.8954, "step": 364650 }, { "epoch": 0.7366362714480218, "grad_norm": 80.91291046142578, "learning_rate": 2.0706836157466383e-06, "loss": 12.0238, "step": 364660 }, { "epoch": 0.7366564720807056, "grad_norm": 183.29373168945312, "learning_rate": 2.0704007364613266e-06, "loss": 13.7047, "step": 364670 }, { "epoch": 0.7366766727133894, "grad_norm": 343.65618896484375, "learning_rate": 2.0701178714545285e-06, "loss": 17.3165, "step": 364680 }, { "epoch": 0.7366968733460731, "grad_norm": 459.056396484375, "learning_rate": 2.0698350207276204e-06, "loss": 16.7552, "step": 364690 }, { "epoch": 0.736717073978757, "grad_norm": 24.113134384155273, "learning_rate": 2.0695521842819788e-06, "loss": 18.0808, "step": 364700 }, { "epoch": 0.7367372746114408, "grad_norm": 207.49606323242188, "learning_rate": 2.0692693621189864e-06, "loss": 14.399, "step": 364710 }, { "epoch": 0.7367574752441246, "grad_norm": 65.61236572265625, "learning_rate": 2.0689865542400196e-06, "loss": 24.8957, "step": 364720 }, { "epoch": 0.7367776758768084, "grad_norm": 262.65625, "learning_rate": 2.0687037606464554e-06, "loss": 20.9687, "step": 364730 }, { "epoch": 0.7367978765094922, "grad_norm": 207.8208465576172, "learning_rate": 2.0684209813396748e-06, "loss": 14.0187, "step": 364740 }, { "epoch": 0.7368180771421761, "grad_norm": 226.45315551757812, "learning_rate": 2.0681382163210533e-06, "loss": 13.6162, "step": 364750 }, { "epoch": 0.7368382777748599, "grad_norm": 310.6300048828125, "learning_rate": 2.0678554655919725e-06, "loss": 15.5037, "step": 364760 }, { "epoch": 0.7368584784075437, "grad_norm": 216.29624938964844, "learning_rate": 2.0675727291538068e-06, "loss": 17.7513, "step": 364770 }, { "epoch": 0.7368786790402275, "grad_norm": 273.3415222167969, "learning_rate": 2.0672900070079375e-06, "loss": 14.5422, "step": 364780 }, { "epoch": 0.7368988796729113, "grad_norm": 561.1387329101562, "learning_rate": 2.067007299155741e-06, "loss": 16.3474, "step": 364790 }, { "epoch": 0.7369190803055952, "grad_norm": 204.79673767089844, "learning_rate": 2.066724605598594e-06, "loss": 14.187, "step": 364800 }, { "epoch": 0.736939280938279, "grad_norm": 285.2713623046875, "learning_rate": 2.0664419263378764e-06, "loss": 17.4179, "step": 364810 }, { "epoch": 0.7369594815709628, "grad_norm": 185.80743408203125, "learning_rate": 2.066159261374964e-06, "loss": 18.6028, "step": 364820 }, { "epoch": 0.7369796822036466, "grad_norm": 134.344482421875, "learning_rate": 2.0658766107112367e-06, "loss": 23.4958, "step": 364830 }, { "epoch": 0.7369998828363304, "grad_norm": 301.8940734863281, "learning_rate": 2.0655939743480714e-06, "loss": 13.0005, "step": 364840 }, { "epoch": 0.7370200834690143, "grad_norm": 117.98628234863281, "learning_rate": 2.0653113522868427e-06, "loss": 9.7244, "step": 364850 }, { "epoch": 0.7370402841016981, "grad_norm": 435.515380859375, "learning_rate": 2.065028744528933e-06, "loss": 29.86, "step": 364860 }, { "epoch": 0.7370604847343819, "grad_norm": 345.5526428222656, "learning_rate": 2.0647461510757173e-06, "loss": 20.6785, "step": 364870 }, { "epoch": 0.7370806853670657, "grad_norm": 250.77130126953125, "learning_rate": 2.0644635719285704e-06, "loss": 18.6128, "step": 364880 }, { "epoch": 0.7371008859997495, "grad_norm": 208.97006225585938, "learning_rate": 2.064181007088873e-06, "loss": 15.1113, "step": 364890 }, { "epoch": 0.7371210866324334, "grad_norm": 346.6854553222656, "learning_rate": 2.063898456558002e-06, "loss": 25.05, "step": 364900 }, { "epoch": 0.7371412872651172, "grad_norm": 326.9173889160156, "learning_rate": 2.0636159203373344e-06, "loss": 30.0216, "step": 364910 }, { "epoch": 0.737161487897801, "grad_norm": 349.0940246582031, "learning_rate": 2.063333398428245e-06, "loss": 14.3607, "step": 364920 }, { "epoch": 0.7371816885304848, "grad_norm": 232.4935760498047, "learning_rate": 2.063050890832114e-06, "loss": 13.6735, "step": 364930 }, { "epoch": 0.7372018891631686, "grad_norm": 265.9580993652344, "learning_rate": 2.0627683975503165e-06, "loss": 13.1873, "step": 364940 }, { "epoch": 0.7372220897958524, "grad_norm": 277.0948486328125, "learning_rate": 2.0624859185842284e-06, "loss": 13.3561, "step": 364950 }, { "epoch": 0.7372422904285362, "grad_norm": 145.63168334960938, "learning_rate": 2.062203453935227e-06, "loss": 11.0036, "step": 364960 }, { "epoch": 0.73726249106122, "grad_norm": 138.93759155273438, "learning_rate": 2.0619210036046923e-06, "loss": 14.2802, "step": 364970 }, { "epoch": 0.7372826916939038, "grad_norm": 318.3196105957031, "learning_rate": 2.0616385675939977e-06, "loss": 14.774, "step": 364980 }, { "epoch": 0.7373028923265876, "grad_norm": 446.8403625488281, "learning_rate": 2.0613561459045184e-06, "loss": 24.2287, "step": 364990 }, { "epoch": 0.7373230929592715, "grad_norm": 229.9139862060547, "learning_rate": 2.061073738537635e-06, "loss": 20.3077, "step": 365000 }, { "epoch": 0.7373432935919553, "grad_norm": 221.96971130371094, "learning_rate": 2.0607913454947215e-06, "loss": 14.015, "step": 365010 }, { "epoch": 0.7373634942246391, "grad_norm": 38.09165573120117, "learning_rate": 2.060508966777153e-06, "loss": 10.6826, "step": 365020 }, { "epoch": 0.7373836948573229, "grad_norm": 215.7365264892578, "learning_rate": 2.0602266023863088e-06, "loss": 17.4002, "step": 365030 }, { "epoch": 0.7374038954900067, "grad_norm": 157.53614807128906, "learning_rate": 2.059944252323562e-06, "loss": 12.8178, "step": 365040 }, { "epoch": 0.7374240961226906, "grad_norm": 19.778242111206055, "learning_rate": 2.0596619165902916e-06, "loss": 16.7695, "step": 365050 }, { "epoch": 0.7374442967553744, "grad_norm": 228.9174346923828, "learning_rate": 2.059379595187871e-06, "loss": 27.6745, "step": 365060 }, { "epoch": 0.7374644973880582, "grad_norm": 348.86090087890625, "learning_rate": 2.0590972881176798e-06, "loss": 15.7967, "step": 365070 }, { "epoch": 0.737484698020742, "grad_norm": 247.96437072753906, "learning_rate": 2.058814995381091e-06, "loss": 11.8111, "step": 365080 }, { "epoch": 0.7375048986534258, "grad_norm": 238.9818878173828, "learning_rate": 2.0585327169794796e-06, "loss": 23.0752, "step": 365090 }, { "epoch": 0.7375250992861097, "grad_norm": 179.84596252441406, "learning_rate": 2.0582504529142248e-06, "loss": 22.9183, "step": 365100 }, { "epoch": 0.7375452999187935, "grad_norm": 636.6481323242188, "learning_rate": 2.0579682031866988e-06, "loss": 21.138, "step": 365110 }, { "epoch": 0.7375655005514773, "grad_norm": 1.6469379663467407, "learning_rate": 2.057685967798281e-06, "loss": 14.4998, "step": 365120 }, { "epoch": 0.7375857011841611, "grad_norm": 167.55844116210938, "learning_rate": 2.0574037467503444e-06, "loss": 15.8097, "step": 365130 }, { "epoch": 0.7376059018168449, "grad_norm": 247.66326904296875, "learning_rate": 2.0571215400442634e-06, "loss": 11.2414, "step": 365140 }, { "epoch": 0.7376261024495288, "grad_norm": 171.0535430908203, "learning_rate": 2.056839347681417e-06, "loss": 18.8929, "step": 365150 }, { "epoch": 0.7376463030822126, "grad_norm": 231.7176055908203, "learning_rate": 2.056557169663179e-06, "loss": 15.4994, "step": 365160 }, { "epoch": 0.7376665037148964, "grad_norm": 291.1667785644531, "learning_rate": 2.056275005990922e-06, "loss": 18.7266, "step": 365170 }, { "epoch": 0.7376867043475802, "grad_norm": 417.04205322265625, "learning_rate": 2.0559928566660235e-06, "loss": 19.2908, "step": 365180 }, { "epoch": 0.737706904980264, "grad_norm": 203.66758728027344, "learning_rate": 2.055710721689863e-06, "loss": 33.9966, "step": 365190 }, { "epoch": 0.7377271056129477, "grad_norm": 32.893314361572266, "learning_rate": 2.0554286010638076e-06, "loss": 27.7009, "step": 365200 }, { "epoch": 0.7377473062456316, "grad_norm": 540.2160034179688, "learning_rate": 2.055146494789236e-06, "loss": 23.3507, "step": 365210 }, { "epoch": 0.7377675068783154, "grad_norm": 221.48060607910156, "learning_rate": 2.0548644028675246e-06, "loss": 19.8652, "step": 365220 }, { "epoch": 0.7377877075109992, "grad_norm": 348.6289367675781, "learning_rate": 2.054582325300047e-06, "loss": 13.2348, "step": 365230 }, { "epoch": 0.737807908143683, "grad_norm": 270.948486328125, "learning_rate": 2.0543002620881764e-06, "loss": 12.5475, "step": 365240 }, { "epoch": 0.7378281087763668, "grad_norm": 423.3374328613281, "learning_rate": 2.0540182132332886e-06, "loss": 20.8072, "step": 365250 }, { "epoch": 0.7378483094090507, "grad_norm": 138.97349548339844, "learning_rate": 2.0537361787367625e-06, "loss": 15.6145, "step": 365260 }, { "epoch": 0.7378685100417345, "grad_norm": 189.7269287109375, "learning_rate": 2.0534541585999656e-06, "loss": 7.0584, "step": 365270 }, { "epoch": 0.7378887106744183, "grad_norm": 119.2812271118164, "learning_rate": 2.053172152824276e-06, "loss": 14.1877, "step": 365280 }, { "epoch": 0.7379089113071021, "grad_norm": 267.62152099609375, "learning_rate": 2.052890161411069e-06, "loss": 23.8224, "step": 365290 }, { "epoch": 0.7379291119397859, "grad_norm": 549.848876953125, "learning_rate": 2.0526081843617183e-06, "loss": 20.4111, "step": 365300 }, { "epoch": 0.7379493125724698, "grad_norm": 559.1074829101562, "learning_rate": 2.0523262216775962e-06, "loss": 21.0699, "step": 365310 }, { "epoch": 0.7379695132051536, "grad_norm": 146.25828552246094, "learning_rate": 2.0520442733600805e-06, "loss": 19.1974, "step": 365320 }, { "epoch": 0.7379897138378374, "grad_norm": 207.69581604003906, "learning_rate": 2.0517623394105427e-06, "loss": 20.4073, "step": 365330 }, { "epoch": 0.7380099144705212, "grad_norm": 241.86427307128906, "learning_rate": 2.0514804198303565e-06, "loss": 12.9895, "step": 365340 }, { "epoch": 0.738030115103205, "grad_norm": 267.7161865234375, "learning_rate": 2.0511985146208966e-06, "loss": 15.1163, "step": 365350 }, { "epoch": 0.7380503157358889, "grad_norm": 194.32603454589844, "learning_rate": 2.0509166237835398e-06, "loss": 30.665, "step": 365360 }, { "epoch": 0.7380705163685727, "grad_norm": 1056.5902099609375, "learning_rate": 2.0506347473196582e-06, "loss": 23.2665, "step": 365370 }, { "epoch": 0.7380907170012565, "grad_norm": 0.0, "learning_rate": 2.0503528852306225e-06, "loss": 13.6363, "step": 365380 }, { "epoch": 0.7381109176339403, "grad_norm": 161.00453186035156, "learning_rate": 2.0500710375178107e-06, "loss": 8.5267, "step": 365390 }, { "epoch": 0.7381311182666241, "grad_norm": 239.17218017578125, "learning_rate": 2.049789204182596e-06, "loss": 16.8064, "step": 365400 }, { "epoch": 0.738151318899308, "grad_norm": 421.1703796386719, "learning_rate": 2.0495073852263476e-06, "loss": 19.5393, "step": 365410 }, { "epoch": 0.7381715195319918, "grad_norm": 100.81103515625, "learning_rate": 2.0492255806504453e-06, "loss": 27.2007, "step": 365420 }, { "epoch": 0.7381917201646756, "grad_norm": 260.3039855957031, "learning_rate": 2.048943790456257e-06, "loss": 26.142, "step": 365430 }, { "epoch": 0.7382119207973594, "grad_norm": 309.89471435546875, "learning_rate": 2.0486620146451613e-06, "loss": 24.508, "step": 365440 }, { "epoch": 0.7382321214300432, "grad_norm": 187.27854919433594, "learning_rate": 2.0483802532185286e-06, "loss": 15.217, "step": 365450 }, { "epoch": 0.738252322062727, "grad_norm": 313.9341735839844, "learning_rate": 2.048098506177731e-06, "loss": 12.4372, "step": 365460 }, { "epoch": 0.7382725226954108, "grad_norm": 281.3721923828125, "learning_rate": 2.0478167735241443e-06, "loss": 21.7751, "step": 365470 }, { "epoch": 0.7382927233280946, "grad_norm": 167.46229553222656, "learning_rate": 2.0475350552591418e-06, "loss": 18.4169, "step": 365480 }, { "epoch": 0.7383129239607784, "grad_norm": 320.04168701171875, "learning_rate": 2.0472533513840923e-06, "loss": 16.0886, "step": 365490 }, { "epoch": 0.7383331245934622, "grad_norm": 39.24554443359375, "learning_rate": 2.046971661900373e-06, "loss": 14.832, "step": 365500 }, { "epoch": 0.738353325226146, "grad_norm": 275.777587890625, "learning_rate": 2.0466899868093566e-06, "loss": 17.8824, "step": 365510 }, { "epoch": 0.7383735258588299, "grad_norm": 244.69287109375, "learning_rate": 2.0464083261124156e-06, "loss": 23.7722, "step": 365520 }, { "epoch": 0.7383937264915137, "grad_norm": 234.90740966796875, "learning_rate": 2.04612667981092e-06, "loss": 22.5435, "step": 365530 }, { "epoch": 0.7384139271241975, "grad_norm": 329.1228942871094, "learning_rate": 2.0458450479062465e-06, "loss": 11.9775, "step": 365540 }, { "epoch": 0.7384341277568813, "grad_norm": 431.00616455078125, "learning_rate": 2.045563430399766e-06, "loss": 18.3387, "step": 365550 }, { "epoch": 0.7384543283895652, "grad_norm": 399.3988952636719, "learning_rate": 2.0452818272928493e-06, "loss": 15.4414, "step": 365560 }, { "epoch": 0.738474529022249, "grad_norm": 456.4241027832031, "learning_rate": 2.0450002385868707e-06, "loss": 20.6484, "step": 365570 }, { "epoch": 0.7384947296549328, "grad_norm": 287.249755859375, "learning_rate": 2.0447186642832063e-06, "loss": 10.968, "step": 365580 }, { "epoch": 0.7385149302876166, "grad_norm": 310.80712890625, "learning_rate": 2.0444371043832208e-06, "loss": 15.8585, "step": 365590 }, { "epoch": 0.7385351309203004, "grad_norm": 378.6942138671875, "learning_rate": 2.04415555888829e-06, "loss": 24.9382, "step": 365600 }, { "epoch": 0.7385553315529843, "grad_norm": 155.54159545898438, "learning_rate": 2.0438740277997888e-06, "loss": 20.0538, "step": 365610 }, { "epoch": 0.7385755321856681, "grad_norm": 80.82801818847656, "learning_rate": 2.0435925111190867e-06, "loss": 10.1502, "step": 365620 }, { "epoch": 0.7385957328183519, "grad_norm": 305.4777526855469, "learning_rate": 2.043311008847555e-06, "loss": 25.9568, "step": 365630 }, { "epoch": 0.7386159334510357, "grad_norm": 66.8805160522461, "learning_rate": 2.043029520986568e-06, "loss": 15.6075, "step": 365640 }, { "epoch": 0.7386361340837195, "grad_norm": 226.81167602539062, "learning_rate": 2.042748047537495e-06, "loss": 21.6082, "step": 365650 }, { "epoch": 0.7386563347164034, "grad_norm": 385.1241760253906, "learning_rate": 2.0424665885017114e-06, "loss": 18.1627, "step": 365660 }, { "epoch": 0.7386765353490872, "grad_norm": 107.81928253173828, "learning_rate": 2.0421851438805845e-06, "loss": 16.1762, "step": 365670 }, { "epoch": 0.738696735981771, "grad_norm": 124.99971771240234, "learning_rate": 2.0419037136754903e-06, "loss": 16.435, "step": 365680 }, { "epoch": 0.7387169366144548, "grad_norm": 365.0763854980469, "learning_rate": 2.0416222978877993e-06, "loss": 24.5573, "step": 365690 }, { "epoch": 0.7387371372471386, "grad_norm": 112.6429214477539, "learning_rate": 2.04134089651888e-06, "loss": 20.69, "step": 365700 }, { "epoch": 0.7387573378798225, "grad_norm": 174.94970703125, "learning_rate": 2.0410595095701084e-06, "loss": 23.3282, "step": 365710 }, { "epoch": 0.7387775385125062, "grad_norm": 343.23284912109375, "learning_rate": 2.040778137042852e-06, "loss": 17.56, "step": 365720 }, { "epoch": 0.73879773914519, "grad_norm": 274.7388000488281, "learning_rate": 2.040496778938486e-06, "loss": 17.9342, "step": 365730 }, { "epoch": 0.7388179397778738, "grad_norm": 255.76620483398438, "learning_rate": 2.04021543525838e-06, "loss": 28.3558, "step": 365740 }, { "epoch": 0.7388381404105576, "grad_norm": 237.38235473632812, "learning_rate": 2.0399341060039023e-06, "loss": 23.6198, "step": 365750 }, { "epoch": 0.7388583410432414, "grad_norm": 207.71348571777344, "learning_rate": 2.039652791176429e-06, "loss": 10.1718, "step": 365760 }, { "epoch": 0.7388785416759253, "grad_norm": 247.26116943359375, "learning_rate": 2.0393714907773294e-06, "loss": 19.9435, "step": 365770 }, { "epoch": 0.7388987423086091, "grad_norm": 352.3051452636719, "learning_rate": 2.0390902048079717e-06, "loss": 20.0924, "step": 365780 }, { "epoch": 0.7389189429412929, "grad_norm": 301.5779724121094, "learning_rate": 2.038808933269729e-06, "loss": 14.3689, "step": 365790 }, { "epoch": 0.7389391435739767, "grad_norm": 284.98895263671875, "learning_rate": 2.0385276761639768e-06, "loss": 7.6783, "step": 365800 }, { "epoch": 0.7389593442066605, "grad_norm": 368.13958740234375, "learning_rate": 2.0382464334920774e-06, "loss": 15.752, "step": 365810 }, { "epoch": 0.7389795448393444, "grad_norm": 77.63290405273438, "learning_rate": 2.037965205255406e-06, "loss": 9.4528, "step": 365820 }, { "epoch": 0.7389997454720282, "grad_norm": 122.74534606933594, "learning_rate": 2.037683991455334e-06, "loss": 25.9699, "step": 365830 }, { "epoch": 0.739019946104712, "grad_norm": 110.82786560058594, "learning_rate": 2.037402792093232e-06, "loss": 14.8682, "step": 365840 }, { "epoch": 0.7390401467373958, "grad_norm": 274.2793273925781, "learning_rate": 2.0371216071704667e-06, "loss": 17.1196, "step": 365850 }, { "epoch": 0.7390603473700796, "grad_norm": 442.5780944824219, "learning_rate": 2.036840436688412e-06, "loss": 20.0816, "step": 365860 }, { "epoch": 0.7390805480027635, "grad_norm": 156.7594757080078, "learning_rate": 2.036559280648441e-06, "loss": 25.1313, "step": 365870 }, { "epoch": 0.7391007486354473, "grad_norm": 199.4496307373047, "learning_rate": 2.036278139051917e-06, "loss": 19.0366, "step": 365880 }, { "epoch": 0.7391209492681311, "grad_norm": 301.6427307128906, "learning_rate": 2.0359970119002143e-06, "loss": 27.6711, "step": 365890 }, { "epoch": 0.7391411499008149, "grad_norm": 204.02413940429688, "learning_rate": 2.035715899194704e-06, "loss": 18.1012, "step": 365900 }, { "epoch": 0.7391613505334987, "grad_norm": 239.34986877441406, "learning_rate": 2.0354348009367554e-06, "loss": 14.6583, "step": 365910 }, { "epoch": 0.7391815511661826, "grad_norm": 254.1197509765625, "learning_rate": 2.0351537171277353e-06, "loss": 10.1444, "step": 365920 }, { "epoch": 0.7392017517988664, "grad_norm": 812.4669799804688, "learning_rate": 2.034872647769019e-06, "loss": 27.275, "step": 365930 }, { "epoch": 0.7392219524315502, "grad_norm": 472.6971435546875, "learning_rate": 2.0345915928619737e-06, "loss": 20.2201, "step": 365940 }, { "epoch": 0.739242153064234, "grad_norm": 8.073907852172852, "learning_rate": 2.034310552407967e-06, "loss": 11.8616, "step": 365950 }, { "epoch": 0.7392623536969178, "grad_norm": 451.08392333984375, "learning_rate": 2.0340295264083716e-06, "loss": 17.3742, "step": 365960 }, { "epoch": 0.7392825543296015, "grad_norm": 665.9612426757812, "learning_rate": 2.033748514864558e-06, "loss": 26.3067, "step": 365970 }, { "epoch": 0.7393027549622854, "grad_norm": 278.1483154296875, "learning_rate": 2.0334675177778938e-06, "loss": 28.0713, "step": 365980 }, { "epoch": 0.7393229555949692, "grad_norm": 33.099708557128906, "learning_rate": 2.033186535149748e-06, "loss": 23.295, "step": 365990 }, { "epoch": 0.739343156227653, "grad_norm": 262.3233947753906, "learning_rate": 2.0329055669814936e-06, "loss": 14.9861, "step": 366000 }, { "epoch": 0.7393633568603368, "grad_norm": 118.96928405761719, "learning_rate": 2.0326246132744963e-06, "loss": 19.6959, "step": 366010 }, { "epoch": 0.7393835574930206, "grad_norm": 541.4327392578125, "learning_rate": 2.0323436740301262e-06, "loss": 38.5022, "step": 366020 }, { "epoch": 0.7394037581257045, "grad_norm": 147.88461303710938, "learning_rate": 2.0320627492497543e-06, "loss": 8.2325, "step": 366030 }, { "epoch": 0.7394239587583883, "grad_norm": 236.9964599609375, "learning_rate": 2.0317818389347468e-06, "loss": 14.2466, "step": 366040 }, { "epoch": 0.7394441593910721, "grad_norm": 0.0, "learning_rate": 2.0315009430864762e-06, "loss": 13.3172, "step": 366050 }, { "epoch": 0.7394643600237559, "grad_norm": 160.79183959960938, "learning_rate": 2.03122006170631e-06, "loss": 16.8306, "step": 366060 }, { "epoch": 0.7394845606564397, "grad_norm": 444.9661865234375, "learning_rate": 2.030939194795616e-06, "loss": 19.8156, "step": 366070 }, { "epoch": 0.7395047612891236, "grad_norm": 258.55657958984375, "learning_rate": 2.030658342355765e-06, "loss": 12.6051, "step": 366080 }, { "epoch": 0.7395249619218074, "grad_norm": 80.04545593261719, "learning_rate": 2.030377504388126e-06, "loss": 18.1042, "step": 366090 }, { "epoch": 0.7395451625544912, "grad_norm": 260.5012512207031, "learning_rate": 2.030096680894065e-06, "loss": 14.9923, "step": 366100 }, { "epoch": 0.739565363187175, "grad_norm": 74.93181610107422, "learning_rate": 2.029815871874952e-06, "loss": 13.7342, "step": 366110 }, { "epoch": 0.7395855638198588, "grad_norm": 158.3104705810547, "learning_rate": 2.0295350773321583e-06, "loss": 15.5955, "step": 366120 }, { "epoch": 0.7396057644525427, "grad_norm": 112.11204528808594, "learning_rate": 2.0292542972670503e-06, "loss": 27.239, "step": 366130 }, { "epoch": 0.7396259650852265, "grad_norm": 54.970401763916016, "learning_rate": 2.0289735316809948e-06, "loss": 16.5304, "step": 366140 }, { "epoch": 0.7396461657179103, "grad_norm": 311.14642333984375, "learning_rate": 2.028692780575364e-06, "loss": 23.2708, "step": 366150 }, { "epoch": 0.7396663663505941, "grad_norm": 106.73009490966797, "learning_rate": 2.028412043951524e-06, "loss": 15.1041, "step": 366160 }, { "epoch": 0.739686566983278, "grad_norm": 391.684814453125, "learning_rate": 2.028131321810841e-06, "loss": 23.0826, "step": 366170 }, { "epoch": 0.7397067676159618, "grad_norm": 646.7919921875, "learning_rate": 2.027850614154686e-06, "loss": 23.9114, "step": 366180 }, { "epoch": 0.7397269682486456, "grad_norm": 288.9836120605469, "learning_rate": 2.0275699209844306e-06, "loss": 19.1593, "step": 366190 }, { "epoch": 0.7397471688813294, "grad_norm": 209.39369201660156, "learning_rate": 2.027289242301435e-06, "loss": 14.8877, "step": 366200 }, { "epoch": 0.7397673695140132, "grad_norm": 22.264484405517578, "learning_rate": 2.0270085781070707e-06, "loss": 17.2931, "step": 366210 }, { "epoch": 0.739787570146697, "grad_norm": 158.23672485351562, "learning_rate": 2.026727928402708e-06, "loss": 12.6629, "step": 366220 }, { "epoch": 0.7398077707793808, "grad_norm": 1172.7041015625, "learning_rate": 2.0264472931897135e-06, "loss": 24.6025, "step": 366230 }, { "epoch": 0.7398279714120646, "grad_norm": 368.38336181640625, "learning_rate": 2.026166672469451e-06, "loss": 20.6105, "step": 366240 }, { "epoch": 0.7398481720447484, "grad_norm": 61.520179748535156, "learning_rate": 2.0258860662432946e-06, "loss": 19.0537, "step": 366250 }, { "epoch": 0.7398683726774322, "grad_norm": 10.269258499145508, "learning_rate": 2.025605474512608e-06, "loss": 15.9425, "step": 366260 }, { "epoch": 0.739888573310116, "grad_norm": 153.625, "learning_rate": 2.025324897278758e-06, "loss": 17.617, "step": 366270 }, { "epoch": 0.7399087739427999, "grad_norm": 285.333251953125, "learning_rate": 2.0250443345431135e-06, "loss": 23.1137, "step": 366280 }, { "epoch": 0.7399289745754837, "grad_norm": 62.553775787353516, "learning_rate": 2.0247637863070446e-06, "loss": 17.1555, "step": 366290 }, { "epoch": 0.7399491752081675, "grad_norm": 243.1405487060547, "learning_rate": 2.0244832525719155e-06, "loss": 10.8034, "step": 366300 }, { "epoch": 0.7399693758408513, "grad_norm": 253.81822204589844, "learning_rate": 2.0242027333390924e-06, "loss": 42.0256, "step": 366310 }, { "epoch": 0.7399895764735351, "grad_norm": 243.43682861328125, "learning_rate": 2.0239222286099463e-06, "loss": 16.7683, "step": 366320 }, { "epoch": 0.740009777106219, "grad_norm": 269.8628234863281, "learning_rate": 2.0236417383858404e-06, "loss": 22.693, "step": 366330 }, { "epoch": 0.7400299777389028, "grad_norm": 161.45541381835938, "learning_rate": 2.0233612626681454e-06, "loss": 18.2236, "step": 366340 }, { "epoch": 0.7400501783715866, "grad_norm": 526.797119140625, "learning_rate": 2.0230808014582265e-06, "loss": 17.5325, "step": 366350 }, { "epoch": 0.7400703790042704, "grad_norm": 104.6729965209961, "learning_rate": 2.0228003547574488e-06, "loss": 25.3331, "step": 366360 }, { "epoch": 0.7400905796369542, "grad_norm": 0.0, "learning_rate": 2.022519922567183e-06, "loss": 9.1429, "step": 366370 }, { "epoch": 0.7401107802696381, "grad_norm": 345.9813232421875, "learning_rate": 2.0222395048887942e-06, "loss": 14.1053, "step": 366380 }, { "epoch": 0.7401309809023219, "grad_norm": 125.46451568603516, "learning_rate": 2.0219591017236474e-06, "loss": 21.4447, "step": 366390 }, { "epoch": 0.7401511815350057, "grad_norm": 300.88702392578125, "learning_rate": 2.02167871307311e-06, "loss": 18.8821, "step": 366400 }, { "epoch": 0.7401713821676895, "grad_norm": 269.1597900390625, "learning_rate": 2.021398338938552e-06, "loss": 26.6138, "step": 366410 }, { "epoch": 0.7401915828003733, "grad_norm": 604.0388793945312, "learning_rate": 2.021117979321336e-06, "loss": 18.7006, "step": 366420 }, { "epoch": 0.7402117834330572, "grad_norm": 112.74735260009766, "learning_rate": 2.0208376342228287e-06, "loss": 20.5085, "step": 366430 }, { "epoch": 0.740231984065741, "grad_norm": 316.6685791015625, "learning_rate": 2.0205573036443994e-06, "loss": 19.5005, "step": 366440 }, { "epoch": 0.7402521846984248, "grad_norm": 234.3267822265625, "learning_rate": 2.020276987587412e-06, "loss": 15.2741, "step": 366450 }, { "epoch": 0.7402723853311086, "grad_norm": 295.02899169921875, "learning_rate": 2.019996686053231e-06, "loss": 21.1693, "step": 366460 }, { "epoch": 0.7402925859637924, "grad_norm": 93.33123016357422, "learning_rate": 2.0197163990432256e-06, "loss": 19.6212, "step": 366470 }, { "epoch": 0.7403127865964761, "grad_norm": 159.46368408203125, "learning_rate": 2.0194361265587644e-06, "loss": 8.1293, "step": 366480 }, { "epoch": 0.74033298722916, "grad_norm": 193.46075439453125, "learning_rate": 2.019155868601206e-06, "loss": 7.9615, "step": 366490 }, { "epoch": 0.7403531878618438, "grad_norm": 206.31236267089844, "learning_rate": 2.0188756251719204e-06, "loss": 16.6118, "step": 366500 }, { "epoch": 0.7403733884945276, "grad_norm": 90.60697937011719, "learning_rate": 2.018595396272275e-06, "loss": 18.3829, "step": 366510 }, { "epoch": 0.7403935891272114, "grad_norm": 304.6546936035156, "learning_rate": 2.018315181903635e-06, "loss": 15.7765, "step": 366520 }, { "epoch": 0.7404137897598952, "grad_norm": 202.5250701904297, "learning_rate": 2.018034982067363e-06, "loss": 19.462, "step": 366530 }, { "epoch": 0.7404339903925791, "grad_norm": 114.65703582763672, "learning_rate": 2.0177547967648283e-06, "loss": 20.0481, "step": 366540 }, { "epoch": 0.7404541910252629, "grad_norm": 226.51736450195312, "learning_rate": 2.0174746259973956e-06, "loss": 10.1943, "step": 366550 }, { "epoch": 0.7404743916579467, "grad_norm": 554.5503540039062, "learning_rate": 2.0171944697664277e-06, "loss": 16.0153, "step": 366560 }, { "epoch": 0.7404945922906305, "grad_norm": 367.66064453125, "learning_rate": 2.0169143280732916e-06, "loss": 11.599, "step": 366570 }, { "epoch": 0.7405147929233143, "grad_norm": 267.1320495605469, "learning_rate": 2.016634200919356e-06, "loss": 14.3265, "step": 366580 }, { "epoch": 0.7405349935559982, "grad_norm": 0.0, "learning_rate": 2.016354088305983e-06, "loss": 12.8017, "step": 366590 }, { "epoch": 0.740555194188682, "grad_norm": 426.7953186035156, "learning_rate": 2.016073990234536e-06, "loss": 20.2186, "step": 366600 }, { "epoch": 0.7405753948213658, "grad_norm": 73.13876342773438, "learning_rate": 2.0157939067063848e-06, "loss": 20.2897, "step": 366610 }, { "epoch": 0.7405955954540496, "grad_norm": 361.85577392578125, "learning_rate": 2.0155138377228924e-06, "loss": 21.9846, "step": 366620 }, { "epoch": 0.7406157960867334, "grad_norm": 57.44482421875, "learning_rate": 2.0152337832854213e-06, "loss": 13.9314, "step": 366630 }, { "epoch": 0.7406359967194173, "grad_norm": 0.0, "learning_rate": 2.014953743395341e-06, "loss": 12.2749, "step": 366640 }, { "epoch": 0.7406561973521011, "grad_norm": 279.24456787109375, "learning_rate": 2.014673718054012e-06, "loss": 34.2237, "step": 366650 }, { "epoch": 0.7406763979847849, "grad_norm": 24.146780014038086, "learning_rate": 2.0143937072628033e-06, "loss": 13.0718, "step": 366660 }, { "epoch": 0.7406965986174687, "grad_norm": 429.49993896484375, "learning_rate": 2.014113711023077e-06, "loss": 18.8736, "step": 366670 }, { "epoch": 0.7407167992501525, "grad_norm": 467.516845703125, "learning_rate": 2.013833729336197e-06, "loss": 29.0995, "step": 366680 }, { "epoch": 0.7407369998828364, "grad_norm": 169.6893310546875, "learning_rate": 2.0135537622035313e-06, "loss": 13.7956, "step": 366690 }, { "epoch": 0.7407572005155202, "grad_norm": 253.6094512939453, "learning_rate": 2.0132738096264415e-06, "loss": 11.4353, "step": 366700 }, { "epoch": 0.740777401148204, "grad_norm": 195.2223358154297, "learning_rate": 2.0129938716062917e-06, "loss": 21.0559, "step": 366710 }, { "epoch": 0.7407976017808878, "grad_norm": 100.58228302001953, "learning_rate": 2.0127139481444475e-06, "loss": 21.3179, "step": 366720 }, { "epoch": 0.7408178024135716, "grad_norm": 203.38262939453125, "learning_rate": 2.0124340392422746e-06, "loss": 20.3868, "step": 366730 }, { "epoch": 0.7408380030462554, "grad_norm": 393.06842041015625, "learning_rate": 2.012154144901136e-06, "loss": 33.2973, "step": 366740 }, { "epoch": 0.7408582036789392, "grad_norm": 423.6712646484375, "learning_rate": 2.0118742651223944e-06, "loss": 17.7266, "step": 366750 }, { "epoch": 0.740878404311623, "grad_norm": 204.5248260498047, "learning_rate": 2.0115943999074167e-06, "loss": 10.1614, "step": 366760 }, { "epoch": 0.7408986049443068, "grad_norm": 154.24110412597656, "learning_rate": 2.011314549257565e-06, "loss": 19.0672, "step": 366770 }, { "epoch": 0.7409188055769906, "grad_norm": 189.6920166015625, "learning_rate": 2.0110347131742024e-06, "loss": 6.3045, "step": 366780 }, { "epoch": 0.7409390062096745, "grad_norm": 272.947998046875, "learning_rate": 2.0107548916586946e-06, "loss": 16.1232, "step": 366790 }, { "epoch": 0.7409592068423583, "grad_norm": 377.66229248046875, "learning_rate": 2.0104750847124075e-06, "loss": 13.0806, "step": 366800 }, { "epoch": 0.7409794074750421, "grad_norm": 188.9100799560547, "learning_rate": 2.010195292336699e-06, "loss": 23.162, "step": 366810 }, { "epoch": 0.7409996081077259, "grad_norm": 294.621337890625, "learning_rate": 2.0099155145329364e-06, "loss": 19.2804, "step": 366820 }, { "epoch": 0.7410198087404097, "grad_norm": 237.41400146484375, "learning_rate": 2.009635751302484e-06, "loss": 14.8057, "step": 366830 }, { "epoch": 0.7410400093730936, "grad_norm": 8.49656867980957, "learning_rate": 2.0093560026467046e-06, "loss": 13.9875, "step": 366840 }, { "epoch": 0.7410602100057774, "grad_norm": 169.50997924804688, "learning_rate": 2.0090762685669597e-06, "loss": 14.3442, "step": 366850 }, { "epoch": 0.7410804106384612, "grad_norm": 371.4494934082031, "learning_rate": 2.0087965490646144e-06, "loss": 21.4225, "step": 366860 }, { "epoch": 0.741100611271145, "grad_norm": 374.9836730957031, "learning_rate": 2.0085168441410352e-06, "loss": 13.1538, "step": 366870 }, { "epoch": 0.7411208119038288, "grad_norm": 59.40908432006836, "learning_rate": 2.0082371537975786e-06, "loss": 15.9524, "step": 366880 }, { "epoch": 0.7411410125365127, "grad_norm": 297.62969970703125, "learning_rate": 2.0079574780356116e-06, "loss": 22.4849, "step": 366890 }, { "epoch": 0.7411612131691965, "grad_norm": 390.44000244140625, "learning_rate": 2.007677816856498e-06, "loss": 12.569, "step": 366900 }, { "epoch": 0.7411814138018803, "grad_norm": 767.443603515625, "learning_rate": 2.0073981702616e-06, "loss": 30.7023, "step": 366910 }, { "epoch": 0.7412016144345641, "grad_norm": 292.87701416015625, "learning_rate": 2.007118538252279e-06, "loss": 23.736, "step": 366920 }, { "epoch": 0.7412218150672479, "grad_norm": 450.4876403808594, "learning_rate": 2.006838920829901e-06, "loss": 18.5403, "step": 366930 }, { "epoch": 0.7412420156999318, "grad_norm": 6.3739800453186035, "learning_rate": 2.0065593179958267e-06, "loss": 34.3997, "step": 366940 }, { "epoch": 0.7412622163326156, "grad_norm": 236.29052734375, "learning_rate": 2.0062797297514176e-06, "loss": 10.6745, "step": 366950 }, { "epoch": 0.7412824169652994, "grad_norm": 135.9730682373047, "learning_rate": 2.0060001560980395e-06, "loss": 25.962, "step": 366960 }, { "epoch": 0.7413026175979832, "grad_norm": 123.23271179199219, "learning_rate": 2.0057205970370524e-06, "loss": 25.3257, "step": 366970 }, { "epoch": 0.741322818230667, "grad_norm": 355.720947265625, "learning_rate": 2.0054410525698217e-06, "loss": 21.4028, "step": 366980 }, { "epoch": 0.7413430188633509, "grad_norm": 167.85791015625, "learning_rate": 2.0051615226977072e-06, "loss": 9.8572, "step": 366990 }, { "epoch": 0.7413632194960346, "grad_norm": 538.937255859375, "learning_rate": 2.0048820074220716e-06, "loss": 27.8741, "step": 367000 }, { "epoch": 0.7413834201287184, "grad_norm": 372.1699523925781, "learning_rate": 2.0046025067442788e-06, "loss": 20.5673, "step": 367010 }, { "epoch": 0.7414036207614022, "grad_norm": 1249.5499267578125, "learning_rate": 2.0043230206656884e-06, "loss": 27.8961, "step": 367020 }, { "epoch": 0.741423821394086, "grad_norm": 180.46141052246094, "learning_rate": 2.0040435491876666e-06, "loss": 13.8916, "step": 367030 }, { "epoch": 0.7414440220267698, "grad_norm": 202.12899780273438, "learning_rate": 2.0037640923115704e-06, "loss": 21.2558, "step": 367040 }, { "epoch": 0.7414642226594537, "grad_norm": 392.5320739746094, "learning_rate": 2.0034846500387674e-06, "loss": 12.3448, "step": 367050 }, { "epoch": 0.7414844232921375, "grad_norm": 32.71195983886719, "learning_rate": 2.003205222370616e-06, "loss": 23.2893, "step": 367060 }, { "epoch": 0.7415046239248213, "grad_norm": 233.7024383544922, "learning_rate": 2.0029258093084774e-06, "loss": 18.3875, "step": 367070 }, { "epoch": 0.7415248245575051, "grad_norm": 391.29132080078125, "learning_rate": 2.0026464108537153e-06, "loss": 18.953, "step": 367080 }, { "epoch": 0.7415450251901889, "grad_norm": 441.9764709472656, "learning_rate": 2.002367027007694e-06, "loss": 9.2161, "step": 367090 }, { "epoch": 0.7415652258228728, "grad_norm": 209.99954223632812, "learning_rate": 2.002087657771769e-06, "loss": 10.1851, "step": 367100 }, { "epoch": 0.7415854264555566, "grad_norm": 266.2446594238281, "learning_rate": 2.001808303147305e-06, "loss": 25.4493, "step": 367110 }, { "epoch": 0.7416056270882404, "grad_norm": 312.999755859375, "learning_rate": 2.0015289631356654e-06, "loss": 10.5166, "step": 367120 }, { "epoch": 0.7416258277209242, "grad_norm": 464.5867004394531, "learning_rate": 2.00124963773821e-06, "loss": 16.7711, "step": 367130 }, { "epoch": 0.741646028353608, "grad_norm": 231.7934112548828, "learning_rate": 2.000970326956299e-06, "loss": 17.1499, "step": 367140 }, { "epoch": 0.7416662289862919, "grad_norm": 285.97802734375, "learning_rate": 2.0006910307912965e-06, "loss": 52.8812, "step": 367150 }, { "epoch": 0.7416864296189757, "grad_norm": 243.31594848632812, "learning_rate": 2.0004117492445614e-06, "loss": 12.8612, "step": 367160 }, { "epoch": 0.7417066302516595, "grad_norm": 224.35581970214844, "learning_rate": 2.0001324823174544e-06, "loss": 25.5231, "step": 367170 }, { "epoch": 0.7417268308843433, "grad_norm": 308.7130432128906, "learning_rate": 1.9998532300113376e-06, "loss": 15.5439, "step": 367180 }, { "epoch": 0.7417470315170271, "grad_norm": 215.59402465820312, "learning_rate": 1.9995739923275743e-06, "loss": 21.0197, "step": 367190 }, { "epoch": 0.741767232149711, "grad_norm": 292.8240051269531, "learning_rate": 1.999294769267523e-06, "loss": 11.6652, "step": 367200 }, { "epoch": 0.7417874327823948, "grad_norm": 299.47064208984375, "learning_rate": 1.999015560832544e-06, "loss": 22.7522, "step": 367210 }, { "epoch": 0.7418076334150786, "grad_norm": 455.2286376953125, "learning_rate": 1.9987363670240006e-06, "loss": 18.3243, "step": 367220 }, { "epoch": 0.7418278340477624, "grad_norm": 657.6630249023438, "learning_rate": 1.998457187843252e-06, "loss": 20.4992, "step": 367230 }, { "epoch": 0.7418480346804462, "grad_norm": 256.2066345214844, "learning_rate": 1.998178023291657e-06, "loss": 13.7661, "step": 367240 }, { "epoch": 0.74186823531313, "grad_norm": 208.75985717773438, "learning_rate": 1.9978988733705807e-06, "loss": 16.5184, "step": 367250 }, { "epoch": 0.7418884359458138, "grad_norm": 1055.902587890625, "learning_rate": 1.997619738081379e-06, "loss": 20.5726, "step": 367260 }, { "epoch": 0.7419086365784976, "grad_norm": 243.648193359375, "learning_rate": 1.997340617425416e-06, "loss": 10.2253, "step": 367270 }, { "epoch": 0.7419288372111814, "grad_norm": 595.0977172851562, "learning_rate": 1.9970615114040514e-06, "loss": 15.8817, "step": 367280 }, { "epoch": 0.7419490378438652, "grad_norm": 407.83648681640625, "learning_rate": 1.9967824200186426e-06, "loss": 20.4996, "step": 367290 }, { "epoch": 0.741969238476549, "grad_norm": 221.29779052734375, "learning_rate": 1.996503343270554e-06, "loss": 11.8434, "step": 367300 }, { "epoch": 0.7419894391092329, "grad_norm": 251.33489990234375, "learning_rate": 1.9962242811611437e-06, "loss": 13.3403, "step": 367310 }, { "epoch": 0.7420096397419167, "grad_norm": 345.42535400390625, "learning_rate": 1.99594523369177e-06, "loss": 12.9572, "step": 367320 }, { "epoch": 0.7420298403746005, "grad_norm": 100.04777526855469, "learning_rate": 1.995666200863795e-06, "loss": 21.9641, "step": 367330 }, { "epoch": 0.7420500410072843, "grad_norm": 565.8916015625, "learning_rate": 1.9953871826785804e-06, "loss": 18.6878, "step": 367340 }, { "epoch": 0.7420702416399682, "grad_norm": 43.21929168701172, "learning_rate": 1.9951081791374843e-06, "loss": 18.6011, "step": 367350 }, { "epoch": 0.742090442272652, "grad_norm": 9.895845413208008, "learning_rate": 1.994829190241865e-06, "loss": 11.3782, "step": 367360 }, { "epoch": 0.7421106429053358, "grad_norm": 251.171142578125, "learning_rate": 1.9945502159930846e-06, "loss": 11.0881, "step": 367370 }, { "epoch": 0.7421308435380196, "grad_norm": 351.5343322753906, "learning_rate": 1.994271256392503e-06, "loss": 11.1921, "step": 367380 }, { "epoch": 0.7421510441707034, "grad_norm": 168.21499633789062, "learning_rate": 1.993992311441476e-06, "loss": 17.1055, "step": 367390 }, { "epoch": 0.7421712448033873, "grad_norm": 69.68181610107422, "learning_rate": 1.9937133811413666e-06, "loss": 16.3139, "step": 367400 }, { "epoch": 0.7421914454360711, "grad_norm": 173.83041381835938, "learning_rate": 1.9934344654935367e-06, "loss": 13.015, "step": 367410 }, { "epoch": 0.7422116460687549, "grad_norm": 296.4053955078125, "learning_rate": 1.9931555644993395e-06, "loss": 20.4323, "step": 367420 }, { "epoch": 0.7422318467014387, "grad_norm": 513.5421142578125, "learning_rate": 1.9928766781601366e-06, "loss": 21.2729, "step": 367430 }, { "epoch": 0.7422520473341225, "grad_norm": 244.1932373046875, "learning_rate": 1.9925978064772904e-06, "loss": 32.3145, "step": 367440 }, { "epoch": 0.7422722479668064, "grad_norm": 152.6263885498047, "learning_rate": 1.9923189494521576e-06, "loss": 12.8472, "step": 367450 }, { "epoch": 0.7422924485994902, "grad_norm": 230.7895050048828, "learning_rate": 1.9920401070860955e-06, "loss": 16.6771, "step": 367460 }, { "epoch": 0.742312649232174, "grad_norm": 160.8119354248047, "learning_rate": 1.991761279380466e-06, "loss": 12.8239, "step": 367470 }, { "epoch": 0.7423328498648578, "grad_norm": 335.1759338378906, "learning_rate": 1.9914824663366296e-06, "loss": 22.2577, "step": 367480 }, { "epoch": 0.7423530504975416, "grad_norm": 254.2704620361328, "learning_rate": 1.9912036679559397e-06, "loss": 18.7343, "step": 367490 }, { "epoch": 0.7423732511302255, "grad_norm": 397.2294616699219, "learning_rate": 1.990924884239758e-06, "loss": 14.7446, "step": 367500 }, { "epoch": 0.7423934517629092, "grad_norm": 316.7737121582031, "learning_rate": 1.990646115189446e-06, "loss": 20.0771, "step": 367510 }, { "epoch": 0.742413652395593, "grad_norm": 134.926025390625, "learning_rate": 1.990367360806359e-06, "loss": 11.968, "step": 367520 }, { "epoch": 0.7424338530282768, "grad_norm": 261.20159912109375, "learning_rate": 1.9900886210918547e-06, "loss": 17.8885, "step": 367530 }, { "epoch": 0.7424540536609606, "grad_norm": 146.17864990234375, "learning_rate": 1.989809896047295e-06, "loss": 24.1632, "step": 367540 }, { "epoch": 0.7424742542936444, "grad_norm": 337.9813537597656, "learning_rate": 1.989531185674037e-06, "loss": 26.4214, "step": 367550 }, { "epoch": 0.7424944549263283, "grad_norm": 263.51336669921875, "learning_rate": 1.989252489973438e-06, "loss": 10.7383, "step": 367560 }, { "epoch": 0.7425146555590121, "grad_norm": 502.2348327636719, "learning_rate": 1.988973808946858e-06, "loss": 11.2318, "step": 367570 }, { "epoch": 0.7425348561916959, "grad_norm": 304.49444580078125, "learning_rate": 1.988695142595653e-06, "loss": 8.0019, "step": 367580 }, { "epoch": 0.7425550568243797, "grad_norm": 236.69387817382812, "learning_rate": 1.988416490921184e-06, "loss": 12.1734, "step": 367590 }, { "epoch": 0.7425752574570635, "grad_norm": 467.0987548828125, "learning_rate": 1.988137853924808e-06, "loss": 32.6569, "step": 367600 }, { "epoch": 0.7425954580897474, "grad_norm": 392.0259704589844, "learning_rate": 1.9878592316078813e-06, "loss": 21.9704, "step": 367610 }, { "epoch": 0.7426156587224312, "grad_norm": 327.9149475097656, "learning_rate": 1.987580623971765e-06, "loss": 37.3804, "step": 367620 }, { "epoch": 0.742635859355115, "grad_norm": 366.4577331542969, "learning_rate": 1.987302031017814e-06, "loss": 21.7834, "step": 367630 }, { "epoch": 0.7426560599877988, "grad_norm": 178.47251892089844, "learning_rate": 1.9870234527473886e-06, "loss": 27.7447, "step": 367640 }, { "epoch": 0.7426762606204826, "grad_norm": 405.4564514160156, "learning_rate": 1.986744889161844e-06, "loss": 15.2619, "step": 367650 }, { "epoch": 0.7426964612531665, "grad_norm": 335.5322265625, "learning_rate": 1.986466340262541e-06, "loss": 16.0602, "step": 367660 }, { "epoch": 0.7427166618858503, "grad_norm": 104.83189392089844, "learning_rate": 1.9861878060508357e-06, "loss": 28.1249, "step": 367670 }, { "epoch": 0.7427368625185341, "grad_norm": 0.0, "learning_rate": 1.985909286528084e-06, "loss": 17.4133, "step": 367680 }, { "epoch": 0.7427570631512179, "grad_norm": 159.8107452392578, "learning_rate": 1.985630781695646e-06, "loss": 16.9388, "step": 367690 }, { "epoch": 0.7427772637839017, "grad_norm": 242.489990234375, "learning_rate": 1.9853522915548777e-06, "loss": 27.2193, "step": 367700 }, { "epoch": 0.7427974644165856, "grad_norm": 287.59765625, "learning_rate": 1.985073816107136e-06, "loss": 28.216, "step": 367710 }, { "epoch": 0.7428176650492694, "grad_norm": 182.03689575195312, "learning_rate": 1.984795355353778e-06, "loss": 23.0323, "step": 367720 }, { "epoch": 0.7428378656819532, "grad_norm": 455.4546203613281, "learning_rate": 1.9845169092961643e-06, "loss": 21.2279, "step": 367730 }, { "epoch": 0.742858066314637, "grad_norm": 221.86207580566406, "learning_rate": 1.984238477935649e-06, "loss": 16.6077, "step": 367740 }, { "epoch": 0.7428782669473208, "grad_norm": 200.2098388671875, "learning_rate": 1.9839600612735877e-06, "loss": 16.6375, "step": 367750 }, { "epoch": 0.7428984675800046, "grad_norm": 333.95086669921875, "learning_rate": 1.983681659311341e-06, "loss": 11.5182, "step": 367760 }, { "epoch": 0.7429186682126884, "grad_norm": 158.81553649902344, "learning_rate": 1.9834032720502646e-06, "loss": 22.653, "step": 367770 }, { "epoch": 0.7429388688453722, "grad_norm": 301.6739196777344, "learning_rate": 1.9831248994917123e-06, "loss": 22.8606, "step": 367780 }, { "epoch": 0.742959069478056, "grad_norm": 188.39089965820312, "learning_rate": 1.9828465416370434e-06, "loss": 10.0721, "step": 367790 }, { "epoch": 0.7429792701107398, "grad_norm": 231.16969299316406, "learning_rate": 1.9825681984876173e-06, "loss": 24.3137, "step": 367800 }, { "epoch": 0.7429994707434237, "grad_norm": 403.64111328125, "learning_rate": 1.982289870044787e-06, "loss": 24.8769, "step": 367810 }, { "epoch": 0.7430196713761075, "grad_norm": 133.44190979003906, "learning_rate": 1.982011556309908e-06, "loss": 8.1818, "step": 367820 }, { "epoch": 0.7430398720087913, "grad_norm": 290.5305480957031, "learning_rate": 1.9817332572843408e-06, "loss": 22.0711, "step": 367830 }, { "epoch": 0.7430600726414751, "grad_norm": 110.72178649902344, "learning_rate": 1.9814549729694395e-06, "loss": 10.8305, "step": 367840 }, { "epoch": 0.7430802732741589, "grad_norm": 239.3633575439453, "learning_rate": 1.9811767033665587e-06, "loss": 48.1588, "step": 367850 }, { "epoch": 0.7431004739068428, "grad_norm": 174.079345703125, "learning_rate": 1.9808984484770577e-06, "loss": 14.5616, "step": 367860 }, { "epoch": 0.7431206745395266, "grad_norm": 332.0126647949219, "learning_rate": 1.9806202083022906e-06, "loss": 14.3007, "step": 367870 }, { "epoch": 0.7431408751722104, "grad_norm": 130.88601684570312, "learning_rate": 1.980341982843616e-06, "loss": 16.0841, "step": 367880 }, { "epoch": 0.7431610758048942, "grad_norm": 239.69517517089844, "learning_rate": 1.980063772102388e-06, "loss": 11.4614, "step": 367890 }, { "epoch": 0.743181276437578, "grad_norm": 354.2300109863281, "learning_rate": 1.979785576079961e-06, "loss": 14.561, "step": 367900 }, { "epoch": 0.7432014770702619, "grad_norm": 83.34329223632812, "learning_rate": 1.9795073947776955e-06, "loss": 19.0726, "step": 367910 }, { "epoch": 0.7432216777029457, "grad_norm": 50.507164001464844, "learning_rate": 1.979229228196942e-06, "loss": 7.649, "step": 367920 }, { "epoch": 0.7432418783356295, "grad_norm": 158.787353515625, "learning_rate": 1.9789510763390605e-06, "loss": 17.9491, "step": 367930 }, { "epoch": 0.7432620789683133, "grad_norm": 148.45040893554688, "learning_rate": 1.978672939205404e-06, "loss": 12.1768, "step": 367940 }, { "epoch": 0.7432822796009971, "grad_norm": 170.5314483642578, "learning_rate": 1.9783948167973306e-06, "loss": 19.9886, "step": 367950 }, { "epoch": 0.743302480233681, "grad_norm": 192.61187744140625, "learning_rate": 1.9781167091161944e-06, "loss": 20.1661, "step": 367960 }, { "epoch": 0.7433226808663648, "grad_norm": 291.2362365722656, "learning_rate": 1.977838616163349e-06, "loss": 28.3776, "step": 367970 }, { "epoch": 0.7433428814990486, "grad_norm": 302.2829284667969, "learning_rate": 1.9775605379401534e-06, "loss": 16.0191, "step": 367980 }, { "epoch": 0.7433630821317324, "grad_norm": 433.32684326171875, "learning_rate": 1.9772824744479613e-06, "loss": 18.3496, "step": 367990 }, { "epoch": 0.7433832827644162, "grad_norm": 237.73533630371094, "learning_rate": 1.977004425688126e-06, "loss": 6.1527, "step": 368000 }, { "epoch": 0.7434034833971, "grad_norm": 166.23826599121094, "learning_rate": 1.9767263916620043e-06, "loss": 17.3147, "step": 368010 }, { "epoch": 0.7434236840297838, "grad_norm": 549.0132446289062, "learning_rate": 1.9764483723709555e-06, "loss": 20.4114, "step": 368020 }, { "epoch": 0.7434438846624676, "grad_norm": 183.91201782226562, "learning_rate": 1.9761703678163267e-06, "loss": 13.6363, "step": 368030 }, { "epoch": 0.7434640852951514, "grad_norm": 169.45643615722656, "learning_rate": 1.975892377999477e-06, "loss": 7.8362, "step": 368040 }, { "epoch": 0.7434842859278352, "grad_norm": 71.91312408447266, "learning_rate": 1.9756144029217626e-06, "loss": 17.6861, "step": 368050 }, { "epoch": 0.743504486560519, "grad_norm": 334.2380065917969, "learning_rate": 1.975336442584537e-06, "loss": 15.15, "step": 368060 }, { "epoch": 0.7435246871932029, "grad_norm": 234.39605712890625, "learning_rate": 1.975058496989153e-06, "loss": 9.6358, "step": 368070 }, { "epoch": 0.7435448878258867, "grad_norm": 211.35427856445312, "learning_rate": 1.9747805661369662e-06, "loss": 23.4997, "step": 368080 }, { "epoch": 0.7435650884585705, "grad_norm": 209.2626495361328, "learning_rate": 1.974502650029336e-06, "loss": 26.8, "step": 368090 }, { "epoch": 0.7435852890912543, "grad_norm": 19.723731994628906, "learning_rate": 1.97422474866761e-06, "loss": 18.9623, "step": 368100 }, { "epoch": 0.7436054897239381, "grad_norm": 429.3409729003906, "learning_rate": 1.9739468620531448e-06, "loss": 15.449, "step": 368110 }, { "epoch": 0.743625690356622, "grad_norm": 233.4496307373047, "learning_rate": 1.973668990187298e-06, "loss": 14.7837, "step": 368120 }, { "epoch": 0.7436458909893058, "grad_norm": 259.6336975097656, "learning_rate": 1.9733911330714213e-06, "loss": 15.0814, "step": 368130 }, { "epoch": 0.7436660916219896, "grad_norm": 420.91510009765625, "learning_rate": 1.973113290706867e-06, "loss": 14.4967, "step": 368140 }, { "epoch": 0.7436862922546734, "grad_norm": 55.218353271484375, "learning_rate": 1.9728354630949935e-06, "loss": 6.53, "step": 368150 }, { "epoch": 0.7437064928873572, "grad_norm": 193.62115478515625, "learning_rate": 1.972557650237153e-06, "loss": 14.1057, "step": 368160 }, { "epoch": 0.7437266935200411, "grad_norm": 420.7798156738281, "learning_rate": 1.972279852134697e-06, "loss": 19.6225, "step": 368170 }, { "epoch": 0.7437468941527249, "grad_norm": 325.1748352050781, "learning_rate": 1.972002068788984e-06, "loss": 9.4804, "step": 368180 }, { "epoch": 0.7437670947854087, "grad_norm": 238.03909301757812, "learning_rate": 1.9717243002013636e-06, "loss": 17.5486, "step": 368190 }, { "epoch": 0.7437872954180925, "grad_norm": 306.3053283691406, "learning_rate": 1.9714465463731934e-06, "loss": 26.5005, "step": 368200 }, { "epoch": 0.7438074960507763, "grad_norm": 411.758056640625, "learning_rate": 1.9711688073058262e-06, "loss": 21.2639, "step": 368210 }, { "epoch": 0.7438276966834602, "grad_norm": 341.78216552734375, "learning_rate": 1.9708910830006124e-06, "loss": 35.439, "step": 368220 }, { "epoch": 0.743847897316144, "grad_norm": 52.16340637207031, "learning_rate": 1.97061337345891e-06, "loss": 17.1439, "step": 368230 }, { "epoch": 0.7438680979488278, "grad_norm": 251.45828247070312, "learning_rate": 1.9703356786820687e-06, "loss": 14.1048, "step": 368240 }, { "epoch": 0.7438882985815116, "grad_norm": 378.696044921875, "learning_rate": 1.970057998671446e-06, "loss": 10.2073, "step": 368250 }, { "epoch": 0.7439084992141954, "grad_norm": 604.6239013671875, "learning_rate": 1.9697803334283906e-06, "loss": 20.4907, "step": 368260 }, { "epoch": 0.7439286998468791, "grad_norm": 194.32254028320312, "learning_rate": 1.9695026829542607e-06, "loss": 16.1047, "step": 368270 }, { "epoch": 0.743948900479563, "grad_norm": 291.6835632324219, "learning_rate": 1.969225047250407e-06, "loss": 17.6714, "step": 368280 }, { "epoch": 0.7439691011122468, "grad_norm": 250.54135131835938, "learning_rate": 1.9689474263181814e-06, "loss": 17.8419, "step": 368290 }, { "epoch": 0.7439893017449306, "grad_norm": 215.53341674804688, "learning_rate": 1.9686698201589395e-06, "loss": 17.8036, "step": 368300 }, { "epoch": 0.7440095023776144, "grad_norm": 494.6080017089844, "learning_rate": 1.968392228774034e-06, "loss": 18.3013, "step": 368310 }, { "epoch": 0.7440297030102982, "grad_norm": 125.88616943359375, "learning_rate": 1.968114652164815e-06, "loss": 12.3066, "step": 368320 }, { "epoch": 0.7440499036429821, "grad_norm": 133.43788146972656, "learning_rate": 1.967837090332637e-06, "loss": 12.3462, "step": 368330 }, { "epoch": 0.7440701042756659, "grad_norm": 241.65093994140625, "learning_rate": 1.967559543278856e-06, "loss": 22.3677, "step": 368340 }, { "epoch": 0.7440903049083497, "grad_norm": 207.7406768798828, "learning_rate": 1.9672820110048207e-06, "loss": 14.6837, "step": 368350 }, { "epoch": 0.7441105055410335, "grad_norm": 412.1327209472656, "learning_rate": 1.967004493511884e-06, "loss": 28.8685, "step": 368360 }, { "epoch": 0.7441307061737173, "grad_norm": 278.62506103515625, "learning_rate": 1.966726990801402e-06, "loss": 12.9305, "step": 368370 }, { "epoch": 0.7441509068064012, "grad_norm": 13.271073341369629, "learning_rate": 1.966449502874724e-06, "loss": 8.3623, "step": 368380 }, { "epoch": 0.744171107439085, "grad_norm": 3.3363125324249268, "learning_rate": 1.9661720297332014e-06, "loss": 27.7611, "step": 368390 }, { "epoch": 0.7441913080717688, "grad_norm": 199.39918518066406, "learning_rate": 1.9658945713781883e-06, "loss": 22.7746, "step": 368400 }, { "epoch": 0.7442115087044526, "grad_norm": 156.12396240234375, "learning_rate": 1.9656171278110394e-06, "loss": 17.1862, "step": 368410 }, { "epoch": 0.7442317093371364, "grad_norm": 450.0686950683594, "learning_rate": 1.9653396990331043e-06, "loss": 44.503, "step": 368420 }, { "epoch": 0.7442519099698203, "grad_norm": 330.6335754394531, "learning_rate": 1.965062285045733e-06, "loss": 23.2586, "step": 368430 }, { "epoch": 0.7442721106025041, "grad_norm": 185.37367248535156, "learning_rate": 1.9647848858502825e-06, "loss": 14.6242, "step": 368440 }, { "epoch": 0.7442923112351879, "grad_norm": 272.1004943847656, "learning_rate": 1.9645075014481024e-06, "loss": 14.2629, "step": 368450 }, { "epoch": 0.7443125118678717, "grad_norm": 32.35946273803711, "learning_rate": 1.964230131840543e-06, "loss": 12.5041, "step": 368460 }, { "epoch": 0.7443327125005555, "grad_norm": 221.45089721679688, "learning_rate": 1.9639527770289586e-06, "loss": 11.6113, "step": 368470 }, { "epoch": 0.7443529131332394, "grad_norm": 313.3411865234375, "learning_rate": 1.9636754370146987e-06, "loss": 14.9058, "step": 368480 }, { "epoch": 0.7443731137659232, "grad_norm": 213.5787353515625, "learning_rate": 1.9633981117991186e-06, "loss": 18.3837, "step": 368490 }, { "epoch": 0.744393314398607, "grad_norm": 284.8614501953125, "learning_rate": 1.9631208013835677e-06, "loss": 9.8215, "step": 368500 }, { "epoch": 0.7444135150312908, "grad_norm": 34.31322479248047, "learning_rate": 1.9628435057693963e-06, "loss": 10.7396, "step": 368510 }, { "epoch": 0.7444337156639746, "grad_norm": 389.54608154296875, "learning_rate": 1.9625662249579586e-06, "loss": 17.5919, "step": 368520 }, { "epoch": 0.7444539162966584, "grad_norm": 459.6904602050781, "learning_rate": 1.962288958950603e-06, "loss": 11.7468, "step": 368530 }, { "epoch": 0.7444741169293422, "grad_norm": 336.3078918457031, "learning_rate": 1.9620117077486838e-06, "loss": 14.0847, "step": 368540 }, { "epoch": 0.744494317562026, "grad_norm": 379.08367919921875, "learning_rate": 1.9617344713535503e-06, "loss": 12.4333, "step": 368550 }, { "epoch": 0.7445145181947098, "grad_norm": 102.53549194335938, "learning_rate": 1.9614572497665555e-06, "loss": 39.4696, "step": 368560 }, { "epoch": 0.7445347188273936, "grad_norm": 488.2542419433594, "learning_rate": 1.9611800429890497e-06, "loss": 19.3227, "step": 368570 }, { "epoch": 0.7445549194600775, "grad_norm": 135.92665100097656, "learning_rate": 1.960902851022382e-06, "loss": 24.2319, "step": 368580 }, { "epoch": 0.7445751200927613, "grad_norm": 295.5620422363281, "learning_rate": 1.9606256738679074e-06, "loss": 13.0005, "step": 368590 }, { "epoch": 0.7445953207254451, "grad_norm": 469.6124267578125, "learning_rate": 1.9603485115269743e-06, "loss": 26.0127, "step": 368600 }, { "epoch": 0.7446155213581289, "grad_norm": 106.21295928955078, "learning_rate": 1.960071364000932e-06, "loss": 13.2503, "step": 368610 }, { "epoch": 0.7446357219908127, "grad_norm": 275.1622314453125, "learning_rate": 1.959794231291134e-06, "loss": 13.4812, "step": 368620 }, { "epoch": 0.7446559226234966, "grad_norm": 167.83338928222656, "learning_rate": 1.959517113398933e-06, "loss": 15.3316, "step": 368630 }, { "epoch": 0.7446761232561804, "grad_norm": 744.9295043945312, "learning_rate": 1.959240010325673e-06, "loss": 20.192, "step": 368640 }, { "epoch": 0.7446963238888642, "grad_norm": 549.769775390625, "learning_rate": 1.958962922072709e-06, "loss": 23.4067, "step": 368650 }, { "epoch": 0.744716524521548, "grad_norm": 193.88809204101562, "learning_rate": 1.9586858486413923e-06, "loss": 14.1112, "step": 368660 }, { "epoch": 0.7447367251542318, "grad_norm": 495.541748046875, "learning_rate": 1.958408790033072e-06, "loss": 34.2071, "step": 368670 }, { "epoch": 0.7447569257869157, "grad_norm": 269.2591552734375, "learning_rate": 1.958131746249097e-06, "loss": 14.1928, "step": 368680 }, { "epoch": 0.7447771264195995, "grad_norm": 177.7523651123047, "learning_rate": 1.9578547172908185e-06, "loss": 26.1636, "step": 368690 }, { "epoch": 0.7447973270522833, "grad_norm": 388.0298767089844, "learning_rate": 1.9575777031595906e-06, "loss": 18.1299, "step": 368700 }, { "epoch": 0.7448175276849671, "grad_norm": 593.1123657226562, "learning_rate": 1.9573007038567565e-06, "loss": 10.6442, "step": 368710 }, { "epoch": 0.7448377283176509, "grad_norm": 0.0, "learning_rate": 1.95702371938367e-06, "loss": 12.2218, "step": 368720 }, { "epoch": 0.7448579289503348, "grad_norm": 365.4989929199219, "learning_rate": 1.956746749741682e-06, "loss": 9.6673, "step": 368730 }, { "epoch": 0.7448781295830186, "grad_norm": 246.7922821044922, "learning_rate": 1.9564697949321417e-06, "loss": 13.5259, "step": 368740 }, { "epoch": 0.7448983302157024, "grad_norm": 517.4268188476562, "learning_rate": 1.956192854956397e-06, "loss": 30.5942, "step": 368750 }, { "epoch": 0.7449185308483862, "grad_norm": 71.61907958984375, "learning_rate": 1.9559159298158e-06, "loss": 14.2901, "step": 368760 }, { "epoch": 0.74493873148107, "grad_norm": 1503.7191162109375, "learning_rate": 1.9556390195117004e-06, "loss": 20.7141, "step": 368770 }, { "epoch": 0.7449589321137539, "grad_norm": 24.238157272338867, "learning_rate": 1.955362124045445e-06, "loss": 20.4478, "step": 368780 }, { "epoch": 0.7449791327464376, "grad_norm": 297.56341552734375, "learning_rate": 1.955085243418387e-06, "loss": 23.9189, "step": 368790 }, { "epoch": 0.7449993333791214, "grad_norm": 309.5731506347656, "learning_rate": 1.9548083776318727e-06, "loss": 10.9845, "step": 368800 }, { "epoch": 0.7450195340118052, "grad_norm": 429.45355224609375, "learning_rate": 1.9545315266872545e-06, "loss": 15.845, "step": 368810 }, { "epoch": 0.745039734644489, "grad_norm": 489.4195861816406, "learning_rate": 1.95425469058588e-06, "loss": 16.6035, "step": 368820 }, { "epoch": 0.7450599352771728, "grad_norm": 777.9450073242188, "learning_rate": 1.9539778693290976e-06, "loss": 12.4995, "step": 368830 }, { "epoch": 0.7450801359098567, "grad_norm": 258.9618835449219, "learning_rate": 1.953701062918259e-06, "loss": 18.9143, "step": 368840 }, { "epoch": 0.7451003365425405, "grad_norm": 0.0, "learning_rate": 1.95342427135471e-06, "loss": 21.1056, "step": 368850 }, { "epoch": 0.7451205371752243, "grad_norm": 261.5935363769531, "learning_rate": 1.953147494639804e-06, "loss": 15.712, "step": 368860 }, { "epoch": 0.7451407378079081, "grad_norm": 559.705810546875, "learning_rate": 1.9528707327748853e-06, "loss": 35.6598, "step": 368870 }, { "epoch": 0.745160938440592, "grad_norm": 57.239009857177734, "learning_rate": 1.9525939857613075e-06, "loss": 15.357, "step": 368880 }, { "epoch": 0.7451811390732758, "grad_norm": 458.3349304199219, "learning_rate": 1.9523172536004165e-06, "loss": 10.0815, "step": 368890 }, { "epoch": 0.7452013397059596, "grad_norm": 69.7030029296875, "learning_rate": 1.95204053629356e-06, "loss": 15.8566, "step": 368900 }, { "epoch": 0.7452215403386434, "grad_norm": 401.9903869628906, "learning_rate": 1.9517638338420898e-06, "loss": 16.2265, "step": 368910 }, { "epoch": 0.7452417409713272, "grad_norm": 363.6653137207031, "learning_rate": 1.9514871462473527e-06, "loss": 17.1667, "step": 368920 }, { "epoch": 0.745261941604011, "grad_norm": 76.08296966552734, "learning_rate": 1.951210473510696e-06, "loss": 20.7524, "step": 368930 }, { "epoch": 0.7452821422366949, "grad_norm": 527.6973266601562, "learning_rate": 1.9509338156334695e-06, "loss": 26.2898, "step": 368940 }, { "epoch": 0.7453023428693787, "grad_norm": 367.317626953125, "learning_rate": 1.9506571726170236e-06, "loss": 16.4667, "step": 368950 }, { "epoch": 0.7453225435020625, "grad_norm": 514.7332763671875, "learning_rate": 1.9503805444627054e-06, "loss": 20.7721, "step": 368960 }, { "epoch": 0.7453427441347463, "grad_norm": 767.1329956054688, "learning_rate": 1.95010393117186e-06, "loss": 32.5805, "step": 368970 }, { "epoch": 0.7453629447674301, "grad_norm": 233.0145721435547, "learning_rate": 1.9498273327458405e-06, "loss": 14.4523, "step": 368980 }, { "epoch": 0.745383145400114, "grad_norm": 20.302433013916016, "learning_rate": 1.9495507491859922e-06, "loss": 19.0464, "step": 368990 }, { "epoch": 0.7454033460327978, "grad_norm": 278.6795654296875, "learning_rate": 1.9492741804936623e-06, "loss": 9.3105, "step": 369000 }, { "epoch": 0.7454235466654816, "grad_norm": 98.70714569091797, "learning_rate": 1.9489976266702e-06, "loss": 17.8488, "step": 369010 }, { "epoch": 0.7454437472981654, "grad_norm": 277.7071838378906, "learning_rate": 1.9487210877169545e-06, "loss": 13.715, "step": 369020 }, { "epoch": 0.7454639479308492, "grad_norm": 231.84506225585938, "learning_rate": 1.9484445636352724e-06, "loss": 14.0661, "step": 369030 }, { "epoch": 0.745484148563533, "grad_norm": 689.6864013671875, "learning_rate": 1.9481680544264995e-06, "loss": 24.1357, "step": 369040 }, { "epoch": 0.7455043491962168, "grad_norm": 461.5655822753906, "learning_rate": 1.9478915600919877e-06, "loss": 20.8549, "step": 369050 }, { "epoch": 0.7455245498289006, "grad_norm": 137.40443420410156, "learning_rate": 1.9476150806330816e-06, "loss": 9.2373, "step": 369060 }, { "epoch": 0.7455447504615844, "grad_norm": 117.66206359863281, "learning_rate": 1.947338616051129e-06, "loss": 11.3088, "step": 369070 }, { "epoch": 0.7455649510942682, "grad_norm": 554.678466796875, "learning_rate": 1.947062166347478e-06, "loss": 26.727, "step": 369080 }, { "epoch": 0.7455851517269521, "grad_norm": 335.57293701171875, "learning_rate": 1.9467857315234746e-06, "loss": 22.4823, "step": 369090 }, { "epoch": 0.7456053523596359, "grad_norm": 295.3412780761719, "learning_rate": 1.946509311580469e-06, "loss": 19.2833, "step": 369100 }, { "epoch": 0.7456255529923197, "grad_norm": 486.77728271484375, "learning_rate": 1.946232906519806e-06, "loss": 24.5029, "step": 369110 }, { "epoch": 0.7456457536250035, "grad_norm": 110.87494659423828, "learning_rate": 1.9459565163428322e-06, "loss": 23.9253, "step": 369120 }, { "epoch": 0.7456659542576873, "grad_norm": 272.86224365234375, "learning_rate": 1.945680141050898e-06, "loss": 16.7559, "step": 369130 }, { "epoch": 0.7456861548903712, "grad_norm": 52.24162673950195, "learning_rate": 1.945403780645346e-06, "loss": 4.5544, "step": 369140 }, { "epoch": 0.745706355523055, "grad_norm": 51.346282958984375, "learning_rate": 1.945127435127528e-06, "loss": 22.2818, "step": 369150 }, { "epoch": 0.7457265561557388, "grad_norm": 409.6199951171875, "learning_rate": 1.9448511044987862e-06, "loss": 15.4519, "step": 369160 }, { "epoch": 0.7457467567884226, "grad_norm": 371.30706787109375, "learning_rate": 1.944574788760471e-06, "loss": 26.0713, "step": 369170 }, { "epoch": 0.7457669574211064, "grad_norm": 295.7040710449219, "learning_rate": 1.944298487913928e-06, "loss": 58.7002, "step": 369180 }, { "epoch": 0.7457871580537903, "grad_norm": 291.29443359375, "learning_rate": 1.9440222019605022e-06, "loss": 24.1389, "step": 369190 }, { "epoch": 0.7458073586864741, "grad_norm": 0.9337621331214905, "learning_rate": 1.9437459309015426e-06, "loss": 6.0835, "step": 369200 }, { "epoch": 0.7458275593191579, "grad_norm": 0.0, "learning_rate": 1.9434696747383946e-06, "loss": 17.0467, "step": 369210 }, { "epoch": 0.7458477599518417, "grad_norm": 253.93679809570312, "learning_rate": 1.9431934334724035e-06, "loss": 20.108, "step": 369220 }, { "epoch": 0.7458679605845255, "grad_norm": 228.84030151367188, "learning_rate": 1.942917207104917e-06, "loss": 20.9286, "step": 369230 }, { "epoch": 0.7458881612172094, "grad_norm": 188.84446716308594, "learning_rate": 1.942640995637284e-06, "loss": 18.11, "step": 369240 }, { "epoch": 0.7459083618498932, "grad_norm": 161.1903533935547, "learning_rate": 1.942364799070845e-06, "loss": 30.2066, "step": 369250 }, { "epoch": 0.745928562482577, "grad_norm": 279.9927673339844, "learning_rate": 1.9420886174069486e-06, "loss": 18.2402, "step": 369260 }, { "epoch": 0.7459487631152608, "grad_norm": 361.8961181640625, "learning_rate": 1.9418124506469437e-06, "loss": 15.0564, "step": 369270 }, { "epoch": 0.7459689637479446, "grad_norm": 283.78271484375, "learning_rate": 1.9415362987921737e-06, "loss": 15.0902, "step": 369280 }, { "epoch": 0.7459891643806285, "grad_norm": 62.40973663330078, "learning_rate": 1.9412601618439834e-06, "loss": 18.2081, "step": 369290 }, { "epoch": 0.7460093650133122, "grad_norm": 280.6089782714844, "learning_rate": 1.94098403980372e-06, "loss": 24.2679, "step": 369300 }, { "epoch": 0.746029565645996, "grad_norm": 396.5609436035156, "learning_rate": 1.940707932672733e-06, "loss": 8.5066, "step": 369310 }, { "epoch": 0.7460497662786798, "grad_norm": 225.30154418945312, "learning_rate": 1.9404318404523605e-06, "loss": 15.7426, "step": 369320 }, { "epoch": 0.7460699669113636, "grad_norm": 22.356210708618164, "learning_rate": 1.9401557631439526e-06, "loss": 27.1646, "step": 369330 }, { "epoch": 0.7460901675440474, "grad_norm": 238.75132751464844, "learning_rate": 1.939879700748856e-06, "loss": 13.3898, "step": 369340 }, { "epoch": 0.7461103681767313, "grad_norm": 98.89534759521484, "learning_rate": 1.939603653268414e-06, "loss": 74.2161, "step": 369350 }, { "epoch": 0.7461305688094151, "grad_norm": 152.38438415527344, "learning_rate": 1.9393276207039717e-06, "loss": 16.5454, "step": 369360 }, { "epoch": 0.7461507694420989, "grad_norm": 355.16668701171875, "learning_rate": 1.9390516030568767e-06, "loss": 18.917, "step": 369370 }, { "epoch": 0.7461709700747827, "grad_norm": 95.01445770263672, "learning_rate": 1.938775600328473e-06, "loss": 13.3871, "step": 369380 }, { "epoch": 0.7461911707074665, "grad_norm": 372.54266357421875, "learning_rate": 1.9384996125201045e-06, "loss": 23.0742, "step": 369390 }, { "epoch": 0.7462113713401504, "grad_norm": 192.01544189453125, "learning_rate": 1.938223639633119e-06, "loss": 28.5114, "step": 369400 }, { "epoch": 0.7462315719728342, "grad_norm": 264.236572265625, "learning_rate": 1.937947681668858e-06, "loss": 16.9165, "step": 369410 }, { "epoch": 0.746251772605518, "grad_norm": 128.19076538085938, "learning_rate": 1.9376717386286703e-06, "loss": 15.9717, "step": 369420 }, { "epoch": 0.7462719732382018, "grad_norm": 528.9849853515625, "learning_rate": 1.9373958105138997e-06, "loss": 14.4122, "step": 369430 }, { "epoch": 0.7462921738708856, "grad_norm": 491.97552490234375, "learning_rate": 1.937119897325889e-06, "loss": 16.5629, "step": 369440 }, { "epoch": 0.7463123745035695, "grad_norm": 261.1350402832031, "learning_rate": 1.936843999065985e-06, "loss": 28.4189, "step": 369450 }, { "epoch": 0.7463325751362533, "grad_norm": 7.200567722320557, "learning_rate": 1.936568115735531e-06, "loss": 26.9393, "step": 369460 }, { "epoch": 0.7463527757689371, "grad_norm": 324.49920654296875, "learning_rate": 1.9362922473358735e-06, "loss": 19.7749, "step": 369470 }, { "epoch": 0.7463729764016209, "grad_norm": 318.5406188964844, "learning_rate": 1.936016393868355e-06, "loss": 34.8937, "step": 369480 }, { "epoch": 0.7463931770343047, "grad_norm": 451.1589050292969, "learning_rate": 1.9357405553343224e-06, "loss": 16.9858, "step": 369490 }, { "epoch": 0.7464133776669886, "grad_norm": 139.7960205078125, "learning_rate": 1.9354647317351187e-06, "loss": 10.4283, "step": 369500 }, { "epoch": 0.7464335782996724, "grad_norm": 116.3575210571289, "learning_rate": 1.9351889230720866e-06, "loss": 7.8481, "step": 369510 }, { "epoch": 0.7464537789323562, "grad_norm": 344.3850402832031, "learning_rate": 1.9349131293465732e-06, "loss": 13.7909, "step": 369520 }, { "epoch": 0.74647397956504, "grad_norm": 253.46810913085938, "learning_rate": 1.934637350559922e-06, "loss": 29.8886, "step": 369530 }, { "epoch": 0.7464941801977238, "grad_norm": 314.2491760253906, "learning_rate": 1.9343615867134748e-06, "loss": 23.6937, "step": 369540 }, { "epoch": 0.7465143808304076, "grad_norm": 208.8568572998047, "learning_rate": 1.9340858378085777e-06, "loss": 18.0877, "step": 369550 }, { "epoch": 0.7465345814630914, "grad_norm": 391.9168701171875, "learning_rate": 1.933810103846575e-06, "loss": 20.8511, "step": 369560 }, { "epoch": 0.7465547820957752, "grad_norm": 409.0274658203125, "learning_rate": 1.933534384828811e-06, "loss": 21.2901, "step": 369570 }, { "epoch": 0.746574982728459, "grad_norm": 47.9107551574707, "learning_rate": 1.933258680756627e-06, "loss": 13.116, "step": 369580 }, { "epoch": 0.7465951833611428, "grad_norm": 201.5399169921875, "learning_rate": 1.9329829916313684e-06, "loss": 9.0202, "step": 369590 }, { "epoch": 0.7466153839938267, "grad_norm": 140.60545349121094, "learning_rate": 1.93270731745438e-06, "loss": 26.1781, "step": 369600 }, { "epoch": 0.7466355846265105, "grad_norm": 406.8057556152344, "learning_rate": 1.9324316582270025e-06, "loss": 12.8992, "step": 369610 }, { "epoch": 0.7466557852591943, "grad_norm": 349.28631591796875, "learning_rate": 1.93215601395058e-06, "loss": 19.4778, "step": 369620 }, { "epoch": 0.7466759858918781, "grad_norm": 273.364013671875, "learning_rate": 1.93188038462646e-06, "loss": 18.114, "step": 369630 }, { "epoch": 0.7466961865245619, "grad_norm": 672.4388427734375, "learning_rate": 1.931604770255982e-06, "loss": 22.5418, "step": 369640 }, { "epoch": 0.7467163871572458, "grad_norm": 218.03318786621094, "learning_rate": 1.9313291708404885e-06, "loss": 10.9196, "step": 369650 }, { "epoch": 0.7467365877899296, "grad_norm": 391.0922546386719, "learning_rate": 1.9310535863813266e-06, "loss": 19.0975, "step": 369660 }, { "epoch": 0.7467567884226134, "grad_norm": 298.487548828125, "learning_rate": 1.9307780168798374e-06, "loss": 17.0633, "step": 369670 }, { "epoch": 0.7467769890552972, "grad_norm": 111.75115966796875, "learning_rate": 1.930502462337362e-06, "loss": 15.8402, "step": 369680 }, { "epoch": 0.746797189687981, "grad_norm": 353.9564208984375, "learning_rate": 1.9302269227552465e-06, "loss": 21.9925, "step": 369690 }, { "epoch": 0.7468173903206649, "grad_norm": 160.16566467285156, "learning_rate": 1.929951398134832e-06, "loss": 19.7594, "step": 369700 }, { "epoch": 0.7468375909533487, "grad_norm": 93.86961364746094, "learning_rate": 1.9296758884774624e-06, "loss": 15.7284, "step": 369710 }, { "epoch": 0.7468577915860325, "grad_norm": 290.2197265625, "learning_rate": 1.9294003937844806e-06, "loss": 18.6405, "step": 369720 }, { "epoch": 0.7468779922187163, "grad_norm": 323.9588928222656, "learning_rate": 1.9291249140572275e-06, "loss": 17.3232, "step": 369730 }, { "epoch": 0.7468981928514001, "grad_norm": 696.0647583007812, "learning_rate": 1.9288494492970487e-06, "loss": 18.776, "step": 369740 }, { "epoch": 0.746918393484084, "grad_norm": 112.11795043945312, "learning_rate": 1.928573999505284e-06, "loss": 18.3183, "step": 369750 }, { "epoch": 0.7469385941167678, "grad_norm": 95.89191436767578, "learning_rate": 1.928298564683278e-06, "loss": 8.6259, "step": 369760 }, { "epoch": 0.7469587947494516, "grad_norm": 172.56997680664062, "learning_rate": 1.928023144832371e-06, "loss": 13.1083, "step": 369770 }, { "epoch": 0.7469789953821354, "grad_norm": 226.8030242919922, "learning_rate": 1.927747739953908e-06, "loss": 16.5631, "step": 369780 }, { "epoch": 0.7469991960148192, "grad_norm": 173.50096130371094, "learning_rate": 1.9274723500492304e-06, "loss": 9.3867, "step": 369790 }, { "epoch": 0.747019396647503, "grad_norm": 329.6085510253906, "learning_rate": 1.927196975119678e-06, "loss": 11.535, "step": 369800 }, { "epoch": 0.7470395972801868, "grad_norm": 212.38369750976562, "learning_rate": 1.926921615166596e-06, "loss": 6.3506, "step": 369810 }, { "epoch": 0.7470597979128706, "grad_norm": 389.636474609375, "learning_rate": 1.926646270191326e-06, "loss": 15.2027, "step": 369820 }, { "epoch": 0.7470799985455544, "grad_norm": 227.70860290527344, "learning_rate": 1.9263709401952076e-06, "loss": 24.8615, "step": 369830 }, { "epoch": 0.7471001991782382, "grad_norm": 87.89580535888672, "learning_rate": 1.926095625179584e-06, "loss": 16.04, "step": 369840 }, { "epoch": 0.747120399810922, "grad_norm": 186.9021453857422, "learning_rate": 1.9258203251458012e-06, "loss": 18.4775, "step": 369850 }, { "epoch": 0.7471406004436059, "grad_norm": 311.2008056640625, "learning_rate": 1.9255450400951937e-06, "loss": 21.1176, "step": 369860 }, { "epoch": 0.7471608010762897, "grad_norm": 254.03387451171875, "learning_rate": 1.925269770029107e-06, "loss": 10.0443, "step": 369870 }, { "epoch": 0.7471810017089735, "grad_norm": 106.2398681640625, "learning_rate": 1.924994514948884e-06, "loss": 7.995, "step": 369880 }, { "epoch": 0.7472012023416573, "grad_norm": 226.22946166992188, "learning_rate": 1.9247192748558648e-06, "loss": 14.5774, "step": 369890 }, { "epoch": 0.7472214029743411, "grad_norm": 180.93296813964844, "learning_rate": 1.9244440497513895e-06, "loss": 20.5435, "step": 369900 }, { "epoch": 0.747241603607025, "grad_norm": 320.24615478515625, "learning_rate": 1.9241688396368e-06, "loss": 19.036, "step": 369910 }, { "epoch": 0.7472618042397088, "grad_norm": 696.014892578125, "learning_rate": 1.923893644513443e-06, "loss": 17.9663, "step": 369920 }, { "epoch": 0.7472820048723926, "grad_norm": 111.00938415527344, "learning_rate": 1.9236184643826515e-06, "loss": 22.4306, "step": 369930 }, { "epoch": 0.7473022055050764, "grad_norm": 160.22994995117188, "learning_rate": 1.9233432992457708e-06, "loss": 16.2355, "step": 369940 }, { "epoch": 0.7473224061377602, "grad_norm": 476.5484313964844, "learning_rate": 1.9230681491041425e-06, "loss": 27.1737, "step": 369950 }, { "epoch": 0.7473426067704441, "grad_norm": 354.2621765136719, "learning_rate": 1.9227930139591077e-06, "loss": 19.5869, "step": 369960 }, { "epoch": 0.7473628074031279, "grad_norm": 736.0292358398438, "learning_rate": 1.922517893812004e-06, "loss": 16.5576, "step": 369970 }, { "epoch": 0.7473830080358117, "grad_norm": 322.47613525390625, "learning_rate": 1.9222427886641774e-06, "loss": 14.5207, "step": 369980 }, { "epoch": 0.7474032086684955, "grad_norm": 333.9470520019531, "learning_rate": 1.921967698516966e-06, "loss": 26.0815, "step": 369990 }, { "epoch": 0.7474234093011793, "grad_norm": 213.09869384765625, "learning_rate": 1.9216926233717087e-06, "loss": 20.734, "step": 370000 }, { "epoch": 0.7474436099338632, "grad_norm": 114.10691833496094, "learning_rate": 1.9214175632297503e-06, "loss": 14.2768, "step": 370010 }, { "epoch": 0.747463810566547, "grad_norm": 50.42219924926758, "learning_rate": 1.9211425180924274e-06, "loss": 20.0802, "step": 370020 }, { "epoch": 0.7474840111992308, "grad_norm": 132.0945587158203, "learning_rate": 1.920867487961084e-06, "loss": 11.7345, "step": 370030 }, { "epoch": 0.7475042118319146, "grad_norm": 319.1049499511719, "learning_rate": 1.920592472837057e-06, "loss": 27.8331, "step": 370040 }, { "epoch": 0.7475244124645984, "grad_norm": 563.8472290039062, "learning_rate": 1.920317472721691e-06, "loss": 27.8424, "step": 370050 }, { "epoch": 0.7475446130972823, "grad_norm": 238.07940673828125, "learning_rate": 1.9200424876163244e-06, "loss": 10.5889, "step": 370060 }, { "epoch": 0.747564813729966, "grad_norm": 289.7926025390625, "learning_rate": 1.9197675175222954e-06, "loss": 17.8578, "step": 370070 }, { "epoch": 0.7475850143626498, "grad_norm": 316.75689697265625, "learning_rate": 1.919492562440947e-06, "loss": 11.601, "step": 370080 }, { "epoch": 0.7476052149953336, "grad_norm": 884.5833740234375, "learning_rate": 1.919217622373617e-06, "loss": 33.1757, "step": 370090 }, { "epoch": 0.7476254156280174, "grad_norm": 275.05841064453125, "learning_rate": 1.9189426973216478e-06, "loss": 11.054, "step": 370100 }, { "epoch": 0.7476456162607013, "grad_norm": 106.81330871582031, "learning_rate": 1.918667787286379e-06, "loss": 20.5739, "step": 370110 }, { "epoch": 0.7476658168933851, "grad_norm": 250.07186889648438, "learning_rate": 1.9183928922691474e-06, "loss": 19.2854, "step": 370120 }, { "epoch": 0.7476860175260689, "grad_norm": 151.4365692138672, "learning_rate": 1.918118012271297e-06, "loss": 11.2196, "step": 370130 }, { "epoch": 0.7477062181587527, "grad_norm": 42.512943267822266, "learning_rate": 1.917843147294166e-06, "loss": 9.4453, "step": 370140 }, { "epoch": 0.7477264187914365, "grad_norm": 632.7243041992188, "learning_rate": 1.917568297339091e-06, "loss": 23.3536, "step": 370150 }, { "epoch": 0.7477466194241204, "grad_norm": 458.36834716796875, "learning_rate": 1.9172934624074153e-06, "loss": 26.5292, "step": 370160 }, { "epoch": 0.7477668200568042, "grad_norm": 208.3182830810547, "learning_rate": 1.9170186425004805e-06, "loss": 13.9111, "step": 370170 }, { "epoch": 0.747787020689488, "grad_norm": 513.8473510742188, "learning_rate": 1.916743837619619e-06, "loss": 18.0323, "step": 370180 }, { "epoch": 0.7478072213221718, "grad_norm": 426.390380859375, "learning_rate": 1.9164690477661746e-06, "loss": 22.5995, "step": 370190 }, { "epoch": 0.7478274219548556, "grad_norm": 4.58008337020874, "learning_rate": 1.9161942729414876e-06, "loss": 27.1126, "step": 370200 }, { "epoch": 0.7478476225875395, "grad_norm": 130.28773498535156, "learning_rate": 1.9159195131468955e-06, "loss": 13.3869, "step": 370210 }, { "epoch": 0.7478678232202233, "grad_norm": 211.2389678955078, "learning_rate": 1.9156447683837365e-06, "loss": 16.6078, "step": 370220 }, { "epoch": 0.7478880238529071, "grad_norm": 171.8124542236328, "learning_rate": 1.9153700386533502e-06, "loss": 25.6353, "step": 370230 }, { "epoch": 0.7479082244855909, "grad_norm": 216.5200958251953, "learning_rate": 1.9150953239570784e-06, "loss": 9.8549, "step": 370240 }, { "epoch": 0.7479284251182747, "grad_norm": 276.8360595703125, "learning_rate": 1.9148206242962575e-06, "loss": 7.6981, "step": 370250 }, { "epoch": 0.7479486257509586, "grad_norm": 186.382080078125, "learning_rate": 1.9145459396722248e-06, "loss": 20.3598, "step": 370260 }, { "epoch": 0.7479688263836424, "grad_norm": 546.8480224609375, "learning_rate": 1.914271270086323e-06, "loss": 12.259, "step": 370270 }, { "epoch": 0.7479890270163262, "grad_norm": 131.8167266845703, "learning_rate": 1.9139966155398894e-06, "loss": 12.6977, "step": 370280 }, { "epoch": 0.74800922764901, "grad_norm": 426.40625, "learning_rate": 1.913721976034259e-06, "loss": 18.7393, "step": 370290 }, { "epoch": 0.7480294282816938, "grad_norm": 271.49627685546875, "learning_rate": 1.913447351570776e-06, "loss": 30.5129, "step": 370300 }, { "epoch": 0.7480496289143777, "grad_norm": 446.80865478515625, "learning_rate": 1.913172742150774e-06, "loss": 19.8997, "step": 370310 }, { "epoch": 0.7480698295470614, "grad_norm": 251.02719116210938, "learning_rate": 1.912898147775596e-06, "loss": 9.5635, "step": 370320 }, { "epoch": 0.7480900301797452, "grad_norm": 165.54541015625, "learning_rate": 1.912623568446578e-06, "loss": 16.6174, "step": 370330 }, { "epoch": 0.748110230812429, "grad_norm": 390.7007751464844, "learning_rate": 1.9123490041650556e-06, "loss": 10.5735, "step": 370340 }, { "epoch": 0.7481304314451128, "grad_norm": 241.35340881347656, "learning_rate": 1.912074454932372e-06, "loss": 25.5752, "step": 370350 }, { "epoch": 0.7481506320777966, "grad_norm": 214.2505645751953, "learning_rate": 1.911799920749861e-06, "loss": 9.1914, "step": 370360 }, { "epoch": 0.7481708327104805, "grad_norm": 330.2734375, "learning_rate": 1.911525401618865e-06, "loss": 27.8218, "step": 370370 }, { "epoch": 0.7481910333431643, "grad_norm": 111.91781616210938, "learning_rate": 1.9112508975407173e-06, "loss": 21.6309, "step": 370380 }, { "epoch": 0.7482112339758481, "grad_norm": 211.00917053222656, "learning_rate": 1.9109764085167604e-06, "loss": 20.6751, "step": 370390 }, { "epoch": 0.7482314346085319, "grad_norm": 211.5698699951172, "learning_rate": 1.910701934548329e-06, "loss": 17.0429, "step": 370400 }, { "epoch": 0.7482516352412157, "grad_norm": 283.0148620605469, "learning_rate": 1.9104274756367606e-06, "loss": 14.1954, "step": 370410 }, { "epoch": 0.7482718358738996, "grad_norm": 436.4775085449219, "learning_rate": 1.9101530317833957e-06, "loss": 15.3355, "step": 370420 }, { "epoch": 0.7482920365065834, "grad_norm": 661.9071655273438, "learning_rate": 1.9098786029895698e-06, "loss": 27.6099, "step": 370430 }, { "epoch": 0.7483122371392672, "grad_norm": 402.4980163574219, "learning_rate": 1.909604189256619e-06, "loss": 29.1606, "step": 370440 }, { "epoch": 0.748332437771951, "grad_norm": 468.9497985839844, "learning_rate": 1.9093297905858833e-06, "loss": 13.9686, "step": 370450 }, { "epoch": 0.7483526384046348, "grad_norm": 445.6475524902344, "learning_rate": 1.909055406978702e-06, "loss": 13.108, "step": 370460 }, { "epoch": 0.7483728390373187, "grad_norm": 192.17112731933594, "learning_rate": 1.908781038436407e-06, "loss": 19.2281, "step": 370470 }, { "epoch": 0.7483930396700025, "grad_norm": 219.0399169921875, "learning_rate": 1.9085066849603377e-06, "loss": 15.7932, "step": 370480 }, { "epoch": 0.7484132403026863, "grad_norm": 390.29522705078125, "learning_rate": 1.908232346551834e-06, "loss": 18.1032, "step": 370490 }, { "epoch": 0.7484334409353701, "grad_norm": 341.1053161621094, "learning_rate": 1.90795802321223e-06, "loss": 21.15, "step": 370500 }, { "epoch": 0.7484536415680539, "grad_norm": 219.66871643066406, "learning_rate": 1.907683714942863e-06, "loss": 14.3503, "step": 370510 }, { "epoch": 0.7484738422007378, "grad_norm": 149.77102661132812, "learning_rate": 1.90740942174507e-06, "loss": 21.7256, "step": 370520 }, { "epoch": 0.7484940428334216, "grad_norm": 274.599853515625, "learning_rate": 1.9071351436201918e-06, "loss": 12.2105, "step": 370530 }, { "epoch": 0.7485142434661054, "grad_norm": 357.609130859375, "learning_rate": 1.9068608805695588e-06, "loss": 17.5165, "step": 370540 }, { "epoch": 0.7485344440987892, "grad_norm": 45.921058654785156, "learning_rate": 1.9065866325945099e-06, "loss": 28.8463, "step": 370550 }, { "epoch": 0.748554644731473, "grad_norm": 488.1148376464844, "learning_rate": 1.906312399696385e-06, "loss": 19.2621, "step": 370560 }, { "epoch": 0.7485748453641569, "grad_norm": 217.16351318359375, "learning_rate": 1.9060381818765177e-06, "loss": 13.9055, "step": 370570 }, { "epoch": 0.7485950459968406, "grad_norm": 120.966064453125, "learning_rate": 1.9057639791362437e-06, "loss": 17.5191, "step": 370580 }, { "epoch": 0.7486152466295244, "grad_norm": 50.76523208618164, "learning_rate": 1.9054897914769028e-06, "loss": 20.9123, "step": 370590 }, { "epoch": 0.7486354472622082, "grad_norm": 305.01220703125, "learning_rate": 1.9052156188998284e-06, "loss": 9.1841, "step": 370600 }, { "epoch": 0.748655647894892, "grad_norm": 232.60914611816406, "learning_rate": 1.9049414614063566e-06, "loss": 16.159, "step": 370610 }, { "epoch": 0.7486758485275758, "grad_norm": 455.8209228515625, "learning_rate": 1.9046673189978266e-06, "loss": 22.5252, "step": 370620 }, { "epoch": 0.7486960491602597, "grad_norm": 82.41120147705078, "learning_rate": 1.904393191675571e-06, "loss": 21.8097, "step": 370630 }, { "epoch": 0.7487162497929435, "grad_norm": 351.81597900390625, "learning_rate": 1.9041190794409287e-06, "loss": 14.9213, "step": 370640 }, { "epoch": 0.7487364504256273, "grad_norm": 236.0299530029297, "learning_rate": 1.9038449822952331e-06, "loss": 10.5469, "step": 370650 }, { "epoch": 0.7487566510583111, "grad_norm": 2428.568115234375, "learning_rate": 1.9035709002398234e-06, "loss": 36.0981, "step": 370660 }, { "epoch": 0.748776851690995, "grad_norm": 223.53973388671875, "learning_rate": 1.9032968332760331e-06, "loss": 14.3164, "step": 370670 }, { "epoch": 0.7487970523236788, "grad_norm": 193.33673095703125, "learning_rate": 1.903022781405197e-06, "loss": 9.8684, "step": 370680 }, { "epoch": 0.7488172529563626, "grad_norm": 29.285791397094727, "learning_rate": 1.902748744628654e-06, "loss": 19.1501, "step": 370690 }, { "epoch": 0.7488374535890464, "grad_norm": 333.2878723144531, "learning_rate": 1.9024747229477365e-06, "loss": 20.6614, "step": 370700 }, { "epoch": 0.7488576542217302, "grad_norm": 204.9554443359375, "learning_rate": 1.9022007163637829e-06, "loss": 18.1107, "step": 370710 }, { "epoch": 0.748877854854414, "grad_norm": 405.0108642578125, "learning_rate": 1.9019267248781276e-06, "loss": 12.7056, "step": 370720 }, { "epoch": 0.7488980554870979, "grad_norm": 235.40260314941406, "learning_rate": 1.9016527484921037e-06, "loss": 19.9678, "step": 370730 }, { "epoch": 0.7489182561197817, "grad_norm": 364.274658203125, "learning_rate": 1.9013787872070506e-06, "loss": 23.2196, "step": 370740 }, { "epoch": 0.7489384567524655, "grad_norm": 51.982601165771484, "learning_rate": 1.9011048410243011e-06, "loss": 36.4674, "step": 370750 }, { "epoch": 0.7489586573851493, "grad_norm": 307.5245056152344, "learning_rate": 1.900830909945189e-06, "loss": 20.2551, "step": 370760 }, { "epoch": 0.7489788580178331, "grad_norm": 80.573486328125, "learning_rate": 1.900556993971051e-06, "loss": 9.3837, "step": 370770 }, { "epoch": 0.748999058650517, "grad_norm": 507.08880615234375, "learning_rate": 1.9002830931032262e-06, "loss": 18.8267, "step": 370780 }, { "epoch": 0.7490192592832008, "grad_norm": 423.7588195800781, "learning_rate": 1.900009207343042e-06, "loss": 17.2743, "step": 370790 }, { "epoch": 0.7490394599158846, "grad_norm": 143.3446807861328, "learning_rate": 1.8997353366918369e-06, "loss": 11.3105, "step": 370800 }, { "epoch": 0.7490596605485684, "grad_norm": 208.96926879882812, "learning_rate": 1.8994614811509475e-06, "loss": 16.4052, "step": 370810 }, { "epoch": 0.7490798611812522, "grad_norm": 145.70599365234375, "learning_rate": 1.8991876407217068e-06, "loss": 14.4433, "step": 370820 }, { "epoch": 0.749100061813936, "grad_norm": 139.9664306640625, "learning_rate": 1.8989138154054482e-06, "loss": 31.7103, "step": 370830 }, { "epoch": 0.7491202624466198, "grad_norm": 0.0, "learning_rate": 1.898640005203507e-06, "loss": 22.7449, "step": 370840 }, { "epoch": 0.7491404630793036, "grad_norm": 260.2795104980469, "learning_rate": 1.8983662101172217e-06, "loss": 8.0602, "step": 370850 }, { "epoch": 0.7491606637119874, "grad_norm": 17.827693939208984, "learning_rate": 1.8980924301479199e-06, "loss": 26.9873, "step": 370860 }, { "epoch": 0.7491808643446712, "grad_norm": 320.9022216796875, "learning_rate": 1.8978186652969394e-06, "loss": 27.058, "step": 370870 }, { "epoch": 0.7492010649773551, "grad_norm": 364.3447570800781, "learning_rate": 1.8975449155656162e-06, "loss": 19.1128, "step": 370880 }, { "epoch": 0.7492212656100389, "grad_norm": 249.30101013183594, "learning_rate": 1.897271180955283e-06, "loss": 17.489, "step": 370890 }, { "epoch": 0.7492414662427227, "grad_norm": 509.1755065917969, "learning_rate": 1.896997461467272e-06, "loss": 17.9714, "step": 370900 }, { "epoch": 0.7492616668754065, "grad_norm": 39.167903900146484, "learning_rate": 1.8967237571029207e-06, "loss": 12.3592, "step": 370910 }, { "epoch": 0.7492818675080903, "grad_norm": 105.03791046142578, "learning_rate": 1.896450067863561e-06, "loss": 12.4758, "step": 370920 }, { "epoch": 0.7493020681407742, "grad_norm": 289.82379150390625, "learning_rate": 1.8961763937505262e-06, "loss": 11.8574, "step": 370930 }, { "epoch": 0.749322268773458, "grad_norm": 340.8379821777344, "learning_rate": 1.8959027347651527e-06, "loss": 12.5359, "step": 370940 }, { "epoch": 0.7493424694061418, "grad_norm": 572.29638671875, "learning_rate": 1.895629090908771e-06, "loss": 18.4067, "step": 370950 }, { "epoch": 0.7493626700388256, "grad_norm": 91.726318359375, "learning_rate": 1.895355462182718e-06, "loss": 20.5134, "step": 370960 }, { "epoch": 0.7493828706715094, "grad_norm": 371.5910339355469, "learning_rate": 1.8950818485883248e-06, "loss": 11.789, "step": 370970 }, { "epoch": 0.7494030713041933, "grad_norm": 0.0, "learning_rate": 1.8948082501269272e-06, "loss": 11.9838, "step": 370980 }, { "epoch": 0.7494232719368771, "grad_norm": 470.0531311035156, "learning_rate": 1.8945346667998566e-06, "loss": 12.501, "step": 370990 }, { "epoch": 0.7494434725695609, "grad_norm": 479.8145751953125, "learning_rate": 1.8942610986084487e-06, "loss": 15.1406, "step": 371000 }, { "epoch": 0.7494636732022447, "grad_norm": 483.75372314453125, "learning_rate": 1.8939875455540352e-06, "loss": 25.651, "step": 371010 }, { "epoch": 0.7494838738349285, "grad_norm": 150.5609893798828, "learning_rate": 1.8937140076379484e-06, "loss": 31.5946, "step": 371020 }, { "epoch": 0.7495040744676124, "grad_norm": 395.453369140625, "learning_rate": 1.8934404848615245e-06, "loss": 15.6602, "step": 371030 }, { "epoch": 0.7495242751002962, "grad_norm": 292.02789306640625, "learning_rate": 1.8931669772260946e-06, "loss": 6.2316, "step": 371040 }, { "epoch": 0.74954447573298, "grad_norm": 100.59710693359375, "learning_rate": 1.8928934847329905e-06, "loss": 15.3211, "step": 371050 }, { "epoch": 0.7495646763656638, "grad_norm": 143.78709411621094, "learning_rate": 1.8926200073835466e-06, "loss": 33.8803, "step": 371060 }, { "epoch": 0.7495848769983476, "grad_norm": 386.78692626953125, "learning_rate": 1.8923465451790997e-06, "loss": 17.6445, "step": 371070 }, { "epoch": 0.7496050776310315, "grad_norm": 127.44560241699219, "learning_rate": 1.892073098120975e-06, "loss": 10.7279, "step": 371080 }, { "epoch": 0.7496252782637152, "grad_norm": 771.6856689453125, "learning_rate": 1.8917996662105092e-06, "loss": 26.4416, "step": 371090 }, { "epoch": 0.749645478896399, "grad_norm": 328.34881591796875, "learning_rate": 1.8915262494490366e-06, "loss": 20.6496, "step": 371100 }, { "epoch": 0.7496656795290828, "grad_norm": 0.0, "learning_rate": 1.8912528478378877e-06, "loss": 26.5899, "step": 371110 }, { "epoch": 0.7496858801617666, "grad_norm": 192.69497680664062, "learning_rate": 1.8909794613783943e-06, "loss": 26.2871, "step": 371120 }, { "epoch": 0.7497060807944504, "grad_norm": 89.1185302734375, "learning_rate": 1.8907060900718894e-06, "loss": 15.7058, "step": 371130 }, { "epoch": 0.7497262814271343, "grad_norm": 1682.911376953125, "learning_rate": 1.8904327339197098e-06, "loss": 11.0275, "step": 371140 }, { "epoch": 0.7497464820598181, "grad_norm": 295.31146240234375, "learning_rate": 1.8901593929231804e-06, "loss": 27.3743, "step": 371150 }, { "epoch": 0.7497666826925019, "grad_norm": 133.7964630126953, "learning_rate": 1.8898860670836367e-06, "loss": 15.8069, "step": 371160 }, { "epoch": 0.7497868833251857, "grad_norm": 43.890254974365234, "learning_rate": 1.8896127564024124e-06, "loss": 17.8565, "step": 371170 }, { "epoch": 0.7498070839578695, "grad_norm": 59.6690673828125, "learning_rate": 1.8893394608808391e-06, "loss": 21.3591, "step": 371180 }, { "epoch": 0.7498272845905534, "grad_norm": 222.10760498046875, "learning_rate": 1.889066180520246e-06, "loss": 16.9338, "step": 371190 }, { "epoch": 0.7498474852232372, "grad_norm": 322.930908203125, "learning_rate": 1.8887929153219687e-06, "loss": 22.6679, "step": 371200 }, { "epoch": 0.749867685855921, "grad_norm": 173.83990478515625, "learning_rate": 1.8885196652873372e-06, "loss": 15.6963, "step": 371210 }, { "epoch": 0.7498878864886048, "grad_norm": 141.28070068359375, "learning_rate": 1.8882464304176817e-06, "loss": 26.6838, "step": 371220 }, { "epoch": 0.7499080871212886, "grad_norm": 128.2418975830078, "learning_rate": 1.8879732107143378e-06, "loss": 28.2601, "step": 371230 }, { "epoch": 0.7499282877539725, "grad_norm": 318.5549621582031, "learning_rate": 1.8877000061786333e-06, "loss": 20.6768, "step": 371240 }, { "epoch": 0.7499484883866563, "grad_norm": 181.9122314453125, "learning_rate": 1.887426816811903e-06, "loss": 12.4233, "step": 371250 }, { "epoch": 0.7499686890193401, "grad_norm": 170.62318420410156, "learning_rate": 1.8871536426154752e-06, "loss": 11.186, "step": 371260 }, { "epoch": 0.7499888896520239, "grad_norm": 357.69268798828125, "learning_rate": 1.8868804835906845e-06, "loss": 19.0101, "step": 371270 }, { "epoch": 0.7500090902847077, "grad_norm": 459.1524353027344, "learning_rate": 1.8866073397388612e-06, "loss": 15.1567, "step": 371280 }, { "epoch": 0.7500292909173916, "grad_norm": 66.97949981689453, "learning_rate": 1.8863342110613342e-06, "loss": 14.5358, "step": 371290 }, { "epoch": 0.7500494915500754, "grad_norm": 129.07508850097656, "learning_rate": 1.8860610975594384e-06, "loss": 10.4975, "step": 371300 }, { "epoch": 0.7500696921827592, "grad_norm": 2.092559337615967, "learning_rate": 1.8857879992345013e-06, "loss": 14.7937, "step": 371310 }, { "epoch": 0.750089892815443, "grad_norm": 336.1669616699219, "learning_rate": 1.8855149160878571e-06, "loss": 18.0881, "step": 371320 }, { "epoch": 0.7501100934481268, "grad_norm": 168.75352478027344, "learning_rate": 1.8852418481208362e-06, "loss": 14.0733, "step": 371330 }, { "epoch": 0.7501302940808106, "grad_norm": 195.59048461914062, "learning_rate": 1.8849687953347666e-06, "loss": 24.082, "step": 371340 }, { "epoch": 0.7501504947134944, "grad_norm": 515.1174926757812, "learning_rate": 1.8846957577309832e-06, "loss": 18.69, "step": 371350 }, { "epoch": 0.7501706953461782, "grad_norm": 80.65044403076172, "learning_rate": 1.8844227353108146e-06, "loss": 21.7323, "step": 371360 }, { "epoch": 0.750190895978862, "grad_norm": 327.2787780761719, "learning_rate": 1.8841497280755906e-06, "loss": 25.434, "step": 371370 }, { "epoch": 0.7502110966115458, "grad_norm": 298.43499755859375, "learning_rate": 1.8838767360266425e-06, "loss": 20.0417, "step": 371380 }, { "epoch": 0.7502312972442297, "grad_norm": 395.0507507324219, "learning_rate": 1.8836037591653044e-06, "loss": 17.9934, "step": 371390 }, { "epoch": 0.7502514978769135, "grad_norm": 192.87710571289062, "learning_rate": 1.8833307974929006e-06, "loss": 17.8138, "step": 371400 }, { "epoch": 0.7502716985095973, "grad_norm": 468.6221618652344, "learning_rate": 1.8830578510107638e-06, "loss": 32.258, "step": 371410 }, { "epoch": 0.7502918991422811, "grad_norm": 410.21630859375, "learning_rate": 1.8827849197202275e-06, "loss": 30.8834, "step": 371420 }, { "epoch": 0.7503120997749649, "grad_norm": 632.2503662109375, "learning_rate": 1.8825120036226192e-06, "loss": 10.7712, "step": 371430 }, { "epoch": 0.7503323004076488, "grad_norm": 55.30056381225586, "learning_rate": 1.8822391027192677e-06, "loss": 22.2709, "step": 371440 }, { "epoch": 0.7503525010403326, "grad_norm": 384.4527893066406, "learning_rate": 1.8819662170115043e-06, "loss": 13.2961, "step": 371450 }, { "epoch": 0.7503727016730164, "grad_norm": 347.42120361328125, "learning_rate": 1.881693346500663e-06, "loss": 18.6116, "step": 371460 }, { "epoch": 0.7503929023057002, "grad_norm": 208.43563842773438, "learning_rate": 1.8814204911880667e-06, "loss": 21.1833, "step": 371470 }, { "epoch": 0.750413102938384, "grad_norm": 344.66015625, "learning_rate": 1.8811476510750486e-06, "loss": 15.6512, "step": 371480 }, { "epoch": 0.7504333035710679, "grad_norm": 372.6371765136719, "learning_rate": 1.8808748261629406e-06, "loss": 18.8799, "step": 371490 }, { "epoch": 0.7504535042037517, "grad_norm": 260.3725891113281, "learning_rate": 1.8806020164530702e-06, "loss": 19.5407, "step": 371500 }, { "epoch": 0.7504737048364355, "grad_norm": 535.9419555664062, "learning_rate": 1.8803292219467656e-06, "loss": 33.6744, "step": 371510 }, { "epoch": 0.7504939054691193, "grad_norm": 345.4858093261719, "learning_rate": 1.8800564426453595e-06, "loss": 24.4308, "step": 371520 }, { "epoch": 0.7505141061018031, "grad_norm": 363.9193420410156, "learning_rate": 1.87978367855018e-06, "loss": 15.7183, "step": 371530 }, { "epoch": 0.750534306734487, "grad_norm": 98.58710479736328, "learning_rate": 1.8795109296625546e-06, "loss": 4.4871, "step": 371540 }, { "epoch": 0.7505545073671708, "grad_norm": 356.28271484375, "learning_rate": 1.8792381959838147e-06, "loss": 9.7465, "step": 371550 }, { "epoch": 0.7505747079998546, "grad_norm": 297.53631591796875, "learning_rate": 1.878965477515291e-06, "loss": 19.711, "step": 371560 }, { "epoch": 0.7505949086325384, "grad_norm": 177.80442810058594, "learning_rate": 1.8786927742583111e-06, "loss": 14.9644, "step": 371570 }, { "epoch": 0.7506151092652222, "grad_norm": 172.0708770751953, "learning_rate": 1.878420086214202e-06, "loss": 19.806, "step": 371580 }, { "epoch": 0.7506353098979061, "grad_norm": 10.99874210357666, "learning_rate": 1.8781474133842963e-06, "loss": 20.2485, "step": 371590 }, { "epoch": 0.7506555105305898, "grad_norm": 354.6606140136719, "learning_rate": 1.8778747557699223e-06, "loss": 14.2237, "step": 371600 }, { "epoch": 0.7506757111632736, "grad_norm": 16.260732650756836, "learning_rate": 1.877602113372406e-06, "loss": 15.2683, "step": 371610 }, { "epoch": 0.7506959117959574, "grad_norm": 171.46290588378906, "learning_rate": 1.8773294861930797e-06, "loss": 17.4215, "step": 371620 }, { "epoch": 0.7507161124286412, "grad_norm": 130.80169677734375, "learning_rate": 1.8770568742332695e-06, "loss": 16.9161, "step": 371630 }, { "epoch": 0.750736313061325, "grad_norm": 569.0813598632812, "learning_rate": 1.8767842774943068e-06, "loss": 17.4082, "step": 371640 }, { "epoch": 0.7507565136940089, "grad_norm": 2.297719717025757, "learning_rate": 1.8765116959775187e-06, "loss": 7.9937, "step": 371650 }, { "epoch": 0.7507767143266927, "grad_norm": 184.576171875, "learning_rate": 1.876239129684232e-06, "loss": 8.2777, "step": 371660 }, { "epoch": 0.7507969149593765, "grad_norm": 308.5185546875, "learning_rate": 1.875966578615777e-06, "loss": 19.0659, "step": 371670 }, { "epoch": 0.7508171155920603, "grad_norm": 429.6871643066406, "learning_rate": 1.8756940427734854e-06, "loss": 21.2599, "step": 371680 }, { "epoch": 0.7508373162247441, "grad_norm": 644.5056762695312, "learning_rate": 1.8754215221586785e-06, "loss": 34.0296, "step": 371690 }, { "epoch": 0.750857516857428, "grad_norm": 142.82122802734375, "learning_rate": 1.8751490167726888e-06, "loss": 13.545, "step": 371700 }, { "epoch": 0.7508777174901118, "grad_norm": 440.2891845703125, "learning_rate": 1.874876526616845e-06, "loss": 17.4862, "step": 371710 }, { "epoch": 0.7508979181227956, "grad_norm": 174.17462158203125, "learning_rate": 1.874604051692474e-06, "loss": 19.2879, "step": 371720 }, { "epoch": 0.7509181187554794, "grad_norm": 430.67425537109375, "learning_rate": 1.874331592000902e-06, "loss": 16.412, "step": 371730 }, { "epoch": 0.7509383193881632, "grad_norm": 444.29705810546875, "learning_rate": 1.8740591475434588e-06, "loss": 17.7198, "step": 371740 }, { "epoch": 0.7509585200208471, "grad_norm": 221.83868408203125, "learning_rate": 1.873786718321476e-06, "loss": 14.6594, "step": 371750 }, { "epoch": 0.7509787206535309, "grad_norm": 97.37433624267578, "learning_rate": 1.8735143043362735e-06, "loss": 28.5682, "step": 371760 }, { "epoch": 0.7509989212862147, "grad_norm": 324.7624206542969, "learning_rate": 1.8732419055891832e-06, "loss": 27.2949, "step": 371770 }, { "epoch": 0.7510191219188985, "grad_norm": 194.3037109375, "learning_rate": 1.8729695220815346e-06, "loss": 12.4371, "step": 371780 }, { "epoch": 0.7510393225515823, "grad_norm": 155.5506591796875, "learning_rate": 1.8726971538146532e-06, "loss": 14.5144, "step": 371790 }, { "epoch": 0.7510595231842662, "grad_norm": 207.47317504882812, "learning_rate": 1.8724248007898648e-06, "loss": 26.5328, "step": 371800 }, { "epoch": 0.75107972381695, "grad_norm": 5.858146667480469, "learning_rate": 1.8721524630085003e-06, "loss": 9.1418, "step": 371810 }, { "epoch": 0.7510999244496338, "grad_norm": 361.8494567871094, "learning_rate": 1.8718801404718856e-06, "loss": 15.2751, "step": 371820 }, { "epoch": 0.7511201250823176, "grad_norm": 175.71853637695312, "learning_rate": 1.8716078331813459e-06, "loss": 18.1844, "step": 371830 }, { "epoch": 0.7511403257150014, "grad_norm": 497.8709716796875, "learning_rate": 1.8713355411382117e-06, "loss": 17.4586, "step": 371840 }, { "epoch": 0.7511605263476853, "grad_norm": 57.90321350097656, "learning_rate": 1.871063264343807e-06, "loss": 14.8321, "step": 371850 }, { "epoch": 0.751180726980369, "grad_norm": 186.2997589111328, "learning_rate": 1.870791002799462e-06, "loss": 19.4902, "step": 371860 }, { "epoch": 0.7512009276130528, "grad_norm": 483.42742919921875, "learning_rate": 1.8705187565065003e-06, "loss": 18.1327, "step": 371870 }, { "epoch": 0.7512211282457366, "grad_norm": 307.2109375, "learning_rate": 1.8702465254662527e-06, "loss": 12.4286, "step": 371880 }, { "epoch": 0.7512413288784204, "grad_norm": 186.39794921875, "learning_rate": 1.8699743096800438e-06, "loss": 17.4333, "step": 371890 }, { "epoch": 0.7512615295111043, "grad_norm": 970.8729248046875, "learning_rate": 1.8697021091491991e-06, "loss": 27.1103, "step": 371900 }, { "epoch": 0.7512817301437881, "grad_norm": 177.46376037597656, "learning_rate": 1.869429923875048e-06, "loss": 13.5754, "step": 371910 }, { "epoch": 0.7513019307764719, "grad_norm": 223.567626953125, "learning_rate": 1.869157753858914e-06, "loss": 21.5386, "step": 371920 }, { "epoch": 0.7513221314091557, "grad_norm": 13.240137100219727, "learning_rate": 1.8688855991021272e-06, "loss": 17.3284, "step": 371930 }, { "epoch": 0.7513423320418395, "grad_norm": 281.25494384765625, "learning_rate": 1.8686134596060123e-06, "loss": 12.2437, "step": 371940 }, { "epoch": 0.7513625326745234, "grad_norm": 116.84770965576172, "learning_rate": 1.8683413353718937e-06, "loss": 17.7168, "step": 371950 }, { "epoch": 0.7513827333072072, "grad_norm": 489.1940612792969, "learning_rate": 1.8680692264011014e-06, "loss": 18.8212, "step": 371960 }, { "epoch": 0.751402933939891, "grad_norm": 284.4034423828125, "learning_rate": 1.8677971326949602e-06, "loss": 13.5852, "step": 371970 }, { "epoch": 0.7514231345725748, "grad_norm": 203.17247009277344, "learning_rate": 1.867525054254794e-06, "loss": 9.4102, "step": 371980 }, { "epoch": 0.7514433352052586, "grad_norm": 354.8770446777344, "learning_rate": 1.8672529910819305e-06, "loss": 32.7523, "step": 371990 }, { "epoch": 0.7514635358379425, "grad_norm": 357.680908203125, "learning_rate": 1.8669809431776991e-06, "loss": 31.6429, "step": 372000 }, { "epoch": 0.7514837364706263, "grad_norm": 268.7816467285156, "learning_rate": 1.86670891054342e-06, "loss": 13.5867, "step": 372010 }, { "epoch": 0.7515039371033101, "grad_norm": 238.21539306640625, "learning_rate": 1.8664368931804211e-06, "loss": 11.6549, "step": 372020 }, { "epoch": 0.7515241377359939, "grad_norm": 137.00787353515625, "learning_rate": 1.8661648910900303e-06, "loss": 26.3963, "step": 372030 }, { "epoch": 0.7515443383686777, "grad_norm": 354.9906921386719, "learning_rate": 1.8658929042735725e-06, "loss": 15.5531, "step": 372040 }, { "epoch": 0.7515645390013616, "grad_norm": 123.8189926147461, "learning_rate": 1.8656209327323704e-06, "loss": 22.4948, "step": 372050 }, { "epoch": 0.7515847396340454, "grad_norm": 484.3825988769531, "learning_rate": 1.8653489764677512e-06, "loss": 13.5583, "step": 372060 }, { "epoch": 0.7516049402667292, "grad_norm": 327.2861633300781, "learning_rate": 1.865077035481045e-06, "loss": 16.0913, "step": 372070 }, { "epoch": 0.751625140899413, "grad_norm": 553.262451171875, "learning_rate": 1.8648051097735697e-06, "loss": 17.1464, "step": 372080 }, { "epoch": 0.7516453415320968, "grad_norm": 196.69821166992188, "learning_rate": 1.8645331993466537e-06, "loss": 9.0393, "step": 372090 }, { "epoch": 0.7516655421647807, "grad_norm": 312.74908447265625, "learning_rate": 1.8642613042016245e-06, "loss": 27.2509, "step": 372100 }, { "epoch": 0.7516857427974644, "grad_norm": 359.1634216308594, "learning_rate": 1.8639894243398055e-06, "loss": 13.586, "step": 372110 }, { "epoch": 0.7517059434301482, "grad_norm": 2.7487335205078125, "learning_rate": 1.8637175597625195e-06, "loss": 13.0632, "step": 372120 }, { "epoch": 0.751726144062832, "grad_norm": 233.28746032714844, "learning_rate": 1.8634457104710956e-06, "loss": 11.3842, "step": 372130 }, { "epoch": 0.7517463446955158, "grad_norm": 269.3181457519531, "learning_rate": 1.8631738764668571e-06, "loss": 15.8975, "step": 372140 }, { "epoch": 0.7517665453281996, "grad_norm": 169.63861083984375, "learning_rate": 1.862902057751127e-06, "loss": 6.3894, "step": 372150 }, { "epoch": 0.7517867459608835, "grad_norm": 239.3770751953125, "learning_rate": 1.8626302543252317e-06, "loss": 11.5571, "step": 372160 }, { "epoch": 0.7518069465935673, "grad_norm": 19.957876205444336, "learning_rate": 1.8623584661904976e-06, "loss": 10.6067, "step": 372170 }, { "epoch": 0.7518271472262511, "grad_norm": 218.98687744140625, "learning_rate": 1.862086693348248e-06, "loss": 22.7541, "step": 372180 }, { "epoch": 0.7518473478589349, "grad_norm": 199.07884216308594, "learning_rate": 1.8618149357998055e-06, "loss": 9.1286, "step": 372190 }, { "epoch": 0.7518675484916187, "grad_norm": 334.0825500488281, "learning_rate": 1.8615431935464984e-06, "loss": 16.6159, "step": 372200 }, { "epoch": 0.7518877491243026, "grad_norm": 136.04904174804688, "learning_rate": 1.8612714665896486e-06, "loss": 9.1285, "step": 372210 }, { "epoch": 0.7519079497569864, "grad_norm": 143.34255981445312, "learning_rate": 1.8609997549305792e-06, "loss": 15.9081, "step": 372220 }, { "epoch": 0.7519281503896702, "grad_norm": 1.9718449115753174, "learning_rate": 1.8607280585706183e-06, "loss": 25.0753, "step": 372230 }, { "epoch": 0.751948351022354, "grad_norm": 815.2902221679688, "learning_rate": 1.8604563775110868e-06, "loss": 29.9698, "step": 372240 }, { "epoch": 0.7519685516550378, "grad_norm": 186.02572631835938, "learning_rate": 1.8601847117533112e-06, "loss": 14.2338, "step": 372250 }, { "epoch": 0.7519887522877217, "grad_norm": 168.09886169433594, "learning_rate": 1.859913061298615e-06, "loss": 10.519, "step": 372260 }, { "epoch": 0.7520089529204055, "grad_norm": 110.88844299316406, "learning_rate": 1.8596414261483192e-06, "loss": 14.5192, "step": 372270 }, { "epoch": 0.7520291535530893, "grad_norm": 472.1087646484375, "learning_rate": 1.8593698063037525e-06, "loss": 23.3645, "step": 372280 }, { "epoch": 0.7520493541857731, "grad_norm": 165.42684936523438, "learning_rate": 1.8590982017662362e-06, "loss": 23.9716, "step": 372290 }, { "epoch": 0.7520695548184569, "grad_norm": 519.8869018554688, "learning_rate": 1.8588266125370929e-06, "loss": 23.4642, "step": 372300 }, { "epoch": 0.7520897554511408, "grad_norm": 222.23216247558594, "learning_rate": 1.8585550386176476e-06, "loss": 14.9329, "step": 372310 }, { "epoch": 0.7521099560838246, "grad_norm": 551.2659301757812, "learning_rate": 1.858283480009226e-06, "loss": 19.0285, "step": 372320 }, { "epoch": 0.7521301567165084, "grad_norm": 497.09490966796875, "learning_rate": 1.8580119367131487e-06, "loss": 21.276, "step": 372330 }, { "epoch": 0.7521503573491922, "grad_norm": 132.97637939453125, "learning_rate": 1.8577404087307394e-06, "loss": 12.9163, "step": 372340 }, { "epoch": 0.752170557981876, "grad_norm": 363.87896728515625, "learning_rate": 1.8574688960633236e-06, "loss": 19.0654, "step": 372350 }, { "epoch": 0.7521907586145599, "grad_norm": 572.6422729492188, "learning_rate": 1.8571973987122233e-06, "loss": 12.7428, "step": 372360 }, { "epoch": 0.7522109592472436, "grad_norm": 182.0271453857422, "learning_rate": 1.85692591667876e-06, "loss": 22.9928, "step": 372370 }, { "epoch": 0.7522311598799274, "grad_norm": 484.3217468261719, "learning_rate": 1.8566544499642587e-06, "loss": 22.5951, "step": 372380 }, { "epoch": 0.7522513605126112, "grad_norm": 187.559326171875, "learning_rate": 1.8563829985700444e-06, "loss": 15.2235, "step": 372390 }, { "epoch": 0.752271561145295, "grad_norm": 283.005126953125, "learning_rate": 1.8561115624974374e-06, "loss": 12.5326, "step": 372400 }, { "epoch": 0.7522917617779789, "grad_norm": 388.7681579589844, "learning_rate": 1.8558401417477602e-06, "loss": 23.2669, "step": 372410 }, { "epoch": 0.7523119624106627, "grad_norm": 651.609619140625, "learning_rate": 1.855568736322338e-06, "loss": 23.0013, "step": 372420 }, { "epoch": 0.7523321630433465, "grad_norm": 339.4350891113281, "learning_rate": 1.8552973462224926e-06, "loss": 35.0268, "step": 372430 }, { "epoch": 0.7523523636760303, "grad_norm": 693.3035278320312, "learning_rate": 1.8550259714495444e-06, "loss": 26.2475, "step": 372440 }, { "epoch": 0.7523725643087141, "grad_norm": 433.6942443847656, "learning_rate": 1.8547546120048204e-06, "loss": 17.1729, "step": 372450 }, { "epoch": 0.752392764941398, "grad_norm": 577.9378662109375, "learning_rate": 1.854483267889639e-06, "loss": 8.9756, "step": 372460 }, { "epoch": 0.7524129655740818, "grad_norm": 414.58355712890625, "learning_rate": 1.854211939105327e-06, "loss": 15.2692, "step": 372470 }, { "epoch": 0.7524331662067656, "grad_norm": 390.64105224609375, "learning_rate": 1.8539406256532022e-06, "loss": 11.6262, "step": 372480 }, { "epoch": 0.7524533668394494, "grad_norm": 360.4312438964844, "learning_rate": 1.8536693275345908e-06, "loss": 26.9072, "step": 372490 }, { "epoch": 0.7524735674721332, "grad_norm": 370.9836120605469, "learning_rate": 1.8533980447508138e-06, "loss": 22.6281, "step": 372500 }, { "epoch": 0.752493768104817, "grad_norm": 398.18597412109375, "learning_rate": 1.8531267773031913e-06, "loss": 15.5106, "step": 372510 }, { "epoch": 0.7525139687375009, "grad_norm": 235.54580688476562, "learning_rate": 1.8528555251930492e-06, "loss": 20.8748, "step": 372520 }, { "epoch": 0.7525341693701847, "grad_norm": 565.748779296875, "learning_rate": 1.8525842884217055e-06, "loss": 22.1918, "step": 372530 }, { "epoch": 0.7525543700028685, "grad_norm": 0.0, "learning_rate": 1.852313066990486e-06, "loss": 13.2048, "step": 372540 }, { "epoch": 0.7525745706355523, "grad_norm": 317.895263671875, "learning_rate": 1.8520418609007107e-06, "loss": 13.16, "step": 372550 }, { "epoch": 0.7525947712682362, "grad_norm": 405.5411376953125, "learning_rate": 1.8517706701536998e-06, "loss": 12.2959, "step": 372560 }, { "epoch": 0.75261497190092, "grad_norm": 204.23092651367188, "learning_rate": 1.8514994947507787e-06, "loss": 16.7468, "step": 372570 }, { "epoch": 0.7526351725336038, "grad_norm": 290.1387023925781, "learning_rate": 1.8512283346932675e-06, "loss": 16.1299, "step": 372580 }, { "epoch": 0.7526553731662876, "grad_norm": 473.88726806640625, "learning_rate": 1.8509571899824851e-06, "loss": 33.331, "step": 372590 }, { "epoch": 0.7526755737989714, "grad_norm": 127.69290924072266, "learning_rate": 1.8506860606197564e-06, "loss": 14.8516, "step": 372600 }, { "epoch": 0.7526957744316553, "grad_norm": 74.41572570800781, "learning_rate": 1.850414946606403e-06, "loss": 18.5232, "step": 372610 }, { "epoch": 0.752715975064339, "grad_norm": 317.71533203125, "learning_rate": 1.850143847943745e-06, "loss": 25.9852, "step": 372620 }, { "epoch": 0.7527361756970228, "grad_norm": 348.8948974609375, "learning_rate": 1.8498727646331022e-06, "loss": 23.5769, "step": 372630 }, { "epoch": 0.7527563763297066, "grad_norm": 346.9688415527344, "learning_rate": 1.8496016966757996e-06, "loss": 27.0463, "step": 372640 }, { "epoch": 0.7527765769623904, "grad_norm": 184.89759826660156, "learning_rate": 1.8493306440731557e-06, "loss": 14.5803, "step": 372650 }, { "epoch": 0.7527967775950742, "grad_norm": 167.66400146484375, "learning_rate": 1.849059606826491e-06, "loss": 8.3785, "step": 372660 }, { "epoch": 0.7528169782277581, "grad_norm": 374.91552734375, "learning_rate": 1.8487885849371268e-06, "loss": 20.7782, "step": 372670 }, { "epoch": 0.7528371788604419, "grad_norm": 538.972412109375, "learning_rate": 1.848517578406389e-06, "loss": 19.3844, "step": 372680 }, { "epoch": 0.7528573794931257, "grad_norm": 338.70001220703125, "learning_rate": 1.8482465872355904e-06, "loss": 20.5977, "step": 372690 }, { "epoch": 0.7528775801258095, "grad_norm": 718.5799560546875, "learning_rate": 1.8479756114260562e-06, "loss": 21.3674, "step": 372700 }, { "epoch": 0.7528977807584933, "grad_norm": 300.2347106933594, "learning_rate": 1.847704650979108e-06, "loss": 12.5836, "step": 372710 }, { "epoch": 0.7529179813911772, "grad_norm": 231.1571502685547, "learning_rate": 1.8474337058960646e-06, "loss": 23.0435, "step": 372720 }, { "epoch": 0.752938182023861, "grad_norm": 370.2584533691406, "learning_rate": 1.8471627761782457e-06, "loss": 13.0893, "step": 372730 }, { "epoch": 0.7529583826565448, "grad_norm": 498.2112731933594, "learning_rate": 1.8468918618269749e-06, "loss": 21.5959, "step": 372740 }, { "epoch": 0.7529785832892286, "grad_norm": 276.0571594238281, "learning_rate": 1.8466209628435705e-06, "loss": 15.1261, "step": 372750 }, { "epoch": 0.7529987839219124, "grad_norm": 123.42581939697266, "learning_rate": 1.846350079229351e-06, "loss": 21.7773, "step": 372760 }, { "epoch": 0.7530189845545963, "grad_norm": 245.96035766601562, "learning_rate": 1.846079210985639e-06, "loss": 22.8511, "step": 372770 }, { "epoch": 0.7530391851872801, "grad_norm": 4.515310764312744, "learning_rate": 1.8458083581137565e-06, "loss": 10.6068, "step": 372780 }, { "epoch": 0.7530593858199639, "grad_norm": 219.97572326660156, "learning_rate": 1.8455375206150212e-06, "loss": 17.4104, "step": 372790 }, { "epoch": 0.7530795864526477, "grad_norm": 276.88995361328125, "learning_rate": 1.8452666984907519e-06, "loss": 10.8381, "step": 372800 }, { "epoch": 0.7530997870853315, "grad_norm": 366.96624755859375, "learning_rate": 1.8449958917422712e-06, "loss": 19.4583, "step": 372810 }, { "epoch": 0.7531199877180154, "grad_norm": 7.265793323516846, "learning_rate": 1.8447251003708982e-06, "loss": 22.6578, "step": 372820 }, { "epoch": 0.7531401883506992, "grad_norm": 136.7703857421875, "learning_rate": 1.8444543243779512e-06, "loss": 32.6324, "step": 372830 }, { "epoch": 0.753160388983383, "grad_norm": 212.67579650878906, "learning_rate": 1.844183563764752e-06, "loss": 13.0142, "step": 372840 }, { "epoch": 0.7531805896160668, "grad_norm": 360.8093566894531, "learning_rate": 1.8439128185326183e-06, "loss": 21.0656, "step": 372850 }, { "epoch": 0.7532007902487506, "grad_norm": 12.624883651733398, "learning_rate": 1.843642088682872e-06, "loss": 24.889, "step": 372860 }, { "epoch": 0.7532209908814345, "grad_norm": 395.3611755371094, "learning_rate": 1.8433713742168313e-06, "loss": 11.4491, "step": 372870 }, { "epoch": 0.7532411915141182, "grad_norm": 101.68817901611328, "learning_rate": 1.8431006751358143e-06, "loss": 9.1052, "step": 372880 }, { "epoch": 0.753261392146802, "grad_norm": 185.86386108398438, "learning_rate": 1.8428299914411435e-06, "loss": 17.5092, "step": 372890 }, { "epoch": 0.7532815927794858, "grad_norm": 387.506103515625, "learning_rate": 1.842559323134136e-06, "loss": 17.2897, "step": 372900 }, { "epoch": 0.7533017934121696, "grad_norm": 177.74490356445312, "learning_rate": 1.8422886702161098e-06, "loss": 22.9729, "step": 372910 }, { "epoch": 0.7533219940448534, "grad_norm": 314.6582336425781, "learning_rate": 1.8420180326883857e-06, "loss": 34.7826, "step": 372920 }, { "epoch": 0.7533421946775373, "grad_norm": 277.3191833496094, "learning_rate": 1.8417474105522849e-06, "loss": 23.0506, "step": 372930 }, { "epoch": 0.7533623953102211, "grad_norm": 210.78102111816406, "learning_rate": 1.8414768038091235e-06, "loss": 9.0511, "step": 372940 }, { "epoch": 0.7533825959429049, "grad_norm": 177.88006591796875, "learning_rate": 1.8412062124602192e-06, "loss": 18.2188, "step": 372950 }, { "epoch": 0.7534027965755887, "grad_norm": 425.5992736816406, "learning_rate": 1.8409356365068947e-06, "loss": 31.3682, "step": 372960 }, { "epoch": 0.7534229972082725, "grad_norm": 485.27960205078125, "learning_rate": 1.8406650759504667e-06, "loss": 14.0963, "step": 372970 }, { "epoch": 0.7534431978409564, "grad_norm": 153.61788940429688, "learning_rate": 1.8403945307922526e-06, "loss": 15.6879, "step": 372980 }, { "epoch": 0.7534633984736402, "grad_norm": 505.7862854003906, "learning_rate": 1.8401240010335725e-06, "loss": 18.2478, "step": 372990 }, { "epoch": 0.753483599106324, "grad_norm": 241.792724609375, "learning_rate": 1.8398534866757455e-06, "loss": 17.9446, "step": 373000 }, { "epoch": 0.7535037997390078, "grad_norm": 138.29689025878906, "learning_rate": 1.8395829877200904e-06, "loss": 15.2204, "step": 373010 }, { "epoch": 0.7535240003716916, "grad_norm": 298.5768737792969, "learning_rate": 1.8393125041679221e-06, "loss": 13.5114, "step": 373020 }, { "epoch": 0.7535442010043755, "grad_norm": 274.4516296386719, "learning_rate": 1.8390420360205635e-06, "loss": 13.0657, "step": 373030 }, { "epoch": 0.7535644016370593, "grad_norm": 156.4977569580078, "learning_rate": 1.83877158327933e-06, "loss": 6.8421, "step": 373040 }, { "epoch": 0.7535846022697431, "grad_norm": 499.7320251464844, "learning_rate": 1.8385011459455394e-06, "loss": 17.7813, "step": 373050 }, { "epoch": 0.7536048029024269, "grad_norm": 181.1956024169922, "learning_rate": 1.83823072402051e-06, "loss": 17.6532, "step": 373060 }, { "epoch": 0.7536250035351107, "grad_norm": 649.1834716796875, "learning_rate": 1.8379603175055628e-06, "loss": 19.729, "step": 373070 }, { "epoch": 0.7536452041677946, "grad_norm": 7.706061363220215, "learning_rate": 1.8376899264020138e-06, "loss": 36.0864, "step": 373080 }, { "epoch": 0.7536654048004784, "grad_norm": 429.47576904296875, "learning_rate": 1.8374195507111781e-06, "loss": 18.2878, "step": 373090 }, { "epoch": 0.7536856054331622, "grad_norm": 216.8671417236328, "learning_rate": 1.837149190434378e-06, "loss": 17.9459, "step": 373100 }, { "epoch": 0.753705806065846, "grad_norm": 419.2262268066406, "learning_rate": 1.8368788455729292e-06, "loss": 24.6017, "step": 373110 }, { "epoch": 0.7537260066985298, "grad_norm": 467.5675964355469, "learning_rate": 1.8366085161281477e-06, "loss": 17.2489, "step": 373120 }, { "epoch": 0.7537462073312137, "grad_norm": 432.9957580566406, "learning_rate": 1.8363382021013536e-06, "loss": 12.2874, "step": 373130 }, { "epoch": 0.7537664079638974, "grad_norm": 2.057077646255493, "learning_rate": 1.8360679034938628e-06, "loss": 22.0209, "step": 373140 }, { "epoch": 0.7537866085965812, "grad_norm": 250.20404052734375, "learning_rate": 1.8357976203069943e-06, "loss": 15.5268, "step": 373150 }, { "epoch": 0.753806809229265, "grad_norm": 522.334228515625, "learning_rate": 1.8355273525420642e-06, "loss": 26.7327, "step": 373160 }, { "epoch": 0.7538270098619488, "grad_norm": 598.1461181640625, "learning_rate": 1.8352571002003888e-06, "loss": 24.1075, "step": 373170 }, { "epoch": 0.7538472104946327, "grad_norm": 403.48553466796875, "learning_rate": 1.834986863283288e-06, "loss": 11.0467, "step": 373180 }, { "epoch": 0.7538674111273165, "grad_norm": 58.01956558227539, "learning_rate": 1.8347166417920776e-06, "loss": 19.0689, "step": 373190 }, { "epoch": 0.7538876117600003, "grad_norm": 241.477783203125, "learning_rate": 1.8344464357280722e-06, "loss": 13.3962, "step": 373200 }, { "epoch": 0.7539078123926841, "grad_norm": 244.2522735595703, "learning_rate": 1.834176245092591e-06, "loss": 18.2786, "step": 373210 }, { "epoch": 0.7539280130253679, "grad_norm": 190.03317260742188, "learning_rate": 1.8339060698869526e-06, "loss": 23.9354, "step": 373220 }, { "epoch": 0.7539482136580518, "grad_norm": 234.9008331298828, "learning_rate": 1.8336359101124724e-06, "loss": 8.9695, "step": 373230 }, { "epoch": 0.7539684142907356, "grad_norm": 328.0928955078125, "learning_rate": 1.8333657657704645e-06, "loss": 13.3538, "step": 373240 }, { "epoch": 0.7539886149234194, "grad_norm": 94.23754119873047, "learning_rate": 1.8330956368622498e-06, "loss": 19.1921, "step": 373250 }, { "epoch": 0.7540088155561032, "grad_norm": 384.3306884765625, "learning_rate": 1.8328255233891428e-06, "loss": 12.9553, "step": 373260 }, { "epoch": 0.754029016188787, "grad_norm": 299.4577941894531, "learning_rate": 1.8325554253524585e-06, "loss": 16.6405, "step": 373270 }, { "epoch": 0.7540492168214709, "grad_norm": 288.4255065917969, "learning_rate": 1.8322853427535148e-06, "loss": 12.3962, "step": 373280 }, { "epoch": 0.7540694174541547, "grad_norm": 148.76492309570312, "learning_rate": 1.832015275593631e-06, "loss": 8.5253, "step": 373290 }, { "epoch": 0.7540896180868385, "grad_norm": 141.30752563476562, "learning_rate": 1.831745223874118e-06, "loss": 16.5222, "step": 373300 }, { "epoch": 0.7541098187195223, "grad_norm": 859.2390747070312, "learning_rate": 1.8314751875962939e-06, "loss": 33.0018, "step": 373310 }, { "epoch": 0.7541300193522061, "grad_norm": 554.9686889648438, "learning_rate": 1.8312051667614772e-06, "loss": 22.7421, "step": 373320 }, { "epoch": 0.75415021998489, "grad_norm": 324.8944396972656, "learning_rate": 1.8309351613709825e-06, "loss": 24.7885, "step": 373330 }, { "epoch": 0.7541704206175738, "grad_norm": 107.12764739990234, "learning_rate": 1.8306651714261237e-06, "loss": 25.086, "step": 373340 }, { "epoch": 0.7541906212502576, "grad_norm": 315.23779296875, "learning_rate": 1.8303951969282202e-06, "loss": 17.5487, "step": 373350 }, { "epoch": 0.7542108218829414, "grad_norm": 491.9957580566406, "learning_rate": 1.8301252378785856e-06, "loss": 14.9946, "step": 373360 }, { "epoch": 0.7542310225156252, "grad_norm": 258.3334045410156, "learning_rate": 1.8298552942785352e-06, "loss": 11.8274, "step": 373370 }, { "epoch": 0.7542512231483091, "grad_norm": 200.33392333984375, "learning_rate": 1.829585366129385e-06, "loss": 13.7411, "step": 373380 }, { "epoch": 0.7542714237809928, "grad_norm": 289.10894775390625, "learning_rate": 1.8293154534324531e-06, "loss": 18.3791, "step": 373390 }, { "epoch": 0.7542916244136766, "grad_norm": 77.41669464111328, "learning_rate": 1.829045556189053e-06, "loss": 25.9937, "step": 373400 }, { "epoch": 0.7543118250463604, "grad_norm": 339.200439453125, "learning_rate": 1.8287756744004986e-06, "loss": 41.959, "step": 373410 }, { "epoch": 0.7543320256790442, "grad_norm": 161.6198272705078, "learning_rate": 1.828505808068109e-06, "loss": 9.6083, "step": 373420 }, { "epoch": 0.754352226311728, "grad_norm": 110.94188690185547, "learning_rate": 1.8282359571931968e-06, "loss": 8.6879, "step": 373430 }, { "epoch": 0.7543724269444119, "grad_norm": 144.35133361816406, "learning_rate": 1.8279661217770766e-06, "loss": 12.8273, "step": 373440 }, { "epoch": 0.7543926275770957, "grad_norm": 193.38316345214844, "learning_rate": 1.8276963018210664e-06, "loss": 15.1566, "step": 373450 }, { "epoch": 0.7544128282097795, "grad_norm": 62.55465316772461, "learning_rate": 1.8274264973264782e-06, "loss": 20.1919, "step": 373460 }, { "epoch": 0.7544330288424633, "grad_norm": 279.06134033203125, "learning_rate": 1.8271567082946302e-06, "loss": 25.8124, "step": 373470 }, { "epoch": 0.7544532294751471, "grad_norm": 243.78436279296875, "learning_rate": 1.8268869347268348e-06, "loss": 14.8915, "step": 373480 }, { "epoch": 0.754473430107831, "grad_norm": 559.3649291992188, "learning_rate": 1.8266171766244067e-06, "loss": 22.5594, "step": 373490 }, { "epoch": 0.7544936307405148, "grad_norm": 535.0740966796875, "learning_rate": 1.8263474339886628e-06, "loss": 15.0088, "step": 373500 }, { "epoch": 0.7545138313731986, "grad_norm": 15.133732795715332, "learning_rate": 1.8260777068209168e-06, "loss": 9.1016, "step": 373510 }, { "epoch": 0.7545340320058824, "grad_norm": 3.440117597579956, "learning_rate": 1.8258079951224816e-06, "loss": 12.6074, "step": 373520 }, { "epoch": 0.7545542326385662, "grad_norm": 27.929208755493164, "learning_rate": 1.825538298894673e-06, "loss": 25.3468, "step": 373530 }, { "epoch": 0.7545744332712501, "grad_norm": 467.04339599609375, "learning_rate": 1.825268618138808e-06, "loss": 18.8338, "step": 373540 }, { "epoch": 0.7545946339039339, "grad_norm": 276.5616149902344, "learning_rate": 1.824998952856198e-06, "loss": 20.5404, "step": 373550 }, { "epoch": 0.7546148345366177, "grad_norm": 365.4840087890625, "learning_rate": 1.8247293030481568e-06, "loss": 21.6105, "step": 373560 }, { "epoch": 0.7546350351693015, "grad_norm": 345.1330871582031, "learning_rate": 1.824459668716001e-06, "loss": 25.0468, "step": 373570 }, { "epoch": 0.7546552358019853, "grad_norm": 375.4909362792969, "learning_rate": 1.8241900498610438e-06, "loss": 17.7669, "step": 373580 }, { "epoch": 0.7546754364346692, "grad_norm": 102.23528289794922, "learning_rate": 1.8239204464845978e-06, "loss": 14.519, "step": 373590 }, { "epoch": 0.754695637067353, "grad_norm": 30.963123321533203, "learning_rate": 1.8236508585879781e-06, "loss": 20.3173, "step": 373600 }, { "epoch": 0.7547158377000368, "grad_norm": 416.2012634277344, "learning_rate": 1.8233812861725002e-06, "loss": 12.4123, "step": 373610 }, { "epoch": 0.7547360383327206, "grad_norm": 82.71387481689453, "learning_rate": 1.8231117292394772e-06, "loss": 18.7651, "step": 373620 }, { "epoch": 0.7547562389654044, "grad_norm": 68.5100326538086, "learning_rate": 1.8228421877902203e-06, "loss": 20.1089, "step": 373630 }, { "epoch": 0.7547764395980883, "grad_norm": 414.1241760253906, "learning_rate": 1.822572661826047e-06, "loss": 25.0694, "step": 373640 }, { "epoch": 0.754796640230772, "grad_norm": 145.6114501953125, "learning_rate": 1.8223031513482692e-06, "loss": 10.8576, "step": 373650 }, { "epoch": 0.7548168408634558, "grad_norm": 237.76441955566406, "learning_rate": 1.8220336563581986e-06, "loss": 16.7015, "step": 373660 }, { "epoch": 0.7548370414961396, "grad_norm": 374.1480712890625, "learning_rate": 1.821764176857151e-06, "loss": 24.5724, "step": 373670 }, { "epoch": 0.7548572421288234, "grad_norm": 154.25526428222656, "learning_rate": 1.8214947128464406e-06, "loss": 12.942, "step": 373680 }, { "epoch": 0.7548774427615073, "grad_norm": 424.0699157714844, "learning_rate": 1.8212252643273797e-06, "loss": 28.8731, "step": 373690 }, { "epoch": 0.7548976433941911, "grad_norm": 106.17951965332031, "learning_rate": 1.8209558313012792e-06, "loss": 23.8064, "step": 373700 }, { "epoch": 0.7549178440268749, "grad_norm": 328.5592041015625, "learning_rate": 1.8206864137694563e-06, "loss": 14.3259, "step": 373710 }, { "epoch": 0.7549380446595587, "grad_norm": 312.2573547363281, "learning_rate": 1.8204170117332226e-06, "loss": 19.5881, "step": 373720 }, { "epoch": 0.7549582452922425, "grad_norm": 186.91293334960938, "learning_rate": 1.8201476251938888e-06, "loss": 25.5465, "step": 373730 }, { "epoch": 0.7549784459249264, "grad_norm": 506.7359619140625, "learning_rate": 1.8198782541527715e-06, "loss": 12.9429, "step": 373740 }, { "epoch": 0.7549986465576102, "grad_norm": 335.74127197265625, "learning_rate": 1.8196088986111798e-06, "loss": 18.1327, "step": 373750 }, { "epoch": 0.755018847190294, "grad_norm": 201.4241180419922, "learning_rate": 1.819339558570431e-06, "loss": 7.8501, "step": 373760 }, { "epoch": 0.7550390478229778, "grad_norm": 105.55039978027344, "learning_rate": 1.819070234031835e-06, "loss": 13.2574, "step": 373770 }, { "epoch": 0.7550592484556616, "grad_norm": 24.654640197753906, "learning_rate": 1.818800924996703e-06, "loss": 18.5261, "step": 373780 }, { "epoch": 0.7550794490883455, "grad_norm": 214.51229858398438, "learning_rate": 1.8185316314663515e-06, "loss": 13.9128, "step": 373790 }, { "epoch": 0.7550996497210293, "grad_norm": 142.58901977539062, "learning_rate": 1.8182623534420906e-06, "loss": 23.0841, "step": 373800 }, { "epoch": 0.7551198503537131, "grad_norm": 238.1830291748047, "learning_rate": 1.817993090925232e-06, "loss": 8.8785, "step": 373810 }, { "epoch": 0.7551400509863969, "grad_norm": 483.6149597167969, "learning_rate": 1.8177238439170885e-06, "loss": 11.5263, "step": 373820 }, { "epoch": 0.7551602516190807, "grad_norm": 538.661376953125, "learning_rate": 1.8174546124189752e-06, "loss": 15.4403, "step": 373830 }, { "epoch": 0.7551804522517646, "grad_norm": 81.05498504638672, "learning_rate": 1.8171853964322016e-06, "loss": 10.0287, "step": 373840 }, { "epoch": 0.7552006528844484, "grad_norm": 343.8782043457031, "learning_rate": 1.8169161959580795e-06, "loss": 16.4323, "step": 373850 }, { "epoch": 0.7552208535171322, "grad_norm": 275.1444396972656, "learning_rate": 1.816647010997923e-06, "loss": 18.8287, "step": 373860 }, { "epoch": 0.755241054149816, "grad_norm": 250.4497833251953, "learning_rate": 1.8163778415530425e-06, "loss": 22.489, "step": 373870 }, { "epoch": 0.7552612547824998, "grad_norm": 123.08308410644531, "learning_rate": 1.8161086876247492e-06, "loss": 29.0341, "step": 373880 }, { "epoch": 0.7552814554151837, "grad_norm": 251.13031005859375, "learning_rate": 1.8158395492143555e-06, "loss": 18.4822, "step": 373890 }, { "epoch": 0.7553016560478674, "grad_norm": 583.1220092773438, "learning_rate": 1.8155704263231777e-06, "loss": 13.928, "step": 373900 }, { "epoch": 0.7553218566805512, "grad_norm": 51.52424240112305, "learning_rate": 1.8153013189525192e-06, "loss": 26.4437, "step": 373910 }, { "epoch": 0.755342057313235, "grad_norm": 385.1058654785156, "learning_rate": 1.8150322271036962e-06, "loss": 39.8106, "step": 373920 }, { "epoch": 0.7553622579459188, "grad_norm": 430.46954345703125, "learning_rate": 1.8147631507780217e-06, "loss": 21.9112, "step": 373930 }, { "epoch": 0.7553824585786026, "grad_norm": 76.53976440429688, "learning_rate": 1.814494089976805e-06, "loss": 18.8283, "step": 373940 }, { "epoch": 0.7554026592112865, "grad_norm": 329.3174133300781, "learning_rate": 1.8142250447013566e-06, "loss": 18.5861, "step": 373950 }, { "epoch": 0.7554228598439703, "grad_norm": 520.67431640625, "learning_rate": 1.81395601495299e-06, "loss": 15.9519, "step": 373960 }, { "epoch": 0.7554430604766541, "grad_norm": 241.17335510253906, "learning_rate": 1.8136870007330155e-06, "loss": 9.3005, "step": 373970 }, { "epoch": 0.7554632611093379, "grad_norm": 263.6846008300781, "learning_rate": 1.8134180020427423e-06, "loss": 33.4948, "step": 373980 }, { "epoch": 0.7554834617420217, "grad_norm": 1.3399641513824463, "learning_rate": 1.8131490188834837e-06, "loss": 8.156, "step": 373990 }, { "epoch": 0.7555036623747056, "grad_norm": 110.55274200439453, "learning_rate": 1.8128800512565514e-06, "loss": 10.2366, "step": 374000 }, { "epoch": 0.7555238630073894, "grad_norm": 300.9415283203125, "learning_rate": 1.8126110991632556e-06, "loss": 9.7988, "step": 374010 }, { "epoch": 0.7555440636400732, "grad_norm": 188.08917236328125, "learning_rate": 1.8123421626049048e-06, "loss": 13.2391, "step": 374020 }, { "epoch": 0.755564264272757, "grad_norm": 568.755615234375, "learning_rate": 1.8120732415828135e-06, "loss": 11.615, "step": 374030 }, { "epoch": 0.7555844649054408, "grad_norm": 198.98580932617188, "learning_rate": 1.8118043360982906e-06, "loss": 12.7262, "step": 374040 }, { "epoch": 0.7556046655381247, "grad_norm": 282.9316711425781, "learning_rate": 1.8115354461526453e-06, "loss": 28.8861, "step": 374050 }, { "epoch": 0.7556248661708085, "grad_norm": 280.8355407714844, "learning_rate": 1.8112665717471905e-06, "loss": 22.9243, "step": 374060 }, { "epoch": 0.7556450668034923, "grad_norm": 181.79820251464844, "learning_rate": 1.8109977128832346e-06, "loss": 27.814, "step": 374070 }, { "epoch": 0.7556652674361761, "grad_norm": 302.9494323730469, "learning_rate": 1.8107288695620905e-06, "loss": 19.5836, "step": 374080 }, { "epoch": 0.75568546806886, "grad_norm": 498.0571594238281, "learning_rate": 1.810460041785067e-06, "loss": 9.4701, "step": 374090 }, { "epoch": 0.7557056687015438, "grad_norm": 250.4716033935547, "learning_rate": 1.810191229553473e-06, "loss": 18.7385, "step": 374100 }, { "epoch": 0.7557258693342276, "grad_norm": 324.8961181640625, "learning_rate": 1.8099224328686216e-06, "loss": 29.2094, "step": 374110 }, { "epoch": 0.7557460699669114, "grad_norm": 371.6725158691406, "learning_rate": 1.8096536517318196e-06, "loss": 19.444, "step": 374120 }, { "epoch": 0.7557662705995952, "grad_norm": 279.1492919921875, "learning_rate": 1.8093848861443802e-06, "loss": 12.9051, "step": 374130 }, { "epoch": 0.755786471232279, "grad_norm": 305.0962829589844, "learning_rate": 1.809116136107611e-06, "loss": 18.2336, "step": 374140 }, { "epoch": 0.7558066718649629, "grad_norm": 441.7709655761719, "learning_rate": 1.8088474016228236e-06, "loss": 26.1891, "step": 374150 }, { "epoch": 0.7558268724976466, "grad_norm": 191.62747192382812, "learning_rate": 1.808578682691327e-06, "loss": 9.5756, "step": 374160 }, { "epoch": 0.7558470731303304, "grad_norm": 376.8204650878906, "learning_rate": 1.8083099793144299e-06, "loss": 34.832, "step": 374170 }, { "epoch": 0.7558672737630142, "grad_norm": 279.6910705566406, "learning_rate": 1.8080412914934436e-06, "loss": 11.1636, "step": 374180 }, { "epoch": 0.755887474395698, "grad_norm": 167.48773193359375, "learning_rate": 1.8077726192296774e-06, "loss": 8.536, "step": 374190 }, { "epoch": 0.7559076750283819, "grad_norm": 163.50146484375, "learning_rate": 1.807503962524439e-06, "loss": 12.0101, "step": 374200 }, { "epoch": 0.7559278756610657, "grad_norm": 201.55093383789062, "learning_rate": 1.8072353213790383e-06, "loss": 17.4611, "step": 374210 }, { "epoch": 0.7559480762937495, "grad_norm": 96.20030212402344, "learning_rate": 1.8069666957947873e-06, "loss": 13.6526, "step": 374220 }, { "epoch": 0.7559682769264333, "grad_norm": 56.61305236816406, "learning_rate": 1.8066980857729937e-06, "loss": 15.3879, "step": 374230 }, { "epoch": 0.7559884775591171, "grad_norm": 110.54608917236328, "learning_rate": 1.8064294913149645e-06, "loss": 13.8521, "step": 374240 }, { "epoch": 0.756008678191801, "grad_norm": 681.6128540039062, "learning_rate": 1.806160912422012e-06, "loss": 17.1136, "step": 374250 }, { "epoch": 0.7560288788244848, "grad_norm": 106.87359619140625, "learning_rate": 1.8058923490954443e-06, "loss": 10.3537, "step": 374260 }, { "epoch": 0.7560490794571686, "grad_norm": 210.40232849121094, "learning_rate": 1.8056238013365679e-06, "loss": 19.8252, "step": 374270 }, { "epoch": 0.7560692800898524, "grad_norm": 50.89204788208008, "learning_rate": 1.8053552691466936e-06, "loss": 15.3704, "step": 374280 }, { "epoch": 0.7560894807225362, "grad_norm": 567.3876342773438, "learning_rate": 1.805086752527132e-06, "loss": 14.6839, "step": 374290 }, { "epoch": 0.75610968135522, "grad_norm": 889.1785888671875, "learning_rate": 1.8048182514791901e-06, "loss": 25.6586, "step": 374300 }, { "epoch": 0.7561298819879039, "grad_norm": 7.536034107208252, "learning_rate": 1.804549766004175e-06, "loss": 11.0978, "step": 374310 }, { "epoch": 0.7561500826205877, "grad_norm": 188.3358612060547, "learning_rate": 1.8042812961033983e-06, "loss": 12.2811, "step": 374320 }, { "epoch": 0.7561702832532715, "grad_norm": 6.683176040649414, "learning_rate": 1.8040128417781672e-06, "loss": 16.8592, "step": 374330 }, { "epoch": 0.7561904838859553, "grad_norm": 129.92552185058594, "learning_rate": 1.8037444030297878e-06, "loss": 23.2297, "step": 374340 }, { "epoch": 0.7562106845186392, "grad_norm": 505.0289611816406, "learning_rate": 1.8034759798595724e-06, "loss": 18.0153, "step": 374350 }, { "epoch": 0.756230885151323, "grad_norm": 310.5516662597656, "learning_rate": 1.803207572268826e-06, "loss": 10.7811, "step": 374360 }, { "epoch": 0.7562510857840068, "grad_norm": 313.41949462890625, "learning_rate": 1.8029391802588598e-06, "loss": 16.531, "step": 374370 }, { "epoch": 0.7562712864166906, "grad_norm": 976.0877685546875, "learning_rate": 1.8026708038309797e-06, "loss": 17.1283, "step": 374380 }, { "epoch": 0.7562914870493744, "grad_norm": 357.8668212890625, "learning_rate": 1.8024024429864928e-06, "loss": 10.046, "step": 374390 }, { "epoch": 0.7563116876820583, "grad_norm": 193.85714721679688, "learning_rate": 1.8021340977267104e-06, "loss": 14.8637, "step": 374400 }, { "epoch": 0.756331888314742, "grad_norm": 678.7256469726562, "learning_rate": 1.801865768052939e-06, "loss": 18.9322, "step": 374410 }, { "epoch": 0.7563520889474258, "grad_norm": 356.74072265625, "learning_rate": 1.8015974539664839e-06, "loss": 19.5662, "step": 374420 }, { "epoch": 0.7563722895801096, "grad_norm": 302.2659912109375, "learning_rate": 1.8013291554686547e-06, "loss": 13.7546, "step": 374430 }, { "epoch": 0.7563924902127934, "grad_norm": 287.4959411621094, "learning_rate": 1.801060872560761e-06, "loss": 13.965, "step": 374440 }, { "epoch": 0.7564126908454772, "grad_norm": 85.29144287109375, "learning_rate": 1.800792605244109e-06, "loss": 13.7068, "step": 374450 }, { "epoch": 0.7564328914781611, "grad_norm": 191.5181884765625, "learning_rate": 1.8005243535200034e-06, "loss": 22.8049, "step": 374460 }, { "epoch": 0.7564530921108449, "grad_norm": 453.1621398925781, "learning_rate": 1.8002561173897564e-06, "loss": 12.7302, "step": 374470 }, { "epoch": 0.7564732927435287, "grad_norm": 6.109457015991211, "learning_rate": 1.7999878968546724e-06, "loss": 16.598, "step": 374480 }, { "epoch": 0.7564934933762125, "grad_norm": 352.456298828125, "learning_rate": 1.7997196919160582e-06, "loss": 14.3241, "step": 374490 }, { "epoch": 0.7565136940088963, "grad_norm": 63.585296630859375, "learning_rate": 1.799451502575222e-06, "loss": 11.3152, "step": 374500 }, { "epoch": 0.7565338946415802, "grad_norm": 674.2815551757812, "learning_rate": 1.7991833288334742e-06, "loss": 19.2635, "step": 374510 }, { "epoch": 0.756554095274264, "grad_norm": 70.12757110595703, "learning_rate": 1.7989151706921155e-06, "loss": 12.8808, "step": 374520 }, { "epoch": 0.7565742959069478, "grad_norm": 599.1892700195312, "learning_rate": 1.7986470281524555e-06, "loss": 18.8225, "step": 374530 }, { "epoch": 0.7565944965396316, "grad_norm": 423.82373046875, "learning_rate": 1.7983789012158037e-06, "loss": 15.8649, "step": 374540 }, { "epoch": 0.7566146971723154, "grad_norm": 305.4238586425781, "learning_rate": 1.798110789883465e-06, "loss": 18.1302, "step": 374550 }, { "epoch": 0.7566348978049993, "grad_norm": 13.181485176086426, "learning_rate": 1.7978426941567435e-06, "loss": 11.0098, "step": 374560 }, { "epoch": 0.7566550984376831, "grad_norm": 321.24053955078125, "learning_rate": 1.7975746140369505e-06, "loss": 13.813, "step": 374570 }, { "epoch": 0.7566752990703669, "grad_norm": 344.3838806152344, "learning_rate": 1.7973065495253905e-06, "loss": 20.7409, "step": 374580 }, { "epoch": 0.7566954997030507, "grad_norm": 227.4783172607422, "learning_rate": 1.7970385006233682e-06, "loss": 17.4998, "step": 374590 }, { "epoch": 0.7567157003357345, "grad_norm": 320.521728515625, "learning_rate": 1.7967704673321917e-06, "loss": 25.832, "step": 374600 }, { "epoch": 0.7567359009684184, "grad_norm": 149.49171447753906, "learning_rate": 1.796502449653169e-06, "loss": 10.3551, "step": 374610 }, { "epoch": 0.7567561016011022, "grad_norm": 194.81900024414062, "learning_rate": 1.7962344475876054e-06, "loss": 13.9497, "step": 374620 }, { "epoch": 0.756776302233786, "grad_norm": 131.8449249267578, "learning_rate": 1.7959664611368043e-06, "loss": 17.1921, "step": 374630 }, { "epoch": 0.7567965028664698, "grad_norm": 331.63104248046875, "learning_rate": 1.7956984903020757e-06, "loss": 23.7655, "step": 374640 }, { "epoch": 0.7568167034991536, "grad_norm": 494.4194641113281, "learning_rate": 1.7954305350847246e-06, "loss": 25.3602, "step": 374650 }, { "epoch": 0.7568369041318375, "grad_norm": 249.48912048339844, "learning_rate": 1.7951625954860541e-06, "loss": 18.3152, "step": 374660 }, { "epoch": 0.7568571047645212, "grad_norm": 406.9605712890625, "learning_rate": 1.7948946715073744e-06, "loss": 20.2428, "step": 374670 }, { "epoch": 0.756877305397205, "grad_norm": 0.0, "learning_rate": 1.7946267631499874e-06, "loss": 31.4678, "step": 374680 }, { "epoch": 0.7568975060298888, "grad_norm": 138.20411682128906, "learning_rate": 1.7943588704152033e-06, "loss": 18.0225, "step": 374690 }, { "epoch": 0.7569177066625726, "grad_norm": 220.34573364257812, "learning_rate": 1.7940909933043243e-06, "loss": 7.5423, "step": 374700 }, { "epoch": 0.7569379072952565, "grad_norm": 366.0596618652344, "learning_rate": 1.7938231318186555e-06, "loss": 19.0047, "step": 374710 }, { "epoch": 0.7569581079279403, "grad_norm": 0.0, "learning_rate": 1.7935552859595058e-06, "loss": 14.6056, "step": 374720 }, { "epoch": 0.7569783085606241, "grad_norm": 263.7019958496094, "learning_rate": 1.793287455728177e-06, "loss": 30.3345, "step": 374730 }, { "epoch": 0.7569985091933079, "grad_norm": 404.52197265625, "learning_rate": 1.7930196411259782e-06, "loss": 23.2876, "step": 374740 }, { "epoch": 0.7570187098259917, "grad_norm": 83.6988754272461, "learning_rate": 1.7927518421542106e-06, "loss": 17.9395, "step": 374750 }, { "epoch": 0.7570389104586756, "grad_norm": 309.87237548828125, "learning_rate": 1.7924840588141829e-06, "loss": 10.5663, "step": 374760 }, { "epoch": 0.7570591110913594, "grad_norm": 360.4497985839844, "learning_rate": 1.7922162911071993e-06, "loss": 18.2489, "step": 374770 }, { "epoch": 0.7570793117240432, "grad_norm": 391.6981201171875, "learning_rate": 1.7919485390345631e-06, "loss": 21.5628, "step": 374780 }, { "epoch": 0.757099512356727, "grad_norm": 254.58428955078125, "learning_rate": 1.791680802597582e-06, "loss": 12.6833, "step": 374790 }, { "epoch": 0.7571197129894108, "grad_norm": 337.30084228515625, "learning_rate": 1.7914130817975595e-06, "loss": 17.619, "step": 374800 }, { "epoch": 0.7571399136220947, "grad_norm": 626.58935546875, "learning_rate": 1.7911453766357983e-06, "loss": 27.7415, "step": 374810 }, { "epoch": 0.7571601142547785, "grad_norm": 28.492826461791992, "learning_rate": 1.7908776871136063e-06, "loss": 28.8566, "step": 374820 }, { "epoch": 0.7571803148874623, "grad_norm": 397.635009765625, "learning_rate": 1.7906100132322902e-06, "loss": 14.4296, "step": 374830 }, { "epoch": 0.7572005155201461, "grad_norm": 162.2833709716797, "learning_rate": 1.7903423549931482e-06, "loss": 15.9858, "step": 374840 }, { "epoch": 0.7572207161528299, "grad_norm": 92.31011962890625, "learning_rate": 1.7900747123974882e-06, "loss": 33.6762, "step": 374850 }, { "epoch": 0.7572409167855138, "grad_norm": 310.4580993652344, "learning_rate": 1.7898070854466165e-06, "loss": 14.9239, "step": 374860 }, { "epoch": 0.7572611174181976, "grad_norm": 332.7672119140625, "learning_rate": 1.7895394741418355e-06, "loss": 30.0263, "step": 374870 }, { "epoch": 0.7572813180508814, "grad_norm": 370.1589050292969, "learning_rate": 1.7892718784844482e-06, "loss": 14.64, "step": 374880 }, { "epoch": 0.7573015186835652, "grad_norm": 186.5875244140625, "learning_rate": 1.7890042984757605e-06, "loss": 28.4847, "step": 374890 }, { "epoch": 0.757321719316249, "grad_norm": 245.8651885986328, "learning_rate": 1.7887367341170781e-06, "loss": 14.0182, "step": 374900 }, { "epoch": 0.7573419199489329, "grad_norm": 200.75401306152344, "learning_rate": 1.788469185409703e-06, "loss": 21.3586, "step": 374910 }, { "epoch": 0.7573621205816167, "grad_norm": 81.17245483398438, "learning_rate": 1.7882016523549383e-06, "loss": 10.8858, "step": 374920 }, { "epoch": 0.7573823212143004, "grad_norm": 408.662841796875, "learning_rate": 1.7879341349540907e-06, "loss": 22.676, "step": 374930 }, { "epoch": 0.7574025218469842, "grad_norm": 187.87655639648438, "learning_rate": 1.787666633208463e-06, "loss": 13.6757, "step": 374940 }, { "epoch": 0.757422722479668, "grad_norm": 239.34779357910156, "learning_rate": 1.7873991471193563e-06, "loss": 32.8698, "step": 374950 }, { "epoch": 0.7574429231123518, "grad_norm": 222.33099365234375, "learning_rate": 1.787131676688078e-06, "loss": 26.5589, "step": 374960 }, { "epoch": 0.7574631237450357, "grad_norm": 1713.766357421875, "learning_rate": 1.7868642219159292e-06, "loss": 31.9687, "step": 374970 }, { "epoch": 0.7574833243777195, "grad_norm": 76.4247055053711, "learning_rate": 1.7865967828042158e-06, "loss": 16.209, "step": 374980 }, { "epoch": 0.7575035250104033, "grad_norm": 618.4375, "learning_rate": 1.7863293593542402e-06, "loss": 27.5389, "step": 374990 }, { "epoch": 0.7575237256430871, "grad_norm": 276.76641845703125, "learning_rate": 1.7860619515673034e-06, "loss": 15.757, "step": 375000 }, { "epoch": 0.7575439262757709, "grad_norm": 197.02761840820312, "learning_rate": 1.7857945594447128e-06, "loss": 18.2251, "step": 375010 }, { "epoch": 0.7575641269084548, "grad_norm": 175.77569580078125, "learning_rate": 1.7855271829877696e-06, "loss": 16.9077, "step": 375020 }, { "epoch": 0.7575843275411386, "grad_norm": 380.82940673828125, "learning_rate": 1.7852598221977757e-06, "loss": 23.3943, "step": 375030 }, { "epoch": 0.7576045281738224, "grad_norm": 1469.0279541015625, "learning_rate": 1.7849924770760352e-06, "loss": 17.4919, "step": 375040 }, { "epoch": 0.7576247288065062, "grad_norm": 283.28289794921875, "learning_rate": 1.784725147623853e-06, "loss": 12.1433, "step": 375050 }, { "epoch": 0.75764492943919, "grad_norm": 190.5680389404297, "learning_rate": 1.7844578338425306e-06, "loss": 15.3056, "step": 375060 }, { "epoch": 0.7576651300718739, "grad_norm": 281.5687255859375, "learning_rate": 1.7841905357333688e-06, "loss": 10.0437, "step": 375070 }, { "epoch": 0.7576853307045577, "grad_norm": 179.4819793701172, "learning_rate": 1.7839232532976746e-06, "loss": 21.4888, "step": 375080 }, { "epoch": 0.7577055313372415, "grad_norm": 157.9290313720703, "learning_rate": 1.783655986536748e-06, "loss": 15.1183, "step": 375090 }, { "epoch": 0.7577257319699253, "grad_norm": 149.65347290039062, "learning_rate": 1.7833887354518902e-06, "loss": 9.1619, "step": 375100 }, { "epoch": 0.7577459326026091, "grad_norm": 281.3508605957031, "learning_rate": 1.7831215000444057e-06, "loss": 9.0749, "step": 375110 }, { "epoch": 0.757766133235293, "grad_norm": 144.6361083984375, "learning_rate": 1.7828542803156007e-06, "loss": 17.7034, "step": 375120 }, { "epoch": 0.7577863338679768, "grad_norm": 229.33370971679688, "learning_rate": 1.7825870762667696e-06, "loss": 15.702, "step": 375130 }, { "epoch": 0.7578065345006606, "grad_norm": 276.37213134765625, "learning_rate": 1.7823198878992192e-06, "loss": 12.733, "step": 375140 }, { "epoch": 0.7578267351333444, "grad_norm": 268.3088684082031, "learning_rate": 1.7820527152142531e-06, "loss": 15.6442, "step": 375150 }, { "epoch": 0.7578469357660282, "grad_norm": 137.8107452392578, "learning_rate": 1.781785558213172e-06, "loss": 25.569, "step": 375160 }, { "epoch": 0.7578671363987121, "grad_norm": 184.7560272216797, "learning_rate": 1.781518416897276e-06, "loss": 22.1088, "step": 375170 }, { "epoch": 0.7578873370313958, "grad_norm": 447.39447021484375, "learning_rate": 1.7812512912678687e-06, "loss": 16.5513, "step": 375180 }, { "epoch": 0.7579075376640796, "grad_norm": 146.0526123046875, "learning_rate": 1.7809841813262558e-06, "loss": 10.8843, "step": 375190 }, { "epoch": 0.7579277382967634, "grad_norm": 147.344970703125, "learning_rate": 1.7807170870737317e-06, "loss": 14.4283, "step": 375200 }, { "epoch": 0.7579479389294472, "grad_norm": 325.8018493652344, "learning_rate": 1.7804500085116022e-06, "loss": 13.0072, "step": 375210 }, { "epoch": 0.757968139562131, "grad_norm": 246.00439453125, "learning_rate": 1.7801829456411713e-06, "loss": 10.7406, "step": 375220 }, { "epoch": 0.7579883401948149, "grad_norm": 101.23670959472656, "learning_rate": 1.7799158984637372e-06, "loss": 23.7406, "step": 375230 }, { "epoch": 0.7580085408274987, "grad_norm": 269.1505126953125, "learning_rate": 1.779648866980601e-06, "loss": 9.3598, "step": 375240 }, { "epoch": 0.7580287414601825, "grad_norm": 256.2615966796875, "learning_rate": 1.7793818511930678e-06, "loss": 25.4213, "step": 375250 }, { "epoch": 0.7580489420928663, "grad_norm": 716.2333984375, "learning_rate": 1.779114851102437e-06, "loss": 21.4151, "step": 375260 }, { "epoch": 0.7580691427255501, "grad_norm": 322.4092712402344, "learning_rate": 1.7788478667100074e-06, "loss": 12.0049, "step": 375270 }, { "epoch": 0.758089343358234, "grad_norm": 50.093421936035156, "learning_rate": 1.7785808980170848e-06, "loss": 11.4855, "step": 375280 }, { "epoch": 0.7581095439909178, "grad_norm": 220.8499755859375, "learning_rate": 1.7783139450249664e-06, "loss": 25.5153, "step": 375290 }, { "epoch": 0.7581297446236016, "grad_norm": 370.3611755371094, "learning_rate": 1.7780470077349566e-06, "loss": 20.9253, "step": 375300 }, { "epoch": 0.7581499452562854, "grad_norm": 323.7540588378906, "learning_rate": 1.7777800861483552e-06, "loss": 14.4636, "step": 375310 }, { "epoch": 0.7581701458889692, "grad_norm": 132.79600524902344, "learning_rate": 1.7775131802664608e-06, "loss": 16.2077, "step": 375320 }, { "epoch": 0.7581903465216531, "grad_norm": 717.122314453125, "learning_rate": 1.777246290090578e-06, "loss": 49.2829, "step": 375330 }, { "epoch": 0.7582105471543369, "grad_norm": 317.91229248046875, "learning_rate": 1.7769794156220043e-06, "loss": 16.2889, "step": 375340 }, { "epoch": 0.7582307477870207, "grad_norm": 110.70922088623047, "learning_rate": 1.7767125568620442e-06, "loss": 24.5653, "step": 375350 }, { "epoch": 0.7582509484197045, "grad_norm": 255.2703094482422, "learning_rate": 1.776445713811994e-06, "loss": 12.6978, "step": 375360 }, { "epoch": 0.7582711490523883, "grad_norm": 197.41647338867188, "learning_rate": 1.7761788864731582e-06, "loss": 21.1625, "step": 375370 }, { "epoch": 0.7582913496850722, "grad_norm": 370.6630859375, "learning_rate": 1.7759120748468356e-06, "loss": 21.5443, "step": 375380 }, { "epoch": 0.758311550317756, "grad_norm": 207.20758056640625, "learning_rate": 1.7756452789343243e-06, "loss": 26.2304, "step": 375390 }, { "epoch": 0.7583317509504398, "grad_norm": 289.76397705078125, "learning_rate": 1.7753784987369287e-06, "loss": 24.1166, "step": 375400 }, { "epoch": 0.7583519515831236, "grad_norm": 239.311279296875, "learning_rate": 1.7751117342559477e-06, "loss": 8.6624, "step": 375410 }, { "epoch": 0.7583721522158074, "grad_norm": 741.30224609375, "learning_rate": 1.7748449854926792e-06, "loss": 39.8336, "step": 375420 }, { "epoch": 0.7583923528484913, "grad_norm": 71.34526824951172, "learning_rate": 1.774578252448425e-06, "loss": 7.1148, "step": 375430 }, { "epoch": 0.758412553481175, "grad_norm": 112.40595245361328, "learning_rate": 1.7743115351244883e-06, "loss": 11.7249, "step": 375440 }, { "epoch": 0.7584327541138588, "grad_norm": 246.3319854736328, "learning_rate": 1.7740448335221628e-06, "loss": 9.7839, "step": 375450 }, { "epoch": 0.7584529547465426, "grad_norm": 235.85780334472656, "learning_rate": 1.7737781476427511e-06, "loss": 29.1932, "step": 375460 }, { "epoch": 0.7584731553792264, "grad_norm": 256.6685791015625, "learning_rate": 1.7735114774875556e-06, "loss": 10.0243, "step": 375470 }, { "epoch": 0.7584933560119103, "grad_norm": 7.884824275970459, "learning_rate": 1.7732448230578743e-06, "loss": 8.1411, "step": 375480 }, { "epoch": 0.7585135566445941, "grad_norm": 222.97828674316406, "learning_rate": 1.7729781843550036e-06, "loss": 13.432, "step": 375490 }, { "epoch": 0.7585337572772779, "grad_norm": 355.53155517578125, "learning_rate": 1.7727115613802465e-06, "loss": 24.0644, "step": 375500 }, { "epoch": 0.7585539579099617, "grad_norm": 59.354736328125, "learning_rate": 1.7724449541349048e-06, "loss": 15.9048, "step": 375510 }, { "epoch": 0.7585741585426455, "grad_norm": 460.7884521484375, "learning_rate": 1.772178362620272e-06, "loss": 23.2923, "step": 375520 }, { "epoch": 0.7585943591753294, "grad_norm": 956.916259765625, "learning_rate": 1.77191178683765e-06, "loss": 28.4029, "step": 375530 }, { "epoch": 0.7586145598080132, "grad_norm": 21.740720748901367, "learning_rate": 1.7716452267883404e-06, "loss": 29.0061, "step": 375540 }, { "epoch": 0.758634760440697, "grad_norm": 403.81658935546875, "learning_rate": 1.7713786824736406e-06, "loss": 22.9602, "step": 375550 }, { "epoch": 0.7586549610733808, "grad_norm": 266.36370849609375, "learning_rate": 1.7711121538948473e-06, "loss": 16.6714, "step": 375560 }, { "epoch": 0.7586751617060646, "grad_norm": 252.36807250976562, "learning_rate": 1.7708456410532637e-06, "loss": 21.789, "step": 375570 }, { "epoch": 0.7586953623387485, "grad_norm": 33.26327896118164, "learning_rate": 1.7705791439501851e-06, "loss": 12.3732, "step": 375580 }, { "epoch": 0.7587155629714323, "grad_norm": 171.20797729492188, "learning_rate": 1.7703126625869138e-06, "loss": 14.1504, "step": 375590 }, { "epoch": 0.7587357636041161, "grad_norm": 805.0087890625, "learning_rate": 1.770046196964747e-06, "loss": 31.2324, "step": 375600 }, { "epoch": 0.7587559642367999, "grad_norm": 351.720703125, "learning_rate": 1.769779747084981e-06, "loss": 19.382, "step": 375610 }, { "epoch": 0.7587761648694837, "grad_norm": 196.55337524414062, "learning_rate": 1.769513312948919e-06, "loss": 13.8089, "step": 375620 }, { "epoch": 0.7587963655021676, "grad_norm": 204.39578247070312, "learning_rate": 1.7692468945578572e-06, "loss": 18.6688, "step": 375630 }, { "epoch": 0.7588165661348514, "grad_norm": 163.14385986328125, "learning_rate": 1.768980491913092e-06, "loss": 13.7456, "step": 375640 }, { "epoch": 0.7588367667675352, "grad_norm": 213.82723999023438, "learning_rate": 1.7687141050159246e-06, "loss": 21.241, "step": 375650 }, { "epoch": 0.758856967400219, "grad_norm": 215.51187133789062, "learning_rate": 1.7684477338676543e-06, "loss": 14.9541, "step": 375660 }, { "epoch": 0.7588771680329028, "grad_norm": 136.1697998046875, "learning_rate": 1.7681813784695778e-06, "loss": 13.7415, "step": 375670 }, { "epoch": 0.7588973686655867, "grad_norm": 256.3163757324219, "learning_rate": 1.7679150388229916e-06, "loss": 12.4358, "step": 375680 }, { "epoch": 0.7589175692982704, "grad_norm": 291.9053039550781, "learning_rate": 1.7676487149291972e-06, "loss": 20.1447, "step": 375690 }, { "epoch": 0.7589377699309542, "grad_norm": 300.357666015625, "learning_rate": 1.7673824067894912e-06, "loss": 12.9809, "step": 375700 }, { "epoch": 0.758957970563638, "grad_norm": 361.86322021484375, "learning_rate": 1.767116114405169e-06, "loss": 13.7616, "step": 375710 }, { "epoch": 0.7589781711963218, "grad_norm": 318.9299011230469, "learning_rate": 1.7668498377775312e-06, "loss": 14.946, "step": 375720 }, { "epoch": 0.7589983718290056, "grad_norm": 266.9317626953125, "learning_rate": 1.7665835769078782e-06, "loss": 11.0619, "step": 375730 }, { "epoch": 0.7590185724616895, "grad_norm": 1.6954996585845947, "learning_rate": 1.7663173317975012e-06, "loss": 9.2286, "step": 375740 }, { "epoch": 0.7590387730943733, "grad_norm": 300.1250915527344, "learning_rate": 1.7660511024477018e-06, "loss": 26.9672, "step": 375750 }, { "epoch": 0.7590589737270571, "grad_norm": 276.9754333496094, "learning_rate": 1.765784888859779e-06, "loss": 15.4874, "step": 375760 }, { "epoch": 0.7590791743597409, "grad_norm": 298.92034912109375, "learning_rate": 1.7655186910350276e-06, "loss": 24.272, "step": 375770 }, { "epoch": 0.7590993749924247, "grad_norm": 346.47674560546875, "learning_rate": 1.7652525089747447e-06, "loss": 21.6291, "step": 375780 }, { "epoch": 0.7591195756251086, "grad_norm": 619.6890869140625, "learning_rate": 1.7649863426802283e-06, "loss": 28.0037, "step": 375790 }, { "epoch": 0.7591397762577924, "grad_norm": 292.9516906738281, "learning_rate": 1.7647201921527802e-06, "loss": 13.057, "step": 375800 }, { "epoch": 0.7591599768904762, "grad_norm": 162.71002197265625, "learning_rate": 1.7644540573936892e-06, "loss": 9.5756, "step": 375810 }, { "epoch": 0.75918017752316, "grad_norm": 222.08578491210938, "learning_rate": 1.7641879384042571e-06, "loss": 23.4257, "step": 375820 }, { "epoch": 0.7592003781558438, "grad_norm": 414.46905517578125, "learning_rate": 1.7639218351857824e-06, "loss": 26.2202, "step": 375830 }, { "epoch": 0.7592205787885277, "grad_norm": 332.1457824707031, "learning_rate": 1.76365574773956e-06, "loss": 23.3855, "step": 375840 }, { "epoch": 0.7592407794212115, "grad_norm": 291.902099609375, "learning_rate": 1.763389676066885e-06, "loss": 13.8941, "step": 375850 }, { "epoch": 0.7592609800538953, "grad_norm": 279.8938293457031, "learning_rate": 1.7631236201690583e-06, "loss": 10.2849, "step": 375860 }, { "epoch": 0.7592811806865791, "grad_norm": 230.0496826171875, "learning_rate": 1.7628575800473747e-06, "loss": 22.6142, "step": 375870 }, { "epoch": 0.759301381319263, "grad_norm": 390.0735778808594, "learning_rate": 1.7625915557031287e-06, "loss": 17.137, "step": 375880 }, { "epoch": 0.7593215819519468, "grad_norm": 225.2536163330078, "learning_rate": 1.7623255471376199e-06, "loss": 16.7773, "step": 375890 }, { "epoch": 0.7593417825846306, "grad_norm": 256.6319885253906, "learning_rate": 1.762059554352143e-06, "loss": 34.3456, "step": 375900 }, { "epoch": 0.7593619832173144, "grad_norm": 580.4307861328125, "learning_rate": 1.761793577347996e-06, "loss": 19.4304, "step": 375910 }, { "epoch": 0.7593821838499982, "grad_norm": 245.7985382080078, "learning_rate": 1.761527616126475e-06, "loss": 18.6137, "step": 375920 }, { "epoch": 0.759402384482682, "grad_norm": 197.40977478027344, "learning_rate": 1.7612616706888735e-06, "loss": 5.0816, "step": 375930 }, { "epoch": 0.7594225851153659, "grad_norm": 403.1015319824219, "learning_rate": 1.7609957410364913e-06, "loss": 16.5082, "step": 375940 }, { "epoch": 0.7594427857480496, "grad_norm": 181.65943908691406, "learning_rate": 1.760729827170622e-06, "loss": 16.1482, "step": 375950 }, { "epoch": 0.7594629863807334, "grad_norm": 300.30322265625, "learning_rate": 1.760463929092564e-06, "loss": 10.5627, "step": 375960 }, { "epoch": 0.7594831870134172, "grad_norm": 268.24530029296875, "learning_rate": 1.76019804680361e-06, "loss": 24.4457, "step": 375970 }, { "epoch": 0.759503387646101, "grad_norm": 270.9637451171875, "learning_rate": 1.7599321803050595e-06, "loss": 13.448, "step": 375980 }, { "epoch": 0.7595235882787849, "grad_norm": 283.8780822753906, "learning_rate": 1.7596663295982069e-06, "loss": 26.1105, "step": 375990 }, { "epoch": 0.7595437889114687, "grad_norm": 232.76173400878906, "learning_rate": 1.7594004946843458e-06, "loss": 12.4638, "step": 376000 }, { "epoch": 0.7595639895441525, "grad_norm": 7.890575408935547, "learning_rate": 1.7591346755647754e-06, "loss": 13.4442, "step": 376010 }, { "epoch": 0.7595841901768363, "grad_norm": 264.6801452636719, "learning_rate": 1.7588688722407892e-06, "loss": 20.0535, "step": 376020 }, { "epoch": 0.7596043908095201, "grad_norm": 617.1033935546875, "learning_rate": 1.7586030847136815e-06, "loss": 19.0903, "step": 376030 }, { "epoch": 0.759624591442204, "grad_norm": 447.5340881347656, "learning_rate": 1.7583373129847493e-06, "loss": 28.5174, "step": 376040 }, { "epoch": 0.7596447920748878, "grad_norm": 604.9071044921875, "learning_rate": 1.758071557055291e-06, "loss": 31.3851, "step": 376050 }, { "epoch": 0.7596649927075716, "grad_norm": 321.3642272949219, "learning_rate": 1.7578058169265954e-06, "loss": 31.0852, "step": 376060 }, { "epoch": 0.7596851933402554, "grad_norm": 32.48884963989258, "learning_rate": 1.7575400925999613e-06, "loss": 9.0862, "step": 376070 }, { "epoch": 0.7597053939729392, "grad_norm": 180.25103759765625, "learning_rate": 1.7572743840766854e-06, "loss": 20.1032, "step": 376080 }, { "epoch": 0.7597255946056231, "grad_norm": 166.23541259765625, "learning_rate": 1.7570086913580603e-06, "loss": 14.4864, "step": 376090 }, { "epoch": 0.7597457952383069, "grad_norm": 299.0395812988281, "learning_rate": 1.7567430144453801e-06, "loss": 13.7994, "step": 376100 }, { "epoch": 0.7597659958709907, "grad_norm": 249.75775146484375, "learning_rate": 1.756477353339941e-06, "loss": 14.9232, "step": 376110 }, { "epoch": 0.7597861965036745, "grad_norm": 508.8680114746094, "learning_rate": 1.7562117080430413e-06, "loss": 38.7722, "step": 376120 }, { "epoch": 0.7598063971363583, "grad_norm": 266.7574768066406, "learning_rate": 1.755946078555969e-06, "loss": 15.0039, "step": 376130 }, { "epoch": 0.7598265977690422, "grad_norm": 222.82427978515625, "learning_rate": 1.755680464880022e-06, "loss": 9.2024, "step": 376140 }, { "epoch": 0.759846798401726, "grad_norm": 421.0816650390625, "learning_rate": 1.7554148670164966e-06, "loss": 22.4237, "step": 376150 }, { "epoch": 0.7598669990344098, "grad_norm": 138.4772186279297, "learning_rate": 1.7551492849666857e-06, "loss": 13.9505, "step": 376160 }, { "epoch": 0.7598871996670936, "grad_norm": 318.8111572265625, "learning_rate": 1.7548837187318817e-06, "loss": 20.8391, "step": 376170 }, { "epoch": 0.7599074002997774, "grad_norm": 11.425552368164062, "learning_rate": 1.7546181683133829e-06, "loss": 17.3424, "step": 376180 }, { "epoch": 0.7599276009324613, "grad_norm": 303.99505615234375, "learning_rate": 1.7543526337124817e-06, "loss": 20.852, "step": 376190 }, { "epoch": 0.7599478015651451, "grad_norm": 243.3747100830078, "learning_rate": 1.75408711493047e-06, "loss": 16.8221, "step": 376200 }, { "epoch": 0.7599680021978288, "grad_norm": 288.9319763183594, "learning_rate": 1.7538216119686457e-06, "loss": 16.4526, "step": 376210 }, { "epoch": 0.7599882028305126, "grad_norm": 406.0584411621094, "learning_rate": 1.7535561248282994e-06, "loss": 16.2775, "step": 376220 }, { "epoch": 0.7600084034631964, "grad_norm": 336.5354919433594, "learning_rate": 1.7532906535107286e-06, "loss": 9.9216, "step": 376230 }, { "epoch": 0.7600286040958802, "grad_norm": 0.0, "learning_rate": 1.7530251980172241e-06, "loss": 25.1667, "step": 376240 }, { "epoch": 0.7600488047285641, "grad_norm": 268.2503967285156, "learning_rate": 1.7527597583490825e-06, "loss": 10.5594, "step": 376250 }, { "epoch": 0.7600690053612479, "grad_norm": 208.54385375976562, "learning_rate": 1.7524943345075957e-06, "loss": 10.8612, "step": 376260 }, { "epoch": 0.7600892059939317, "grad_norm": 79.3169937133789, "learning_rate": 1.7522289264940557e-06, "loss": 9.4932, "step": 376270 }, { "epoch": 0.7601094066266155, "grad_norm": 486.26654052734375, "learning_rate": 1.7519635343097601e-06, "loss": 24.4631, "step": 376280 }, { "epoch": 0.7601296072592993, "grad_norm": 348.9071350097656, "learning_rate": 1.7516981579559987e-06, "loss": 17.5926, "step": 376290 }, { "epoch": 0.7601498078919832, "grad_norm": 162.6879119873047, "learning_rate": 1.751432797434068e-06, "loss": 21.2327, "step": 376300 }, { "epoch": 0.760170008524667, "grad_norm": 274.6268005371094, "learning_rate": 1.75116745274526e-06, "loss": 22.1939, "step": 376310 }, { "epoch": 0.7601902091573508, "grad_norm": 135.724365234375, "learning_rate": 1.7509021238908659e-06, "loss": 12.8639, "step": 376320 }, { "epoch": 0.7602104097900346, "grad_norm": 137.8172607421875, "learning_rate": 1.750636810872181e-06, "loss": 7.1545, "step": 376330 }, { "epoch": 0.7602306104227184, "grad_norm": 422.0423889160156, "learning_rate": 1.7503715136905014e-06, "loss": 18.7146, "step": 376340 }, { "epoch": 0.7602508110554023, "grad_norm": 252.1905517578125, "learning_rate": 1.7501062323471136e-06, "loss": 11.5627, "step": 376350 }, { "epoch": 0.7602710116880861, "grad_norm": 267.8533020019531, "learning_rate": 1.7498409668433135e-06, "loss": 13.506, "step": 376360 }, { "epoch": 0.7602912123207699, "grad_norm": 317.3609924316406, "learning_rate": 1.7495757171803967e-06, "loss": 6.7984, "step": 376370 }, { "epoch": 0.7603114129534537, "grad_norm": 137.287353515625, "learning_rate": 1.749310483359653e-06, "loss": 26.3172, "step": 376380 }, { "epoch": 0.7603316135861375, "grad_norm": 268.3994140625, "learning_rate": 1.7490452653823747e-06, "loss": 17.5337, "step": 376390 }, { "epoch": 0.7603518142188214, "grad_norm": 196.5290985107422, "learning_rate": 1.7487800632498547e-06, "loss": 16.2643, "step": 376400 }, { "epoch": 0.7603720148515052, "grad_norm": 187.63275146484375, "learning_rate": 1.7485148769633903e-06, "loss": 21.4942, "step": 376410 }, { "epoch": 0.760392215484189, "grad_norm": 507.62957763671875, "learning_rate": 1.7482497065242665e-06, "loss": 17.6357, "step": 376420 }, { "epoch": 0.7604124161168728, "grad_norm": 0.0, "learning_rate": 1.7479845519337795e-06, "loss": 15.9316, "step": 376430 }, { "epoch": 0.7604326167495566, "grad_norm": 522.481201171875, "learning_rate": 1.7477194131932229e-06, "loss": 23.838, "step": 376440 }, { "epoch": 0.7604528173822405, "grad_norm": 305.57757568359375, "learning_rate": 1.747454290303887e-06, "loss": 21.1544, "step": 376450 }, { "epoch": 0.7604730180149242, "grad_norm": 196.60952758789062, "learning_rate": 1.747189183267063e-06, "loss": 16.611, "step": 376460 }, { "epoch": 0.760493218647608, "grad_norm": 247.35885620117188, "learning_rate": 1.7469240920840463e-06, "loss": 21.4635, "step": 376470 }, { "epoch": 0.7605134192802918, "grad_norm": 252.498046875, "learning_rate": 1.746659016756127e-06, "loss": 18.6444, "step": 376480 }, { "epoch": 0.7605336199129756, "grad_norm": 339.9057922363281, "learning_rate": 1.7463939572845951e-06, "loss": 12.1208, "step": 376490 }, { "epoch": 0.7605538205456595, "grad_norm": 183.49368286132812, "learning_rate": 1.746128913670746e-06, "loss": 12.6448, "step": 376500 }, { "epoch": 0.7605740211783433, "grad_norm": 168.50961303710938, "learning_rate": 1.7458638859158688e-06, "loss": 14.0209, "step": 376510 }, { "epoch": 0.7605942218110271, "grad_norm": 606.6734619140625, "learning_rate": 1.7455988740212576e-06, "loss": 23.4704, "step": 376520 }, { "epoch": 0.7606144224437109, "grad_norm": 129.78860473632812, "learning_rate": 1.7453338779882029e-06, "loss": 25.3711, "step": 376530 }, { "epoch": 0.7606346230763947, "grad_norm": 156.38787841796875, "learning_rate": 1.7450688978179947e-06, "loss": 20.3583, "step": 376540 }, { "epoch": 0.7606548237090786, "grad_norm": 599.2107543945312, "learning_rate": 1.7448039335119272e-06, "loss": 23.5007, "step": 376550 }, { "epoch": 0.7606750243417624, "grad_norm": 141.66009521484375, "learning_rate": 1.744538985071289e-06, "loss": 18.6486, "step": 376560 }, { "epoch": 0.7606952249744462, "grad_norm": 186.71266174316406, "learning_rate": 1.7442740524973744e-06, "loss": 20.6237, "step": 376570 }, { "epoch": 0.76071542560713, "grad_norm": 10.53161334991455, "learning_rate": 1.7440091357914718e-06, "loss": 19.5937, "step": 376580 }, { "epoch": 0.7607356262398138, "grad_norm": 534.8386840820312, "learning_rate": 1.7437442349548756e-06, "loss": 17.2655, "step": 376590 }, { "epoch": 0.7607558268724977, "grad_norm": 238.18035888671875, "learning_rate": 1.7434793499888746e-06, "loss": 14.3173, "step": 376600 }, { "epoch": 0.7607760275051815, "grad_norm": 434.36956787109375, "learning_rate": 1.7432144808947587e-06, "loss": 19.4673, "step": 376610 }, { "epoch": 0.7607962281378653, "grad_norm": 186.24063110351562, "learning_rate": 1.7429496276738223e-06, "loss": 16.0649, "step": 376620 }, { "epoch": 0.7608164287705491, "grad_norm": 295.795166015625, "learning_rate": 1.7426847903273547e-06, "loss": 26.1377, "step": 376630 }, { "epoch": 0.7608366294032329, "grad_norm": 317.76068115234375, "learning_rate": 1.742419968856644e-06, "loss": 18.2191, "step": 376640 }, { "epoch": 0.7608568300359168, "grad_norm": 204.006591796875, "learning_rate": 1.7421551632629835e-06, "loss": 14.8726, "step": 376650 }, { "epoch": 0.7608770306686006, "grad_norm": 171.38426208496094, "learning_rate": 1.7418903735476673e-06, "loss": 25.7304, "step": 376660 }, { "epoch": 0.7608972313012844, "grad_norm": 185.5011749267578, "learning_rate": 1.7416255997119786e-06, "loss": 15.8866, "step": 376670 }, { "epoch": 0.7609174319339682, "grad_norm": 167.08522033691406, "learning_rate": 1.7413608417572114e-06, "loss": 17.3357, "step": 376680 }, { "epoch": 0.760937632566652, "grad_norm": 405.77044677734375, "learning_rate": 1.7410960996846583e-06, "loss": 14.3021, "step": 376690 }, { "epoch": 0.7609578331993359, "grad_norm": 642.8429565429688, "learning_rate": 1.7408313734956074e-06, "loss": 29.6945, "step": 376700 }, { "epoch": 0.7609780338320197, "grad_norm": 53.92332458496094, "learning_rate": 1.7405666631913475e-06, "loss": 21.013, "step": 376710 }, { "epoch": 0.7609982344647034, "grad_norm": 240.93472290039062, "learning_rate": 1.7403019687731704e-06, "loss": 12.3693, "step": 376720 }, { "epoch": 0.7610184350973872, "grad_norm": 261.5607604980469, "learning_rate": 1.740037290242369e-06, "loss": 12.2696, "step": 376730 }, { "epoch": 0.761038635730071, "grad_norm": 297.2135009765625, "learning_rate": 1.7397726276002274e-06, "loss": 11.8733, "step": 376740 }, { "epoch": 0.7610588363627548, "grad_norm": 349.4959411621094, "learning_rate": 1.7395079808480386e-06, "loss": 27.6034, "step": 376750 }, { "epoch": 0.7610790369954387, "grad_norm": 51.7515983581543, "learning_rate": 1.7392433499870941e-06, "loss": 26.7227, "step": 376760 }, { "epoch": 0.7610992376281225, "grad_norm": 0.0, "learning_rate": 1.738978735018682e-06, "loss": 13.4829, "step": 376770 }, { "epoch": 0.7611194382608063, "grad_norm": 266.7490234375, "learning_rate": 1.7387141359440907e-06, "loss": 26.3471, "step": 376780 }, { "epoch": 0.7611396388934901, "grad_norm": 122.3057861328125, "learning_rate": 1.7384495527646127e-06, "loss": 19.405, "step": 376790 }, { "epoch": 0.7611598395261739, "grad_norm": 632.8466796875, "learning_rate": 1.738184985481536e-06, "loss": 17.1642, "step": 376800 }, { "epoch": 0.7611800401588578, "grad_norm": 447.6985168457031, "learning_rate": 1.7379204340961481e-06, "loss": 15.2456, "step": 376810 }, { "epoch": 0.7612002407915416, "grad_norm": 310.02996826171875, "learning_rate": 1.7376558986097424e-06, "loss": 19.9028, "step": 376820 }, { "epoch": 0.7612204414242254, "grad_norm": 185.0839080810547, "learning_rate": 1.737391379023604e-06, "loss": 12.8091, "step": 376830 }, { "epoch": 0.7612406420569092, "grad_norm": 417.41094970703125, "learning_rate": 1.7371268753390265e-06, "loss": 33.9448, "step": 376840 }, { "epoch": 0.761260842689593, "grad_norm": 121.45059204101562, "learning_rate": 1.7368623875572948e-06, "loss": 14.0206, "step": 376850 }, { "epoch": 0.7612810433222769, "grad_norm": 249.18222045898438, "learning_rate": 1.736597915679702e-06, "loss": 15.3184, "step": 376860 }, { "epoch": 0.7613012439549607, "grad_norm": 319.7296447753906, "learning_rate": 1.7363334597075354e-06, "loss": 29.378, "step": 376870 }, { "epoch": 0.7613214445876445, "grad_norm": 146.24925231933594, "learning_rate": 1.7360690196420816e-06, "loss": 12.8323, "step": 376880 }, { "epoch": 0.7613416452203283, "grad_norm": 274.88623046875, "learning_rate": 1.735804595484633e-06, "loss": 16.6813, "step": 376890 }, { "epoch": 0.7613618458530121, "grad_norm": 318.9274597167969, "learning_rate": 1.7355401872364759e-06, "loss": 22.8179, "step": 376900 }, { "epoch": 0.761382046485696, "grad_norm": 324.9254150390625, "learning_rate": 1.7352757948989012e-06, "loss": 33.1684, "step": 376910 }, { "epoch": 0.7614022471183798, "grad_norm": 57.93290328979492, "learning_rate": 1.7350114184731965e-06, "loss": 21.1865, "step": 376920 }, { "epoch": 0.7614224477510636, "grad_norm": 82.1806640625, "learning_rate": 1.7347470579606478e-06, "loss": 13.9466, "step": 376930 }, { "epoch": 0.7614426483837474, "grad_norm": 85.11298370361328, "learning_rate": 1.7344827133625487e-06, "loss": 12.8278, "step": 376940 }, { "epoch": 0.7614628490164312, "grad_norm": 0.0, "learning_rate": 1.734218384680184e-06, "loss": 19.9789, "step": 376950 }, { "epoch": 0.7614830496491151, "grad_norm": 2.6260697841644287, "learning_rate": 1.7339540719148417e-06, "loss": 18.0291, "step": 376960 }, { "epoch": 0.7615032502817988, "grad_norm": 205.03211975097656, "learning_rate": 1.7336897750678106e-06, "loss": 14.099, "step": 376970 }, { "epoch": 0.7615234509144826, "grad_norm": 513.7680053710938, "learning_rate": 1.733425494140381e-06, "loss": 13.708, "step": 376980 }, { "epoch": 0.7615436515471664, "grad_norm": 266.2865905761719, "learning_rate": 1.7331612291338402e-06, "loss": 19.0577, "step": 376990 }, { "epoch": 0.7615638521798502, "grad_norm": 273.6104736328125, "learning_rate": 1.7328969800494727e-06, "loss": 13.1958, "step": 377000 }, { "epoch": 0.761584052812534, "grad_norm": 44.56938171386719, "learning_rate": 1.7326327468885695e-06, "loss": 23.8411, "step": 377010 }, { "epoch": 0.7616042534452179, "grad_norm": 100.64534759521484, "learning_rate": 1.7323685296524212e-06, "loss": 17.8327, "step": 377020 }, { "epoch": 0.7616244540779017, "grad_norm": 426.18731689453125, "learning_rate": 1.7321043283423094e-06, "loss": 21.1919, "step": 377030 }, { "epoch": 0.7616446547105855, "grad_norm": 140.48846435546875, "learning_rate": 1.7318401429595244e-06, "loss": 15.9758, "step": 377040 }, { "epoch": 0.7616648553432693, "grad_norm": 0.0, "learning_rate": 1.7315759735053562e-06, "loss": 10.4788, "step": 377050 }, { "epoch": 0.7616850559759532, "grad_norm": 604.33251953125, "learning_rate": 1.7313118199810897e-06, "loss": 20.6444, "step": 377060 }, { "epoch": 0.761705256608637, "grad_norm": 33.24126052856445, "learning_rate": 1.7310476823880118e-06, "loss": 27.7321, "step": 377070 }, { "epoch": 0.7617254572413208, "grad_norm": 507.9164123535156, "learning_rate": 1.7307835607274125e-06, "loss": 22.2527, "step": 377080 }, { "epoch": 0.7617456578740046, "grad_norm": 30.18045997619629, "learning_rate": 1.7305194550005776e-06, "loss": 18.3452, "step": 377090 }, { "epoch": 0.7617658585066884, "grad_norm": 358.3895263671875, "learning_rate": 1.7302553652087927e-06, "loss": 9.0634, "step": 377100 }, { "epoch": 0.7617860591393723, "grad_norm": 248.94517517089844, "learning_rate": 1.7299912913533485e-06, "loss": 12.7289, "step": 377110 }, { "epoch": 0.7618062597720561, "grad_norm": 110.5439682006836, "learning_rate": 1.729727233435528e-06, "loss": 21.9172, "step": 377120 }, { "epoch": 0.7618264604047399, "grad_norm": 85.02391815185547, "learning_rate": 1.7294631914566222e-06, "loss": 19.1168, "step": 377130 }, { "epoch": 0.7618466610374237, "grad_norm": 423.2138671875, "learning_rate": 1.7291991654179163e-06, "loss": 19.5246, "step": 377140 }, { "epoch": 0.7618668616701075, "grad_norm": 1.7739111185073853, "learning_rate": 1.7289351553206952e-06, "loss": 17.1763, "step": 377150 }, { "epoch": 0.7618870623027914, "grad_norm": 180.4893798828125, "learning_rate": 1.7286711611662488e-06, "loss": 9.9872, "step": 377160 }, { "epoch": 0.7619072629354752, "grad_norm": 260.87060546875, "learning_rate": 1.7284071829558606e-06, "loss": 11.6142, "step": 377170 }, { "epoch": 0.761927463568159, "grad_norm": 580.7609252929688, "learning_rate": 1.7281432206908211e-06, "loss": 25.809, "step": 377180 }, { "epoch": 0.7619476642008428, "grad_norm": 339.8973388671875, "learning_rate": 1.7278792743724133e-06, "loss": 32.61, "step": 377190 }, { "epoch": 0.7619678648335266, "grad_norm": 532.9705810546875, "learning_rate": 1.727615344001926e-06, "loss": 13.8687, "step": 377200 }, { "epoch": 0.7619880654662105, "grad_norm": 249.15040588378906, "learning_rate": 1.7273514295806454e-06, "loss": 18.536, "step": 377210 }, { "epoch": 0.7620082660988943, "grad_norm": 52.58942794799805, "learning_rate": 1.727087531109855e-06, "loss": 17.5569, "step": 377220 }, { "epoch": 0.762028466731578, "grad_norm": 213.00343322753906, "learning_rate": 1.7268236485908446e-06, "loss": 21.495, "step": 377230 }, { "epoch": 0.7620486673642618, "grad_norm": 318.0677185058594, "learning_rate": 1.7265597820248987e-06, "loss": 14.9756, "step": 377240 }, { "epoch": 0.7620688679969456, "grad_norm": 203.6577911376953, "learning_rate": 1.7262959314133015e-06, "loss": 21.7776, "step": 377250 }, { "epoch": 0.7620890686296294, "grad_norm": 357.4606628417969, "learning_rate": 1.7260320967573413e-06, "loss": 17.5465, "step": 377260 }, { "epoch": 0.7621092692623133, "grad_norm": 31.14415168762207, "learning_rate": 1.7257682780583068e-06, "loss": 15.8608, "step": 377270 }, { "epoch": 0.7621294698949971, "grad_norm": 270.2769775390625, "learning_rate": 1.7255044753174778e-06, "loss": 25.0792, "step": 377280 }, { "epoch": 0.7621496705276809, "grad_norm": 353.939208984375, "learning_rate": 1.7252406885361416e-06, "loss": 16.4975, "step": 377290 }, { "epoch": 0.7621698711603647, "grad_norm": 494.85791015625, "learning_rate": 1.7249769177155879e-06, "loss": 28.8226, "step": 377300 }, { "epoch": 0.7621900717930485, "grad_norm": 450.192626953125, "learning_rate": 1.724713162857099e-06, "loss": 17.6465, "step": 377310 }, { "epoch": 0.7622102724257324, "grad_norm": 118.97035217285156, "learning_rate": 1.7244494239619592e-06, "loss": 16.6864, "step": 377320 }, { "epoch": 0.7622304730584162, "grad_norm": 413.91485595703125, "learning_rate": 1.7241857010314555e-06, "loss": 11.2181, "step": 377330 }, { "epoch": 0.7622506736911, "grad_norm": 257.5901794433594, "learning_rate": 1.7239219940668771e-06, "loss": 16.7916, "step": 377340 }, { "epoch": 0.7622708743237838, "grad_norm": 231.21031188964844, "learning_rate": 1.723658303069502e-06, "loss": 17.2143, "step": 377350 }, { "epoch": 0.7622910749564676, "grad_norm": 316.17950439453125, "learning_rate": 1.7233946280406193e-06, "loss": 22.3707, "step": 377360 }, { "epoch": 0.7623112755891515, "grad_norm": 521.2201538085938, "learning_rate": 1.723130968981515e-06, "loss": 24.8295, "step": 377370 }, { "epoch": 0.7623314762218353, "grad_norm": 255.21510314941406, "learning_rate": 1.722867325893473e-06, "loss": 25.3234, "step": 377380 }, { "epoch": 0.7623516768545191, "grad_norm": 352.3016052246094, "learning_rate": 1.7226036987777767e-06, "loss": 16.3131, "step": 377390 }, { "epoch": 0.7623718774872029, "grad_norm": 315.75140380859375, "learning_rate": 1.7223400876357144e-06, "loss": 22.054, "step": 377400 }, { "epoch": 0.7623920781198867, "grad_norm": 200.4274139404297, "learning_rate": 1.7220764924685685e-06, "loss": 18.0049, "step": 377410 }, { "epoch": 0.7624122787525706, "grad_norm": 310.86407470703125, "learning_rate": 1.7218129132776224e-06, "loss": 24.297, "step": 377420 }, { "epoch": 0.7624324793852544, "grad_norm": 20.094764709472656, "learning_rate": 1.7215493500641645e-06, "loss": 11.7011, "step": 377430 }, { "epoch": 0.7624526800179382, "grad_norm": 253.82220458984375, "learning_rate": 1.721285802829476e-06, "loss": 22.7156, "step": 377440 }, { "epoch": 0.762472880650622, "grad_norm": 205.05116271972656, "learning_rate": 1.7210222715748443e-06, "loss": 11.3778, "step": 377450 }, { "epoch": 0.7624930812833058, "grad_norm": 417.2034912109375, "learning_rate": 1.7207587563015505e-06, "loss": 31.2602, "step": 377460 }, { "epoch": 0.7625132819159897, "grad_norm": 57.275604248046875, "learning_rate": 1.720495257010883e-06, "loss": 13.6426, "step": 377470 }, { "epoch": 0.7625334825486734, "grad_norm": 305.15264892578125, "learning_rate": 1.7202317737041235e-06, "loss": 17.4992, "step": 377480 }, { "epoch": 0.7625536831813572, "grad_norm": 293.41455078125, "learning_rate": 1.7199683063825544e-06, "loss": 20.5391, "step": 377490 }, { "epoch": 0.762573883814041, "grad_norm": 2.5843496322631836, "learning_rate": 1.7197048550474643e-06, "loss": 11.6639, "step": 377500 }, { "epoch": 0.7625940844467248, "grad_norm": 213.95082092285156, "learning_rate": 1.719441419700133e-06, "loss": 25.751, "step": 377510 }, { "epoch": 0.7626142850794086, "grad_norm": 405.8974304199219, "learning_rate": 1.7191780003418485e-06, "loss": 24.2551, "step": 377520 }, { "epoch": 0.7626344857120925, "grad_norm": 196.18359375, "learning_rate": 1.7189145969738918e-06, "loss": 19.9062, "step": 377530 }, { "epoch": 0.7626546863447763, "grad_norm": 96.23590850830078, "learning_rate": 1.718651209597546e-06, "loss": 16.2782, "step": 377540 }, { "epoch": 0.7626748869774601, "grad_norm": 436.2441711425781, "learning_rate": 1.7183878382140978e-06, "loss": 13.0576, "step": 377550 }, { "epoch": 0.7626950876101439, "grad_norm": 100.75172424316406, "learning_rate": 1.7181244828248294e-06, "loss": 10.9535, "step": 377560 }, { "epoch": 0.7627152882428277, "grad_norm": 263.13409423828125, "learning_rate": 1.7178611434310221e-06, "loss": 30.3607, "step": 377570 }, { "epoch": 0.7627354888755116, "grad_norm": 3.924471616744995, "learning_rate": 1.7175978200339622e-06, "loss": 9.748, "step": 377580 }, { "epoch": 0.7627556895081954, "grad_norm": 103.77410125732422, "learning_rate": 1.7173345126349339e-06, "loss": 9.7136, "step": 377590 }, { "epoch": 0.7627758901408792, "grad_norm": 160.88894653320312, "learning_rate": 1.7170712212352187e-06, "loss": 12.8549, "step": 377600 }, { "epoch": 0.762796090773563, "grad_norm": 277.9986877441406, "learning_rate": 1.7168079458360987e-06, "loss": 27.1723, "step": 377610 }, { "epoch": 0.7628162914062468, "grad_norm": 215.54397583007812, "learning_rate": 1.7165446864388608e-06, "loss": 13.8129, "step": 377620 }, { "epoch": 0.7628364920389307, "grad_norm": 236.11624145507812, "learning_rate": 1.7162814430447849e-06, "loss": 15.6417, "step": 377630 }, { "epoch": 0.7628566926716145, "grad_norm": 579.3740844726562, "learning_rate": 1.7160182156551542e-06, "loss": 16.1685, "step": 377640 }, { "epoch": 0.7628768933042983, "grad_norm": 251.1966552734375, "learning_rate": 1.7157550042712517e-06, "loss": 11.3775, "step": 377650 }, { "epoch": 0.7628970939369821, "grad_norm": 150.9752655029297, "learning_rate": 1.7154918088943629e-06, "loss": 10.5738, "step": 377660 }, { "epoch": 0.762917294569666, "grad_norm": 268.9073791503906, "learning_rate": 1.7152286295257687e-06, "loss": 23.4936, "step": 377670 }, { "epoch": 0.7629374952023498, "grad_norm": 245.87823486328125, "learning_rate": 1.7149654661667503e-06, "loss": 14.052, "step": 377680 }, { "epoch": 0.7629576958350336, "grad_norm": 358.27557373046875, "learning_rate": 1.714702318818593e-06, "loss": 19.5729, "step": 377690 }, { "epoch": 0.7629778964677174, "grad_norm": 298.7748107910156, "learning_rate": 1.7144391874825784e-06, "loss": 14.4122, "step": 377700 }, { "epoch": 0.7629980971004012, "grad_norm": 54.16899490356445, "learning_rate": 1.714176072159987e-06, "loss": 31.6667, "step": 377710 }, { "epoch": 0.763018297733085, "grad_norm": 207.3931884765625, "learning_rate": 1.7139129728521048e-06, "loss": 15.3437, "step": 377720 }, { "epoch": 0.7630384983657689, "grad_norm": 269.1498107910156, "learning_rate": 1.7136498895602105e-06, "loss": 18.697, "step": 377730 }, { "epoch": 0.7630586989984526, "grad_norm": 245.47607421875, "learning_rate": 1.7133868222855893e-06, "loss": 15.9564, "step": 377740 }, { "epoch": 0.7630788996311364, "grad_norm": 291.18829345703125, "learning_rate": 1.7131237710295207e-06, "loss": 21.3592, "step": 377750 }, { "epoch": 0.7630991002638202, "grad_norm": 297.55462646484375, "learning_rate": 1.7128607357932903e-06, "loss": 19.4467, "step": 377760 }, { "epoch": 0.763119300896504, "grad_norm": 281.09521484375, "learning_rate": 1.7125977165781772e-06, "loss": 19.7366, "step": 377770 }, { "epoch": 0.7631395015291879, "grad_norm": 336.9539489746094, "learning_rate": 1.7123347133854628e-06, "loss": 18.7081, "step": 377780 }, { "epoch": 0.7631597021618717, "grad_norm": 287.53240966796875, "learning_rate": 1.7120717262164322e-06, "loss": 20.9795, "step": 377790 }, { "epoch": 0.7631799027945555, "grad_norm": 352.1684265136719, "learning_rate": 1.7118087550723633e-06, "loss": 14.3606, "step": 377800 }, { "epoch": 0.7632001034272393, "grad_norm": 179.2467041015625, "learning_rate": 1.711545799954541e-06, "loss": 27.5687, "step": 377810 }, { "epoch": 0.7632203040599231, "grad_norm": 655.4510498046875, "learning_rate": 1.7112828608642463e-06, "loss": 11.0703, "step": 377820 }, { "epoch": 0.763240504692607, "grad_norm": 187.80224609375, "learning_rate": 1.7110199378027581e-06, "loss": 21.5192, "step": 377830 }, { "epoch": 0.7632607053252908, "grad_norm": 500.19171142578125, "learning_rate": 1.7107570307713618e-06, "loss": 18.0183, "step": 377840 }, { "epoch": 0.7632809059579746, "grad_norm": 222.8728790283203, "learning_rate": 1.710494139771336e-06, "loss": 11.178, "step": 377850 }, { "epoch": 0.7633011065906584, "grad_norm": 195.7327423095703, "learning_rate": 1.7102312648039616e-06, "loss": 19.8605, "step": 377860 }, { "epoch": 0.7633213072233422, "grad_norm": 318.9140625, "learning_rate": 1.7099684058705212e-06, "loss": 21.2292, "step": 377870 }, { "epoch": 0.7633415078560261, "grad_norm": 308.5555725097656, "learning_rate": 1.7097055629722991e-06, "loss": 7.0309, "step": 377880 }, { "epoch": 0.7633617084887099, "grad_norm": 654.63525390625, "learning_rate": 1.7094427361105693e-06, "loss": 14.2262, "step": 377890 }, { "epoch": 0.7633819091213937, "grad_norm": 119.25172424316406, "learning_rate": 1.709179925286617e-06, "loss": 26.3689, "step": 377900 }, { "epoch": 0.7634021097540775, "grad_norm": 790.751708984375, "learning_rate": 1.7089171305017238e-06, "loss": 19.844, "step": 377910 }, { "epoch": 0.7634223103867613, "grad_norm": 210.7842254638672, "learning_rate": 1.7086543517571697e-06, "loss": 24.9508, "step": 377920 }, { "epoch": 0.7634425110194452, "grad_norm": 313.3032531738281, "learning_rate": 1.7083915890542329e-06, "loss": 23.9662, "step": 377930 }, { "epoch": 0.763462711652129, "grad_norm": 456.5846862792969, "learning_rate": 1.7081288423941967e-06, "loss": 34.8081, "step": 377940 }, { "epoch": 0.7634829122848128, "grad_norm": 9.538440704345703, "learning_rate": 1.7078661117783447e-06, "loss": 17.5508, "step": 377950 }, { "epoch": 0.7635031129174966, "grad_norm": 131.02572631835938, "learning_rate": 1.7076033972079503e-06, "loss": 15.5204, "step": 377960 }, { "epoch": 0.7635233135501804, "grad_norm": 141.56260681152344, "learning_rate": 1.7073406986842982e-06, "loss": 18.2791, "step": 377970 }, { "epoch": 0.7635435141828643, "grad_norm": 116.24472045898438, "learning_rate": 1.7070780162086691e-06, "loss": 19.3723, "step": 377980 }, { "epoch": 0.7635637148155481, "grad_norm": 277.434814453125, "learning_rate": 1.7068153497823431e-06, "loss": 17.2797, "step": 377990 }, { "epoch": 0.7635839154482318, "grad_norm": 293.9352722167969, "learning_rate": 1.7065526994065973e-06, "loss": 8.1209, "step": 378000 }, { "epoch": 0.7636041160809156, "grad_norm": 209.91897583007812, "learning_rate": 1.7062900650827163e-06, "loss": 13.0924, "step": 378010 }, { "epoch": 0.7636243167135994, "grad_norm": 332.0283203125, "learning_rate": 1.7060274468119782e-06, "loss": 29.5559, "step": 378020 }, { "epoch": 0.7636445173462832, "grad_norm": 234.50924682617188, "learning_rate": 1.7057648445956609e-06, "loss": 15.4109, "step": 378030 }, { "epoch": 0.7636647179789671, "grad_norm": 192.5923309326172, "learning_rate": 1.7055022584350477e-06, "loss": 12.3049, "step": 378040 }, { "epoch": 0.7636849186116509, "grad_norm": 324.85504150390625, "learning_rate": 1.7052396883314154e-06, "loss": 15.4046, "step": 378050 }, { "epoch": 0.7637051192443347, "grad_norm": 106.18983459472656, "learning_rate": 1.704977134286047e-06, "loss": 11.6515, "step": 378060 }, { "epoch": 0.7637253198770185, "grad_norm": 24.106670379638672, "learning_rate": 1.7047145963002187e-06, "loss": 15.3674, "step": 378070 }, { "epoch": 0.7637455205097023, "grad_norm": 537.938232421875, "learning_rate": 1.7044520743752135e-06, "loss": 21.5565, "step": 378080 }, { "epoch": 0.7637657211423862, "grad_norm": 446.2663879394531, "learning_rate": 1.7041895685123088e-06, "loss": 22.8365, "step": 378090 }, { "epoch": 0.76378592177507, "grad_norm": 580.9189453125, "learning_rate": 1.7039270787127832e-06, "loss": 25.869, "step": 378100 }, { "epoch": 0.7638061224077538, "grad_norm": 225.2794189453125, "learning_rate": 1.7036646049779188e-06, "loss": 23.4554, "step": 378110 }, { "epoch": 0.7638263230404376, "grad_norm": 227.18179321289062, "learning_rate": 1.7034021473089918e-06, "loss": 16.7018, "step": 378120 }, { "epoch": 0.7638465236731214, "grad_norm": 256.54718017578125, "learning_rate": 1.7031397057072846e-06, "loss": 14.3847, "step": 378130 }, { "epoch": 0.7638667243058053, "grad_norm": 150.58876037597656, "learning_rate": 1.702877280174074e-06, "loss": 12.2851, "step": 378140 }, { "epoch": 0.7638869249384891, "grad_norm": 225.750732421875, "learning_rate": 1.7026148707106388e-06, "loss": 14.4953, "step": 378150 }, { "epoch": 0.7639071255711729, "grad_norm": 501.5416564941406, "learning_rate": 1.7023524773182598e-06, "loss": 10.2479, "step": 378160 }, { "epoch": 0.7639273262038567, "grad_norm": 138.78663635253906, "learning_rate": 1.7020900999982154e-06, "loss": 9.0675, "step": 378170 }, { "epoch": 0.7639475268365405, "grad_norm": 336.83380126953125, "learning_rate": 1.7018277387517817e-06, "loss": 21.7502, "step": 378180 }, { "epoch": 0.7639677274692244, "grad_norm": 63.544334411621094, "learning_rate": 1.70156539358024e-06, "loss": 17.8616, "step": 378190 }, { "epoch": 0.7639879281019082, "grad_norm": 216.5476837158203, "learning_rate": 1.7013030644848698e-06, "loss": 16.5139, "step": 378200 }, { "epoch": 0.764008128734592, "grad_norm": 145.57095336914062, "learning_rate": 1.7010407514669485e-06, "loss": 30.002, "step": 378210 }, { "epoch": 0.7640283293672758, "grad_norm": 379.74481201171875, "learning_rate": 1.7007784545277528e-06, "loss": 12.7986, "step": 378220 }, { "epoch": 0.7640485299999596, "grad_norm": 207.50527954101562, "learning_rate": 1.700516173668565e-06, "loss": 17.6267, "step": 378230 }, { "epoch": 0.7640687306326435, "grad_norm": 439.6679992675781, "learning_rate": 1.7002539088906606e-06, "loss": 28.9579, "step": 378240 }, { "epoch": 0.7640889312653272, "grad_norm": 58.70814895629883, "learning_rate": 1.6999916601953165e-06, "loss": 22.6594, "step": 378250 }, { "epoch": 0.764109131898011, "grad_norm": 624.1655883789062, "learning_rate": 1.6997294275838134e-06, "loss": 33.432, "step": 378260 }, { "epoch": 0.7641293325306948, "grad_norm": 254.2341766357422, "learning_rate": 1.69946721105743e-06, "loss": 16.4452, "step": 378270 }, { "epoch": 0.7641495331633786, "grad_norm": 394.1383361816406, "learning_rate": 1.6992050106174435e-06, "loss": 16.3708, "step": 378280 }, { "epoch": 0.7641697337960625, "grad_norm": 468.5248718261719, "learning_rate": 1.6989428262651296e-06, "loss": 18.1938, "step": 378290 }, { "epoch": 0.7641899344287463, "grad_norm": 206.3083038330078, "learning_rate": 1.6986806580017695e-06, "loss": 13.2682, "step": 378300 }, { "epoch": 0.7642101350614301, "grad_norm": 573.8595581054688, "learning_rate": 1.6984185058286396e-06, "loss": 23.8704, "step": 378310 }, { "epoch": 0.7642303356941139, "grad_norm": 366.8370361328125, "learning_rate": 1.698156369747016e-06, "loss": 22.7604, "step": 378320 }, { "epoch": 0.7642505363267977, "grad_norm": 347.14697265625, "learning_rate": 1.6978942497581797e-06, "loss": 20.2802, "step": 378330 }, { "epoch": 0.7642707369594816, "grad_norm": 118.79061889648438, "learning_rate": 1.6976321458634036e-06, "loss": 13.7837, "step": 378340 }, { "epoch": 0.7642909375921654, "grad_norm": 299.0809631347656, "learning_rate": 1.6973700580639707e-06, "loss": 16.0792, "step": 378350 }, { "epoch": 0.7643111382248492, "grad_norm": 408.10986328125, "learning_rate": 1.6971079863611534e-06, "loss": 18.3005, "step": 378360 }, { "epoch": 0.764331338857533, "grad_norm": 445.2966613769531, "learning_rate": 1.6968459307562329e-06, "loss": 17.3684, "step": 378370 }, { "epoch": 0.7643515394902168, "grad_norm": 194.7529754638672, "learning_rate": 1.6965838912504845e-06, "loss": 16.9826, "step": 378380 }, { "epoch": 0.7643717401229007, "grad_norm": 18.0908145904541, "learning_rate": 1.6963218678451843e-06, "loss": 28.3402, "step": 378390 }, { "epoch": 0.7643919407555845, "grad_norm": 352.92767333984375, "learning_rate": 1.6960598605416117e-06, "loss": 13.0319, "step": 378400 }, { "epoch": 0.7644121413882683, "grad_norm": 136.8628387451172, "learning_rate": 1.6957978693410414e-06, "loss": 21.0314, "step": 378410 }, { "epoch": 0.7644323420209521, "grad_norm": 211.46861267089844, "learning_rate": 1.695535894244753e-06, "loss": 17.3546, "step": 378420 }, { "epoch": 0.7644525426536359, "grad_norm": 209.52064514160156, "learning_rate": 1.695273935254022e-06, "loss": 19.9528, "step": 378430 }, { "epoch": 0.7644727432863198, "grad_norm": 190.12261962890625, "learning_rate": 1.6950119923701235e-06, "loss": 19.382, "step": 378440 }, { "epoch": 0.7644929439190036, "grad_norm": 211.62258911132812, "learning_rate": 1.6947500655943373e-06, "loss": 19.3818, "step": 378450 }, { "epoch": 0.7645131445516874, "grad_norm": 344.5990905761719, "learning_rate": 1.6944881549279384e-06, "loss": 22.7856, "step": 378460 }, { "epoch": 0.7645333451843712, "grad_norm": 596.4730834960938, "learning_rate": 1.6942262603722015e-06, "loss": 26.3811, "step": 378470 }, { "epoch": 0.764553545817055, "grad_norm": 239.67735290527344, "learning_rate": 1.6939643819284051e-06, "loss": 12.175, "step": 378480 }, { "epoch": 0.7645737464497389, "grad_norm": 344.6586608886719, "learning_rate": 1.6937025195978286e-06, "loss": 15.6395, "step": 378490 }, { "epoch": 0.7645939470824227, "grad_norm": 424.4085693359375, "learning_rate": 1.6934406733817417e-06, "loss": 14.637, "step": 378500 }, { "epoch": 0.7646141477151064, "grad_norm": 242.608154296875, "learning_rate": 1.6931788432814233e-06, "loss": 24.3651, "step": 378510 }, { "epoch": 0.7646343483477902, "grad_norm": 193.21029663085938, "learning_rate": 1.6929170292981528e-06, "loss": 12.5357, "step": 378520 }, { "epoch": 0.764654548980474, "grad_norm": 217.15052795410156, "learning_rate": 1.692655231433203e-06, "loss": 16.7962, "step": 378530 }, { "epoch": 0.7646747496131578, "grad_norm": 259.7039489746094, "learning_rate": 1.6923934496878485e-06, "loss": 11.0987, "step": 378540 }, { "epoch": 0.7646949502458417, "grad_norm": 166.98068237304688, "learning_rate": 1.6921316840633678e-06, "loss": 15.2636, "step": 378550 }, { "epoch": 0.7647151508785255, "grad_norm": 742.8504638671875, "learning_rate": 1.6918699345610395e-06, "loss": 20.3359, "step": 378560 }, { "epoch": 0.7647353515112093, "grad_norm": 369.79046630859375, "learning_rate": 1.6916082011821322e-06, "loss": 19.7018, "step": 378570 }, { "epoch": 0.7647555521438931, "grad_norm": 452.14306640625, "learning_rate": 1.6913464839279254e-06, "loss": 18.3089, "step": 378580 }, { "epoch": 0.764775752776577, "grad_norm": 383.2402038574219, "learning_rate": 1.6910847827996961e-06, "loss": 16.9836, "step": 378590 }, { "epoch": 0.7647959534092608, "grad_norm": 441.5247802734375, "learning_rate": 1.6908230977987184e-06, "loss": 19.4648, "step": 378600 }, { "epoch": 0.7648161540419446, "grad_norm": 242.163330078125, "learning_rate": 1.6905614289262657e-06, "loss": 20.9766, "step": 378610 }, { "epoch": 0.7648363546746284, "grad_norm": 133.74148559570312, "learning_rate": 1.690299776183617e-06, "loss": 18.7445, "step": 378620 }, { "epoch": 0.7648565553073122, "grad_norm": 131.70932006835938, "learning_rate": 1.6900381395720455e-06, "loss": 13.617, "step": 378630 }, { "epoch": 0.764876755939996, "grad_norm": 179.88262939453125, "learning_rate": 1.6897765190928257e-06, "loss": 16.6061, "step": 378640 }, { "epoch": 0.7648969565726799, "grad_norm": 175.525390625, "learning_rate": 1.6895149147472344e-06, "loss": 15.0344, "step": 378650 }, { "epoch": 0.7649171572053637, "grad_norm": 385.87799072265625, "learning_rate": 1.6892533265365445e-06, "loss": 13.0535, "step": 378660 }, { "epoch": 0.7649373578380475, "grad_norm": 547.041748046875, "learning_rate": 1.6889917544620342e-06, "loss": 25.6159, "step": 378670 }, { "epoch": 0.7649575584707313, "grad_norm": 336.2063293457031, "learning_rate": 1.6887301985249754e-06, "loss": 15.9684, "step": 378680 }, { "epoch": 0.7649777591034151, "grad_norm": 154.34693908691406, "learning_rate": 1.6884686587266446e-06, "loss": 19.4223, "step": 378690 }, { "epoch": 0.764997959736099, "grad_norm": 482.7477111816406, "learning_rate": 1.6882071350683165e-06, "loss": 15.9186, "step": 378700 }, { "epoch": 0.7650181603687828, "grad_norm": 97.22772216796875, "learning_rate": 1.6879456275512634e-06, "loss": 12.1428, "step": 378710 }, { "epoch": 0.7650383610014666, "grad_norm": 327.8579406738281, "learning_rate": 1.6876841361767637e-06, "loss": 20.3037, "step": 378720 }, { "epoch": 0.7650585616341504, "grad_norm": 439.4616394042969, "learning_rate": 1.6874226609460875e-06, "loss": 16.7572, "step": 378730 }, { "epoch": 0.7650787622668342, "grad_norm": 238.30792236328125, "learning_rate": 1.6871612018605131e-06, "loss": 36.0123, "step": 378740 }, { "epoch": 0.7650989628995181, "grad_norm": 188.152099609375, "learning_rate": 1.6868997589213138e-06, "loss": 13.9213, "step": 378750 }, { "epoch": 0.7651191635322018, "grad_norm": 125.10443115234375, "learning_rate": 1.6866383321297614e-06, "loss": 19.5305, "step": 378760 }, { "epoch": 0.7651393641648856, "grad_norm": 275.4287414550781, "learning_rate": 1.6863769214871334e-06, "loss": 9.4257, "step": 378770 }, { "epoch": 0.7651595647975694, "grad_norm": 3.2733709812164307, "learning_rate": 1.6861155269947022e-06, "loss": 19.5712, "step": 378780 }, { "epoch": 0.7651797654302532, "grad_norm": 285.9407043457031, "learning_rate": 1.6858541486537406e-06, "loss": 20.378, "step": 378790 }, { "epoch": 0.765199966062937, "grad_norm": 562.6853637695312, "learning_rate": 1.6855927864655241e-06, "loss": 21.3048, "step": 378800 }, { "epoch": 0.7652201666956209, "grad_norm": 83.56769561767578, "learning_rate": 1.6853314404313275e-06, "loss": 6.7362, "step": 378810 }, { "epoch": 0.7652403673283047, "grad_norm": 389.07550048828125, "learning_rate": 1.6850701105524236e-06, "loss": 31.7969, "step": 378820 }, { "epoch": 0.7652605679609885, "grad_norm": 195.12115478515625, "learning_rate": 1.6848087968300848e-06, "loss": 15.1225, "step": 378830 }, { "epoch": 0.7652807685936723, "grad_norm": 42.15168380737305, "learning_rate": 1.684547499265587e-06, "loss": 19.0222, "step": 378840 }, { "epoch": 0.7653009692263562, "grad_norm": 343.1339111328125, "learning_rate": 1.6842862178602026e-06, "loss": 12.5293, "step": 378850 }, { "epoch": 0.76532116985904, "grad_norm": 13.194989204406738, "learning_rate": 1.6840249526152036e-06, "loss": 13.7244, "step": 378860 }, { "epoch": 0.7653413704917238, "grad_norm": 156.29598999023438, "learning_rate": 1.6837637035318643e-06, "loss": 14.0472, "step": 378870 }, { "epoch": 0.7653615711244076, "grad_norm": 354.23016357421875, "learning_rate": 1.6835024706114605e-06, "loss": 12.8773, "step": 378880 }, { "epoch": 0.7653817717570914, "grad_norm": 383.7392883300781, "learning_rate": 1.6832412538552634e-06, "loss": 12.014, "step": 378890 }, { "epoch": 0.7654019723897753, "grad_norm": 1173.26904296875, "learning_rate": 1.6829800532645447e-06, "loss": 6.5332, "step": 378900 }, { "epoch": 0.7654221730224591, "grad_norm": 501.18328857421875, "learning_rate": 1.6827188688405805e-06, "loss": 33.5097, "step": 378910 }, { "epoch": 0.7654423736551429, "grad_norm": 468.9473876953125, "learning_rate": 1.6824577005846421e-06, "loss": 15.7766, "step": 378920 }, { "epoch": 0.7654625742878267, "grad_norm": 80.15396881103516, "learning_rate": 1.6821965484980007e-06, "loss": 15.3227, "step": 378930 }, { "epoch": 0.7654827749205105, "grad_norm": 438.0061340332031, "learning_rate": 1.6819354125819327e-06, "loss": 19.5701, "step": 378940 }, { "epoch": 0.7655029755531944, "grad_norm": 340.5198669433594, "learning_rate": 1.6816742928377072e-06, "loss": 9.4255, "step": 378950 }, { "epoch": 0.7655231761858782, "grad_norm": 166.37611389160156, "learning_rate": 1.6814131892666009e-06, "loss": 14.8419, "step": 378960 }, { "epoch": 0.765543376818562, "grad_norm": 422.54791259765625, "learning_rate": 1.6811521018698824e-06, "loss": 27.3245, "step": 378970 }, { "epoch": 0.7655635774512458, "grad_norm": 209.76217651367188, "learning_rate": 1.6808910306488274e-06, "loss": 15.5651, "step": 378980 }, { "epoch": 0.7655837780839296, "grad_norm": 198.67791748046875, "learning_rate": 1.6806299756047068e-06, "loss": 14.8886, "step": 378990 }, { "epoch": 0.7656039787166135, "grad_norm": 0.0, "learning_rate": 1.680368936738792e-06, "loss": 6.2701, "step": 379000 }, { "epoch": 0.7656241793492973, "grad_norm": 629.2919921875, "learning_rate": 1.680107914052358e-06, "loss": 24.239, "step": 379010 }, { "epoch": 0.765644379981981, "grad_norm": 79.63924407958984, "learning_rate": 1.6798469075466734e-06, "loss": 12.0556, "step": 379020 }, { "epoch": 0.7656645806146648, "grad_norm": 1348.72021484375, "learning_rate": 1.6795859172230139e-06, "loss": 39.3359, "step": 379030 }, { "epoch": 0.7656847812473486, "grad_norm": 11.997395515441895, "learning_rate": 1.6793249430826502e-06, "loss": 22.5019, "step": 379040 }, { "epoch": 0.7657049818800324, "grad_norm": 108.5953140258789, "learning_rate": 1.679063985126852e-06, "loss": 15.7991, "step": 379050 }, { "epoch": 0.7657251825127163, "grad_norm": 347.10369873046875, "learning_rate": 1.6788030433568953e-06, "loss": 9.4605, "step": 379060 }, { "epoch": 0.7657453831454001, "grad_norm": 176.26849365234375, "learning_rate": 1.678542117774049e-06, "loss": 11.7956, "step": 379070 }, { "epoch": 0.7657655837780839, "grad_norm": 120.49628448486328, "learning_rate": 1.6782812083795846e-06, "loss": 34.2316, "step": 379080 }, { "epoch": 0.7657857844107677, "grad_norm": 178.52459716796875, "learning_rate": 1.6780203151747742e-06, "loss": 9.5936, "step": 379090 }, { "epoch": 0.7658059850434515, "grad_norm": 375.7460632324219, "learning_rate": 1.6777594381608936e-06, "loss": 25.6198, "step": 379100 }, { "epoch": 0.7658261856761354, "grad_norm": 300.37359619140625, "learning_rate": 1.6774985773392071e-06, "loss": 23.2889, "step": 379110 }, { "epoch": 0.7658463863088192, "grad_norm": 266.06365966796875, "learning_rate": 1.6772377327109896e-06, "loss": 16.5774, "step": 379120 }, { "epoch": 0.765866586941503, "grad_norm": 326.099853515625, "learning_rate": 1.6769769042775141e-06, "loss": 15.9844, "step": 379130 }, { "epoch": 0.7658867875741868, "grad_norm": 323.84234619140625, "learning_rate": 1.67671609204005e-06, "loss": 23.5014, "step": 379140 }, { "epoch": 0.7659069882068706, "grad_norm": 415.2989807128906, "learning_rate": 1.676455295999867e-06, "loss": 14.8495, "step": 379150 }, { "epoch": 0.7659271888395545, "grad_norm": 442.8310546875, "learning_rate": 1.6761945161582382e-06, "loss": 16.2715, "step": 379160 }, { "epoch": 0.7659473894722383, "grad_norm": 304.157958984375, "learning_rate": 1.675933752516437e-06, "loss": 25.4054, "step": 379170 }, { "epoch": 0.7659675901049221, "grad_norm": 187.2795867919922, "learning_rate": 1.675673005075728e-06, "loss": 18.0333, "step": 379180 }, { "epoch": 0.7659877907376059, "grad_norm": 289.3010559082031, "learning_rate": 1.6754122738373863e-06, "loss": 30.8316, "step": 379190 }, { "epoch": 0.7660079913702897, "grad_norm": 188.05067443847656, "learning_rate": 1.6751515588026828e-06, "loss": 18.3426, "step": 379200 }, { "epoch": 0.7660281920029736, "grad_norm": 309.1974182128906, "learning_rate": 1.674890859972888e-06, "loss": 16.0116, "step": 379210 }, { "epoch": 0.7660483926356574, "grad_norm": 833.0745849609375, "learning_rate": 1.6746301773492701e-06, "loss": 24.3192, "step": 379220 }, { "epoch": 0.7660685932683412, "grad_norm": 204.6585235595703, "learning_rate": 1.6743695109331027e-06, "loss": 13.3245, "step": 379230 }, { "epoch": 0.766088793901025, "grad_norm": 382.77587890625, "learning_rate": 1.6741088607256556e-06, "loss": 26.5977, "step": 379240 }, { "epoch": 0.7661089945337088, "grad_norm": 363.687744140625, "learning_rate": 1.6738482267281963e-06, "loss": 21.3599, "step": 379250 }, { "epoch": 0.7661291951663927, "grad_norm": 235.57240295410156, "learning_rate": 1.6735876089419973e-06, "loss": 15.8634, "step": 379260 }, { "epoch": 0.7661493957990765, "grad_norm": 118.54756164550781, "learning_rate": 1.6733270073683305e-06, "loss": 14.0711, "step": 379270 }, { "epoch": 0.7661695964317602, "grad_norm": 94.49728393554688, "learning_rate": 1.6730664220084647e-06, "loss": 15.7165, "step": 379280 }, { "epoch": 0.766189797064444, "grad_norm": 232.14781188964844, "learning_rate": 1.6728058528636682e-06, "loss": 17.7663, "step": 379290 }, { "epoch": 0.7662099976971278, "grad_norm": 250.5006561279297, "learning_rate": 1.6725452999352137e-06, "loss": 35.8369, "step": 379300 }, { "epoch": 0.7662301983298117, "grad_norm": 265.7820129394531, "learning_rate": 1.6722847632243699e-06, "loss": 12.1336, "step": 379310 }, { "epoch": 0.7662503989624955, "grad_norm": 124.18810272216797, "learning_rate": 1.6720242427324045e-06, "loss": 15.0164, "step": 379320 }, { "epoch": 0.7662705995951793, "grad_norm": 198.30926513671875, "learning_rate": 1.6717637384605917e-06, "loss": 12.827, "step": 379330 }, { "epoch": 0.7662908002278631, "grad_norm": 88.16915130615234, "learning_rate": 1.6715032504101968e-06, "loss": 9.2961, "step": 379340 }, { "epoch": 0.7663110008605469, "grad_norm": 88.98184967041016, "learning_rate": 1.671242778582493e-06, "loss": 20.3898, "step": 379350 }, { "epoch": 0.7663312014932308, "grad_norm": 169.90472412109375, "learning_rate": 1.670982322978748e-06, "loss": 9.889, "step": 379360 }, { "epoch": 0.7663514021259146, "grad_norm": 557.4299926757812, "learning_rate": 1.6707218836002298e-06, "loss": 18.4005, "step": 379370 }, { "epoch": 0.7663716027585984, "grad_norm": 300.2458190917969, "learning_rate": 1.6704614604482112e-06, "loss": 18.3542, "step": 379380 }, { "epoch": 0.7663918033912822, "grad_norm": 410.5574951171875, "learning_rate": 1.67020105352396e-06, "loss": 22.578, "step": 379390 }, { "epoch": 0.766412004023966, "grad_norm": 234.48724365234375, "learning_rate": 1.6699406628287423e-06, "loss": 13.5226, "step": 379400 }, { "epoch": 0.7664322046566499, "grad_norm": 216.70814514160156, "learning_rate": 1.6696802883638309e-06, "loss": 23.3061, "step": 379410 }, { "epoch": 0.7664524052893337, "grad_norm": 198.57115173339844, "learning_rate": 1.6694199301304947e-06, "loss": 10.6701, "step": 379420 }, { "epoch": 0.7664726059220175, "grad_norm": 244.79559326171875, "learning_rate": 1.6691595881300026e-06, "loss": 16.8247, "step": 379430 }, { "epoch": 0.7664928065547013, "grad_norm": 141.99124145507812, "learning_rate": 1.6688992623636201e-06, "loss": 24.7953, "step": 379440 }, { "epoch": 0.7665130071873851, "grad_norm": 218.17178344726562, "learning_rate": 1.6686389528326214e-06, "loss": 18.3299, "step": 379450 }, { "epoch": 0.766533207820069, "grad_norm": 167.5032196044922, "learning_rate": 1.6683786595382716e-06, "loss": 15.9843, "step": 379460 }, { "epoch": 0.7665534084527528, "grad_norm": 188.64894104003906, "learning_rate": 1.6681183824818387e-06, "loss": 7.4724, "step": 379470 }, { "epoch": 0.7665736090854366, "grad_norm": 352.69915771484375, "learning_rate": 1.6678581216645928e-06, "loss": 17.6078, "step": 379480 }, { "epoch": 0.7665938097181204, "grad_norm": 364.5897216796875, "learning_rate": 1.6675978770878055e-06, "loss": 19.1935, "step": 379490 }, { "epoch": 0.7666140103508042, "grad_norm": 151.12364196777344, "learning_rate": 1.6673376487527382e-06, "loss": 19.0768, "step": 379500 }, { "epoch": 0.766634210983488, "grad_norm": 207.0095977783203, "learning_rate": 1.6670774366606635e-06, "loss": 11.7365, "step": 379510 }, { "epoch": 0.7666544116161719, "grad_norm": 525.1183471679688, "learning_rate": 1.6668172408128509e-06, "loss": 16.5687, "step": 379520 }, { "epoch": 0.7666746122488556, "grad_norm": 212.12005615234375, "learning_rate": 1.6665570612105663e-06, "loss": 9.4346, "step": 379530 }, { "epoch": 0.7666948128815394, "grad_norm": 283.63836669921875, "learning_rate": 1.666296897855077e-06, "loss": 19.5487, "step": 379540 }, { "epoch": 0.7667150135142232, "grad_norm": 276.8514404296875, "learning_rate": 1.6660367507476539e-06, "loss": 19.7547, "step": 379550 }, { "epoch": 0.766735214146907, "grad_norm": 572.6661376953125, "learning_rate": 1.665776619889562e-06, "loss": 26.4335, "step": 379560 }, { "epoch": 0.7667554147795909, "grad_norm": 424.4990234375, "learning_rate": 1.6655165052820715e-06, "loss": 10.1693, "step": 379570 }, { "epoch": 0.7667756154122747, "grad_norm": 108.43693542480469, "learning_rate": 1.6652564069264476e-06, "loss": 9.6335, "step": 379580 }, { "epoch": 0.7667958160449585, "grad_norm": 99.37386322021484, "learning_rate": 1.6649963248239614e-06, "loss": 27.3893, "step": 379590 }, { "epoch": 0.7668160166776423, "grad_norm": 547.4306030273438, "learning_rate": 1.6647362589758787e-06, "loss": 27.5301, "step": 379600 }, { "epoch": 0.7668362173103261, "grad_norm": 126.62194061279297, "learning_rate": 1.6644762093834648e-06, "loss": 11.7068, "step": 379610 }, { "epoch": 0.76685641794301, "grad_norm": 290.7106018066406, "learning_rate": 1.6642161760479913e-06, "loss": 14.0254, "step": 379620 }, { "epoch": 0.7668766185756938, "grad_norm": 178.44374084472656, "learning_rate": 1.663956158970722e-06, "loss": 15.1706, "step": 379630 }, { "epoch": 0.7668968192083776, "grad_norm": 52.76251983642578, "learning_rate": 1.6636961581529277e-06, "loss": 17.1249, "step": 379640 }, { "epoch": 0.7669170198410614, "grad_norm": 599.8442993164062, "learning_rate": 1.6634361735958731e-06, "loss": 16.4345, "step": 379650 }, { "epoch": 0.7669372204737452, "grad_norm": 466.796142578125, "learning_rate": 1.6631762053008244e-06, "loss": 15.345, "step": 379660 }, { "epoch": 0.7669574211064291, "grad_norm": 484.4393310546875, "learning_rate": 1.6629162532690517e-06, "loss": 27.2989, "step": 379670 }, { "epoch": 0.7669776217391129, "grad_norm": 365.11407470703125, "learning_rate": 1.6626563175018207e-06, "loss": 10.8853, "step": 379680 }, { "epoch": 0.7669978223717967, "grad_norm": 306.56903076171875, "learning_rate": 1.6623963980003966e-06, "loss": 17.384, "step": 379690 }, { "epoch": 0.7670180230044805, "grad_norm": 99.13673400878906, "learning_rate": 1.6621364947660472e-06, "loss": 12.7832, "step": 379700 }, { "epoch": 0.7670382236371643, "grad_norm": 255.65431213378906, "learning_rate": 1.661876607800043e-06, "loss": 38.2307, "step": 379710 }, { "epoch": 0.7670584242698482, "grad_norm": 178.4351806640625, "learning_rate": 1.6616167371036446e-06, "loss": 8.3696, "step": 379720 }, { "epoch": 0.767078624902532, "grad_norm": 260.73431396484375, "learning_rate": 1.6613568826781208e-06, "loss": 22.6857, "step": 379730 }, { "epoch": 0.7670988255352158, "grad_norm": 135.3890838623047, "learning_rate": 1.6610970445247404e-06, "loss": 23.7011, "step": 379740 }, { "epoch": 0.7671190261678996, "grad_norm": 441.68548583984375, "learning_rate": 1.6608372226447678e-06, "loss": 18.7255, "step": 379750 }, { "epoch": 0.7671392268005834, "grad_norm": 198.85354614257812, "learning_rate": 1.6605774170394683e-06, "loss": 22.1617, "step": 379760 }, { "epoch": 0.7671594274332673, "grad_norm": 509.1815490722656, "learning_rate": 1.6603176277101095e-06, "loss": 25.1922, "step": 379770 }, { "epoch": 0.7671796280659511, "grad_norm": 131.404296875, "learning_rate": 1.6600578546579604e-06, "loss": 18.2558, "step": 379780 }, { "epoch": 0.7671998286986348, "grad_norm": 162.04257202148438, "learning_rate": 1.6597980978842814e-06, "loss": 23.3092, "step": 379790 }, { "epoch": 0.7672200293313186, "grad_norm": 76.0589370727539, "learning_rate": 1.6595383573903412e-06, "loss": 16.8713, "step": 379800 }, { "epoch": 0.7672402299640024, "grad_norm": 162.29991149902344, "learning_rate": 1.6592786331774075e-06, "loss": 17.9122, "step": 379810 }, { "epoch": 0.7672604305966862, "grad_norm": 361.5678405761719, "learning_rate": 1.6590189252467448e-06, "loss": 9.669, "step": 379820 }, { "epoch": 0.7672806312293701, "grad_norm": 296.17431640625, "learning_rate": 1.6587592335996167e-06, "loss": 13.0513, "step": 379830 }, { "epoch": 0.7673008318620539, "grad_norm": 236.4839324951172, "learning_rate": 1.6584995582372926e-06, "loss": 26.2146, "step": 379840 }, { "epoch": 0.7673210324947377, "grad_norm": 301.588623046875, "learning_rate": 1.658239899161036e-06, "loss": 9.3908, "step": 379850 }, { "epoch": 0.7673412331274215, "grad_norm": 448.1802673339844, "learning_rate": 1.6579802563721115e-06, "loss": 16.6969, "step": 379860 }, { "epoch": 0.7673614337601053, "grad_norm": 669.8584594726562, "learning_rate": 1.6577206298717862e-06, "loss": 31.2982, "step": 379870 }, { "epoch": 0.7673816343927892, "grad_norm": 30.565696716308594, "learning_rate": 1.657461019661326e-06, "loss": 24.3842, "step": 379880 }, { "epoch": 0.767401835025473, "grad_norm": 55.489994049072266, "learning_rate": 1.657201425741996e-06, "loss": 11.6404, "step": 379890 }, { "epoch": 0.7674220356581568, "grad_norm": 143.50486755371094, "learning_rate": 1.6569418481150596e-06, "loss": 13.2853, "step": 379900 }, { "epoch": 0.7674422362908406, "grad_norm": 230.04637145996094, "learning_rate": 1.656682286781784e-06, "loss": 32.4818, "step": 379910 }, { "epoch": 0.7674624369235244, "grad_norm": 224.27484130859375, "learning_rate": 1.6564227417434336e-06, "loss": 16.4922, "step": 379920 }, { "epoch": 0.7674826375562083, "grad_norm": 246.12545776367188, "learning_rate": 1.6561632130012716e-06, "loss": 16.4618, "step": 379930 }, { "epoch": 0.7675028381888921, "grad_norm": 109.74546813964844, "learning_rate": 1.6559037005565665e-06, "loss": 17.1524, "step": 379940 }, { "epoch": 0.7675230388215759, "grad_norm": 497.41741943359375, "learning_rate": 1.6556442044105797e-06, "loss": 17.2138, "step": 379950 }, { "epoch": 0.7675432394542597, "grad_norm": 287.6593322753906, "learning_rate": 1.6553847245645787e-06, "loss": 20.5766, "step": 379960 }, { "epoch": 0.7675634400869435, "grad_norm": 302.1579284667969, "learning_rate": 1.6551252610198266e-06, "loss": 20.316, "step": 379970 }, { "epoch": 0.7675836407196274, "grad_norm": 166.44244384765625, "learning_rate": 1.6548658137775868e-06, "loss": 24.2673, "step": 379980 }, { "epoch": 0.7676038413523112, "grad_norm": 266.0969543457031, "learning_rate": 1.6546063828391272e-06, "loss": 22.7109, "step": 379990 }, { "epoch": 0.767624041984995, "grad_norm": 307.35198974609375, "learning_rate": 1.6543469682057105e-06, "loss": 11.1322, "step": 380000 }, { "epoch": 0.7676442426176788, "grad_norm": 328.82080078125, "learning_rate": 1.654087569878599e-06, "loss": 22.5374, "step": 380010 }, { "epoch": 0.7676644432503626, "grad_norm": 275.94952392578125, "learning_rate": 1.653828187859059e-06, "loss": 14.1345, "step": 380020 }, { "epoch": 0.7676846438830465, "grad_norm": 191.5183868408203, "learning_rate": 1.6535688221483558e-06, "loss": 26.0147, "step": 380030 }, { "epoch": 0.7677048445157302, "grad_norm": 154.0655975341797, "learning_rate": 1.6533094727477528e-06, "loss": 11.7091, "step": 380040 }, { "epoch": 0.767725045148414, "grad_norm": 275.80438232421875, "learning_rate": 1.653050139658512e-06, "loss": 34.3075, "step": 380050 }, { "epoch": 0.7677452457810978, "grad_norm": 5.737986087799072, "learning_rate": 1.6527908228819e-06, "loss": 9.019, "step": 380060 }, { "epoch": 0.7677654464137816, "grad_norm": 222.0810089111328, "learning_rate": 1.6525315224191795e-06, "loss": 16.861, "step": 380070 }, { "epoch": 0.7677856470464655, "grad_norm": 726.4586791992188, "learning_rate": 1.6522722382716133e-06, "loss": 12.8204, "step": 380080 }, { "epoch": 0.7678058476791493, "grad_norm": 221.79351806640625, "learning_rate": 1.6520129704404659e-06, "loss": 23.9359, "step": 380090 }, { "epoch": 0.7678260483118331, "grad_norm": 189.5615234375, "learning_rate": 1.6517537189270043e-06, "loss": 18.3956, "step": 380100 }, { "epoch": 0.7678462489445169, "grad_norm": 439.0717468261719, "learning_rate": 1.651494483732486e-06, "loss": 28.3254, "step": 380110 }, { "epoch": 0.7678664495772007, "grad_norm": 289.67950439453125, "learning_rate": 1.651235264858177e-06, "loss": 15.9659, "step": 380120 }, { "epoch": 0.7678866502098846, "grad_norm": 80.90190887451172, "learning_rate": 1.6509760623053435e-06, "loss": 18.9208, "step": 380130 }, { "epoch": 0.7679068508425684, "grad_norm": 663.58447265625, "learning_rate": 1.6507168760752457e-06, "loss": 22.8557, "step": 380140 }, { "epoch": 0.7679270514752522, "grad_norm": 5.595545768737793, "learning_rate": 1.6504577061691468e-06, "loss": 12.079, "step": 380150 }, { "epoch": 0.767947252107936, "grad_norm": 276.5797424316406, "learning_rate": 1.650198552588312e-06, "loss": 18.6186, "step": 380160 }, { "epoch": 0.7679674527406198, "grad_norm": 292.4634704589844, "learning_rate": 1.649939415334003e-06, "loss": 7.1369, "step": 380170 }, { "epoch": 0.7679876533733037, "grad_norm": 271.3321838378906, "learning_rate": 1.6496802944074814e-06, "loss": 18.7277, "step": 380180 }, { "epoch": 0.7680078540059875, "grad_norm": 12.163592338562012, "learning_rate": 1.649421189810012e-06, "loss": 20.2606, "step": 380190 }, { "epoch": 0.7680280546386713, "grad_norm": 384.8508605957031, "learning_rate": 1.6491621015428588e-06, "loss": 17.369, "step": 380200 }, { "epoch": 0.7680482552713551, "grad_norm": 368.04351806640625, "learning_rate": 1.6489030296072833e-06, "loss": 13.7643, "step": 380210 }, { "epoch": 0.7680684559040389, "grad_norm": 253.7948760986328, "learning_rate": 1.6486439740045463e-06, "loss": 19.6314, "step": 380220 }, { "epoch": 0.7680886565367228, "grad_norm": 170.41635131835938, "learning_rate": 1.6483849347359132e-06, "loss": 17.2037, "step": 380230 }, { "epoch": 0.7681088571694066, "grad_norm": 2771.7294921875, "learning_rate": 1.6481259118026444e-06, "loss": 25.4573, "step": 380240 }, { "epoch": 0.7681290578020904, "grad_norm": 375.8173522949219, "learning_rate": 1.6478669052060048e-06, "loss": 18.7471, "step": 380250 }, { "epoch": 0.7681492584347742, "grad_norm": 144.49293518066406, "learning_rate": 1.6476079149472552e-06, "loss": 11.1937, "step": 380260 }, { "epoch": 0.768169459067458, "grad_norm": 267.39556884765625, "learning_rate": 1.6473489410276565e-06, "loss": 31.064, "step": 380270 }, { "epoch": 0.7681896597001419, "grad_norm": 533.2514038085938, "learning_rate": 1.6470899834484744e-06, "loss": 9.5212, "step": 380280 }, { "epoch": 0.7682098603328257, "grad_norm": 102.65806579589844, "learning_rate": 1.6468310422109684e-06, "loss": 9.9908, "step": 380290 }, { "epoch": 0.7682300609655094, "grad_norm": 34.78265380859375, "learning_rate": 1.6465721173164e-06, "loss": 12.8952, "step": 380300 }, { "epoch": 0.7682502615981932, "grad_norm": 279.13836669921875, "learning_rate": 1.6463132087660327e-06, "loss": 9.8641, "step": 380310 }, { "epoch": 0.768270462230877, "grad_norm": 183.0652313232422, "learning_rate": 1.6460543165611291e-06, "loss": 15.9621, "step": 380320 }, { "epoch": 0.7682906628635608, "grad_norm": 386.725341796875, "learning_rate": 1.6457954407029503e-06, "loss": 17.4816, "step": 380330 }, { "epoch": 0.7683108634962447, "grad_norm": 183.0020294189453, "learning_rate": 1.6455365811927559e-06, "loss": 8.0311, "step": 380340 }, { "epoch": 0.7683310641289285, "grad_norm": 274.7873229980469, "learning_rate": 1.6452777380318112e-06, "loss": 15.6413, "step": 380350 }, { "epoch": 0.7683512647616123, "grad_norm": 296.9296875, "learning_rate": 1.645018911221376e-06, "loss": 16.0559, "step": 380360 }, { "epoch": 0.7683714653942961, "grad_norm": 256.2965393066406, "learning_rate": 1.6447601007627096e-06, "loss": 14.9643, "step": 380370 }, { "epoch": 0.76839166602698, "grad_norm": 602.979248046875, "learning_rate": 1.6445013066570758e-06, "loss": 13.3529, "step": 380380 }, { "epoch": 0.7684118666596638, "grad_norm": 122.27928161621094, "learning_rate": 1.644242528905739e-06, "loss": 7.5984, "step": 380390 }, { "epoch": 0.7684320672923476, "grad_norm": 453.8408508300781, "learning_rate": 1.643983767509954e-06, "loss": 9.8941, "step": 380400 }, { "epoch": 0.7684522679250314, "grad_norm": 74.75115966796875, "learning_rate": 1.6437250224709844e-06, "loss": 12.587, "step": 380410 }, { "epoch": 0.7684724685577152, "grad_norm": 292.0780334472656, "learning_rate": 1.6434662937900942e-06, "loss": 17.0144, "step": 380420 }, { "epoch": 0.768492669190399, "grad_norm": 266.9320068359375, "learning_rate": 1.6432075814685423e-06, "loss": 25.2212, "step": 380430 }, { "epoch": 0.7685128698230829, "grad_norm": 218.83689880371094, "learning_rate": 1.6429488855075876e-06, "loss": 20.1003, "step": 380440 }, { "epoch": 0.7685330704557667, "grad_norm": 58.93135070800781, "learning_rate": 1.6426902059084942e-06, "loss": 15.6326, "step": 380450 }, { "epoch": 0.7685532710884505, "grad_norm": 118.41670989990234, "learning_rate": 1.6424315426725224e-06, "loss": 12.2903, "step": 380460 }, { "epoch": 0.7685734717211343, "grad_norm": 284.3265075683594, "learning_rate": 1.6421728958009298e-06, "loss": 22.3982, "step": 380470 }, { "epoch": 0.7685936723538181, "grad_norm": 358.7496032714844, "learning_rate": 1.6419142652949793e-06, "loss": 17.4947, "step": 380480 }, { "epoch": 0.768613872986502, "grad_norm": 358.91259765625, "learning_rate": 1.6416556511559329e-06, "loss": 8.7693, "step": 380490 }, { "epoch": 0.7686340736191858, "grad_norm": 193.21429443359375, "learning_rate": 1.6413970533850498e-06, "loss": 14.4506, "step": 380500 }, { "epoch": 0.7686542742518696, "grad_norm": 0.09616389870643616, "learning_rate": 1.6411384719835882e-06, "loss": 10.3158, "step": 380510 }, { "epoch": 0.7686744748845534, "grad_norm": 298.53887939453125, "learning_rate": 1.6408799069528119e-06, "loss": 15.9855, "step": 380520 }, { "epoch": 0.7686946755172372, "grad_norm": 0.0, "learning_rate": 1.6406213582939801e-06, "loss": 11.1399, "step": 380530 }, { "epoch": 0.7687148761499211, "grad_norm": 150.9296112060547, "learning_rate": 1.6403628260083503e-06, "loss": 18.1897, "step": 380540 }, { "epoch": 0.7687350767826048, "grad_norm": 306.8619689941406, "learning_rate": 1.6401043100971864e-06, "loss": 15.7428, "step": 380550 }, { "epoch": 0.7687552774152886, "grad_norm": 392.1243896484375, "learning_rate": 1.639845810561745e-06, "loss": 12.9886, "step": 380560 }, { "epoch": 0.7687754780479724, "grad_norm": 470.0807800292969, "learning_rate": 1.6395873274032887e-06, "loss": 13.1475, "step": 380570 }, { "epoch": 0.7687956786806562, "grad_norm": 466.9950866699219, "learning_rate": 1.6393288606230768e-06, "loss": 26.9898, "step": 380580 }, { "epoch": 0.7688158793133401, "grad_norm": 867.8502807617188, "learning_rate": 1.6390704102223664e-06, "loss": 32.1879, "step": 380590 }, { "epoch": 0.7688360799460239, "grad_norm": 295.3981018066406, "learning_rate": 1.6388119762024213e-06, "loss": 19.9481, "step": 380600 }, { "epoch": 0.7688562805787077, "grad_norm": 238.3459014892578, "learning_rate": 1.6385535585644985e-06, "loss": 14.1114, "step": 380610 }, { "epoch": 0.7688764812113915, "grad_norm": 152.8134002685547, "learning_rate": 1.6382951573098565e-06, "loss": 25.2644, "step": 380620 }, { "epoch": 0.7688966818440753, "grad_norm": 473.7834777832031, "learning_rate": 1.638036772439756e-06, "loss": 17.5054, "step": 380630 }, { "epoch": 0.7689168824767592, "grad_norm": 239.5711212158203, "learning_rate": 1.6377784039554584e-06, "loss": 18.501, "step": 380640 }, { "epoch": 0.768937083109443, "grad_norm": 136.3107452392578, "learning_rate": 1.6375200518582208e-06, "loss": 23.6628, "step": 380650 }, { "epoch": 0.7689572837421268, "grad_norm": 588.1956176757812, "learning_rate": 1.6372617161493014e-06, "loss": 15.8727, "step": 380660 }, { "epoch": 0.7689774843748106, "grad_norm": 552.57421875, "learning_rate": 1.637003396829962e-06, "loss": 26.7292, "step": 380670 }, { "epoch": 0.7689976850074944, "grad_norm": 544.6727294921875, "learning_rate": 1.63674509390146e-06, "loss": 33.7145, "step": 380680 }, { "epoch": 0.7690178856401783, "grad_norm": 274.08270263671875, "learning_rate": 1.636486807365053e-06, "loss": 23.3949, "step": 380690 }, { "epoch": 0.7690380862728621, "grad_norm": 201.4409942626953, "learning_rate": 1.6362285372220016e-06, "loss": 27.526, "step": 380700 }, { "epoch": 0.7690582869055459, "grad_norm": 116.2074966430664, "learning_rate": 1.635970283473567e-06, "loss": 16.1907, "step": 380710 }, { "epoch": 0.7690784875382297, "grad_norm": 204.48223876953125, "learning_rate": 1.6357120461210024e-06, "loss": 18.4942, "step": 380720 }, { "epoch": 0.7690986881709135, "grad_norm": 217.5348358154297, "learning_rate": 1.6354538251655695e-06, "loss": 15.0978, "step": 380730 }, { "epoch": 0.7691188888035974, "grad_norm": 333.04412841796875, "learning_rate": 1.6351956206085273e-06, "loss": 19.9123, "step": 380740 }, { "epoch": 0.7691390894362812, "grad_norm": 18.82965087890625, "learning_rate": 1.6349374324511347e-06, "loss": 16.1229, "step": 380750 }, { "epoch": 0.769159290068965, "grad_norm": 357.07586669921875, "learning_rate": 1.6346792606946466e-06, "loss": 19.73, "step": 380760 }, { "epoch": 0.7691794907016488, "grad_norm": 287.72308349609375, "learning_rate": 1.6344211053403253e-06, "loss": 18.0472, "step": 380770 }, { "epoch": 0.7691996913343326, "grad_norm": 356.3037109375, "learning_rate": 1.634162966389427e-06, "loss": 12.5382, "step": 380780 }, { "epoch": 0.7692198919670165, "grad_norm": 495.1387634277344, "learning_rate": 1.6339048438432093e-06, "loss": 23.6587, "step": 380790 }, { "epoch": 0.7692400925997003, "grad_norm": 126.89824676513672, "learning_rate": 1.6336467377029308e-06, "loss": 14.087, "step": 380800 }, { "epoch": 0.769260293232384, "grad_norm": 214.9866485595703, "learning_rate": 1.6333886479698514e-06, "loss": 13.5726, "step": 380810 }, { "epoch": 0.7692804938650678, "grad_norm": 484.38232421875, "learning_rate": 1.6331305746452276e-06, "loss": 15.8146, "step": 380820 }, { "epoch": 0.7693006944977516, "grad_norm": 138.88601684570312, "learning_rate": 1.632872517730315e-06, "loss": 17.0918, "step": 380830 }, { "epoch": 0.7693208951304354, "grad_norm": 346.05029296875, "learning_rate": 1.6326144772263752e-06, "loss": 21.9814, "step": 380840 }, { "epoch": 0.7693410957631193, "grad_norm": 7.960198879241943, "learning_rate": 1.6323564531346642e-06, "loss": 6.1024, "step": 380850 }, { "epoch": 0.7693612963958031, "grad_norm": 144.4032440185547, "learning_rate": 1.6320984454564377e-06, "loss": 20.4758, "step": 380860 }, { "epoch": 0.7693814970284869, "grad_norm": 324.5146484375, "learning_rate": 1.6318404541929562e-06, "loss": 23.503, "step": 380870 }, { "epoch": 0.7694016976611707, "grad_norm": 627.2685546875, "learning_rate": 1.6315824793454743e-06, "loss": 23.7387, "step": 380880 }, { "epoch": 0.7694218982938545, "grad_norm": 10.285508155822754, "learning_rate": 1.631324520915253e-06, "loss": 4.9536, "step": 380890 }, { "epoch": 0.7694420989265384, "grad_norm": 144.7498779296875, "learning_rate": 1.6310665789035468e-06, "loss": 13.0146, "step": 380900 }, { "epoch": 0.7694622995592222, "grad_norm": 360.9971008300781, "learning_rate": 1.6308086533116125e-06, "loss": 20.6875, "step": 380910 }, { "epoch": 0.769482500191906, "grad_norm": 390.6474304199219, "learning_rate": 1.6305507441407076e-06, "loss": 30.8658, "step": 380920 }, { "epoch": 0.7695027008245898, "grad_norm": 110.81896209716797, "learning_rate": 1.6302928513920912e-06, "loss": 17.094, "step": 380930 }, { "epoch": 0.7695229014572736, "grad_norm": 707.8203735351562, "learning_rate": 1.6300349750670191e-06, "loss": 12.2866, "step": 380940 }, { "epoch": 0.7695431020899575, "grad_norm": 272.0848083496094, "learning_rate": 1.6297771151667462e-06, "loss": 20.9635, "step": 380950 }, { "epoch": 0.7695633027226413, "grad_norm": 442.42193603515625, "learning_rate": 1.6295192716925324e-06, "loss": 17.5016, "step": 380960 }, { "epoch": 0.7695835033553251, "grad_norm": 217.8946075439453, "learning_rate": 1.6292614446456328e-06, "loss": 12.6172, "step": 380970 }, { "epoch": 0.7696037039880089, "grad_norm": 247.62091064453125, "learning_rate": 1.629003634027303e-06, "loss": 11.6595, "step": 380980 }, { "epoch": 0.7696239046206927, "grad_norm": 409.61834716796875, "learning_rate": 1.6287458398388001e-06, "loss": 14.7695, "step": 380990 }, { "epoch": 0.7696441052533766, "grad_norm": 237.6927947998047, "learning_rate": 1.6284880620813847e-06, "loss": 20.7272, "step": 381000 }, { "epoch": 0.7696643058860604, "grad_norm": 129.66336059570312, "learning_rate": 1.6282303007563061e-06, "loss": 16.0719, "step": 381010 }, { "epoch": 0.7696845065187442, "grad_norm": 259.98956298828125, "learning_rate": 1.627972555864824e-06, "loss": 13.3376, "step": 381020 }, { "epoch": 0.769704707151428, "grad_norm": 355.7007141113281, "learning_rate": 1.6277148274081962e-06, "loss": 30.0392, "step": 381030 }, { "epoch": 0.7697249077841118, "grad_norm": 80.93647766113281, "learning_rate": 1.6274571153876777e-06, "loss": 16.5708, "step": 381040 }, { "epoch": 0.7697451084167957, "grad_norm": 323.4113464355469, "learning_rate": 1.627199419804522e-06, "loss": 11.0382, "step": 381050 }, { "epoch": 0.7697653090494795, "grad_norm": 384.3371276855469, "learning_rate": 1.6269417406599897e-06, "loss": 17.9962, "step": 381060 }, { "epoch": 0.7697855096821632, "grad_norm": 207.7653350830078, "learning_rate": 1.626684077955334e-06, "loss": 14.0203, "step": 381070 }, { "epoch": 0.769805710314847, "grad_norm": 407.4594421386719, "learning_rate": 1.6264264316918087e-06, "loss": 21.8607, "step": 381080 }, { "epoch": 0.7698259109475308, "grad_norm": 63.55842208862305, "learning_rate": 1.6261688018706724e-06, "loss": 7.7267, "step": 381090 }, { "epoch": 0.7698461115802147, "grad_norm": 178.68003845214844, "learning_rate": 1.6259111884931817e-06, "loss": 17.6481, "step": 381100 }, { "epoch": 0.7698663122128985, "grad_norm": 0.0, "learning_rate": 1.6256535915605904e-06, "loss": 8.428, "step": 381110 }, { "epoch": 0.7698865128455823, "grad_norm": 242.06675720214844, "learning_rate": 1.6253960110741528e-06, "loss": 9.0235, "step": 381120 }, { "epoch": 0.7699067134782661, "grad_norm": 274.2604675292969, "learning_rate": 1.6251384470351272e-06, "loss": 19.894, "step": 381130 }, { "epoch": 0.7699269141109499, "grad_norm": 234.91165161132812, "learning_rate": 1.6248808994447678e-06, "loss": 8.0088, "step": 381140 }, { "epoch": 0.7699471147436338, "grad_norm": 158.4322052001953, "learning_rate": 1.6246233683043279e-06, "loss": 16.259, "step": 381150 }, { "epoch": 0.7699673153763176, "grad_norm": 67.972412109375, "learning_rate": 1.6243658536150657e-06, "loss": 15.7134, "step": 381160 }, { "epoch": 0.7699875160090014, "grad_norm": 568.8428344726562, "learning_rate": 1.6241083553782332e-06, "loss": 18.7535, "step": 381170 }, { "epoch": 0.7700077166416852, "grad_norm": 299.7206115722656, "learning_rate": 1.6238508735950892e-06, "loss": 18.8805, "step": 381180 }, { "epoch": 0.770027917274369, "grad_norm": 398.8367004394531, "learning_rate": 1.6235934082668863e-06, "loss": 14.9712, "step": 381190 }, { "epoch": 0.7700481179070529, "grad_norm": 225.67263793945312, "learning_rate": 1.6233359593948777e-06, "loss": 19.8767, "step": 381200 }, { "epoch": 0.7700683185397367, "grad_norm": 80.89678955078125, "learning_rate": 1.6230785269803223e-06, "loss": 11.1922, "step": 381210 }, { "epoch": 0.7700885191724205, "grad_norm": 132.7426300048828, "learning_rate": 1.6228211110244725e-06, "loss": 19.0917, "step": 381220 }, { "epoch": 0.7701087198051043, "grad_norm": 151.37063598632812, "learning_rate": 1.6225637115285809e-06, "loss": 18.6763, "step": 381230 }, { "epoch": 0.7701289204377881, "grad_norm": 310.9026794433594, "learning_rate": 1.6223063284939045e-06, "loss": 25.5746, "step": 381240 }, { "epoch": 0.770149121070472, "grad_norm": 481.2515869140625, "learning_rate": 1.6220489619216988e-06, "loss": 34.1356, "step": 381250 }, { "epoch": 0.7701693217031558, "grad_norm": 696.6420288085938, "learning_rate": 1.621791611813217e-06, "loss": 18.7898, "step": 381260 }, { "epoch": 0.7701895223358396, "grad_norm": 378.4097900390625, "learning_rate": 1.6215342781697118e-06, "loss": 20.214, "step": 381270 }, { "epoch": 0.7702097229685234, "grad_norm": 414.4416809082031, "learning_rate": 1.62127696099244e-06, "loss": 36.8537, "step": 381280 }, { "epoch": 0.7702299236012072, "grad_norm": 369.2762451171875, "learning_rate": 1.6210196602826544e-06, "loss": 15.2596, "step": 381290 }, { "epoch": 0.770250124233891, "grad_norm": 233.53399658203125, "learning_rate": 1.6207623760416074e-06, "loss": 21.9481, "step": 381300 }, { "epoch": 0.7702703248665749, "grad_norm": 168.417724609375, "learning_rate": 1.6205051082705553e-06, "loss": 25.3135, "step": 381310 }, { "epoch": 0.7702905254992586, "grad_norm": 272.6971740722656, "learning_rate": 1.620247856970754e-06, "loss": 14.5226, "step": 381320 }, { "epoch": 0.7703107261319424, "grad_norm": 114.01438903808594, "learning_rate": 1.6199906221434525e-06, "loss": 10.4329, "step": 381330 }, { "epoch": 0.7703309267646262, "grad_norm": 297.49932861328125, "learning_rate": 1.619733403789906e-06, "loss": 9.5995, "step": 381340 }, { "epoch": 0.77035112739731, "grad_norm": 322.9773254394531, "learning_rate": 1.6194762019113703e-06, "loss": 14.5679, "step": 381350 }, { "epoch": 0.7703713280299939, "grad_norm": 274.81396484375, "learning_rate": 1.6192190165090982e-06, "loss": 16.8955, "step": 381360 }, { "epoch": 0.7703915286626777, "grad_norm": 509.3038024902344, "learning_rate": 1.6189618475843406e-06, "loss": 14.4301, "step": 381370 }, { "epoch": 0.7704117292953615, "grad_norm": 97.97931671142578, "learning_rate": 1.618704695138353e-06, "loss": 18.9814, "step": 381380 }, { "epoch": 0.7704319299280453, "grad_norm": 1100.88134765625, "learning_rate": 1.618447559172392e-06, "loss": 22.0251, "step": 381390 }, { "epoch": 0.7704521305607291, "grad_norm": 74.53239440917969, "learning_rate": 1.6181904396877041e-06, "loss": 8.3187, "step": 381400 }, { "epoch": 0.770472331193413, "grad_norm": 235.23931884765625, "learning_rate": 1.6179333366855455e-06, "loss": 22.5462, "step": 381410 }, { "epoch": 0.7704925318260968, "grad_norm": 209.3513641357422, "learning_rate": 1.6176762501671717e-06, "loss": 17.2015, "step": 381420 }, { "epoch": 0.7705127324587806, "grad_norm": 248.5102081298828, "learning_rate": 1.6174191801338334e-06, "loss": 12.8372, "step": 381430 }, { "epoch": 0.7705329330914644, "grad_norm": 94.54737091064453, "learning_rate": 1.617162126586782e-06, "loss": 14.8448, "step": 381440 }, { "epoch": 0.7705531337241482, "grad_norm": 29.525590896606445, "learning_rate": 1.6169050895272743e-06, "loss": 18.2706, "step": 381450 }, { "epoch": 0.7705733343568321, "grad_norm": 276.4920349121094, "learning_rate": 1.61664806895656e-06, "loss": 9.3384, "step": 381460 }, { "epoch": 0.7705935349895159, "grad_norm": 45.749263763427734, "learning_rate": 1.6163910648758919e-06, "loss": 17.7657, "step": 381470 }, { "epoch": 0.7706137356221997, "grad_norm": 100.77938079833984, "learning_rate": 1.6161340772865247e-06, "loss": 11.1474, "step": 381480 }, { "epoch": 0.7706339362548835, "grad_norm": 394.2664489746094, "learning_rate": 1.6158771061897077e-06, "loss": 15.1802, "step": 381490 }, { "epoch": 0.7706541368875673, "grad_norm": 211.75694274902344, "learning_rate": 1.6156201515866971e-06, "loss": 15.6633, "step": 381500 }, { "epoch": 0.7706743375202512, "grad_norm": 250.71343994140625, "learning_rate": 1.6153632134787433e-06, "loss": 37.7344, "step": 381510 }, { "epoch": 0.770694538152935, "grad_norm": 158.55894470214844, "learning_rate": 1.6151062918670973e-06, "loss": 12.102, "step": 381520 }, { "epoch": 0.7707147387856188, "grad_norm": 174.44285583496094, "learning_rate": 1.614849386753014e-06, "loss": 14.6762, "step": 381530 }, { "epoch": 0.7707349394183026, "grad_norm": 162.6228790283203, "learning_rate": 1.6145924981377424e-06, "loss": 8.1172, "step": 381540 }, { "epoch": 0.7707551400509864, "grad_norm": 235.456787109375, "learning_rate": 1.6143356260225385e-06, "loss": 11.5991, "step": 381550 }, { "epoch": 0.7707753406836703, "grad_norm": 216.19288635253906, "learning_rate": 1.6140787704086502e-06, "loss": 18.8945, "step": 381560 }, { "epoch": 0.7707955413163541, "grad_norm": 4.392897605895996, "learning_rate": 1.6138219312973335e-06, "loss": 12.911, "step": 381570 }, { "epoch": 0.7708157419490378, "grad_norm": 526.4610595703125, "learning_rate": 1.6135651086898373e-06, "loss": 19.8537, "step": 381580 }, { "epoch": 0.7708359425817216, "grad_norm": 166.4870147705078, "learning_rate": 1.613308302587413e-06, "loss": 20.8562, "step": 381590 }, { "epoch": 0.7708561432144054, "grad_norm": 233.2560272216797, "learning_rate": 1.6130515129913144e-06, "loss": 10.2752, "step": 381600 }, { "epoch": 0.7708763438470893, "grad_norm": 122.21912384033203, "learning_rate": 1.6127947399027922e-06, "loss": 11.2976, "step": 381610 }, { "epoch": 0.7708965444797731, "grad_norm": 111.13080596923828, "learning_rate": 1.612537983323096e-06, "loss": 22.5382, "step": 381620 }, { "epoch": 0.7709167451124569, "grad_norm": 260.4112854003906, "learning_rate": 1.6122812432534785e-06, "loss": 20.6366, "step": 381630 }, { "epoch": 0.7709369457451407, "grad_norm": 110.6598892211914, "learning_rate": 1.6120245196951933e-06, "loss": 11.6413, "step": 381640 }, { "epoch": 0.7709571463778245, "grad_norm": 476.56976318359375, "learning_rate": 1.6117678126494895e-06, "loss": 18.2709, "step": 381650 }, { "epoch": 0.7709773470105084, "grad_norm": 341.3958740234375, "learning_rate": 1.6115111221176171e-06, "loss": 15.2279, "step": 381660 }, { "epoch": 0.7709975476431922, "grad_norm": 213.99644470214844, "learning_rate": 1.6112544481008296e-06, "loss": 22.1134, "step": 381670 }, { "epoch": 0.771017748275876, "grad_norm": 292.73052978515625, "learning_rate": 1.6109977906003777e-06, "loss": 19.5962, "step": 381680 }, { "epoch": 0.7710379489085598, "grad_norm": 125.82281494140625, "learning_rate": 1.610741149617509e-06, "loss": 9.9936, "step": 381690 }, { "epoch": 0.7710581495412436, "grad_norm": 136.56301879882812, "learning_rate": 1.6104845251534772e-06, "loss": 13.6125, "step": 381700 }, { "epoch": 0.7710783501739275, "grad_norm": 372.99993896484375, "learning_rate": 1.6102279172095348e-06, "loss": 20.8608, "step": 381710 }, { "epoch": 0.7710985508066113, "grad_norm": 69.37654113769531, "learning_rate": 1.6099713257869303e-06, "loss": 11.1504, "step": 381720 }, { "epoch": 0.7711187514392951, "grad_norm": 506.95941162109375, "learning_rate": 1.6097147508869127e-06, "loss": 19.3223, "step": 381730 }, { "epoch": 0.7711389520719789, "grad_norm": 145.7698211669922, "learning_rate": 1.6094581925107356e-06, "loss": 9.31, "step": 381740 }, { "epoch": 0.7711591527046627, "grad_norm": 938.3321533203125, "learning_rate": 1.6092016506596481e-06, "loss": 20.4139, "step": 381750 }, { "epoch": 0.7711793533373466, "grad_norm": 280.6569519042969, "learning_rate": 1.6089451253348987e-06, "loss": 15.302, "step": 381760 }, { "epoch": 0.7711995539700304, "grad_norm": 294.08013916015625, "learning_rate": 1.6086886165377414e-06, "loss": 16.9592, "step": 381770 }, { "epoch": 0.7712197546027142, "grad_norm": 8.306781768798828, "learning_rate": 1.608432124269423e-06, "loss": 13.8975, "step": 381780 }, { "epoch": 0.771239955235398, "grad_norm": 118.00489044189453, "learning_rate": 1.608175648531196e-06, "loss": 15.0151, "step": 381790 }, { "epoch": 0.7712601558680818, "grad_norm": 259.82379150390625, "learning_rate": 1.6079191893243102e-06, "loss": 16.7618, "step": 381800 }, { "epoch": 0.7712803565007657, "grad_norm": 119.34048461914062, "learning_rate": 1.6076627466500127e-06, "loss": 16.6795, "step": 381810 }, { "epoch": 0.7713005571334495, "grad_norm": 356.4816589355469, "learning_rate": 1.6074063205095576e-06, "loss": 30.4978, "step": 381820 }, { "epoch": 0.7713207577661332, "grad_norm": 447.9054260253906, "learning_rate": 1.607149910904191e-06, "loss": 29.5475, "step": 381830 }, { "epoch": 0.771340958398817, "grad_norm": 330.8212890625, "learning_rate": 1.6068935178351657e-06, "loss": 11.6045, "step": 381840 }, { "epoch": 0.7713611590315008, "grad_norm": 449.1188659667969, "learning_rate": 1.6066371413037286e-06, "loss": 23.0913, "step": 381850 }, { "epoch": 0.7713813596641846, "grad_norm": 239.00271606445312, "learning_rate": 1.6063807813111315e-06, "loss": 25.0254, "step": 381860 }, { "epoch": 0.7714015602968685, "grad_norm": 298.73260498046875, "learning_rate": 1.6061244378586233e-06, "loss": 19.7918, "step": 381870 }, { "epoch": 0.7714217609295523, "grad_norm": 166.3368682861328, "learning_rate": 1.6058681109474511e-06, "loss": 17.4632, "step": 381880 }, { "epoch": 0.7714419615622361, "grad_norm": 322.3607482910156, "learning_rate": 1.605611800578868e-06, "loss": 18.6671, "step": 381890 }, { "epoch": 0.7714621621949199, "grad_norm": 634.7911987304688, "learning_rate": 1.605355506754121e-06, "loss": 18.8431, "step": 381900 }, { "epoch": 0.7714823628276037, "grad_norm": 442.3897705078125, "learning_rate": 1.6050992294744578e-06, "loss": 27.6957, "step": 381910 }, { "epoch": 0.7715025634602876, "grad_norm": 201.3455352783203, "learning_rate": 1.6048429687411294e-06, "loss": 12.3607, "step": 381920 }, { "epoch": 0.7715227640929714, "grad_norm": 159.6464080810547, "learning_rate": 1.604586724555388e-06, "loss": 21.1286, "step": 381930 }, { "epoch": 0.7715429647256552, "grad_norm": 47.50971984863281, "learning_rate": 1.6043304969184752e-06, "loss": 19.3672, "step": 381940 }, { "epoch": 0.771563165358339, "grad_norm": 481.9412841796875, "learning_rate": 1.6040742858316443e-06, "loss": 23.5268, "step": 381950 }, { "epoch": 0.7715833659910228, "grad_norm": 385.8701477050781, "learning_rate": 1.6038180912961455e-06, "loss": 36.0474, "step": 381960 }, { "epoch": 0.7716035666237067, "grad_norm": 241.29635620117188, "learning_rate": 1.6035619133132247e-06, "loss": 14.91, "step": 381970 }, { "epoch": 0.7716237672563905, "grad_norm": 296.1517639160156, "learning_rate": 1.6033057518841294e-06, "loss": 26.9522, "step": 381980 }, { "epoch": 0.7716439678890743, "grad_norm": 426.364013671875, "learning_rate": 1.6030496070101104e-06, "loss": 16.1218, "step": 381990 }, { "epoch": 0.7716641685217581, "grad_norm": 159.3304443359375, "learning_rate": 1.6027934786924187e-06, "loss": 19.8243, "step": 382000 }, { "epoch": 0.7716843691544419, "grad_norm": 143.75221252441406, "learning_rate": 1.6025373669322963e-06, "loss": 14.3331, "step": 382010 }, { "epoch": 0.7717045697871258, "grad_norm": 203.56085205078125, "learning_rate": 1.602281271730995e-06, "loss": 6.5334, "step": 382020 }, { "epoch": 0.7717247704198096, "grad_norm": 211.9809112548828, "learning_rate": 1.602025193089764e-06, "loss": 10.687, "step": 382030 }, { "epoch": 0.7717449710524934, "grad_norm": 350.32000732421875, "learning_rate": 1.60176913100985e-06, "loss": 22.972, "step": 382040 }, { "epoch": 0.7717651716851772, "grad_norm": 207.4131317138672, "learning_rate": 1.6015130854924999e-06, "loss": 16.9334, "step": 382050 }, { "epoch": 0.771785372317861, "grad_norm": 395.6531982421875, "learning_rate": 1.601257056538964e-06, "loss": 14.2392, "step": 382060 }, { "epoch": 0.7718055729505449, "grad_norm": 194.61184692382812, "learning_rate": 1.6010010441504891e-06, "loss": 16.9463, "step": 382070 }, { "epoch": 0.7718257735832287, "grad_norm": 499.833740234375, "learning_rate": 1.6007450483283215e-06, "loss": 25.2315, "step": 382080 }, { "epoch": 0.7718459742159124, "grad_norm": 217.65228271484375, "learning_rate": 1.6004890690737114e-06, "loss": 25.1147, "step": 382090 }, { "epoch": 0.7718661748485962, "grad_norm": 206.83099365234375, "learning_rate": 1.600233106387904e-06, "loss": 26.1421, "step": 382100 }, { "epoch": 0.77188637548128, "grad_norm": 258.4510498046875, "learning_rate": 1.5999771602721499e-06, "loss": 15.1982, "step": 382110 }, { "epoch": 0.7719065761139638, "grad_norm": 54.17173385620117, "learning_rate": 1.5997212307276943e-06, "loss": 12.3389, "step": 382120 }, { "epoch": 0.7719267767466477, "grad_norm": 579.0675659179688, "learning_rate": 1.5994653177557833e-06, "loss": 14.8714, "step": 382130 }, { "epoch": 0.7719469773793315, "grad_norm": 513.6767578125, "learning_rate": 1.5992094213576682e-06, "loss": 29.8801, "step": 382140 }, { "epoch": 0.7719671780120153, "grad_norm": 361.0621032714844, "learning_rate": 1.598953541534592e-06, "loss": 24.4513, "step": 382150 }, { "epoch": 0.7719873786446991, "grad_norm": 364.93902587890625, "learning_rate": 1.598697678287805e-06, "loss": 20.9541, "step": 382160 }, { "epoch": 0.772007579277383, "grad_norm": 294.11956787109375, "learning_rate": 1.5984418316185518e-06, "loss": 15.96, "step": 382170 }, { "epoch": 0.7720277799100668, "grad_norm": 263.8746032714844, "learning_rate": 1.598186001528082e-06, "loss": 14.5787, "step": 382180 }, { "epoch": 0.7720479805427506, "grad_norm": 1.6730331182479858, "learning_rate": 1.5979301880176407e-06, "loss": 10.5723, "step": 382190 }, { "epoch": 0.7720681811754344, "grad_norm": 46.63867950439453, "learning_rate": 1.597674391088474e-06, "loss": 12.643, "step": 382200 }, { "epoch": 0.7720883818081182, "grad_norm": 362.6148986816406, "learning_rate": 1.597418610741831e-06, "loss": 32.808, "step": 382210 }, { "epoch": 0.772108582440802, "grad_norm": 160.4918975830078, "learning_rate": 1.5971628469789569e-06, "loss": 12.1611, "step": 382220 }, { "epoch": 0.7721287830734859, "grad_norm": 392.6042785644531, "learning_rate": 1.5969070998010972e-06, "loss": 14.8171, "step": 382230 }, { "epoch": 0.7721489837061697, "grad_norm": 436.999755859375, "learning_rate": 1.5966513692094992e-06, "loss": 12.9351, "step": 382240 }, { "epoch": 0.7721691843388535, "grad_norm": 209.6871337890625, "learning_rate": 1.596395655205411e-06, "loss": 15.4751, "step": 382250 }, { "epoch": 0.7721893849715373, "grad_norm": 1661.7242431640625, "learning_rate": 1.5961399577900783e-06, "loss": 33.6471, "step": 382260 }, { "epoch": 0.7722095856042211, "grad_norm": 411.33099365234375, "learning_rate": 1.5958842769647442e-06, "loss": 17.4275, "step": 382270 }, { "epoch": 0.772229786236905, "grad_norm": 285.2146911621094, "learning_rate": 1.5956286127306591e-06, "loss": 17.6652, "step": 382280 }, { "epoch": 0.7722499868695888, "grad_norm": 262.1933288574219, "learning_rate": 1.5953729650890675e-06, "loss": 15.9023, "step": 382290 }, { "epoch": 0.7722701875022726, "grad_norm": 136.69412231445312, "learning_rate": 1.5951173340412134e-06, "loss": 19.8989, "step": 382300 }, { "epoch": 0.7722903881349564, "grad_norm": 140.0436553955078, "learning_rate": 1.5948617195883448e-06, "loss": 10.0115, "step": 382310 }, { "epoch": 0.7723105887676402, "grad_norm": 255.2784423828125, "learning_rate": 1.5946061217317082e-06, "loss": 21.0779, "step": 382320 }, { "epoch": 0.7723307894003241, "grad_norm": 1.5545421838760376, "learning_rate": 1.594350540472549e-06, "loss": 18.5292, "step": 382330 }, { "epoch": 0.7723509900330079, "grad_norm": 283.8136901855469, "learning_rate": 1.59409497581211e-06, "loss": 11.4169, "step": 382340 }, { "epoch": 0.7723711906656916, "grad_norm": 88.55598449707031, "learning_rate": 1.5938394277516412e-06, "loss": 25.7661, "step": 382350 }, { "epoch": 0.7723913912983754, "grad_norm": 148.04937744140625, "learning_rate": 1.5935838962923849e-06, "loss": 13.1143, "step": 382360 }, { "epoch": 0.7724115919310592, "grad_norm": 265.55340576171875, "learning_rate": 1.5933283814355871e-06, "loss": 21.8403, "step": 382370 }, { "epoch": 0.7724317925637431, "grad_norm": 366.2705078125, "learning_rate": 1.5930728831824943e-06, "loss": 12.414, "step": 382380 }, { "epoch": 0.7724519931964269, "grad_norm": 305.16448974609375, "learning_rate": 1.59281740153435e-06, "loss": 17.4045, "step": 382390 }, { "epoch": 0.7724721938291107, "grad_norm": 263.9412536621094, "learning_rate": 1.5925619364924016e-06, "loss": 13.6544, "step": 382400 }, { "epoch": 0.7724923944617945, "grad_norm": 227.05178833007812, "learning_rate": 1.5923064880578937e-06, "loss": 19.5646, "step": 382410 }, { "epoch": 0.7725125950944783, "grad_norm": 269.2883605957031, "learning_rate": 1.5920510562320685e-06, "loss": 28.1225, "step": 382420 }, { "epoch": 0.7725327957271622, "grad_norm": 291.134521484375, "learning_rate": 1.591795641016175e-06, "loss": 8.2899, "step": 382430 }, { "epoch": 0.772552996359846, "grad_norm": 766.7240600585938, "learning_rate": 1.5915402424114545e-06, "loss": 29.9062, "step": 382440 }, { "epoch": 0.7725731969925298, "grad_norm": 383.0875244140625, "learning_rate": 1.591284860419155e-06, "loss": 11.0937, "step": 382450 }, { "epoch": 0.7725933976252136, "grad_norm": 172.4079132080078, "learning_rate": 1.591029495040518e-06, "loss": 25.068, "step": 382460 }, { "epoch": 0.7726135982578974, "grad_norm": 856.8572387695312, "learning_rate": 1.5907741462767916e-06, "loss": 22.5054, "step": 382470 }, { "epoch": 0.7726337988905813, "grad_norm": 1218.111083984375, "learning_rate": 1.5905188141292182e-06, "loss": 25.4959, "step": 382480 }, { "epoch": 0.7726539995232651, "grad_norm": 245.06689453125, "learning_rate": 1.5902634985990412e-06, "loss": 17.0721, "step": 382490 }, { "epoch": 0.7726742001559489, "grad_norm": 238.30548095703125, "learning_rate": 1.5900081996875083e-06, "loss": 15.7299, "step": 382500 }, { "epoch": 0.7726944007886327, "grad_norm": 164.114501953125, "learning_rate": 1.5897529173958615e-06, "loss": 18.9543, "step": 382510 }, { "epoch": 0.7727146014213165, "grad_norm": 247.4830780029297, "learning_rate": 1.5894976517253436e-06, "loss": 28.9017, "step": 382520 }, { "epoch": 0.7727348020540004, "grad_norm": 353.74749755859375, "learning_rate": 1.5892424026772008e-06, "loss": 30.8128, "step": 382530 }, { "epoch": 0.7727550026866842, "grad_norm": 230.04835510253906, "learning_rate": 1.5889871702526799e-06, "loss": 19.7028, "step": 382540 }, { "epoch": 0.772775203319368, "grad_norm": 59.593570709228516, "learning_rate": 1.5887319544530182e-06, "loss": 19.3113, "step": 382550 }, { "epoch": 0.7727954039520518, "grad_norm": 425.7927551269531, "learning_rate": 1.5884767552794639e-06, "loss": 15.8614, "step": 382560 }, { "epoch": 0.7728156045847356, "grad_norm": 254.07989501953125, "learning_rate": 1.5882215727332618e-06, "loss": 18.3569, "step": 382570 }, { "epoch": 0.7728358052174195, "grad_norm": 213.16993713378906, "learning_rate": 1.5879664068156535e-06, "loss": 16.2, "step": 382580 }, { "epoch": 0.7728560058501033, "grad_norm": 380.9659729003906, "learning_rate": 1.5877112575278819e-06, "loss": 10.2677, "step": 382590 }, { "epoch": 0.772876206482787, "grad_norm": 138.21022033691406, "learning_rate": 1.587456124871191e-06, "loss": 24.6405, "step": 382600 }, { "epoch": 0.7728964071154708, "grad_norm": 138.7893524169922, "learning_rate": 1.5872010088468293e-06, "loss": 10.1834, "step": 382610 }, { "epoch": 0.7729166077481546, "grad_norm": 293.9682312011719, "learning_rate": 1.586945909456032e-06, "loss": 10.0647, "step": 382620 }, { "epoch": 0.7729368083808384, "grad_norm": 675.6156005859375, "learning_rate": 1.5866908267000464e-06, "loss": 18.3482, "step": 382630 }, { "epoch": 0.7729570090135223, "grad_norm": 24.647655487060547, "learning_rate": 1.586435760580118e-06, "loss": 14.6801, "step": 382640 }, { "epoch": 0.7729772096462061, "grad_norm": 224.32696533203125, "learning_rate": 1.5861807110974869e-06, "loss": 19.4645, "step": 382650 }, { "epoch": 0.7729974102788899, "grad_norm": 218.0642852783203, "learning_rate": 1.585925678253396e-06, "loss": 28.5721, "step": 382660 }, { "epoch": 0.7730176109115737, "grad_norm": 858.185302734375, "learning_rate": 1.5856706620490902e-06, "loss": 23.5589, "step": 382670 }, { "epoch": 0.7730378115442575, "grad_norm": 271.1485595703125, "learning_rate": 1.5854156624858119e-06, "loss": 21.6207, "step": 382680 }, { "epoch": 0.7730580121769414, "grad_norm": 229.85513305664062, "learning_rate": 1.5851606795648023e-06, "loss": 19.83, "step": 382690 }, { "epoch": 0.7730782128096252, "grad_norm": 349.9148254394531, "learning_rate": 1.5849057132873063e-06, "loss": 14.3973, "step": 382700 }, { "epoch": 0.773098413442309, "grad_norm": 306.8370056152344, "learning_rate": 1.5846507636545645e-06, "loss": 9.1267, "step": 382710 }, { "epoch": 0.7731186140749928, "grad_norm": 466.5513916015625, "learning_rate": 1.5843958306678219e-06, "loss": 12.8841, "step": 382720 }, { "epoch": 0.7731388147076766, "grad_norm": 58.80337142944336, "learning_rate": 1.58414091432832e-06, "loss": 9.4935, "step": 382730 }, { "epoch": 0.7731590153403605, "grad_norm": 49.18659973144531, "learning_rate": 1.5838860146372992e-06, "loss": 11.3888, "step": 382740 }, { "epoch": 0.7731792159730443, "grad_norm": 181.579833984375, "learning_rate": 1.5836311315960051e-06, "loss": 12.9409, "step": 382750 }, { "epoch": 0.7731994166057281, "grad_norm": 237.5634002685547, "learning_rate": 1.5833762652056773e-06, "loss": 26.0093, "step": 382760 }, { "epoch": 0.7732196172384119, "grad_norm": 95.00493621826172, "learning_rate": 1.5831214154675605e-06, "loss": 13.3607, "step": 382770 }, { "epoch": 0.7732398178710957, "grad_norm": 253.17051696777344, "learning_rate": 1.5828665823828943e-06, "loss": 15.476, "step": 382780 }, { "epoch": 0.7732600185037796, "grad_norm": 409.5142822265625, "learning_rate": 1.5826117659529234e-06, "loss": 12.7914, "step": 382790 }, { "epoch": 0.7732802191364634, "grad_norm": 601.6853637695312, "learning_rate": 1.582356966178888e-06, "loss": 27.7386, "step": 382800 }, { "epoch": 0.7733004197691472, "grad_norm": 121.81703186035156, "learning_rate": 1.5821021830620287e-06, "loss": 13.2929, "step": 382810 }, { "epoch": 0.773320620401831, "grad_norm": 259.06744384765625, "learning_rate": 1.5818474166035907e-06, "loss": 14.676, "step": 382820 }, { "epoch": 0.7733408210345148, "grad_norm": 197.45632934570312, "learning_rate": 1.5815926668048138e-06, "loss": 14.3273, "step": 382830 }, { "epoch": 0.7733610216671987, "grad_norm": 446.2453918457031, "learning_rate": 1.5813379336669377e-06, "loss": 20.8277, "step": 382840 }, { "epoch": 0.7733812222998825, "grad_norm": 438.212158203125, "learning_rate": 1.581083217191206e-06, "loss": 16.3745, "step": 382850 }, { "epoch": 0.7734014229325662, "grad_norm": 125.53752136230469, "learning_rate": 1.5808285173788617e-06, "loss": 14.3211, "step": 382860 }, { "epoch": 0.77342162356525, "grad_norm": 207.5806121826172, "learning_rate": 1.5805738342311444e-06, "loss": 9.3589, "step": 382870 }, { "epoch": 0.7734418241979338, "grad_norm": 552.1026000976562, "learning_rate": 1.5803191677492941e-06, "loss": 19.7583, "step": 382880 }, { "epoch": 0.7734620248306177, "grad_norm": 313.5600280761719, "learning_rate": 1.5800645179345548e-06, "loss": 24.5917, "step": 382890 }, { "epoch": 0.7734822254633015, "grad_norm": 283.7438659667969, "learning_rate": 1.5798098847881664e-06, "loss": 23.8335, "step": 382900 }, { "epoch": 0.7735024260959853, "grad_norm": 112.70356750488281, "learning_rate": 1.5795552683113679e-06, "loss": 20.425, "step": 382910 }, { "epoch": 0.7735226267286691, "grad_norm": 276.1265563964844, "learning_rate": 1.579300668505403e-06, "loss": 19.4591, "step": 382920 }, { "epoch": 0.7735428273613529, "grad_norm": 553.0477294921875, "learning_rate": 1.5790460853715123e-06, "loss": 20.4367, "step": 382930 }, { "epoch": 0.7735630279940368, "grad_norm": 71.53166961669922, "learning_rate": 1.578791518910937e-06, "loss": 12.7154, "step": 382940 }, { "epoch": 0.7735832286267206, "grad_norm": 215.59628295898438, "learning_rate": 1.5785369691249147e-06, "loss": 42.6674, "step": 382950 }, { "epoch": 0.7736034292594044, "grad_norm": 125.41472625732422, "learning_rate": 1.5782824360146897e-06, "loss": 32.474, "step": 382960 }, { "epoch": 0.7736236298920882, "grad_norm": 167.70497131347656, "learning_rate": 1.5780279195815018e-06, "loss": 11.2374, "step": 382970 }, { "epoch": 0.773643830524772, "grad_norm": 392.530517578125, "learning_rate": 1.5777734198265887e-06, "loss": 21.1458, "step": 382980 }, { "epoch": 0.7736640311574559, "grad_norm": 365.7991638183594, "learning_rate": 1.5775189367511946e-06, "loss": 14.8029, "step": 382990 }, { "epoch": 0.7736842317901397, "grad_norm": 56.213069915771484, "learning_rate": 1.5772644703565564e-06, "loss": 12.4235, "step": 383000 }, { "epoch": 0.7737044324228235, "grad_norm": 228.25033569335938, "learning_rate": 1.5770100206439177e-06, "loss": 18.7353, "step": 383010 }, { "epoch": 0.7737246330555073, "grad_norm": 259.7063293457031, "learning_rate": 1.576755587614517e-06, "loss": 26.3171, "step": 383020 }, { "epoch": 0.7737448336881911, "grad_norm": 195.3341827392578, "learning_rate": 1.5765011712695928e-06, "loss": 12.2297, "step": 383030 }, { "epoch": 0.773765034320875, "grad_norm": 594.3290405273438, "learning_rate": 1.5762467716103884e-06, "loss": 21.3343, "step": 383040 }, { "epoch": 0.7737852349535588, "grad_norm": 306.4254455566406, "learning_rate": 1.5759923886381402e-06, "loss": 18.3333, "step": 383050 }, { "epoch": 0.7738054355862426, "grad_norm": 335.51580810546875, "learning_rate": 1.5757380223540914e-06, "loss": 21.3109, "step": 383060 }, { "epoch": 0.7738256362189264, "grad_norm": 241.49298095703125, "learning_rate": 1.5754836727594786e-06, "loss": 11.2997, "step": 383070 }, { "epoch": 0.7738458368516102, "grad_norm": 130.44760131835938, "learning_rate": 1.5752293398555446e-06, "loss": 13.0793, "step": 383080 }, { "epoch": 0.7738660374842941, "grad_norm": 49.54830551147461, "learning_rate": 1.5749750236435279e-06, "loss": 11.3571, "step": 383090 }, { "epoch": 0.7738862381169779, "grad_norm": 647.9216918945312, "learning_rate": 1.5747207241246654e-06, "loss": 20.6248, "step": 383100 }, { "epoch": 0.7739064387496616, "grad_norm": 146.637939453125, "learning_rate": 1.5744664413002004e-06, "loss": 13.622, "step": 383110 }, { "epoch": 0.7739266393823454, "grad_norm": 620.6266479492188, "learning_rate": 1.5742121751713708e-06, "loss": 21.252, "step": 383120 }, { "epoch": 0.7739468400150292, "grad_norm": 233.712158203125, "learning_rate": 1.5739579257394132e-06, "loss": 19.4837, "step": 383130 }, { "epoch": 0.773967040647713, "grad_norm": 18.972026824951172, "learning_rate": 1.57370369300557e-06, "loss": 15.9509, "step": 383140 }, { "epoch": 0.7739872412803969, "grad_norm": 290.032470703125, "learning_rate": 1.5734494769710817e-06, "loss": 12.394, "step": 383150 }, { "epoch": 0.7740074419130807, "grad_norm": 355.4277648925781, "learning_rate": 1.5731952776371828e-06, "loss": 12.2578, "step": 383160 }, { "epoch": 0.7740276425457645, "grad_norm": 402.60809326171875, "learning_rate": 1.5729410950051139e-06, "loss": 19.4072, "step": 383170 }, { "epoch": 0.7740478431784483, "grad_norm": 231.38613891601562, "learning_rate": 1.572686929076116e-06, "loss": 17.9742, "step": 383180 }, { "epoch": 0.7740680438111321, "grad_norm": 219.92153930664062, "learning_rate": 1.5724327798514267e-06, "loss": 20.0618, "step": 383190 }, { "epoch": 0.774088244443816, "grad_norm": 177.3785858154297, "learning_rate": 1.5721786473322825e-06, "loss": 11.3473, "step": 383200 }, { "epoch": 0.7741084450764998, "grad_norm": 491.52642822265625, "learning_rate": 1.571924531519924e-06, "loss": 22.6989, "step": 383210 }, { "epoch": 0.7741286457091836, "grad_norm": 8.291218757629395, "learning_rate": 1.571670432415593e-06, "loss": 12.9934, "step": 383220 }, { "epoch": 0.7741488463418674, "grad_norm": 47.0040397644043, "learning_rate": 1.5714163500205203e-06, "loss": 23.5812, "step": 383230 }, { "epoch": 0.7741690469745512, "grad_norm": 567.0422973632812, "learning_rate": 1.5711622843359492e-06, "loss": 38.7332, "step": 383240 }, { "epoch": 0.7741892476072351, "grad_norm": 272.1426696777344, "learning_rate": 1.5709082353631188e-06, "loss": 24.0635, "step": 383250 }, { "epoch": 0.7742094482399189, "grad_norm": 267.34173583984375, "learning_rate": 1.5706542031032663e-06, "loss": 20.0942, "step": 383260 }, { "epoch": 0.7742296488726027, "grad_norm": 242.16262817382812, "learning_rate": 1.570400187557627e-06, "loss": 9.6801, "step": 383270 }, { "epoch": 0.7742498495052865, "grad_norm": 370.6517333984375, "learning_rate": 1.5701461887274428e-06, "loss": 22.6949, "step": 383280 }, { "epoch": 0.7742700501379703, "grad_norm": 360.5954284667969, "learning_rate": 1.5698922066139505e-06, "loss": 21.3104, "step": 383290 }, { "epoch": 0.7742902507706542, "grad_norm": 358.52130126953125, "learning_rate": 1.5696382412183853e-06, "loss": 18.444, "step": 383300 }, { "epoch": 0.774310451403338, "grad_norm": 29.29496192932129, "learning_rate": 1.5693842925419894e-06, "loss": 18.2438, "step": 383310 }, { "epoch": 0.7743306520360218, "grad_norm": 1040.0792236328125, "learning_rate": 1.5691303605859964e-06, "loss": 39.3167, "step": 383320 }, { "epoch": 0.7743508526687056, "grad_norm": 430.4454650878906, "learning_rate": 1.568876445351648e-06, "loss": 18.3384, "step": 383330 }, { "epoch": 0.7743710533013894, "grad_norm": 172.21665954589844, "learning_rate": 1.5686225468401795e-06, "loss": 20.602, "step": 383340 }, { "epoch": 0.7743912539340733, "grad_norm": 348.52301025390625, "learning_rate": 1.5683686650528267e-06, "loss": 14.7804, "step": 383350 }, { "epoch": 0.7744114545667571, "grad_norm": 229.5061798095703, "learning_rate": 1.5681147999908308e-06, "loss": 23.5792, "step": 383360 }, { "epoch": 0.7744316551994408, "grad_norm": 462.8115234375, "learning_rate": 1.567860951655425e-06, "loss": 20.8231, "step": 383370 }, { "epoch": 0.7744518558321246, "grad_norm": 468.7329406738281, "learning_rate": 1.5676071200478504e-06, "loss": 31.918, "step": 383380 }, { "epoch": 0.7744720564648084, "grad_norm": 336.5062561035156, "learning_rate": 1.5673533051693413e-06, "loss": 16.5105, "step": 383390 }, { "epoch": 0.7744922570974923, "grad_norm": 1288.8465576171875, "learning_rate": 1.567099507021137e-06, "loss": 35.1476, "step": 383400 }, { "epoch": 0.7745124577301761, "grad_norm": 174.7385711669922, "learning_rate": 1.5668457256044733e-06, "loss": 21.0269, "step": 383410 }, { "epoch": 0.7745326583628599, "grad_norm": 2.5240590572357178, "learning_rate": 1.566591960920586e-06, "loss": 16.6987, "step": 383420 }, { "epoch": 0.7745528589955437, "grad_norm": 570.2108154296875, "learning_rate": 1.5663382129707144e-06, "loss": 24.0321, "step": 383430 }, { "epoch": 0.7745730596282275, "grad_norm": 33.70161437988281, "learning_rate": 1.5660844817560939e-06, "loss": 12.2041, "step": 383440 }, { "epoch": 0.7745932602609114, "grad_norm": 0.0, "learning_rate": 1.5658307672779594e-06, "loss": 13.083, "step": 383450 }, { "epoch": 0.7746134608935952, "grad_norm": 305.1890563964844, "learning_rate": 1.5655770695375494e-06, "loss": 9.2528, "step": 383460 }, { "epoch": 0.774633661526279, "grad_norm": 599.1336059570312, "learning_rate": 1.5653233885361013e-06, "loss": 25.1512, "step": 383470 }, { "epoch": 0.7746538621589628, "grad_norm": 202.7955780029297, "learning_rate": 1.5650697242748513e-06, "loss": 6.3836, "step": 383480 }, { "epoch": 0.7746740627916466, "grad_norm": 258.429931640625, "learning_rate": 1.5648160767550324e-06, "loss": 13.8255, "step": 383490 }, { "epoch": 0.7746942634243305, "grad_norm": 136.3521728515625, "learning_rate": 1.5645624459778858e-06, "loss": 10.1335, "step": 383500 }, { "epoch": 0.7747144640570143, "grad_norm": 475.4459228515625, "learning_rate": 1.5643088319446441e-06, "loss": 26.4721, "step": 383510 }, { "epoch": 0.7747346646896981, "grad_norm": 284.13153076171875, "learning_rate": 1.5640552346565441e-06, "loss": 24.6417, "step": 383520 }, { "epoch": 0.7747548653223819, "grad_norm": 421.2711181640625, "learning_rate": 1.563801654114821e-06, "loss": 19.5452, "step": 383530 }, { "epoch": 0.7747750659550657, "grad_norm": 359.39007568359375, "learning_rate": 1.5635480903207139e-06, "loss": 16.8146, "step": 383540 }, { "epoch": 0.7747952665877496, "grad_norm": 618.3807983398438, "learning_rate": 1.563294543275457e-06, "loss": 23.5527, "step": 383550 }, { "epoch": 0.7748154672204334, "grad_norm": 581.7918701171875, "learning_rate": 1.5630410129802837e-06, "loss": 32.7873, "step": 383560 }, { "epoch": 0.7748356678531172, "grad_norm": 129.5443115234375, "learning_rate": 1.5627874994364335e-06, "loss": 10.8633, "step": 383570 }, { "epoch": 0.774855868485801, "grad_norm": 42.37995910644531, "learning_rate": 1.5625340026451396e-06, "loss": 11.4909, "step": 383580 }, { "epoch": 0.7748760691184848, "grad_norm": 293.1564636230469, "learning_rate": 1.562280522607637e-06, "loss": 15.5059, "step": 383590 }, { "epoch": 0.7748962697511687, "grad_norm": 324.8996276855469, "learning_rate": 1.5620270593251635e-06, "loss": 18.4323, "step": 383600 }, { "epoch": 0.7749164703838525, "grad_norm": 433.90521240234375, "learning_rate": 1.561773612798952e-06, "loss": 13.5256, "step": 383610 }, { "epoch": 0.7749366710165362, "grad_norm": 221.35055541992188, "learning_rate": 1.5615201830302402e-06, "loss": 13.7689, "step": 383620 }, { "epoch": 0.77495687164922, "grad_norm": 417.2428894042969, "learning_rate": 1.5612667700202616e-06, "loss": 18.3554, "step": 383630 }, { "epoch": 0.7749770722819038, "grad_norm": 247.6269073486328, "learning_rate": 1.5610133737702503e-06, "loss": 23.3691, "step": 383640 }, { "epoch": 0.7749972729145876, "grad_norm": 254.74227905273438, "learning_rate": 1.560759994281445e-06, "loss": 14.845, "step": 383650 }, { "epoch": 0.7750174735472715, "grad_norm": 177.33004760742188, "learning_rate": 1.5605066315550759e-06, "loss": 12.4361, "step": 383660 }, { "epoch": 0.7750376741799553, "grad_norm": 203.95765686035156, "learning_rate": 1.5602532855923824e-06, "loss": 17.5767, "step": 383670 }, { "epoch": 0.7750578748126391, "grad_norm": 356.1719665527344, "learning_rate": 1.5599999563945955e-06, "loss": 11.1412, "step": 383680 }, { "epoch": 0.7750780754453229, "grad_norm": 241.8129425048828, "learning_rate": 1.5597466439629532e-06, "loss": 16.3037, "step": 383690 }, { "epoch": 0.7750982760780067, "grad_norm": 486.2669372558594, "learning_rate": 1.5594933482986885e-06, "loss": 28.1552, "step": 383700 }, { "epoch": 0.7751184767106906, "grad_norm": 65.52667999267578, "learning_rate": 1.5592400694030342e-06, "loss": 14.8213, "step": 383710 }, { "epoch": 0.7751386773433744, "grad_norm": 473.5024108886719, "learning_rate": 1.5589868072772279e-06, "loss": 52.2109, "step": 383720 }, { "epoch": 0.7751588779760582, "grad_norm": 21.476516723632812, "learning_rate": 1.558733561922503e-06, "loss": 7.174, "step": 383730 }, { "epoch": 0.775179078608742, "grad_norm": 434.16571044921875, "learning_rate": 1.5584803333400917e-06, "loss": 22.1485, "step": 383740 }, { "epoch": 0.7751992792414258, "grad_norm": 143.4740753173828, "learning_rate": 1.5582271215312294e-06, "loss": 16.0964, "step": 383750 }, { "epoch": 0.7752194798741097, "grad_norm": 691.790771484375, "learning_rate": 1.5579739264971544e-06, "loss": 26.5359, "step": 383760 }, { "epoch": 0.7752396805067935, "grad_norm": 191.64759826660156, "learning_rate": 1.5577207482390933e-06, "loss": 15.1753, "step": 383770 }, { "epoch": 0.7752598811394773, "grad_norm": 361.62689208984375, "learning_rate": 1.5574675867582845e-06, "loss": 18.7599, "step": 383780 }, { "epoch": 0.7752800817721611, "grad_norm": 227.40109252929688, "learning_rate": 1.557214442055962e-06, "loss": 15.7086, "step": 383790 }, { "epoch": 0.7753002824048449, "grad_norm": 46.88688278198242, "learning_rate": 1.556961314133359e-06, "loss": 16.5168, "step": 383800 }, { "epoch": 0.7753204830375288, "grad_norm": 304.6522521972656, "learning_rate": 1.5567082029917074e-06, "loss": 12.9177, "step": 383810 }, { "epoch": 0.7753406836702126, "grad_norm": 369.4507751464844, "learning_rate": 1.5564551086322428e-06, "loss": 19.9543, "step": 383820 }, { "epoch": 0.7753608843028964, "grad_norm": 238.27516174316406, "learning_rate": 1.556202031056201e-06, "loss": 9.4066, "step": 383830 }, { "epoch": 0.7753810849355802, "grad_norm": 234.7765350341797, "learning_rate": 1.5559489702648096e-06, "loss": 18.0895, "step": 383840 }, { "epoch": 0.775401285568264, "grad_norm": 381.4833679199219, "learning_rate": 1.5556959262593058e-06, "loss": 17.244, "step": 383850 }, { "epoch": 0.7754214862009479, "grad_norm": 177.50929260253906, "learning_rate": 1.5554428990409232e-06, "loss": 12.5436, "step": 383860 }, { "epoch": 0.7754416868336317, "grad_norm": 447.87860107421875, "learning_rate": 1.5551898886108947e-06, "loss": 15.5923, "step": 383870 }, { "epoch": 0.7754618874663154, "grad_norm": 241.33004760742188, "learning_rate": 1.5549368949704507e-06, "loss": 16.8437, "step": 383880 }, { "epoch": 0.7754820880989992, "grad_norm": 235.27049255371094, "learning_rate": 1.5546839181208284e-06, "loss": 13.2566, "step": 383890 }, { "epoch": 0.775502288731683, "grad_norm": 288.9822692871094, "learning_rate": 1.554430958063259e-06, "loss": 22.6139, "step": 383900 }, { "epoch": 0.7755224893643669, "grad_norm": 260.0544738769531, "learning_rate": 1.5541780147989733e-06, "loss": 14.1498, "step": 383910 }, { "epoch": 0.7755426899970507, "grad_norm": 109.87434387207031, "learning_rate": 1.5539250883292078e-06, "loss": 20.5753, "step": 383920 }, { "epoch": 0.7755628906297345, "grad_norm": 250.42709350585938, "learning_rate": 1.5536721786551918e-06, "loss": 17.4523, "step": 383930 }, { "epoch": 0.7755830912624183, "grad_norm": 485.7607421875, "learning_rate": 1.5534192857781611e-06, "loss": 11.9443, "step": 383940 }, { "epoch": 0.7756032918951021, "grad_norm": 237.27540588378906, "learning_rate": 1.5531664096993454e-06, "loss": 12.5655, "step": 383950 }, { "epoch": 0.775623492527786, "grad_norm": 22.050783157348633, "learning_rate": 1.55291355041998e-06, "loss": 14.8993, "step": 383960 }, { "epoch": 0.7756436931604698, "grad_norm": 96.60517883300781, "learning_rate": 1.552660707941296e-06, "loss": 12.6042, "step": 383970 }, { "epoch": 0.7756638937931536, "grad_norm": 191.2528533935547, "learning_rate": 1.552407882264524e-06, "loss": 18.8675, "step": 383980 }, { "epoch": 0.7756840944258374, "grad_norm": 226.61968994140625, "learning_rate": 1.552155073390899e-06, "loss": 31.4594, "step": 383990 }, { "epoch": 0.7757042950585212, "grad_norm": 101.32936096191406, "learning_rate": 1.551902281321651e-06, "loss": 18.0379, "step": 384000 }, { "epoch": 0.775724495691205, "grad_norm": 156.12094116210938, "learning_rate": 1.5516495060580145e-06, "loss": 11.8116, "step": 384010 }, { "epoch": 0.7757446963238889, "grad_norm": 249.17918395996094, "learning_rate": 1.5513967476012198e-06, "loss": 19.1866, "step": 384020 }, { "epoch": 0.7757648969565727, "grad_norm": 261.20458984375, "learning_rate": 1.551144005952498e-06, "loss": 16.0182, "step": 384030 }, { "epoch": 0.7757850975892565, "grad_norm": 224.83786010742188, "learning_rate": 1.5508912811130832e-06, "loss": 7.599, "step": 384040 }, { "epoch": 0.7758052982219403, "grad_norm": 184.5642547607422, "learning_rate": 1.5506385730842062e-06, "loss": 19.7801, "step": 384050 }, { "epoch": 0.7758254988546242, "grad_norm": 185.95916748046875, "learning_rate": 1.5503858818670963e-06, "loss": 11.6802, "step": 384060 }, { "epoch": 0.775845699487308, "grad_norm": 245.7959442138672, "learning_rate": 1.5501332074629876e-06, "loss": 23.1456, "step": 384070 }, { "epoch": 0.7758659001199918, "grad_norm": 201.1875, "learning_rate": 1.5498805498731146e-06, "loss": 14.774, "step": 384080 }, { "epoch": 0.7758861007526756, "grad_norm": 334.5734558105469, "learning_rate": 1.549627909098702e-06, "loss": 17.642, "step": 384090 }, { "epoch": 0.7759063013853594, "grad_norm": 428.7024230957031, "learning_rate": 1.5493752851409844e-06, "loss": 33.0233, "step": 384100 }, { "epoch": 0.7759265020180433, "grad_norm": 27.835582733154297, "learning_rate": 1.5491226780011954e-06, "loss": 36.5351, "step": 384110 }, { "epoch": 0.7759467026507271, "grad_norm": 197.75399780273438, "learning_rate": 1.548870087680563e-06, "loss": 20.2317, "step": 384120 }, { "epoch": 0.7759669032834109, "grad_norm": 649.497314453125, "learning_rate": 1.5486175141803177e-06, "loss": 25.4214, "step": 384130 }, { "epoch": 0.7759871039160946, "grad_norm": 124.53889465332031, "learning_rate": 1.5483649575016929e-06, "loss": 19.4995, "step": 384140 }, { "epoch": 0.7760073045487784, "grad_norm": 212.21401977539062, "learning_rate": 1.5481124176459195e-06, "loss": 26.4195, "step": 384150 }, { "epoch": 0.7760275051814622, "grad_norm": 115.39411163330078, "learning_rate": 1.5478598946142277e-06, "loss": 9.079, "step": 384160 }, { "epoch": 0.7760477058141461, "grad_norm": 205.0250244140625, "learning_rate": 1.5476073884078463e-06, "loss": 25.6658, "step": 384170 }, { "epoch": 0.7760679064468299, "grad_norm": 516.2677612304688, "learning_rate": 1.5473548990280097e-06, "loss": 17.4732, "step": 384180 }, { "epoch": 0.7760881070795137, "grad_norm": 190.59893798828125, "learning_rate": 1.5471024264759466e-06, "loss": 18.9903, "step": 384190 }, { "epoch": 0.7761083077121975, "grad_norm": 150.55946350097656, "learning_rate": 1.5468499707528856e-06, "loss": 13.415, "step": 384200 }, { "epoch": 0.7761285083448813, "grad_norm": 294.0908508300781, "learning_rate": 1.5465975318600607e-06, "loss": 21.1245, "step": 384210 }, { "epoch": 0.7761487089775652, "grad_norm": 278.4869689941406, "learning_rate": 1.5463451097986993e-06, "loss": 25.7308, "step": 384220 }, { "epoch": 0.776168909610249, "grad_norm": 310.0438232421875, "learning_rate": 1.5460927045700342e-06, "loss": 25.5038, "step": 384230 }, { "epoch": 0.7761891102429328, "grad_norm": 128.18861389160156, "learning_rate": 1.5458403161752943e-06, "loss": 10.7669, "step": 384240 }, { "epoch": 0.7762093108756166, "grad_norm": 409.55389404296875, "learning_rate": 1.5455879446157084e-06, "loss": 31.5259, "step": 384250 }, { "epoch": 0.7762295115083004, "grad_norm": 0.0, "learning_rate": 1.5453355898925094e-06, "loss": 19.6231, "step": 384260 }, { "epoch": 0.7762497121409843, "grad_norm": 294.72119140625, "learning_rate": 1.5450832520069241e-06, "loss": 17.7458, "step": 384270 }, { "epoch": 0.7762699127736681, "grad_norm": 652.5994873046875, "learning_rate": 1.5448309309601855e-06, "loss": 27.6062, "step": 384280 }, { "epoch": 0.7762901134063519, "grad_norm": 195.565673828125, "learning_rate": 1.5445786267535207e-06, "loss": 12.5931, "step": 384290 }, { "epoch": 0.7763103140390357, "grad_norm": 336.3799743652344, "learning_rate": 1.5443263393881619e-06, "loss": 16.578, "step": 384300 }, { "epoch": 0.7763305146717195, "grad_norm": 295.211669921875, "learning_rate": 1.5440740688653372e-06, "loss": 16.1856, "step": 384310 }, { "epoch": 0.7763507153044034, "grad_norm": 340.9779968261719, "learning_rate": 1.543821815186275e-06, "loss": 20.6586, "step": 384320 }, { "epoch": 0.7763709159370872, "grad_norm": 773.42236328125, "learning_rate": 1.5435695783522076e-06, "loss": 24.6748, "step": 384330 }, { "epoch": 0.776391116569771, "grad_norm": 150.40151977539062, "learning_rate": 1.5433173583643628e-06, "loss": 11.4025, "step": 384340 }, { "epoch": 0.7764113172024548, "grad_norm": 237.61758422851562, "learning_rate": 1.5430651552239684e-06, "loss": 20.8679, "step": 384350 }, { "epoch": 0.7764315178351386, "grad_norm": 185.77975463867188, "learning_rate": 1.5428129689322552e-06, "loss": 19.8723, "step": 384360 }, { "epoch": 0.7764517184678225, "grad_norm": 170.27609252929688, "learning_rate": 1.5425607994904552e-06, "loss": 13.3056, "step": 384370 }, { "epoch": 0.7764719191005063, "grad_norm": 91.09300231933594, "learning_rate": 1.5423086468997917e-06, "loss": 12.8996, "step": 384380 }, { "epoch": 0.77649211973319, "grad_norm": 340.5990295410156, "learning_rate": 1.5420565111614965e-06, "loss": 17.6836, "step": 384390 }, { "epoch": 0.7765123203658738, "grad_norm": 372.34515380859375, "learning_rate": 1.5418043922768e-06, "loss": 12.3336, "step": 384400 }, { "epoch": 0.7765325209985576, "grad_norm": 383.2972106933594, "learning_rate": 1.5415522902469293e-06, "loss": 14.9062, "step": 384410 }, { "epoch": 0.7765527216312414, "grad_norm": 218.6856231689453, "learning_rate": 1.5413002050731118e-06, "loss": 22.3479, "step": 384420 }, { "epoch": 0.7765729222639253, "grad_norm": 150.78863525390625, "learning_rate": 1.5410481367565777e-06, "loss": 13.696, "step": 384430 }, { "epoch": 0.7765931228966091, "grad_norm": 353.4713439941406, "learning_rate": 1.5407960852985582e-06, "loss": 15.3687, "step": 384440 }, { "epoch": 0.7766133235292929, "grad_norm": 370.2243347167969, "learning_rate": 1.540544050700276e-06, "loss": 6.8385, "step": 384450 }, { "epoch": 0.7766335241619767, "grad_norm": 449.73724365234375, "learning_rate": 1.5402920329629627e-06, "loss": 17.3083, "step": 384460 }, { "epoch": 0.7766537247946605, "grad_norm": 224.1399383544922, "learning_rate": 1.5400400320878484e-06, "loss": 10.4613, "step": 384470 }, { "epoch": 0.7766739254273444, "grad_norm": 224.21331787109375, "learning_rate": 1.539788048076159e-06, "loss": 20.9176, "step": 384480 }, { "epoch": 0.7766941260600282, "grad_norm": 231.90760803222656, "learning_rate": 1.539536080929121e-06, "loss": 46.6711, "step": 384490 }, { "epoch": 0.776714326692712, "grad_norm": 212.0497589111328, "learning_rate": 1.5392841306479667e-06, "loss": 5.893, "step": 384500 }, { "epoch": 0.7767345273253958, "grad_norm": 19.12787437438965, "learning_rate": 1.539032197233921e-06, "loss": 10.536, "step": 384510 }, { "epoch": 0.7767547279580796, "grad_norm": 180.9272918701172, "learning_rate": 1.5387802806882118e-06, "loss": 14.9591, "step": 384520 }, { "epoch": 0.7767749285907635, "grad_norm": 105.29832458496094, "learning_rate": 1.5385283810120688e-06, "loss": 25.9019, "step": 384530 }, { "epoch": 0.7767951292234473, "grad_norm": 491.0495300292969, "learning_rate": 1.5382764982067172e-06, "loss": 21.9422, "step": 384540 }, { "epoch": 0.7768153298561311, "grad_norm": 429.4051208496094, "learning_rate": 1.5380246322733883e-06, "loss": 41.6235, "step": 384550 }, { "epoch": 0.7768355304888149, "grad_norm": 43.668731689453125, "learning_rate": 1.5377727832133049e-06, "loss": 15.2682, "step": 384560 }, { "epoch": 0.7768557311214987, "grad_norm": 319.5467224121094, "learning_rate": 1.537520951027699e-06, "loss": 17.0557, "step": 384570 }, { "epoch": 0.7768759317541826, "grad_norm": 80.5936279296875, "learning_rate": 1.537269135717796e-06, "loss": 15.3367, "step": 384580 }, { "epoch": 0.7768961323868664, "grad_norm": 159.59219360351562, "learning_rate": 1.5370173372848218e-06, "loss": 8.4518, "step": 384590 }, { "epoch": 0.7769163330195502, "grad_norm": 180.53353881835938, "learning_rate": 1.5367655557300066e-06, "loss": 10.1666, "step": 384600 }, { "epoch": 0.776936533652234, "grad_norm": 569.2769775390625, "learning_rate": 1.5365137910545747e-06, "loss": 21.5753, "step": 384610 }, { "epoch": 0.7769567342849178, "grad_norm": 44.91267395019531, "learning_rate": 1.5362620432597559e-06, "loss": 8.6273, "step": 384620 }, { "epoch": 0.7769769349176017, "grad_norm": 303.3965148925781, "learning_rate": 1.5360103123467756e-06, "loss": 30.3285, "step": 384630 }, { "epoch": 0.7769971355502855, "grad_norm": 388.14434814453125, "learning_rate": 1.5357585983168593e-06, "loss": 31.2973, "step": 384640 }, { "epoch": 0.7770173361829692, "grad_norm": 375.4293518066406, "learning_rate": 1.5355069011712376e-06, "loss": 18.2922, "step": 384650 }, { "epoch": 0.777037536815653, "grad_norm": 305.43841552734375, "learning_rate": 1.5352552209111344e-06, "loss": 13.7377, "step": 384660 }, { "epoch": 0.7770577374483368, "grad_norm": 164.4486083984375, "learning_rate": 1.535003557537776e-06, "loss": 14.5133, "step": 384670 }, { "epoch": 0.7770779380810207, "grad_norm": 2.0482821464538574, "learning_rate": 1.5347519110523895e-06, "loss": 19.314, "step": 384680 }, { "epoch": 0.7770981387137045, "grad_norm": 299.0445861816406, "learning_rate": 1.5345002814562055e-06, "loss": 44.4664, "step": 384690 }, { "epoch": 0.7771183393463883, "grad_norm": 141.56808471679688, "learning_rate": 1.5342486687504432e-06, "loss": 11.9532, "step": 384700 }, { "epoch": 0.7771385399790721, "grad_norm": 5.604905128479004, "learning_rate": 1.533997072936333e-06, "loss": 13.7544, "step": 384710 }, { "epoch": 0.7771587406117559, "grad_norm": 241.60031127929688, "learning_rate": 1.533745494015102e-06, "loss": 11.7577, "step": 384720 }, { "epoch": 0.7771789412444398, "grad_norm": 137.21929931640625, "learning_rate": 1.533493931987975e-06, "loss": 18.7908, "step": 384730 }, { "epoch": 0.7771991418771236, "grad_norm": 254.34652709960938, "learning_rate": 1.5332423868561769e-06, "loss": 26.6564, "step": 384740 }, { "epoch": 0.7772193425098074, "grad_norm": 0.0, "learning_rate": 1.5329908586209347e-06, "loss": 7.0881, "step": 384750 }, { "epoch": 0.7772395431424912, "grad_norm": 357.8957824707031, "learning_rate": 1.5327393472834772e-06, "loss": 19.4995, "step": 384760 }, { "epoch": 0.777259743775175, "grad_norm": 154.64395141601562, "learning_rate": 1.5324878528450254e-06, "loss": 18.1654, "step": 384770 }, { "epoch": 0.7772799444078589, "grad_norm": 144.20655822753906, "learning_rate": 1.5322363753068064e-06, "loss": 10.8828, "step": 384780 }, { "epoch": 0.7773001450405427, "grad_norm": 51.50415802001953, "learning_rate": 1.5319849146700488e-06, "loss": 12.5334, "step": 384790 }, { "epoch": 0.7773203456732265, "grad_norm": 296.0111083984375, "learning_rate": 1.531733470935976e-06, "loss": 11.7463, "step": 384800 }, { "epoch": 0.7773405463059103, "grad_norm": 428.5551452636719, "learning_rate": 1.531482044105812e-06, "loss": 17.4505, "step": 384810 }, { "epoch": 0.7773607469385941, "grad_norm": 255.1742706298828, "learning_rate": 1.5312306341807858e-06, "loss": 26.9732, "step": 384820 }, { "epoch": 0.777380947571278, "grad_norm": 255.47784423828125, "learning_rate": 1.5309792411621204e-06, "loss": 24.9055, "step": 384830 }, { "epoch": 0.7774011482039618, "grad_norm": 356.9274597167969, "learning_rate": 1.5307278650510399e-06, "loss": 17.7897, "step": 384840 }, { "epoch": 0.7774213488366456, "grad_norm": 361.5262756347656, "learning_rate": 1.5304765058487725e-06, "loss": 13.1262, "step": 384850 }, { "epoch": 0.7774415494693294, "grad_norm": 262.4082946777344, "learning_rate": 1.5302251635565401e-06, "loss": 24.8385, "step": 384860 }, { "epoch": 0.7774617501020132, "grad_norm": 113.4153823852539, "learning_rate": 1.5299738381755712e-06, "loss": 13.6484, "step": 384870 }, { "epoch": 0.7774819507346971, "grad_norm": 122.51457214355469, "learning_rate": 1.5297225297070879e-06, "loss": 16.6544, "step": 384880 }, { "epoch": 0.7775021513673809, "grad_norm": 272.8089294433594, "learning_rate": 1.5294712381523168e-06, "loss": 15.6394, "step": 384890 }, { "epoch": 0.7775223520000646, "grad_norm": 477.9259033203125, "learning_rate": 1.529219963512481e-06, "loss": 13.2523, "step": 384900 }, { "epoch": 0.7775425526327484, "grad_norm": 282.08026123046875, "learning_rate": 1.5289687057888075e-06, "loss": 15.8721, "step": 384910 }, { "epoch": 0.7775627532654322, "grad_norm": 423.3241882324219, "learning_rate": 1.5287174649825194e-06, "loss": 22.1561, "step": 384920 }, { "epoch": 0.777582953898116, "grad_norm": 226.7703399658203, "learning_rate": 1.5284662410948398e-06, "loss": 13.2108, "step": 384930 }, { "epoch": 0.7776031545307999, "grad_norm": 207.5243682861328, "learning_rate": 1.5282150341269964e-06, "loss": 16.425, "step": 384940 }, { "epoch": 0.7776233551634837, "grad_norm": 349.1506652832031, "learning_rate": 1.5279638440802118e-06, "loss": 11.14, "step": 384950 }, { "epoch": 0.7776435557961675, "grad_norm": 459.5455322265625, "learning_rate": 1.5277126709557088e-06, "loss": 13.6403, "step": 384960 }, { "epoch": 0.7776637564288513, "grad_norm": 353.90460205078125, "learning_rate": 1.5274615147547128e-06, "loss": 19.2815, "step": 384970 }, { "epoch": 0.7776839570615351, "grad_norm": 66.26374816894531, "learning_rate": 1.5272103754784517e-06, "loss": 31.0412, "step": 384980 }, { "epoch": 0.777704157694219, "grad_norm": 337.0487365722656, "learning_rate": 1.526959253128143e-06, "loss": 14.5851, "step": 384990 }, { "epoch": 0.7777243583269028, "grad_norm": 340.6404113769531, "learning_rate": 1.5267081477050132e-06, "loss": 12.055, "step": 385000 }, { "epoch": 0.7777445589595866, "grad_norm": 203.42178344726562, "learning_rate": 1.5264570592102883e-06, "loss": 20.4627, "step": 385010 }, { "epoch": 0.7777647595922704, "grad_norm": 271.7607727050781, "learning_rate": 1.5262059876451906e-06, "loss": 19.6084, "step": 385020 }, { "epoch": 0.7777849602249542, "grad_norm": 325.53314208984375, "learning_rate": 1.5259549330109424e-06, "loss": 12.1886, "step": 385030 }, { "epoch": 0.7778051608576381, "grad_norm": 216.19627380371094, "learning_rate": 1.5257038953087678e-06, "loss": 15.123, "step": 385040 }, { "epoch": 0.7778253614903219, "grad_norm": 202.7661590576172, "learning_rate": 1.5254528745398943e-06, "loss": 18.2803, "step": 385050 }, { "epoch": 0.7778455621230057, "grad_norm": 390.0231018066406, "learning_rate": 1.5252018707055393e-06, "loss": 22.7321, "step": 385060 }, { "epoch": 0.7778657627556895, "grad_norm": 410.1373291015625, "learning_rate": 1.5249508838069287e-06, "loss": 10.8382, "step": 385070 }, { "epoch": 0.7778859633883733, "grad_norm": 154.49732971191406, "learning_rate": 1.5246999138452878e-06, "loss": 16.496, "step": 385080 }, { "epoch": 0.7779061640210572, "grad_norm": 339.65838623046875, "learning_rate": 1.5244489608218376e-06, "loss": 16.355, "step": 385090 }, { "epoch": 0.777926364653741, "grad_norm": 523.0009765625, "learning_rate": 1.5241980247378008e-06, "loss": 21.3962, "step": 385100 }, { "epoch": 0.7779465652864248, "grad_norm": 345.1305236816406, "learning_rate": 1.5239471055944022e-06, "loss": 14.3179, "step": 385110 }, { "epoch": 0.7779667659191086, "grad_norm": 243.1503143310547, "learning_rate": 1.5236962033928636e-06, "loss": 20.6552, "step": 385120 }, { "epoch": 0.7779869665517924, "grad_norm": 485.7555847167969, "learning_rate": 1.5234453181344071e-06, "loss": 23.4175, "step": 385130 }, { "epoch": 0.7780071671844763, "grad_norm": 229.3474884033203, "learning_rate": 1.5231944498202578e-06, "loss": 20.1737, "step": 385140 }, { "epoch": 0.7780273678171601, "grad_norm": 216.33518981933594, "learning_rate": 1.5229435984516355e-06, "loss": 25.7524, "step": 385150 }, { "epoch": 0.7780475684498438, "grad_norm": 356.80413818359375, "learning_rate": 1.5226927640297663e-06, "loss": 37.9736, "step": 385160 }, { "epoch": 0.7780677690825276, "grad_norm": 743.1900024414062, "learning_rate": 1.5224419465558687e-06, "loss": 16.6319, "step": 385170 }, { "epoch": 0.7780879697152114, "grad_norm": 319.8369140625, "learning_rate": 1.522191146031169e-06, "loss": 23.0396, "step": 385180 }, { "epoch": 0.7781081703478953, "grad_norm": 476.9859924316406, "learning_rate": 1.521940362456888e-06, "loss": 21.1673, "step": 385190 }, { "epoch": 0.7781283709805791, "grad_norm": 8.452079772949219, "learning_rate": 1.521689595834246e-06, "loss": 25.1977, "step": 385200 }, { "epoch": 0.7781485716132629, "grad_norm": 142.00799560546875, "learning_rate": 1.521438846164469e-06, "loss": 16.7236, "step": 385210 }, { "epoch": 0.7781687722459467, "grad_norm": 480.31072998046875, "learning_rate": 1.5211881134487755e-06, "loss": 21.273, "step": 385220 }, { "epoch": 0.7781889728786305, "grad_norm": 155.32162475585938, "learning_rate": 1.5209373976883906e-06, "loss": 30.2661, "step": 385230 }, { "epoch": 0.7782091735113144, "grad_norm": 235.1208038330078, "learning_rate": 1.5206866988845348e-06, "loss": 15.4628, "step": 385240 }, { "epoch": 0.7782293741439982, "grad_norm": 278.7958984375, "learning_rate": 1.5204360170384286e-06, "loss": 12.9618, "step": 385250 }, { "epoch": 0.778249574776682, "grad_norm": 1554.95263671875, "learning_rate": 1.5201853521512967e-06, "loss": 50.7689, "step": 385260 }, { "epoch": 0.7782697754093658, "grad_norm": 230.40733337402344, "learning_rate": 1.5199347042243595e-06, "loss": 17.3294, "step": 385270 }, { "epoch": 0.7782899760420496, "grad_norm": 236.63577270507812, "learning_rate": 1.519684073258837e-06, "loss": 23.2617, "step": 385280 }, { "epoch": 0.7783101766747335, "grad_norm": 478.6683349609375, "learning_rate": 1.5194334592559517e-06, "loss": 21.1271, "step": 385290 }, { "epoch": 0.7783303773074173, "grad_norm": 0.0, "learning_rate": 1.519182862216929e-06, "loss": 12.1399, "step": 385300 }, { "epoch": 0.7783505779401011, "grad_norm": 165.4044647216797, "learning_rate": 1.5189322821429842e-06, "loss": 9.9615, "step": 385310 }, { "epoch": 0.7783707785727849, "grad_norm": 222.9793701171875, "learning_rate": 1.5186817190353404e-06, "loss": 18.9139, "step": 385320 }, { "epoch": 0.7783909792054687, "grad_norm": 289.9752197265625, "learning_rate": 1.5184311728952216e-06, "loss": 18.4147, "step": 385330 }, { "epoch": 0.7784111798381526, "grad_norm": 372.9223937988281, "learning_rate": 1.5181806437238472e-06, "loss": 12.7692, "step": 385340 }, { "epoch": 0.7784313804708364, "grad_norm": 396.4694519042969, "learning_rate": 1.5179301315224364e-06, "loss": 26.5706, "step": 385350 }, { "epoch": 0.7784515811035202, "grad_norm": 227.4086151123047, "learning_rate": 1.5176796362922119e-06, "loss": 12.4351, "step": 385360 }, { "epoch": 0.778471781736204, "grad_norm": 26.361953735351562, "learning_rate": 1.5174291580343976e-06, "loss": 16.6074, "step": 385370 }, { "epoch": 0.7784919823688878, "grad_norm": 190.70008850097656, "learning_rate": 1.5171786967502078e-06, "loss": 26.1394, "step": 385380 }, { "epoch": 0.7785121830015717, "grad_norm": 320.2771911621094, "learning_rate": 1.516928252440867e-06, "loss": 34.334, "step": 385390 }, { "epoch": 0.7785323836342555, "grad_norm": 473.52166748046875, "learning_rate": 1.5166778251075964e-06, "loss": 18.0951, "step": 385400 }, { "epoch": 0.7785525842669393, "grad_norm": 180.4763641357422, "learning_rate": 1.516427414751616e-06, "loss": 14.0817, "step": 385410 }, { "epoch": 0.778572784899623, "grad_norm": 201.51504516601562, "learning_rate": 1.5161770213741444e-06, "loss": 15.5677, "step": 385420 }, { "epoch": 0.7785929855323068, "grad_norm": 222.45774841308594, "learning_rate": 1.5159266449764048e-06, "loss": 15.3802, "step": 385430 }, { "epoch": 0.7786131861649906, "grad_norm": 183.7897491455078, "learning_rate": 1.5156762855596162e-06, "loss": 11.2139, "step": 385440 }, { "epoch": 0.7786333867976745, "grad_norm": 368.4095764160156, "learning_rate": 1.5154259431249978e-06, "loss": 16.636, "step": 385450 }, { "epoch": 0.7786535874303583, "grad_norm": 163.91477966308594, "learning_rate": 1.5151756176737703e-06, "loss": 14.2475, "step": 385460 }, { "epoch": 0.7786737880630421, "grad_norm": 103.26740264892578, "learning_rate": 1.5149253092071554e-06, "loss": 13.2101, "step": 385470 }, { "epoch": 0.7786939886957259, "grad_norm": 437.7824401855469, "learning_rate": 1.5146750177263725e-06, "loss": 22.8571, "step": 385480 }, { "epoch": 0.7787141893284097, "grad_norm": 620.0509033203125, "learning_rate": 1.5144247432326386e-06, "loss": 22.5877, "step": 385490 }, { "epoch": 0.7787343899610936, "grad_norm": 300.136962890625, "learning_rate": 1.514174485727178e-06, "loss": 15.5891, "step": 385500 }, { "epoch": 0.7787545905937774, "grad_norm": 130.44216918945312, "learning_rate": 1.5139242452112074e-06, "loss": 23.9937, "step": 385510 }, { "epoch": 0.7787747912264612, "grad_norm": 0.36940526962280273, "learning_rate": 1.5136740216859464e-06, "loss": 12.5285, "step": 385520 }, { "epoch": 0.778794991859145, "grad_norm": 429.982421875, "learning_rate": 1.5134238151526166e-06, "loss": 22.1609, "step": 385530 }, { "epoch": 0.7788151924918288, "grad_norm": 205.65570068359375, "learning_rate": 1.5131736256124346e-06, "loss": 8.2691, "step": 385540 }, { "epoch": 0.7788353931245127, "grad_norm": 7.637307167053223, "learning_rate": 1.5129234530666232e-06, "loss": 19.4682, "step": 385550 }, { "epoch": 0.7788555937571965, "grad_norm": 266.55859375, "learning_rate": 1.5126732975164e-06, "loss": 12.4985, "step": 385560 }, { "epoch": 0.7788757943898803, "grad_norm": 99.7104721069336, "learning_rate": 1.5124231589629823e-06, "loss": 6.6364, "step": 385570 }, { "epoch": 0.7788959950225641, "grad_norm": 63.70204544067383, "learning_rate": 1.5121730374075916e-06, "loss": 14.8123, "step": 385580 }, { "epoch": 0.778916195655248, "grad_norm": 225.86007690429688, "learning_rate": 1.511922932851449e-06, "loss": 6.5314, "step": 385590 }, { "epoch": 0.7789363962879318, "grad_norm": 302.1482238769531, "learning_rate": 1.5116728452957686e-06, "loss": 12.3871, "step": 385600 }, { "epoch": 0.7789565969206156, "grad_norm": 510.6732177734375, "learning_rate": 1.511422774741771e-06, "loss": 22.007, "step": 385610 }, { "epoch": 0.7789767975532994, "grad_norm": 195.7790069580078, "learning_rate": 1.5111727211906774e-06, "loss": 12.9481, "step": 385620 }, { "epoch": 0.7789969981859832, "grad_norm": 109.26073455810547, "learning_rate": 1.5109226846437054e-06, "loss": 30.0748, "step": 385630 }, { "epoch": 0.779017198818667, "grad_norm": 231.77159118652344, "learning_rate": 1.510672665102071e-06, "loss": 20.4227, "step": 385640 }, { "epoch": 0.7790373994513509, "grad_norm": 225.8578643798828, "learning_rate": 1.5104226625669943e-06, "loss": 9.239, "step": 385650 }, { "epoch": 0.7790576000840347, "grad_norm": 438.7658996582031, "learning_rate": 1.5101726770396986e-06, "loss": 28.8008, "step": 385660 }, { "epoch": 0.7790778007167184, "grad_norm": 57.71368408203125, "learning_rate": 1.509922708521394e-06, "loss": 13.3397, "step": 385670 }, { "epoch": 0.7790980013494022, "grad_norm": 305.3207702636719, "learning_rate": 1.509672757013303e-06, "loss": 23.1299, "step": 385680 }, { "epoch": 0.779118201982086, "grad_norm": 424.953369140625, "learning_rate": 1.509422822516645e-06, "loss": 16.7751, "step": 385690 }, { "epoch": 0.7791384026147699, "grad_norm": 220.29344177246094, "learning_rate": 1.5091729050326376e-06, "loss": 14.7274, "step": 385700 }, { "epoch": 0.7791586032474537, "grad_norm": 200.9113006591797, "learning_rate": 1.5089230045624958e-06, "loss": 28.4599, "step": 385710 }, { "epoch": 0.7791788038801375, "grad_norm": 248.34886169433594, "learning_rate": 1.5086731211074418e-06, "loss": 13.204, "step": 385720 }, { "epoch": 0.7791990045128213, "grad_norm": 324.5210266113281, "learning_rate": 1.5084232546686911e-06, "loss": 14.9036, "step": 385730 }, { "epoch": 0.7792192051455051, "grad_norm": 373.56170654296875, "learning_rate": 1.508173405247461e-06, "loss": 25.9731, "step": 385740 }, { "epoch": 0.779239405778189, "grad_norm": 364.13287353515625, "learning_rate": 1.5079235728449714e-06, "loss": 15.8016, "step": 385750 }, { "epoch": 0.7792596064108728, "grad_norm": 152.9652862548828, "learning_rate": 1.5076737574624372e-06, "loss": 14.8728, "step": 385760 }, { "epoch": 0.7792798070435566, "grad_norm": 213.01657104492188, "learning_rate": 1.5074239591010791e-06, "loss": 6.6801, "step": 385770 }, { "epoch": 0.7793000076762404, "grad_norm": 307.4491882324219, "learning_rate": 1.507174177762112e-06, "loss": 16.3304, "step": 385780 }, { "epoch": 0.7793202083089242, "grad_norm": 280.9504089355469, "learning_rate": 1.5069244134467553e-06, "loss": 20.8016, "step": 385790 }, { "epoch": 0.779340408941608, "grad_norm": 750.549560546875, "learning_rate": 1.5066746661562254e-06, "loss": 21.813, "step": 385800 }, { "epoch": 0.7793606095742919, "grad_norm": 330.99072265625, "learning_rate": 1.5064249358917383e-06, "loss": 21.8062, "step": 385810 }, { "epoch": 0.7793808102069757, "grad_norm": 251.02325439453125, "learning_rate": 1.5061752226545134e-06, "loss": 8.5193, "step": 385820 }, { "epoch": 0.7794010108396595, "grad_norm": 0.0, "learning_rate": 1.5059255264457656e-06, "loss": 16.9836, "step": 385830 }, { "epoch": 0.7794212114723433, "grad_norm": 448.4695129394531, "learning_rate": 1.5056758472667144e-06, "loss": 18.1629, "step": 385840 }, { "epoch": 0.7794414121050272, "grad_norm": 438.26324462890625, "learning_rate": 1.5054261851185753e-06, "loss": 16.0447, "step": 385850 }, { "epoch": 0.779461612737711, "grad_norm": 132.9532012939453, "learning_rate": 1.5051765400025636e-06, "loss": 13.763, "step": 385860 }, { "epoch": 0.7794818133703948, "grad_norm": 5229.51318359375, "learning_rate": 1.5049269119198988e-06, "loss": 23.4192, "step": 385870 }, { "epoch": 0.7795020140030786, "grad_norm": 176.83570861816406, "learning_rate": 1.5046773008717968e-06, "loss": 22.7246, "step": 385880 }, { "epoch": 0.7795222146357624, "grad_norm": 656.0529174804688, "learning_rate": 1.5044277068594721e-06, "loss": 19.6964, "step": 385890 }, { "epoch": 0.7795424152684463, "grad_norm": 0.0, "learning_rate": 1.5041781298841424e-06, "loss": 22.2232, "step": 385900 }, { "epoch": 0.7795626159011301, "grad_norm": 71.97404479980469, "learning_rate": 1.503928569947028e-06, "loss": 22.6833, "step": 385910 }, { "epoch": 0.7795828165338139, "grad_norm": 265.4490966796875, "learning_rate": 1.5036790270493383e-06, "loss": 16.4374, "step": 385920 }, { "epoch": 0.7796030171664976, "grad_norm": 362.5272521972656, "learning_rate": 1.5034295011922933e-06, "loss": 14.235, "step": 385930 }, { "epoch": 0.7796232177991814, "grad_norm": 26.482257843017578, "learning_rate": 1.5031799923771102e-06, "loss": 22.0316, "step": 385940 }, { "epoch": 0.7796434184318652, "grad_norm": 238.99551391601562, "learning_rate": 1.5029305006050038e-06, "loss": 13.9746, "step": 385950 }, { "epoch": 0.7796636190645491, "grad_norm": 20.579370498657227, "learning_rate": 1.5026810258771885e-06, "loss": 13.2542, "step": 385960 }, { "epoch": 0.7796838196972329, "grad_norm": 543.6018676757812, "learning_rate": 1.5024315681948815e-06, "loss": 13.723, "step": 385970 }, { "epoch": 0.7797040203299167, "grad_norm": 972.4806518554688, "learning_rate": 1.5021821275593018e-06, "loss": 31.6778, "step": 385980 }, { "epoch": 0.7797242209626005, "grad_norm": 522.6668090820312, "learning_rate": 1.5019327039716598e-06, "loss": 21.3935, "step": 385990 }, { "epoch": 0.7797444215952843, "grad_norm": 127.69088745117188, "learning_rate": 1.5016832974331725e-06, "loss": 8.2515, "step": 386000 }, { "epoch": 0.7797646222279682, "grad_norm": 298.71966552734375, "learning_rate": 1.5014339079450586e-06, "loss": 18.1674, "step": 386010 }, { "epoch": 0.779784822860652, "grad_norm": 140.33242797851562, "learning_rate": 1.501184535508532e-06, "loss": 13.2116, "step": 386020 }, { "epoch": 0.7798050234933358, "grad_norm": 129.23724365234375, "learning_rate": 1.500935180124805e-06, "loss": 11.8531, "step": 386030 }, { "epoch": 0.7798252241260196, "grad_norm": 445.4544677734375, "learning_rate": 1.500685841795098e-06, "loss": 23.3271, "step": 386040 }, { "epoch": 0.7798454247587034, "grad_norm": 269.25982666015625, "learning_rate": 1.5004365205206235e-06, "loss": 26.3859, "step": 386050 }, { "epoch": 0.7798656253913873, "grad_norm": 236.81675720214844, "learning_rate": 1.5001872163025954e-06, "loss": 18.9519, "step": 386060 }, { "epoch": 0.7798858260240711, "grad_norm": 12.307541847229004, "learning_rate": 1.49993792914223e-06, "loss": 20.7199, "step": 386070 }, { "epoch": 0.7799060266567549, "grad_norm": 284.9108581542969, "learning_rate": 1.4996886590407445e-06, "loss": 13.6634, "step": 386080 }, { "epoch": 0.7799262272894387, "grad_norm": 211.33920288085938, "learning_rate": 1.4994394059993522e-06, "loss": 11.5602, "step": 386090 }, { "epoch": 0.7799464279221225, "grad_norm": 234.34495544433594, "learning_rate": 1.4991901700192657e-06, "loss": 23.1045, "step": 386100 }, { "epoch": 0.7799666285548064, "grad_norm": 218.60520935058594, "learning_rate": 1.4989409511017034e-06, "loss": 14.7636, "step": 386110 }, { "epoch": 0.7799868291874902, "grad_norm": 139.44931030273438, "learning_rate": 1.498691749247878e-06, "loss": 13.5471, "step": 386120 }, { "epoch": 0.780007029820174, "grad_norm": 174.9041290283203, "learning_rate": 1.4984425644590033e-06, "loss": 20.533, "step": 386130 }, { "epoch": 0.7800272304528578, "grad_norm": 390.3197021484375, "learning_rate": 1.498193396736296e-06, "loss": 18.3498, "step": 386140 }, { "epoch": 0.7800474310855416, "grad_norm": 190.32969665527344, "learning_rate": 1.4979442460809684e-06, "loss": 7.1266, "step": 386150 }, { "epoch": 0.7800676317182255, "grad_norm": 237.4027862548828, "learning_rate": 1.4976951124942369e-06, "loss": 15.7396, "step": 386160 }, { "epoch": 0.7800878323509093, "grad_norm": 248.53060913085938, "learning_rate": 1.4974459959773146e-06, "loss": 11.9861, "step": 386170 }, { "epoch": 0.780108032983593, "grad_norm": 86.4929428100586, "learning_rate": 1.4971968965314143e-06, "loss": 14.7314, "step": 386180 }, { "epoch": 0.7801282336162768, "grad_norm": 434.0429382324219, "learning_rate": 1.4969478141577531e-06, "loss": 10.6914, "step": 386190 }, { "epoch": 0.7801484342489606, "grad_norm": 249.0204315185547, "learning_rate": 1.496698748857543e-06, "loss": 21.871, "step": 386200 }, { "epoch": 0.7801686348816445, "grad_norm": 466.05023193359375, "learning_rate": 1.4964497006319972e-06, "loss": 14.8065, "step": 386210 }, { "epoch": 0.7801888355143283, "grad_norm": 781.0115966796875, "learning_rate": 1.4962006694823306e-06, "loss": 26.0206, "step": 386220 }, { "epoch": 0.7802090361470121, "grad_norm": 301.3993835449219, "learning_rate": 1.4959516554097581e-06, "loss": 15.5446, "step": 386230 }, { "epoch": 0.7802292367796959, "grad_norm": 171.87547302246094, "learning_rate": 1.4957026584154926e-06, "loss": 15.2769, "step": 386240 }, { "epoch": 0.7802494374123797, "grad_norm": 586.065185546875, "learning_rate": 1.4954536785007456e-06, "loss": 23.7377, "step": 386250 }, { "epoch": 0.7802696380450636, "grad_norm": 400.77752685546875, "learning_rate": 1.4952047156667326e-06, "loss": 22.4909, "step": 386260 }, { "epoch": 0.7802898386777474, "grad_norm": 439.84356689453125, "learning_rate": 1.4949557699146694e-06, "loss": 21.6808, "step": 386270 }, { "epoch": 0.7803100393104312, "grad_norm": 418.0362548828125, "learning_rate": 1.4947068412457639e-06, "loss": 22.5902, "step": 386280 }, { "epoch": 0.780330239943115, "grad_norm": 462.4202575683594, "learning_rate": 1.4944579296612323e-06, "loss": 20.0496, "step": 386290 }, { "epoch": 0.7803504405757988, "grad_norm": 143.228271484375, "learning_rate": 1.4942090351622884e-06, "loss": 17.2076, "step": 386300 }, { "epoch": 0.7803706412084827, "grad_norm": 392.22979736328125, "learning_rate": 1.493960157750145e-06, "loss": 17.4356, "step": 386310 }, { "epoch": 0.7803908418411665, "grad_norm": 190.47076416015625, "learning_rate": 1.493711297426013e-06, "loss": 16.9277, "step": 386320 }, { "epoch": 0.7804110424738503, "grad_norm": 428.4692077636719, "learning_rate": 1.4934624541911086e-06, "loss": 13.4695, "step": 386330 }, { "epoch": 0.7804312431065341, "grad_norm": 286.432861328125, "learning_rate": 1.4932136280466426e-06, "loss": 21.9964, "step": 386340 }, { "epoch": 0.7804514437392179, "grad_norm": 176.18360900878906, "learning_rate": 1.492964818993826e-06, "loss": 18.0602, "step": 386350 }, { "epoch": 0.7804716443719018, "grad_norm": 2.3707468509674072, "learning_rate": 1.492716027033876e-06, "loss": 18.6188, "step": 386360 }, { "epoch": 0.7804918450045856, "grad_norm": 305.6631774902344, "learning_rate": 1.4924672521680006e-06, "loss": 15.3792, "step": 386370 }, { "epoch": 0.7805120456372694, "grad_norm": 354.7536315917969, "learning_rate": 1.4922184943974167e-06, "loss": 19.0363, "step": 386380 }, { "epoch": 0.7805322462699532, "grad_norm": 791.7786254882812, "learning_rate": 1.4919697537233318e-06, "loss": 26.5833, "step": 386390 }, { "epoch": 0.780552446902637, "grad_norm": 309.8801574707031, "learning_rate": 1.491721030146963e-06, "loss": 17.7362, "step": 386400 }, { "epoch": 0.7805726475353209, "grad_norm": 133.623046875, "learning_rate": 1.4914723236695206e-06, "loss": 11.4533, "step": 386410 }, { "epoch": 0.7805928481680047, "grad_norm": 148.83004760742188, "learning_rate": 1.4912236342922143e-06, "loss": 21.6949, "step": 386420 }, { "epoch": 0.7806130488006885, "grad_norm": 374.0242004394531, "learning_rate": 1.4909749620162605e-06, "loss": 22.2138, "step": 386430 }, { "epoch": 0.7806332494333722, "grad_norm": 197.76132202148438, "learning_rate": 1.4907263068428673e-06, "loss": 14.7626, "step": 386440 }, { "epoch": 0.780653450066056, "grad_norm": 194.49395751953125, "learning_rate": 1.4904776687732503e-06, "loss": 5.8577, "step": 386450 }, { "epoch": 0.7806736506987398, "grad_norm": 323.6297302246094, "learning_rate": 1.4902290478086195e-06, "loss": 17.6088, "step": 386460 }, { "epoch": 0.7806938513314237, "grad_norm": 23.722332000732422, "learning_rate": 1.4899804439501853e-06, "loss": 35.0169, "step": 386470 }, { "epoch": 0.7807140519641075, "grad_norm": 274.83685302734375, "learning_rate": 1.4897318571991615e-06, "loss": 22.2155, "step": 386480 }, { "epoch": 0.7807342525967913, "grad_norm": 173.51768493652344, "learning_rate": 1.4894832875567593e-06, "loss": 11.5541, "step": 386490 }, { "epoch": 0.7807544532294751, "grad_norm": 305.93829345703125, "learning_rate": 1.489234735024188e-06, "loss": 11.7917, "step": 386500 }, { "epoch": 0.7807746538621589, "grad_norm": 197.58517456054688, "learning_rate": 1.4889861996026617e-06, "loss": 20.4221, "step": 386510 }, { "epoch": 0.7807948544948428, "grad_norm": 516.6758422851562, "learning_rate": 1.4887376812933913e-06, "loss": 17.7547, "step": 386520 }, { "epoch": 0.7808150551275266, "grad_norm": 18.043903350830078, "learning_rate": 1.488489180097588e-06, "loss": 32.2416, "step": 386530 }, { "epoch": 0.7808352557602104, "grad_norm": 367.4571533203125, "learning_rate": 1.4882406960164615e-06, "loss": 11.2764, "step": 386540 }, { "epoch": 0.7808554563928942, "grad_norm": 409.2172546386719, "learning_rate": 1.4879922290512244e-06, "loss": 21.1518, "step": 386550 }, { "epoch": 0.780875657025578, "grad_norm": 357.2014465332031, "learning_rate": 1.487743779203088e-06, "loss": 14.6595, "step": 386560 }, { "epoch": 0.7808958576582619, "grad_norm": 71.89716339111328, "learning_rate": 1.4874953464732606e-06, "loss": 19.2014, "step": 386570 }, { "epoch": 0.7809160582909457, "grad_norm": 459.6585998535156, "learning_rate": 1.487246930862955e-06, "loss": 13.5465, "step": 386580 }, { "epoch": 0.7809362589236295, "grad_norm": 190.4988250732422, "learning_rate": 1.486998532373385e-06, "loss": 14.6607, "step": 386590 }, { "epoch": 0.7809564595563133, "grad_norm": 77.13426971435547, "learning_rate": 1.4867501510057548e-06, "loss": 16.563, "step": 386600 }, { "epoch": 0.7809766601889971, "grad_norm": 355.4620666503906, "learning_rate": 1.486501786761278e-06, "loss": 20.7791, "step": 386610 }, { "epoch": 0.780996860821681, "grad_norm": 187.28884887695312, "learning_rate": 1.4862534396411671e-06, "loss": 13.632, "step": 386620 }, { "epoch": 0.7810170614543648, "grad_norm": 407.7522277832031, "learning_rate": 1.486005109646631e-06, "loss": 16.2679, "step": 386630 }, { "epoch": 0.7810372620870486, "grad_norm": 285.285888671875, "learning_rate": 1.4857567967788784e-06, "loss": 18.6135, "step": 386640 }, { "epoch": 0.7810574627197324, "grad_norm": 317.40667724609375, "learning_rate": 1.4855085010391217e-06, "loss": 20.9131, "step": 386650 }, { "epoch": 0.7810776633524162, "grad_norm": 543.3297729492188, "learning_rate": 1.485260222428571e-06, "loss": 16.215, "step": 386660 }, { "epoch": 0.7810978639851001, "grad_norm": 335.4390563964844, "learning_rate": 1.4850119609484342e-06, "loss": 22.3467, "step": 386670 }, { "epoch": 0.7811180646177839, "grad_norm": 285.9117736816406, "learning_rate": 1.4847637165999224e-06, "loss": 18.0567, "step": 386680 }, { "epoch": 0.7811382652504676, "grad_norm": 155.2165985107422, "learning_rate": 1.4845154893842473e-06, "loss": 18.315, "step": 386690 }, { "epoch": 0.7811584658831514, "grad_norm": 550.5028686523438, "learning_rate": 1.484267279302618e-06, "loss": 17.586, "step": 386700 }, { "epoch": 0.7811786665158352, "grad_norm": 136.49476623535156, "learning_rate": 1.4840190863562414e-06, "loss": 15.9218, "step": 386710 }, { "epoch": 0.781198867148519, "grad_norm": 585.5150146484375, "learning_rate": 1.483770910546331e-06, "loss": 18.1064, "step": 386720 }, { "epoch": 0.7812190677812029, "grad_norm": 62.63594055175781, "learning_rate": 1.4835227518740951e-06, "loss": 11.1416, "step": 386730 }, { "epoch": 0.7812392684138867, "grad_norm": 202.699462890625, "learning_rate": 1.4832746103407409e-06, "loss": 13.4616, "step": 386740 }, { "epoch": 0.7812594690465705, "grad_norm": 159.60455322265625, "learning_rate": 1.4830264859474814e-06, "loss": 15.767, "step": 386750 }, { "epoch": 0.7812796696792543, "grad_norm": 755.0714111328125, "learning_rate": 1.4827783786955224e-06, "loss": 19.7584, "step": 386760 }, { "epoch": 0.7812998703119381, "grad_norm": 74.487060546875, "learning_rate": 1.482530288586077e-06, "loss": 17.5709, "step": 386770 }, { "epoch": 0.781320070944622, "grad_norm": 15.490070343017578, "learning_rate": 1.482282215620352e-06, "loss": 14.2064, "step": 386780 }, { "epoch": 0.7813402715773058, "grad_norm": 9.746573448181152, "learning_rate": 1.4820341597995558e-06, "loss": 15.2501, "step": 386790 }, { "epoch": 0.7813604722099896, "grad_norm": 48.64480972290039, "learning_rate": 1.4817861211248996e-06, "loss": 22.937, "step": 386800 }, { "epoch": 0.7813806728426734, "grad_norm": 427.409423828125, "learning_rate": 1.4815380995975908e-06, "loss": 15.9917, "step": 386810 }, { "epoch": 0.7814008734753572, "grad_norm": 407.4538269042969, "learning_rate": 1.4812900952188374e-06, "loss": 15.2806, "step": 386820 }, { "epoch": 0.7814210741080411, "grad_norm": 668.0812377929688, "learning_rate": 1.4810421079898495e-06, "loss": 27.9007, "step": 386830 }, { "epoch": 0.7814412747407249, "grad_norm": 506.49456787109375, "learning_rate": 1.4807941379118368e-06, "loss": 16.4458, "step": 386840 }, { "epoch": 0.7814614753734087, "grad_norm": 267.5150146484375, "learning_rate": 1.480546184986007e-06, "loss": 15.2417, "step": 386850 }, { "epoch": 0.7814816760060925, "grad_norm": 149.66664123535156, "learning_rate": 1.4802982492135664e-06, "loss": 13.0584, "step": 386860 }, { "epoch": 0.7815018766387763, "grad_norm": 278.3874206542969, "learning_rate": 1.4800503305957264e-06, "loss": 12.3267, "step": 386870 }, { "epoch": 0.7815220772714602, "grad_norm": 202.0239715576172, "learning_rate": 1.4798024291336949e-06, "loss": 23.8061, "step": 386880 }, { "epoch": 0.781542277904144, "grad_norm": 114.47103118896484, "learning_rate": 1.4795545448286774e-06, "loss": 29.4291, "step": 386890 }, { "epoch": 0.7815624785368278, "grad_norm": 249.2436981201172, "learning_rate": 1.4793066776818843e-06, "loss": 13.6475, "step": 386900 }, { "epoch": 0.7815826791695116, "grad_norm": 482.7110290527344, "learning_rate": 1.479058827694525e-06, "loss": 11.6142, "step": 386910 }, { "epoch": 0.7816028798021954, "grad_norm": 225.9482879638672, "learning_rate": 1.4788109948678058e-06, "loss": 16.0768, "step": 386920 }, { "epoch": 0.7816230804348793, "grad_norm": 102.4180679321289, "learning_rate": 1.478563179202933e-06, "loss": 51.8072, "step": 386930 }, { "epoch": 0.7816432810675631, "grad_norm": 229.05992126464844, "learning_rate": 1.4783153807011186e-06, "loss": 22.4106, "step": 386940 }, { "epoch": 0.7816634817002468, "grad_norm": 296.3047790527344, "learning_rate": 1.4780675993635668e-06, "loss": 14.9291, "step": 386950 }, { "epoch": 0.7816836823329306, "grad_norm": 753.77099609375, "learning_rate": 1.4778198351914853e-06, "loss": 12.9999, "step": 386960 }, { "epoch": 0.7817038829656144, "grad_norm": 242.297607421875, "learning_rate": 1.4775720881860845e-06, "loss": 13.5131, "step": 386970 }, { "epoch": 0.7817240835982983, "grad_norm": 411.2208557128906, "learning_rate": 1.4773243583485681e-06, "loss": 22.5391, "step": 386980 }, { "epoch": 0.7817442842309821, "grad_norm": 174.7594451904297, "learning_rate": 1.4770766456801477e-06, "loss": 24.6807, "step": 386990 }, { "epoch": 0.7817644848636659, "grad_norm": 158.1733856201172, "learning_rate": 1.4768289501820265e-06, "loss": 13.9115, "step": 387000 }, { "epoch": 0.7817846854963497, "grad_norm": 106.55404663085938, "learning_rate": 1.476581271855415e-06, "loss": 23.2097, "step": 387010 }, { "epoch": 0.7818048861290335, "grad_norm": 721.3005981445312, "learning_rate": 1.4763336107015192e-06, "loss": 11.1873, "step": 387020 }, { "epoch": 0.7818250867617174, "grad_norm": 30.25313949584961, "learning_rate": 1.4760859667215449e-06, "loss": 22.0044, "step": 387030 }, { "epoch": 0.7818452873944012, "grad_norm": 386.5306396484375, "learning_rate": 1.4758383399167014e-06, "loss": 16.0887, "step": 387040 }, { "epoch": 0.781865488027085, "grad_norm": 203.12020874023438, "learning_rate": 1.4755907302881927e-06, "loss": 10.9014, "step": 387050 }, { "epoch": 0.7818856886597688, "grad_norm": 573.9024047851562, "learning_rate": 1.4753431378372291e-06, "loss": 17.2449, "step": 387060 }, { "epoch": 0.7819058892924526, "grad_norm": 349.8882751464844, "learning_rate": 1.4750955625650153e-06, "loss": 13.5531, "step": 387070 }, { "epoch": 0.7819260899251365, "grad_norm": 3.1438515186309814, "learning_rate": 1.474848004472757e-06, "loss": 13.4607, "step": 387080 }, { "epoch": 0.7819462905578203, "grad_norm": 244.84091186523438, "learning_rate": 1.4746004635616634e-06, "loss": 8.8733, "step": 387090 }, { "epoch": 0.7819664911905041, "grad_norm": 380.5775146484375, "learning_rate": 1.4743529398329393e-06, "loss": 17.1318, "step": 387100 }, { "epoch": 0.7819866918231879, "grad_norm": 168.15480041503906, "learning_rate": 1.4741054332877902e-06, "loss": 20.4051, "step": 387110 }, { "epoch": 0.7820068924558717, "grad_norm": 223.9401092529297, "learning_rate": 1.4738579439274236e-06, "loss": 32.2727, "step": 387120 }, { "epoch": 0.7820270930885556, "grad_norm": 415.8381652832031, "learning_rate": 1.473610471753047e-06, "loss": 17.5676, "step": 387130 }, { "epoch": 0.7820472937212394, "grad_norm": 174.7503204345703, "learning_rate": 1.4733630167658652e-06, "loss": 22.4006, "step": 387140 }, { "epoch": 0.7820674943539232, "grad_norm": 417.538330078125, "learning_rate": 1.473115578967083e-06, "loss": 17.4081, "step": 387150 }, { "epoch": 0.782087694986607, "grad_norm": 542.3140258789062, "learning_rate": 1.4728681583579091e-06, "loss": 15.1301, "step": 387160 }, { "epoch": 0.7821078956192908, "grad_norm": 496.0982360839844, "learning_rate": 1.4726207549395482e-06, "loss": 18.2218, "step": 387170 }, { "epoch": 0.7821280962519747, "grad_norm": 244.68734741210938, "learning_rate": 1.4723733687132041e-06, "loss": 14.6996, "step": 387180 }, { "epoch": 0.7821482968846585, "grad_norm": 350.3805847167969, "learning_rate": 1.4721259996800847e-06, "loss": 17.9912, "step": 387190 }, { "epoch": 0.7821684975173423, "grad_norm": 119.83030700683594, "learning_rate": 1.4718786478413983e-06, "loss": 31.6847, "step": 387200 }, { "epoch": 0.782188698150026, "grad_norm": 53.518898010253906, "learning_rate": 1.471631313198344e-06, "loss": 19.5045, "step": 387210 }, { "epoch": 0.7822088987827098, "grad_norm": 378.4377746582031, "learning_rate": 1.4713839957521315e-06, "loss": 17.7835, "step": 387220 }, { "epoch": 0.7822290994153936, "grad_norm": 3.013847827911377, "learning_rate": 1.4711366955039664e-06, "loss": 14.1446, "step": 387230 }, { "epoch": 0.7822493000480775, "grad_norm": 510.8134765625, "learning_rate": 1.4708894124550527e-06, "loss": 21.0334, "step": 387240 }, { "epoch": 0.7822695006807613, "grad_norm": 354.1209716796875, "learning_rate": 1.4706421466065952e-06, "loss": 13.0044, "step": 387250 }, { "epoch": 0.7822897013134451, "grad_norm": 335.2782287597656, "learning_rate": 1.470394897959801e-06, "loss": 15.5593, "step": 387260 }, { "epoch": 0.7823099019461289, "grad_norm": 366.88165283203125, "learning_rate": 1.4701476665158738e-06, "loss": 25.7206, "step": 387270 }, { "epoch": 0.7823301025788127, "grad_norm": 328.0639343261719, "learning_rate": 1.4699004522760174e-06, "loss": 18.1611, "step": 387280 }, { "epoch": 0.7823503032114966, "grad_norm": 225.9266357421875, "learning_rate": 1.4696532552414383e-06, "loss": 14.4296, "step": 387290 }, { "epoch": 0.7823705038441804, "grad_norm": 208.9155731201172, "learning_rate": 1.469406075413342e-06, "loss": 9.8176, "step": 387300 }, { "epoch": 0.7823907044768642, "grad_norm": 165.5037078857422, "learning_rate": 1.4691589127929328e-06, "loss": 16.5392, "step": 387310 }, { "epoch": 0.782410905109548, "grad_norm": 241.97059631347656, "learning_rate": 1.4689117673814135e-06, "loss": 29.6724, "step": 387320 }, { "epoch": 0.7824311057422318, "grad_norm": 89.24755859375, "learning_rate": 1.4686646391799909e-06, "loss": 14.5834, "step": 387330 }, { "epoch": 0.7824513063749157, "grad_norm": 295.3399658203125, "learning_rate": 1.4684175281898688e-06, "loss": 11.4239, "step": 387340 }, { "epoch": 0.7824715070075995, "grad_norm": 262.4174499511719, "learning_rate": 1.46817043441225e-06, "loss": 16.4959, "step": 387350 }, { "epoch": 0.7824917076402833, "grad_norm": 225.23338317871094, "learning_rate": 1.4679233578483415e-06, "loss": 21.5507, "step": 387360 }, { "epoch": 0.7825119082729671, "grad_norm": 325.22076416015625, "learning_rate": 1.4676762984993443e-06, "loss": 21.0835, "step": 387370 }, { "epoch": 0.782532108905651, "grad_norm": 623.3818359375, "learning_rate": 1.467429256366466e-06, "loss": 17.2592, "step": 387380 }, { "epoch": 0.7825523095383348, "grad_norm": 328.6619873046875, "learning_rate": 1.4671822314509099e-06, "loss": 21.9587, "step": 387390 }, { "epoch": 0.7825725101710186, "grad_norm": 162.79782104492188, "learning_rate": 1.4669352237538763e-06, "loss": 18.9777, "step": 387400 }, { "epoch": 0.7825927108037024, "grad_norm": 256.6744689941406, "learning_rate": 1.4666882332765747e-06, "loss": 21.0847, "step": 387410 }, { "epoch": 0.7826129114363862, "grad_norm": 221.7779083251953, "learning_rate": 1.4664412600202056e-06, "loss": 7.5765, "step": 387420 }, { "epoch": 0.78263311206907, "grad_norm": 194.32374572753906, "learning_rate": 1.4661943039859716e-06, "loss": 9.8516, "step": 387430 }, { "epoch": 0.7826533127017539, "grad_norm": 162.12655639648438, "learning_rate": 1.4659473651750777e-06, "loss": 16.1245, "step": 387440 }, { "epoch": 0.7826735133344377, "grad_norm": 402.704345703125, "learning_rate": 1.4657004435887296e-06, "loss": 16.1749, "step": 387450 }, { "epoch": 0.7826937139671214, "grad_norm": 154.46629333496094, "learning_rate": 1.4654535392281287e-06, "loss": 17.1691, "step": 387460 }, { "epoch": 0.7827139145998052, "grad_norm": 376.6253662109375, "learning_rate": 1.4652066520944774e-06, "loss": 26.9638, "step": 387470 }, { "epoch": 0.782734115232489, "grad_norm": 312.4566955566406, "learning_rate": 1.4649597821889817e-06, "loss": 16.1024, "step": 387480 }, { "epoch": 0.7827543158651729, "grad_norm": 98.08582305908203, "learning_rate": 1.4647129295128426e-06, "loss": 22.0678, "step": 387490 }, { "epoch": 0.7827745164978567, "grad_norm": 403.4292907714844, "learning_rate": 1.4644660940672628e-06, "loss": 22.333, "step": 387500 }, { "epoch": 0.7827947171305405, "grad_norm": 19.739137649536133, "learning_rate": 1.4642192758534463e-06, "loss": 8.1321, "step": 387510 }, { "epoch": 0.7828149177632243, "grad_norm": 160.80552673339844, "learning_rate": 1.463972474872598e-06, "loss": 15.1734, "step": 387520 }, { "epoch": 0.7828351183959081, "grad_norm": 199.50161743164062, "learning_rate": 1.463725691125919e-06, "loss": 29.6852, "step": 387530 }, { "epoch": 0.782855319028592, "grad_norm": 487.4304504394531, "learning_rate": 1.4634789246146103e-06, "loss": 25.6479, "step": 387540 }, { "epoch": 0.7828755196612758, "grad_norm": 269.5765075683594, "learning_rate": 1.463232175339878e-06, "loss": 14.0734, "step": 387550 }, { "epoch": 0.7828957202939596, "grad_norm": 429.9248046875, "learning_rate": 1.4629854433029234e-06, "loss": 30.6149, "step": 387560 }, { "epoch": 0.7829159209266434, "grad_norm": 422.1339111328125, "learning_rate": 1.4627387285049465e-06, "loss": 11.9703, "step": 387570 }, { "epoch": 0.7829361215593272, "grad_norm": 177.26063537597656, "learning_rate": 1.462492030947153e-06, "loss": 15.3645, "step": 387580 }, { "epoch": 0.7829563221920111, "grad_norm": 278.6905212402344, "learning_rate": 1.462245350630745e-06, "loss": 11.7296, "step": 387590 }, { "epoch": 0.7829765228246949, "grad_norm": 277.57366943359375, "learning_rate": 1.4619986875569247e-06, "loss": 19.6296, "step": 387600 }, { "epoch": 0.7829967234573787, "grad_norm": 366.2218933105469, "learning_rate": 1.4617520417268916e-06, "loss": 11.6152, "step": 387610 }, { "epoch": 0.7830169240900625, "grad_norm": 271.4199523925781, "learning_rate": 1.4615054131418521e-06, "loss": 9.3571, "step": 387620 }, { "epoch": 0.7830371247227463, "grad_norm": 89.44618225097656, "learning_rate": 1.4612588018030055e-06, "loss": 17.945, "step": 387630 }, { "epoch": 0.7830573253554302, "grad_norm": 326.3226318359375, "learning_rate": 1.461012207711553e-06, "loss": 21.0229, "step": 387640 }, { "epoch": 0.783077525988114, "grad_norm": 453.1385192871094, "learning_rate": 1.460765630868699e-06, "loss": 25.0663, "step": 387650 }, { "epoch": 0.7830977266207978, "grad_norm": 70.35225677490234, "learning_rate": 1.4605190712756428e-06, "loss": 12.3963, "step": 387660 }, { "epoch": 0.7831179272534816, "grad_norm": 399.1490783691406, "learning_rate": 1.460272528933589e-06, "loss": 28.9916, "step": 387670 }, { "epoch": 0.7831381278861654, "grad_norm": 202.74989318847656, "learning_rate": 1.4600260038437376e-06, "loss": 19.2686, "step": 387680 }, { "epoch": 0.7831583285188493, "grad_norm": 591.0513916015625, "learning_rate": 1.459779496007288e-06, "loss": 15.7403, "step": 387690 }, { "epoch": 0.7831785291515331, "grad_norm": 225.31300354003906, "learning_rate": 1.459533005425446e-06, "loss": 11.6261, "step": 387700 }, { "epoch": 0.7831987297842169, "grad_norm": 126.36961364746094, "learning_rate": 1.4592865320994103e-06, "loss": 16.003, "step": 387710 }, { "epoch": 0.7832189304169006, "grad_norm": 263.0609436035156, "learning_rate": 1.4590400760303814e-06, "loss": 13.9852, "step": 387720 }, { "epoch": 0.7832391310495844, "grad_norm": 156.9039764404297, "learning_rate": 1.4587936372195611e-06, "loss": 18.0401, "step": 387730 }, { "epoch": 0.7832593316822682, "grad_norm": 9.20401382446289, "learning_rate": 1.4585472156681535e-06, "loss": 8.5853, "step": 387740 }, { "epoch": 0.7832795323149521, "grad_norm": 11.521835327148438, "learning_rate": 1.4583008113773567e-06, "loss": 18.678, "step": 387750 }, { "epoch": 0.7832997329476359, "grad_norm": 297.08538818359375, "learning_rate": 1.4580544243483708e-06, "loss": 11.3333, "step": 387760 }, { "epoch": 0.7833199335803197, "grad_norm": 2.951110363006592, "learning_rate": 1.4578080545823991e-06, "loss": 14.5335, "step": 387770 }, { "epoch": 0.7833401342130035, "grad_norm": 357.1047668457031, "learning_rate": 1.457561702080642e-06, "loss": 14.5122, "step": 387780 }, { "epoch": 0.7833603348456873, "grad_norm": 150.04153442382812, "learning_rate": 1.457315366844298e-06, "loss": 10.5898, "step": 387790 }, { "epoch": 0.7833805354783712, "grad_norm": 362.931884765625, "learning_rate": 1.4570690488745687e-06, "loss": 18.7497, "step": 387800 }, { "epoch": 0.783400736111055, "grad_norm": 365.5284118652344, "learning_rate": 1.4568227481726589e-06, "loss": 11.4381, "step": 387810 }, { "epoch": 0.7834209367437388, "grad_norm": 291.3116149902344, "learning_rate": 1.4565764647397612e-06, "loss": 11.5281, "step": 387820 }, { "epoch": 0.7834411373764226, "grad_norm": 288.05975341796875, "learning_rate": 1.4563301985770812e-06, "loss": 18.1956, "step": 387830 }, { "epoch": 0.7834613380091064, "grad_norm": 295.5234375, "learning_rate": 1.4560839496858187e-06, "loss": 9.5896, "step": 387840 }, { "epoch": 0.7834815386417903, "grad_norm": 210.48765563964844, "learning_rate": 1.4558377180671734e-06, "loss": 7.742, "step": 387850 }, { "epoch": 0.7835017392744741, "grad_norm": 108.25033569335938, "learning_rate": 1.4555915037223438e-06, "loss": 15.4602, "step": 387860 }, { "epoch": 0.7835219399071579, "grad_norm": 8.609034538269043, "learning_rate": 1.455345306652533e-06, "loss": 10.8407, "step": 387870 }, { "epoch": 0.7835421405398417, "grad_norm": 219.0823974609375, "learning_rate": 1.4550991268589393e-06, "loss": 34.5009, "step": 387880 }, { "epoch": 0.7835623411725255, "grad_norm": 121.86177062988281, "learning_rate": 1.4548529643427607e-06, "loss": 16.5747, "step": 387890 }, { "epoch": 0.7835825418052094, "grad_norm": 280.77081298828125, "learning_rate": 1.4546068191051988e-06, "loss": 16.7478, "step": 387900 }, { "epoch": 0.7836027424378932, "grad_norm": 152.8258056640625, "learning_rate": 1.4543606911474545e-06, "loss": 22.0623, "step": 387910 }, { "epoch": 0.783622943070577, "grad_norm": 452.426025390625, "learning_rate": 1.4541145804707268e-06, "loss": 28.5235, "step": 387920 }, { "epoch": 0.7836431437032608, "grad_norm": 496.1844482421875, "learning_rate": 1.4538684870762127e-06, "loss": 16.8242, "step": 387930 }, { "epoch": 0.7836633443359446, "grad_norm": 205.73187255859375, "learning_rate": 1.4536224109651148e-06, "loss": 14.7191, "step": 387940 }, { "epoch": 0.7836835449686285, "grad_norm": 85.98100280761719, "learning_rate": 1.4533763521386319e-06, "loss": 19.5622, "step": 387950 }, { "epoch": 0.7837037456013123, "grad_norm": 93.58921813964844, "learning_rate": 1.4531303105979605e-06, "loss": 7.5024, "step": 387960 }, { "epoch": 0.783723946233996, "grad_norm": 162.13291931152344, "learning_rate": 1.4528842863443033e-06, "loss": 13.9582, "step": 387970 }, { "epoch": 0.7837441468666798, "grad_norm": 368.0335693359375, "learning_rate": 1.4526382793788564e-06, "loss": 15.7887, "step": 387980 }, { "epoch": 0.7837643474993636, "grad_norm": 118.779052734375, "learning_rate": 1.4523922897028215e-06, "loss": 18.0837, "step": 387990 }, { "epoch": 0.7837845481320475, "grad_norm": 160.97503662109375, "learning_rate": 1.4521463173173966e-06, "loss": 11.5087, "step": 388000 }, { "epoch": 0.7838047487647313, "grad_norm": 233.3734893798828, "learning_rate": 1.4519003622237788e-06, "loss": 14.3115, "step": 388010 }, { "epoch": 0.7838249493974151, "grad_norm": 348.1415100097656, "learning_rate": 1.4516544244231695e-06, "loss": 17.6093, "step": 388020 }, { "epoch": 0.7838451500300989, "grad_norm": 189.06253051757812, "learning_rate": 1.4514085039167652e-06, "loss": 4.4898, "step": 388030 }, { "epoch": 0.7838653506627827, "grad_norm": 345.5284118652344, "learning_rate": 1.4511626007057667e-06, "loss": 10.2741, "step": 388040 }, { "epoch": 0.7838855512954666, "grad_norm": 380.59832763671875, "learning_rate": 1.4509167147913693e-06, "loss": 12.9027, "step": 388050 }, { "epoch": 0.7839057519281504, "grad_norm": 443.894287109375, "learning_rate": 1.4506708461747754e-06, "loss": 18.8658, "step": 388060 }, { "epoch": 0.7839259525608342, "grad_norm": 437.2759704589844, "learning_rate": 1.4504249948571814e-06, "loss": 23.6951, "step": 388070 }, { "epoch": 0.783946153193518, "grad_norm": 260.9906311035156, "learning_rate": 1.4501791608397835e-06, "loss": 29.1178, "step": 388080 }, { "epoch": 0.7839663538262018, "grad_norm": 323.01849365234375, "learning_rate": 1.449933344123784e-06, "loss": 14.8279, "step": 388090 }, { "epoch": 0.7839865544588857, "grad_norm": 464.3163757324219, "learning_rate": 1.4496875447103781e-06, "loss": 26.4409, "step": 388100 }, { "epoch": 0.7840067550915695, "grad_norm": 293.2833251953125, "learning_rate": 1.4494417626007633e-06, "loss": 17.1304, "step": 388110 }, { "epoch": 0.7840269557242533, "grad_norm": 229.27281188964844, "learning_rate": 1.449195997796139e-06, "loss": 12.2866, "step": 388120 }, { "epoch": 0.7840471563569371, "grad_norm": 108.07600402832031, "learning_rate": 1.4489502502977037e-06, "loss": 13.269, "step": 388130 }, { "epoch": 0.7840673569896209, "grad_norm": 363.1028137207031, "learning_rate": 1.4487045201066547e-06, "loss": 21.1918, "step": 388140 }, { "epoch": 0.7840875576223048, "grad_norm": 599.3868408203125, "learning_rate": 1.4484588072241873e-06, "loss": 22.33, "step": 388150 }, { "epoch": 0.7841077582549886, "grad_norm": 1.3592357635498047, "learning_rate": 1.4482131116515026e-06, "loss": 10.8026, "step": 388160 }, { "epoch": 0.7841279588876724, "grad_norm": 404.5920715332031, "learning_rate": 1.4479674333897964e-06, "loss": 14.8203, "step": 388170 }, { "epoch": 0.7841481595203562, "grad_norm": 378.4433898925781, "learning_rate": 1.4477217724402643e-06, "loss": 15.6374, "step": 388180 }, { "epoch": 0.78416836015304, "grad_norm": 270.4464111328125, "learning_rate": 1.4474761288041057e-06, "loss": 12.5897, "step": 388190 }, { "epoch": 0.7841885607857239, "grad_norm": 1344.0645751953125, "learning_rate": 1.4472305024825189e-06, "loss": 22.6332, "step": 388200 }, { "epoch": 0.7842087614184077, "grad_norm": 168.61964416503906, "learning_rate": 1.4469848934767e-06, "loss": 9.9179, "step": 388210 }, { "epoch": 0.7842289620510915, "grad_norm": 180.02552795410156, "learning_rate": 1.4467393017878444e-06, "loss": 21.1299, "step": 388220 }, { "epoch": 0.7842491626837752, "grad_norm": 323.9595031738281, "learning_rate": 1.446493727417152e-06, "loss": 28.0647, "step": 388230 }, { "epoch": 0.784269363316459, "grad_norm": 76.49543762207031, "learning_rate": 1.4462481703658177e-06, "loss": 8.5584, "step": 388240 }, { "epoch": 0.7842895639491428, "grad_norm": 498.9045104980469, "learning_rate": 1.4460026306350378e-06, "loss": 11.7002, "step": 388250 }, { "epoch": 0.7843097645818267, "grad_norm": 521.3787231445312, "learning_rate": 1.4457571082260113e-06, "loss": 22.6733, "step": 388260 }, { "epoch": 0.7843299652145105, "grad_norm": 156.16351318359375, "learning_rate": 1.445511603139932e-06, "loss": 10.5527, "step": 388270 }, { "epoch": 0.7843501658471943, "grad_norm": 214.96559143066406, "learning_rate": 1.4452661153779996e-06, "loss": 25.6624, "step": 388280 }, { "epoch": 0.7843703664798781, "grad_norm": 186.5121612548828, "learning_rate": 1.445020644941409e-06, "loss": 21.2167, "step": 388290 }, { "epoch": 0.7843905671125619, "grad_norm": 321.2003479003906, "learning_rate": 1.4447751918313552e-06, "loss": 13.9303, "step": 388300 }, { "epoch": 0.7844107677452458, "grad_norm": 661.0518188476562, "learning_rate": 1.4445297560490373e-06, "loss": 33.1244, "step": 388310 }, { "epoch": 0.7844309683779296, "grad_norm": 363.9385681152344, "learning_rate": 1.4442843375956506e-06, "loss": 24.6696, "step": 388320 }, { "epoch": 0.7844511690106134, "grad_norm": 192.94200134277344, "learning_rate": 1.4440389364723889e-06, "loss": 21.5704, "step": 388330 }, { "epoch": 0.7844713696432972, "grad_norm": 217.00933837890625, "learning_rate": 1.4437935526804497e-06, "loss": 9.0075, "step": 388340 }, { "epoch": 0.784491570275981, "grad_norm": 174.43661499023438, "learning_rate": 1.4435481862210315e-06, "loss": 14.9648, "step": 388350 }, { "epoch": 0.7845117709086649, "grad_norm": 223.96200561523438, "learning_rate": 1.4433028370953279e-06, "loss": 21.0505, "step": 388360 }, { "epoch": 0.7845319715413487, "grad_norm": 250.60910034179688, "learning_rate": 1.4430575053045337e-06, "loss": 17.8896, "step": 388370 }, { "epoch": 0.7845521721740325, "grad_norm": 128.46368408203125, "learning_rate": 1.4428121908498472e-06, "loss": 25.4731, "step": 388380 }, { "epoch": 0.7845723728067163, "grad_norm": 433.1712951660156, "learning_rate": 1.4425668937324623e-06, "loss": 16.1521, "step": 388390 }, { "epoch": 0.7845925734394001, "grad_norm": 31.850547790527344, "learning_rate": 1.4423216139535735e-06, "loss": 22.5226, "step": 388400 }, { "epoch": 0.784612774072084, "grad_norm": 226.79258728027344, "learning_rate": 1.4420763515143777e-06, "loss": 25.8415, "step": 388410 }, { "epoch": 0.7846329747047678, "grad_norm": 135.6475372314453, "learning_rate": 1.4418311064160735e-06, "loss": 16.4391, "step": 388420 }, { "epoch": 0.7846531753374516, "grad_norm": 6.935695171356201, "learning_rate": 1.4415858786598496e-06, "loss": 12.4819, "step": 388430 }, { "epoch": 0.7846733759701354, "grad_norm": 344.50482177734375, "learning_rate": 1.4413406682469044e-06, "loss": 23.4274, "step": 388440 }, { "epoch": 0.7846935766028192, "grad_norm": 553.3461303710938, "learning_rate": 1.4410954751784352e-06, "loss": 24.8776, "step": 388450 }, { "epoch": 0.7847137772355031, "grad_norm": 569.1242065429688, "learning_rate": 1.440850299455635e-06, "loss": 29.178, "step": 388460 }, { "epoch": 0.7847339778681869, "grad_norm": 264.9673767089844, "learning_rate": 1.4406051410796968e-06, "loss": 20.9562, "step": 388470 }, { "epoch": 0.7847541785008707, "grad_norm": 324.87396240234375, "learning_rate": 1.4403600000518191e-06, "loss": 15.1118, "step": 388480 }, { "epoch": 0.7847743791335544, "grad_norm": 211.48023986816406, "learning_rate": 1.4401148763731953e-06, "loss": 13.6061, "step": 388490 }, { "epoch": 0.7847945797662382, "grad_norm": 575.14501953125, "learning_rate": 1.4398697700450181e-06, "loss": 22.8862, "step": 388500 }, { "epoch": 0.784814780398922, "grad_norm": 128.52870178222656, "learning_rate": 1.4396246810684839e-06, "loss": 15.8176, "step": 388510 }, { "epoch": 0.7848349810316059, "grad_norm": 260.6927490234375, "learning_rate": 1.4393796094447886e-06, "loss": 20.1276, "step": 388520 }, { "epoch": 0.7848551816642897, "grad_norm": 145.7863311767578, "learning_rate": 1.4391345551751251e-06, "loss": 11.1682, "step": 388530 }, { "epoch": 0.7848753822969735, "grad_norm": 213.95742797851562, "learning_rate": 1.4388895182606867e-06, "loss": 8.8265, "step": 388540 }, { "epoch": 0.7848955829296573, "grad_norm": 711.3648071289062, "learning_rate": 1.4386444987026705e-06, "loss": 16.7509, "step": 388550 }, { "epoch": 0.7849157835623412, "grad_norm": 296.3977355957031, "learning_rate": 1.4383994965022684e-06, "loss": 16.3959, "step": 388560 }, { "epoch": 0.784935984195025, "grad_norm": 407.4393615722656, "learning_rate": 1.4381545116606744e-06, "loss": 22.0173, "step": 388570 }, { "epoch": 0.7849561848277088, "grad_norm": 379.6836242675781, "learning_rate": 1.4379095441790847e-06, "loss": 28.2002, "step": 388580 }, { "epoch": 0.7849763854603926, "grad_norm": 121.0509262084961, "learning_rate": 1.4376645940586898e-06, "loss": 18.0762, "step": 388590 }, { "epoch": 0.7849965860930764, "grad_norm": 610.8275146484375, "learning_rate": 1.4374196613006874e-06, "loss": 23.8113, "step": 388600 }, { "epoch": 0.7850167867257603, "grad_norm": 153.95645141601562, "learning_rate": 1.4371747459062695e-06, "loss": 12.0811, "step": 388610 }, { "epoch": 0.7850369873584441, "grad_norm": 141.9251251220703, "learning_rate": 1.4369298478766286e-06, "loss": 22.6062, "step": 388620 }, { "epoch": 0.7850571879911279, "grad_norm": 30.947370529174805, "learning_rate": 1.4366849672129607e-06, "loss": 19.3245, "step": 388630 }, { "epoch": 0.7850773886238117, "grad_norm": 25.888994216918945, "learning_rate": 1.4364401039164566e-06, "loss": 14.0383, "step": 388640 }, { "epoch": 0.7850975892564955, "grad_norm": 863.11181640625, "learning_rate": 1.4361952579883127e-06, "loss": 13.1211, "step": 388650 }, { "epoch": 0.7851177898891794, "grad_norm": 266.4909973144531, "learning_rate": 1.4359504294297195e-06, "loss": 16.3043, "step": 388660 }, { "epoch": 0.7851379905218632, "grad_norm": 306.98443603515625, "learning_rate": 1.4357056182418727e-06, "loss": 27.6102, "step": 388670 }, { "epoch": 0.785158191154547, "grad_norm": 130.6201629638672, "learning_rate": 1.4354608244259649e-06, "loss": 18.7795, "step": 388680 }, { "epoch": 0.7851783917872308, "grad_norm": 255.7649383544922, "learning_rate": 1.4352160479831873e-06, "loss": 11.1545, "step": 388690 }, { "epoch": 0.7851985924199146, "grad_norm": 176.26959228515625, "learning_rate": 1.4349712889147355e-06, "loss": 11.6397, "step": 388700 }, { "epoch": 0.7852187930525985, "grad_norm": 295.94580078125, "learning_rate": 1.4347265472218014e-06, "loss": 21.0047, "step": 388710 }, { "epoch": 0.7852389936852823, "grad_norm": 6.615862846374512, "learning_rate": 1.4344818229055762e-06, "loss": 13.8264, "step": 388720 }, { "epoch": 0.7852591943179661, "grad_norm": 416.576904296875, "learning_rate": 1.434237115967254e-06, "loss": 23.8102, "step": 388730 }, { "epoch": 0.7852793949506498, "grad_norm": 272.1756286621094, "learning_rate": 1.4339924264080308e-06, "loss": 14.4334, "step": 388740 }, { "epoch": 0.7852995955833336, "grad_norm": 398.3642272949219, "learning_rate": 1.433747754229093e-06, "loss": 23.7442, "step": 388750 }, { "epoch": 0.7853197962160174, "grad_norm": 348.1199645996094, "learning_rate": 1.4335030994316357e-06, "loss": 16.4986, "step": 388760 }, { "epoch": 0.7853399968487013, "grad_norm": 214.75543212890625, "learning_rate": 1.4332584620168538e-06, "loss": 17.0368, "step": 388770 }, { "epoch": 0.7853601974813851, "grad_norm": 188.7235107421875, "learning_rate": 1.4330138419859375e-06, "loss": 33.2299, "step": 388780 }, { "epoch": 0.7853803981140689, "grad_norm": 12.866677284240723, "learning_rate": 1.4327692393400771e-06, "loss": 15.9268, "step": 388790 }, { "epoch": 0.7854005987467527, "grad_norm": 335.775146484375, "learning_rate": 1.4325246540804672e-06, "loss": 23.4427, "step": 388800 }, { "epoch": 0.7854207993794365, "grad_norm": 3.2989556789398193, "learning_rate": 1.4322800862083009e-06, "loss": 7.2267, "step": 388810 }, { "epoch": 0.7854410000121204, "grad_norm": 498.54132080078125, "learning_rate": 1.4320355357247689e-06, "loss": 16.2224, "step": 388820 }, { "epoch": 0.7854612006448042, "grad_norm": 124.73262023925781, "learning_rate": 1.4317910026310611e-06, "loss": 21.0037, "step": 388830 }, { "epoch": 0.785481401277488, "grad_norm": 0.0, "learning_rate": 1.431546486928373e-06, "loss": 19.878, "step": 388840 }, { "epoch": 0.7855016019101718, "grad_norm": 398.97625732421875, "learning_rate": 1.4313019886178942e-06, "loss": 16.7632, "step": 388850 }, { "epoch": 0.7855218025428556, "grad_norm": 335.5660095214844, "learning_rate": 1.4310575077008154e-06, "loss": 17.5135, "step": 388860 }, { "epoch": 0.7855420031755395, "grad_norm": 259.23724365234375, "learning_rate": 1.4308130441783307e-06, "loss": 15.4127, "step": 388870 }, { "epoch": 0.7855622038082233, "grad_norm": 241.80581665039062, "learning_rate": 1.4305685980516293e-06, "loss": 17.3936, "step": 388880 }, { "epoch": 0.7855824044409071, "grad_norm": 105.48463439941406, "learning_rate": 1.430324169321905e-06, "loss": 7.6988, "step": 388890 }, { "epoch": 0.7856026050735909, "grad_norm": 1351.6390380859375, "learning_rate": 1.4300797579903476e-06, "loss": 34.2322, "step": 388900 }, { "epoch": 0.7856228057062747, "grad_norm": 134.53964233398438, "learning_rate": 1.429835364058147e-06, "loss": 23.637, "step": 388910 }, { "epoch": 0.7856430063389586, "grad_norm": 469.89471435546875, "learning_rate": 1.4295909875264973e-06, "loss": 15.1801, "step": 388920 }, { "epoch": 0.7856632069716424, "grad_norm": 462.8110046386719, "learning_rate": 1.4293466283965878e-06, "loss": 18.4767, "step": 388930 }, { "epoch": 0.7856834076043262, "grad_norm": 221.388916015625, "learning_rate": 1.4291022866696086e-06, "loss": 29.1875, "step": 388940 }, { "epoch": 0.78570360823701, "grad_norm": 79.65106964111328, "learning_rate": 1.428857962346752e-06, "loss": 31.8819, "step": 388950 }, { "epoch": 0.7857238088696938, "grad_norm": 249.96401977539062, "learning_rate": 1.4286136554292096e-06, "loss": 9.9069, "step": 388960 }, { "epoch": 0.7857440095023777, "grad_norm": 162.8196563720703, "learning_rate": 1.4283693659181713e-06, "loss": 18.4539, "step": 388970 }, { "epoch": 0.7857642101350615, "grad_norm": 172.65858459472656, "learning_rate": 1.4281250938148262e-06, "loss": 14.1193, "step": 388980 }, { "epoch": 0.7857844107677453, "grad_norm": 197.45440673828125, "learning_rate": 1.4278808391203674e-06, "loss": 12.6619, "step": 388990 }, { "epoch": 0.785804611400429, "grad_norm": 387.7592468261719, "learning_rate": 1.4276366018359845e-06, "loss": 15.7725, "step": 389000 }, { "epoch": 0.7858248120331128, "grad_norm": 314.5818786621094, "learning_rate": 1.4273923819628654e-06, "loss": 18.1583, "step": 389010 }, { "epoch": 0.7858450126657966, "grad_norm": 251.29774475097656, "learning_rate": 1.427148179502203e-06, "loss": 24.7854, "step": 389020 }, { "epoch": 0.7858652132984805, "grad_norm": 639.8449096679688, "learning_rate": 1.42690399445519e-06, "loss": 24.732, "step": 389030 }, { "epoch": 0.7858854139311643, "grad_norm": 399.4125671386719, "learning_rate": 1.4266598268230102e-06, "loss": 21.496, "step": 389040 }, { "epoch": 0.7859056145638481, "grad_norm": 0.0, "learning_rate": 1.4264156766068577e-06, "loss": 23.9383, "step": 389050 }, { "epoch": 0.7859258151965319, "grad_norm": 233.56837463378906, "learning_rate": 1.4261715438079227e-06, "loss": 24.4178, "step": 389060 }, { "epoch": 0.7859460158292157, "grad_norm": 367.3778991699219, "learning_rate": 1.4259274284273943e-06, "loss": 19.0032, "step": 389070 }, { "epoch": 0.7859662164618996, "grad_norm": 219.5296173095703, "learning_rate": 1.4256833304664609e-06, "loss": 21.734, "step": 389080 }, { "epoch": 0.7859864170945834, "grad_norm": 248.1083221435547, "learning_rate": 1.425439249926313e-06, "loss": 17.3965, "step": 389090 }, { "epoch": 0.7860066177272672, "grad_norm": 295.58624267578125, "learning_rate": 1.4251951868081438e-06, "loss": 20.1584, "step": 389100 }, { "epoch": 0.786026818359951, "grad_norm": 185.41017150878906, "learning_rate": 1.4249511411131367e-06, "loss": 19.0956, "step": 389110 }, { "epoch": 0.7860470189926348, "grad_norm": 372.18408203125, "learning_rate": 1.4247071128424838e-06, "loss": 17.1941, "step": 389120 }, { "epoch": 0.7860672196253187, "grad_norm": 332.6990051269531, "learning_rate": 1.424463101997377e-06, "loss": 9.9156, "step": 389130 }, { "epoch": 0.7860874202580025, "grad_norm": 254.2989959716797, "learning_rate": 1.424219108579003e-06, "loss": 15.6126, "step": 389140 }, { "epoch": 0.7861076208906863, "grad_norm": 127.84619140625, "learning_rate": 1.4239751325885499e-06, "loss": 13.7519, "step": 389150 }, { "epoch": 0.7861278215233701, "grad_norm": 405.25970458984375, "learning_rate": 1.4237311740272097e-06, "loss": 18.2351, "step": 389160 }, { "epoch": 0.786148022156054, "grad_norm": 0.0, "learning_rate": 1.4234872328961702e-06, "loss": 13.23, "step": 389170 }, { "epoch": 0.7861682227887378, "grad_norm": 594.2636108398438, "learning_rate": 1.4232433091966187e-06, "loss": 30.4003, "step": 389180 }, { "epoch": 0.7861884234214216, "grad_norm": 134.83416748046875, "learning_rate": 1.4229994029297467e-06, "loss": 27.3915, "step": 389190 }, { "epoch": 0.7862086240541054, "grad_norm": 571.1751708984375, "learning_rate": 1.4227555140967402e-06, "loss": 14.0678, "step": 389200 }, { "epoch": 0.7862288246867892, "grad_norm": 661.9761962890625, "learning_rate": 1.4225116426987916e-06, "loss": 25.9219, "step": 389210 }, { "epoch": 0.786249025319473, "grad_norm": 195.4805450439453, "learning_rate": 1.4222677887370868e-06, "loss": 6.5861, "step": 389220 }, { "epoch": 0.7862692259521569, "grad_norm": 196.07427978515625, "learning_rate": 1.4220239522128138e-06, "loss": 21.1284, "step": 389230 }, { "epoch": 0.7862894265848407, "grad_norm": 150.67138671875, "learning_rate": 1.421780133127163e-06, "loss": 10.0138, "step": 389240 }, { "epoch": 0.7863096272175244, "grad_norm": 270.1505126953125, "learning_rate": 1.4215363314813208e-06, "loss": 13.1679, "step": 389250 }, { "epoch": 0.7863298278502082, "grad_norm": 111.02875518798828, "learning_rate": 1.4212925472764777e-06, "loss": 16.7495, "step": 389260 }, { "epoch": 0.786350028482892, "grad_norm": 128.27792358398438, "learning_rate": 1.4210487805138195e-06, "loss": 29.3929, "step": 389270 }, { "epoch": 0.7863702291155759, "grad_norm": 339.1668701171875, "learning_rate": 1.4208050311945365e-06, "loss": 15.4236, "step": 389280 }, { "epoch": 0.7863904297482597, "grad_norm": 368.4068603515625, "learning_rate": 1.4205612993198165e-06, "loss": 27.338, "step": 389290 }, { "epoch": 0.7864106303809435, "grad_norm": 196.6361541748047, "learning_rate": 1.420317584890844e-06, "loss": 18.2999, "step": 389300 }, { "epoch": 0.7864308310136273, "grad_norm": 343.2928466796875, "learning_rate": 1.4200738879088117e-06, "loss": 17.2679, "step": 389310 }, { "epoch": 0.7864510316463111, "grad_norm": 199.85171508789062, "learning_rate": 1.4198302083749049e-06, "loss": 11.8438, "step": 389320 }, { "epoch": 0.786471232278995, "grad_norm": 67.61869812011719, "learning_rate": 1.4195865462903102e-06, "loss": 17.4814, "step": 389330 }, { "epoch": 0.7864914329116788, "grad_norm": 372.1341857910156, "learning_rate": 1.4193429016562161e-06, "loss": 12.1717, "step": 389340 }, { "epoch": 0.7865116335443626, "grad_norm": 312.80462646484375, "learning_rate": 1.4190992744738135e-06, "loss": 14.6291, "step": 389350 }, { "epoch": 0.7865318341770464, "grad_norm": 212.23077392578125, "learning_rate": 1.4188556647442836e-06, "loss": 17.4359, "step": 389360 }, { "epoch": 0.7865520348097302, "grad_norm": 234.572509765625, "learning_rate": 1.4186120724688169e-06, "loss": 18.0573, "step": 389370 }, { "epoch": 0.7865722354424141, "grad_norm": 421.0780944824219, "learning_rate": 1.4183684976486024e-06, "loss": 12.8179, "step": 389380 }, { "epoch": 0.7865924360750979, "grad_norm": 218.20697021484375, "learning_rate": 1.4181249402848246e-06, "loss": 24.6235, "step": 389390 }, { "epoch": 0.7866126367077817, "grad_norm": 733.5469970703125, "learning_rate": 1.4178814003786706e-06, "loss": 17.2591, "step": 389400 }, { "epoch": 0.7866328373404655, "grad_norm": 189.36854553222656, "learning_rate": 1.4176378779313282e-06, "loss": 20.6908, "step": 389410 }, { "epoch": 0.7866530379731493, "grad_norm": 211.37281799316406, "learning_rate": 1.417394372943987e-06, "loss": 11.4417, "step": 389420 }, { "epoch": 0.7866732386058332, "grad_norm": 188.55284118652344, "learning_rate": 1.4171508854178284e-06, "loss": 12.0852, "step": 389430 }, { "epoch": 0.786693439238517, "grad_norm": 352.80242919921875, "learning_rate": 1.4169074153540418e-06, "loss": 13.4355, "step": 389440 }, { "epoch": 0.7867136398712008, "grad_norm": 348.797119140625, "learning_rate": 1.4166639627538153e-06, "loss": 12.0845, "step": 389450 }, { "epoch": 0.7867338405038846, "grad_norm": 180.8582763671875, "learning_rate": 1.416420527618334e-06, "loss": 19.7237, "step": 389460 }, { "epoch": 0.7867540411365684, "grad_norm": 205.78396606445312, "learning_rate": 1.4161771099487832e-06, "loss": 15.4973, "step": 389470 }, { "epoch": 0.7867742417692523, "grad_norm": 150.43373107910156, "learning_rate": 1.4159337097463515e-06, "loss": 14.1364, "step": 389480 }, { "epoch": 0.7867944424019361, "grad_norm": 163.03968811035156, "learning_rate": 1.415690327012223e-06, "loss": 23.5773, "step": 389490 }, { "epoch": 0.7868146430346199, "grad_norm": 92.24357604980469, "learning_rate": 1.4154469617475864e-06, "loss": 12.1157, "step": 389500 }, { "epoch": 0.7868348436673036, "grad_norm": 605.7415161132812, "learning_rate": 1.4152036139536269e-06, "loss": 16.3002, "step": 389510 }, { "epoch": 0.7868550442999874, "grad_norm": 63.65967559814453, "learning_rate": 1.4149602836315285e-06, "loss": 13.0718, "step": 389520 }, { "epoch": 0.7868752449326712, "grad_norm": 295.32537841796875, "learning_rate": 1.4147169707824805e-06, "loss": 20.375, "step": 389530 }, { "epoch": 0.7868954455653551, "grad_norm": 224.61203002929688, "learning_rate": 1.414473675407667e-06, "loss": 10.9175, "step": 389540 }, { "epoch": 0.7869156461980389, "grad_norm": 193.99159240722656, "learning_rate": 1.4142303975082723e-06, "loss": 25.4591, "step": 389550 }, { "epoch": 0.7869358468307227, "grad_norm": 143.17723083496094, "learning_rate": 1.413987137085484e-06, "loss": 12.3736, "step": 389560 }, { "epoch": 0.7869560474634065, "grad_norm": 376.6431579589844, "learning_rate": 1.413743894140489e-06, "loss": 23.777, "step": 389570 }, { "epoch": 0.7869762480960903, "grad_norm": 226.5075225830078, "learning_rate": 1.4135006686744711e-06, "loss": 18.5839, "step": 389580 }, { "epoch": 0.7869964487287742, "grad_norm": 254.0447998046875, "learning_rate": 1.4132574606886146e-06, "loss": 13.7845, "step": 389590 }, { "epoch": 0.787016649361458, "grad_norm": 248.10939025878906, "learning_rate": 1.4130142701841076e-06, "loss": 16.2194, "step": 389600 }, { "epoch": 0.7870368499941418, "grad_norm": 0.0, "learning_rate": 1.4127710971621339e-06, "loss": 18.9934, "step": 389610 }, { "epoch": 0.7870570506268256, "grad_norm": 464.31622314453125, "learning_rate": 1.4125279416238773e-06, "loss": 17.2174, "step": 389620 }, { "epoch": 0.7870772512595094, "grad_norm": 118.74166870117188, "learning_rate": 1.412284803570525e-06, "loss": 8.7052, "step": 389630 }, { "epoch": 0.7870974518921933, "grad_norm": 299.9891357421875, "learning_rate": 1.4120416830032641e-06, "loss": 19.213, "step": 389640 }, { "epoch": 0.7871176525248771, "grad_norm": 178.8135528564453, "learning_rate": 1.4117985799232735e-06, "loss": 18.7878, "step": 389650 }, { "epoch": 0.7871378531575609, "grad_norm": 87.82231903076172, "learning_rate": 1.4115554943317416e-06, "loss": 20.7476, "step": 389660 }, { "epoch": 0.7871580537902447, "grad_norm": 179.60142517089844, "learning_rate": 1.4113124262298544e-06, "loss": 30.6449, "step": 389670 }, { "epoch": 0.7871782544229285, "grad_norm": 224.93910217285156, "learning_rate": 1.4110693756187954e-06, "loss": 15.7866, "step": 389680 }, { "epoch": 0.7871984550556124, "grad_norm": 223.9901580810547, "learning_rate": 1.4108263424997475e-06, "loss": 22.6244, "step": 389690 }, { "epoch": 0.7872186556882962, "grad_norm": 145.09034729003906, "learning_rate": 1.4105833268738966e-06, "loss": 10.0173, "step": 389700 }, { "epoch": 0.78723885632098, "grad_norm": 658.6116333007812, "learning_rate": 1.4103403287424306e-06, "loss": 19.6111, "step": 389710 }, { "epoch": 0.7872590569536638, "grad_norm": 125.37450408935547, "learning_rate": 1.4100973481065266e-06, "loss": 17.6719, "step": 389720 }, { "epoch": 0.7872792575863476, "grad_norm": 92.36499786376953, "learning_rate": 1.4098543849673734e-06, "loss": 11.1696, "step": 389730 }, { "epoch": 0.7872994582190315, "grad_norm": 158.11468505859375, "learning_rate": 1.4096114393261557e-06, "loss": 18.6133, "step": 389740 }, { "epoch": 0.7873196588517153, "grad_norm": 566.9324340820312, "learning_rate": 1.4093685111840567e-06, "loss": 12.9622, "step": 389750 }, { "epoch": 0.787339859484399, "grad_norm": 89.23612213134766, "learning_rate": 1.4091256005422583e-06, "loss": 14.6173, "step": 389760 }, { "epoch": 0.7873600601170828, "grad_norm": 319.2309265136719, "learning_rate": 1.4088827074019479e-06, "loss": 32.2419, "step": 389770 }, { "epoch": 0.7873802607497666, "grad_norm": 569.8267822265625, "learning_rate": 1.4086398317643074e-06, "loss": 30.9478, "step": 389780 }, { "epoch": 0.7874004613824505, "grad_norm": 124.90144348144531, "learning_rate": 1.4083969736305191e-06, "loss": 13.6604, "step": 389790 }, { "epoch": 0.7874206620151343, "grad_norm": 42.044891357421875, "learning_rate": 1.4081541330017706e-06, "loss": 27.6706, "step": 389800 }, { "epoch": 0.7874408626478181, "grad_norm": 355.5982971191406, "learning_rate": 1.4079113098792413e-06, "loss": 27.9106, "step": 389810 }, { "epoch": 0.7874610632805019, "grad_norm": 20.312686920166016, "learning_rate": 1.407668504264118e-06, "loss": 16.1749, "step": 389820 }, { "epoch": 0.7874812639131857, "grad_norm": 415.4949035644531, "learning_rate": 1.4074257161575828e-06, "loss": 23.8151, "step": 389830 }, { "epoch": 0.7875014645458696, "grad_norm": 60.10762405395508, "learning_rate": 1.407182945560817e-06, "loss": 10.4726, "step": 389840 }, { "epoch": 0.7875216651785534, "grad_norm": 165.85964965820312, "learning_rate": 1.4069401924750082e-06, "loss": 9.6803, "step": 389850 }, { "epoch": 0.7875418658112372, "grad_norm": 313.8074645996094, "learning_rate": 1.4066974569013346e-06, "loss": 12.3684, "step": 389860 }, { "epoch": 0.787562066443921, "grad_norm": 221.18521118164062, "learning_rate": 1.4064547388409838e-06, "loss": 25.2069, "step": 389870 }, { "epoch": 0.7875822670766048, "grad_norm": 509.8290100097656, "learning_rate": 1.4062120382951355e-06, "loss": 18.8958, "step": 389880 }, { "epoch": 0.7876024677092887, "grad_norm": 300.2735595703125, "learning_rate": 1.405969355264975e-06, "loss": 8.7204, "step": 389890 }, { "epoch": 0.7876226683419725, "grad_norm": 200.26466369628906, "learning_rate": 1.4057266897516842e-06, "loss": 21.0617, "step": 389900 }, { "epoch": 0.7876428689746563, "grad_norm": 394.1932678222656, "learning_rate": 1.4054840417564436e-06, "loss": 8.9613, "step": 389910 }, { "epoch": 0.7876630696073401, "grad_norm": 125.610595703125, "learning_rate": 1.4052414112804396e-06, "loss": 9.524, "step": 389920 }, { "epoch": 0.7876832702400239, "grad_norm": 161.48448181152344, "learning_rate": 1.404998798324853e-06, "loss": 11.4527, "step": 389930 }, { "epoch": 0.7877034708727078, "grad_norm": 152.41319274902344, "learning_rate": 1.404756202890865e-06, "loss": 17.2687, "step": 389940 }, { "epoch": 0.7877236715053916, "grad_norm": 338.13751220703125, "learning_rate": 1.4045136249796588e-06, "loss": 16.8425, "step": 389950 }, { "epoch": 0.7877438721380754, "grad_norm": 271.4814453125, "learning_rate": 1.4042710645924207e-06, "loss": 8.1209, "step": 389960 }, { "epoch": 0.7877640727707592, "grad_norm": 312.9481201171875, "learning_rate": 1.4040285217303256e-06, "loss": 21.3578, "step": 389970 }, { "epoch": 0.787784273403443, "grad_norm": 289.2872314453125, "learning_rate": 1.4037859963945598e-06, "loss": 15.7865, "step": 389980 }, { "epoch": 0.7878044740361269, "grad_norm": 395.5421447753906, "learning_rate": 1.4035434885863064e-06, "loss": 11.3721, "step": 389990 }, { "epoch": 0.7878246746688107, "grad_norm": 68.84208679199219, "learning_rate": 1.4033009983067454e-06, "loss": 13.7153, "step": 390000 }, { "epoch": 0.7878448753014945, "grad_norm": 824.5088500976562, "learning_rate": 1.4030585255570577e-06, "loss": 17.9009, "step": 390010 }, { "epoch": 0.7878650759341782, "grad_norm": 118.33230590820312, "learning_rate": 1.402816070338427e-06, "loss": 12.5773, "step": 390020 }, { "epoch": 0.787885276566862, "grad_norm": 23.347389221191406, "learning_rate": 1.4025736326520373e-06, "loss": 13.9563, "step": 390030 }, { "epoch": 0.7879054771995458, "grad_norm": 520.8345336914062, "learning_rate": 1.402331212499064e-06, "loss": 12.3134, "step": 390040 }, { "epoch": 0.7879256778322297, "grad_norm": 239.73251342773438, "learning_rate": 1.4020888098806924e-06, "loss": 19.2427, "step": 390050 }, { "epoch": 0.7879458784649135, "grad_norm": 41.689453125, "learning_rate": 1.401846424798105e-06, "loss": 17.4938, "step": 390060 }, { "epoch": 0.7879660790975973, "grad_norm": 377.1200866699219, "learning_rate": 1.4016040572524813e-06, "loss": 12.0682, "step": 390070 }, { "epoch": 0.7879862797302811, "grad_norm": 160.70571899414062, "learning_rate": 1.4013617072450019e-06, "loss": 34.5986, "step": 390080 }, { "epoch": 0.788006480362965, "grad_norm": 221.7627410888672, "learning_rate": 1.401119374776851e-06, "loss": 17.2055, "step": 390090 }, { "epoch": 0.7880266809956488, "grad_norm": 175.22467041015625, "learning_rate": 1.4008770598492072e-06, "loss": 7.9553, "step": 390100 }, { "epoch": 0.7880468816283326, "grad_norm": 286.597412109375, "learning_rate": 1.4006347624632505e-06, "loss": 21.7024, "step": 390110 }, { "epoch": 0.7880670822610164, "grad_norm": 254.11712646484375, "learning_rate": 1.4003924826201653e-06, "loss": 19.7913, "step": 390120 }, { "epoch": 0.7880872828937002, "grad_norm": 347.97698974609375, "learning_rate": 1.4001502203211286e-06, "loss": 17.6848, "step": 390130 }, { "epoch": 0.788107483526384, "grad_norm": 311.739501953125, "learning_rate": 1.399907975567325e-06, "loss": 23.9958, "step": 390140 }, { "epoch": 0.7881276841590679, "grad_norm": 240.4696044921875, "learning_rate": 1.3996657483599318e-06, "loss": 12.6005, "step": 390150 }, { "epoch": 0.7881478847917517, "grad_norm": 191.77345275878906, "learning_rate": 1.3994235387001326e-06, "loss": 11.9961, "step": 390160 }, { "epoch": 0.7881680854244355, "grad_norm": 304.16827392578125, "learning_rate": 1.3991813465891046e-06, "loss": 13.852, "step": 390170 }, { "epoch": 0.7881882860571193, "grad_norm": 447.4962158203125, "learning_rate": 1.3989391720280316e-06, "loss": 23.2397, "step": 390180 }, { "epoch": 0.7882084866898031, "grad_norm": 178.92068481445312, "learning_rate": 1.3986970150180923e-06, "loss": 16.73, "step": 390190 }, { "epoch": 0.788228687322487, "grad_norm": 61.68071365356445, "learning_rate": 1.3984548755604655e-06, "loss": 28.2658, "step": 390200 }, { "epoch": 0.7882488879551708, "grad_norm": 301.2347717285156, "learning_rate": 1.3982127536563345e-06, "loss": 18.7226, "step": 390210 }, { "epoch": 0.7882690885878546, "grad_norm": 200.91592407226562, "learning_rate": 1.3979706493068772e-06, "loss": 9.5835, "step": 390220 }, { "epoch": 0.7882892892205384, "grad_norm": 168.3613739013672, "learning_rate": 1.397728562513273e-06, "loss": 18.944, "step": 390230 }, { "epoch": 0.7883094898532222, "grad_norm": 589.9331665039062, "learning_rate": 1.397486493276703e-06, "loss": 35.2399, "step": 390240 }, { "epoch": 0.7883296904859061, "grad_norm": 251.90640258789062, "learning_rate": 1.3972444415983495e-06, "loss": 21.2505, "step": 390250 }, { "epoch": 0.7883498911185899, "grad_norm": 146.84149169921875, "learning_rate": 1.397002407479387e-06, "loss": 18.7945, "step": 390260 }, { "epoch": 0.7883700917512737, "grad_norm": 247.73011779785156, "learning_rate": 1.3967603909209976e-06, "loss": 17.0814, "step": 390270 }, { "epoch": 0.7883902923839574, "grad_norm": 265.45343017578125, "learning_rate": 1.3965183919243624e-06, "loss": 15.6773, "step": 390280 }, { "epoch": 0.7884104930166412, "grad_norm": 85.62440490722656, "learning_rate": 1.3962764104906596e-06, "loss": 20.4839, "step": 390290 }, { "epoch": 0.788430693649325, "grad_norm": 384.3556823730469, "learning_rate": 1.3960344466210669e-06, "loss": 34.382, "step": 390300 }, { "epoch": 0.7884508942820089, "grad_norm": 197.79977416992188, "learning_rate": 1.3957925003167655e-06, "loss": 21.6095, "step": 390310 }, { "epoch": 0.7884710949146927, "grad_norm": 82.48686981201172, "learning_rate": 1.3955505715789368e-06, "loss": 14.0907, "step": 390320 }, { "epoch": 0.7884912955473765, "grad_norm": 44.241886138916016, "learning_rate": 1.395308660408755e-06, "loss": 29.5422, "step": 390330 }, { "epoch": 0.7885114961800603, "grad_norm": 314.9237060546875, "learning_rate": 1.3950667668074015e-06, "loss": 20.7573, "step": 390340 }, { "epoch": 0.7885316968127442, "grad_norm": 130.89208984375, "learning_rate": 1.3948248907760565e-06, "loss": 16.1754, "step": 390350 }, { "epoch": 0.788551897445428, "grad_norm": 64.89344024658203, "learning_rate": 1.3945830323158982e-06, "loss": 27.3932, "step": 390360 }, { "epoch": 0.7885720980781118, "grad_norm": 192.45751953125, "learning_rate": 1.394341191428103e-06, "loss": 10.4163, "step": 390370 }, { "epoch": 0.7885922987107956, "grad_norm": 194.18630981445312, "learning_rate": 1.3940993681138533e-06, "loss": 21.8587, "step": 390380 }, { "epoch": 0.7886124993434794, "grad_norm": 206.67506408691406, "learning_rate": 1.3938575623743262e-06, "loss": 25.4804, "step": 390390 }, { "epoch": 0.7886326999761633, "grad_norm": 175.30599975585938, "learning_rate": 1.3936157742106977e-06, "loss": 13.003, "step": 390400 }, { "epoch": 0.7886529006088471, "grad_norm": 111.41796112060547, "learning_rate": 1.3933740036241505e-06, "loss": 22.5252, "step": 390410 }, { "epoch": 0.7886731012415309, "grad_norm": 291.4173583984375, "learning_rate": 1.3931322506158596e-06, "loss": 17.5072, "step": 390420 }, { "epoch": 0.7886933018742147, "grad_norm": 39.11369705200195, "learning_rate": 1.3928905151870059e-06, "loss": 17.0073, "step": 390430 }, { "epoch": 0.7887135025068985, "grad_norm": 205.7732696533203, "learning_rate": 1.3926487973387665e-06, "loss": 8.714, "step": 390440 }, { "epoch": 0.7887337031395824, "grad_norm": 197.4651336669922, "learning_rate": 1.3924070970723176e-06, "loss": 10.7358, "step": 390450 }, { "epoch": 0.7887539037722662, "grad_norm": 244.48448181152344, "learning_rate": 1.3921654143888403e-06, "loss": 12.4059, "step": 390460 }, { "epoch": 0.78877410440495, "grad_norm": 314.1690979003906, "learning_rate": 1.39192374928951e-06, "loss": 17.2969, "step": 390470 }, { "epoch": 0.7887943050376338, "grad_norm": 17.684673309326172, "learning_rate": 1.3916821017755073e-06, "loss": 15.8962, "step": 390480 }, { "epoch": 0.7888145056703176, "grad_norm": 801.8357543945312, "learning_rate": 1.3914404718480067e-06, "loss": 27.0105, "step": 390490 }, { "epoch": 0.7888347063030015, "grad_norm": 332.5932922363281, "learning_rate": 1.3911988595081894e-06, "loss": 9.9117, "step": 390500 }, { "epoch": 0.7888549069356853, "grad_norm": 148.60386657714844, "learning_rate": 1.3909572647572312e-06, "loss": 18.6719, "step": 390510 }, { "epoch": 0.7888751075683691, "grad_norm": 164.45216369628906, "learning_rate": 1.3907156875963073e-06, "loss": 19.7203, "step": 390520 }, { "epoch": 0.7888953082010528, "grad_norm": 281.331787109375, "learning_rate": 1.3904741280265998e-06, "loss": 16.8353, "step": 390530 }, { "epoch": 0.7889155088337366, "grad_norm": 481.3197326660156, "learning_rate": 1.3902325860492832e-06, "loss": 12.3015, "step": 390540 }, { "epoch": 0.7889357094664204, "grad_norm": 969.7797241210938, "learning_rate": 1.3899910616655338e-06, "loss": 38.4087, "step": 390550 }, { "epoch": 0.7889559100991043, "grad_norm": 467.8034362792969, "learning_rate": 1.38974955487653e-06, "loss": 16.4433, "step": 390560 }, { "epoch": 0.7889761107317881, "grad_norm": 279.6859436035156, "learning_rate": 1.389508065683452e-06, "loss": 11.8289, "step": 390570 }, { "epoch": 0.7889963113644719, "grad_norm": 339.20916748046875, "learning_rate": 1.3892665940874705e-06, "loss": 25.0625, "step": 390580 }, { "epoch": 0.7890165119971557, "grad_norm": 264.67742919921875, "learning_rate": 1.3890251400897663e-06, "loss": 28.5999, "step": 390590 }, { "epoch": 0.7890367126298395, "grad_norm": 193.97393798828125, "learning_rate": 1.3887837036915169e-06, "loss": 18.1135, "step": 390600 }, { "epoch": 0.7890569132625234, "grad_norm": 169.39662170410156, "learning_rate": 1.3885422848938974e-06, "loss": 16.0831, "step": 390610 }, { "epoch": 0.7890771138952072, "grad_norm": 339.45709228515625, "learning_rate": 1.3883008836980837e-06, "loss": 13.2955, "step": 390620 }, { "epoch": 0.789097314527891, "grad_norm": 173.91537475585938, "learning_rate": 1.3880595001052533e-06, "loss": 15.2689, "step": 390630 }, { "epoch": 0.7891175151605748, "grad_norm": 225.97947692871094, "learning_rate": 1.3878181341165858e-06, "loss": 15.3871, "step": 390640 }, { "epoch": 0.7891377157932586, "grad_norm": 398.6194763183594, "learning_rate": 1.3875767857332512e-06, "loss": 18.8848, "step": 390650 }, { "epoch": 0.7891579164259425, "grad_norm": 254.34739685058594, "learning_rate": 1.38733545495643e-06, "loss": 32.3013, "step": 390660 }, { "epoch": 0.7891781170586263, "grad_norm": 340.9490661621094, "learning_rate": 1.3870941417872985e-06, "loss": 18.7212, "step": 390670 }, { "epoch": 0.7891983176913101, "grad_norm": 12.201582908630371, "learning_rate": 1.3868528462270326e-06, "loss": 17.783, "step": 390680 }, { "epoch": 0.7892185183239939, "grad_norm": 243.3224639892578, "learning_rate": 1.3866115682768055e-06, "loss": 13.3577, "step": 390690 }, { "epoch": 0.7892387189566777, "grad_norm": 152.58932495117188, "learning_rate": 1.3863703079377971e-06, "loss": 14.3685, "step": 390700 }, { "epoch": 0.7892589195893616, "grad_norm": 235.34710693359375, "learning_rate": 1.3861290652111819e-06, "loss": 13.9805, "step": 390710 }, { "epoch": 0.7892791202220454, "grad_norm": 383.85772705078125, "learning_rate": 1.3858878400981335e-06, "loss": 17.5873, "step": 390720 }, { "epoch": 0.7892993208547292, "grad_norm": 413.3513488769531, "learning_rate": 1.3856466325998307e-06, "loss": 16.6266, "step": 390730 }, { "epoch": 0.789319521487413, "grad_norm": 198.61376953125, "learning_rate": 1.3854054427174468e-06, "loss": 14.4449, "step": 390740 }, { "epoch": 0.7893397221200968, "grad_norm": 565.2077026367188, "learning_rate": 1.3851642704521596e-06, "loss": 16.4567, "step": 390750 }, { "epoch": 0.7893599227527807, "grad_norm": 454.0048522949219, "learning_rate": 1.3849231158051418e-06, "loss": 12.2367, "step": 390760 }, { "epoch": 0.7893801233854645, "grad_norm": 48.9958610534668, "learning_rate": 1.3846819787775723e-06, "loss": 14.3357, "step": 390770 }, { "epoch": 0.7894003240181483, "grad_norm": 254.6192626953125, "learning_rate": 1.3844408593706238e-06, "loss": 13.9997, "step": 390780 }, { "epoch": 0.789420524650832, "grad_norm": 129.65170288085938, "learning_rate": 1.3841997575854703e-06, "loss": 10.6724, "step": 390790 }, { "epoch": 0.7894407252835158, "grad_norm": 233.35665893554688, "learning_rate": 1.3839586734232907e-06, "loss": 11.6794, "step": 390800 }, { "epoch": 0.7894609259161997, "grad_norm": 37.22419738769531, "learning_rate": 1.3837176068852565e-06, "loss": 4.3753, "step": 390810 }, { "epoch": 0.7894811265488835, "grad_norm": 228.70919799804688, "learning_rate": 1.3834765579725452e-06, "loss": 12.8101, "step": 390820 }, { "epoch": 0.7895013271815673, "grad_norm": 168.91867065429688, "learning_rate": 1.3832355266863307e-06, "loss": 10.4656, "step": 390830 }, { "epoch": 0.7895215278142511, "grad_norm": 215.65719604492188, "learning_rate": 1.3829945130277861e-06, "loss": 22.5831, "step": 390840 }, { "epoch": 0.7895417284469349, "grad_norm": 488.27569580078125, "learning_rate": 1.3827535169980888e-06, "loss": 17.5267, "step": 390850 }, { "epoch": 0.7895619290796188, "grad_norm": 644.7637939453125, "learning_rate": 1.3825125385984123e-06, "loss": 27.2732, "step": 390860 }, { "epoch": 0.7895821297123026, "grad_norm": 627.1541137695312, "learning_rate": 1.3822715778299295e-06, "loss": 22.5378, "step": 390870 }, { "epoch": 0.7896023303449864, "grad_norm": 124.99207305908203, "learning_rate": 1.3820306346938161e-06, "loss": 19.9832, "step": 390880 }, { "epoch": 0.7896225309776702, "grad_norm": 465.4399108886719, "learning_rate": 1.3817897091912485e-06, "loss": 18.2056, "step": 390890 }, { "epoch": 0.789642731610354, "grad_norm": 221.91409301757812, "learning_rate": 1.3815488013233986e-06, "loss": 13.0697, "step": 390900 }, { "epoch": 0.7896629322430379, "grad_norm": 434.96630859375, "learning_rate": 1.3813079110914396e-06, "loss": 28.7992, "step": 390910 }, { "epoch": 0.7896831328757217, "grad_norm": 183.5871124267578, "learning_rate": 1.3810670384965469e-06, "loss": 18.4667, "step": 390920 }, { "epoch": 0.7897033335084055, "grad_norm": 301.3536376953125, "learning_rate": 1.380826183539898e-06, "loss": 30.3429, "step": 390930 }, { "epoch": 0.7897235341410893, "grad_norm": 292.64697265625, "learning_rate": 1.38058534622266e-06, "loss": 21.5172, "step": 390940 }, { "epoch": 0.7897437347737731, "grad_norm": 296.518310546875, "learning_rate": 1.3803445265460096e-06, "loss": 8.7521, "step": 390950 }, { "epoch": 0.789763935406457, "grad_norm": 416.9619445800781, "learning_rate": 1.3801037245111233e-06, "loss": 15.2635, "step": 390960 }, { "epoch": 0.7897841360391408, "grad_norm": 428.948974609375, "learning_rate": 1.3798629401191715e-06, "loss": 18.876, "step": 390970 }, { "epoch": 0.7898043366718246, "grad_norm": 107.63420104980469, "learning_rate": 1.3796221733713278e-06, "loss": 15.2175, "step": 390980 }, { "epoch": 0.7898245373045084, "grad_norm": 126.30062103271484, "learning_rate": 1.3793814242687676e-06, "loss": 16.9822, "step": 390990 }, { "epoch": 0.7898447379371922, "grad_norm": 98.96853637695312, "learning_rate": 1.3791406928126638e-06, "loss": 13.9221, "step": 391000 }, { "epoch": 0.789864938569876, "grad_norm": 152.3211669921875, "learning_rate": 1.3788999790041867e-06, "loss": 11.9526, "step": 391010 }, { "epoch": 0.7898851392025599, "grad_norm": 350.4453430175781, "learning_rate": 1.3786592828445144e-06, "loss": 15.8589, "step": 391020 }, { "epoch": 0.7899053398352437, "grad_norm": 357.67584228515625, "learning_rate": 1.3784186043348151e-06, "loss": 7.9547, "step": 391030 }, { "epoch": 0.7899255404679274, "grad_norm": 381.3420715332031, "learning_rate": 1.3781779434762666e-06, "loss": 40.129, "step": 391040 }, { "epoch": 0.7899457411006112, "grad_norm": 289.3614807128906, "learning_rate": 1.3779373002700391e-06, "loss": 19.5915, "step": 391050 }, { "epoch": 0.789965941733295, "grad_norm": 365.95513916015625, "learning_rate": 1.377696674717305e-06, "loss": 18.7974, "step": 391060 }, { "epoch": 0.7899861423659789, "grad_norm": 272.894775390625, "learning_rate": 1.3774560668192389e-06, "loss": 13.1604, "step": 391070 }, { "epoch": 0.7900063429986627, "grad_norm": 444.2506408691406, "learning_rate": 1.3772154765770106e-06, "loss": 24.8925, "step": 391080 }, { "epoch": 0.7900265436313465, "grad_norm": 189.24462890625, "learning_rate": 1.3769749039917968e-06, "loss": 28.9551, "step": 391090 }, { "epoch": 0.7900467442640303, "grad_norm": 393.8255615234375, "learning_rate": 1.3767343490647668e-06, "loss": 18.6129, "step": 391100 }, { "epoch": 0.7900669448967141, "grad_norm": 591.8316040039062, "learning_rate": 1.376493811797095e-06, "loss": 19.5453, "step": 391110 }, { "epoch": 0.790087145529398, "grad_norm": 531.25390625, "learning_rate": 1.3762532921899529e-06, "loss": 22.0293, "step": 391120 }, { "epoch": 0.7901073461620818, "grad_norm": 235.73712158203125, "learning_rate": 1.3760127902445114e-06, "loss": 25.1874, "step": 391130 }, { "epoch": 0.7901275467947656, "grad_norm": 445.7985534667969, "learning_rate": 1.3757723059619455e-06, "loss": 21.6681, "step": 391140 }, { "epoch": 0.7901477474274494, "grad_norm": 212.6785430908203, "learning_rate": 1.3755318393434259e-06, "loss": 28.1086, "step": 391150 }, { "epoch": 0.7901679480601332, "grad_norm": 38.189964294433594, "learning_rate": 1.3752913903901227e-06, "loss": 12.3999, "step": 391160 }, { "epoch": 0.7901881486928171, "grad_norm": 214.90350341796875, "learning_rate": 1.3750509591032102e-06, "loss": 15.9244, "step": 391170 }, { "epoch": 0.7902083493255009, "grad_norm": 167.56671142578125, "learning_rate": 1.3748105454838623e-06, "loss": 22.9148, "step": 391180 }, { "epoch": 0.7902285499581847, "grad_norm": 106.4455795288086, "learning_rate": 1.3745701495332447e-06, "loss": 10.8185, "step": 391190 }, { "epoch": 0.7902487505908685, "grad_norm": 252.7376708984375, "learning_rate": 1.3743297712525334e-06, "loss": 18.6703, "step": 391200 }, { "epoch": 0.7902689512235523, "grad_norm": 1.2692079544067383, "learning_rate": 1.3740894106428997e-06, "loss": 11.7295, "step": 391210 }, { "epoch": 0.7902891518562362, "grad_norm": 167.26058959960938, "learning_rate": 1.373849067705515e-06, "loss": 10.2614, "step": 391220 }, { "epoch": 0.79030935248892, "grad_norm": 214.4142608642578, "learning_rate": 1.3736087424415483e-06, "loss": 16.6402, "step": 391230 }, { "epoch": 0.7903295531216038, "grad_norm": 325.05029296875, "learning_rate": 1.373368434852173e-06, "loss": 10.9385, "step": 391240 }, { "epoch": 0.7903497537542876, "grad_norm": 160.5977783203125, "learning_rate": 1.373128144938563e-06, "loss": 9.412, "step": 391250 }, { "epoch": 0.7903699543869714, "grad_norm": 546.1732177734375, "learning_rate": 1.372887872701884e-06, "loss": 17.4925, "step": 391260 }, { "epoch": 0.7903901550196553, "grad_norm": 54.0604133605957, "learning_rate": 1.372647618143309e-06, "loss": 21.5941, "step": 391270 }, { "epoch": 0.7904103556523391, "grad_norm": 462.6307373046875, "learning_rate": 1.372407381264011e-06, "loss": 31.3298, "step": 391280 }, { "epoch": 0.7904305562850229, "grad_norm": 417.45184326171875, "learning_rate": 1.37216716206516e-06, "loss": 17.8493, "step": 391290 }, { "epoch": 0.7904507569177066, "grad_norm": 115.490966796875, "learning_rate": 1.3719269605479241e-06, "loss": 17.1174, "step": 391300 }, { "epoch": 0.7904709575503904, "grad_norm": 272.7155456542969, "learning_rate": 1.3716867767134783e-06, "loss": 25.7387, "step": 391310 }, { "epoch": 0.7904911581830742, "grad_norm": 28.392807006835938, "learning_rate": 1.3714466105629908e-06, "loss": 13.2, "step": 391320 }, { "epoch": 0.7905113588157581, "grad_norm": 330.7696228027344, "learning_rate": 1.3712064620976305e-06, "loss": 13.8645, "step": 391330 }, { "epoch": 0.7905315594484419, "grad_norm": 572.33056640625, "learning_rate": 1.3709663313185723e-06, "loss": 20.4206, "step": 391340 }, { "epoch": 0.7905517600811257, "grad_norm": 228.50250244140625, "learning_rate": 1.3707262182269814e-06, "loss": 14.4629, "step": 391350 }, { "epoch": 0.7905719607138095, "grad_norm": 377.2584228515625, "learning_rate": 1.370486122824033e-06, "loss": 19.9594, "step": 391360 }, { "epoch": 0.7905921613464933, "grad_norm": 497.0187072753906, "learning_rate": 1.3702460451108934e-06, "loss": 22.9046, "step": 391370 }, { "epoch": 0.7906123619791772, "grad_norm": 143.923583984375, "learning_rate": 1.370005985088736e-06, "loss": 18.1048, "step": 391380 }, { "epoch": 0.790632562611861, "grad_norm": 332.82281494140625, "learning_rate": 1.3697659427587284e-06, "loss": 19.2406, "step": 391390 }, { "epoch": 0.7906527632445448, "grad_norm": 394.8443603515625, "learning_rate": 1.3695259181220405e-06, "loss": 28.1962, "step": 391400 }, { "epoch": 0.7906729638772286, "grad_norm": 222.03135681152344, "learning_rate": 1.3692859111798446e-06, "loss": 14.8794, "step": 391410 }, { "epoch": 0.7906931645099124, "grad_norm": 286.568115234375, "learning_rate": 1.3690459219333068e-06, "loss": 22.5856, "step": 391420 }, { "epoch": 0.7907133651425963, "grad_norm": 312.208740234375, "learning_rate": 1.3688059503836004e-06, "loss": 18.7924, "step": 391430 }, { "epoch": 0.7907335657752801, "grad_norm": 255.4131622314453, "learning_rate": 1.3685659965318937e-06, "loss": 8.8213, "step": 391440 }, { "epoch": 0.7907537664079639, "grad_norm": 196.51722717285156, "learning_rate": 1.368326060379354e-06, "loss": 10.5961, "step": 391450 }, { "epoch": 0.7907739670406477, "grad_norm": 107.88954162597656, "learning_rate": 1.368086141927154e-06, "loss": 8.7219, "step": 391460 }, { "epoch": 0.7907941676733315, "grad_norm": 152.7507781982422, "learning_rate": 1.367846241176462e-06, "loss": 23.3977, "step": 391470 }, { "epoch": 0.7908143683060154, "grad_norm": 236.0130157470703, "learning_rate": 1.3676063581284454e-06, "loss": 16.9637, "step": 391480 }, { "epoch": 0.7908345689386992, "grad_norm": 390.30224609375, "learning_rate": 1.367366492784275e-06, "loss": 18.6745, "step": 391490 }, { "epoch": 0.790854769571383, "grad_norm": 356.69525146484375, "learning_rate": 1.3671266451451209e-06, "loss": 22.7324, "step": 391500 }, { "epoch": 0.7908749702040668, "grad_norm": 9.115467071533203, "learning_rate": 1.3668868152121505e-06, "loss": 14.4992, "step": 391510 }, { "epoch": 0.7908951708367506, "grad_norm": 204.87879943847656, "learning_rate": 1.3666470029865325e-06, "loss": 23.257, "step": 391520 }, { "epoch": 0.7909153714694345, "grad_norm": 256.8188781738281, "learning_rate": 1.3664072084694374e-06, "loss": 15.7719, "step": 391530 }, { "epoch": 0.7909355721021183, "grad_norm": 276.78277587890625, "learning_rate": 1.3661674316620332e-06, "loss": 12.6916, "step": 391540 }, { "epoch": 0.7909557727348021, "grad_norm": 518.7556762695312, "learning_rate": 1.3659276725654863e-06, "loss": 15.1787, "step": 391550 }, { "epoch": 0.7909759733674858, "grad_norm": 603.6201171875, "learning_rate": 1.3656879311809674e-06, "loss": 20.1029, "step": 391560 }, { "epoch": 0.7909961740001696, "grad_norm": 432.62445068359375, "learning_rate": 1.365448207509646e-06, "loss": 27.1968, "step": 391570 }, { "epoch": 0.7910163746328535, "grad_norm": 292.9189453125, "learning_rate": 1.3652085015526895e-06, "loss": 13.9597, "step": 391580 }, { "epoch": 0.7910365752655373, "grad_norm": 271.1680908203125, "learning_rate": 1.3649688133112644e-06, "loss": 25.3856, "step": 391590 }, { "epoch": 0.7910567758982211, "grad_norm": 328.74462890625, "learning_rate": 1.3647291427865417e-06, "loss": 15.6074, "step": 391600 }, { "epoch": 0.7910769765309049, "grad_norm": 347.0096435546875, "learning_rate": 1.364489489979688e-06, "loss": 21.0469, "step": 391610 }, { "epoch": 0.7910971771635887, "grad_norm": 504.7010498046875, "learning_rate": 1.3642498548918704e-06, "loss": 12.8427, "step": 391620 }, { "epoch": 0.7911173777962726, "grad_norm": 0.0, "learning_rate": 1.3640102375242598e-06, "loss": 18.948, "step": 391630 }, { "epoch": 0.7911375784289564, "grad_norm": 95.36141967773438, "learning_rate": 1.3637706378780209e-06, "loss": 17.732, "step": 391640 }, { "epoch": 0.7911577790616402, "grad_norm": 258.3627014160156, "learning_rate": 1.3635310559543235e-06, "loss": 24.0221, "step": 391650 }, { "epoch": 0.791177979694324, "grad_norm": 205.928466796875, "learning_rate": 1.3632914917543338e-06, "loss": 12.6501, "step": 391660 }, { "epoch": 0.7911981803270078, "grad_norm": 94.33451843261719, "learning_rate": 1.3630519452792219e-06, "loss": 16.5817, "step": 391670 }, { "epoch": 0.7912183809596917, "grad_norm": 94.63825225830078, "learning_rate": 1.3628124165301537e-06, "loss": 15.2196, "step": 391680 }, { "epoch": 0.7912385815923755, "grad_norm": 77.66868591308594, "learning_rate": 1.362572905508295e-06, "loss": 19.1116, "step": 391690 }, { "epoch": 0.7912587822250593, "grad_norm": 152.9215087890625, "learning_rate": 1.3623334122148164e-06, "loss": 21.9512, "step": 391700 }, { "epoch": 0.7912789828577431, "grad_norm": 123.73573303222656, "learning_rate": 1.3620939366508818e-06, "loss": 7.5644, "step": 391710 }, { "epoch": 0.7912991834904269, "grad_norm": 204.20164489746094, "learning_rate": 1.361854478817662e-06, "loss": 19.2011, "step": 391720 }, { "epoch": 0.7913193841231108, "grad_norm": 96.19065856933594, "learning_rate": 1.361615038716322e-06, "loss": 9.5968, "step": 391730 }, { "epoch": 0.7913395847557946, "grad_norm": 320.56207275390625, "learning_rate": 1.3613756163480275e-06, "loss": 33.3998, "step": 391740 }, { "epoch": 0.7913597853884784, "grad_norm": 173.8499755859375, "learning_rate": 1.3611362117139481e-06, "loss": 18.1146, "step": 391750 }, { "epoch": 0.7913799860211622, "grad_norm": 242.74917602539062, "learning_rate": 1.3608968248152498e-06, "loss": 13.4793, "step": 391760 }, { "epoch": 0.791400186653846, "grad_norm": 222.7771759033203, "learning_rate": 1.3606574556530976e-06, "loss": 13.6027, "step": 391770 }, { "epoch": 0.7914203872865299, "grad_norm": 9.392804145812988, "learning_rate": 1.3604181042286597e-06, "loss": 14.3643, "step": 391780 }, { "epoch": 0.7914405879192137, "grad_norm": 231.20779418945312, "learning_rate": 1.3601787705431052e-06, "loss": 16.5473, "step": 391790 }, { "epoch": 0.7914607885518975, "grad_norm": 355.26953125, "learning_rate": 1.3599394545975952e-06, "loss": 12.6252, "step": 391800 }, { "epoch": 0.7914809891845812, "grad_norm": 319.2149963378906, "learning_rate": 1.3597001563932982e-06, "loss": 19.3247, "step": 391810 }, { "epoch": 0.791501189817265, "grad_norm": 154.01596069335938, "learning_rate": 1.3594608759313832e-06, "loss": 10.5979, "step": 391820 }, { "epoch": 0.7915213904499488, "grad_norm": 227.66909790039062, "learning_rate": 1.3592216132130142e-06, "loss": 18.2573, "step": 391830 }, { "epoch": 0.7915415910826327, "grad_norm": 599.9718627929688, "learning_rate": 1.358982368239356e-06, "loss": 14.7102, "step": 391840 }, { "epoch": 0.7915617917153165, "grad_norm": 107.21186065673828, "learning_rate": 1.3587431410115765e-06, "loss": 11.2986, "step": 391850 }, { "epoch": 0.7915819923480003, "grad_norm": 274.064208984375, "learning_rate": 1.3585039315308436e-06, "loss": 21.7636, "step": 391860 }, { "epoch": 0.7916021929806841, "grad_norm": 825.1958618164062, "learning_rate": 1.3582647397983185e-06, "loss": 38.3153, "step": 391870 }, { "epoch": 0.791622393613368, "grad_norm": 91.26548767089844, "learning_rate": 1.3580255658151687e-06, "loss": 18.8254, "step": 391880 }, { "epoch": 0.7916425942460518, "grad_norm": 92.4037857055664, "learning_rate": 1.3577864095825627e-06, "loss": 11.3991, "step": 391890 }, { "epoch": 0.7916627948787356, "grad_norm": 214.36520385742188, "learning_rate": 1.3575472711016634e-06, "loss": 17.9944, "step": 391900 }, { "epoch": 0.7916829955114194, "grad_norm": 498.19500732421875, "learning_rate": 1.3573081503736362e-06, "loss": 13.9439, "step": 391910 }, { "epoch": 0.7917031961441032, "grad_norm": 123.29412841796875, "learning_rate": 1.3570690473996483e-06, "loss": 19.2872, "step": 391920 }, { "epoch": 0.791723396776787, "grad_norm": 369.1876525878906, "learning_rate": 1.356829962180864e-06, "loss": 15.8396, "step": 391930 }, { "epoch": 0.7917435974094709, "grad_norm": 132.96266174316406, "learning_rate": 1.356590894718447e-06, "loss": 20.2359, "step": 391940 }, { "epoch": 0.7917637980421547, "grad_norm": 163.537353515625, "learning_rate": 1.356351845013566e-06, "loss": 14.9085, "step": 391950 }, { "epoch": 0.7917839986748385, "grad_norm": 138.02053833007812, "learning_rate": 1.3561128130673823e-06, "loss": 10.8959, "step": 391960 }, { "epoch": 0.7918041993075223, "grad_norm": 67.9908676147461, "learning_rate": 1.3558737988810644e-06, "loss": 17.7529, "step": 391970 }, { "epoch": 0.7918243999402061, "grad_norm": 421.5635681152344, "learning_rate": 1.3556348024557743e-06, "loss": 11.5864, "step": 391980 }, { "epoch": 0.79184460057289, "grad_norm": 206.14918518066406, "learning_rate": 1.3553958237926794e-06, "loss": 23.8499, "step": 391990 }, { "epoch": 0.7918648012055738, "grad_norm": 756.670166015625, "learning_rate": 1.3551568628929434e-06, "loss": 32.3865, "step": 392000 }, { "epoch": 0.7918850018382576, "grad_norm": 350.7532653808594, "learning_rate": 1.3549179197577295e-06, "loss": 13.3893, "step": 392010 }, { "epoch": 0.7919052024709414, "grad_norm": 283.5830993652344, "learning_rate": 1.3546789943882045e-06, "loss": 16.3155, "step": 392020 }, { "epoch": 0.7919254031036252, "grad_norm": 129.85946655273438, "learning_rate": 1.3544400867855306e-06, "loss": 21.3893, "step": 392030 }, { "epoch": 0.7919456037363091, "grad_norm": 358.0461120605469, "learning_rate": 1.3542011969508756e-06, "loss": 20.6042, "step": 392040 }, { "epoch": 0.7919658043689929, "grad_norm": 340.8746032714844, "learning_rate": 1.3539623248854012e-06, "loss": 12.0876, "step": 392050 }, { "epoch": 0.7919860050016767, "grad_norm": 24.11623764038086, "learning_rate": 1.3537234705902709e-06, "loss": 5.8092, "step": 392060 }, { "epoch": 0.7920062056343604, "grad_norm": 419.4682922363281, "learning_rate": 1.353484634066652e-06, "loss": 9.9459, "step": 392070 }, { "epoch": 0.7920264062670442, "grad_norm": 445.34454345703125, "learning_rate": 1.3532458153157062e-06, "loss": 7.6881, "step": 392080 }, { "epoch": 0.7920466068997281, "grad_norm": 149.68397521972656, "learning_rate": 1.3530070143385966e-06, "loss": 13.4582, "step": 392090 }, { "epoch": 0.7920668075324119, "grad_norm": 243.070068359375, "learning_rate": 1.3527682311364886e-06, "loss": 21.217, "step": 392100 }, { "epoch": 0.7920870081650957, "grad_norm": 342.85260009765625, "learning_rate": 1.3525294657105476e-06, "loss": 10.109, "step": 392110 }, { "epoch": 0.7921072087977795, "grad_norm": 0.0, "learning_rate": 1.352290718061935e-06, "loss": 9.4317, "step": 392120 }, { "epoch": 0.7921274094304633, "grad_norm": 330.6348571777344, "learning_rate": 1.3520519881918143e-06, "loss": 25.748, "step": 392130 }, { "epoch": 0.7921476100631472, "grad_norm": 133.02635192871094, "learning_rate": 1.3518132761013509e-06, "loss": 19.4547, "step": 392140 }, { "epoch": 0.792167810695831, "grad_norm": 296.6041564941406, "learning_rate": 1.351574581791707e-06, "loss": 21.5884, "step": 392150 }, { "epoch": 0.7921880113285148, "grad_norm": 396.9137878417969, "learning_rate": 1.3513359052640445e-06, "loss": 17.5666, "step": 392160 }, { "epoch": 0.7922082119611986, "grad_norm": 229.1615447998047, "learning_rate": 1.3510972465195283e-06, "loss": 23.8788, "step": 392170 }, { "epoch": 0.7922284125938824, "grad_norm": 578.8016967773438, "learning_rate": 1.350858605559323e-06, "loss": 32.7969, "step": 392180 }, { "epoch": 0.7922486132265663, "grad_norm": 396.6239013671875, "learning_rate": 1.3506199823845905e-06, "loss": 25.4391, "step": 392190 }, { "epoch": 0.7922688138592501, "grad_norm": 224.752197265625, "learning_rate": 1.3503813769964923e-06, "loss": 18.8072, "step": 392200 }, { "epoch": 0.7922890144919339, "grad_norm": 228.20848083496094, "learning_rate": 1.3501427893961938e-06, "loss": 16.0852, "step": 392210 }, { "epoch": 0.7923092151246177, "grad_norm": 54.29498291015625, "learning_rate": 1.3499042195848571e-06, "loss": 25.5241, "step": 392220 }, { "epoch": 0.7923294157573015, "grad_norm": 91.87275695800781, "learning_rate": 1.3496656675636427e-06, "loss": 29.1993, "step": 392230 }, { "epoch": 0.7923496163899854, "grad_norm": 119.47010803222656, "learning_rate": 1.3494271333337162e-06, "loss": 19.7819, "step": 392240 }, { "epoch": 0.7923698170226692, "grad_norm": 69.37132263183594, "learning_rate": 1.349188616896238e-06, "loss": 30.3201, "step": 392250 }, { "epoch": 0.792390017655353, "grad_norm": 235.2445831298828, "learning_rate": 1.3489501182523735e-06, "loss": 20.3295, "step": 392260 }, { "epoch": 0.7924102182880368, "grad_norm": 275.5227355957031, "learning_rate": 1.3487116374032811e-06, "loss": 13.0038, "step": 392270 }, { "epoch": 0.7924304189207206, "grad_norm": 338.8359069824219, "learning_rate": 1.3484731743501272e-06, "loss": 29.4055, "step": 392280 }, { "epoch": 0.7924506195534045, "grad_norm": 271.2021484375, "learning_rate": 1.3482347290940723e-06, "loss": 18.5621, "step": 392290 }, { "epoch": 0.7924708201860883, "grad_norm": 166.6522979736328, "learning_rate": 1.3479963016362768e-06, "loss": 20.1542, "step": 392300 }, { "epoch": 0.7924910208187721, "grad_norm": 297.52716064453125, "learning_rate": 1.3477578919779062e-06, "loss": 24.7216, "step": 392310 }, { "epoch": 0.7925112214514558, "grad_norm": 496.1330871582031, "learning_rate": 1.3475195001201186e-06, "loss": 21.4515, "step": 392320 }, { "epoch": 0.7925314220841396, "grad_norm": 110.03520965576172, "learning_rate": 1.34728112606408e-06, "loss": 17.8735, "step": 392330 }, { "epoch": 0.7925516227168234, "grad_norm": 183.55506896972656, "learning_rate": 1.3470427698109496e-06, "loss": 18.6575, "step": 392340 }, { "epoch": 0.7925718233495073, "grad_norm": 139.1975555419922, "learning_rate": 1.3468044313618883e-06, "loss": 32.0101, "step": 392350 }, { "epoch": 0.7925920239821911, "grad_norm": 242.81640625, "learning_rate": 1.346566110718061e-06, "loss": 16.195, "step": 392360 }, { "epoch": 0.7926122246148749, "grad_norm": 299.5010070800781, "learning_rate": 1.3463278078806274e-06, "loss": 16.9455, "step": 392370 }, { "epoch": 0.7926324252475587, "grad_norm": 274.6398010253906, "learning_rate": 1.346089522850747e-06, "loss": 22.3089, "step": 392380 }, { "epoch": 0.7926526258802425, "grad_norm": 125.0633544921875, "learning_rate": 1.3458512556295833e-06, "loss": 19.0877, "step": 392390 }, { "epoch": 0.7926728265129264, "grad_norm": 226.63441467285156, "learning_rate": 1.3456130062183003e-06, "loss": 15.7393, "step": 392400 }, { "epoch": 0.7926930271456102, "grad_norm": 244.64349365234375, "learning_rate": 1.3453747746180535e-06, "loss": 29.4663, "step": 392410 }, { "epoch": 0.792713227778294, "grad_norm": 148.66908264160156, "learning_rate": 1.3451365608300066e-06, "loss": 15.4951, "step": 392420 }, { "epoch": 0.7927334284109778, "grad_norm": 391.25299072265625, "learning_rate": 1.3448983648553227e-06, "loss": 16.7641, "step": 392430 }, { "epoch": 0.7927536290436616, "grad_norm": 287.80029296875, "learning_rate": 1.3446601866951604e-06, "loss": 20.4229, "step": 392440 }, { "epoch": 0.7927738296763455, "grad_norm": 141.1724090576172, "learning_rate": 1.3444220263506797e-06, "loss": 18.1888, "step": 392450 }, { "epoch": 0.7927940303090293, "grad_norm": 252.00091552734375, "learning_rate": 1.3441838838230425e-06, "loss": 15.6695, "step": 392460 }, { "epoch": 0.7928142309417131, "grad_norm": 191.00791931152344, "learning_rate": 1.343945759113413e-06, "loss": 9.9387, "step": 392470 }, { "epoch": 0.7928344315743969, "grad_norm": 157.86338806152344, "learning_rate": 1.3437076522229454e-06, "loss": 9.0102, "step": 392480 }, { "epoch": 0.7928546322070807, "grad_norm": 79.69384765625, "learning_rate": 1.3434695631528028e-06, "loss": 21.3102, "step": 392490 }, { "epoch": 0.7928748328397646, "grad_norm": 323.73907470703125, "learning_rate": 1.3432314919041478e-06, "loss": 16.6018, "step": 392500 }, { "epoch": 0.7928950334724484, "grad_norm": 77.21026611328125, "learning_rate": 1.342993438478139e-06, "loss": 19.2767, "step": 392510 }, { "epoch": 0.7929152341051322, "grad_norm": 63.747501373291016, "learning_rate": 1.3427554028759355e-06, "loss": 11.9705, "step": 392520 }, { "epoch": 0.792935434737816, "grad_norm": 676.7313232421875, "learning_rate": 1.3425173850986994e-06, "loss": 16.0067, "step": 392530 }, { "epoch": 0.7929556353704998, "grad_norm": 362.4096984863281, "learning_rate": 1.3422793851475907e-06, "loss": 15.9531, "step": 392540 }, { "epoch": 0.7929758360031837, "grad_norm": 165.18959045410156, "learning_rate": 1.3420414030237667e-06, "loss": 5.4979, "step": 392550 }, { "epoch": 0.7929960366358675, "grad_norm": 434.9794921875, "learning_rate": 1.3418034387283907e-06, "loss": 19.7285, "step": 392560 }, { "epoch": 0.7930162372685513, "grad_norm": 300.0006408691406, "learning_rate": 1.3415654922626198e-06, "loss": 17.0832, "step": 392570 }, { "epoch": 0.793036437901235, "grad_norm": 526.0717163085938, "learning_rate": 1.3413275636276164e-06, "loss": 24.0548, "step": 392580 }, { "epoch": 0.7930566385339188, "grad_norm": 447.23333740234375, "learning_rate": 1.3410896528245371e-06, "loss": 20.4363, "step": 392590 }, { "epoch": 0.7930768391666027, "grad_norm": 208.10610961914062, "learning_rate": 1.3408517598545446e-06, "loss": 17.1852, "step": 392600 }, { "epoch": 0.7930970397992865, "grad_norm": 150.31869506835938, "learning_rate": 1.3406138847187971e-06, "loss": 9.4543, "step": 392610 }, { "epoch": 0.7931172404319703, "grad_norm": 142.71322631835938, "learning_rate": 1.340376027418452e-06, "loss": 31.255, "step": 392620 }, { "epoch": 0.7931374410646541, "grad_norm": 71.78744506835938, "learning_rate": 1.3401381879546716e-06, "loss": 16.0969, "step": 392630 }, { "epoch": 0.7931576416973379, "grad_norm": 485.1529541015625, "learning_rate": 1.3399003663286125e-06, "loss": 28.4524, "step": 392640 }, { "epoch": 0.7931778423300218, "grad_norm": 211.94520568847656, "learning_rate": 1.3396625625414362e-06, "loss": 26.5439, "step": 392650 }, { "epoch": 0.7931980429627056, "grad_norm": 427.2752685546875, "learning_rate": 1.3394247765943013e-06, "loss": 13.0431, "step": 392660 }, { "epoch": 0.7932182435953894, "grad_norm": 324.1437072753906, "learning_rate": 1.339187008488364e-06, "loss": 19.5998, "step": 392670 }, { "epoch": 0.7932384442280732, "grad_norm": 205.4122314453125, "learning_rate": 1.338949258224787e-06, "loss": 20.4837, "step": 392680 }, { "epoch": 0.793258644860757, "grad_norm": 244.03558349609375, "learning_rate": 1.3387115258047272e-06, "loss": 13.3546, "step": 392690 }, { "epoch": 0.7932788454934409, "grad_norm": 426.5598449707031, "learning_rate": 1.3384738112293415e-06, "loss": 18.9042, "step": 392700 }, { "epoch": 0.7932990461261247, "grad_norm": 254.97372436523438, "learning_rate": 1.3382361144997912e-06, "loss": 20.111, "step": 392710 }, { "epoch": 0.7933192467588085, "grad_norm": 285.9173583984375, "learning_rate": 1.337998435617235e-06, "loss": 14.1844, "step": 392720 }, { "epoch": 0.7933394473914923, "grad_norm": 758.956787109375, "learning_rate": 1.3377607745828302e-06, "loss": 14.0572, "step": 392730 }, { "epoch": 0.7933596480241761, "grad_norm": 194.4341583251953, "learning_rate": 1.337523131397734e-06, "loss": 14.8391, "step": 392740 }, { "epoch": 0.79337984865686, "grad_norm": 103.3471450805664, "learning_rate": 1.3372855060631067e-06, "loss": 18.4807, "step": 392750 }, { "epoch": 0.7934000492895438, "grad_norm": 163.5717315673828, "learning_rate": 1.3370478985801062e-06, "loss": 20.9635, "step": 392760 }, { "epoch": 0.7934202499222276, "grad_norm": 199.37950134277344, "learning_rate": 1.3368103089498886e-06, "loss": 22.3309, "step": 392770 }, { "epoch": 0.7934404505549114, "grad_norm": 244.03704833984375, "learning_rate": 1.3365727371736127e-06, "loss": 12.1587, "step": 392780 }, { "epoch": 0.7934606511875952, "grad_norm": 203.05616760253906, "learning_rate": 1.3363351832524385e-06, "loss": 9.1431, "step": 392790 }, { "epoch": 0.793480851820279, "grad_norm": 408.8966064453125, "learning_rate": 1.3360976471875226e-06, "loss": 15.2471, "step": 392800 }, { "epoch": 0.7935010524529629, "grad_norm": 367.71185302734375, "learning_rate": 1.3358601289800211e-06, "loss": 17.3462, "step": 392810 }, { "epoch": 0.7935212530856467, "grad_norm": 63.791107177734375, "learning_rate": 1.335622628631094e-06, "loss": 10.3598, "step": 392820 }, { "epoch": 0.7935414537183304, "grad_norm": 244.0222625732422, "learning_rate": 1.3353851461418976e-06, "loss": 10.7469, "step": 392830 }, { "epoch": 0.7935616543510142, "grad_norm": 383.0565490722656, "learning_rate": 1.3351476815135883e-06, "loss": 20.2904, "step": 392840 }, { "epoch": 0.793581854983698, "grad_norm": 182.74505615234375, "learning_rate": 1.3349102347473264e-06, "loss": 13.7074, "step": 392850 }, { "epoch": 0.7936020556163819, "grad_norm": 10.053631782531738, "learning_rate": 1.334672805844266e-06, "loss": 9.4783, "step": 392860 }, { "epoch": 0.7936222562490657, "grad_norm": 22.512046813964844, "learning_rate": 1.3344353948055672e-06, "loss": 13.208, "step": 392870 }, { "epoch": 0.7936424568817495, "grad_norm": 375.5625915527344, "learning_rate": 1.3341980016323841e-06, "loss": 22.3783, "step": 392880 }, { "epoch": 0.7936626575144333, "grad_norm": 245.97413635253906, "learning_rate": 1.333960626325877e-06, "loss": 42.1376, "step": 392890 }, { "epoch": 0.7936828581471171, "grad_norm": 272.2469482421875, "learning_rate": 1.333723268887201e-06, "loss": 18.9207, "step": 392900 }, { "epoch": 0.793703058779801, "grad_norm": 184.18504333496094, "learning_rate": 1.3334859293175113e-06, "loss": 13.612, "step": 392910 }, { "epoch": 0.7937232594124848, "grad_norm": 388.6477966308594, "learning_rate": 1.3332486076179684e-06, "loss": 21.5826, "step": 392920 }, { "epoch": 0.7937434600451686, "grad_norm": 107.17819213867188, "learning_rate": 1.3330113037897257e-06, "loss": 9.2164, "step": 392930 }, { "epoch": 0.7937636606778524, "grad_norm": 338.7286682128906, "learning_rate": 1.3327740178339421e-06, "loss": 8.0558, "step": 392940 }, { "epoch": 0.7937838613105362, "grad_norm": 72.96916961669922, "learning_rate": 1.3325367497517739e-06, "loss": 13.9598, "step": 392950 }, { "epoch": 0.7938040619432201, "grad_norm": 142.27342224121094, "learning_rate": 1.3322994995443744e-06, "loss": 27.6961, "step": 392960 }, { "epoch": 0.7938242625759039, "grad_norm": 215.4615936279297, "learning_rate": 1.3320622672129046e-06, "loss": 20.5717, "step": 392970 }, { "epoch": 0.7938444632085877, "grad_norm": 197.2680206298828, "learning_rate": 1.331825052758518e-06, "loss": 7.7976, "step": 392980 }, { "epoch": 0.7938646638412715, "grad_norm": 108.90486907958984, "learning_rate": 1.3315878561823697e-06, "loss": 14.1002, "step": 392990 }, { "epoch": 0.7938848644739553, "grad_norm": 408.47906494140625, "learning_rate": 1.3313506774856177e-06, "loss": 19.5174, "step": 393000 }, { "epoch": 0.7939050651066392, "grad_norm": 53.30064392089844, "learning_rate": 1.33111351666942e-06, "loss": 10.2356, "step": 393010 }, { "epoch": 0.793925265739323, "grad_norm": 298.9501037597656, "learning_rate": 1.3308763737349273e-06, "loss": 18.3207, "step": 393020 }, { "epoch": 0.7939454663720068, "grad_norm": 281.2471008300781, "learning_rate": 1.3306392486832982e-06, "loss": 23.7436, "step": 393030 }, { "epoch": 0.7939656670046906, "grad_norm": 97.62548828125, "learning_rate": 1.3304021415156898e-06, "loss": 11.8303, "step": 393040 }, { "epoch": 0.7939858676373744, "grad_norm": 257.8853759765625, "learning_rate": 1.3301650522332566e-06, "loss": 19.2906, "step": 393050 }, { "epoch": 0.7940060682700583, "grad_norm": 192.9200897216797, "learning_rate": 1.3299279808371517e-06, "loss": 12.6671, "step": 393060 }, { "epoch": 0.7940262689027421, "grad_norm": 192.97091674804688, "learning_rate": 1.329690927328533e-06, "loss": 9.7842, "step": 393070 }, { "epoch": 0.7940464695354259, "grad_norm": 85.63959503173828, "learning_rate": 1.3294538917085586e-06, "loss": 20.2864, "step": 393080 }, { "epoch": 0.7940666701681096, "grad_norm": 356.0364074707031, "learning_rate": 1.329216873978378e-06, "loss": 13.4421, "step": 393090 }, { "epoch": 0.7940868708007934, "grad_norm": 185.57374572753906, "learning_rate": 1.3289798741391486e-06, "loss": 17.002, "step": 393100 }, { "epoch": 0.7941070714334773, "grad_norm": 316.21295166015625, "learning_rate": 1.3287428921920275e-06, "loss": 14.2727, "step": 393110 }, { "epoch": 0.7941272720661611, "grad_norm": 51.31793212890625, "learning_rate": 1.328505928138169e-06, "loss": 23.2294, "step": 393120 }, { "epoch": 0.7941474726988449, "grad_norm": 278.34320068359375, "learning_rate": 1.3282689819787253e-06, "loss": 10.4321, "step": 393130 }, { "epoch": 0.7941676733315287, "grad_norm": 113.28660583496094, "learning_rate": 1.328032053714855e-06, "loss": 9.3822, "step": 393140 }, { "epoch": 0.7941878739642125, "grad_norm": 60.17445373535156, "learning_rate": 1.327795143347711e-06, "loss": 6.8671, "step": 393150 }, { "epoch": 0.7942080745968964, "grad_norm": 160.77700805664062, "learning_rate": 1.3275582508784462e-06, "loss": 17.8242, "step": 393160 }, { "epoch": 0.7942282752295802, "grad_norm": 205.61444091796875, "learning_rate": 1.3273213763082188e-06, "loss": 18.0419, "step": 393170 }, { "epoch": 0.794248475862264, "grad_norm": 820.8292236328125, "learning_rate": 1.3270845196381805e-06, "loss": 31.8239, "step": 393180 }, { "epoch": 0.7942686764949478, "grad_norm": 2.9397823810577393, "learning_rate": 1.3268476808694881e-06, "loss": 18.5469, "step": 393190 }, { "epoch": 0.7942888771276316, "grad_norm": 177.76744079589844, "learning_rate": 1.3266108600032928e-06, "loss": 15.7436, "step": 393200 }, { "epoch": 0.7943090777603155, "grad_norm": 238.83750915527344, "learning_rate": 1.3263740570407524e-06, "loss": 10.1452, "step": 393210 }, { "epoch": 0.7943292783929993, "grad_norm": 493.4523010253906, "learning_rate": 1.326137271983019e-06, "loss": 17.9118, "step": 393220 }, { "epoch": 0.7943494790256831, "grad_norm": 169.162353515625, "learning_rate": 1.3259005048312457e-06, "loss": 13.1674, "step": 393230 }, { "epoch": 0.7943696796583669, "grad_norm": 86.49345397949219, "learning_rate": 1.3256637555865892e-06, "loss": 9.0937, "step": 393240 }, { "epoch": 0.7943898802910507, "grad_norm": 233.2389678955078, "learning_rate": 1.3254270242502004e-06, "loss": 8.2885, "step": 393250 }, { "epoch": 0.7944100809237346, "grad_norm": 605.4463500976562, "learning_rate": 1.3251903108232362e-06, "loss": 25.8576, "step": 393260 }, { "epoch": 0.7944302815564184, "grad_norm": 190.46165466308594, "learning_rate": 1.3249536153068487e-06, "loss": 18.4976, "step": 393270 }, { "epoch": 0.7944504821891022, "grad_norm": 170.74588012695312, "learning_rate": 1.3247169377021896e-06, "loss": 17.4385, "step": 393280 }, { "epoch": 0.794470682821786, "grad_norm": 153.1312255859375, "learning_rate": 1.3244802780104166e-06, "loss": 27.7489, "step": 393290 }, { "epoch": 0.7944908834544698, "grad_norm": 52.14815139770508, "learning_rate": 1.3242436362326804e-06, "loss": 9.0858, "step": 393300 }, { "epoch": 0.7945110840871537, "grad_norm": 233.81898498535156, "learning_rate": 1.3240070123701337e-06, "loss": 9.1464, "step": 393310 }, { "epoch": 0.7945312847198375, "grad_norm": 806.1806640625, "learning_rate": 1.323770406423931e-06, "loss": 29.2783, "step": 393320 }, { "epoch": 0.7945514853525213, "grad_norm": 395.2083435058594, "learning_rate": 1.3235338183952268e-06, "loss": 34.61, "step": 393330 }, { "epoch": 0.7945716859852051, "grad_norm": 340.45330810546875, "learning_rate": 1.323297248285173e-06, "loss": 19.1847, "step": 393340 }, { "epoch": 0.7945918866178888, "grad_norm": 111.70598602294922, "learning_rate": 1.3230606960949204e-06, "loss": 9.0148, "step": 393350 }, { "epoch": 0.7946120872505726, "grad_norm": 190.54403686523438, "learning_rate": 1.322824161825626e-06, "loss": 15.3291, "step": 393360 }, { "epoch": 0.7946322878832565, "grad_norm": 248.9414520263672, "learning_rate": 1.3225876454784409e-06, "loss": 21.7343, "step": 393370 }, { "epoch": 0.7946524885159403, "grad_norm": 176.68727111816406, "learning_rate": 1.3223511470545158e-06, "loss": 11.7674, "step": 393380 }, { "epoch": 0.7946726891486241, "grad_norm": 297.8005676269531, "learning_rate": 1.3221146665550055e-06, "loss": 21.7978, "step": 393390 }, { "epoch": 0.7946928897813079, "grad_norm": 142.3574981689453, "learning_rate": 1.3218782039810634e-06, "loss": 18.9191, "step": 393400 }, { "epoch": 0.7947130904139917, "grad_norm": 365.3082580566406, "learning_rate": 1.321641759333841e-06, "loss": 18.3383, "step": 393410 }, { "epoch": 0.7947332910466756, "grad_norm": 149.17596435546875, "learning_rate": 1.3214053326144888e-06, "loss": 18.0697, "step": 393420 }, { "epoch": 0.7947534916793594, "grad_norm": 0.5815898776054382, "learning_rate": 1.321168923824162e-06, "loss": 12.2448, "step": 393430 }, { "epoch": 0.7947736923120432, "grad_norm": 178.906005859375, "learning_rate": 1.3209325329640126e-06, "loss": 17.2, "step": 393440 }, { "epoch": 0.794793892944727, "grad_norm": 54.58261489868164, "learning_rate": 1.3206961600351897e-06, "loss": 13.0191, "step": 393450 }, { "epoch": 0.7948140935774108, "grad_norm": 101.58492279052734, "learning_rate": 1.320459805038849e-06, "loss": 28.1889, "step": 393460 }, { "epoch": 0.7948342942100947, "grad_norm": 147.8878631591797, "learning_rate": 1.32022346797614e-06, "loss": 18.0921, "step": 393470 }, { "epoch": 0.7948544948427785, "grad_norm": 398.03863525390625, "learning_rate": 1.3199871488482163e-06, "loss": 10.2943, "step": 393480 }, { "epoch": 0.7948746954754623, "grad_norm": 240.48214721679688, "learning_rate": 1.3197508476562277e-06, "loss": 17.4853, "step": 393490 }, { "epoch": 0.7948948961081461, "grad_norm": 299.0706481933594, "learning_rate": 1.3195145644013286e-06, "loss": 13.2929, "step": 393500 }, { "epoch": 0.7949150967408299, "grad_norm": 163.60006713867188, "learning_rate": 1.3192782990846692e-06, "loss": 16.3905, "step": 393510 }, { "epoch": 0.7949352973735138, "grad_norm": 337.98394775390625, "learning_rate": 1.3190420517073993e-06, "loss": 22.6345, "step": 393520 }, { "epoch": 0.7949554980061976, "grad_norm": 300.5181884765625, "learning_rate": 1.3188058222706735e-06, "loss": 15.61, "step": 393530 }, { "epoch": 0.7949756986388814, "grad_norm": 63.15986251831055, "learning_rate": 1.3185696107756402e-06, "loss": 13.9191, "step": 393540 }, { "epoch": 0.7949958992715652, "grad_norm": 172.9537353515625, "learning_rate": 1.3183334172234536e-06, "loss": 20.2132, "step": 393550 }, { "epoch": 0.795016099904249, "grad_norm": 208.8089599609375, "learning_rate": 1.3180972416152637e-06, "loss": 12.0405, "step": 393560 }, { "epoch": 0.7950363005369329, "grad_norm": 186.70123291015625, "learning_rate": 1.3178610839522193e-06, "loss": 9.4785, "step": 393570 }, { "epoch": 0.7950565011696167, "grad_norm": 441.5470275878906, "learning_rate": 1.317624944235475e-06, "loss": 12.2331, "step": 393580 }, { "epoch": 0.7950767018023005, "grad_norm": 516.4625854492188, "learning_rate": 1.3173888224661802e-06, "loss": 13.2013, "step": 393590 }, { "epoch": 0.7950969024349842, "grad_norm": 201.08322143554688, "learning_rate": 1.317152718645484e-06, "loss": 16.757, "step": 393600 }, { "epoch": 0.795117103067668, "grad_norm": 130.7508544921875, "learning_rate": 1.3169166327745392e-06, "loss": 25.1023, "step": 393610 }, { "epoch": 0.7951373037003518, "grad_norm": 23.415002822875977, "learning_rate": 1.316680564854499e-06, "loss": 21.3062, "step": 393620 }, { "epoch": 0.7951575043330357, "grad_norm": 0.023515788838267326, "learning_rate": 1.3164445148865073e-06, "loss": 22.2597, "step": 393630 }, { "epoch": 0.7951777049657195, "grad_norm": 134.4195556640625, "learning_rate": 1.3162084828717187e-06, "loss": 16.5455, "step": 393640 }, { "epoch": 0.7951979055984033, "grad_norm": 309.4034729003906, "learning_rate": 1.3159724688112846e-06, "loss": 26.5361, "step": 393650 }, { "epoch": 0.7952181062310871, "grad_norm": 205.83053588867188, "learning_rate": 1.3157364727063542e-06, "loss": 11.9343, "step": 393660 }, { "epoch": 0.795238306863771, "grad_norm": 206.52474975585938, "learning_rate": 1.3155004945580757e-06, "loss": 13.8526, "step": 393670 }, { "epoch": 0.7952585074964548, "grad_norm": 119.48780059814453, "learning_rate": 1.3152645343676007e-06, "loss": 18.2349, "step": 393680 }, { "epoch": 0.7952787081291386, "grad_norm": 265.02081298828125, "learning_rate": 1.3150285921360823e-06, "loss": 19.1715, "step": 393690 }, { "epoch": 0.7952989087618224, "grad_norm": 270.5586242675781, "learning_rate": 1.314792667864665e-06, "loss": 12.9982, "step": 393700 }, { "epoch": 0.7953191093945062, "grad_norm": 195.99533081054688, "learning_rate": 1.3145567615545013e-06, "loss": 21.3887, "step": 393710 }, { "epoch": 0.79533931002719, "grad_norm": 162.3800506591797, "learning_rate": 1.3143208732067426e-06, "loss": 15.8001, "step": 393720 }, { "epoch": 0.7953595106598739, "grad_norm": 57.64122009277344, "learning_rate": 1.314085002822536e-06, "loss": 9.4556, "step": 393730 }, { "epoch": 0.7953797112925577, "grad_norm": 21.762073516845703, "learning_rate": 1.3138491504030314e-06, "loss": 8.9504, "step": 393740 }, { "epoch": 0.7953999119252415, "grad_norm": 247.30935668945312, "learning_rate": 1.3136133159493803e-06, "loss": 18.5175, "step": 393750 }, { "epoch": 0.7954201125579253, "grad_norm": 0.0, "learning_rate": 1.3133774994627307e-06, "loss": 24.3269, "step": 393760 }, { "epoch": 0.7954403131906091, "grad_norm": 170.1713409423828, "learning_rate": 1.313141700944231e-06, "loss": 18.6617, "step": 393770 }, { "epoch": 0.795460513823293, "grad_norm": 268.3996276855469, "learning_rate": 1.3129059203950306e-06, "loss": 19.1775, "step": 393780 }, { "epoch": 0.7954807144559768, "grad_norm": 231.27919006347656, "learning_rate": 1.312670157816282e-06, "loss": 13.131, "step": 393790 }, { "epoch": 0.7955009150886606, "grad_norm": 70.65496063232422, "learning_rate": 1.312434413209131e-06, "loss": 12.6134, "step": 393800 }, { "epoch": 0.7955211157213444, "grad_norm": 553.3592529296875, "learning_rate": 1.3121986865747267e-06, "loss": 14.1715, "step": 393810 }, { "epoch": 0.7955413163540282, "grad_norm": 347.2684326171875, "learning_rate": 1.3119629779142196e-06, "loss": 21.3706, "step": 393820 }, { "epoch": 0.7955615169867121, "grad_norm": 356.41607666015625, "learning_rate": 1.3117272872287578e-06, "loss": 19.6625, "step": 393830 }, { "epoch": 0.7955817176193959, "grad_norm": 185.1691131591797, "learning_rate": 1.3114916145194884e-06, "loss": 27.0623, "step": 393840 }, { "epoch": 0.7956019182520797, "grad_norm": 300.1994934082031, "learning_rate": 1.3112559597875628e-06, "loss": 19.5648, "step": 393850 }, { "epoch": 0.7956221188847634, "grad_norm": 467.4010314941406, "learning_rate": 1.3110203230341273e-06, "loss": 18.4519, "step": 393860 }, { "epoch": 0.7956423195174472, "grad_norm": 0.46879252791404724, "learning_rate": 1.3107847042603328e-06, "loss": 24.9828, "step": 393870 }, { "epoch": 0.7956625201501311, "grad_norm": 139.30465698242188, "learning_rate": 1.3105491034673256e-06, "loss": 12.4513, "step": 393880 }, { "epoch": 0.7956827207828149, "grad_norm": 179.56158447265625, "learning_rate": 1.3103135206562535e-06, "loss": 11.2883, "step": 393890 }, { "epoch": 0.7957029214154987, "grad_norm": 446.1418762207031, "learning_rate": 1.3100779558282673e-06, "loss": 17.56, "step": 393900 }, { "epoch": 0.7957231220481825, "grad_norm": 363.2339782714844, "learning_rate": 1.3098424089845136e-06, "loss": 23.381, "step": 393910 }, { "epoch": 0.7957433226808663, "grad_norm": 495.6734619140625, "learning_rate": 1.3096068801261386e-06, "loss": 16.019, "step": 393920 }, { "epoch": 0.7957635233135502, "grad_norm": 0.06504087895154953, "learning_rate": 1.3093713692542925e-06, "loss": 23.1123, "step": 393930 }, { "epoch": 0.795783723946234, "grad_norm": 586.4656982421875, "learning_rate": 1.309135876370124e-06, "loss": 13.911, "step": 393940 }, { "epoch": 0.7958039245789178, "grad_norm": 255.9374542236328, "learning_rate": 1.3089004014747797e-06, "loss": 15.63, "step": 393950 }, { "epoch": 0.7958241252116016, "grad_norm": 252.96490478515625, "learning_rate": 1.3086649445694056e-06, "loss": 19.4108, "step": 393960 }, { "epoch": 0.7958443258442854, "grad_norm": 224.26585388183594, "learning_rate": 1.308429505655152e-06, "loss": 19.3971, "step": 393970 }, { "epoch": 0.7958645264769693, "grad_norm": 173.17831420898438, "learning_rate": 1.3081940847331658e-06, "loss": 13.8584, "step": 393980 }, { "epoch": 0.7958847271096531, "grad_norm": 495.3941650390625, "learning_rate": 1.3079586818045925e-06, "loss": 25.4428, "step": 393990 }, { "epoch": 0.7959049277423369, "grad_norm": 76.12857055664062, "learning_rate": 1.3077232968705805e-06, "loss": 10.6322, "step": 394000 }, { "epoch": 0.7959251283750207, "grad_norm": 387.5611877441406, "learning_rate": 1.3074879299322802e-06, "loss": 16.2008, "step": 394010 }, { "epoch": 0.7959453290077045, "grad_norm": 419.7371520996094, "learning_rate": 1.3072525809908332e-06, "loss": 14.3214, "step": 394020 }, { "epoch": 0.7959655296403884, "grad_norm": 76.74159240722656, "learning_rate": 1.3070172500473888e-06, "loss": 19.3696, "step": 394030 }, { "epoch": 0.7959857302730722, "grad_norm": 388.9838562011719, "learning_rate": 1.3067819371030966e-06, "loss": 19.633, "step": 394040 }, { "epoch": 0.796005930905756, "grad_norm": 142.23985290527344, "learning_rate": 1.3065466421591006e-06, "loss": 22.0818, "step": 394050 }, { "epoch": 0.7960261315384398, "grad_norm": 324.7091979980469, "learning_rate": 1.306311365216547e-06, "loss": 11.9796, "step": 394060 }, { "epoch": 0.7960463321711236, "grad_norm": 213.2528533935547, "learning_rate": 1.3060761062765853e-06, "loss": 18.2241, "step": 394070 }, { "epoch": 0.7960665328038075, "grad_norm": 289.2394104003906, "learning_rate": 1.3058408653403609e-06, "loss": 23.5378, "step": 394080 }, { "epoch": 0.7960867334364913, "grad_norm": 443.9687194824219, "learning_rate": 1.3056056424090186e-06, "loss": 25.3611, "step": 394090 }, { "epoch": 0.7961069340691751, "grad_norm": 170.4636993408203, "learning_rate": 1.3053704374837063e-06, "loss": 6.7572, "step": 394100 }, { "epoch": 0.7961271347018588, "grad_norm": 77.45021057128906, "learning_rate": 1.3051352505655713e-06, "loss": 16.5451, "step": 394110 }, { "epoch": 0.7961473353345426, "grad_norm": 309.7838134765625, "learning_rate": 1.3049000816557595e-06, "loss": 11.6659, "step": 394120 }, { "epoch": 0.7961675359672264, "grad_norm": 147.88015747070312, "learning_rate": 1.304664930755415e-06, "loss": 19.3056, "step": 394130 }, { "epoch": 0.7961877365999103, "grad_norm": 350.6545715332031, "learning_rate": 1.3044297978656867e-06, "loss": 8.914, "step": 394140 }, { "epoch": 0.7962079372325941, "grad_norm": 9.670076370239258, "learning_rate": 1.3041946829877178e-06, "loss": 27.0971, "step": 394150 }, { "epoch": 0.7962281378652779, "grad_norm": 282.4226989746094, "learning_rate": 1.3039595861226579e-06, "loss": 10.6995, "step": 394160 }, { "epoch": 0.7962483384979617, "grad_norm": 353.44097900390625, "learning_rate": 1.3037245072716504e-06, "loss": 28.0684, "step": 394170 }, { "epoch": 0.7962685391306455, "grad_norm": 464.08642578125, "learning_rate": 1.3034894464358395e-06, "loss": 11.4519, "step": 394180 }, { "epoch": 0.7962887397633294, "grad_norm": 387.5968322753906, "learning_rate": 1.3032544036163742e-06, "loss": 8.6052, "step": 394190 }, { "epoch": 0.7963089403960132, "grad_norm": 12.337525367736816, "learning_rate": 1.3030193788143991e-06, "loss": 18.83, "step": 394200 }, { "epoch": 0.796329141028697, "grad_norm": 431.0447692871094, "learning_rate": 1.3027843720310574e-06, "loss": 25.552, "step": 394210 }, { "epoch": 0.7963493416613808, "grad_norm": 249.69444274902344, "learning_rate": 1.3025493832674963e-06, "loss": 34.5803, "step": 394220 }, { "epoch": 0.7963695422940646, "grad_norm": 35.73496627807617, "learning_rate": 1.302314412524862e-06, "loss": 34.5904, "step": 394230 }, { "epoch": 0.7963897429267485, "grad_norm": 241.94564819335938, "learning_rate": 1.3020794598042996e-06, "loss": 22.1486, "step": 394240 }, { "epoch": 0.7964099435594323, "grad_norm": 138.87379455566406, "learning_rate": 1.301844525106951e-06, "loss": 13.6201, "step": 394250 }, { "epoch": 0.7964301441921161, "grad_norm": 82.87177276611328, "learning_rate": 1.3016096084339658e-06, "loss": 21.4856, "step": 394260 }, { "epoch": 0.7964503448247999, "grad_norm": 248.30535888671875, "learning_rate": 1.301374709786487e-06, "loss": 14.806, "step": 394270 }, { "epoch": 0.7964705454574837, "grad_norm": 179.72030639648438, "learning_rate": 1.3011398291656575e-06, "loss": 10.9344, "step": 394280 }, { "epoch": 0.7964907460901676, "grad_norm": 173.0596923828125, "learning_rate": 1.3009049665726236e-06, "loss": 18.5035, "step": 394290 }, { "epoch": 0.7965109467228514, "grad_norm": 186.12197875976562, "learning_rate": 1.3006701220085338e-06, "loss": 14.0308, "step": 394300 }, { "epoch": 0.7965311473555352, "grad_norm": 132.2129669189453, "learning_rate": 1.3004352954745257e-06, "loss": 21.2937, "step": 394310 }, { "epoch": 0.796551347988219, "grad_norm": 940.4254760742188, "learning_rate": 1.3002004869717472e-06, "loss": 19.9796, "step": 394320 }, { "epoch": 0.7965715486209028, "grad_norm": 131.22406005859375, "learning_rate": 1.2999656965013447e-06, "loss": 8.9769, "step": 394330 }, { "epoch": 0.7965917492535867, "grad_norm": 420.09136962890625, "learning_rate": 1.2997309240644607e-06, "loss": 12.5894, "step": 394340 }, { "epoch": 0.7966119498862705, "grad_norm": 518.6962890625, "learning_rate": 1.299496169662237e-06, "loss": 15.8687, "step": 394350 }, { "epoch": 0.7966321505189543, "grad_norm": 297.3959045410156, "learning_rate": 1.2992614332958226e-06, "loss": 10.9719, "step": 394360 }, { "epoch": 0.796652351151638, "grad_norm": 105.30386352539062, "learning_rate": 1.2990267149663588e-06, "loss": 17.7874, "step": 394370 }, { "epoch": 0.7966725517843218, "grad_norm": 818.4884033203125, "learning_rate": 1.2987920146749883e-06, "loss": 14.5562, "step": 394380 }, { "epoch": 0.7966927524170057, "grad_norm": 458.752685546875, "learning_rate": 1.2985573324228568e-06, "loss": 22.5249, "step": 394390 }, { "epoch": 0.7967129530496895, "grad_norm": 98.76087188720703, "learning_rate": 1.2983226682111094e-06, "loss": 26.9835, "step": 394400 }, { "epoch": 0.7967331536823733, "grad_norm": 219.8277587890625, "learning_rate": 1.2980880220408887e-06, "loss": 11.4897, "step": 394410 }, { "epoch": 0.7967533543150571, "grad_norm": 210.3355712890625, "learning_rate": 1.2978533939133358e-06, "loss": 14.3098, "step": 394420 }, { "epoch": 0.7967735549477409, "grad_norm": 219.2844696044922, "learning_rate": 1.2976187838295984e-06, "loss": 13.2188, "step": 394430 }, { "epoch": 0.7967937555804248, "grad_norm": 286.1653137207031, "learning_rate": 1.2973841917908175e-06, "loss": 16.932, "step": 394440 }, { "epoch": 0.7968139562131086, "grad_norm": 172.17886352539062, "learning_rate": 1.2971496177981362e-06, "loss": 15.0857, "step": 394450 }, { "epoch": 0.7968341568457924, "grad_norm": 304.573486328125, "learning_rate": 1.2969150618527e-06, "loss": 25.8054, "step": 394460 }, { "epoch": 0.7968543574784762, "grad_norm": 267.05853271484375, "learning_rate": 1.2966805239556484e-06, "loss": 21.2457, "step": 394470 }, { "epoch": 0.79687455811116, "grad_norm": 2.9464051723480225, "learning_rate": 1.2964460041081288e-06, "loss": 15.0103, "step": 394480 }, { "epoch": 0.7968947587438439, "grad_norm": 89.25000762939453, "learning_rate": 1.296211502311282e-06, "loss": 17.5909, "step": 394490 }, { "epoch": 0.7969149593765277, "grad_norm": 379.5791320800781, "learning_rate": 1.2959770185662502e-06, "loss": 20.6322, "step": 394500 }, { "epoch": 0.7969351600092115, "grad_norm": 221.9597930908203, "learning_rate": 1.295742552874178e-06, "loss": 17.6303, "step": 394510 }, { "epoch": 0.7969553606418953, "grad_norm": 629.0286865234375, "learning_rate": 1.2955081052362072e-06, "loss": 28.5363, "step": 394520 }, { "epoch": 0.7969755612745791, "grad_norm": 251.88299560546875, "learning_rate": 1.2952736756534796e-06, "loss": 8.7255, "step": 394530 }, { "epoch": 0.796995761907263, "grad_norm": 219.55262756347656, "learning_rate": 1.2950392641271386e-06, "loss": 20.9323, "step": 394540 }, { "epoch": 0.7970159625399468, "grad_norm": 213.98741149902344, "learning_rate": 1.2948048706583284e-06, "loss": 16.1871, "step": 394550 }, { "epoch": 0.7970361631726306, "grad_norm": 386.5259094238281, "learning_rate": 1.2945704952481896e-06, "loss": 12.1671, "step": 394560 }, { "epoch": 0.7970563638053144, "grad_norm": 192.09848022460938, "learning_rate": 1.2943361378978636e-06, "loss": 22.2698, "step": 394570 }, { "epoch": 0.7970765644379982, "grad_norm": 350.692138671875, "learning_rate": 1.2941017986084953e-06, "loss": 13.1194, "step": 394580 }, { "epoch": 0.7970967650706821, "grad_norm": 389.60321044921875, "learning_rate": 1.2938674773812255e-06, "loss": 26.9056, "step": 394590 }, { "epoch": 0.7971169657033659, "grad_norm": 357.3770446777344, "learning_rate": 1.2936331742171943e-06, "loss": 15.8506, "step": 394600 }, { "epoch": 0.7971371663360497, "grad_norm": 192.43081665039062, "learning_rate": 1.2933988891175458e-06, "loss": 26.609, "step": 394610 }, { "epoch": 0.7971573669687335, "grad_norm": 57.71446990966797, "learning_rate": 1.2931646220834242e-06, "loss": 8.1435, "step": 394620 }, { "epoch": 0.7971775676014172, "grad_norm": 1122.2384033203125, "learning_rate": 1.292930373115966e-06, "loss": 12.8472, "step": 394630 }, { "epoch": 0.797197768234101, "grad_norm": 253.8313446044922, "learning_rate": 1.2926961422163154e-06, "loss": 9.4126, "step": 394640 }, { "epoch": 0.7972179688667849, "grad_norm": 216.83502197265625, "learning_rate": 1.2924619293856155e-06, "loss": 13.2119, "step": 394650 }, { "epoch": 0.7972381694994687, "grad_norm": 222.7720489501953, "learning_rate": 1.2922277346250067e-06, "loss": 15.2757, "step": 394660 }, { "epoch": 0.7972583701321525, "grad_norm": 247.2835235595703, "learning_rate": 1.2919935579356285e-06, "loss": 13.842, "step": 394670 }, { "epoch": 0.7972785707648363, "grad_norm": 208.08480834960938, "learning_rate": 1.2917593993186257e-06, "loss": 17.1479, "step": 394680 }, { "epoch": 0.7972987713975201, "grad_norm": 774.574462890625, "learning_rate": 1.2915252587751376e-06, "loss": 13.9384, "step": 394690 }, { "epoch": 0.797318972030204, "grad_norm": 194.92074584960938, "learning_rate": 1.2912911363063048e-06, "loss": 15.1589, "step": 394700 }, { "epoch": 0.7973391726628878, "grad_norm": 281.8524475097656, "learning_rate": 1.291057031913268e-06, "loss": 11.0465, "step": 394710 }, { "epoch": 0.7973593732955716, "grad_norm": 267.965576171875, "learning_rate": 1.2908229455971717e-06, "loss": 31.1553, "step": 394720 }, { "epoch": 0.7973795739282554, "grad_norm": 287.10028076171875, "learning_rate": 1.2905888773591546e-06, "loss": 19.4741, "step": 394730 }, { "epoch": 0.7973997745609392, "grad_norm": 255.349853515625, "learning_rate": 1.2903548272003552e-06, "loss": 14.1268, "step": 394740 }, { "epoch": 0.7974199751936231, "grad_norm": 270.5774841308594, "learning_rate": 1.2901207951219186e-06, "loss": 9.0975, "step": 394750 }, { "epoch": 0.7974401758263069, "grad_norm": 293.6265563964844, "learning_rate": 1.2898867811249832e-06, "loss": 28.6317, "step": 394760 }, { "epoch": 0.7974603764589907, "grad_norm": 193.71498107910156, "learning_rate": 1.2896527852106876e-06, "loss": 17.3726, "step": 394770 }, { "epoch": 0.7974805770916745, "grad_norm": 384.2185974121094, "learning_rate": 1.2894188073801766e-06, "loss": 22.4654, "step": 394780 }, { "epoch": 0.7975007777243583, "grad_norm": 304.72027587890625, "learning_rate": 1.2891848476345864e-06, "loss": 15.2762, "step": 394790 }, { "epoch": 0.7975209783570422, "grad_norm": 26.259021759033203, "learning_rate": 1.2889509059750605e-06, "loss": 11.2329, "step": 394800 }, { "epoch": 0.797541178989726, "grad_norm": 368.5821228027344, "learning_rate": 1.288716982402738e-06, "loss": 13.0379, "step": 394810 }, { "epoch": 0.7975613796224098, "grad_norm": 649.3244018554688, "learning_rate": 1.2884830769187572e-06, "loss": 16.0954, "step": 394820 }, { "epoch": 0.7975815802550936, "grad_norm": 597.8184814453125, "learning_rate": 1.2882491895242599e-06, "loss": 16.114, "step": 394830 }, { "epoch": 0.7976017808877774, "grad_norm": 136.8312530517578, "learning_rate": 1.2880153202203877e-06, "loss": 8.2952, "step": 394840 }, { "epoch": 0.7976219815204613, "grad_norm": 510.2694396972656, "learning_rate": 1.287781469008278e-06, "loss": 30.0497, "step": 394850 }, { "epoch": 0.7976421821531451, "grad_norm": 96.37393951416016, "learning_rate": 1.2875476358890698e-06, "loss": 10.502, "step": 394860 }, { "epoch": 0.7976623827858289, "grad_norm": 129.64698791503906, "learning_rate": 1.2873138208639057e-06, "loss": 9.7401, "step": 394870 }, { "epoch": 0.7976825834185126, "grad_norm": 79.2483901977539, "learning_rate": 1.2870800239339237e-06, "loss": 8.768, "step": 394880 }, { "epoch": 0.7977027840511964, "grad_norm": 341.1105041503906, "learning_rate": 1.2868462451002623e-06, "loss": 6.0175, "step": 394890 }, { "epoch": 0.7977229846838803, "grad_norm": 356.0732421875, "learning_rate": 1.2866124843640614e-06, "loss": 17.8297, "step": 394900 }, { "epoch": 0.7977431853165641, "grad_norm": 263.6865539550781, "learning_rate": 1.2863787417264639e-06, "loss": 26.8115, "step": 394910 }, { "epoch": 0.7977633859492479, "grad_norm": 162.0262908935547, "learning_rate": 1.2861450171886037e-06, "loss": 10.6038, "step": 394920 }, { "epoch": 0.7977835865819317, "grad_norm": 352.1432189941406, "learning_rate": 1.2859113107516212e-06, "loss": 17.1949, "step": 394930 }, { "epoch": 0.7978037872146155, "grad_norm": 101.93891143798828, "learning_rate": 1.2856776224166589e-06, "loss": 8.7758, "step": 394940 }, { "epoch": 0.7978239878472994, "grad_norm": 153.12538146972656, "learning_rate": 1.2854439521848526e-06, "loss": 11.7709, "step": 394950 }, { "epoch": 0.7978441884799832, "grad_norm": 437.7710266113281, "learning_rate": 1.2852103000573413e-06, "loss": 23.9861, "step": 394960 }, { "epoch": 0.797864389112667, "grad_norm": 109.10918426513672, "learning_rate": 1.2849766660352652e-06, "loss": 7.4901, "step": 394970 }, { "epoch": 0.7978845897453508, "grad_norm": 122.03852081298828, "learning_rate": 1.2847430501197627e-06, "loss": 16.9858, "step": 394980 }, { "epoch": 0.7979047903780346, "grad_norm": 110.2911605834961, "learning_rate": 1.2845094523119706e-06, "loss": 14.654, "step": 394990 }, { "epoch": 0.7979249910107185, "grad_norm": 86.35404205322266, "learning_rate": 1.2842758726130283e-06, "loss": 10.981, "step": 395000 }, { "epoch": 0.7979451916434023, "grad_norm": 15.624135971069336, "learning_rate": 1.2840423110240762e-06, "loss": 26.1013, "step": 395010 }, { "epoch": 0.7979653922760861, "grad_norm": 10.980106353759766, "learning_rate": 1.2838087675462518e-06, "loss": 15.2208, "step": 395020 }, { "epoch": 0.7979855929087699, "grad_norm": 203.3727569580078, "learning_rate": 1.2835752421806908e-06, "loss": 16.5381, "step": 395030 }, { "epoch": 0.7980057935414537, "grad_norm": 226.701416015625, "learning_rate": 1.283341734928535e-06, "loss": 20.9419, "step": 395040 }, { "epoch": 0.7980259941741376, "grad_norm": 110.25926971435547, "learning_rate": 1.2831082457909206e-06, "loss": 26.1868, "step": 395050 }, { "epoch": 0.7980461948068214, "grad_norm": 310.1398010253906, "learning_rate": 1.2828747747689846e-06, "loss": 19.8265, "step": 395060 }, { "epoch": 0.7980663954395052, "grad_norm": 168.42041015625, "learning_rate": 1.2826413218638672e-06, "loss": 17.615, "step": 395070 }, { "epoch": 0.798086596072189, "grad_norm": 526.8919677734375, "learning_rate": 1.2824078870767036e-06, "loss": 14.1562, "step": 395080 }, { "epoch": 0.7981067967048728, "grad_norm": 1.2583905458450317, "learning_rate": 1.2821744704086353e-06, "loss": 25.3458, "step": 395090 }, { "epoch": 0.7981269973375567, "grad_norm": 258.1378479003906, "learning_rate": 1.2819410718607972e-06, "loss": 35.9246, "step": 395100 }, { "epoch": 0.7981471979702405, "grad_norm": 10.087231636047363, "learning_rate": 1.2817076914343257e-06, "loss": 80.958, "step": 395110 }, { "epoch": 0.7981673986029243, "grad_norm": 260.6452941894531, "learning_rate": 1.2814743291303616e-06, "loss": 16.9283, "step": 395120 }, { "epoch": 0.7981875992356081, "grad_norm": 341.9921875, "learning_rate": 1.2812409849500408e-06, "loss": 11.5303, "step": 395130 }, { "epoch": 0.7982077998682918, "grad_norm": 75.74690246582031, "learning_rate": 1.2810076588944987e-06, "loss": 16.2587, "step": 395140 }, { "epoch": 0.7982280005009756, "grad_norm": 198.7671356201172, "learning_rate": 1.2807743509648745e-06, "loss": 22.9932, "step": 395150 }, { "epoch": 0.7982482011336595, "grad_norm": 248.513671875, "learning_rate": 1.280541061162306e-06, "loss": 8.2832, "step": 395160 }, { "epoch": 0.7982684017663433, "grad_norm": 264.4519348144531, "learning_rate": 1.2803077894879296e-06, "loss": 12.7597, "step": 395170 }, { "epoch": 0.7982886023990271, "grad_norm": 225.13975524902344, "learning_rate": 1.2800745359428807e-06, "loss": 21.7589, "step": 395180 }, { "epoch": 0.7983088030317109, "grad_norm": 253.72926330566406, "learning_rate": 1.2798413005282984e-06, "loss": 17.3013, "step": 395190 }, { "epoch": 0.7983290036643947, "grad_norm": 329.8512878417969, "learning_rate": 1.2796080832453183e-06, "loss": 10.9676, "step": 395200 }, { "epoch": 0.7983492042970786, "grad_norm": 136.9598846435547, "learning_rate": 1.279374884095076e-06, "loss": 26.4432, "step": 395210 }, { "epoch": 0.7983694049297624, "grad_norm": 158.43113708496094, "learning_rate": 1.279141703078709e-06, "loss": 18.593, "step": 395220 }, { "epoch": 0.7983896055624462, "grad_norm": 302.84906005859375, "learning_rate": 1.2789085401973572e-06, "loss": 23.2573, "step": 395230 }, { "epoch": 0.79840980619513, "grad_norm": 343.9611511230469, "learning_rate": 1.2786753954521508e-06, "loss": 20.8363, "step": 395240 }, { "epoch": 0.7984300068278138, "grad_norm": 257.0786437988281, "learning_rate": 1.2784422688442294e-06, "loss": 15.2696, "step": 395250 }, { "epoch": 0.7984502074604977, "grad_norm": 188.4160919189453, "learning_rate": 1.2782091603747304e-06, "loss": 10.0086, "step": 395260 }, { "epoch": 0.7984704080931815, "grad_norm": 174.40382385253906, "learning_rate": 1.2779760700447885e-06, "loss": 23.8219, "step": 395270 }, { "epoch": 0.7984906087258653, "grad_norm": 265.345947265625, "learning_rate": 1.2777429978555383e-06, "loss": 12.7815, "step": 395280 }, { "epoch": 0.7985108093585491, "grad_norm": 119.06078338623047, "learning_rate": 1.2775099438081173e-06, "loss": 15.0739, "step": 395290 }, { "epoch": 0.7985310099912329, "grad_norm": 538.5911254882812, "learning_rate": 1.2772769079036639e-06, "loss": 21.6963, "step": 395300 }, { "epoch": 0.7985512106239168, "grad_norm": 24.827228546142578, "learning_rate": 1.277043890143309e-06, "loss": 8.0809, "step": 395310 }, { "epoch": 0.7985714112566006, "grad_norm": 555.3018798828125, "learning_rate": 1.2768108905281906e-06, "loss": 15.6123, "step": 395320 }, { "epoch": 0.7985916118892844, "grad_norm": 58.99827575683594, "learning_rate": 1.2765779090594454e-06, "loss": 19.6072, "step": 395330 }, { "epoch": 0.7986118125219682, "grad_norm": 526.8296508789062, "learning_rate": 1.2763449457382083e-06, "loss": 29.756, "step": 395340 }, { "epoch": 0.798632013154652, "grad_norm": 111.27204132080078, "learning_rate": 1.2761120005656125e-06, "loss": 24.9345, "step": 395350 }, { "epoch": 0.7986522137873359, "grad_norm": 117.59506225585938, "learning_rate": 1.2758790735427966e-06, "loss": 12.0525, "step": 395360 }, { "epoch": 0.7986724144200197, "grad_norm": 162.22640991210938, "learning_rate": 1.275646164670895e-06, "loss": 20.3736, "step": 395370 }, { "epoch": 0.7986926150527035, "grad_norm": 205.48324584960938, "learning_rate": 1.27541327395104e-06, "loss": 22.3006, "step": 395380 }, { "epoch": 0.7987128156853872, "grad_norm": 163.18946838378906, "learning_rate": 1.275180401384371e-06, "loss": 17.4963, "step": 395390 }, { "epoch": 0.798733016318071, "grad_norm": 248.9812774658203, "learning_rate": 1.2749475469720196e-06, "loss": 23.4751, "step": 395400 }, { "epoch": 0.7987532169507549, "grad_norm": 370.6106872558594, "learning_rate": 1.274714710715123e-06, "loss": 17.2849, "step": 395410 }, { "epoch": 0.7987734175834387, "grad_norm": 152.36392211914062, "learning_rate": 1.2744818926148157e-06, "loss": 10.8284, "step": 395420 }, { "epoch": 0.7987936182161225, "grad_norm": 227.98939514160156, "learning_rate": 1.2742490926722295e-06, "loss": 19.3162, "step": 395430 }, { "epoch": 0.7988138188488063, "grad_norm": 285.50811767578125, "learning_rate": 1.2740163108885033e-06, "loss": 15.3282, "step": 395440 }, { "epoch": 0.7988340194814901, "grad_norm": 357.19146728515625, "learning_rate": 1.2737835472647686e-06, "loss": 34.5575, "step": 395450 }, { "epoch": 0.798854220114174, "grad_norm": 385.7685546875, "learning_rate": 1.273550801802162e-06, "loss": 23.0638, "step": 395460 }, { "epoch": 0.7988744207468578, "grad_norm": 138.3925018310547, "learning_rate": 1.2733180745018154e-06, "loss": 16.9074, "step": 395470 }, { "epoch": 0.7988946213795416, "grad_norm": 173.13693237304688, "learning_rate": 1.2730853653648657e-06, "loss": 18.7469, "step": 395480 }, { "epoch": 0.7989148220122254, "grad_norm": 442.8561706542969, "learning_rate": 1.2728526743924462e-06, "loss": 13.5918, "step": 395490 }, { "epoch": 0.7989350226449092, "grad_norm": 190.3169403076172, "learning_rate": 1.2726200015856893e-06, "loss": 19.2601, "step": 395500 }, { "epoch": 0.798955223277593, "grad_norm": 270.53436279296875, "learning_rate": 1.2723873469457304e-06, "loss": 17.298, "step": 395510 }, { "epoch": 0.7989754239102769, "grad_norm": 208.21005249023438, "learning_rate": 1.2721547104737065e-06, "loss": 21.7193, "step": 395520 }, { "epoch": 0.7989956245429607, "grad_norm": 115.04071807861328, "learning_rate": 1.2719220921707453e-06, "loss": 13.3136, "step": 395530 }, { "epoch": 0.7990158251756445, "grad_norm": 0.0, "learning_rate": 1.2716894920379835e-06, "loss": 13.022, "step": 395540 }, { "epoch": 0.7990360258083283, "grad_norm": 304.9453430175781, "learning_rate": 1.2714569100765567e-06, "loss": 17.8382, "step": 395550 }, { "epoch": 0.7990562264410122, "grad_norm": 376.6053466796875, "learning_rate": 1.2712243462875967e-06, "loss": 15.9462, "step": 395560 }, { "epoch": 0.799076427073696, "grad_norm": 61.37324142456055, "learning_rate": 1.2709918006722355e-06, "loss": 22.7655, "step": 395570 }, { "epoch": 0.7990966277063798, "grad_norm": 812.2053833007812, "learning_rate": 1.2707592732316092e-06, "loss": 23.2999, "step": 395580 }, { "epoch": 0.7991168283390636, "grad_norm": 187.53065490722656, "learning_rate": 1.2705267639668501e-06, "loss": 13.8223, "step": 395590 }, { "epoch": 0.7991370289717474, "grad_norm": 123.14891815185547, "learning_rate": 1.2702942728790897e-06, "loss": 10.6664, "step": 395600 }, { "epoch": 0.7991572296044313, "grad_norm": 359.4590148925781, "learning_rate": 1.2700617999694626e-06, "loss": 19.5869, "step": 395610 }, { "epoch": 0.7991774302371151, "grad_norm": 143.32301330566406, "learning_rate": 1.2698293452391036e-06, "loss": 21.4266, "step": 395620 }, { "epoch": 0.7991976308697989, "grad_norm": 346.0218200683594, "learning_rate": 1.2695969086891436e-06, "loss": 19.6607, "step": 395630 }, { "epoch": 0.7992178315024827, "grad_norm": 220.1179656982422, "learning_rate": 1.2693644903207146e-06, "loss": 13.6493, "step": 395640 }, { "epoch": 0.7992380321351664, "grad_norm": 380.8208312988281, "learning_rate": 1.2691320901349518e-06, "loss": 15.1762, "step": 395650 }, { "epoch": 0.7992582327678502, "grad_norm": 43.68898391723633, "learning_rate": 1.2688997081329874e-06, "loss": 19.3375, "step": 395660 }, { "epoch": 0.7992784334005341, "grad_norm": 271.71112060546875, "learning_rate": 1.2686673443159515e-06, "loss": 16.3444, "step": 395670 }, { "epoch": 0.7992986340332179, "grad_norm": 238.51829528808594, "learning_rate": 1.2684349986849791e-06, "loss": 21.1586, "step": 395680 }, { "epoch": 0.7993188346659017, "grad_norm": 299.1219787597656, "learning_rate": 1.2682026712412016e-06, "loss": 25.3184, "step": 395690 }, { "epoch": 0.7993390352985855, "grad_norm": 325.2257080078125, "learning_rate": 1.2679703619857525e-06, "loss": 18.3703, "step": 395700 }, { "epoch": 0.7993592359312693, "grad_norm": 195.0442352294922, "learning_rate": 1.2677380709197634e-06, "loss": 10.8941, "step": 395710 }, { "epoch": 0.7993794365639532, "grad_norm": 96.44918823242188, "learning_rate": 1.2675057980443644e-06, "loss": 9.3159, "step": 395720 }, { "epoch": 0.799399637196637, "grad_norm": 239.30307006835938, "learning_rate": 1.2672735433606914e-06, "loss": 19.8342, "step": 395730 }, { "epoch": 0.7994198378293208, "grad_norm": 308.88153076171875, "learning_rate": 1.2670413068698745e-06, "loss": 22.3338, "step": 395740 }, { "epoch": 0.7994400384620046, "grad_norm": 360.2878723144531, "learning_rate": 1.2668090885730439e-06, "loss": 17.4652, "step": 395750 }, { "epoch": 0.7994602390946884, "grad_norm": 512.7362670898438, "learning_rate": 1.2665768884713326e-06, "loss": 19.0719, "step": 395760 }, { "epoch": 0.7994804397273723, "grad_norm": 336.29766845703125, "learning_rate": 1.2663447065658746e-06, "loss": 25.411, "step": 395770 }, { "epoch": 0.7995006403600561, "grad_norm": 122.11836242675781, "learning_rate": 1.2661125428577998e-06, "loss": 13.7608, "step": 395780 }, { "epoch": 0.7995208409927399, "grad_norm": 0.21410268545150757, "learning_rate": 1.265880397348238e-06, "loss": 23.7151, "step": 395790 }, { "epoch": 0.7995410416254237, "grad_norm": 7.786190032958984, "learning_rate": 1.2656482700383238e-06, "loss": 8.8194, "step": 395800 }, { "epoch": 0.7995612422581075, "grad_norm": 435.97003173828125, "learning_rate": 1.2654161609291864e-06, "loss": 14.853, "step": 395810 }, { "epoch": 0.7995814428907914, "grad_norm": 126.4620361328125, "learning_rate": 1.265184070021957e-06, "loss": 30.6118, "step": 395820 }, { "epoch": 0.7996016435234752, "grad_norm": 254.8677520751953, "learning_rate": 1.2649519973177672e-06, "loss": 22.1945, "step": 395830 }, { "epoch": 0.799621844156159, "grad_norm": 526.0126342773438, "learning_rate": 1.2647199428177509e-06, "loss": 43.8862, "step": 395840 }, { "epoch": 0.7996420447888428, "grad_norm": 377.6842346191406, "learning_rate": 1.2644879065230343e-06, "loss": 28.9292, "step": 395850 }, { "epoch": 0.7996622454215266, "grad_norm": 99.98877716064453, "learning_rate": 1.26425588843475e-06, "loss": 22.6376, "step": 395860 }, { "epoch": 0.7996824460542105, "grad_norm": 322.96807861328125, "learning_rate": 1.2640238885540313e-06, "loss": 18.1419, "step": 395870 }, { "epoch": 0.7997026466868943, "grad_norm": 147.81179809570312, "learning_rate": 1.263791906882007e-06, "loss": 14.9539, "step": 395880 }, { "epoch": 0.7997228473195781, "grad_norm": 432.11614990234375, "learning_rate": 1.263559943419806e-06, "loss": 27.4115, "step": 395890 }, { "epoch": 0.7997430479522618, "grad_norm": 459.1332092285156, "learning_rate": 1.2633279981685608e-06, "loss": 20.6323, "step": 395900 }, { "epoch": 0.7997632485849456, "grad_norm": 225.7126922607422, "learning_rate": 1.2630960711294049e-06, "loss": 16.3474, "step": 395910 }, { "epoch": 0.7997834492176294, "grad_norm": 302.43414306640625, "learning_rate": 1.2628641623034627e-06, "loss": 27.8182, "step": 395920 }, { "epoch": 0.7998036498503133, "grad_norm": 351.20465087890625, "learning_rate": 1.2626322716918672e-06, "loss": 15.8938, "step": 395930 }, { "epoch": 0.7998238504829971, "grad_norm": 8745.283203125, "learning_rate": 1.2624003992957494e-06, "loss": 59.072, "step": 395940 }, { "epoch": 0.7998440511156809, "grad_norm": 254.19956970214844, "learning_rate": 1.2621685451162397e-06, "loss": 11.8629, "step": 395950 }, { "epoch": 0.7998642517483647, "grad_norm": 526.2318725585938, "learning_rate": 1.2619367091544654e-06, "loss": 23.3165, "step": 395960 }, { "epoch": 0.7998844523810485, "grad_norm": 83.66377258300781, "learning_rate": 1.2617048914115593e-06, "loss": 20.4381, "step": 395970 }, { "epoch": 0.7999046530137324, "grad_norm": 161.4769744873047, "learning_rate": 1.2614730918886509e-06, "loss": 25.4368, "step": 395980 }, { "epoch": 0.7999248536464162, "grad_norm": 250.94203186035156, "learning_rate": 1.261241310586867e-06, "loss": 18.6809, "step": 395990 }, { "epoch": 0.7999450542791, "grad_norm": 147.0688018798828, "learning_rate": 1.2610095475073415e-06, "loss": 19.8836, "step": 396000 }, { "epoch": 0.7999652549117838, "grad_norm": 8.057957649230957, "learning_rate": 1.2607778026512002e-06, "loss": 14.8339, "step": 396010 }, { "epoch": 0.7999854555444676, "grad_norm": 426.3250732421875, "learning_rate": 1.2605460760195759e-06, "loss": 21.1002, "step": 396020 }, { "epoch": 0.8000056561771515, "grad_norm": 128.7440948486328, "learning_rate": 1.2603143676135965e-06, "loss": 18.7541, "step": 396030 }, { "epoch": 0.8000258568098353, "grad_norm": 35.90017318725586, "learning_rate": 1.26008267743439e-06, "loss": 13.4015, "step": 396040 }, { "epoch": 0.8000460574425191, "grad_norm": 434.51458740234375, "learning_rate": 1.2598510054830888e-06, "loss": 14.4279, "step": 396050 }, { "epoch": 0.8000662580752029, "grad_norm": 150.1875, "learning_rate": 1.2596193517608179e-06, "loss": 22.6019, "step": 396060 }, { "epoch": 0.8000864587078867, "grad_norm": 234.52310180664062, "learning_rate": 1.25938771626871e-06, "loss": 28.2453, "step": 396070 }, { "epoch": 0.8001066593405706, "grad_norm": 0.0, "learning_rate": 1.259156099007892e-06, "loss": 16.2556, "step": 396080 }, { "epoch": 0.8001268599732544, "grad_norm": 268.42999267578125, "learning_rate": 1.2589244999794947e-06, "loss": 9.2134, "step": 396090 }, { "epoch": 0.8001470606059382, "grad_norm": 199.99293518066406, "learning_rate": 1.2586929191846453e-06, "loss": 19.8127, "step": 396100 }, { "epoch": 0.800167261238622, "grad_norm": 228.40719604492188, "learning_rate": 1.2584613566244713e-06, "loss": 21.6143, "step": 396110 }, { "epoch": 0.8001874618713058, "grad_norm": 296.930908203125, "learning_rate": 1.2582298123001046e-06, "loss": 8.3201, "step": 396120 }, { "epoch": 0.8002076625039897, "grad_norm": 46.50396728515625, "learning_rate": 1.2579982862126722e-06, "loss": 9.4618, "step": 396130 }, { "epoch": 0.8002278631366735, "grad_norm": 686.8037109375, "learning_rate": 1.2577667783633007e-06, "loss": 12.999, "step": 396140 }, { "epoch": 0.8002480637693573, "grad_norm": 340.1067810058594, "learning_rate": 1.25753528875312e-06, "loss": 16.3316, "step": 396150 }, { "epoch": 0.800268264402041, "grad_norm": 176.02413940429688, "learning_rate": 1.2573038173832597e-06, "loss": 30.0913, "step": 396160 }, { "epoch": 0.8002884650347248, "grad_norm": 332.09539794921875, "learning_rate": 1.2570723642548465e-06, "loss": 15.8266, "step": 396170 }, { "epoch": 0.8003086656674087, "grad_norm": 364.9923095703125, "learning_rate": 1.2568409293690077e-06, "loss": 12.8216, "step": 396180 }, { "epoch": 0.8003288663000925, "grad_norm": 61.77202606201172, "learning_rate": 1.2566095127268734e-06, "loss": 29.9194, "step": 396190 }, { "epoch": 0.8003490669327763, "grad_norm": 340.26239013671875, "learning_rate": 1.2563781143295705e-06, "loss": 30.6065, "step": 396200 }, { "epoch": 0.8003692675654601, "grad_norm": 391.4183044433594, "learning_rate": 1.2561467341782247e-06, "loss": 22.6166, "step": 396210 }, { "epoch": 0.8003894681981439, "grad_norm": 88.39815521240234, "learning_rate": 1.2559153722739658e-06, "loss": 15.9085, "step": 396220 }, { "epoch": 0.8004096688308278, "grad_norm": 97.71906280517578, "learning_rate": 1.2556840286179234e-06, "loss": 19.5931, "step": 396230 }, { "epoch": 0.8004298694635116, "grad_norm": 100.33430480957031, "learning_rate": 1.2554527032112224e-06, "loss": 5.9845, "step": 396240 }, { "epoch": 0.8004500700961954, "grad_norm": 267.4783935546875, "learning_rate": 1.2552213960549891e-06, "loss": 13.4509, "step": 396250 }, { "epoch": 0.8004702707288792, "grad_norm": 90.05357360839844, "learning_rate": 1.254990107150354e-06, "loss": 10.0633, "step": 396260 }, { "epoch": 0.800490471361563, "grad_norm": 627.791748046875, "learning_rate": 1.2547588364984431e-06, "loss": 24.0344, "step": 396270 }, { "epoch": 0.8005106719942469, "grad_norm": 576.5026245117188, "learning_rate": 1.2545275841003818e-06, "loss": 15.9451, "step": 396280 }, { "epoch": 0.8005308726269307, "grad_norm": 646.0071411132812, "learning_rate": 1.2542963499573007e-06, "loss": 20.2115, "step": 396290 }, { "epoch": 0.8005510732596145, "grad_norm": 252.44081115722656, "learning_rate": 1.2540651340703231e-06, "loss": 19.0461, "step": 396300 }, { "epoch": 0.8005712738922983, "grad_norm": 211.38392639160156, "learning_rate": 1.253833936440579e-06, "loss": 21.1582, "step": 396310 }, { "epoch": 0.8005914745249821, "grad_norm": 217.4888458251953, "learning_rate": 1.2536027570691938e-06, "loss": 14.4341, "step": 396320 }, { "epoch": 0.800611675157666, "grad_norm": 379.1426086425781, "learning_rate": 1.2533715959572935e-06, "loss": 16.487, "step": 396330 }, { "epoch": 0.8006318757903498, "grad_norm": 390.5389404296875, "learning_rate": 1.253140453106007e-06, "loss": 25.6821, "step": 396340 }, { "epoch": 0.8006520764230336, "grad_norm": 294.15777587890625, "learning_rate": 1.2529093285164579e-06, "loss": 22.4026, "step": 396350 }, { "epoch": 0.8006722770557174, "grad_norm": 403.8798828125, "learning_rate": 1.2526782221897755e-06, "loss": 6.6302, "step": 396360 }, { "epoch": 0.8006924776884012, "grad_norm": 352.99169921875, "learning_rate": 1.252447134127084e-06, "loss": 27.5729, "step": 396370 }, { "epoch": 0.8007126783210851, "grad_norm": 244.75059509277344, "learning_rate": 1.2522160643295112e-06, "loss": 9.2362, "step": 396380 }, { "epoch": 0.8007328789537689, "grad_norm": 253.2174835205078, "learning_rate": 1.2519850127981836e-06, "loss": 12.2704, "step": 396390 }, { "epoch": 0.8007530795864527, "grad_norm": 295.2263488769531, "learning_rate": 1.2517539795342248e-06, "loss": 20.3872, "step": 396400 }, { "epoch": 0.8007732802191365, "grad_norm": 164.6853485107422, "learning_rate": 1.2515229645387639e-06, "loss": 11.3304, "step": 396410 }, { "epoch": 0.8007934808518202, "grad_norm": 173.65811157226562, "learning_rate": 1.2512919678129254e-06, "loss": 10.3559, "step": 396420 }, { "epoch": 0.800813681484504, "grad_norm": 360.9266357421875, "learning_rate": 1.2510609893578335e-06, "loss": 31.4839, "step": 396430 }, { "epoch": 0.8008338821171879, "grad_norm": 493.28466796875, "learning_rate": 1.2508300291746162e-06, "loss": 24.0571, "step": 396440 }, { "epoch": 0.8008540827498717, "grad_norm": 307.3465881347656, "learning_rate": 1.2505990872644008e-06, "loss": 16.7367, "step": 396450 }, { "epoch": 0.8008742833825555, "grad_norm": 416.18084716796875, "learning_rate": 1.2503681636283082e-06, "loss": 15.7894, "step": 396460 }, { "epoch": 0.8008944840152393, "grad_norm": 268.5434265136719, "learning_rate": 1.2501372582674665e-06, "loss": 14.0664, "step": 396470 }, { "epoch": 0.8009146846479231, "grad_norm": 91.87339782714844, "learning_rate": 1.2499063711830023e-06, "loss": 15.5301, "step": 396480 }, { "epoch": 0.800934885280607, "grad_norm": 60.65346145629883, "learning_rate": 1.2496755023760398e-06, "loss": 23.6972, "step": 396490 }, { "epoch": 0.8009550859132908, "grad_norm": 282.6078796386719, "learning_rate": 1.2494446518477022e-06, "loss": 11.174, "step": 396500 }, { "epoch": 0.8009752865459746, "grad_norm": 310.2064208984375, "learning_rate": 1.249213819599117e-06, "loss": 10.1416, "step": 396510 }, { "epoch": 0.8009954871786584, "grad_norm": 293.44207763671875, "learning_rate": 1.248983005631411e-06, "loss": 13.1855, "step": 396520 }, { "epoch": 0.8010156878113422, "grad_norm": 0.0, "learning_rate": 1.2487522099457044e-06, "loss": 15.6434, "step": 396530 }, { "epoch": 0.8010358884440261, "grad_norm": 368.7659606933594, "learning_rate": 1.248521432543125e-06, "loss": 28.468, "step": 396540 }, { "epoch": 0.8010560890767099, "grad_norm": 274.6934814453125, "learning_rate": 1.248290673424798e-06, "loss": 11.7101, "step": 396550 }, { "epoch": 0.8010762897093937, "grad_norm": 79.55166625976562, "learning_rate": 1.2480599325918474e-06, "loss": 9.478, "step": 396560 }, { "epoch": 0.8010964903420775, "grad_norm": 33.00163269042969, "learning_rate": 1.247829210045396e-06, "loss": 30.499, "step": 396570 }, { "epoch": 0.8011166909747613, "grad_norm": 488.34515380859375, "learning_rate": 1.2475985057865714e-06, "loss": 18.7176, "step": 396580 }, { "epoch": 0.8011368916074452, "grad_norm": 176.9521484375, "learning_rate": 1.2473678198164967e-06, "loss": 21.3368, "step": 396590 }, { "epoch": 0.801157092240129, "grad_norm": 148.75375366210938, "learning_rate": 1.2471371521362946e-06, "loss": 13.4292, "step": 396600 }, { "epoch": 0.8011772928728128, "grad_norm": 494.89862060546875, "learning_rate": 1.2469065027470923e-06, "loss": 20.2862, "step": 396610 }, { "epoch": 0.8011974935054966, "grad_norm": 0.0, "learning_rate": 1.246675871650011e-06, "loss": 12.8207, "step": 396620 }, { "epoch": 0.8012176941381804, "grad_norm": 251.51931762695312, "learning_rate": 1.2464452588461778e-06, "loss": 8.1844, "step": 396630 }, { "epoch": 0.8012378947708643, "grad_norm": 295.48291015625, "learning_rate": 1.2462146643367156e-06, "loss": 17.6521, "step": 396640 }, { "epoch": 0.8012580954035481, "grad_norm": 193.55917358398438, "learning_rate": 1.2459840881227458e-06, "loss": 20.316, "step": 396650 }, { "epoch": 0.8012782960362319, "grad_norm": 354.9112243652344, "learning_rate": 1.2457535302053957e-06, "loss": 12.9615, "step": 396660 }, { "epoch": 0.8012984966689156, "grad_norm": 302.4898376464844, "learning_rate": 1.2455229905857863e-06, "loss": 13.8066, "step": 396670 }, { "epoch": 0.8013186973015994, "grad_norm": 364.9088439941406, "learning_rate": 1.2452924692650443e-06, "loss": 19.5735, "step": 396680 }, { "epoch": 0.8013388979342833, "grad_norm": 112.06468200683594, "learning_rate": 1.2450619662442892e-06, "loss": 10.1105, "step": 396690 }, { "epoch": 0.8013590985669671, "grad_norm": 345.9883117675781, "learning_rate": 1.2448314815246487e-06, "loss": 13.7083, "step": 396700 }, { "epoch": 0.8013792991996509, "grad_norm": 307.82928466796875, "learning_rate": 1.244601015107244e-06, "loss": 20.735, "step": 396710 }, { "epoch": 0.8013994998323347, "grad_norm": 475.2358093261719, "learning_rate": 1.2443705669931966e-06, "loss": 12.0839, "step": 396720 }, { "epoch": 0.8014197004650185, "grad_norm": 2260.130859375, "learning_rate": 1.2441401371836337e-06, "loss": 53.7924, "step": 396730 }, { "epoch": 0.8014399010977024, "grad_norm": 272.14459228515625, "learning_rate": 1.2439097256796756e-06, "loss": 24.0442, "step": 396740 }, { "epoch": 0.8014601017303862, "grad_norm": 218.529052734375, "learning_rate": 1.2436793324824448e-06, "loss": 25.535, "step": 396750 }, { "epoch": 0.80148030236307, "grad_norm": 168.72279357910156, "learning_rate": 1.2434489575930652e-06, "loss": 8.8924, "step": 396760 }, { "epoch": 0.8015005029957538, "grad_norm": 244.35552978515625, "learning_rate": 1.2432186010126613e-06, "loss": 12.4625, "step": 396770 }, { "epoch": 0.8015207036284376, "grad_norm": 247.98825073242188, "learning_rate": 1.2429882627423545e-06, "loss": 20.8364, "step": 396780 }, { "epoch": 0.8015409042611215, "grad_norm": 304.4253845214844, "learning_rate": 1.2427579427832654e-06, "loss": 18.3105, "step": 396790 }, { "epoch": 0.8015611048938053, "grad_norm": 96.70458221435547, "learning_rate": 1.24252764113652e-06, "loss": 20.7823, "step": 396800 }, { "epoch": 0.8015813055264891, "grad_norm": 114.90772247314453, "learning_rate": 1.2422973578032394e-06, "loss": 14.8714, "step": 396810 }, { "epoch": 0.8016015061591729, "grad_norm": 134.18841552734375, "learning_rate": 1.2420670927845441e-06, "loss": 23.1468, "step": 396820 }, { "epoch": 0.8016217067918567, "grad_norm": 36.978824615478516, "learning_rate": 1.2418368460815578e-06, "loss": 13.8459, "step": 396830 }, { "epoch": 0.8016419074245406, "grad_norm": 435.2635192871094, "learning_rate": 1.2416066176954044e-06, "loss": 15.6602, "step": 396840 }, { "epoch": 0.8016621080572244, "grad_norm": 506.8882751464844, "learning_rate": 1.241376407627205e-06, "loss": 18.1595, "step": 396850 }, { "epoch": 0.8016823086899082, "grad_norm": 258.618408203125, "learning_rate": 1.2411462158780791e-06, "loss": 25.7771, "step": 396860 }, { "epoch": 0.801702509322592, "grad_norm": 198.40570068359375, "learning_rate": 1.2409160424491524e-06, "loss": 15.2096, "step": 396870 }, { "epoch": 0.8017227099552758, "grad_norm": 106.71107482910156, "learning_rate": 1.240685887341545e-06, "loss": 16.5103, "step": 396880 }, { "epoch": 0.8017429105879597, "grad_norm": 265.5809631347656, "learning_rate": 1.240455750556377e-06, "loss": 22.6536, "step": 396890 }, { "epoch": 0.8017631112206435, "grad_norm": 217.96302795410156, "learning_rate": 1.240225632094773e-06, "loss": 18.8894, "step": 396900 }, { "epoch": 0.8017833118533273, "grad_norm": 273.5916748046875, "learning_rate": 1.2399955319578521e-06, "loss": 26.3029, "step": 396910 }, { "epoch": 0.8018035124860111, "grad_norm": 199.21949768066406, "learning_rate": 1.2397654501467387e-06, "loss": 19.6072, "step": 396920 }, { "epoch": 0.8018237131186948, "grad_norm": 471.6419677734375, "learning_rate": 1.2395353866625521e-06, "loss": 13.9212, "step": 396930 }, { "epoch": 0.8018439137513786, "grad_norm": 115.33697509765625, "learning_rate": 1.2393053415064121e-06, "loss": 11.599, "step": 396940 }, { "epoch": 0.8018641143840625, "grad_norm": 71.505615234375, "learning_rate": 1.2390753146794438e-06, "loss": 17.6238, "step": 396950 }, { "epoch": 0.8018843150167463, "grad_norm": 28.35445213317871, "learning_rate": 1.2388453061827644e-06, "loss": 14.7451, "step": 396960 }, { "epoch": 0.8019045156494301, "grad_norm": 1615.241455078125, "learning_rate": 1.2386153160174986e-06, "loss": 23.657, "step": 396970 }, { "epoch": 0.8019247162821139, "grad_norm": 287.8320007324219, "learning_rate": 1.2383853441847638e-06, "loss": 26.5884, "step": 396980 }, { "epoch": 0.8019449169147977, "grad_norm": 424.38397216796875, "learning_rate": 1.2381553906856842e-06, "loss": 18.97, "step": 396990 }, { "epoch": 0.8019651175474816, "grad_norm": 150.43409729003906, "learning_rate": 1.2379254555213788e-06, "loss": 21.8474, "step": 397000 }, { "epoch": 0.8019853181801654, "grad_norm": 39.58400344848633, "learning_rate": 1.2376955386929673e-06, "loss": 10.4278, "step": 397010 }, { "epoch": 0.8020055188128492, "grad_norm": 187.23861694335938, "learning_rate": 1.2374656402015728e-06, "loss": 20.1658, "step": 397020 }, { "epoch": 0.802025719445533, "grad_norm": 439.4674377441406, "learning_rate": 1.2372357600483142e-06, "loss": 14.7039, "step": 397030 }, { "epoch": 0.8020459200782168, "grad_norm": 487.6591491699219, "learning_rate": 1.2370058982343109e-06, "loss": 25.8952, "step": 397040 }, { "epoch": 0.8020661207109007, "grad_norm": 159.7469024658203, "learning_rate": 1.2367760547606844e-06, "loss": 13.7368, "step": 397050 }, { "epoch": 0.8020863213435845, "grad_norm": 181.9182586669922, "learning_rate": 1.236546229628558e-06, "loss": 12.5134, "step": 397060 }, { "epoch": 0.8021065219762683, "grad_norm": 37.34762191772461, "learning_rate": 1.2363164228390456e-06, "loss": 19.3505, "step": 397070 }, { "epoch": 0.8021267226089521, "grad_norm": 175.5815887451172, "learning_rate": 1.236086634393271e-06, "loss": 6.5382, "step": 397080 }, { "epoch": 0.802146923241636, "grad_norm": 259.8550109863281, "learning_rate": 1.2358568642923546e-06, "loss": 12.6938, "step": 397090 }, { "epoch": 0.8021671238743198, "grad_norm": 202.00611877441406, "learning_rate": 1.2356271125374153e-06, "loss": 17.4895, "step": 397100 }, { "epoch": 0.8021873245070036, "grad_norm": 23.87159538269043, "learning_rate": 1.2353973791295715e-06, "loss": 16.5736, "step": 397110 }, { "epoch": 0.8022075251396874, "grad_norm": 385.1756591796875, "learning_rate": 1.2351676640699444e-06, "loss": 31.5913, "step": 397120 }, { "epoch": 0.8022277257723712, "grad_norm": 203.4720458984375, "learning_rate": 1.2349379673596568e-06, "loss": 8.2035, "step": 397130 }, { "epoch": 0.802247926405055, "grad_norm": 424.03607177734375, "learning_rate": 1.2347082889998214e-06, "loss": 16.6293, "step": 397140 }, { "epoch": 0.8022681270377389, "grad_norm": 422.792724609375, "learning_rate": 1.234478628991561e-06, "loss": 17.1787, "step": 397150 }, { "epoch": 0.8022883276704227, "grad_norm": 329.4071350097656, "learning_rate": 1.234248987335997e-06, "loss": 17.5761, "step": 397160 }, { "epoch": 0.8023085283031065, "grad_norm": 159.201904296875, "learning_rate": 1.234019364034247e-06, "loss": 14.4929, "step": 397170 }, { "epoch": 0.8023287289357902, "grad_norm": 361.199462890625, "learning_rate": 1.2337897590874275e-06, "loss": 29.3296, "step": 397180 }, { "epoch": 0.802348929568474, "grad_norm": 199.7323455810547, "learning_rate": 1.2335601724966617e-06, "loss": 13.6068, "step": 397190 }, { "epoch": 0.8023691302011579, "grad_norm": 365.9956359863281, "learning_rate": 1.2333306042630672e-06, "loss": 21.2169, "step": 397200 }, { "epoch": 0.8023893308338417, "grad_norm": 357.32781982421875, "learning_rate": 1.2331010543877608e-06, "loss": 15.2384, "step": 397210 }, { "epoch": 0.8024095314665255, "grad_norm": 207.35882568359375, "learning_rate": 1.232871522871864e-06, "loss": 23.0565, "step": 397220 }, { "epoch": 0.8024297320992093, "grad_norm": 94.63365173339844, "learning_rate": 1.2326420097164938e-06, "loss": 17.4889, "step": 397230 }, { "epoch": 0.8024499327318931, "grad_norm": 167.00592041015625, "learning_rate": 1.2324125149227705e-06, "loss": 20.2553, "step": 397240 }, { "epoch": 0.802470133364577, "grad_norm": 212.19175720214844, "learning_rate": 1.2321830384918116e-06, "loss": 19.1417, "step": 397250 }, { "epoch": 0.8024903339972608, "grad_norm": 61.97660827636719, "learning_rate": 1.2319535804247345e-06, "loss": 8.2125, "step": 397260 }, { "epoch": 0.8025105346299446, "grad_norm": 384.7194519042969, "learning_rate": 1.2317241407226598e-06, "loss": 26.5421, "step": 397270 }, { "epoch": 0.8025307352626284, "grad_norm": 196.4254150390625, "learning_rate": 1.2314947193867034e-06, "loss": 9.9733, "step": 397280 }, { "epoch": 0.8025509358953122, "grad_norm": 104.04097747802734, "learning_rate": 1.2312653164179861e-06, "loss": 10.4877, "step": 397290 }, { "epoch": 0.802571136527996, "grad_norm": 192.03292846679688, "learning_rate": 1.2310359318176229e-06, "loss": 11.8193, "step": 397300 }, { "epoch": 0.8025913371606799, "grad_norm": 129.7720184326172, "learning_rate": 1.2308065655867346e-06, "loss": 19.9434, "step": 397310 }, { "epoch": 0.8026115377933637, "grad_norm": 290.4443664550781, "learning_rate": 1.2305772177264385e-06, "loss": 9.9242, "step": 397320 }, { "epoch": 0.8026317384260475, "grad_norm": 232.30136108398438, "learning_rate": 1.2303478882378506e-06, "loss": 17.0259, "step": 397330 }, { "epoch": 0.8026519390587313, "grad_norm": 134.59349060058594, "learning_rate": 1.2301185771220907e-06, "loss": 35.992, "step": 397340 }, { "epoch": 0.8026721396914152, "grad_norm": 245.8950958251953, "learning_rate": 1.2298892843802756e-06, "loss": 15.7384, "step": 397350 }, { "epoch": 0.802692340324099, "grad_norm": 399.4751281738281, "learning_rate": 1.2296600100135219e-06, "loss": 28.9305, "step": 397360 }, { "epoch": 0.8027125409567828, "grad_norm": 270.90814208984375, "learning_rate": 1.2294307540229478e-06, "loss": 18.9427, "step": 397370 }, { "epoch": 0.8027327415894666, "grad_norm": 370.8442077636719, "learning_rate": 1.2292015164096726e-06, "loss": 15.6411, "step": 397380 }, { "epoch": 0.8027529422221504, "grad_norm": 183.45126342773438, "learning_rate": 1.2289722971748113e-06, "loss": 18.3014, "step": 397390 }, { "epoch": 0.8027731428548343, "grad_norm": 227.86546325683594, "learning_rate": 1.2287430963194807e-06, "loss": 19.5854, "step": 397400 }, { "epoch": 0.8027933434875181, "grad_norm": 211.9777374267578, "learning_rate": 1.2285139138448005e-06, "loss": 11.3569, "step": 397410 }, { "epoch": 0.8028135441202019, "grad_norm": 369.38079833984375, "learning_rate": 1.2282847497518857e-06, "loss": 19.0939, "step": 397420 }, { "epoch": 0.8028337447528857, "grad_norm": 282.4060363769531, "learning_rate": 1.2280556040418517e-06, "loss": 19.038, "step": 397430 }, { "epoch": 0.8028539453855694, "grad_norm": 26.148338317871094, "learning_rate": 1.2278264767158176e-06, "loss": 9.952, "step": 397440 }, { "epoch": 0.8028741460182532, "grad_norm": 265.15118408203125, "learning_rate": 1.2275973677749015e-06, "loss": 8.3944, "step": 397450 }, { "epoch": 0.8028943466509371, "grad_norm": 226.5189208984375, "learning_rate": 1.2273682772202183e-06, "loss": 16.502, "step": 397460 }, { "epoch": 0.8029145472836209, "grad_norm": 336.2112121582031, "learning_rate": 1.2271392050528825e-06, "loss": 24.1014, "step": 397470 }, { "epoch": 0.8029347479163047, "grad_norm": 386.9812927246094, "learning_rate": 1.2269101512740145e-06, "loss": 12.213, "step": 397480 }, { "epoch": 0.8029549485489885, "grad_norm": 301.6419372558594, "learning_rate": 1.2266811158847285e-06, "loss": 14.2627, "step": 397490 }, { "epoch": 0.8029751491816723, "grad_norm": 138.30197143554688, "learning_rate": 1.22645209888614e-06, "loss": 18.6229, "step": 397500 }, { "epoch": 0.8029953498143562, "grad_norm": 499.4685363769531, "learning_rate": 1.226223100279368e-06, "loss": 19.8944, "step": 397510 }, { "epoch": 0.80301555044704, "grad_norm": 477.4286193847656, "learning_rate": 1.2259941200655246e-06, "loss": 9.528, "step": 397520 }, { "epoch": 0.8030357510797238, "grad_norm": 332.3381042480469, "learning_rate": 1.2257651582457302e-06, "loss": 19.7095, "step": 397530 }, { "epoch": 0.8030559517124076, "grad_norm": 162.37991333007812, "learning_rate": 1.2255362148210987e-06, "loss": 17.2979, "step": 397540 }, { "epoch": 0.8030761523450914, "grad_norm": 208.4164581298828, "learning_rate": 1.2253072897927437e-06, "loss": 13.4284, "step": 397550 }, { "epoch": 0.8030963529777753, "grad_norm": 168.43919372558594, "learning_rate": 1.2250783831617852e-06, "loss": 10.5122, "step": 397560 }, { "epoch": 0.8031165536104591, "grad_norm": 138.19631958007812, "learning_rate": 1.2248494949293354e-06, "loss": 31.9011, "step": 397570 }, { "epoch": 0.8031367542431429, "grad_norm": 246.8783416748047, "learning_rate": 1.2246206250965127e-06, "loss": 23.5343, "step": 397580 }, { "epoch": 0.8031569548758267, "grad_norm": 188.40248107910156, "learning_rate": 1.2243917736644296e-06, "loss": 14.4395, "step": 397590 }, { "epoch": 0.8031771555085105, "grad_norm": 406.2428894042969, "learning_rate": 1.2241629406342048e-06, "loss": 32.2776, "step": 397600 }, { "epoch": 0.8031973561411944, "grad_norm": 254.3419952392578, "learning_rate": 1.2239341260069516e-06, "loss": 17.5596, "step": 397610 }, { "epoch": 0.8032175567738782, "grad_norm": 112.38162231445312, "learning_rate": 1.2237053297837841e-06, "loss": 19.2531, "step": 397620 }, { "epoch": 0.803237757406562, "grad_norm": 275.3819580078125, "learning_rate": 1.2234765519658204e-06, "loss": 16.7495, "step": 397630 }, { "epoch": 0.8032579580392458, "grad_norm": 408.72369384765625, "learning_rate": 1.2232477925541736e-06, "loss": 22.6012, "step": 397640 }, { "epoch": 0.8032781586719296, "grad_norm": 121.64466857910156, "learning_rate": 1.223019051549958e-06, "loss": 28.2308, "step": 397650 }, { "epoch": 0.8032983593046135, "grad_norm": 199.79891967773438, "learning_rate": 1.2227903289542892e-06, "loss": 12.6063, "step": 397660 }, { "epoch": 0.8033185599372973, "grad_norm": 283.36175537109375, "learning_rate": 1.2225616247682848e-06, "loss": 17.1782, "step": 397670 }, { "epoch": 0.8033387605699811, "grad_norm": 146.77545166015625, "learning_rate": 1.2223329389930544e-06, "loss": 14.2625, "step": 397680 }, { "epoch": 0.8033589612026649, "grad_norm": 81.38027954101562, "learning_rate": 1.2221042716297148e-06, "loss": 9.9055, "step": 397690 }, { "epoch": 0.8033791618353486, "grad_norm": 627.0073852539062, "learning_rate": 1.2218756226793827e-06, "loss": 14.7687, "step": 397700 }, { "epoch": 0.8033993624680325, "grad_norm": 233.77366638183594, "learning_rate": 1.22164699214317e-06, "loss": 15.2893, "step": 397710 }, { "epoch": 0.8034195631007163, "grad_norm": 346.3612365722656, "learning_rate": 1.2214183800221906e-06, "loss": 14.7574, "step": 397720 }, { "epoch": 0.8034397637334001, "grad_norm": 191.14744567871094, "learning_rate": 1.2211897863175597e-06, "loss": 21.788, "step": 397730 }, { "epoch": 0.8034599643660839, "grad_norm": 248.18954467773438, "learning_rate": 1.2209612110303941e-06, "loss": 19.429, "step": 397740 }, { "epoch": 0.8034801649987677, "grad_norm": 0.0, "learning_rate": 1.2207326541618024e-06, "loss": 16.934, "step": 397750 }, { "epoch": 0.8035003656314516, "grad_norm": 367.9290466308594, "learning_rate": 1.2205041157129017e-06, "loss": 15.3372, "step": 397760 }, { "epoch": 0.8035205662641354, "grad_norm": 262.3650817871094, "learning_rate": 1.2202755956848067e-06, "loss": 14.9691, "step": 397770 }, { "epoch": 0.8035407668968192, "grad_norm": 311.2469787597656, "learning_rate": 1.2200470940786302e-06, "loss": 13.8611, "step": 397780 }, { "epoch": 0.803560967529503, "grad_norm": 200.50157165527344, "learning_rate": 1.219818610895484e-06, "loss": 16.0039, "step": 397790 }, { "epoch": 0.8035811681621868, "grad_norm": 383.5213928222656, "learning_rate": 1.2195901461364851e-06, "loss": 11.0469, "step": 397800 }, { "epoch": 0.8036013687948707, "grad_norm": 124.4923095703125, "learning_rate": 1.2193616998027452e-06, "loss": 14.8801, "step": 397810 }, { "epoch": 0.8036215694275545, "grad_norm": 73.41307830810547, "learning_rate": 1.2191332718953763e-06, "loss": 14.8167, "step": 397820 }, { "epoch": 0.8036417700602383, "grad_norm": 515.7150268554688, "learning_rate": 1.2189048624154948e-06, "loss": 20.4934, "step": 397830 }, { "epoch": 0.8036619706929221, "grad_norm": 512.2528686523438, "learning_rate": 1.2186764713642108e-06, "loss": 20.3204, "step": 397840 }, { "epoch": 0.8036821713256059, "grad_norm": 179.5736846923828, "learning_rate": 1.218448098742641e-06, "loss": 13.0528, "step": 397850 }, { "epoch": 0.8037023719582898, "grad_norm": 97.36751556396484, "learning_rate": 1.2182197445518946e-06, "loss": 14.3089, "step": 397860 }, { "epoch": 0.8037225725909736, "grad_norm": 140.2680206298828, "learning_rate": 1.2179914087930884e-06, "loss": 15.3194, "step": 397870 }, { "epoch": 0.8037427732236574, "grad_norm": 201.3263397216797, "learning_rate": 1.2177630914673327e-06, "loss": 15.8953, "step": 397880 }, { "epoch": 0.8037629738563412, "grad_norm": 468.73779296875, "learning_rate": 1.2175347925757397e-06, "loss": 44.5259, "step": 397890 }, { "epoch": 0.803783174489025, "grad_norm": 467.7637939453125, "learning_rate": 1.217306512119425e-06, "loss": 19.8756, "step": 397900 }, { "epoch": 0.8038033751217089, "grad_norm": 319.2969055175781, "learning_rate": 1.2170782500994983e-06, "loss": 27.4442, "step": 397910 }, { "epoch": 0.8038235757543927, "grad_norm": 124.95311737060547, "learning_rate": 1.2168500065170747e-06, "loss": 12.8769, "step": 397920 }, { "epoch": 0.8038437763870765, "grad_norm": 68.92601013183594, "learning_rate": 1.216621781373265e-06, "loss": 25.3759, "step": 397930 }, { "epoch": 0.8038639770197603, "grad_norm": 272.4542236328125, "learning_rate": 1.2163935746691807e-06, "loss": 18.0919, "step": 397940 }, { "epoch": 0.803884177652444, "grad_norm": 182.25563049316406, "learning_rate": 1.216165386405937e-06, "loss": 17.3764, "step": 397950 }, { "epoch": 0.8039043782851278, "grad_norm": 339.94793701171875, "learning_rate": 1.215937216584644e-06, "loss": 19.5213, "step": 397960 }, { "epoch": 0.8039245789178117, "grad_norm": 294.8418273925781, "learning_rate": 1.2157090652064124e-06, "loss": 21.7376, "step": 397970 }, { "epoch": 0.8039447795504955, "grad_norm": 29.99044418334961, "learning_rate": 1.215480932272356e-06, "loss": 10.2422, "step": 397980 }, { "epoch": 0.8039649801831793, "grad_norm": 225.97483825683594, "learning_rate": 1.2152528177835892e-06, "loss": 17.9698, "step": 397990 }, { "epoch": 0.8039851808158631, "grad_norm": 269.0800476074219, "learning_rate": 1.2150247217412186e-06, "loss": 11.3422, "step": 398000 }, { "epoch": 0.8040053814485469, "grad_norm": 93.34442138671875, "learning_rate": 1.2147966441463583e-06, "loss": 18.3126, "step": 398010 }, { "epoch": 0.8040255820812308, "grad_norm": 280.77978515625, "learning_rate": 1.2145685850001216e-06, "loss": 16.3848, "step": 398020 }, { "epoch": 0.8040457827139146, "grad_norm": 205.15957641601562, "learning_rate": 1.2143405443036182e-06, "loss": 15.7262, "step": 398030 }, { "epoch": 0.8040659833465984, "grad_norm": 256.5995178222656, "learning_rate": 1.2141125220579585e-06, "loss": 9.4315, "step": 398040 }, { "epoch": 0.8040861839792822, "grad_norm": 296.205078125, "learning_rate": 1.2138845182642555e-06, "loss": 20.1539, "step": 398050 }, { "epoch": 0.804106384611966, "grad_norm": 271.0738220214844, "learning_rate": 1.2136565329236217e-06, "loss": 14.3203, "step": 398060 }, { "epoch": 0.8041265852446499, "grad_norm": 168.38685607910156, "learning_rate": 1.2134285660371665e-06, "loss": 6.4825, "step": 398070 }, { "epoch": 0.8041467858773337, "grad_norm": 327.8859558105469, "learning_rate": 1.2132006176059997e-06, "loss": 20.4153, "step": 398080 }, { "epoch": 0.8041669865100175, "grad_norm": 120.73685455322266, "learning_rate": 1.2129726876312348e-06, "loss": 23.7981, "step": 398090 }, { "epoch": 0.8041871871427013, "grad_norm": 309.8375244140625, "learning_rate": 1.2127447761139821e-06, "loss": 15.3186, "step": 398100 }, { "epoch": 0.8042073877753851, "grad_norm": 269.47454833984375, "learning_rate": 1.2125168830553508e-06, "loss": 23.725, "step": 398110 }, { "epoch": 0.804227588408069, "grad_norm": 204.92672729492188, "learning_rate": 1.2122890084564542e-06, "loss": 14.0846, "step": 398120 }, { "epoch": 0.8042477890407528, "grad_norm": 0.0, "learning_rate": 1.2120611523184e-06, "loss": 14.8464, "step": 398130 }, { "epoch": 0.8042679896734366, "grad_norm": 262.072265625, "learning_rate": 1.2118333146423016e-06, "loss": 9.6489, "step": 398140 }, { "epoch": 0.8042881903061204, "grad_norm": 500.7174377441406, "learning_rate": 1.2116054954292688e-06, "loss": 27.9193, "step": 398150 }, { "epoch": 0.8043083909388042, "grad_norm": 110.99785614013672, "learning_rate": 1.2113776946804096e-06, "loss": 29.2869, "step": 398160 }, { "epoch": 0.8043285915714881, "grad_norm": 350.0794982910156, "learning_rate": 1.2111499123968374e-06, "loss": 14.8599, "step": 398170 }, { "epoch": 0.8043487922041719, "grad_norm": 150.34840393066406, "learning_rate": 1.2109221485796592e-06, "loss": 23.5857, "step": 398180 }, { "epoch": 0.8043689928368557, "grad_norm": 395.3791809082031, "learning_rate": 1.210694403229989e-06, "loss": 14.2273, "step": 398190 }, { "epoch": 0.8043891934695395, "grad_norm": 260.2132873535156, "learning_rate": 1.2104666763489326e-06, "loss": 12.3502, "step": 398200 }, { "epoch": 0.8044093941022232, "grad_norm": 188.606689453125, "learning_rate": 1.2102389679376037e-06, "loss": 19.4778, "step": 398210 }, { "epoch": 0.804429594734907, "grad_norm": 163.82281494140625, "learning_rate": 1.2100112779971107e-06, "loss": 16.9969, "step": 398220 }, { "epoch": 0.8044497953675909, "grad_norm": 277.0400695800781, "learning_rate": 1.2097836065285611e-06, "loss": 13.4515, "step": 398230 }, { "epoch": 0.8044699960002747, "grad_norm": 38.3205680847168, "learning_rate": 1.2095559535330681e-06, "loss": 5.4382, "step": 398240 }, { "epoch": 0.8044901966329585, "grad_norm": 16.85127067565918, "learning_rate": 1.20932831901174e-06, "loss": 13.8325, "step": 398250 }, { "epoch": 0.8045103972656423, "grad_norm": 320.6097106933594, "learning_rate": 1.2091007029656843e-06, "loss": 13.382, "step": 398260 }, { "epoch": 0.8045305978983261, "grad_norm": 302.9075622558594, "learning_rate": 1.2088731053960118e-06, "loss": 14.7996, "step": 398270 }, { "epoch": 0.80455079853101, "grad_norm": 336.9842224121094, "learning_rate": 1.2086455263038349e-06, "loss": 10.9834, "step": 398280 }, { "epoch": 0.8045709991636938, "grad_norm": 168.31488037109375, "learning_rate": 1.2084179656902573e-06, "loss": 20.9288, "step": 398290 }, { "epoch": 0.8045911997963776, "grad_norm": 262.40802001953125, "learning_rate": 1.2081904235563908e-06, "loss": 15.6244, "step": 398300 }, { "epoch": 0.8046114004290614, "grad_norm": 165.61788940429688, "learning_rate": 1.2079628999033449e-06, "loss": 23.3246, "step": 398310 }, { "epoch": 0.8046316010617452, "grad_norm": 681.7090454101562, "learning_rate": 1.2077353947322284e-06, "loss": 19.8467, "step": 398320 }, { "epoch": 0.8046518016944291, "grad_norm": 295.4180908203125, "learning_rate": 1.2075079080441482e-06, "loss": 13.1826, "step": 398330 }, { "epoch": 0.8046720023271129, "grad_norm": 117.75762176513672, "learning_rate": 1.207280439840215e-06, "loss": 9.0372, "step": 398340 }, { "epoch": 0.8046922029597967, "grad_norm": 139.77545166015625, "learning_rate": 1.2070529901215388e-06, "loss": 19.1832, "step": 398350 }, { "epoch": 0.8047124035924805, "grad_norm": 307.9924011230469, "learning_rate": 1.206825558889224e-06, "loss": 22.9239, "step": 398360 }, { "epoch": 0.8047326042251643, "grad_norm": 279.33001708984375, "learning_rate": 1.2065981461443815e-06, "loss": 12.1329, "step": 398370 }, { "epoch": 0.8047528048578482, "grad_norm": 406.6632385253906, "learning_rate": 1.2063707518881207e-06, "loss": 13.8407, "step": 398380 }, { "epoch": 0.804773005490532, "grad_norm": 333.20526123046875, "learning_rate": 1.206143376121549e-06, "loss": 30.6319, "step": 398390 }, { "epoch": 0.8047932061232158, "grad_norm": 235.8384552001953, "learning_rate": 1.2059160188457724e-06, "loss": 9.427, "step": 398400 }, { "epoch": 0.8048134067558996, "grad_norm": 465.1859130859375, "learning_rate": 1.2056886800619028e-06, "loss": 21.8705, "step": 398410 }, { "epoch": 0.8048336073885834, "grad_norm": 618.5145263671875, "learning_rate": 1.2054613597710463e-06, "loss": 16.6557, "step": 398420 }, { "epoch": 0.8048538080212673, "grad_norm": 206.8692626953125, "learning_rate": 1.2052340579743093e-06, "loss": 9.2732, "step": 398430 }, { "epoch": 0.8048740086539511, "grad_norm": 105.92778015136719, "learning_rate": 1.2050067746728033e-06, "loss": 15.8629, "step": 398440 }, { "epoch": 0.8048942092866349, "grad_norm": 625.5086059570312, "learning_rate": 1.2047795098676317e-06, "loss": 16.3607, "step": 398450 }, { "epoch": 0.8049144099193186, "grad_norm": 439.8845520019531, "learning_rate": 1.2045522635599066e-06, "loss": 19.4782, "step": 398460 }, { "epoch": 0.8049346105520024, "grad_norm": 95.932373046875, "learning_rate": 1.204325035750732e-06, "loss": 15.7428, "step": 398470 }, { "epoch": 0.8049548111846863, "grad_norm": 118.7497329711914, "learning_rate": 1.204097826441218e-06, "loss": 17.9142, "step": 398480 }, { "epoch": 0.8049750118173701, "grad_norm": 257.6410827636719, "learning_rate": 1.2038706356324703e-06, "loss": 19.8022, "step": 398490 }, { "epoch": 0.8049952124500539, "grad_norm": 179.03211975097656, "learning_rate": 1.203643463325596e-06, "loss": 10.5498, "step": 398500 }, { "epoch": 0.8050154130827377, "grad_norm": 154.95106506347656, "learning_rate": 1.2034163095217045e-06, "loss": 15.0052, "step": 398510 }, { "epoch": 0.8050356137154215, "grad_norm": 286.7881164550781, "learning_rate": 1.2031891742218992e-06, "loss": 20.9348, "step": 398520 }, { "epoch": 0.8050558143481054, "grad_norm": 264.2137756347656, "learning_rate": 1.2029620574272916e-06, "loss": 17.7529, "step": 398530 }, { "epoch": 0.8050760149807892, "grad_norm": 72.37132263183594, "learning_rate": 1.2027349591389858e-06, "loss": 10.622, "step": 398540 }, { "epoch": 0.805096215613473, "grad_norm": 222.1959228515625, "learning_rate": 1.2025078793580885e-06, "loss": 16.3596, "step": 398550 }, { "epoch": 0.8051164162461568, "grad_norm": 208.58372497558594, "learning_rate": 1.202280818085708e-06, "loss": 12.9192, "step": 398560 }, { "epoch": 0.8051366168788406, "grad_norm": 110.48745727539062, "learning_rate": 1.2020537753229506e-06, "loss": 26.4348, "step": 398570 }, { "epoch": 0.8051568175115245, "grad_norm": 200.78091430664062, "learning_rate": 1.2018267510709208e-06, "loss": 14.0025, "step": 398580 }, { "epoch": 0.8051770181442083, "grad_norm": 165.2290802001953, "learning_rate": 1.201599745330727e-06, "loss": 15.6407, "step": 398590 }, { "epoch": 0.8051972187768921, "grad_norm": 218.70245361328125, "learning_rate": 1.2013727581034783e-06, "loss": 14.105, "step": 398600 }, { "epoch": 0.8052174194095759, "grad_norm": 203.8513641357422, "learning_rate": 1.201145789390275e-06, "loss": 15.6538, "step": 398610 }, { "epoch": 0.8052376200422597, "grad_norm": 72.12773132324219, "learning_rate": 1.2009188391922261e-06, "loss": 15.0556, "step": 398620 }, { "epoch": 0.8052578206749436, "grad_norm": 407.7510986328125, "learning_rate": 1.2006919075104396e-06, "loss": 19.1825, "step": 398630 }, { "epoch": 0.8052780213076274, "grad_norm": 45.817867279052734, "learning_rate": 1.20046499434602e-06, "loss": 23.2415, "step": 398640 }, { "epoch": 0.8052982219403112, "grad_norm": 414.6622009277344, "learning_rate": 1.2002380997000717e-06, "loss": 25.9781, "step": 398650 }, { "epoch": 0.805318422572995, "grad_norm": 387.9441833496094, "learning_rate": 1.200011223573702e-06, "loss": 14.8246, "step": 398660 }, { "epoch": 0.8053386232056788, "grad_norm": 588.7049560546875, "learning_rate": 1.1997843659680202e-06, "loss": 22.3775, "step": 398670 }, { "epoch": 0.8053588238383627, "grad_norm": 95.54832458496094, "learning_rate": 1.1995575268841254e-06, "loss": 22.8317, "step": 398680 }, { "epoch": 0.8053790244710465, "grad_norm": 240.92782592773438, "learning_rate": 1.1993307063231258e-06, "loss": 19.4986, "step": 398690 }, { "epoch": 0.8053992251037303, "grad_norm": 1210.0072021484375, "learning_rate": 1.199103904286129e-06, "loss": 28.1644, "step": 398700 }, { "epoch": 0.8054194257364141, "grad_norm": 291.378662109375, "learning_rate": 1.1988771207742388e-06, "loss": 22.2814, "step": 398710 }, { "epoch": 0.8054396263690978, "grad_norm": 0.0, "learning_rate": 1.1986503557885587e-06, "loss": 19.72, "step": 398720 }, { "epoch": 0.8054598270017816, "grad_norm": 166.5487518310547, "learning_rate": 1.1984236093301976e-06, "loss": 19.7955, "step": 398730 }, { "epoch": 0.8054800276344655, "grad_norm": 182.96737670898438, "learning_rate": 1.1981968814002576e-06, "loss": 15.899, "step": 398740 }, { "epoch": 0.8055002282671493, "grad_norm": 358.73419189453125, "learning_rate": 1.1979701719998454e-06, "loss": 17.2248, "step": 398750 }, { "epoch": 0.8055204288998331, "grad_norm": 165.03488159179688, "learning_rate": 1.1977434811300664e-06, "loss": 15.2493, "step": 398760 }, { "epoch": 0.8055406295325169, "grad_norm": 55.36629104614258, "learning_rate": 1.1975168087920226e-06, "loss": 11.9144, "step": 398770 }, { "epoch": 0.8055608301652007, "grad_norm": 458.8868408203125, "learning_rate": 1.1972901549868222e-06, "loss": 15.7532, "step": 398780 }, { "epoch": 0.8055810307978846, "grad_norm": 37.14472961425781, "learning_rate": 1.1970635197155671e-06, "loss": 24.2755, "step": 398790 }, { "epoch": 0.8056012314305684, "grad_norm": 58.913291931152344, "learning_rate": 1.1968369029793642e-06, "loss": 22.3952, "step": 398800 }, { "epoch": 0.8056214320632522, "grad_norm": 531.3046875, "learning_rate": 1.1966103047793158e-06, "loss": 23.0386, "step": 398810 }, { "epoch": 0.805641632695936, "grad_norm": 154.9938201904297, "learning_rate": 1.196383725116529e-06, "loss": 22.3197, "step": 398820 }, { "epoch": 0.8056618333286198, "grad_norm": 232.80345153808594, "learning_rate": 1.1961571639921066e-06, "loss": 16.7718, "step": 398830 }, { "epoch": 0.8056820339613037, "grad_norm": 365.3929138183594, "learning_rate": 1.1959306214071508e-06, "loss": 30.4858, "step": 398840 }, { "epoch": 0.8057022345939875, "grad_norm": 275.28375244140625, "learning_rate": 1.1957040973627698e-06, "loss": 24.8302, "step": 398850 }, { "epoch": 0.8057224352266713, "grad_norm": 154.83529663085938, "learning_rate": 1.1954775918600658e-06, "loss": 16.1042, "step": 398860 }, { "epoch": 0.8057426358593551, "grad_norm": 344.1389465332031, "learning_rate": 1.1952511049001407e-06, "loss": 20.4838, "step": 398870 }, { "epoch": 0.805762836492039, "grad_norm": 566.6088256835938, "learning_rate": 1.1950246364841005e-06, "loss": 14.2111, "step": 398880 }, { "epoch": 0.8057830371247228, "grad_norm": 228.32220458984375, "learning_rate": 1.1947981866130515e-06, "loss": 8.6079, "step": 398890 }, { "epoch": 0.8058032377574066, "grad_norm": 303.71197509765625, "learning_rate": 1.1945717552880919e-06, "loss": 12.8364, "step": 398900 }, { "epoch": 0.8058234383900904, "grad_norm": 339.55267333984375, "learning_rate": 1.194345342510328e-06, "loss": 21.0118, "step": 398910 }, { "epoch": 0.8058436390227742, "grad_norm": 154.4351043701172, "learning_rate": 1.1941189482808645e-06, "loss": 12.8826, "step": 398920 }, { "epoch": 0.805863839655458, "grad_norm": 435.3111572265625, "learning_rate": 1.193892572600804e-06, "loss": 20.2506, "step": 398930 }, { "epoch": 0.8058840402881419, "grad_norm": 167.71466064453125, "learning_rate": 1.1936662154712475e-06, "loss": 10.3547, "step": 398940 }, { "epoch": 0.8059042409208257, "grad_norm": 375.5905456542969, "learning_rate": 1.193439876893301e-06, "loss": 9.9191, "step": 398950 }, { "epoch": 0.8059244415535095, "grad_norm": 293.1602478027344, "learning_rate": 1.1932135568680691e-06, "loss": 23.1765, "step": 398960 }, { "epoch": 0.8059446421861932, "grad_norm": 250.81666564941406, "learning_rate": 1.1929872553966497e-06, "loss": 13.2531, "step": 398970 }, { "epoch": 0.805964842818877, "grad_norm": 500.49951171875, "learning_rate": 1.1927609724801492e-06, "loss": 24.7115, "step": 398980 }, { "epoch": 0.8059850434515609, "grad_norm": 379.9617919921875, "learning_rate": 1.1925347081196709e-06, "loss": 13.52, "step": 398990 }, { "epoch": 0.8060052440842447, "grad_norm": 354.4438781738281, "learning_rate": 1.1923084623163172e-06, "loss": 14.7162, "step": 399000 }, { "epoch": 0.8060254447169285, "grad_norm": 82.84439086914062, "learning_rate": 1.192082235071188e-06, "loss": 14.2675, "step": 399010 }, { "epoch": 0.8060456453496123, "grad_norm": 707.4915161132812, "learning_rate": 1.1918560263853902e-06, "loss": 20.2562, "step": 399020 }, { "epoch": 0.8060658459822961, "grad_norm": 22.837814331054688, "learning_rate": 1.1916298362600243e-06, "loss": 16.474, "step": 399030 }, { "epoch": 0.80608604661498, "grad_norm": 270.079833984375, "learning_rate": 1.1914036646961907e-06, "loss": 17.6735, "step": 399040 }, { "epoch": 0.8061062472476638, "grad_norm": 9.748725891113281, "learning_rate": 1.1911775116949958e-06, "loss": 9.9788, "step": 399050 }, { "epoch": 0.8061264478803476, "grad_norm": 297.4463195800781, "learning_rate": 1.1909513772575383e-06, "loss": 14.2409, "step": 399060 }, { "epoch": 0.8061466485130314, "grad_norm": 114.02792358398438, "learning_rate": 1.1907252613849224e-06, "loss": 16.0235, "step": 399070 }, { "epoch": 0.8061668491457152, "grad_norm": 149.23251342773438, "learning_rate": 1.1904991640782487e-06, "loss": 20.0388, "step": 399080 }, { "epoch": 0.8061870497783991, "grad_norm": 233.6759033203125, "learning_rate": 1.190273085338622e-06, "loss": 15.1754, "step": 399090 }, { "epoch": 0.8062072504110829, "grad_norm": 280.7804870605469, "learning_rate": 1.1900470251671415e-06, "loss": 19.4538, "step": 399100 }, { "epoch": 0.8062274510437667, "grad_norm": 224.2080535888672, "learning_rate": 1.1898209835649083e-06, "loss": 12.5753, "step": 399110 }, { "epoch": 0.8062476516764505, "grad_norm": 63.96528625488281, "learning_rate": 1.189594960533027e-06, "loss": 13.236, "step": 399120 }, { "epoch": 0.8062678523091343, "grad_norm": 199.7959442138672, "learning_rate": 1.1893689560725963e-06, "loss": 9.7111, "step": 399130 }, { "epoch": 0.8062880529418182, "grad_norm": 71.29649353027344, "learning_rate": 1.1891429701847207e-06, "loss": 18.5232, "step": 399140 }, { "epoch": 0.806308253574502, "grad_norm": 271.61907958984375, "learning_rate": 1.1889170028705e-06, "loss": 23.1018, "step": 399150 }, { "epoch": 0.8063284542071858, "grad_norm": 657.7700805664062, "learning_rate": 1.1886910541310342e-06, "loss": 20.7727, "step": 399160 }, { "epoch": 0.8063486548398696, "grad_norm": 294.4358215332031, "learning_rate": 1.1884651239674272e-06, "loss": 24.4626, "step": 399170 }, { "epoch": 0.8063688554725534, "grad_norm": 335.00445556640625, "learning_rate": 1.188239212380779e-06, "loss": 18.1886, "step": 399180 }, { "epoch": 0.8063890561052373, "grad_norm": 410.5850524902344, "learning_rate": 1.1880133193721893e-06, "loss": 8.6333, "step": 399190 }, { "epoch": 0.8064092567379211, "grad_norm": 192.91400146484375, "learning_rate": 1.18778744494276e-06, "loss": 12.3074, "step": 399200 }, { "epoch": 0.8064294573706049, "grad_norm": 232.36941528320312, "learning_rate": 1.1875615890935954e-06, "loss": 18.4141, "step": 399210 }, { "epoch": 0.8064496580032887, "grad_norm": 139.28353881835938, "learning_rate": 1.1873357518257905e-06, "loss": 5.1217, "step": 399220 }, { "epoch": 0.8064698586359724, "grad_norm": 59.892879486083984, "learning_rate": 1.187109933140449e-06, "loss": 5.9729, "step": 399230 }, { "epoch": 0.8064900592686562, "grad_norm": 57.688533782958984, "learning_rate": 1.186884133038672e-06, "loss": 14.5395, "step": 399240 }, { "epoch": 0.8065102599013401, "grad_norm": 126.62397003173828, "learning_rate": 1.1866583515215597e-06, "loss": 15.7094, "step": 399250 }, { "epoch": 0.8065304605340239, "grad_norm": 531.3666381835938, "learning_rate": 1.18643258859021e-06, "loss": 19.1478, "step": 399260 }, { "epoch": 0.8065506611667077, "grad_norm": 253.88018798828125, "learning_rate": 1.1862068442457264e-06, "loss": 8.5033, "step": 399270 }, { "epoch": 0.8065708617993915, "grad_norm": 97.62865447998047, "learning_rate": 1.18598111848921e-06, "loss": 9.1533, "step": 399280 }, { "epoch": 0.8065910624320753, "grad_norm": 156.11648559570312, "learning_rate": 1.1857554113217568e-06, "loss": 22.9467, "step": 399290 }, { "epoch": 0.8066112630647592, "grad_norm": 8.152817726135254, "learning_rate": 1.185529722744469e-06, "loss": 15.5946, "step": 399300 }, { "epoch": 0.806631463697443, "grad_norm": 172.2769317626953, "learning_rate": 1.1853040527584475e-06, "loss": 13.9644, "step": 399310 }, { "epoch": 0.8066516643301268, "grad_norm": 69.96134185791016, "learning_rate": 1.185078401364792e-06, "loss": 17.1053, "step": 399320 }, { "epoch": 0.8066718649628106, "grad_norm": 300.71112060546875, "learning_rate": 1.1848527685646e-06, "loss": 13.1987, "step": 399330 }, { "epoch": 0.8066920655954944, "grad_norm": 150.75022888183594, "learning_rate": 1.1846271543589743e-06, "loss": 19.4473, "step": 399340 }, { "epoch": 0.8067122662281783, "grad_norm": 524.636962890625, "learning_rate": 1.1844015587490138e-06, "loss": 21.6838, "step": 399350 }, { "epoch": 0.8067324668608621, "grad_norm": 423.8528747558594, "learning_rate": 1.184175981735815e-06, "loss": 16.9267, "step": 399360 }, { "epoch": 0.8067526674935459, "grad_norm": 128.0084228515625, "learning_rate": 1.18395042332048e-06, "loss": 26.9176, "step": 399370 }, { "epoch": 0.8067728681262297, "grad_norm": 247.2524871826172, "learning_rate": 1.1837248835041093e-06, "loss": 14.1438, "step": 399380 }, { "epoch": 0.8067930687589135, "grad_norm": 561.0353393554688, "learning_rate": 1.1834993622878004e-06, "loss": 20.8819, "step": 399390 }, { "epoch": 0.8068132693915974, "grad_norm": 205.8802490234375, "learning_rate": 1.1832738596726518e-06, "loss": 20.3445, "step": 399400 }, { "epoch": 0.8068334700242812, "grad_norm": 169.2614288330078, "learning_rate": 1.1830483756597643e-06, "loss": 24.7525, "step": 399410 }, { "epoch": 0.806853670656965, "grad_norm": 180.25257873535156, "learning_rate": 1.1828229102502364e-06, "loss": 12.863, "step": 399420 }, { "epoch": 0.8068738712896488, "grad_norm": 13.585275650024414, "learning_rate": 1.1825974634451653e-06, "loss": 22.3966, "step": 399430 }, { "epoch": 0.8068940719223326, "grad_norm": 0.0, "learning_rate": 1.1823720352456525e-06, "loss": 12.7418, "step": 399440 }, { "epoch": 0.8069142725550165, "grad_norm": 261.3910827636719, "learning_rate": 1.1821466256527942e-06, "loss": 10.1638, "step": 399450 }, { "epoch": 0.8069344731877003, "grad_norm": 255.38241577148438, "learning_rate": 1.181921234667691e-06, "loss": 7.7034, "step": 399460 }, { "epoch": 0.8069546738203841, "grad_norm": 173.05081176757812, "learning_rate": 1.181695862291441e-06, "loss": 23.1084, "step": 399470 }, { "epoch": 0.8069748744530679, "grad_norm": 271.1430969238281, "learning_rate": 1.181470508525141e-06, "loss": 15.6248, "step": 399480 }, { "epoch": 0.8069950750857516, "grad_norm": 5.432796001434326, "learning_rate": 1.1812451733698905e-06, "loss": 16.3593, "step": 399490 }, { "epoch": 0.8070152757184355, "grad_norm": 277.3341369628906, "learning_rate": 1.1810198568267906e-06, "loss": 12.1287, "step": 399500 }, { "epoch": 0.8070354763511193, "grad_norm": 310.2237854003906, "learning_rate": 1.180794558896934e-06, "loss": 15.356, "step": 399510 }, { "epoch": 0.8070556769838031, "grad_norm": 1418.8232421875, "learning_rate": 1.180569279581421e-06, "loss": 27.3136, "step": 399520 }, { "epoch": 0.8070758776164869, "grad_norm": 287.5715026855469, "learning_rate": 1.1803440188813526e-06, "loss": 24.7846, "step": 399530 }, { "epoch": 0.8070960782491707, "grad_norm": 363.9814453125, "learning_rate": 1.1801187767978234e-06, "loss": 27.4829, "step": 399540 }, { "epoch": 0.8071162788818546, "grad_norm": 23.05392837524414, "learning_rate": 1.1798935533319305e-06, "loss": 9.2632, "step": 399550 }, { "epoch": 0.8071364795145384, "grad_norm": 385.9380187988281, "learning_rate": 1.1796683484847731e-06, "loss": 20.6488, "step": 399560 }, { "epoch": 0.8071566801472222, "grad_norm": 77.75495147705078, "learning_rate": 1.179443162257452e-06, "loss": 16.8697, "step": 399570 }, { "epoch": 0.807176880779906, "grad_norm": 317.1875305175781, "learning_rate": 1.179217994651059e-06, "loss": 12.0378, "step": 399580 }, { "epoch": 0.8071970814125898, "grad_norm": 193.00437927246094, "learning_rate": 1.1789928456666933e-06, "loss": 23.4707, "step": 399590 }, { "epoch": 0.8072172820452737, "grad_norm": 300.4735412597656, "learning_rate": 1.178767715305455e-06, "loss": 21.1703, "step": 399600 }, { "epoch": 0.8072374826779575, "grad_norm": 53.255409240722656, "learning_rate": 1.1785426035684395e-06, "loss": 30.742, "step": 399610 }, { "epoch": 0.8072576833106413, "grad_norm": 375.5081481933594, "learning_rate": 1.1783175104567418e-06, "loss": 21.0163, "step": 399620 }, { "epoch": 0.8072778839433251, "grad_norm": 239.88232421875, "learning_rate": 1.178092435971463e-06, "loss": 15.2669, "step": 399630 }, { "epoch": 0.8072980845760089, "grad_norm": 308.1907958984375, "learning_rate": 1.177867380113698e-06, "loss": 19.8267, "step": 399640 }, { "epoch": 0.8073182852086928, "grad_norm": 372.1334533691406, "learning_rate": 1.1776423428845423e-06, "loss": 20.8164, "step": 399650 }, { "epoch": 0.8073384858413766, "grad_norm": 393.211181640625, "learning_rate": 1.1774173242850955e-06, "loss": 17.4059, "step": 399660 }, { "epoch": 0.8073586864740604, "grad_norm": 280.3493347167969, "learning_rate": 1.1771923243164518e-06, "loss": 7.2661, "step": 399670 }, { "epoch": 0.8073788871067442, "grad_norm": 341.0635986328125, "learning_rate": 1.1769673429797107e-06, "loss": 21.5844, "step": 399680 }, { "epoch": 0.807399087739428, "grad_norm": 111.39185333251953, "learning_rate": 1.1767423802759653e-06, "loss": 11.8772, "step": 399690 }, { "epoch": 0.8074192883721119, "grad_norm": 344.21966552734375, "learning_rate": 1.1765174362063152e-06, "loss": 8.3405, "step": 399700 }, { "epoch": 0.8074394890047957, "grad_norm": 71.20906066894531, "learning_rate": 1.1762925107718558e-06, "loss": 21.3676, "step": 399710 }, { "epoch": 0.8074596896374795, "grad_norm": 156.96974182128906, "learning_rate": 1.1760676039736813e-06, "loss": 17.7822, "step": 399720 }, { "epoch": 0.8074798902701633, "grad_norm": 380.2327575683594, "learning_rate": 1.175842715812891e-06, "loss": 19.923, "step": 399730 }, { "epoch": 0.807500090902847, "grad_norm": 355.2200012207031, "learning_rate": 1.1756178462905782e-06, "loss": 20.809, "step": 399740 }, { "epoch": 0.8075202915355308, "grad_norm": 333.0960388183594, "learning_rate": 1.1753929954078414e-06, "loss": 17.5318, "step": 399750 }, { "epoch": 0.8075404921682147, "grad_norm": 244.53305053710938, "learning_rate": 1.1751681631657752e-06, "loss": 20.7759, "step": 399760 }, { "epoch": 0.8075606928008985, "grad_norm": 215.47056579589844, "learning_rate": 1.1749433495654743e-06, "loss": 20.6194, "step": 399770 }, { "epoch": 0.8075808934335823, "grad_norm": 367.5793762207031, "learning_rate": 1.174718554608037e-06, "loss": 20.4388, "step": 399780 }, { "epoch": 0.8076010940662661, "grad_norm": 183.8079376220703, "learning_rate": 1.174493778294557e-06, "loss": 12.2244, "step": 399790 }, { "epoch": 0.8076212946989499, "grad_norm": 283.8914489746094, "learning_rate": 1.1742690206261293e-06, "loss": 20.1712, "step": 399800 }, { "epoch": 0.8076414953316338, "grad_norm": 509.17169189453125, "learning_rate": 1.1740442816038505e-06, "loss": 24.975, "step": 399810 }, { "epoch": 0.8076616959643176, "grad_norm": 256.3719177246094, "learning_rate": 1.173819561228819e-06, "loss": 15.2413, "step": 399820 }, { "epoch": 0.8076818965970014, "grad_norm": 191.49972534179688, "learning_rate": 1.1735948595021234e-06, "loss": 17.7072, "step": 399830 }, { "epoch": 0.8077020972296852, "grad_norm": 467.4842529296875, "learning_rate": 1.1733701764248623e-06, "loss": 24.6718, "step": 399840 }, { "epoch": 0.807722297862369, "grad_norm": 162.53460693359375, "learning_rate": 1.1731455119981327e-06, "loss": 14.8806, "step": 399850 }, { "epoch": 0.8077424984950529, "grad_norm": 236.5585174560547, "learning_rate": 1.1729208662230273e-06, "loss": 19.6252, "step": 399860 }, { "epoch": 0.8077626991277367, "grad_norm": 50.34816360473633, "learning_rate": 1.1726962391006409e-06, "loss": 19.6513, "step": 399870 }, { "epoch": 0.8077828997604205, "grad_norm": 339.0187072753906, "learning_rate": 1.1724716306320676e-06, "loss": 19.9305, "step": 399880 }, { "epoch": 0.8078031003931043, "grad_norm": 67.13558959960938, "learning_rate": 1.1722470408184072e-06, "loss": 14.7635, "step": 399890 }, { "epoch": 0.8078233010257881, "grad_norm": 211.7030029296875, "learning_rate": 1.1720224696607474e-06, "loss": 12.387, "step": 399900 }, { "epoch": 0.807843501658472, "grad_norm": 913.49169921875, "learning_rate": 1.1717979171601857e-06, "loss": 18.0726, "step": 399910 }, { "epoch": 0.8078637022911558, "grad_norm": 312.7806701660156, "learning_rate": 1.1715733833178178e-06, "loss": 19.7221, "step": 399920 }, { "epoch": 0.8078839029238396, "grad_norm": 613.5134887695312, "learning_rate": 1.1713488681347375e-06, "loss": 21.2203, "step": 399930 }, { "epoch": 0.8079041035565234, "grad_norm": 33.54090881347656, "learning_rate": 1.1711243716120363e-06, "loss": 36.6315, "step": 399940 }, { "epoch": 0.8079243041892072, "grad_norm": 188.7884063720703, "learning_rate": 1.1708998937508126e-06, "loss": 8.1141, "step": 399950 }, { "epoch": 0.8079445048218911, "grad_norm": 437.2848815917969, "learning_rate": 1.1706754345521582e-06, "loss": 16.1798, "step": 399960 }, { "epoch": 0.8079647054545749, "grad_norm": 165.2116241455078, "learning_rate": 1.1704509940171655e-06, "loss": 21.7297, "step": 399970 }, { "epoch": 0.8079849060872587, "grad_norm": 275.6861267089844, "learning_rate": 1.1702265721469302e-06, "loss": 9.3952, "step": 399980 }, { "epoch": 0.8080051067199425, "grad_norm": 372.7326965332031, "learning_rate": 1.1700021689425478e-06, "loss": 22.825, "step": 399990 }, { "epoch": 0.8080253073526262, "grad_norm": 146.03695678710938, "learning_rate": 1.1697777844051105e-06, "loss": 19.1027, "step": 400000 }, { "epoch": 0.80804550798531, "grad_norm": 640.4072265625, "learning_rate": 1.16955341853571e-06, "loss": 23.4808, "step": 400010 }, { "epoch": 0.8080657086179939, "grad_norm": 282.7901916503906, "learning_rate": 1.1693290713354433e-06, "loss": 15.1071, "step": 400020 }, { "epoch": 0.8080859092506777, "grad_norm": 201.63792419433594, "learning_rate": 1.169104742805402e-06, "loss": 7.1797, "step": 400030 }, { "epoch": 0.8081061098833615, "grad_norm": 232.16336059570312, "learning_rate": 1.168880432946678e-06, "loss": 8.3606, "step": 400040 }, { "epoch": 0.8081263105160453, "grad_norm": 362.397216796875, "learning_rate": 1.1686561417603677e-06, "loss": 19.8036, "step": 400050 }, { "epoch": 0.8081465111487292, "grad_norm": 244.0271759033203, "learning_rate": 1.168431869247561e-06, "loss": 19.1216, "step": 400060 }, { "epoch": 0.808166711781413, "grad_norm": 216.07676696777344, "learning_rate": 1.1682076154093542e-06, "loss": 34.9256, "step": 400070 }, { "epoch": 0.8081869124140968, "grad_norm": 353.02362060546875, "learning_rate": 1.1679833802468387e-06, "loss": 12.2923, "step": 400080 }, { "epoch": 0.8082071130467806, "grad_norm": 165.57379150390625, "learning_rate": 1.1677591637611057e-06, "loss": 14.4861, "step": 400090 }, { "epoch": 0.8082273136794644, "grad_norm": 115.16368103027344, "learning_rate": 1.1675349659532514e-06, "loss": 24.987, "step": 400100 }, { "epoch": 0.8082475143121483, "grad_norm": 223.8692169189453, "learning_rate": 1.1673107868243672e-06, "loss": 16.0138, "step": 400110 }, { "epoch": 0.8082677149448321, "grad_norm": 332.94573974609375, "learning_rate": 1.1670866263755437e-06, "loss": 26.5142, "step": 400120 }, { "epoch": 0.8082879155775159, "grad_norm": 721.4041137695312, "learning_rate": 1.1668624846078752e-06, "loss": 25.844, "step": 400130 }, { "epoch": 0.8083081162101997, "grad_norm": 110.05020141601562, "learning_rate": 1.1666383615224553e-06, "loss": 14.7056, "step": 400140 }, { "epoch": 0.8083283168428835, "grad_norm": 306.3424072265625, "learning_rate": 1.1664142571203751e-06, "loss": 16.3092, "step": 400150 }, { "epoch": 0.8083485174755674, "grad_norm": 252.95721435546875, "learning_rate": 1.1661901714027258e-06, "loss": 11.7558, "step": 400160 }, { "epoch": 0.8083687181082512, "grad_norm": 431.20428466796875, "learning_rate": 1.1659661043706e-06, "loss": 23.668, "step": 400170 }, { "epoch": 0.808388918740935, "grad_norm": 122.36553955078125, "learning_rate": 1.1657420560250938e-06, "loss": 17.6765, "step": 400180 }, { "epoch": 0.8084091193736188, "grad_norm": 132.11375427246094, "learning_rate": 1.1655180263672928e-06, "loss": 17.0006, "step": 400190 }, { "epoch": 0.8084293200063026, "grad_norm": 183.6145477294922, "learning_rate": 1.1652940153982917e-06, "loss": 8.2812, "step": 400200 }, { "epoch": 0.8084495206389865, "grad_norm": 304.5958251953125, "learning_rate": 1.1650700231191842e-06, "loss": 14.1453, "step": 400210 }, { "epoch": 0.8084697212716703, "grad_norm": 71.94236755371094, "learning_rate": 1.16484604953106e-06, "loss": 16.6686, "step": 400220 }, { "epoch": 0.8084899219043541, "grad_norm": 299.6750793457031, "learning_rate": 1.1646220946350095e-06, "loss": 13.4315, "step": 400230 }, { "epoch": 0.8085101225370379, "grad_norm": 328.0590515136719, "learning_rate": 1.1643981584321273e-06, "loss": 15.9897, "step": 400240 }, { "epoch": 0.8085303231697216, "grad_norm": 57.3280029296875, "learning_rate": 1.164174240923503e-06, "loss": 26.8358, "step": 400250 }, { "epoch": 0.8085505238024054, "grad_norm": 222.09327697753906, "learning_rate": 1.1639503421102272e-06, "loss": 22.1101, "step": 400260 }, { "epoch": 0.8085707244350893, "grad_norm": 164.5921173095703, "learning_rate": 1.1637264619933936e-06, "loss": 15.7269, "step": 400270 }, { "epoch": 0.8085909250677731, "grad_norm": 243.57485961914062, "learning_rate": 1.1635026005740902e-06, "loss": 9.8075, "step": 400280 }, { "epoch": 0.8086111257004569, "grad_norm": 401.4932861328125, "learning_rate": 1.1632787578534116e-06, "loss": 19.9063, "step": 400290 }, { "epoch": 0.8086313263331407, "grad_norm": 486.9357604980469, "learning_rate": 1.1630549338324454e-06, "loss": 13.4036, "step": 400300 }, { "epoch": 0.8086515269658245, "grad_norm": 381.2542419433594, "learning_rate": 1.1628311285122857e-06, "loss": 17.7054, "step": 400310 }, { "epoch": 0.8086717275985084, "grad_norm": 458.1753845214844, "learning_rate": 1.1626073418940214e-06, "loss": 20.6198, "step": 400320 }, { "epoch": 0.8086919282311922, "grad_norm": 247.89788818359375, "learning_rate": 1.162383573978742e-06, "loss": 21.9839, "step": 400330 }, { "epoch": 0.808712128863876, "grad_norm": 223.84432983398438, "learning_rate": 1.1621598247675415e-06, "loss": 22.6154, "step": 400340 }, { "epoch": 0.8087323294965598, "grad_norm": 507.99310302734375, "learning_rate": 1.1619360942615065e-06, "loss": 22.7302, "step": 400350 }, { "epoch": 0.8087525301292436, "grad_norm": 510.2653503417969, "learning_rate": 1.1617123824617315e-06, "loss": 25.2216, "step": 400360 }, { "epoch": 0.8087727307619275, "grad_norm": 412.6650695800781, "learning_rate": 1.1614886893693044e-06, "loss": 23.3922, "step": 400370 }, { "epoch": 0.8087929313946113, "grad_norm": 303.8271484375, "learning_rate": 1.1612650149853144e-06, "loss": 28.3372, "step": 400380 }, { "epoch": 0.8088131320272951, "grad_norm": 239.27586364746094, "learning_rate": 1.161041359310855e-06, "loss": 9.0398, "step": 400390 }, { "epoch": 0.8088333326599789, "grad_norm": 223.31639099121094, "learning_rate": 1.160817722347014e-06, "loss": 15.7669, "step": 400400 }, { "epoch": 0.8088535332926627, "grad_norm": 403.01251220703125, "learning_rate": 1.1605941040948803e-06, "loss": 14.3906, "step": 400410 }, { "epoch": 0.8088737339253466, "grad_norm": 267.8360900878906, "learning_rate": 1.1603705045555457e-06, "loss": 14.2053, "step": 400420 }, { "epoch": 0.8088939345580304, "grad_norm": 245.7668914794922, "learning_rate": 1.160146923730101e-06, "loss": 14.148, "step": 400430 }, { "epoch": 0.8089141351907142, "grad_norm": 0.0, "learning_rate": 1.1599233616196343e-06, "loss": 13.1763, "step": 400440 }, { "epoch": 0.808934335823398, "grad_norm": 193.6412353515625, "learning_rate": 1.159699818225234e-06, "loss": 10.4858, "step": 400450 }, { "epoch": 0.8089545364560818, "grad_norm": 356.0779113769531, "learning_rate": 1.159476293547992e-06, "loss": 16.1115, "step": 400460 }, { "epoch": 0.8089747370887657, "grad_norm": 296.2171630859375, "learning_rate": 1.1592527875889969e-06, "loss": 14.5623, "step": 400470 }, { "epoch": 0.8089949377214495, "grad_norm": 233.22799682617188, "learning_rate": 1.159029300349337e-06, "loss": 28.9043, "step": 400480 }, { "epoch": 0.8090151383541333, "grad_norm": 369.2990417480469, "learning_rate": 1.1588058318301021e-06, "loss": 13.5847, "step": 400490 }, { "epoch": 0.8090353389868171, "grad_norm": 173.05709838867188, "learning_rate": 1.1585823820323845e-06, "loss": 9.5506, "step": 400500 }, { "epoch": 0.8090555396195008, "grad_norm": 406.1762390136719, "learning_rate": 1.1583589509572679e-06, "loss": 13.2489, "step": 400510 }, { "epoch": 0.8090757402521846, "grad_norm": 252.809814453125, "learning_rate": 1.1581355386058434e-06, "loss": 9.7253, "step": 400520 }, { "epoch": 0.8090959408848685, "grad_norm": 310.58349609375, "learning_rate": 1.1579121449792018e-06, "loss": 26.8553, "step": 400530 }, { "epoch": 0.8091161415175523, "grad_norm": 147.6504364013672, "learning_rate": 1.1576887700784307e-06, "loss": 17.1205, "step": 400540 }, { "epoch": 0.8091363421502361, "grad_norm": 237.35272216796875, "learning_rate": 1.1574654139046171e-06, "loss": 25.4648, "step": 400550 }, { "epoch": 0.8091565427829199, "grad_norm": 144.78765869140625, "learning_rate": 1.1572420764588522e-06, "loss": 8.9568, "step": 400560 }, { "epoch": 0.8091767434156037, "grad_norm": 112.19599914550781, "learning_rate": 1.1570187577422237e-06, "loss": 10.6925, "step": 400570 }, { "epoch": 0.8091969440482876, "grad_norm": 199.1861572265625, "learning_rate": 1.1567954577558177e-06, "loss": 9.1417, "step": 400580 }, { "epoch": 0.8092171446809714, "grad_norm": 525.9576416015625, "learning_rate": 1.1565721765007247e-06, "loss": 28.7777, "step": 400590 }, { "epoch": 0.8092373453136552, "grad_norm": 148.76515197753906, "learning_rate": 1.1563489139780344e-06, "loss": 16.3673, "step": 400600 }, { "epoch": 0.809257545946339, "grad_norm": 241.41952514648438, "learning_rate": 1.1561256701888335e-06, "loss": 16.7367, "step": 400610 }, { "epoch": 0.8092777465790228, "grad_norm": 667.485107421875, "learning_rate": 1.1559024451342082e-06, "loss": 27.1425, "step": 400620 }, { "epoch": 0.8092979472117067, "grad_norm": 922.7293701171875, "learning_rate": 1.1556792388152494e-06, "loss": 29.9633, "step": 400630 }, { "epoch": 0.8093181478443905, "grad_norm": 156.090576171875, "learning_rate": 1.1554560512330437e-06, "loss": 14.8942, "step": 400640 }, { "epoch": 0.8093383484770743, "grad_norm": 358.8300476074219, "learning_rate": 1.1552328823886776e-06, "loss": 17.621, "step": 400650 }, { "epoch": 0.8093585491097581, "grad_norm": 261.3934631347656, "learning_rate": 1.155009732283242e-06, "loss": 15.5071, "step": 400660 }, { "epoch": 0.809378749742442, "grad_norm": 303.0128173828125, "learning_rate": 1.1547866009178204e-06, "loss": 25.2852, "step": 400670 }, { "epoch": 0.8093989503751258, "grad_norm": 194.46005249023438, "learning_rate": 1.1545634882935048e-06, "loss": 29.4698, "step": 400680 }, { "epoch": 0.8094191510078096, "grad_norm": 136.1949462890625, "learning_rate": 1.1543403944113797e-06, "loss": 16.4344, "step": 400690 }, { "epoch": 0.8094393516404934, "grad_norm": 395.17657470703125, "learning_rate": 1.154117319272532e-06, "loss": 12.4152, "step": 400700 }, { "epoch": 0.8094595522731772, "grad_norm": 65.0011978149414, "learning_rate": 1.1538942628780513e-06, "loss": 13.2049, "step": 400710 }, { "epoch": 0.809479752905861, "grad_norm": 482.576416015625, "learning_rate": 1.153671225229024e-06, "loss": 17.1693, "step": 400720 }, { "epoch": 0.8094999535385449, "grad_norm": 134.6630859375, "learning_rate": 1.1534482063265346e-06, "loss": 8.1859, "step": 400730 }, { "epoch": 0.8095201541712287, "grad_norm": 396.3411865234375, "learning_rate": 1.153225206171672e-06, "loss": 18.7596, "step": 400740 }, { "epoch": 0.8095403548039125, "grad_norm": 71.36418151855469, "learning_rate": 1.1530022247655253e-06, "loss": 12.0561, "step": 400750 }, { "epoch": 0.8095605554365963, "grad_norm": 248.11892700195312, "learning_rate": 1.1527792621091787e-06, "loss": 22.0916, "step": 400760 }, { "epoch": 0.80958075606928, "grad_norm": 433.9194641113281, "learning_rate": 1.1525563182037181e-06, "loss": 40.5253, "step": 400770 }, { "epoch": 0.8096009567019639, "grad_norm": 445.4182434082031, "learning_rate": 1.1523333930502323e-06, "loss": 12.6555, "step": 400780 }, { "epoch": 0.8096211573346477, "grad_norm": 32.89529800415039, "learning_rate": 1.1521104866498073e-06, "loss": 24.6389, "step": 400790 }, { "epoch": 0.8096413579673315, "grad_norm": 266.0649108886719, "learning_rate": 1.1518875990035278e-06, "loss": 16.6936, "step": 400800 }, { "epoch": 0.8096615586000153, "grad_norm": 257.1155090332031, "learning_rate": 1.1516647301124812e-06, "loss": 15.5554, "step": 400810 }, { "epoch": 0.8096817592326991, "grad_norm": 532.7479248046875, "learning_rate": 1.1514418799777554e-06, "loss": 28.5511, "step": 400820 }, { "epoch": 0.809701959865383, "grad_norm": 295.2567443847656, "learning_rate": 1.1512190486004353e-06, "loss": 23.7825, "step": 400830 }, { "epoch": 0.8097221604980668, "grad_norm": 109.22172546386719, "learning_rate": 1.1509962359816052e-06, "loss": 16.6026, "step": 400840 }, { "epoch": 0.8097423611307506, "grad_norm": 168.68563842773438, "learning_rate": 1.1507734421223544e-06, "loss": 11.8387, "step": 400850 }, { "epoch": 0.8097625617634344, "grad_norm": 90.349853515625, "learning_rate": 1.1505506670237664e-06, "loss": 22.7322, "step": 400860 }, { "epoch": 0.8097827623961182, "grad_norm": 245.07273864746094, "learning_rate": 1.1503279106869264e-06, "loss": 15.5705, "step": 400870 }, { "epoch": 0.8098029630288021, "grad_norm": 188.72161865234375, "learning_rate": 1.1501051731129227e-06, "loss": 8.7121, "step": 400880 }, { "epoch": 0.8098231636614859, "grad_norm": 373.906982421875, "learning_rate": 1.149882454302838e-06, "loss": 13.6114, "step": 400890 }, { "epoch": 0.8098433642941697, "grad_norm": 83.35111999511719, "learning_rate": 1.1496597542577603e-06, "loss": 7.2784, "step": 400900 }, { "epoch": 0.8098635649268535, "grad_norm": 315.1995544433594, "learning_rate": 1.149437072978773e-06, "loss": 26.5243, "step": 400910 }, { "epoch": 0.8098837655595373, "grad_norm": 338.7139587402344, "learning_rate": 1.1492144104669639e-06, "loss": 12.065, "step": 400920 }, { "epoch": 0.8099039661922212, "grad_norm": 272.12554931640625, "learning_rate": 1.1489917667234162e-06, "loss": 17.3584, "step": 400930 }, { "epoch": 0.809924166824905, "grad_norm": 223.7919464111328, "learning_rate": 1.1487691417492147e-06, "loss": 17.7089, "step": 400940 }, { "epoch": 0.8099443674575888, "grad_norm": 362.22930908203125, "learning_rate": 1.1485465355454467e-06, "loss": 14.7973, "step": 400950 }, { "epoch": 0.8099645680902726, "grad_norm": 284.8341979980469, "learning_rate": 1.1483239481131942e-06, "loss": 15.6576, "step": 400960 }, { "epoch": 0.8099847687229564, "grad_norm": 197.68020629882812, "learning_rate": 1.148101379453545e-06, "loss": 12.2411, "step": 400970 }, { "epoch": 0.8100049693556403, "grad_norm": 299.678466796875, "learning_rate": 1.1478788295675824e-06, "loss": 14.9952, "step": 400980 }, { "epoch": 0.8100251699883241, "grad_norm": 56.665462493896484, "learning_rate": 1.14765629845639e-06, "loss": 20.804, "step": 400990 }, { "epoch": 0.8100453706210079, "grad_norm": 102.39153289794922, "learning_rate": 1.1474337861210543e-06, "loss": 16.9165, "step": 401000 }, { "epoch": 0.8100655712536917, "grad_norm": 145.8800811767578, "learning_rate": 1.14721129256266e-06, "loss": 11.5674, "step": 401010 }, { "epoch": 0.8100857718863754, "grad_norm": 284.7915344238281, "learning_rate": 1.1469888177822885e-06, "loss": 12.2353, "step": 401020 }, { "epoch": 0.8101059725190592, "grad_norm": 87.03179931640625, "learning_rate": 1.1467663617810264e-06, "loss": 15.04, "step": 401030 }, { "epoch": 0.8101261731517431, "grad_norm": 118.01471710205078, "learning_rate": 1.1465439245599591e-06, "loss": 23.952, "step": 401040 }, { "epoch": 0.8101463737844269, "grad_norm": 421.2896423339844, "learning_rate": 1.1463215061201694e-06, "loss": 22.8989, "step": 401050 }, { "epoch": 0.8101665744171107, "grad_norm": 188.29379272460938, "learning_rate": 1.1460991064627403e-06, "loss": 12.4487, "step": 401060 }, { "epoch": 0.8101867750497945, "grad_norm": 12.958653450012207, "learning_rate": 1.1458767255887576e-06, "loss": 20.4585, "step": 401070 }, { "epoch": 0.8102069756824783, "grad_norm": 356.626708984375, "learning_rate": 1.1456543634993046e-06, "loss": 21.7585, "step": 401080 }, { "epoch": 0.8102271763151622, "grad_norm": 346.2409973144531, "learning_rate": 1.1454320201954628e-06, "loss": 17.8468, "step": 401090 }, { "epoch": 0.810247376947846, "grad_norm": 128.6572265625, "learning_rate": 1.1452096956783181e-06, "loss": 9.5496, "step": 401100 }, { "epoch": 0.8102675775805298, "grad_norm": 471.9775695800781, "learning_rate": 1.1449873899489566e-06, "loss": 14.6206, "step": 401110 }, { "epoch": 0.8102877782132136, "grad_norm": 162.21258544921875, "learning_rate": 1.1447651030084567e-06, "loss": 16.6864, "step": 401120 }, { "epoch": 0.8103079788458974, "grad_norm": 221.123779296875, "learning_rate": 1.1445428348579035e-06, "loss": 14.0092, "step": 401130 }, { "epoch": 0.8103281794785813, "grad_norm": 193.92190551757812, "learning_rate": 1.1443205854983824e-06, "loss": 16.0266, "step": 401140 }, { "epoch": 0.8103483801112651, "grad_norm": 376.093994140625, "learning_rate": 1.1440983549309753e-06, "loss": 47.8578, "step": 401150 }, { "epoch": 0.8103685807439489, "grad_norm": 229.80181884765625, "learning_rate": 1.1438761431567641e-06, "loss": 20.3558, "step": 401160 }, { "epoch": 0.8103887813766327, "grad_norm": 180.99278259277344, "learning_rate": 1.1436539501768334e-06, "loss": 9.0518, "step": 401170 }, { "epoch": 0.8104089820093165, "grad_norm": 192.3219757080078, "learning_rate": 1.1434317759922664e-06, "loss": 15.8526, "step": 401180 }, { "epoch": 0.8104291826420004, "grad_norm": 226.60984802246094, "learning_rate": 1.1432096206041438e-06, "loss": 15.1898, "step": 401190 }, { "epoch": 0.8104493832746842, "grad_norm": 96.78643798828125, "learning_rate": 1.1429874840135492e-06, "loss": 20.0862, "step": 401200 }, { "epoch": 0.810469583907368, "grad_norm": 312.77978515625, "learning_rate": 1.1427653662215675e-06, "loss": 15.3598, "step": 401210 }, { "epoch": 0.8104897845400518, "grad_norm": 132.35386657714844, "learning_rate": 1.1425432672292798e-06, "loss": 27.9162, "step": 401220 }, { "epoch": 0.8105099851727356, "grad_norm": 378.843505859375, "learning_rate": 1.1423211870377666e-06, "loss": 25.6319, "step": 401230 }, { "epoch": 0.8105301858054195, "grad_norm": 8.694578170776367, "learning_rate": 1.1420991256481133e-06, "loss": 8.6959, "step": 401240 }, { "epoch": 0.8105503864381033, "grad_norm": 0.0, "learning_rate": 1.1418770830614012e-06, "loss": 21.2952, "step": 401250 }, { "epoch": 0.8105705870707871, "grad_norm": 648.4176635742188, "learning_rate": 1.1416550592787106e-06, "loss": 35.29, "step": 401260 }, { "epoch": 0.8105907877034709, "grad_norm": 336.6990661621094, "learning_rate": 1.141433054301127e-06, "loss": 9.765, "step": 401270 }, { "epoch": 0.8106109883361546, "grad_norm": 320.4399108886719, "learning_rate": 1.1412110681297296e-06, "loss": 12.2322, "step": 401280 }, { "epoch": 0.8106311889688385, "grad_norm": 160.41273498535156, "learning_rate": 1.1409891007656022e-06, "loss": 19.2889, "step": 401290 }, { "epoch": 0.8106513896015223, "grad_norm": 283.5056457519531, "learning_rate": 1.1407671522098262e-06, "loss": 17.9634, "step": 401300 }, { "epoch": 0.8106715902342061, "grad_norm": 205.41738891601562, "learning_rate": 1.1405452224634817e-06, "loss": 13.4512, "step": 401310 }, { "epoch": 0.8106917908668899, "grad_norm": 407.2828369140625, "learning_rate": 1.140323311527653e-06, "loss": 16.8564, "step": 401320 }, { "epoch": 0.8107119914995737, "grad_norm": 147.1327667236328, "learning_rate": 1.1401014194034205e-06, "loss": 23.0601, "step": 401330 }, { "epoch": 0.8107321921322576, "grad_norm": 204.29454040527344, "learning_rate": 1.1398795460918637e-06, "loss": 10.9572, "step": 401340 }, { "epoch": 0.8107523927649414, "grad_norm": 255.1951141357422, "learning_rate": 1.139657691594066e-06, "loss": 23.2346, "step": 401350 }, { "epoch": 0.8107725933976252, "grad_norm": 251.42381286621094, "learning_rate": 1.1394358559111101e-06, "loss": 14.5623, "step": 401360 }, { "epoch": 0.810792794030309, "grad_norm": 93.5746078491211, "learning_rate": 1.1392140390440754e-06, "loss": 11.3852, "step": 401370 }, { "epoch": 0.8108129946629928, "grad_norm": 374.60015869140625, "learning_rate": 1.1389922409940423e-06, "loss": 16.3197, "step": 401380 }, { "epoch": 0.8108331952956767, "grad_norm": 287.8190612792969, "learning_rate": 1.1387704617620937e-06, "loss": 15.7255, "step": 401390 }, { "epoch": 0.8108533959283605, "grad_norm": 9.7261381149292, "learning_rate": 1.1385487013493095e-06, "loss": 13.6906, "step": 401400 }, { "epoch": 0.8108735965610443, "grad_norm": 284.3092346191406, "learning_rate": 1.1383269597567691e-06, "loss": 24.0496, "step": 401410 }, { "epoch": 0.8108937971937281, "grad_norm": 513.0870971679688, "learning_rate": 1.138105236985555e-06, "loss": 14.9538, "step": 401420 }, { "epoch": 0.8109139978264119, "grad_norm": 53.64165115356445, "learning_rate": 1.1378835330367494e-06, "loss": 25.3546, "step": 401430 }, { "epoch": 0.8109341984590958, "grad_norm": 297.69598388671875, "learning_rate": 1.1376618479114304e-06, "loss": 9.3072, "step": 401440 }, { "epoch": 0.8109543990917796, "grad_norm": 429.6688537597656, "learning_rate": 1.1374401816106778e-06, "loss": 19.306, "step": 401450 }, { "epoch": 0.8109745997244634, "grad_norm": 93.5263442993164, "learning_rate": 1.1372185341355746e-06, "loss": 14.4126, "step": 401460 }, { "epoch": 0.8109948003571472, "grad_norm": 77.04842376708984, "learning_rate": 1.1369969054871998e-06, "loss": 21.8044, "step": 401470 }, { "epoch": 0.811015000989831, "grad_norm": 19.820981979370117, "learning_rate": 1.1367752956666322e-06, "loss": 15.0017, "step": 401480 }, { "epoch": 0.8110352016225149, "grad_norm": 109.28889465332031, "learning_rate": 1.1365537046749536e-06, "loss": 28.4252, "step": 401490 }, { "epoch": 0.8110554022551987, "grad_norm": 436.7582092285156, "learning_rate": 1.136332132513245e-06, "loss": 11.5079, "step": 401500 }, { "epoch": 0.8110756028878825, "grad_norm": 12.745617866516113, "learning_rate": 1.1361105791825845e-06, "loss": 17.6909, "step": 401510 }, { "epoch": 0.8110958035205663, "grad_norm": 296.5794982910156, "learning_rate": 1.1358890446840514e-06, "loss": 21.388, "step": 401520 }, { "epoch": 0.81111600415325, "grad_norm": 313.3630065917969, "learning_rate": 1.135667529018728e-06, "loss": 12.4084, "step": 401530 }, { "epoch": 0.8111362047859338, "grad_norm": 536.7811279296875, "learning_rate": 1.135446032187692e-06, "loss": 32.634, "step": 401540 }, { "epoch": 0.8111564054186177, "grad_norm": 511.2801513671875, "learning_rate": 1.135224554192022e-06, "loss": 24.8238, "step": 401550 }, { "epoch": 0.8111766060513015, "grad_norm": 171.4930419921875, "learning_rate": 1.1350030950328001e-06, "loss": 13.9258, "step": 401560 }, { "epoch": 0.8111968066839853, "grad_norm": 59.805511474609375, "learning_rate": 1.1347816547111029e-06, "loss": 13.6622, "step": 401570 }, { "epoch": 0.8112170073166691, "grad_norm": 240.22467041015625, "learning_rate": 1.1345602332280125e-06, "loss": 23.4179, "step": 401580 }, { "epoch": 0.811237207949353, "grad_norm": 371.9868469238281, "learning_rate": 1.134338830584607e-06, "loss": 14.8753, "step": 401590 }, { "epoch": 0.8112574085820368, "grad_norm": 352.7749938964844, "learning_rate": 1.1341174467819637e-06, "loss": 26.9339, "step": 401600 }, { "epoch": 0.8112776092147206, "grad_norm": 0.0, "learning_rate": 1.1338960818211647e-06, "loss": 4.8686, "step": 401610 }, { "epoch": 0.8112978098474044, "grad_norm": 97.12739562988281, "learning_rate": 1.1336747357032868e-06, "loss": 11.0758, "step": 401620 }, { "epoch": 0.8113180104800882, "grad_norm": 132.0693359375, "learning_rate": 1.1334534084294084e-06, "loss": 18.4386, "step": 401630 }, { "epoch": 0.811338211112772, "grad_norm": 137.25962829589844, "learning_rate": 1.1332321000006086e-06, "loss": 24.2282, "step": 401640 }, { "epoch": 0.8113584117454559, "grad_norm": 340.3682861328125, "learning_rate": 1.1330108104179682e-06, "loss": 15.6218, "step": 401650 }, { "epoch": 0.8113786123781397, "grad_norm": 220.10287475585938, "learning_rate": 1.1327895396825644e-06, "loss": 17.1844, "step": 401660 }, { "epoch": 0.8113988130108235, "grad_norm": 199.04335021972656, "learning_rate": 1.1325682877954736e-06, "loss": 19.4105, "step": 401670 }, { "epoch": 0.8114190136435073, "grad_norm": 218.74668884277344, "learning_rate": 1.1323470547577774e-06, "loss": 13.8366, "step": 401680 }, { "epoch": 0.8114392142761911, "grad_norm": 233.6142120361328, "learning_rate": 1.1321258405705526e-06, "loss": 19.6657, "step": 401690 }, { "epoch": 0.811459414908875, "grad_norm": 11.550461769104004, "learning_rate": 1.1319046452348758e-06, "loss": 21.7509, "step": 401700 }, { "epoch": 0.8114796155415588, "grad_norm": 170.6481170654297, "learning_rate": 1.131683468751827e-06, "loss": 21.4788, "step": 401710 }, { "epoch": 0.8114998161742426, "grad_norm": 248.20008850097656, "learning_rate": 1.1314623111224865e-06, "loss": 17.7964, "step": 401720 }, { "epoch": 0.8115200168069264, "grad_norm": 142.85548400878906, "learning_rate": 1.1312411723479261e-06, "loss": 16.4905, "step": 401730 }, { "epoch": 0.8115402174396102, "grad_norm": 377.189453125, "learning_rate": 1.1310200524292275e-06, "loss": 9.7298, "step": 401740 }, { "epoch": 0.8115604180722941, "grad_norm": 40.173274993896484, "learning_rate": 1.1307989513674695e-06, "loss": 11.2379, "step": 401750 }, { "epoch": 0.8115806187049779, "grad_norm": 126.35352325439453, "learning_rate": 1.1305778691637276e-06, "loss": 7.9297, "step": 401760 }, { "epoch": 0.8116008193376617, "grad_norm": 311.06353759765625, "learning_rate": 1.1303568058190789e-06, "loss": 31.1362, "step": 401770 }, { "epoch": 0.8116210199703455, "grad_norm": 143.04086303710938, "learning_rate": 1.1301357613346032e-06, "loss": 8.5578, "step": 401780 }, { "epoch": 0.8116412206030292, "grad_norm": 479.79962158203125, "learning_rate": 1.129914735711376e-06, "loss": 16.3565, "step": 401790 }, { "epoch": 0.811661421235713, "grad_norm": 340.1397705078125, "learning_rate": 1.129693728950474e-06, "loss": 19.5118, "step": 401800 }, { "epoch": 0.8116816218683969, "grad_norm": 49.2801513671875, "learning_rate": 1.1294727410529754e-06, "loss": 22.515, "step": 401810 }, { "epoch": 0.8117018225010807, "grad_norm": 267.4632263183594, "learning_rate": 1.1292517720199581e-06, "loss": 16.8837, "step": 401820 }, { "epoch": 0.8117220231337645, "grad_norm": 108.25377655029297, "learning_rate": 1.1290308218524986e-06, "loss": 16.2119, "step": 401830 }, { "epoch": 0.8117422237664483, "grad_norm": 95.78386688232422, "learning_rate": 1.128809890551672e-06, "loss": 16.4301, "step": 401840 }, { "epoch": 0.8117624243991322, "grad_norm": 372.2847595214844, "learning_rate": 1.1285889781185576e-06, "loss": 15.7834, "step": 401850 }, { "epoch": 0.811782625031816, "grad_norm": 241.87025451660156, "learning_rate": 1.1283680845542316e-06, "loss": 15.6026, "step": 401860 }, { "epoch": 0.8118028256644998, "grad_norm": 370.3241271972656, "learning_rate": 1.128147209859768e-06, "loss": 20.6744, "step": 401870 }, { "epoch": 0.8118230262971836, "grad_norm": 6.260766983032227, "learning_rate": 1.1279263540362468e-06, "loss": 32.4075, "step": 401880 }, { "epoch": 0.8118432269298674, "grad_norm": 370.2737731933594, "learning_rate": 1.1277055170847416e-06, "loss": 20.9614, "step": 401890 }, { "epoch": 0.8118634275625513, "grad_norm": 308.668701171875, "learning_rate": 1.1274846990063314e-06, "loss": 13.4553, "step": 401900 }, { "epoch": 0.8118836281952351, "grad_norm": 566.6569213867188, "learning_rate": 1.1272638998020913e-06, "loss": 16.5647, "step": 401910 }, { "epoch": 0.8119038288279189, "grad_norm": 94.34039306640625, "learning_rate": 1.1270431194730959e-06, "loss": 14.9828, "step": 401920 }, { "epoch": 0.8119240294606027, "grad_norm": 44.97969055175781, "learning_rate": 1.1268223580204234e-06, "loss": 7.4245, "step": 401930 }, { "epoch": 0.8119442300932865, "grad_norm": 256.9627380371094, "learning_rate": 1.1266016154451492e-06, "loss": 16.1385, "step": 401940 }, { "epoch": 0.8119644307259704, "grad_norm": 11.55721378326416, "learning_rate": 1.1263808917483476e-06, "loss": 16.3155, "step": 401950 }, { "epoch": 0.8119846313586542, "grad_norm": 317.98553466796875, "learning_rate": 1.1261601869310962e-06, "loss": 14.8667, "step": 401960 }, { "epoch": 0.812004831991338, "grad_norm": 94.88976287841797, "learning_rate": 1.1259395009944713e-06, "loss": 16.6379, "step": 401970 }, { "epoch": 0.8120250326240218, "grad_norm": 346.5973205566406, "learning_rate": 1.125718833939547e-06, "loss": 22.1428, "step": 401980 }, { "epoch": 0.8120452332567056, "grad_norm": 441.3203125, "learning_rate": 1.125498185767398e-06, "loss": 22.9341, "step": 401990 }, { "epoch": 0.8120654338893895, "grad_norm": 265.5954284667969, "learning_rate": 1.1252775564791023e-06, "loss": 17.9806, "step": 402000 }, { "epoch": 0.8120856345220733, "grad_norm": 450.2509460449219, "learning_rate": 1.1250569460757344e-06, "loss": 25.4171, "step": 402010 }, { "epoch": 0.8121058351547571, "grad_norm": 207.9984588623047, "learning_rate": 1.1248363545583675e-06, "loss": 22.0143, "step": 402020 }, { "epoch": 0.8121260357874409, "grad_norm": 177.72805786132812, "learning_rate": 1.1246157819280772e-06, "loss": 18.9015, "step": 402030 }, { "epoch": 0.8121462364201246, "grad_norm": 191.04737854003906, "learning_rate": 1.1243952281859422e-06, "loss": 22.1221, "step": 402040 }, { "epoch": 0.8121664370528084, "grad_norm": 288.6043395996094, "learning_rate": 1.1241746933330338e-06, "loss": 14.2533, "step": 402050 }, { "epoch": 0.8121866376854923, "grad_norm": 153.75120544433594, "learning_rate": 1.123954177370427e-06, "loss": 17.1717, "step": 402060 }, { "epoch": 0.8122068383181761, "grad_norm": 460.6047668457031, "learning_rate": 1.1237336802991989e-06, "loss": 21.9876, "step": 402070 }, { "epoch": 0.8122270389508599, "grad_norm": 234.52713012695312, "learning_rate": 1.1235132021204226e-06, "loss": 17.1451, "step": 402080 }, { "epoch": 0.8122472395835437, "grad_norm": 267.2678527832031, "learning_rate": 1.1232927428351714e-06, "loss": 30.2486, "step": 402090 }, { "epoch": 0.8122674402162275, "grad_norm": 389.3060302734375, "learning_rate": 1.1230723024445212e-06, "loss": 19.9784, "step": 402100 }, { "epoch": 0.8122876408489114, "grad_norm": 274.8794860839844, "learning_rate": 1.1228518809495475e-06, "loss": 29.8584, "step": 402110 }, { "epoch": 0.8123078414815952, "grad_norm": 82.20980072021484, "learning_rate": 1.1226314783513238e-06, "loss": 12.8208, "step": 402120 }, { "epoch": 0.812328042114279, "grad_norm": 159.84130859375, "learning_rate": 1.122411094650922e-06, "loss": 11.8511, "step": 402130 }, { "epoch": 0.8123482427469628, "grad_norm": 49.122894287109375, "learning_rate": 1.1221907298494205e-06, "loss": 14.1555, "step": 402140 }, { "epoch": 0.8123684433796466, "grad_norm": 394.5281677246094, "learning_rate": 1.1219703839478907e-06, "loss": 29.5356, "step": 402150 }, { "epoch": 0.8123886440123305, "grad_norm": 240.09591674804688, "learning_rate": 1.1217500569474054e-06, "loss": 12.3836, "step": 402160 }, { "epoch": 0.8124088446450143, "grad_norm": 525.8123779296875, "learning_rate": 1.1215297488490412e-06, "loss": 25.9609, "step": 402170 }, { "epoch": 0.8124290452776981, "grad_norm": 154.62078857421875, "learning_rate": 1.12130945965387e-06, "loss": 15.5397, "step": 402180 }, { "epoch": 0.8124492459103819, "grad_norm": 167.33688354492188, "learning_rate": 1.1210891893629665e-06, "loss": 12.1467, "step": 402190 }, { "epoch": 0.8124694465430657, "grad_norm": 248.79754638671875, "learning_rate": 1.120868937977404e-06, "loss": 12.4655, "step": 402200 }, { "epoch": 0.8124896471757496, "grad_norm": 3.3044540882110596, "learning_rate": 1.1206487054982551e-06, "loss": 9.8336, "step": 402210 }, { "epoch": 0.8125098478084334, "grad_norm": 151.3280792236328, "learning_rate": 1.1204284919265946e-06, "loss": 20.906, "step": 402220 }, { "epoch": 0.8125300484411172, "grad_norm": 468.8746643066406, "learning_rate": 1.1202082972634952e-06, "loss": 14.2768, "step": 402230 }, { "epoch": 0.812550249073801, "grad_norm": 242.4896697998047, "learning_rate": 1.119988121510029e-06, "loss": 27.006, "step": 402240 }, { "epoch": 0.8125704497064848, "grad_norm": 233.52549743652344, "learning_rate": 1.1197679646672698e-06, "loss": 16.8898, "step": 402250 }, { "epoch": 0.8125906503391687, "grad_norm": 518.6631469726562, "learning_rate": 1.1195478267362924e-06, "loss": 19.793, "step": 402260 }, { "epoch": 0.8126108509718525, "grad_norm": 328.3289794921875, "learning_rate": 1.119327707718168e-06, "loss": 13.0034, "step": 402270 }, { "epoch": 0.8126310516045363, "grad_norm": 1693.345703125, "learning_rate": 1.1191076076139684e-06, "loss": 14.4391, "step": 402280 }, { "epoch": 0.8126512522372201, "grad_norm": 357.6490173339844, "learning_rate": 1.1188875264247695e-06, "loss": 31.6032, "step": 402290 }, { "epoch": 0.8126714528699038, "grad_norm": 113.47588348388672, "learning_rate": 1.1186674641516415e-06, "loss": 8.6629, "step": 402300 }, { "epoch": 0.8126916535025877, "grad_norm": 11.096231460571289, "learning_rate": 1.1184474207956564e-06, "loss": 17.1936, "step": 402310 }, { "epoch": 0.8127118541352715, "grad_norm": 178.34913635253906, "learning_rate": 1.1182273963578877e-06, "loss": 34.396, "step": 402320 }, { "epoch": 0.8127320547679553, "grad_norm": 164.09608459472656, "learning_rate": 1.1180073908394108e-06, "loss": 16.3826, "step": 402330 }, { "epoch": 0.8127522554006391, "grad_norm": 262.7817077636719, "learning_rate": 1.1177874042412923e-06, "loss": 22.2358, "step": 402340 }, { "epoch": 0.8127724560333229, "grad_norm": 232.10989379882812, "learning_rate": 1.1175674365646067e-06, "loss": 13.6303, "step": 402350 }, { "epoch": 0.8127926566660068, "grad_norm": 212.78245544433594, "learning_rate": 1.1173474878104285e-06, "loss": 24.2224, "step": 402360 }, { "epoch": 0.8128128572986906, "grad_norm": 144.1623992919922, "learning_rate": 1.1171275579798274e-06, "loss": 14.0395, "step": 402370 }, { "epoch": 0.8128330579313744, "grad_norm": 525.379638671875, "learning_rate": 1.1169076470738744e-06, "loss": 35.3341, "step": 402380 }, { "epoch": 0.8128532585640582, "grad_norm": 111.95392608642578, "learning_rate": 1.1166877550936433e-06, "loss": 11.2006, "step": 402390 }, { "epoch": 0.812873459196742, "grad_norm": 180.03761291503906, "learning_rate": 1.1164678820402059e-06, "loss": 23.053, "step": 402400 }, { "epoch": 0.8128936598294259, "grad_norm": 215.98049926757812, "learning_rate": 1.1162480279146315e-06, "loss": 20.5174, "step": 402410 }, { "epoch": 0.8129138604621097, "grad_norm": 291.88775634765625, "learning_rate": 1.1160281927179927e-06, "loss": 5.5911, "step": 402420 }, { "epoch": 0.8129340610947935, "grad_norm": 303.0357971191406, "learning_rate": 1.1158083764513634e-06, "loss": 17.0726, "step": 402430 }, { "epoch": 0.8129542617274773, "grad_norm": 226.69064331054688, "learning_rate": 1.1155885791158128e-06, "loss": 12.2093, "step": 402440 }, { "epoch": 0.8129744623601611, "grad_norm": 325.1184387207031, "learning_rate": 1.1153688007124109e-06, "loss": 18.6826, "step": 402450 }, { "epoch": 0.812994662992845, "grad_norm": 377.18499755859375, "learning_rate": 1.1151490412422316e-06, "loss": 17.2736, "step": 402460 }, { "epoch": 0.8130148636255288, "grad_norm": 177.8535614013672, "learning_rate": 1.114929300706345e-06, "loss": 13.4113, "step": 402470 }, { "epoch": 0.8130350642582126, "grad_norm": 137.015625, "learning_rate": 1.1147095791058198e-06, "loss": 26.7251, "step": 402480 }, { "epoch": 0.8130552648908964, "grad_norm": 480.3047180175781, "learning_rate": 1.114489876441731e-06, "loss": 16.7304, "step": 402490 }, { "epoch": 0.8130754655235802, "grad_norm": 445.544921875, "learning_rate": 1.1142701927151456e-06, "loss": 15.6297, "step": 402500 }, { "epoch": 0.813095666156264, "grad_norm": 1568.1640625, "learning_rate": 1.1140505279271373e-06, "loss": 33.7608, "step": 402510 }, { "epoch": 0.8131158667889479, "grad_norm": 218.8968048095703, "learning_rate": 1.1138308820787752e-06, "loss": 14.3719, "step": 402520 }, { "epoch": 0.8131360674216317, "grad_norm": 130.31504821777344, "learning_rate": 1.1136112551711293e-06, "loss": 14.6224, "step": 402530 }, { "epoch": 0.8131562680543155, "grad_norm": 168.16104125976562, "learning_rate": 1.113391647205272e-06, "loss": 9.6879, "step": 402540 }, { "epoch": 0.8131764686869993, "grad_norm": 285.9687805175781, "learning_rate": 1.1131720581822703e-06, "loss": 18.069, "step": 402550 }, { "epoch": 0.813196669319683, "grad_norm": 339.5827331542969, "learning_rate": 1.1129524881031989e-06, "loss": 10.8594, "step": 402560 }, { "epoch": 0.8132168699523669, "grad_norm": 71.73473358154297, "learning_rate": 1.1127329369691236e-06, "loss": 13.5066, "step": 402570 }, { "epoch": 0.8132370705850507, "grad_norm": 47.83183670043945, "learning_rate": 1.1125134047811182e-06, "loss": 9.9433, "step": 402580 }, { "epoch": 0.8132572712177345, "grad_norm": 164.03692626953125, "learning_rate": 1.1122938915402508e-06, "loss": 6.7387, "step": 402590 }, { "epoch": 0.8132774718504183, "grad_norm": 253.02362060546875, "learning_rate": 1.11207439724759e-06, "loss": 16.3735, "step": 402600 }, { "epoch": 0.8132976724831021, "grad_norm": 287.6016845703125, "learning_rate": 1.1118549219042085e-06, "loss": 19.9706, "step": 402610 }, { "epoch": 0.813317873115786, "grad_norm": 370.5058898925781, "learning_rate": 1.111635465511175e-06, "loss": 16.5193, "step": 402620 }, { "epoch": 0.8133380737484698, "grad_norm": 454.0121765136719, "learning_rate": 1.1114160280695568e-06, "loss": 13.8021, "step": 402630 }, { "epoch": 0.8133582743811536, "grad_norm": 561.3067626953125, "learning_rate": 1.1111966095804254e-06, "loss": 17.1681, "step": 402640 }, { "epoch": 0.8133784750138374, "grad_norm": 73.6223373413086, "learning_rate": 1.1109772100448512e-06, "loss": 20.9585, "step": 402650 }, { "epoch": 0.8133986756465212, "grad_norm": 260.21923828125, "learning_rate": 1.1107578294639026e-06, "loss": 22.1203, "step": 402660 }, { "epoch": 0.8134188762792051, "grad_norm": 528.7289428710938, "learning_rate": 1.1105384678386472e-06, "loss": 17.6675, "step": 402670 }, { "epoch": 0.8134390769118889, "grad_norm": 425.2149658203125, "learning_rate": 1.1103191251701573e-06, "loss": 10.904, "step": 402680 }, { "epoch": 0.8134592775445727, "grad_norm": 308.6471252441406, "learning_rate": 1.1100998014594993e-06, "loss": 16.3118, "step": 402690 }, { "epoch": 0.8134794781772565, "grad_norm": 250.72427368164062, "learning_rate": 1.1098804967077425e-06, "loss": 17.4879, "step": 402700 }, { "epoch": 0.8134996788099403, "grad_norm": 403.79168701171875, "learning_rate": 1.109661210915956e-06, "loss": 12.6087, "step": 402710 }, { "epoch": 0.8135198794426242, "grad_norm": 174.78033447265625, "learning_rate": 1.1094419440852105e-06, "loss": 23.0667, "step": 402720 }, { "epoch": 0.813540080075308, "grad_norm": 94.85655212402344, "learning_rate": 1.109222696216573e-06, "loss": 15.6982, "step": 402730 }, { "epoch": 0.8135602807079918, "grad_norm": 1217.8206787109375, "learning_rate": 1.109003467311111e-06, "loss": 39.8227, "step": 402740 }, { "epoch": 0.8135804813406756, "grad_norm": 168.11790466308594, "learning_rate": 1.1087842573698953e-06, "loss": 23.506, "step": 402750 }, { "epoch": 0.8136006819733594, "grad_norm": 187.58653259277344, "learning_rate": 1.1085650663939933e-06, "loss": 24.4735, "step": 402760 }, { "epoch": 0.8136208826060433, "grad_norm": 351.3751220703125, "learning_rate": 1.1083458943844721e-06, "loss": 22.5244, "step": 402770 }, { "epoch": 0.8136410832387271, "grad_norm": 248.69512939453125, "learning_rate": 1.1081267413424018e-06, "loss": 17.9265, "step": 402780 }, { "epoch": 0.8136612838714109, "grad_norm": 241.5086669921875, "learning_rate": 1.1079076072688489e-06, "loss": 12.7883, "step": 402790 }, { "epoch": 0.8136814845040947, "grad_norm": 193.34486389160156, "learning_rate": 1.1076884921648834e-06, "loss": 16.4125, "step": 402800 }, { "epoch": 0.8137016851367784, "grad_norm": 328.86517333984375, "learning_rate": 1.1074693960315718e-06, "loss": 17.5452, "step": 402810 }, { "epoch": 0.8137218857694622, "grad_norm": 288.90557861328125, "learning_rate": 1.1072503188699811e-06, "loss": 10.8626, "step": 402820 }, { "epoch": 0.8137420864021461, "grad_norm": 396.7099304199219, "learning_rate": 1.1070312606811816e-06, "loss": 19.0971, "step": 402830 }, { "epoch": 0.8137622870348299, "grad_norm": 309.0019226074219, "learning_rate": 1.1068122214662397e-06, "loss": 22.995, "step": 402840 }, { "epoch": 0.8137824876675137, "grad_norm": 153.65895080566406, "learning_rate": 1.1065932012262215e-06, "loss": 18.4642, "step": 402850 }, { "epoch": 0.8138026883001975, "grad_norm": 277.7593688964844, "learning_rate": 1.1063741999621958e-06, "loss": 14.7063, "step": 402860 }, { "epoch": 0.8138228889328813, "grad_norm": 443.7995910644531, "learning_rate": 1.1061552176752311e-06, "loss": 20.9304, "step": 402870 }, { "epoch": 0.8138430895655652, "grad_norm": 243.0931396484375, "learning_rate": 1.1059362543663944e-06, "loss": 17.3964, "step": 402880 }, { "epoch": 0.813863290198249, "grad_norm": 41.78388595581055, "learning_rate": 1.1057173100367495e-06, "loss": 10.6041, "step": 402890 }, { "epoch": 0.8138834908309328, "grad_norm": 177.1113739013672, "learning_rate": 1.1054983846873684e-06, "loss": 18.1348, "step": 402900 }, { "epoch": 0.8139036914636166, "grad_norm": 228.78846740722656, "learning_rate": 1.1052794783193155e-06, "loss": 21.4952, "step": 402910 }, { "epoch": 0.8139238920963004, "grad_norm": 460.0074462890625, "learning_rate": 1.105060590933657e-06, "loss": 20.4823, "step": 402920 }, { "epoch": 0.8139440927289843, "grad_norm": 133.1265411376953, "learning_rate": 1.10484172253146e-06, "loss": 19.7873, "step": 402930 }, { "epoch": 0.8139642933616681, "grad_norm": 182.77430725097656, "learning_rate": 1.1046228731137953e-06, "loss": 13.3291, "step": 402940 }, { "epoch": 0.8139844939943519, "grad_norm": 400.0605163574219, "learning_rate": 1.1044040426817237e-06, "loss": 19.4662, "step": 402950 }, { "epoch": 0.8140046946270357, "grad_norm": 132.55755615234375, "learning_rate": 1.1041852312363144e-06, "loss": 13.6394, "step": 402960 }, { "epoch": 0.8140248952597195, "grad_norm": 390.87322998046875, "learning_rate": 1.1039664387786348e-06, "loss": 36.3251, "step": 402970 }, { "epoch": 0.8140450958924034, "grad_norm": 411.1525573730469, "learning_rate": 1.1037476653097501e-06, "loss": 24.1864, "step": 402980 }, { "epoch": 0.8140652965250872, "grad_norm": 109.0416030883789, "learning_rate": 1.1035289108307256e-06, "loss": 13.7228, "step": 402990 }, { "epoch": 0.814085497157771, "grad_norm": 55.12168884277344, "learning_rate": 1.1033101753426285e-06, "loss": 10.0834, "step": 403000 }, { "epoch": 0.8141056977904548, "grad_norm": 208.33316040039062, "learning_rate": 1.1030914588465281e-06, "loss": 15.9461, "step": 403010 }, { "epoch": 0.8141258984231386, "grad_norm": 243.14707946777344, "learning_rate": 1.1028727613434842e-06, "loss": 28.4291, "step": 403020 }, { "epoch": 0.8141460990558225, "grad_norm": 310.4918518066406, "learning_rate": 1.1026540828345656e-06, "loss": 17.8772, "step": 403030 }, { "epoch": 0.8141662996885063, "grad_norm": 282.3050231933594, "learning_rate": 1.10243542332084e-06, "loss": 14.9915, "step": 403040 }, { "epoch": 0.8141865003211901, "grad_norm": 107.14249420166016, "learning_rate": 1.1022167828033715e-06, "loss": 11.5238, "step": 403050 }, { "epoch": 0.8142067009538739, "grad_norm": 240.81834411621094, "learning_rate": 1.1019981612832243e-06, "loss": 34.1449, "step": 403060 }, { "epoch": 0.8142269015865576, "grad_norm": 189.17787170410156, "learning_rate": 1.101779558761466e-06, "loss": 9.4556, "step": 403070 }, { "epoch": 0.8142471022192415, "grad_norm": 465.57147216796875, "learning_rate": 1.101560975239162e-06, "loss": 14.7997, "step": 403080 }, { "epoch": 0.8142673028519253, "grad_norm": 469.3831481933594, "learning_rate": 1.1013424107173753e-06, "loss": 18.8059, "step": 403090 }, { "epoch": 0.8142875034846091, "grad_norm": 356.8483581542969, "learning_rate": 1.1011238651971744e-06, "loss": 19.3013, "step": 403100 }, { "epoch": 0.8143077041172929, "grad_norm": 313.829345703125, "learning_rate": 1.1009053386796215e-06, "loss": 16.411, "step": 403110 }, { "epoch": 0.8143279047499767, "grad_norm": 328.6156311035156, "learning_rate": 1.1006868311657848e-06, "loss": 15.0444, "step": 403120 }, { "epoch": 0.8143481053826606, "grad_norm": 605.4392700195312, "learning_rate": 1.100468342656727e-06, "loss": 20.2595, "step": 403130 }, { "epoch": 0.8143683060153444, "grad_norm": 253.5699920654297, "learning_rate": 1.1002498731535123e-06, "loss": 9.1928, "step": 403140 }, { "epoch": 0.8143885066480282, "grad_norm": 459.7940368652344, "learning_rate": 1.1000314226572083e-06, "loss": 20.5487, "step": 403150 }, { "epoch": 0.814408707280712, "grad_norm": 490.063232421875, "learning_rate": 1.0998129911688766e-06, "loss": 21.565, "step": 403160 }, { "epoch": 0.8144289079133958, "grad_norm": 126.96222686767578, "learning_rate": 1.0995945786895846e-06, "loss": 18.0429, "step": 403170 }, { "epoch": 0.8144491085460797, "grad_norm": 156.43560791015625, "learning_rate": 1.0993761852203943e-06, "loss": 13.0045, "step": 403180 }, { "epoch": 0.8144693091787635, "grad_norm": 272.5559997558594, "learning_rate": 1.099157810762373e-06, "loss": 23.4019, "step": 403190 }, { "epoch": 0.8144895098114473, "grad_norm": 398.17706298828125, "learning_rate": 1.0989394553165833e-06, "loss": 28.0403, "step": 403200 }, { "epoch": 0.8145097104441311, "grad_norm": 167.58114624023438, "learning_rate": 1.098721118884088e-06, "loss": 8.0853, "step": 403210 }, { "epoch": 0.8145299110768149, "grad_norm": 8.708654403686523, "learning_rate": 1.0985028014659544e-06, "loss": 10.1016, "step": 403220 }, { "epoch": 0.8145501117094988, "grad_norm": 365.8742370605469, "learning_rate": 1.0982845030632445e-06, "loss": 11.5529, "step": 403230 }, { "epoch": 0.8145703123421826, "grad_norm": 224.86334228515625, "learning_rate": 1.0980662236770217e-06, "loss": 9.2334, "step": 403240 }, { "epoch": 0.8145905129748664, "grad_norm": 263.55908203125, "learning_rate": 1.097847963308351e-06, "loss": 28.4329, "step": 403250 }, { "epoch": 0.8146107136075502, "grad_norm": 194.8883819580078, "learning_rate": 1.0976297219582988e-06, "loss": 27.3013, "step": 403260 }, { "epoch": 0.814630914240234, "grad_norm": 225.3062744140625, "learning_rate": 1.0974114996279229e-06, "loss": 14.9944, "step": 403270 }, { "epoch": 0.8146511148729179, "grad_norm": 285.8835754394531, "learning_rate": 1.09719329631829e-06, "loss": 17.1087, "step": 403280 }, { "epoch": 0.8146713155056017, "grad_norm": 1919.5220947265625, "learning_rate": 1.0969751120304656e-06, "loss": 34.192, "step": 403290 }, { "epoch": 0.8146915161382855, "grad_norm": 97.24964141845703, "learning_rate": 1.0967569467655104e-06, "loss": 15.9217, "step": 403300 }, { "epoch": 0.8147117167709693, "grad_norm": 91.8150863647461, "learning_rate": 1.0965388005244876e-06, "loss": 19.261, "step": 403310 }, { "epoch": 0.814731917403653, "grad_norm": 504.1994323730469, "learning_rate": 1.0963206733084607e-06, "loss": 29.5569, "step": 403320 }, { "epoch": 0.8147521180363368, "grad_norm": 277.9678955078125, "learning_rate": 1.096102565118497e-06, "loss": 8.6, "step": 403330 }, { "epoch": 0.8147723186690207, "grad_norm": 139.09864807128906, "learning_rate": 1.0958844759556525e-06, "loss": 8.112, "step": 403340 }, { "epoch": 0.8147925193017045, "grad_norm": 37.03032302856445, "learning_rate": 1.0956664058209936e-06, "loss": 28.972, "step": 403350 }, { "epoch": 0.8148127199343883, "grad_norm": 432.78289794921875, "learning_rate": 1.0954483547155846e-06, "loss": 18.2821, "step": 403360 }, { "epoch": 0.8148329205670721, "grad_norm": 103.33815002441406, "learning_rate": 1.095230322640487e-06, "loss": 16.2459, "step": 403370 }, { "epoch": 0.814853121199756, "grad_norm": 175.0149383544922, "learning_rate": 1.0950123095967614e-06, "loss": 8.0415, "step": 403380 }, { "epoch": 0.8148733218324398, "grad_norm": 237.2845458984375, "learning_rate": 1.094794315585474e-06, "loss": 17.4821, "step": 403390 }, { "epoch": 0.8148935224651236, "grad_norm": 101.88243103027344, "learning_rate": 1.0945763406076837e-06, "loss": 17.11, "step": 403400 }, { "epoch": 0.8149137230978074, "grad_norm": 325.753662109375, "learning_rate": 1.0943583846644561e-06, "loss": 19.5641, "step": 403410 }, { "epoch": 0.8149339237304912, "grad_norm": 198.37796020507812, "learning_rate": 1.0941404477568524e-06, "loss": 22.9559, "step": 403420 }, { "epoch": 0.814954124363175, "grad_norm": 337.3090515136719, "learning_rate": 1.0939225298859324e-06, "loss": 27.6585, "step": 403430 }, { "epoch": 0.8149743249958589, "grad_norm": 134.57977294921875, "learning_rate": 1.093704631052762e-06, "loss": 12.7271, "step": 403440 }, { "epoch": 0.8149945256285427, "grad_norm": 722.46533203125, "learning_rate": 1.0934867512584013e-06, "loss": 26.3897, "step": 403450 }, { "epoch": 0.8150147262612265, "grad_norm": 324.8207702636719, "learning_rate": 1.0932688905039113e-06, "loss": 22.6086, "step": 403460 }, { "epoch": 0.8150349268939103, "grad_norm": 183.954345703125, "learning_rate": 1.0930510487903544e-06, "loss": 25.7796, "step": 403470 }, { "epoch": 0.8150551275265941, "grad_norm": 480.4156494140625, "learning_rate": 1.0928332261187947e-06, "loss": 19.6411, "step": 403480 }, { "epoch": 0.815075328159278, "grad_norm": 224.7205352783203, "learning_rate": 1.0926154224902919e-06, "loss": 19.4202, "step": 403490 }, { "epoch": 0.8150955287919618, "grad_norm": 295.2506103515625, "learning_rate": 1.0923976379059059e-06, "loss": 21.9527, "step": 403500 }, { "epoch": 0.8151157294246456, "grad_norm": 628.2420043945312, "learning_rate": 1.0921798723667015e-06, "loss": 23.5203, "step": 403510 }, { "epoch": 0.8151359300573294, "grad_norm": 190.00462341308594, "learning_rate": 1.0919621258737384e-06, "loss": 17.314, "step": 403520 }, { "epoch": 0.8151561306900132, "grad_norm": 418.88812255859375, "learning_rate": 1.0917443984280769e-06, "loss": 15.0581, "step": 403530 }, { "epoch": 0.8151763313226971, "grad_norm": 287.5474853515625, "learning_rate": 1.0915266900307785e-06, "loss": 24.6484, "step": 403540 }, { "epoch": 0.8151965319553809, "grad_norm": 256.4420471191406, "learning_rate": 1.0913090006829085e-06, "loss": 29.6169, "step": 403550 }, { "epoch": 0.8152167325880647, "grad_norm": 19.511442184448242, "learning_rate": 1.0910913303855208e-06, "loss": 12.8713, "step": 403560 }, { "epoch": 0.8152369332207485, "grad_norm": 86.48223876953125, "learning_rate": 1.0908736791396807e-06, "loss": 9.8993, "step": 403570 }, { "epoch": 0.8152571338534322, "grad_norm": 432.2529602050781, "learning_rate": 1.0906560469464488e-06, "loss": 17.5455, "step": 403580 }, { "epoch": 0.8152773344861161, "grad_norm": 487.44561767578125, "learning_rate": 1.0904384338068856e-06, "loss": 18.4181, "step": 403590 }, { "epoch": 0.8152975351187999, "grad_norm": 218.40203857421875, "learning_rate": 1.09022083972205e-06, "loss": 14.6439, "step": 403600 }, { "epoch": 0.8153177357514837, "grad_norm": 246.67628479003906, "learning_rate": 1.090003264693003e-06, "loss": 15.3123, "step": 403610 }, { "epoch": 0.8153379363841675, "grad_norm": 152.4772186279297, "learning_rate": 1.0897857087208098e-06, "loss": 18.9474, "step": 403620 }, { "epoch": 0.8153581370168513, "grad_norm": 247.45223999023438, "learning_rate": 1.0895681718065231e-06, "loss": 22.1023, "step": 403630 }, { "epoch": 0.8153783376495352, "grad_norm": 550.1175537109375, "learning_rate": 1.0893506539512071e-06, "loss": 29.1986, "step": 403640 }, { "epoch": 0.815398538282219, "grad_norm": 168.80715942382812, "learning_rate": 1.0891331551559237e-06, "loss": 13.243, "step": 403650 }, { "epoch": 0.8154187389149028, "grad_norm": 268.01800537109375, "learning_rate": 1.0889156754217306e-06, "loss": 16.9527, "step": 403660 }, { "epoch": 0.8154389395475866, "grad_norm": 191.45591735839844, "learning_rate": 1.0886982147496866e-06, "loss": 21.7477, "step": 403670 }, { "epoch": 0.8154591401802704, "grad_norm": 151.2353057861328, "learning_rate": 1.0884807731408542e-06, "loss": 19.2914, "step": 403680 }, { "epoch": 0.8154793408129543, "grad_norm": 309.7168884277344, "learning_rate": 1.0882633505962924e-06, "loss": 23.7989, "step": 403690 }, { "epoch": 0.8154995414456381, "grad_norm": 421.23919677734375, "learning_rate": 1.0880459471170597e-06, "loss": 24.4488, "step": 403700 }, { "epoch": 0.8155197420783219, "grad_norm": 299.72735595703125, "learning_rate": 1.0878285627042173e-06, "loss": 17.3051, "step": 403710 }, { "epoch": 0.8155399427110057, "grad_norm": 239.96942138671875, "learning_rate": 1.0876111973588233e-06, "loss": 14.4354, "step": 403720 }, { "epoch": 0.8155601433436895, "grad_norm": 515.3560180664062, "learning_rate": 1.0873938510819381e-06, "loss": 23.4582, "step": 403730 }, { "epoch": 0.8155803439763734, "grad_norm": 389.5392761230469, "learning_rate": 1.0871765238746219e-06, "loss": 17.8438, "step": 403740 }, { "epoch": 0.8156005446090572, "grad_norm": 384.092529296875, "learning_rate": 1.0869592157379305e-06, "loss": 16.7279, "step": 403750 }, { "epoch": 0.815620745241741, "grad_norm": 353.696044921875, "learning_rate": 1.0867419266729274e-06, "loss": 15.2681, "step": 403760 }, { "epoch": 0.8156409458744248, "grad_norm": 170.84539794921875, "learning_rate": 1.0865246566806676e-06, "loss": 11.7426, "step": 403770 }, { "epoch": 0.8156611465071086, "grad_norm": 24.140304565429688, "learning_rate": 1.0863074057622136e-06, "loss": 21.4497, "step": 403780 }, { "epoch": 0.8156813471397925, "grad_norm": 239.61846923828125, "learning_rate": 1.0860901739186209e-06, "loss": 25.5635, "step": 403790 }, { "epoch": 0.8157015477724763, "grad_norm": 74.1986083984375, "learning_rate": 1.0858729611509516e-06, "loss": 19.0113, "step": 403800 }, { "epoch": 0.8157217484051601, "grad_norm": 220.3067169189453, "learning_rate": 1.085655767460263e-06, "loss": 16.1088, "step": 403810 }, { "epoch": 0.8157419490378439, "grad_norm": 121.27507019042969, "learning_rate": 1.085438592847612e-06, "loss": 17.1557, "step": 403820 }, { "epoch": 0.8157621496705277, "grad_norm": 350.6377868652344, "learning_rate": 1.0852214373140596e-06, "loss": 17.8307, "step": 403830 }, { "epoch": 0.8157823503032114, "grad_norm": 307.6701354980469, "learning_rate": 1.085004300860663e-06, "loss": 20.9507, "step": 403840 }, { "epoch": 0.8158025509358953, "grad_norm": 258.47210693359375, "learning_rate": 1.0847871834884798e-06, "loss": 21.5728, "step": 403850 }, { "epoch": 0.8158227515685791, "grad_norm": 14.024871826171875, "learning_rate": 1.0845700851985686e-06, "loss": 38.9172, "step": 403860 }, { "epoch": 0.8158429522012629, "grad_norm": 417.4260559082031, "learning_rate": 1.084353005991991e-06, "loss": 18.2087, "step": 403870 }, { "epoch": 0.8158631528339467, "grad_norm": 81.95653533935547, "learning_rate": 1.0841359458697986e-06, "loss": 9.3193, "step": 403880 }, { "epoch": 0.8158833534666305, "grad_norm": 232.32852172851562, "learning_rate": 1.0839189048330534e-06, "loss": 23.0786, "step": 403890 }, { "epoch": 0.8159035540993144, "grad_norm": 304.8473205566406, "learning_rate": 1.0837018828828133e-06, "loss": 12.2597, "step": 403900 }, { "epoch": 0.8159237547319982, "grad_norm": 391.31121826171875, "learning_rate": 1.0834848800201358e-06, "loss": 15.6182, "step": 403910 }, { "epoch": 0.815943955364682, "grad_norm": 0.0, "learning_rate": 1.0832678962460759e-06, "loss": 8.6355, "step": 403920 }, { "epoch": 0.8159641559973658, "grad_norm": 173.289794921875, "learning_rate": 1.0830509315616938e-06, "loss": 7.5412, "step": 403930 }, { "epoch": 0.8159843566300496, "grad_norm": 53.76713943481445, "learning_rate": 1.0828339859680487e-06, "loss": 10.2686, "step": 403940 }, { "epoch": 0.8160045572627335, "grad_norm": 243.79287719726562, "learning_rate": 1.0826170594661933e-06, "loss": 17.1756, "step": 403950 }, { "epoch": 0.8160247578954173, "grad_norm": 300.6907043457031, "learning_rate": 1.082400152057187e-06, "loss": 21.9349, "step": 403960 }, { "epoch": 0.8160449585281011, "grad_norm": 244.40829467773438, "learning_rate": 1.0821832637420887e-06, "loss": 12.436, "step": 403970 }, { "epoch": 0.8160651591607849, "grad_norm": 437.7255554199219, "learning_rate": 1.0819663945219538e-06, "loss": 11.0293, "step": 403980 }, { "epoch": 0.8160853597934687, "grad_norm": 31.64287567138672, "learning_rate": 1.0817495443978381e-06, "loss": 11.5967, "step": 403990 }, { "epoch": 0.8161055604261526, "grad_norm": 341.23358154296875, "learning_rate": 1.0815327133708015e-06, "loss": 30.3751, "step": 404000 }, { "epoch": 0.8161257610588364, "grad_norm": 391.6367492675781, "learning_rate": 1.0813159014418995e-06, "loss": 12.1602, "step": 404010 }, { "epoch": 0.8161459616915202, "grad_norm": 190.1560516357422, "learning_rate": 1.0810991086121863e-06, "loss": 20.9282, "step": 404020 }, { "epoch": 0.816166162324204, "grad_norm": 141.3811798095703, "learning_rate": 1.080882334882723e-06, "loss": 12.5468, "step": 404030 }, { "epoch": 0.8161863629568878, "grad_norm": 229.395751953125, "learning_rate": 1.0806655802545617e-06, "loss": 13.249, "step": 404040 }, { "epoch": 0.8162065635895717, "grad_norm": 131.4805145263672, "learning_rate": 1.080448844728763e-06, "loss": 15.7263, "step": 404050 }, { "epoch": 0.8162267642222555, "grad_norm": 269.5080871582031, "learning_rate": 1.0802321283063794e-06, "loss": 10.28, "step": 404060 }, { "epoch": 0.8162469648549393, "grad_norm": 988.2384033203125, "learning_rate": 1.0800154309884708e-06, "loss": 16.6204, "step": 404070 }, { "epoch": 0.8162671654876231, "grad_norm": 526.30517578125, "learning_rate": 1.07979875277609e-06, "loss": 16.943, "step": 404080 }, { "epoch": 0.8162873661203068, "grad_norm": 316.18182373046875, "learning_rate": 1.079582093670296e-06, "loss": 29.5563, "step": 404090 }, { "epoch": 0.8163075667529907, "grad_norm": 242.55918884277344, "learning_rate": 1.0793654536721432e-06, "loss": 24.681, "step": 404100 }, { "epoch": 0.8163277673856745, "grad_norm": 424.91510009765625, "learning_rate": 1.0791488327826865e-06, "loss": 12.0275, "step": 404110 }, { "epoch": 0.8163479680183583, "grad_norm": 266.5558776855469, "learning_rate": 1.0789322310029842e-06, "loss": 18.241, "step": 404120 }, { "epoch": 0.8163681686510421, "grad_norm": 15.049221992492676, "learning_rate": 1.0787156483340905e-06, "loss": 34.5878, "step": 404130 }, { "epoch": 0.8163883692837259, "grad_norm": 302.0994873046875, "learning_rate": 1.07849908477706e-06, "loss": 13.8381, "step": 404140 }, { "epoch": 0.8164085699164098, "grad_norm": 172.9310302734375, "learning_rate": 1.0782825403329488e-06, "loss": 8.1957, "step": 404150 }, { "epoch": 0.8164287705490936, "grad_norm": 115.13671875, "learning_rate": 1.0780660150028161e-06, "loss": 12.2736, "step": 404160 }, { "epoch": 0.8164489711817774, "grad_norm": 386.56036376953125, "learning_rate": 1.077849508787711e-06, "loss": 17.3178, "step": 404170 }, { "epoch": 0.8164691718144612, "grad_norm": 139.54556274414062, "learning_rate": 1.077633021688692e-06, "loss": 13.445, "step": 404180 }, { "epoch": 0.816489372447145, "grad_norm": 602.4515380859375, "learning_rate": 1.0774165537068154e-06, "loss": 21.3877, "step": 404190 }, { "epoch": 0.8165095730798289, "grad_norm": 305.52008056640625, "learning_rate": 1.077200104843134e-06, "loss": 20.373, "step": 404200 }, { "epoch": 0.8165297737125127, "grad_norm": 459.283203125, "learning_rate": 1.0769836750987028e-06, "loss": 24.2684, "step": 404210 }, { "epoch": 0.8165499743451965, "grad_norm": 216.46495056152344, "learning_rate": 1.0767672644745774e-06, "loss": 19.5112, "step": 404220 }, { "epoch": 0.8165701749778803, "grad_norm": 285.458740234375, "learning_rate": 1.076550872971815e-06, "loss": 18.2676, "step": 404230 }, { "epoch": 0.8165903756105641, "grad_norm": 328.09332275390625, "learning_rate": 1.0763345005914649e-06, "loss": 9.905, "step": 404240 }, { "epoch": 0.816610576243248, "grad_norm": 81.0307846069336, "learning_rate": 1.076118147334585e-06, "loss": 26.2393, "step": 404250 }, { "epoch": 0.8166307768759318, "grad_norm": 218.27403259277344, "learning_rate": 1.0759018132022302e-06, "loss": 17.9203, "step": 404260 }, { "epoch": 0.8166509775086156, "grad_norm": 318.4286804199219, "learning_rate": 1.0756854981954546e-06, "loss": 16.7945, "step": 404270 }, { "epoch": 0.8166711781412994, "grad_norm": 263.6123352050781, "learning_rate": 1.0754692023153101e-06, "loss": 12.3093, "step": 404280 }, { "epoch": 0.8166913787739832, "grad_norm": 317.9670104980469, "learning_rate": 1.0752529255628542e-06, "loss": 13.556, "step": 404290 }, { "epoch": 0.816711579406667, "grad_norm": 405.1814880371094, "learning_rate": 1.0750366679391393e-06, "loss": 29.9192, "step": 404300 }, { "epoch": 0.8167317800393509, "grad_norm": 119.90970611572266, "learning_rate": 1.0748204294452187e-06, "loss": 10.6919, "step": 404310 }, { "epoch": 0.8167519806720347, "grad_norm": 88.13058471679688, "learning_rate": 1.0746042100821485e-06, "loss": 18.9458, "step": 404320 }, { "epoch": 0.8167721813047185, "grad_norm": 33.982276916503906, "learning_rate": 1.0743880098509802e-06, "loss": 8.373, "step": 404330 }, { "epoch": 0.8167923819374023, "grad_norm": 299.37164306640625, "learning_rate": 1.074171828752769e-06, "loss": 17.485, "step": 404340 }, { "epoch": 0.816812582570086, "grad_norm": 255.22340393066406, "learning_rate": 1.0739556667885692e-06, "loss": 22.2629, "step": 404350 }, { "epoch": 0.8168327832027699, "grad_norm": 254.97079467773438, "learning_rate": 1.0737395239594318e-06, "loss": 14.8817, "step": 404360 }, { "epoch": 0.8168529838354537, "grad_norm": 308.0880126953125, "learning_rate": 1.0735234002664123e-06, "loss": 15.9804, "step": 404370 }, { "epoch": 0.8168731844681375, "grad_norm": 479.8957214355469, "learning_rate": 1.0733072957105633e-06, "loss": 16.146, "step": 404380 }, { "epoch": 0.8168933851008213, "grad_norm": 527.0227661132812, "learning_rate": 1.0730912102929392e-06, "loss": 18.3879, "step": 404390 }, { "epoch": 0.8169135857335051, "grad_norm": 153.64944458007812, "learning_rate": 1.0728751440145907e-06, "loss": 7.6426, "step": 404400 }, { "epoch": 0.816933786366189, "grad_norm": 197.2552947998047, "learning_rate": 1.0726590968765738e-06, "loss": 14.252, "step": 404410 }, { "epoch": 0.8169539869988728, "grad_norm": 0.0, "learning_rate": 1.0724430688799402e-06, "loss": 9.4771, "step": 404420 }, { "epoch": 0.8169741876315566, "grad_norm": 79.36580657958984, "learning_rate": 1.0722270600257411e-06, "loss": 24.0201, "step": 404430 }, { "epoch": 0.8169943882642404, "grad_norm": 751.60986328125, "learning_rate": 1.0720110703150327e-06, "loss": 27.1334, "step": 404440 }, { "epoch": 0.8170145888969242, "grad_norm": 297.7364807128906, "learning_rate": 1.0717950997488662e-06, "loss": 10.776, "step": 404450 }, { "epoch": 0.8170347895296081, "grad_norm": 31.027788162231445, "learning_rate": 1.0715791483282922e-06, "loss": 26.4153, "step": 404460 }, { "epoch": 0.8170549901622919, "grad_norm": 0.0, "learning_rate": 1.0713632160543647e-06, "loss": 11.7118, "step": 404470 }, { "epoch": 0.8170751907949757, "grad_norm": 464.6082763671875, "learning_rate": 1.0711473029281394e-06, "loss": 13.7547, "step": 404480 }, { "epoch": 0.8170953914276595, "grad_norm": 102.44761657714844, "learning_rate": 1.0709314089506634e-06, "loss": 16.2741, "step": 404490 }, { "epoch": 0.8171155920603433, "grad_norm": 325.610107421875, "learning_rate": 1.0707155341229902e-06, "loss": 16.493, "step": 404500 }, { "epoch": 0.8171357926930272, "grad_norm": 341.5953369140625, "learning_rate": 1.0704996784461753e-06, "loss": 31.7439, "step": 404510 }, { "epoch": 0.817155993325711, "grad_norm": 444.5843200683594, "learning_rate": 1.0702838419212674e-06, "loss": 18.4306, "step": 404520 }, { "epoch": 0.8171761939583948, "grad_norm": 341.1451721191406, "learning_rate": 1.0700680245493188e-06, "loss": 18.5624, "step": 404530 }, { "epoch": 0.8171963945910786, "grad_norm": 304.3035583496094, "learning_rate": 1.0698522263313816e-06, "loss": 9.2319, "step": 404540 }, { "epoch": 0.8172165952237624, "grad_norm": 247.0109100341797, "learning_rate": 1.0696364472685112e-06, "loss": 8.9845, "step": 404550 }, { "epoch": 0.8172367958564463, "grad_norm": 90.90008544921875, "learning_rate": 1.069420687361753e-06, "loss": 9.6228, "step": 404560 }, { "epoch": 0.8172569964891301, "grad_norm": 187.72535705566406, "learning_rate": 1.0692049466121613e-06, "loss": 12.4732, "step": 404570 }, { "epoch": 0.8172771971218139, "grad_norm": 285.7873229980469, "learning_rate": 1.0689892250207894e-06, "loss": 15.9385, "step": 404580 }, { "epoch": 0.8172973977544977, "grad_norm": 300.1120300292969, "learning_rate": 1.0687735225886874e-06, "loss": 22.6963, "step": 404590 }, { "epoch": 0.8173175983871814, "grad_norm": 552.6585693359375, "learning_rate": 1.0685578393169054e-06, "loss": 24.3308, "step": 404600 }, { "epoch": 0.8173377990198653, "grad_norm": 581.1524658203125, "learning_rate": 1.068342175206497e-06, "loss": 15.8675, "step": 404610 }, { "epoch": 0.8173579996525491, "grad_norm": 724.088134765625, "learning_rate": 1.0681265302585114e-06, "loss": 27.5447, "step": 404620 }, { "epoch": 0.8173782002852329, "grad_norm": 401.47296142578125, "learning_rate": 1.0679109044739994e-06, "loss": 11.82, "step": 404630 }, { "epoch": 0.8173984009179167, "grad_norm": 255.02484130859375, "learning_rate": 1.067695297854014e-06, "loss": 25.8225, "step": 404640 }, { "epoch": 0.8174186015506005, "grad_norm": 98.4615249633789, "learning_rate": 1.0674797103996033e-06, "loss": 9.6517, "step": 404650 }, { "epoch": 0.8174388021832844, "grad_norm": 304.14141845703125, "learning_rate": 1.0672641421118214e-06, "loss": 16.4447, "step": 404660 }, { "epoch": 0.8174590028159682, "grad_norm": 498.47491455078125, "learning_rate": 1.067048592991715e-06, "loss": 16.0436, "step": 404670 }, { "epoch": 0.817479203448652, "grad_norm": 165.97137451171875, "learning_rate": 1.0668330630403383e-06, "loss": 23.399, "step": 404680 }, { "epoch": 0.8174994040813358, "grad_norm": 498.13818359375, "learning_rate": 1.0666175522587402e-06, "loss": 13.2221, "step": 404690 }, { "epoch": 0.8175196047140196, "grad_norm": 299.44189453125, "learning_rate": 1.0664020606479702e-06, "loss": 18.4671, "step": 404700 }, { "epoch": 0.8175398053467035, "grad_norm": 197.46597290039062, "learning_rate": 1.0661865882090805e-06, "loss": 11.8291, "step": 404710 }, { "epoch": 0.8175600059793873, "grad_norm": 318.86773681640625, "learning_rate": 1.0659711349431184e-06, "loss": 14.8265, "step": 404720 }, { "epoch": 0.8175802066120711, "grad_norm": 189.82337951660156, "learning_rate": 1.0657557008511377e-06, "loss": 15.177, "step": 404730 }, { "epoch": 0.8176004072447549, "grad_norm": 196.31600952148438, "learning_rate": 1.0655402859341868e-06, "loss": 21.3637, "step": 404740 }, { "epoch": 0.8176206078774387, "grad_norm": 150.42042541503906, "learning_rate": 1.065324890193314e-06, "loss": 14.749, "step": 404750 }, { "epoch": 0.8176408085101226, "grad_norm": 175.3872833251953, "learning_rate": 1.0651095136295713e-06, "loss": 25.7236, "step": 404760 }, { "epoch": 0.8176610091428064, "grad_norm": 65.73326873779297, "learning_rate": 1.064894156244008e-06, "loss": 62.1478, "step": 404770 }, { "epoch": 0.8176812097754902, "grad_norm": 2.811586618423462, "learning_rate": 1.0646788180376716e-06, "loss": 16.7039, "step": 404780 }, { "epoch": 0.817701410408174, "grad_norm": 351.4166259765625, "learning_rate": 1.0644634990116132e-06, "loss": 15.4012, "step": 404790 }, { "epoch": 0.8177216110408578, "grad_norm": 188.87106323242188, "learning_rate": 1.064248199166884e-06, "loss": 19.9516, "step": 404800 }, { "epoch": 0.8177418116735417, "grad_norm": 267.4775695800781, "learning_rate": 1.0640329185045323e-06, "loss": 24.9504, "step": 404810 }, { "epoch": 0.8177620123062255, "grad_norm": 386.139404296875, "learning_rate": 1.0638176570256048e-06, "loss": 16.3425, "step": 404820 }, { "epoch": 0.8177822129389093, "grad_norm": 352.9129638671875, "learning_rate": 1.0636024147311524e-06, "loss": 27.4537, "step": 404830 }, { "epoch": 0.8178024135715931, "grad_norm": 216.7196807861328, "learning_rate": 1.0633871916222277e-06, "loss": 15.5567, "step": 404840 }, { "epoch": 0.8178226142042769, "grad_norm": 569.1050415039062, "learning_rate": 1.0631719876998736e-06, "loss": 14.0883, "step": 404850 }, { "epoch": 0.8178428148369606, "grad_norm": 597.375244140625, "learning_rate": 1.0629568029651416e-06, "loss": 9.0135, "step": 404860 }, { "epoch": 0.8178630154696445, "grad_norm": 280.1982421875, "learning_rate": 1.0627416374190818e-06, "loss": 18.0347, "step": 404870 }, { "epoch": 0.8178832161023283, "grad_norm": 320.2178955078125, "learning_rate": 1.062526491062742e-06, "loss": 14.1492, "step": 404880 }, { "epoch": 0.8179034167350121, "grad_norm": 276.9504699707031, "learning_rate": 1.0623113638971688e-06, "loss": 19.7827, "step": 404890 }, { "epoch": 0.8179236173676959, "grad_norm": 123.37667083740234, "learning_rate": 1.0620962559234144e-06, "loss": 7.1105, "step": 404900 }, { "epoch": 0.8179438180003797, "grad_norm": 516.1414794921875, "learning_rate": 1.0618811671425244e-06, "loss": 18.386, "step": 404910 }, { "epoch": 0.8179640186330636, "grad_norm": 237.60955810546875, "learning_rate": 1.0616660975555476e-06, "loss": 11.1755, "step": 404920 }, { "epoch": 0.8179842192657474, "grad_norm": 417.2143249511719, "learning_rate": 1.0614510471635332e-06, "loss": 20.1027, "step": 404930 }, { "epoch": 0.8180044198984312, "grad_norm": 32.330535888671875, "learning_rate": 1.0612360159675278e-06, "loss": 16.1956, "step": 404940 }, { "epoch": 0.818024620531115, "grad_norm": 355.2663269042969, "learning_rate": 1.0610210039685815e-06, "loss": 14.6692, "step": 404950 }, { "epoch": 0.8180448211637988, "grad_norm": 287.6656799316406, "learning_rate": 1.0608060111677409e-06, "loss": 17.8554, "step": 404960 }, { "epoch": 0.8180650217964827, "grad_norm": 334.52239990234375, "learning_rate": 1.0605910375660527e-06, "loss": 13.5917, "step": 404970 }, { "epoch": 0.8180852224291665, "grad_norm": 144.3714141845703, "learning_rate": 1.0603760831645677e-06, "loss": 20.0004, "step": 404980 }, { "epoch": 0.8181054230618503, "grad_norm": 57.82802200317383, "learning_rate": 1.0601611479643303e-06, "loss": 24.0335, "step": 404990 }, { "epoch": 0.8181256236945341, "grad_norm": 222.59144592285156, "learning_rate": 1.0599462319663906e-06, "loss": 13.4831, "step": 405000 }, { "epoch": 0.8181458243272179, "grad_norm": 288.19488525390625, "learning_rate": 1.0597313351717942e-06, "loss": 23.7148, "step": 405010 }, { "epoch": 0.8181660249599018, "grad_norm": 174.80062866210938, "learning_rate": 1.0595164575815909e-06, "loss": 20.6812, "step": 405020 }, { "epoch": 0.8181862255925856, "grad_norm": 538.6773071289062, "learning_rate": 1.0593015991968258e-06, "loss": 17.3669, "step": 405030 }, { "epoch": 0.8182064262252694, "grad_norm": 227.2083740234375, "learning_rate": 1.0590867600185462e-06, "loss": 11.5204, "step": 405040 }, { "epoch": 0.8182266268579532, "grad_norm": 218.55540466308594, "learning_rate": 1.0588719400478004e-06, "loss": 25.7913, "step": 405050 }, { "epoch": 0.818246827490637, "grad_norm": 16.992464065551758, "learning_rate": 1.0586571392856354e-06, "loss": 13.0807, "step": 405060 }, { "epoch": 0.8182670281233209, "grad_norm": 180.9469451904297, "learning_rate": 1.0584423577330955e-06, "loss": 14.0716, "step": 405070 }, { "epoch": 0.8182872287560047, "grad_norm": 207.26675415039062, "learning_rate": 1.0582275953912296e-06, "loss": 20.6319, "step": 405080 }, { "epoch": 0.8183074293886885, "grad_norm": 208.16290283203125, "learning_rate": 1.0580128522610872e-06, "loss": 8.9663, "step": 405090 }, { "epoch": 0.8183276300213723, "grad_norm": 202.23414611816406, "learning_rate": 1.0577981283437095e-06, "loss": 21.6791, "step": 405100 }, { "epoch": 0.818347830654056, "grad_norm": 321.67401123046875, "learning_rate": 1.0575834236401455e-06, "loss": 16.92, "step": 405110 }, { "epoch": 0.8183680312867398, "grad_norm": 169.3002471923828, "learning_rate": 1.057368738151443e-06, "loss": 16.8149, "step": 405120 }, { "epoch": 0.8183882319194237, "grad_norm": 369.4490051269531, "learning_rate": 1.0571540718786471e-06, "loss": 22.298, "step": 405130 }, { "epoch": 0.8184084325521075, "grad_norm": 91.614990234375, "learning_rate": 1.0569394248228026e-06, "loss": 19.3968, "step": 405140 }, { "epoch": 0.8184286331847913, "grad_norm": 172.7015838623047, "learning_rate": 1.0567247969849576e-06, "loss": 10.6505, "step": 405150 }, { "epoch": 0.8184488338174751, "grad_norm": 398.93194580078125, "learning_rate": 1.05651018836616e-06, "loss": 13.1286, "step": 405160 }, { "epoch": 0.818469034450159, "grad_norm": 55.3370361328125, "learning_rate": 1.0562955989674506e-06, "loss": 16.289, "step": 405170 }, { "epoch": 0.8184892350828428, "grad_norm": 284.4048156738281, "learning_rate": 1.0560810287898783e-06, "loss": 18.1475, "step": 405180 }, { "epoch": 0.8185094357155266, "grad_norm": 316.59710693359375, "learning_rate": 1.05586647783449e-06, "loss": 9.2838, "step": 405190 }, { "epoch": 0.8185296363482104, "grad_norm": 202.56716918945312, "learning_rate": 1.0556519461023301e-06, "loss": 10.5946, "step": 405200 }, { "epoch": 0.8185498369808942, "grad_norm": 158.3127899169922, "learning_rate": 1.0554374335944429e-06, "loss": 18.6313, "step": 405210 }, { "epoch": 0.818570037613578, "grad_norm": 23.87074089050293, "learning_rate": 1.055222940311877e-06, "loss": 13.1405, "step": 405220 }, { "epoch": 0.8185902382462619, "grad_norm": 383.5315856933594, "learning_rate": 1.0550084662556753e-06, "loss": 16.4595, "step": 405230 }, { "epoch": 0.8186104388789457, "grad_norm": 543.0101928710938, "learning_rate": 1.0547940114268828e-06, "loss": 26.5574, "step": 405240 }, { "epoch": 0.8186306395116295, "grad_norm": 381.8157958984375, "learning_rate": 1.0545795758265476e-06, "loss": 18.2483, "step": 405250 }, { "epoch": 0.8186508401443133, "grad_norm": 249.52008056640625, "learning_rate": 1.0543651594557113e-06, "loss": 21.6867, "step": 405260 }, { "epoch": 0.8186710407769971, "grad_norm": 432.4031982421875, "learning_rate": 1.0541507623154218e-06, "loss": 15.7907, "step": 405270 }, { "epoch": 0.818691241409681, "grad_norm": 378.11309814453125, "learning_rate": 1.0539363844067218e-06, "loss": 12.8796, "step": 405280 }, { "epoch": 0.8187114420423648, "grad_norm": 325.0871276855469, "learning_rate": 1.053722025730659e-06, "loss": 21.1912, "step": 405290 }, { "epoch": 0.8187316426750486, "grad_norm": 259.6477966308594, "learning_rate": 1.053507686288276e-06, "loss": 11.0579, "step": 405300 }, { "epoch": 0.8187518433077324, "grad_norm": 169.36337280273438, "learning_rate": 1.0532933660806166e-06, "loss": 16.595, "step": 405310 }, { "epoch": 0.8187720439404162, "grad_norm": 275.9342041015625, "learning_rate": 1.053079065108728e-06, "loss": 19.0664, "step": 405320 }, { "epoch": 0.8187922445731001, "grad_norm": 199.3434295654297, "learning_rate": 1.0528647833736516e-06, "loss": 6.8053, "step": 405330 }, { "epoch": 0.8188124452057839, "grad_norm": 118.13033294677734, "learning_rate": 1.0526505208764353e-06, "loss": 12.4617, "step": 405340 }, { "epoch": 0.8188326458384677, "grad_norm": 0.0, "learning_rate": 1.052436277618122e-06, "loss": 15.4853, "step": 405350 }, { "epoch": 0.8188528464711515, "grad_norm": 516.2611694335938, "learning_rate": 1.0522220535997534e-06, "loss": 12.4549, "step": 405360 }, { "epoch": 0.8188730471038352, "grad_norm": 38.306034088134766, "learning_rate": 1.0520078488223772e-06, "loss": 19.3303, "step": 405370 }, { "epoch": 0.8188932477365191, "grad_norm": 39.11674118041992, "learning_rate": 1.0517936632870362e-06, "loss": 14.4616, "step": 405380 }, { "epoch": 0.8189134483692029, "grad_norm": 46.31529998779297, "learning_rate": 1.0515794969947724e-06, "loss": 11.945, "step": 405390 }, { "epoch": 0.8189336490018867, "grad_norm": 268.09832763671875, "learning_rate": 1.0513653499466315e-06, "loss": 12.7435, "step": 405400 }, { "epoch": 0.8189538496345705, "grad_norm": 352.0765686035156, "learning_rate": 1.0511512221436581e-06, "loss": 15.2097, "step": 405410 }, { "epoch": 0.8189740502672543, "grad_norm": 453.1484069824219, "learning_rate": 1.0509371135868945e-06, "loss": 16.1051, "step": 405420 }, { "epoch": 0.8189942508999382, "grad_norm": 540.0082397460938, "learning_rate": 1.0507230242773836e-06, "loss": 26.8939, "step": 405430 }, { "epoch": 0.819014451532622, "grad_norm": 351.03570556640625, "learning_rate": 1.0505089542161707e-06, "loss": 17.0254, "step": 405440 }, { "epoch": 0.8190346521653058, "grad_norm": 176.3683624267578, "learning_rate": 1.0502949034042985e-06, "loss": 19.0502, "step": 405450 }, { "epoch": 0.8190548527979896, "grad_norm": 11.410710334777832, "learning_rate": 1.050080871842808e-06, "loss": 12.3323, "step": 405460 }, { "epoch": 0.8190750534306734, "grad_norm": 206.86138916015625, "learning_rate": 1.0498668595327448e-06, "loss": 8.8435, "step": 405470 }, { "epoch": 0.8190952540633573, "grad_norm": 196.73512268066406, "learning_rate": 1.0496528664751527e-06, "loss": 10.0861, "step": 405480 }, { "epoch": 0.8191154546960411, "grad_norm": 231.3568878173828, "learning_rate": 1.049438892671073e-06, "loss": 8.7687, "step": 405490 }, { "epoch": 0.8191356553287249, "grad_norm": 245.58192443847656, "learning_rate": 1.049224938121548e-06, "loss": 10.7136, "step": 405500 }, { "epoch": 0.8191558559614087, "grad_norm": 172.83468627929688, "learning_rate": 1.049011002827623e-06, "loss": 30.2287, "step": 405510 }, { "epoch": 0.8191760565940925, "grad_norm": 572.02197265625, "learning_rate": 1.0487970867903385e-06, "loss": 15.2862, "step": 405520 }, { "epoch": 0.8191962572267764, "grad_norm": 488.36444091796875, "learning_rate": 1.0485831900107368e-06, "loss": 21.3126, "step": 405530 }, { "epoch": 0.8192164578594602, "grad_norm": 210.5561065673828, "learning_rate": 1.0483693124898631e-06, "loss": 19.44, "step": 405540 }, { "epoch": 0.819236658492144, "grad_norm": 248.6859130859375, "learning_rate": 1.0481554542287565e-06, "loss": 15.5073, "step": 405550 }, { "epoch": 0.8192568591248278, "grad_norm": 975.2809448242188, "learning_rate": 1.0479416152284622e-06, "loss": 22.2656, "step": 405560 }, { "epoch": 0.8192770597575116, "grad_norm": 40.22349548339844, "learning_rate": 1.0477277954900194e-06, "loss": 18.9801, "step": 405570 }, { "epoch": 0.8192972603901955, "grad_norm": 129.30970764160156, "learning_rate": 1.047513995014474e-06, "loss": 16.388, "step": 405580 }, { "epoch": 0.8193174610228793, "grad_norm": 82.52376556396484, "learning_rate": 1.0473002138028654e-06, "loss": 18.9236, "step": 405590 }, { "epoch": 0.8193376616555631, "grad_norm": 13.446331977844238, "learning_rate": 1.047086451856235e-06, "loss": 6.554, "step": 405600 }, { "epoch": 0.8193578622882469, "grad_norm": 321.9046936035156, "learning_rate": 1.0468727091756275e-06, "loss": 14.6288, "step": 405610 }, { "epoch": 0.8193780629209307, "grad_norm": 734.3206787109375, "learning_rate": 1.0466589857620813e-06, "loss": 18.6406, "step": 405620 }, { "epoch": 0.8193982635536144, "grad_norm": 272.13330078125, "learning_rate": 1.0464452816166416e-06, "loss": 16.74, "step": 405630 }, { "epoch": 0.8194184641862983, "grad_norm": 367.9081726074219, "learning_rate": 1.0462315967403475e-06, "loss": 21.8071, "step": 405640 }, { "epoch": 0.8194386648189821, "grad_norm": 396.5562438964844, "learning_rate": 1.0460179311342394e-06, "loss": 18.1446, "step": 405650 }, { "epoch": 0.8194588654516659, "grad_norm": 70.21578979492188, "learning_rate": 1.0458042847993627e-06, "loss": 26.0452, "step": 405660 }, { "epoch": 0.8194790660843497, "grad_norm": 385.47589111328125, "learning_rate": 1.0455906577367553e-06, "loss": 18.9814, "step": 405670 }, { "epoch": 0.8194992667170335, "grad_norm": 210.4528350830078, "learning_rate": 1.0453770499474585e-06, "loss": 20.9039, "step": 405680 }, { "epoch": 0.8195194673497174, "grad_norm": 257.91607666015625, "learning_rate": 1.0451634614325146e-06, "loss": 15.8119, "step": 405690 }, { "epoch": 0.8195396679824012, "grad_norm": 165.5497589111328, "learning_rate": 1.0449498921929669e-06, "loss": 12.0576, "step": 405700 }, { "epoch": 0.819559868615085, "grad_norm": 196.23355102539062, "learning_rate": 1.0447363422298507e-06, "loss": 16.7731, "step": 405710 }, { "epoch": 0.8195800692477688, "grad_norm": 64.97459411621094, "learning_rate": 1.0445228115442102e-06, "loss": 11.5191, "step": 405720 }, { "epoch": 0.8196002698804526, "grad_norm": 242.86541748046875, "learning_rate": 1.044309300137087e-06, "loss": 14.4687, "step": 405730 }, { "epoch": 0.8196204705131365, "grad_norm": 188.00233459472656, "learning_rate": 1.0440958080095204e-06, "loss": 9.5961, "step": 405740 }, { "epoch": 0.8196406711458203, "grad_norm": 426.6351013183594, "learning_rate": 1.04388233516255e-06, "loss": 19.2857, "step": 405750 }, { "epoch": 0.8196608717785041, "grad_norm": 130.6101837158203, "learning_rate": 1.0436688815972168e-06, "loss": 18.6505, "step": 405760 }, { "epoch": 0.8196810724111879, "grad_norm": 381.77593994140625, "learning_rate": 1.0434554473145646e-06, "loss": 16.5964, "step": 405770 }, { "epoch": 0.8197012730438717, "grad_norm": 409.5113220214844, "learning_rate": 1.0432420323156284e-06, "loss": 17.169, "step": 405780 }, { "epoch": 0.8197214736765556, "grad_norm": 114.75640869140625, "learning_rate": 1.0430286366014496e-06, "loss": 16.0052, "step": 405790 }, { "epoch": 0.8197416743092394, "grad_norm": 204.36944580078125, "learning_rate": 1.0428152601730718e-06, "loss": 24.8317, "step": 405800 }, { "epoch": 0.8197618749419232, "grad_norm": 232.23973083496094, "learning_rate": 1.0426019030315314e-06, "loss": 14.8413, "step": 405810 }, { "epoch": 0.819782075574607, "grad_norm": 440.1769104003906, "learning_rate": 1.0423885651778688e-06, "loss": 9.006, "step": 405820 }, { "epoch": 0.8198022762072908, "grad_norm": 486.2380065917969, "learning_rate": 1.0421752466131258e-06, "loss": 20.2553, "step": 405830 }, { "epoch": 0.8198224768399747, "grad_norm": 431.1197204589844, "learning_rate": 1.0419619473383402e-06, "loss": 25.2384, "step": 405840 }, { "epoch": 0.8198426774726585, "grad_norm": 292.935791015625, "learning_rate": 1.0417486673545508e-06, "loss": 14.2571, "step": 405850 }, { "epoch": 0.8198628781053423, "grad_norm": 549.5126953125, "learning_rate": 1.0415354066627993e-06, "loss": 11.8601, "step": 405860 }, { "epoch": 0.8198830787380261, "grad_norm": 103.15132141113281, "learning_rate": 1.041322165264123e-06, "loss": 20.5082, "step": 405870 }, { "epoch": 0.8199032793707098, "grad_norm": 311.195068359375, "learning_rate": 1.0411089431595639e-06, "loss": 25.7932, "step": 405880 }, { "epoch": 0.8199234800033937, "grad_norm": 359.44439697265625, "learning_rate": 1.040895740350158e-06, "loss": 20.9251, "step": 405890 }, { "epoch": 0.8199436806360775, "grad_norm": 246.91383361816406, "learning_rate": 1.0406825568369478e-06, "loss": 12.7891, "step": 405900 }, { "epoch": 0.8199638812687613, "grad_norm": 327.2317810058594, "learning_rate": 1.0404693926209702e-06, "loss": 16.194, "step": 405910 }, { "epoch": 0.8199840819014451, "grad_norm": 0.0, "learning_rate": 1.0402562477032635e-06, "loss": 18.7994, "step": 405920 }, { "epoch": 0.8200042825341289, "grad_norm": 466.1953125, "learning_rate": 1.0400431220848688e-06, "loss": 25.6737, "step": 405930 }, { "epoch": 0.8200244831668128, "grad_norm": 581.4534301757812, "learning_rate": 1.0398300157668222e-06, "loss": 21.6007, "step": 405940 }, { "epoch": 0.8200446837994966, "grad_norm": 207.38961791992188, "learning_rate": 1.0396169287501652e-06, "loss": 18.0352, "step": 405950 }, { "epoch": 0.8200648844321804, "grad_norm": 546.5402221679688, "learning_rate": 1.0394038610359352e-06, "loss": 19.603, "step": 405960 }, { "epoch": 0.8200850850648642, "grad_norm": 39.632686614990234, "learning_rate": 1.0391908126251688e-06, "loss": 14.3446, "step": 405970 }, { "epoch": 0.820105285697548, "grad_norm": 481.43212890625, "learning_rate": 1.0389777835189075e-06, "loss": 15.8607, "step": 405980 }, { "epoch": 0.8201254863302319, "grad_norm": 252.04769897460938, "learning_rate": 1.0387647737181877e-06, "loss": 14.8261, "step": 405990 }, { "epoch": 0.8201456869629157, "grad_norm": 416.5263671875, "learning_rate": 1.0385517832240472e-06, "loss": 17.7035, "step": 406000 }, { "epoch": 0.8201658875955995, "grad_norm": 363.0948791503906, "learning_rate": 1.0383388120375242e-06, "loss": 17.9783, "step": 406010 }, { "epoch": 0.8201860882282833, "grad_norm": 250.7672576904297, "learning_rate": 1.0381258601596594e-06, "loss": 11.0796, "step": 406020 }, { "epoch": 0.8202062888609671, "grad_norm": 313.7210388183594, "learning_rate": 1.0379129275914878e-06, "loss": 26.7361, "step": 406030 }, { "epoch": 0.820226489493651, "grad_norm": 267.91925048828125, "learning_rate": 1.037700014334047e-06, "loss": 20.0699, "step": 406040 }, { "epoch": 0.8202466901263348, "grad_norm": 140.6750946044922, "learning_rate": 1.0374871203883774e-06, "loss": 14.7636, "step": 406050 }, { "epoch": 0.8202668907590186, "grad_norm": 135.94540405273438, "learning_rate": 1.0372742457555151e-06, "loss": 9.9186, "step": 406060 }, { "epoch": 0.8202870913917024, "grad_norm": 382.0945129394531, "learning_rate": 1.0370613904364957e-06, "loss": 12.2888, "step": 406070 }, { "epoch": 0.8203072920243862, "grad_norm": 263.43212890625, "learning_rate": 1.0368485544323586e-06, "loss": 17.8339, "step": 406080 }, { "epoch": 0.8203274926570701, "grad_norm": 77.17182159423828, "learning_rate": 1.0366357377441427e-06, "loss": 16.6208, "step": 406090 }, { "epoch": 0.8203476932897539, "grad_norm": 189.69189453125, "learning_rate": 1.036422940372883e-06, "loss": 10.881, "step": 406100 }, { "epoch": 0.8203678939224377, "grad_norm": 93.88059997558594, "learning_rate": 1.0362101623196158e-06, "loss": 7.3551, "step": 406110 }, { "epoch": 0.8203880945551215, "grad_norm": 167.5835418701172, "learning_rate": 1.0359974035853814e-06, "loss": 15.4943, "step": 406120 }, { "epoch": 0.8204082951878053, "grad_norm": 259.94512939453125, "learning_rate": 1.0357846641712143e-06, "loss": 6.3389, "step": 406130 }, { "epoch": 0.820428495820489, "grad_norm": 311.25775146484375, "learning_rate": 1.0355719440781508e-06, "loss": 12.4914, "step": 406140 }, { "epoch": 0.8204486964531729, "grad_norm": 300.4091491699219, "learning_rate": 1.0353592433072302e-06, "loss": 17.7268, "step": 406150 }, { "epoch": 0.8204688970858567, "grad_norm": 243.65689086914062, "learning_rate": 1.035146561859487e-06, "loss": 17.0959, "step": 406160 }, { "epoch": 0.8204890977185405, "grad_norm": 60.447635650634766, "learning_rate": 1.0349338997359593e-06, "loss": 13.9472, "step": 406170 }, { "epoch": 0.8205092983512243, "grad_norm": 158.20603942871094, "learning_rate": 1.0347212569376814e-06, "loss": 15.3285, "step": 406180 }, { "epoch": 0.8205294989839081, "grad_norm": 85.56019592285156, "learning_rate": 1.0345086334656929e-06, "loss": 14.976, "step": 406190 }, { "epoch": 0.820549699616592, "grad_norm": 101.99317932128906, "learning_rate": 1.0342960293210281e-06, "loss": 20.2005, "step": 406200 }, { "epoch": 0.8205699002492758, "grad_norm": 165.2855224609375, "learning_rate": 1.034083444504722e-06, "loss": 12.558, "step": 406210 }, { "epoch": 0.8205901008819596, "grad_norm": 98.16161346435547, "learning_rate": 1.0338708790178136e-06, "loss": 23.3894, "step": 406220 }, { "epoch": 0.8206103015146434, "grad_norm": 116.78408813476562, "learning_rate": 1.0336583328613364e-06, "loss": 26.8875, "step": 406230 }, { "epoch": 0.8206305021473272, "grad_norm": 320.4471740722656, "learning_rate": 1.0334458060363289e-06, "loss": 19.3972, "step": 406240 }, { "epoch": 0.8206507027800111, "grad_norm": 132.66539001464844, "learning_rate": 1.0332332985438248e-06, "loss": 11.954, "step": 406250 }, { "epoch": 0.8206709034126949, "grad_norm": 295.5437316894531, "learning_rate": 1.03302081038486e-06, "loss": 13.2981, "step": 406260 }, { "epoch": 0.8206911040453787, "grad_norm": 264.42578125, "learning_rate": 1.032808341560471e-06, "loss": 30.938, "step": 406270 }, { "epoch": 0.8207113046780625, "grad_norm": 605.0203857421875, "learning_rate": 1.032595892071694e-06, "loss": 36.9434, "step": 406280 }, { "epoch": 0.8207315053107463, "grad_norm": 2.2533929347991943, "learning_rate": 1.0323834619195617e-06, "loss": 8.6835, "step": 406290 }, { "epoch": 0.8207517059434302, "grad_norm": 82.27595520019531, "learning_rate": 1.0321710511051108e-06, "loss": 7.2976, "step": 406300 }, { "epoch": 0.820771906576114, "grad_norm": 325.17864990234375, "learning_rate": 1.03195865962938e-06, "loss": 9.746, "step": 406310 }, { "epoch": 0.8207921072087978, "grad_norm": 186.6896209716797, "learning_rate": 1.0317462874933987e-06, "loss": 26.1346, "step": 406320 }, { "epoch": 0.8208123078414816, "grad_norm": 506.2678527832031, "learning_rate": 1.0315339346982044e-06, "loss": 30.4214, "step": 406330 }, { "epoch": 0.8208325084741654, "grad_norm": 135.5747833251953, "learning_rate": 1.0313216012448341e-06, "loss": 20.2887, "step": 406340 }, { "epoch": 0.8208527091068493, "grad_norm": 449.65594482421875, "learning_rate": 1.0311092871343209e-06, "loss": 24.1353, "step": 406350 }, { "epoch": 0.8208729097395331, "grad_norm": 361.7408142089844, "learning_rate": 1.0308969923676987e-06, "loss": 15.1523, "step": 406360 }, { "epoch": 0.8208931103722169, "grad_norm": 290.1417541503906, "learning_rate": 1.0306847169460028e-06, "loss": 20.1371, "step": 406370 }, { "epoch": 0.8209133110049007, "grad_norm": 372.1707763671875, "learning_rate": 1.0304724608702704e-06, "loss": 13.7624, "step": 406380 }, { "epoch": 0.8209335116375844, "grad_norm": 283.0932312011719, "learning_rate": 1.0302602241415316e-06, "loss": 14.5085, "step": 406390 }, { "epoch": 0.8209537122702683, "grad_norm": 124.39419555664062, "learning_rate": 1.0300480067608232e-06, "loss": 11.6285, "step": 406400 }, { "epoch": 0.8209739129029521, "grad_norm": 201.22998046875, "learning_rate": 1.0298358087291803e-06, "loss": 20.8803, "step": 406410 }, { "epoch": 0.8209941135356359, "grad_norm": 316.2378845214844, "learning_rate": 1.0296236300476359e-06, "loss": 12.4225, "step": 406420 }, { "epoch": 0.8210143141683197, "grad_norm": 414.67333984375, "learning_rate": 1.0294114707172236e-06, "loss": 16.0564, "step": 406430 }, { "epoch": 0.8210345148010035, "grad_norm": 5.444242477416992, "learning_rate": 1.0291993307389792e-06, "loss": 11.9964, "step": 406440 }, { "epoch": 0.8210547154336874, "grad_norm": 0.0, "learning_rate": 1.0289872101139359e-06, "loss": 14.2407, "step": 406450 }, { "epoch": 0.8210749160663712, "grad_norm": 204.42660522460938, "learning_rate": 1.0287751088431257e-06, "loss": 11.2703, "step": 406460 }, { "epoch": 0.821095116699055, "grad_norm": 291.5847473144531, "learning_rate": 1.028563026927586e-06, "loss": 12.7488, "step": 406470 }, { "epoch": 0.8211153173317388, "grad_norm": 292.3769226074219, "learning_rate": 1.0283509643683464e-06, "loss": 13.3311, "step": 406480 }, { "epoch": 0.8211355179644226, "grad_norm": 91.97225952148438, "learning_rate": 1.0281389211664439e-06, "loss": 14.5058, "step": 406490 }, { "epoch": 0.8211557185971065, "grad_norm": 112.354248046875, "learning_rate": 1.0279268973229089e-06, "loss": 14.8241, "step": 406500 }, { "epoch": 0.8211759192297903, "grad_norm": 32.42730712890625, "learning_rate": 1.0277148928387788e-06, "loss": 15.4836, "step": 406510 }, { "epoch": 0.8211961198624741, "grad_norm": 339.1059875488281, "learning_rate": 1.0275029077150838e-06, "loss": 23.0982, "step": 406520 }, { "epoch": 0.8212163204951579, "grad_norm": 275.5375671386719, "learning_rate": 1.0272909419528565e-06, "loss": 21.8713, "step": 406530 }, { "epoch": 0.8212365211278417, "grad_norm": 185.5475616455078, "learning_rate": 1.0270789955531329e-06, "loss": 13.8777, "step": 406540 }, { "epoch": 0.8212567217605256, "grad_norm": 420.8015441894531, "learning_rate": 1.026867068516943e-06, "loss": 22.9787, "step": 406550 }, { "epoch": 0.8212769223932094, "grad_norm": 523.9444580078125, "learning_rate": 1.0266551608453224e-06, "loss": 20.3071, "step": 406560 }, { "epoch": 0.8212971230258932, "grad_norm": 281.4036865234375, "learning_rate": 1.0264432725393026e-06, "loss": 26.7694, "step": 406570 }, { "epoch": 0.821317323658577, "grad_norm": 195.6807403564453, "learning_rate": 1.026231403599915e-06, "loss": 35.4312, "step": 406580 }, { "epoch": 0.8213375242912608, "grad_norm": 376.1221618652344, "learning_rate": 1.0260195540281948e-06, "loss": 26.3182, "step": 406590 }, { "epoch": 0.8213577249239447, "grad_norm": 92.48028564453125, "learning_rate": 1.0258077238251735e-06, "loss": 10.7195, "step": 406600 }, { "epoch": 0.8213779255566285, "grad_norm": 588.0762329101562, "learning_rate": 1.0255959129918825e-06, "loss": 25.7414, "step": 406610 }, { "epoch": 0.8213981261893123, "grad_norm": 235.24195861816406, "learning_rate": 1.0253841215293541e-06, "loss": 25.2879, "step": 406620 }, { "epoch": 0.8214183268219961, "grad_norm": 189.87583923339844, "learning_rate": 1.0251723494386234e-06, "loss": 21.6835, "step": 406630 }, { "epoch": 0.8214385274546799, "grad_norm": 440.2449035644531, "learning_rate": 1.0249605967207204e-06, "loss": 15.1699, "step": 406640 }, { "epoch": 0.8214587280873636, "grad_norm": 372.5279235839844, "learning_rate": 1.0247488633766756e-06, "loss": 20.7011, "step": 406650 }, { "epoch": 0.8214789287200475, "grad_norm": 258.9936218261719, "learning_rate": 1.0245371494075246e-06, "loss": 13.6428, "step": 406660 }, { "epoch": 0.8214991293527313, "grad_norm": 542.8057250976562, "learning_rate": 1.0243254548142973e-06, "loss": 27.5963, "step": 406670 }, { "epoch": 0.8215193299854151, "grad_norm": 384.0791015625, "learning_rate": 1.0241137795980239e-06, "loss": 28.0409, "step": 406680 }, { "epoch": 0.8215395306180989, "grad_norm": 223.52798461914062, "learning_rate": 1.023902123759738e-06, "loss": 22.3204, "step": 406690 }, { "epoch": 0.8215597312507827, "grad_norm": 348.0323791503906, "learning_rate": 1.0236904873004722e-06, "loss": 21.9819, "step": 406700 }, { "epoch": 0.8215799318834666, "grad_norm": 371.3511047363281, "learning_rate": 1.023478870221256e-06, "loss": 14.799, "step": 406710 }, { "epoch": 0.8216001325161504, "grad_norm": 529.8207397460938, "learning_rate": 1.0232672725231213e-06, "loss": 20.8331, "step": 406720 }, { "epoch": 0.8216203331488342, "grad_norm": 446.2799072265625, "learning_rate": 1.0230556942071002e-06, "loss": 24.3354, "step": 406730 }, { "epoch": 0.821640533781518, "grad_norm": 110.34098052978516, "learning_rate": 1.0228441352742236e-06, "loss": 10.6493, "step": 406740 }, { "epoch": 0.8216607344142018, "grad_norm": 404.6952209472656, "learning_rate": 1.0226325957255207e-06, "loss": 17.8287, "step": 406750 }, { "epoch": 0.8216809350468857, "grad_norm": 721.1470336914062, "learning_rate": 1.0224210755620257e-06, "loss": 28.3078, "step": 406760 }, { "epoch": 0.8217011356795695, "grad_norm": 368.6654052734375, "learning_rate": 1.0222095747847666e-06, "loss": 31.6188, "step": 406770 }, { "epoch": 0.8217213363122533, "grad_norm": 93.41463470458984, "learning_rate": 1.0219980933947772e-06, "loss": 22.5248, "step": 406780 }, { "epoch": 0.8217415369449371, "grad_norm": 224.04542541503906, "learning_rate": 1.0217866313930847e-06, "loss": 16.1668, "step": 406790 }, { "epoch": 0.8217617375776209, "grad_norm": 382.7378234863281, "learning_rate": 1.0215751887807228e-06, "loss": 22.4136, "step": 406800 }, { "epoch": 0.8217819382103048, "grad_norm": 1227.5050048828125, "learning_rate": 1.0213637655587216e-06, "loss": 34.2403, "step": 406810 }, { "epoch": 0.8218021388429886, "grad_norm": 106.53482818603516, "learning_rate": 1.0211523617281095e-06, "loss": 12.4821, "step": 406820 }, { "epoch": 0.8218223394756724, "grad_norm": 159.04653930664062, "learning_rate": 1.0209409772899192e-06, "loss": 16.0928, "step": 406830 }, { "epoch": 0.8218425401083562, "grad_norm": 64.48246765136719, "learning_rate": 1.0207296122451789e-06, "loss": 11.8857, "step": 406840 }, { "epoch": 0.82186274074104, "grad_norm": 0.0, "learning_rate": 1.020518266594921e-06, "loss": 11.6178, "step": 406850 }, { "epoch": 0.8218829413737239, "grad_norm": 81.94050598144531, "learning_rate": 1.0203069403401743e-06, "loss": 18.4746, "step": 406860 }, { "epoch": 0.8219031420064077, "grad_norm": 186.90463256835938, "learning_rate": 1.0200956334819678e-06, "loss": 21.7208, "step": 406870 }, { "epoch": 0.8219233426390915, "grad_norm": 14.004890441894531, "learning_rate": 1.0198843460213337e-06, "loss": 7.8485, "step": 406880 }, { "epoch": 0.8219435432717753, "grad_norm": 490.0527648925781, "learning_rate": 1.0196730779593006e-06, "loss": 20.1846, "step": 406890 }, { "epoch": 0.8219637439044591, "grad_norm": 327.397216796875, "learning_rate": 1.0194618292968972e-06, "loss": 20.3725, "step": 406900 }, { "epoch": 0.8219839445371429, "grad_norm": 135.89395141601562, "learning_rate": 1.0192506000351532e-06, "loss": 15.2423, "step": 406910 }, { "epoch": 0.8220041451698267, "grad_norm": 34.89638900756836, "learning_rate": 1.0190393901751023e-06, "loss": 16.8446, "step": 406920 }, { "epoch": 0.8220243458025105, "grad_norm": 231.89024353027344, "learning_rate": 1.0188281997177679e-06, "loss": 16.6565, "step": 406930 }, { "epoch": 0.8220445464351943, "grad_norm": 252.86410522460938, "learning_rate": 1.0186170286641816e-06, "loss": 16.6328, "step": 406940 }, { "epoch": 0.8220647470678781, "grad_norm": 228.35977172851562, "learning_rate": 1.0184058770153748e-06, "loss": 12.6928, "step": 406950 }, { "epoch": 0.822084947700562, "grad_norm": 210.2059326171875, "learning_rate": 1.0181947447723744e-06, "loss": 19.592, "step": 406960 }, { "epoch": 0.8221051483332458, "grad_norm": 142.0568389892578, "learning_rate": 1.017983631936209e-06, "loss": 14.8192, "step": 406970 }, { "epoch": 0.8221253489659296, "grad_norm": 1.1920547485351562, "learning_rate": 1.0177725385079084e-06, "loss": 7.5257, "step": 406980 }, { "epoch": 0.8221455495986134, "grad_norm": 299.2579345703125, "learning_rate": 1.017561464488504e-06, "loss": 15.0475, "step": 406990 }, { "epoch": 0.8221657502312972, "grad_norm": 153.19769287109375, "learning_rate": 1.0173504098790188e-06, "loss": 11.1207, "step": 407000 }, { "epoch": 0.822185950863981, "grad_norm": 272.5093078613281, "learning_rate": 1.0171393746804854e-06, "loss": 16.9803, "step": 407010 }, { "epoch": 0.8222061514966649, "grad_norm": 141.45648193359375, "learning_rate": 1.0169283588939326e-06, "loss": 16.3761, "step": 407020 }, { "epoch": 0.8222263521293487, "grad_norm": 320.8656311035156, "learning_rate": 1.016717362520388e-06, "loss": 20.8336, "step": 407030 }, { "epoch": 0.8222465527620325, "grad_norm": 67.12684631347656, "learning_rate": 1.0165063855608786e-06, "loss": 7.4821, "step": 407040 }, { "epoch": 0.8222667533947163, "grad_norm": 389.3594970703125, "learning_rate": 1.016295428016435e-06, "loss": 21.0399, "step": 407050 }, { "epoch": 0.8222869540274002, "grad_norm": 925.6519775390625, "learning_rate": 1.0160844898880845e-06, "loss": 27.5401, "step": 407060 }, { "epoch": 0.822307154660084, "grad_norm": 418.4648132324219, "learning_rate": 1.0158735711768542e-06, "loss": 21.5594, "step": 407070 }, { "epoch": 0.8223273552927678, "grad_norm": 161.351318359375, "learning_rate": 1.0156626718837737e-06, "loss": 27.5131, "step": 407080 }, { "epoch": 0.8223475559254516, "grad_norm": 322.439208984375, "learning_rate": 1.0154517920098682e-06, "loss": 11.7366, "step": 407090 }, { "epoch": 0.8223677565581354, "grad_norm": 96.33719635009766, "learning_rate": 1.0152409315561696e-06, "loss": 8.2074, "step": 407100 }, { "epoch": 0.8223879571908193, "grad_norm": 472.7923278808594, "learning_rate": 1.0150300905237015e-06, "loss": 30.4726, "step": 407110 }, { "epoch": 0.8224081578235031, "grad_norm": 164.49525451660156, "learning_rate": 1.014819268913495e-06, "loss": 17.063, "step": 407120 }, { "epoch": 0.8224283584561869, "grad_norm": 289.7359313964844, "learning_rate": 1.0146084667265766e-06, "loss": 12.2118, "step": 407130 }, { "epoch": 0.8224485590888707, "grad_norm": 293.6818542480469, "learning_rate": 1.0143976839639713e-06, "loss": 17.5861, "step": 407140 }, { "epoch": 0.8224687597215545, "grad_norm": 322.701904296875, "learning_rate": 1.0141869206267097e-06, "loss": 19.6596, "step": 407150 }, { "epoch": 0.8224889603542382, "grad_norm": 171.6469268798828, "learning_rate": 1.0139761767158158e-06, "loss": 17.5886, "step": 407160 }, { "epoch": 0.8225091609869221, "grad_norm": 1024.0159912109375, "learning_rate": 1.0137654522323204e-06, "loss": 24.0646, "step": 407170 }, { "epoch": 0.8225293616196059, "grad_norm": 162.4088897705078, "learning_rate": 1.0135547471772488e-06, "loss": 12.4852, "step": 407180 }, { "epoch": 0.8225495622522897, "grad_norm": 48.355018615722656, "learning_rate": 1.013344061551626e-06, "loss": 16.3851, "step": 407190 }, { "epoch": 0.8225697628849735, "grad_norm": 326.6407165527344, "learning_rate": 1.0131333953564825e-06, "loss": 9.7206, "step": 407200 }, { "epoch": 0.8225899635176573, "grad_norm": 309.07415771484375, "learning_rate": 1.0129227485928432e-06, "loss": 14.9623, "step": 407210 }, { "epoch": 0.8226101641503412, "grad_norm": 199.2472381591797, "learning_rate": 1.0127121212617335e-06, "loss": 9.283, "step": 407220 }, { "epoch": 0.822630364783025, "grad_norm": 306.81280517578125, "learning_rate": 1.0125015133641813e-06, "loss": 17.3083, "step": 407230 }, { "epoch": 0.8226505654157088, "grad_norm": 409.6065673828125, "learning_rate": 1.0122909249012148e-06, "loss": 17.3997, "step": 407240 }, { "epoch": 0.8226707660483926, "grad_norm": 140.45953369140625, "learning_rate": 1.0120803558738585e-06, "loss": 17.4695, "step": 407250 }, { "epoch": 0.8226909666810764, "grad_norm": 540.8089599609375, "learning_rate": 1.0118698062831372e-06, "loss": 23.8016, "step": 407260 }, { "epoch": 0.8227111673137603, "grad_norm": 158.61888122558594, "learning_rate": 1.0116592761300804e-06, "loss": 8.8799, "step": 407270 }, { "epoch": 0.8227313679464441, "grad_norm": 119.53913116455078, "learning_rate": 1.0114487654157123e-06, "loss": 21.4427, "step": 407280 }, { "epoch": 0.8227515685791279, "grad_norm": 336.2322082519531, "learning_rate": 1.0112382741410582e-06, "loss": 14.0677, "step": 407290 }, { "epoch": 0.8227717692118117, "grad_norm": 69.20723724365234, "learning_rate": 1.0110278023071445e-06, "loss": 10.4497, "step": 407300 }, { "epoch": 0.8227919698444955, "grad_norm": 149.98699951171875, "learning_rate": 1.0108173499149991e-06, "loss": 16.6218, "step": 407310 }, { "epoch": 0.8228121704771794, "grad_norm": 0.0, "learning_rate": 1.0106069169656464e-06, "loss": 9.6561, "step": 407320 }, { "epoch": 0.8228323711098632, "grad_norm": 250.79969787597656, "learning_rate": 1.0103965034601098e-06, "loss": 18.0521, "step": 407330 }, { "epoch": 0.822852571742547, "grad_norm": 458.0596923828125, "learning_rate": 1.0101861093994182e-06, "loss": 23.0668, "step": 407340 }, { "epoch": 0.8228727723752308, "grad_norm": 258.3382263183594, "learning_rate": 1.0099757347845957e-06, "loss": 14.3803, "step": 407350 }, { "epoch": 0.8228929730079146, "grad_norm": 238.36541748046875, "learning_rate": 1.0097653796166662e-06, "loss": 15.8854, "step": 407360 }, { "epoch": 0.8229131736405985, "grad_norm": 116.28070068359375, "learning_rate": 1.0095550438966578e-06, "loss": 14.8548, "step": 407370 }, { "epoch": 0.8229333742732823, "grad_norm": 497.35406494140625, "learning_rate": 1.009344727625593e-06, "loss": 22.6178, "step": 407380 }, { "epoch": 0.8229535749059661, "grad_norm": 126.77920532226562, "learning_rate": 1.0091344308044987e-06, "loss": 10.3349, "step": 407390 }, { "epoch": 0.8229737755386499, "grad_norm": 162.2659912109375, "learning_rate": 1.0089241534343986e-06, "loss": 13.3786, "step": 407400 }, { "epoch": 0.8229939761713337, "grad_norm": 194.38665771484375, "learning_rate": 1.008713895516319e-06, "loss": 19.005, "step": 407410 }, { "epoch": 0.8230141768040174, "grad_norm": 140.01242065429688, "learning_rate": 1.0085036570512836e-06, "loss": 14.6131, "step": 407420 }, { "epoch": 0.8230343774367013, "grad_norm": 427.372802734375, "learning_rate": 1.0082934380403159e-06, "loss": 27.7424, "step": 407430 }, { "epoch": 0.8230545780693851, "grad_norm": 202.61428833007812, "learning_rate": 1.0080832384844437e-06, "loss": 25.4659, "step": 407440 }, { "epoch": 0.8230747787020689, "grad_norm": 0.0, "learning_rate": 1.0078730583846879e-06, "loss": 11.7188, "step": 407450 }, { "epoch": 0.8230949793347527, "grad_norm": 301.8184814453125, "learning_rate": 1.0076628977420761e-06, "loss": 21.4619, "step": 407460 }, { "epoch": 0.8231151799674365, "grad_norm": 203.44117736816406, "learning_rate": 1.0074527565576308e-06, "loss": 7.5409, "step": 407470 }, { "epoch": 0.8231353806001204, "grad_norm": 169.74037170410156, "learning_rate": 1.0072426348323754e-06, "loss": 13.4888, "step": 407480 }, { "epoch": 0.8231555812328042, "grad_norm": 180.08114624023438, "learning_rate": 1.0070325325673364e-06, "loss": 28.8851, "step": 407490 }, { "epoch": 0.823175781865488, "grad_norm": 289.3807678222656, "learning_rate": 1.006822449763537e-06, "loss": 23.3163, "step": 407500 }, { "epoch": 0.8231959824981718, "grad_norm": 353.6951599121094, "learning_rate": 1.006612386421999e-06, "loss": 10.3057, "step": 407510 }, { "epoch": 0.8232161831308556, "grad_norm": 139.89080810546875, "learning_rate": 1.006402342543748e-06, "loss": 11.2781, "step": 407520 }, { "epoch": 0.8232363837635395, "grad_norm": 324.4170227050781, "learning_rate": 1.00619231812981e-06, "loss": 11.6467, "step": 407530 }, { "epoch": 0.8232565843962233, "grad_norm": 120.69601440429688, "learning_rate": 1.0059823131812035e-06, "loss": 11.9026, "step": 407540 }, { "epoch": 0.8232767850289071, "grad_norm": 138.65576171875, "learning_rate": 1.0057723276989551e-06, "loss": 15.731, "step": 407550 }, { "epoch": 0.8232969856615909, "grad_norm": 164.2235870361328, "learning_rate": 1.0055623616840893e-06, "loss": 12.3652, "step": 407560 }, { "epoch": 0.8233171862942747, "grad_norm": 117.944580078125, "learning_rate": 1.0053524151376283e-06, "loss": 8.9745, "step": 407570 }, { "epoch": 0.8233373869269586, "grad_norm": 994.2530517578125, "learning_rate": 1.005142488060593e-06, "loss": 29.1113, "step": 407580 }, { "epoch": 0.8233575875596424, "grad_norm": 259.1741638183594, "learning_rate": 1.0049325804540094e-06, "loss": 20.4269, "step": 407590 }, { "epoch": 0.8233777881923262, "grad_norm": 348.46185302734375, "learning_rate": 1.0047226923189024e-06, "loss": 12.0474, "step": 407600 }, { "epoch": 0.82339798882501, "grad_norm": 74.88739776611328, "learning_rate": 1.0045128236562895e-06, "loss": 21.1189, "step": 407610 }, { "epoch": 0.8234181894576938, "grad_norm": 439.6065673828125, "learning_rate": 1.0043029744671967e-06, "loss": 13.5525, "step": 407620 }, { "epoch": 0.8234383900903777, "grad_norm": 168.26136779785156, "learning_rate": 1.004093144752648e-06, "loss": 11.9214, "step": 407630 }, { "epoch": 0.8234585907230615, "grad_norm": 197.80319213867188, "learning_rate": 1.0038833345136644e-06, "loss": 10.2141, "step": 407640 }, { "epoch": 0.8234787913557453, "grad_norm": 711.656982421875, "learning_rate": 1.003673543751268e-06, "loss": 13.1826, "step": 407650 }, { "epoch": 0.8234989919884291, "grad_norm": 230.8804931640625, "learning_rate": 1.0034637724664832e-06, "loss": 12.6996, "step": 407660 }, { "epoch": 0.8235191926211128, "grad_norm": 40.745399475097656, "learning_rate": 1.0032540206603309e-06, "loss": 20.1811, "step": 407670 }, { "epoch": 0.8235393932537967, "grad_norm": 400.2413330078125, "learning_rate": 1.0030442883338325e-06, "loss": 20.5742, "step": 407680 }, { "epoch": 0.8235595938864805, "grad_norm": 322.8829040527344, "learning_rate": 1.0028345754880114e-06, "loss": 27.9959, "step": 407690 }, { "epoch": 0.8235797945191643, "grad_norm": 147.51536560058594, "learning_rate": 1.0026248821238915e-06, "loss": 19.1854, "step": 407700 }, { "epoch": 0.8235999951518481, "grad_norm": 379.37060546875, "learning_rate": 1.0024152082424926e-06, "loss": 8.6571, "step": 407710 }, { "epoch": 0.8236201957845319, "grad_norm": 289.9367370605469, "learning_rate": 1.002205553844836e-06, "loss": 29.0539, "step": 407720 }, { "epoch": 0.8236403964172158, "grad_norm": 263.45465087890625, "learning_rate": 1.0019959189319462e-06, "loss": 15.0695, "step": 407730 }, { "epoch": 0.8236605970498996, "grad_norm": 322.9071044921875, "learning_rate": 1.0017863035048431e-06, "loss": 16.7116, "step": 407740 }, { "epoch": 0.8236807976825834, "grad_norm": 160.8217010498047, "learning_rate": 1.0015767075645472e-06, "loss": 26.3215, "step": 407750 }, { "epoch": 0.8237009983152672, "grad_norm": 5.981611251831055, "learning_rate": 1.0013671311120832e-06, "loss": 9.3314, "step": 407760 }, { "epoch": 0.823721198947951, "grad_norm": 261.9791259765625, "learning_rate": 1.001157574148469e-06, "loss": 23.4171, "step": 407770 }, { "epoch": 0.8237413995806349, "grad_norm": 214.92404174804688, "learning_rate": 1.00094803667473e-06, "loss": 18.7316, "step": 407780 }, { "epoch": 0.8237616002133187, "grad_norm": 206.47471618652344, "learning_rate": 1.0007385186918844e-06, "loss": 7.8493, "step": 407790 }, { "epoch": 0.8237818008460025, "grad_norm": 175.4320526123047, "learning_rate": 1.0005290202009533e-06, "loss": 10.7734, "step": 407800 }, { "epoch": 0.8238020014786863, "grad_norm": 175.28323364257812, "learning_rate": 1.00031954120296e-06, "loss": 15.3756, "step": 407810 }, { "epoch": 0.8238222021113701, "grad_norm": 379.42779541015625, "learning_rate": 1.000110081698924e-06, "loss": 30.8592, "step": 407820 }, { "epoch": 0.823842402744054, "grad_norm": 492.3541259765625, "learning_rate": 9.99900641689865e-07, "loss": 14.671, "step": 407830 }, { "epoch": 0.8238626033767378, "grad_norm": 470.49212646484375, "learning_rate": 9.99691221176805e-07, "loss": 12.4986, "step": 407840 }, { "epoch": 0.8238828040094216, "grad_norm": 20.245018005371094, "learning_rate": 9.994818201607665e-07, "loss": 26.8585, "step": 407850 }, { "epoch": 0.8239030046421054, "grad_norm": 331.5124206542969, "learning_rate": 9.992724386427676e-07, "loss": 24.2796, "step": 407860 }, { "epoch": 0.8239232052747892, "grad_norm": 291.6085510253906, "learning_rate": 9.990630766238292e-07, "loss": 12.0007, "step": 407870 }, { "epoch": 0.8239434059074731, "grad_norm": 256.7472229003906, "learning_rate": 9.988537341049732e-07, "loss": 11.3345, "step": 407880 }, { "epoch": 0.8239636065401569, "grad_norm": 474.2231140136719, "learning_rate": 9.986444110872185e-07, "loss": 13.0152, "step": 407890 }, { "epoch": 0.8239838071728407, "grad_norm": 123.5870132446289, "learning_rate": 9.984351075715848e-07, "loss": 10.0065, "step": 407900 }, { "epoch": 0.8240040078055245, "grad_norm": 641.5662841796875, "learning_rate": 9.982258235590926e-07, "loss": 24.2908, "step": 407910 }, { "epoch": 0.8240242084382083, "grad_norm": 260.7022399902344, "learning_rate": 9.98016559050765e-07, "loss": 28.061, "step": 407920 }, { "epoch": 0.824044409070892, "grad_norm": 92.6684341430664, "learning_rate": 9.978073140476169e-07, "loss": 12.0006, "step": 407930 }, { "epoch": 0.8240646097035759, "grad_norm": 210.86251831054688, "learning_rate": 9.975980885506708e-07, "loss": 8.3693, "step": 407940 }, { "epoch": 0.8240848103362597, "grad_norm": 117.0738296508789, "learning_rate": 9.973888825609474e-07, "loss": 21.332, "step": 407950 }, { "epoch": 0.8241050109689435, "grad_norm": 367.8218994140625, "learning_rate": 9.971796960794644e-07, "loss": 20.9376, "step": 407960 }, { "epoch": 0.8241252116016273, "grad_norm": 181.21823120117188, "learning_rate": 9.969705291072419e-07, "loss": 29.6716, "step": 407970 }, { "epoch": 0.8241454122343111, "grad_norm": 0.0, "learning_rate": 9.967613816452997e-07, "loss": 17.3003, "step": 407980 }, { "epoch": 0.824165612866995, "grad_norm": 374.946044921875, "learning_rate": 9.965522536946564e-07, "loss": 17.4248, "step": 407990 }, { "epoch": 0.8241858134996788, "grad_norm": 295.0700378417969, "learning_rate": 9.963431452563331e-07, "loss": 19.8733, "step": 408000 }, { "epoch": 0.8242060141323626, "grad_norm": 262.47528076171875, "learning_rate": 9.96134056331346e-07, "loss": 15.4893, "step": 408010 }, { "epoch": 0.8242262147650464, "grad_norm": 249.3992462158203, "learning_rate": 9.959249869207177e-07, "loss": 17.0903, "step": 408020 }, { "epoch": 0.8242464153977302, "grad_norm": 349.7080383300781, "learning_rate": 9.957159370254654e-07, "loss": 18.8234, "step": 408030 }, { "epoch": 0.8242666160304141, "grad_norm": 195.68194580078125, "learning_rate": 9.95506906646606e-07, "loss": 14.0454, "step": 408040 }, { "epoch": 0.8242868166630979, "grad_norm": 169.67481994628906, "learning_rate": 9.952978957851622e-07, "loss": 15.2308, "step": 408050 }, { "epoch": 0.8243070172957817, "grad_norm": 404.4700622558594, "learning_rate": 9.95088904442149e-07, "loss": 10.8896, "step": 408060 }, { "epoch": 0.8243272179284655, "grad_norm": 229.98309326171875, "learning_rate": 9.948799326185886e-07, "loss": 28.4919, "step": 408070 }, { "epoch": 0.8243474185611493, "grad_norm": 182.05044555664062, "learning_rate": 9.946709803154975e-07, "loss": 13.3742, "step": 408080 }, { "epoch": 0.8243676191938332, "grad_norm": 59.530242919921875, "learning_rate": 9.94462047533893e-07, "loss": 17.968, "step": 408090 }, { "epoch": 0.824387819826517, "grad_norm": 172.33163452148438, "learning_rate": 9.942531342747953e-07, "loss": 15.0103, "step": 408100 }, { "epoch": 0.8244080204592008, "grad_norm": 215.0967254638672, "learning_rate": 9.940442405392226e-07, "loss": 11.611, "step": 408110 }, { "epoch": 0.8244282210918846, "grad_norm": 173.8942108154297, "learning_rate": 9.938353663281908e-07, "loss": 14.3809, "step": 408120 }, { "epoch": 0.8244484217245684, "grad_norm": 268.28155517578125, "learning_rate": 9.936265116427195e-07, "loss": 16.8704, "step": 408130 }, { "epoch": 0.8244686223572523, "grad_norm": 78.1587142944336, "learning_rate": 9.9341767648383e-07, "loss": 17.7647, "step": 408140 }, { "epoch": 0.8244888229899361, "grad_norm": 355.10687255859375, "learning_rate": 9.932088608525336e-07, "loss": 15.9907, "step": 408150 }, { "epoch": 0.8245090236226199, "grad_norm": 341.6861267089844, "learning_rate": 9.93000064749851e-07, "loss": 20.717, "step": 408160 }, { "epoch": 0.8245292242553037, "grad_norm": 74.31090545654297, "learning_rate": 9.92791288176802e-07, "loss": 7.203, "step": 408170 }, { "epoch": 0.8245494248879874, "grad_norm": 233.4920196533203, "learning_rate": 9.925825311344018e-07, "loss": 13.5278, "step": 408180 }, { "epoch": 0.8245696255206713, "grad_norm": 579.834716796875, "learning_rate": 9.923737936236671e-07, "loss": 26.7775, "step": 408190 }, { "epoch": 0.8245898261533551, "grad_norm": 744.987060546875, "learning_rate": 9.921650756456164e-07, "loss": 28.6856, "step": 408200 }, { "epoch": 0.8246100267860389, "grad_norm": 224.882568359375, "learning_rate": 9.919563772012697e-07, "loss": 16.2234, "step": 408210 }, { "epoch": 0.8246302274187227, "grad_norm": 383.8169250488281, "learning_rate": 9.917476982916391e-07, "loss": 16.6583, "step": 408220 }, { "epoch": 0.8246504280514065, "grad_norm": 17.38800048828125, "learning_rate": 9.915390389177438e-07, "loss": 25.0056, "step": 408230 }, { "epoch": 0.8246706286840904, "grad_norm": 2515.052490234375, "learning_rate": 9.913303990806028e-07, "loss": 20.2544, "step": 408240 }, { "epoch": 0.8246908293167742, "grad_norm": 318.1338806152344, "learning_rate": 9.911217787812305e-07, "loss": 15.5269, "step": 408250 }, { "epoch": 0.824711029949458, "grad_norm": 120.08428192138672, "learning_rate": 9.909131780206437e-07, "loss": 12.8349, "step": 408260 }, { "epoch": 0.8247312305821418, "grad_norm": 195.03048706054688, "learning_rate": 9.907045967998613e-07, "loss": 8.0315, "step": 408270 }, { "epoch": 0.8247514312148256, "grad_norm": 136.87171936035156, "learning_rate": 9.904960351198977e-07, "loss": 7.4457, "step": 408280 }, { "epoch": 0.8247716318475095, "grad_norm": 596.9129028320312, "learning_rate": 9.902874929817696e-07, "loss": 18.4587, "step": 408290 }, { "epoch": 0.8247918324801933, "grad_norm": 192.48336791992188, "learning_rate": 9.900789703864933e-07, "loss": 24.8108, "step": 408300 }, { "epoch": 0.8248120331128771, "grad_norm": 206.43690490722656, "learning_rate": 9.898704673350872e-07, "loss": 16.0706, "step": 408310 }, { "epoch": 0.8248322337455609, "grad_norm": 313.2366027832031, "learning_rate": 9.896619838285664e-07, "loss": 16.5623, "step": 408320 }, { "epoch": 0.8248524343782447, "grad_norm": 190.9024200439453, "learning_rate": 9.89453519867945e-07, "loss": 16.7583, "step": 408330 }, { "epoch": 0.8248726350109286, "grad_norm": 182.9867706298828, "learning_rate": 9.892450754542427e-07, "loss": 22.8096, "step": 408340 }, { "epoch": 0.8248928356436124, "grad_norm": 424.4227294921875, "learning_rate": 9.890366505884725e-07, "loss": 15.895, "step": 408350 }, { "epoch": 0.8249130362762962, "grad_norm": 169.8470001220703, "learning_rate": 9.888282452716507e-07, "loss": 23.8239, "step": 408360 }, { "epoch": 0.82493323690898, "grad_norm": 288.3057861328125, "learning_rate": 9.88619859504794e-07, "loss": 25.9847, "step": 408370 }, { "epoch": 0.8249534375416638, "grad_norm": 275.2030029296875, "learning_rate": 9.884114932889172e-07, "loss": 16.5474, "step": 408380 }, { "epoch": 0.8249736381743477, "grad_norm": 115.44715881347656, "learning_rate": 9.88203146625037e-07, "loss": 22.93, "step": 408390 }, { "epoch": 0.8249938388070315, "grad_norm": 256.2182312011719, "learning_rate": 9.879948195141681e-07, "loss": 16.1844, "step": 408400 }, { "epoch": 0.8250140394397153, "grad_norm": 421.4730529785156, "learning_rate": 9.877865119573249e-07, "loss": 24.4897, "step": 408410 }, { "epoch": 0.8250342400723991, "grad_norm": 707.7069091796875, "learning_rate": 9.87578223955525e-07, "loss": 24.104, "step": 408420 }, { "epoch": 0.8250544407050829, "grad_norm": 48.01713180541992, "learning_rate": 9.873699555097815e-07, "loss": 18.2168, "step": 408430 }, { "epoch": 0.8250746413377666, "grad_norm": 2266.85498046875, "learning_rate": 9.871617066211092e-07, "loss": 16.6405, "step": 408440 }, { "epoch": 0.8250948419704505, "grad_norm": 255.90672302246094, "learning_rate": 9.869534772905242e-07, "loss": 7.4002, "step": 408450 }, { "epoch": 0.8251150426031343, "grad_norm": 458.92352294921875, "learning_rate": 9.867452675190425e-07, "loss": 11.6007, "step": 408460 }, { "epoch": 0.8251352432358181, "grad_norm": 506.4468078613281, "learning_rate": 9.86537077307677e-07, "loss": 21.9979, "step": 408470 }, { "epoch": 0.8251554438685019, "grad_norm": 355.1929626464844, "learning_rate": 9.863289066574426e-07, "loss": 17.9224, "step": 408480 }, { "epoch": 0.8251756445011857, "grad_norm": 155.5542449951172, "learning_rate": 9.861207555693552e-07, "loss": 11.7626, "step": 408490 }, { "epoch": 0.8251958451338696, "grad_norm": 161.9386749267578, "learning_rate": 9.859126240444284e-07, "loss": 18.3051, "step": 408500 }, { "epoch": 0.8252160457665534, "grad_norm": 110.46293640136719, "learning_rate": 9.857045120836756e-07, "loss": 20.0706, "step": 408510 }, { "epoch": 0.8252362463992372, "grad_norm": 179.5891876220703, "learning_rate": 9.854964196881117e-07, "loss": 22.5995, "step": 408520 }, { "epoch": 0.825256447031921, "grad_norm": 440.28057861328125, "learning_rate": 9.852883468587544e-07, "loss": 25.3377, "step": 408530 }, { "epoch": 0.8252766476646048, "grad_norm": 266.16900634765625, "learning_rate": 9.85080293596612e-07, "loss": 13.79, "step": 408540 }, { "epoch": 0.8252968482972887, "grad_norm": 71.6457748413086, "learning_rate": 9.848722599027012e-07, "loss": 14.1537, "step": 408550 }, { "epoch": 0.8253170489299725, "grad_norm": 421.3498840332031, "learning_rate": 9.84664245778037e-07, "loss": 24.2899, "step": 408560 }, { "epoch": 0.8253372495626563, "grad_norm": 172.49050903320312, "learning_rate": 9.844562512236327e-07, "loss": 9.5031, "step": 408570 }, { "epoch": 0.8253574501953401, "grad_norm": 315.3858642578125, "learning_rate": 9.842482762405004e-07, "loss": 25.3143, "step": 408580 }, { "epoch": 0.825377650828024, "grad_norm": 375.4794921875, "learning_rate": 9.840403208296556e-07, "loss": 21.4724, "step": 408590 }, { "epoch": 0.8253978514607078, "grad_norm": 366.5629577636719, "learning_rate": 9.838323849921123e-07, "loss": 12.8621, "step": 408600 }, { "epoch": 0.8254180520933916, "grad_norm": 288.332275390625, "learning_rate": 9.836244687288803e-07, "loss": 14.1053, "step": 408610 }, { "epoch": 0.8254382527260754, "grad_norm": 425.09295654296875, "learning_rate": 9.834165720409767e-07, "loss": 12.3351, "step": 408620 }, { "epoch": 0.8254584533587592, "grad_norm": 577.1239624023438, "learning_rate": 9.83208694929414e-07, "loss": 17.2466, "step": 408630 }, { "epoch": 0.825478653991443, "grad_norm": 725.6005859375, "learning_rate": 9.830008373952054e-07, "loss": 22.021, "step": 408640 }, { "epoch": 0.8254988546241269, "grad_norm": 144.6629638671875, "learning_rate": 9.82792999439362e-07, "loss": 19.5045, "step": 408650 }, { "epoch": 0.8255190552568107, "grad_norm": 390.7684631347656, "learning_rate": 9.825851810628995e-07, "loss": 19.5706, "step": 408660 }, { "epoch": 0.8255392558894945, "grad_norm": 55.422325134277344, "learning_rate": 9.823773822668298e-07, "loss": 27.9582, "step": 408670 }, { "epoch": 0.8255594565221783, "grad_norm": 117.72154998779297, "learning_rate": 9.821696030521644e-07, "loss": 15.7702, "step": 408680 }, { "epoch": 0.8255796571548621, "grad_norm": 273.53399658203125, "learning_rate": 9.81961843419918e-07, "loss": 25.2647, "step": 408690 }, { "epoch": 0.8255998577875459, "grad_norm": 156.60174560546875, "learning_rate": 9.81754103371101e-07, "loss": 16.057, "step": 408700 }, { "epoch": 0.8256200584202297, "grad_norm": 180.11094665527344, "learning_rate": 9.815463829067284e-07, "loss": 8.8479, "step": 408710 }, { "epoch": 0.8256402590529135, "grad_norm": 197.033447265625, "learning_rate": 9.813386820278114e-07, "loss": 23.1811, "step": 408720 }, { "epoch": 0.8256604596855973, "grad_norm": 47.270938873291016, "learning_rate": 9.811310007353608e-07, "loss": 17.8826, "step": 408730 }, { "epoch": 0.8256806603182811, "grad_norm": 389.3727111816406, "learning_rate": 9.809233390303901e-07, "loss": 24.5249, "step": 408740 }, { "epoch": 0.825700860950965, "grad_norm": 270.75390625, "learning_rate": 9.807156969139136e-07, "loss": 10.3027, "step": 408750 }, { "epoch": 0.8257210615836488, "grad_norm": 19.021453857421875, "learning_rate": 9.805080743869406e-07, "loss": 13.2405, "step": 408760 }, { "epoch": 0.8257412622163326, "grad_norm": 262.58367919921875, "learning_rate": 9.803004714504827e-07, "loss": 38.7582, "step": 408770 }, { "epoch": 0.8257614628490164, "grad_norm": 57.16415023803711, "learning_rate": 9.800928881055543e-07, "loss": 33.4537, "step": 408780 }, { "epoch": 0.8257816634817002, "grad_norm": 123.43624114990234, "learning_rate": 9.798853243531654e-07, "loss": 14.4363, "step": 408790 }, { "epoch": 0.825801864114384, "grad_norm": 311.95037841796875, "learning_rate": 9.79677780194327e-07, "loss": 28.1362, "step": 408800 }, { "epoch": 0.8258220647470679, "grad_norm": 334.7828369140625, "learning_rate": 9.794702556300505e-07, "loss": 30.3781, "step": 408810 }, { "epoch": 0.8258422653797517, "grad_norm": 532.325927734375, "learning_rate": 9.792627506613517e-07, "loss": 16.291, "step": 408820 }, { "epoch": 0.8258624660124355, "grad_norm": 105.81990814208984, "learning_rate": 9.79055265289236e-07, "loss": 9.5742, "step": 408830 }, { "epoch": 0.8258826666451193, "grad_norm": 379.5443420410156, "learning_rate": 9.788477995147173e-07, "loss": 28.9958, "step": 408840 }, { "epoch": 0.8259028672778032, "grad_norm": 256.0005798339844, "learning_rate": 9.786403533388072e-07, "loss": 14.4329, "step": 408850 }, { "epoch": 0.825923067910487, "grad_norm": 353.9476318359375, "learning_rate": 9.78432926762517e-07, "loss": 20.5935, "step": 408860 }, { "epoch": 0.8259432685431708, "grad_norm": 228.50607299804688, "learning_rate": 9.782255197868556e-07, "loss": 12.0759, "step": 408870 }, { "epoch": 0.8259634691758546, "grad_norm": 93.47823333740234, "learning_rate": 9.780181324128368e-07, "loss": 8.2096, "step": 408880 }, { "epoch": 0.8259836698085384, "grad_norm": 260.0494079589844, "learning_rate": 9.778107646414691e-07, "loss": 7.5039, "step": 408890 }, { "epoch": 0.8260038704412223, "grad_norm": 366.97686767578125, "learning_rate": 9.77603416473763e-07, "loss": 12.8172, "step": 408900 }, { "epoch": 0.8260240710739061, "grad_norm": 389.7434997558594, "learning_rate": 9.773960879107303e-07, "loss": 30.1656, "step": 408910 }, { "epoch": 0.8260442717065899, "grad_norm": 427.1439514160156, "learning_rate": 9.771887789533818e-07, "loss": 21.4487, "step": 408920 }, { "epoch": 0.8260644723392737, "grad_norm": 333.1944885253906, "learning_rate": 9.76981489602728e-07, "loss": 21.5627, "step": 408930 }, { "epoch": 0.8260846729719575, "grad_norm": 624.4745483398438, "learning_rate": 9.767742198597769e-07, "loss": 19.6745, "step": 408940 }, { "epoch": 0.8261048736046412, "grad_norm": 208.72796630859375, "learning_rate": 9.765669697255413e-07, "loss": 18.3344, "step": 408950 }, { "epoch": 0.8261250742373251, "grad_norm": 376.18109130859375, "learning_rate": 9.763597392010304e-07, "loss": 19.4158, "step": 408960 }, { "epoch": 0.8261452748700089, "grad_norm": 216.4193878173828, "learning_rate": 9.76152528287253e-07, "loss": 9.551, "step": 408970 }, { "epoch": 0.8261654755026927, "grad_norm": 88.88388061523438, "learning_rate": 9.759453369852213e-07, "loss": 12.9348, "step": 408980 }, { "epoch": 0.8261856761353765, "grad_norm": 161.16651916503906, "learning_rate": 9.75738165295943e-07, "loss": 12.9594, "step": 408990 }, { "epoch": 0.8262058767680603, "grad_norm": 889.0460205078125, "learning_rate": 9.7553101322043e-07, "loss": 30.4366, "step": 409000 }, { "epoch": 0.8262260774007442, "grad_norm": 137.49957275390625, "learning_rate": 9.753238807596903e-07, "loss": 15.4364, "step": 409010 }, { "epoch": 0.826246278033428, "grad_norm": 0.46797338128089905, "learning_rate": 9.751167679147328e-07, "loss": 13.6652, "step": 409020 }, { "epoch": 0.8262664786661118, "grad_norm": 214.5445556640625, "learning_rate": 9.749096746865695e-07, "loss": 19.3583, "step": 409030 }, { "epoch": 0.8262866792987956, "grad_norm": 91.67375946044922, "learning_rate": 9.747026010762084e-07, "loss": 10.6592, "step": 409040 }, { "epoch": 0.8263068799314794, "grad_norm": 374.8708801269531, "learning_rate": 9.744955470846567e-07, "loss": 19.4799, "step": 409050 }, { "epoch": 0.8263270805641633, "grad_norm": 459.388916015625, "learning_rate": 9.74288512712926e-07, "loss": 15.9328, "step": 409060 }, { "epoch": 0.8263472811968471, "grad_norm": 389.89447021484375, "learning_rate": 9.740814979620262e-07, "loss": 24.6895, "step": 409070 }, { "epoch": 0.8263674818295309, "grad_norm": 156.86831665039062, "learning_rate": 9.738745028329643e-07, "loss": 13.5755, "step": 409080 }, { "epoch": 0.8263876824622147, "grad_norm": 421.29986572265625, "learning_rate": 9.736675273267488e-07, "loss": 15.9551, "step": 409090 }, { "epoch": 0.8264078830948985, "grad_norm": 227.47747802734375, "learning_rate": 9.734605714443906e-07, "loss": 16.4324, "step": 409100 }, { "epoch": 0.8264280837275824, "grad_norm": 30.238361358642578, "learning_rate": 9.732536351868977e-07, "loss": 16.0615, "step": 409110 }, { "epoch": 0.8264482843602662, "grad_norm": 142.38270568847656, "learning_rate": 9.730467185552762e-07, "loss": 11.7185, "step": 409120 }, { "epoch": 0.82646848499295, "grad_norm": 78.54763793945312, "learning_rate": 9.728398215505369e-07, "loss": 30.091, "step": 409130 }, { "epoch": 0.8264886856256338, "grad_norm": 311.2696533203125, "learning_rate": 9.72632944173691e-07, "loss": 38.2034, "step": 409140 }, { "epoch": 0.8265088862583176, "grad_norm": 286.2693176269531, "learning_rate": 9.724260864257401e-07, "loss": 17.5118, "step": 409150 }, { "epoch": 0.8265290868910015, "grad_norm": 235.6892547607422, "learning_rate": 9.722192483076965e-07, "loss": 15.721, "step": 409160 }, { "epoch": 0.8265492875236853, "grad_norm": 534.564453125, "learning_rate": 9.720124298205692e-07, "loss": 17.9862, "step": 409170 }, { "epoch": 0.8265694881563691, "grad_norm": 172.3673095703125, "learning_rate": 9.718056309653646e-07, "loss": 27.6954, "step": 409180 }, { "epoch": 0.8265896887890529, "grad_norm": 394.4425964355469, "learning_rate": 9.715988517430896e-07, "loss": 18.196, "step": 409190 }, { "epoch": 0.8266098894217367, "grad_norm": 274.9172668457031, "learning_rate": 9.713920921547532e-07, "loss": 14.3221, "step": 409200 }, { "epoch": 0.8266300900544205, "grad_norm": 140.2721405029297, "learning_rate": 9.711853522013653e-07, "loss": 26.4491, "step": 409210 }, { "epoch": 0.8266502906871043, "grad_norm": 226.80250549316406, "learning_rate": 9.709786318839293e-07, "loss": 22.9387, "step": 409220 }, { "epoch": 0.8266704913197881, "grad_norm": 298.55963134765625, "learning_rate": 9.707719312034548e-07, "loss": 10.6125, "step": 409230 }, { "epoch": 0.8266906919524719, "grad_norm": 47.26167678833008, "learning_rate": 9.705652501609503e-07, "loss": 10.3865, "step": 409240 }, { "epoch": 0.8267108925851557, "grad_norm": 258.88616943359375, "learning_rate": 9.70358588757422e-07, "loss": 13.759, "step": 409250 }, { "epoch": 0.8267310932178396, "grad_norm": 102.4092788696289, "learning_rate": 9.701519469938759e-07, "loss": 17.7496, "step": 409260 }, { "epoch": 0.8267512938505234, "grad_norm": 42.92491149902344, "learning_rate": 9.699453248713215e-07, "loss": 11.6326, "step": 409270 }, { "epoch": 0.8267714944832072, "grad_norm": 88.5890121459961, "learning_rate": 9.69738722390765e-07, "loss": 18.7951, "step": 409280 }, { "epoch": 0.826791695115891, "grad_norm": 10.747011184692383, "learning_rate": 9.69532139553212e-07, "loss": 9.7803, "step": 409290 }, { "epoch": 0.8268118957485748, "grad_norm": 737.9528198242188, "learning_rate": 9.69325576359672e-07, "loss": 19.121, "step": 409300 }, { "epoch": 0.8268320963812587, "grad_norm": 408.0089111328125, "learning_rate": 9.691190328111488e-07, "loss": 16.844, "step": 409310 }, { "epoch": 0.8268522970139425, "grad_norm": 172.6240692138672, "learning_rate": 9.689125089086514e-07, "loss": 12.5224, "step": 409320 }, { "epoch": 0.8268724976466263, "grad_norm": 97.5411605834961, "learning_rate": 9.687060046531866e-07, "loss": 20.5884, "step": 409330 }, { "epoch": 0.8268926982793101, "grad_norm": 336.0673522949219, "learning_rate": 9.684995200457574e-07, "loss": 19.8194, "step": 409340 }, { "epoch": 0.8269128989119939, "grad_norm": 380.5863952636719, "learning_rate": 9.682930550873742e-07, "loss": 11.5593, "step": 409350 }, { "epoch": 0.8269330995446778, "grad_norm": 289.55792236328125, "learning_rate": 9.680866097790409e-07, "loss": 9.4181, "step": 409360 }, { "epoch": 0.8269533001773616, "grad_norm": 90.4924087524414, "learning_rate": 9.67880184121765e-07, "loss": 11.8581, "step": 409370 }, { "epoch": 0.8269735008100454, "grad_norm": 443.6423034667969, "learning_rate": 9.676737781165513e-07, "loss": 14.6567, "step": 409380 }, { "epoch": 0.8269937014427292, "grad_norm": 124.7345962524414, "learning_rate": 9.674673917644072e-07, "loss": 22.8964, "step": 409390 }, { "epoch": 0.827013902075413, "grad_norm": 246.6186065673828, "learning_rate": 9.67261025066339e-07, "loss": 13.1644, "step": 409400 }, { "epoch": 0.8270341027080969, "grad_norm": 318.2370300292969, "learning_rate": 9.670546780233493e-07, "loss": 17.1723, "step": 409410 }, { "epoch": 0.8270543033407807, "grad_norm": 172.4796142578125, "learning_rate": 9.668483506364462e-07, "loss": 10.9732, "step": 409420 }, { "epoch": 0.8270745039734645, "grad_norm": 317.2063903808594, "learning_rate": 9.666420429066376e-07, "loss": 15.3229, "step": 409430 }, { "epoch": 0.8270947046061483, "grad_norm": 357.3596496582031, "learning_rate": 9.66435754834924e-07, "loss": 17.9814, "step": 409440 }, { "epoch": 0.8271149052388321, "grad_norm": 230.9656219482422, "learning_rate": 9.662294864223132e-07, "loss": 24.3644, "step": 409450 }, { "epoch": 0.8271351058715158, "grad_norm": 131.02561950683594, "learning_rate": 9.66023237669812e-07, "loss": 15.0017, "step": 409460 }, { "epoch": 0.8271553065041997, "grad_norm": 216.1681671142578, "learning_rate": 9.658170085784242e-07, "loss": 18.2355, "step": 409470 }, { "epoch": 0.8271755071368835, "grad_norm": 403.6612243652344, "learning_rate": 9.656107991491536e-07, "loss": 20.2006, "step": 409480 }, { "epoch": 0.8271957077695673, "grad_norm": 115.29170227050781, "learning_rate": 9.654046093830084e-07, "loss": 18.1891, "step": 409490 }, { "epoch": 0.8272159084022511, "grad_norm": 121.4883804321289, "learning_rate": 9.651984392809916e-07, "loss": 7.6444, "step": 409500 }, { "epoch": 0.8272361090349349, "grad_norm": 167.8259735107422, "learning_rate": 9.649922888441065e-07, "loss": 19.5987, "step": 409510 }, { "epoch": 0.8272563096676188, "grad_norm": 203.21533203125, "learning_rate": 9.647861580733603e-07, "loss": 19.0789, "step": 409520 }, { "epoch": 0.8272765103003026, "grad_norm": 392.5436706542969, "learning_rate": 9.645800469697575e-07, "loss": 27.2304, "step": 409530 }, { "epoch": 0.8272967109329864, "grad_norm": 205.415771484375, "learning_rate": 9.64373955534303e-07, "loss": 20.3137, "step": 409540 }, { "epoch": 0.8273169115656702, "grad_norm": 317.9059753417969, "learning_rate": 9.641678837679985e-07, "loss": 28.7504, "step": 409550 }, { "epoch": 0.827337112198354, "grad_norm": 434.599853515625, "learning_rate": 9.639618316718519e-07, "loss": 15.9014, "step": 409560 }, { "epoch": 0.8273573128310379, "grad_norm": 388.0644226074219, "learning_rate": 9.637557992468655e-07, "loss": 16.2761, "step": 409570 }, { "epoch": 0.8273775134637217, "grad_norm": 165.4010772705078, "learning_rate": 9.635497864940425e-07, "loss": 15.4285, "step": 409580 }, { "epoch": 0.8273977140964055, "grad_norm": 236.78475952148438, "learning_rate": 9.633437934143896e-07, "loss": 12.7176, "step": 409590 }, { "epoch": 0.8274179147290893, "grad_norm": 179.90467834472656, "learning_rate": 9.631378200089082e-07, "loss": 33.9785, "step": 409600 }, { "epoch": 0.8274381153617731, "grad_norm": 477.7040100097656, "learning_rate": 9.629318662786047e-07, "loss": 24.0612, "step": 409610 }, { "epoch": 0.827458315994457, "grad_norm": 200.7135009765625, "learning_rate": 9.62725932224482e-07, "loss": 14.4447, "step": 409620 }, { "epoch": 0.8274785166271408, "grad_norm": 334.4525146484375, "learning_rate": 9.62520017847542e-07, "loss": 11.159, "step": 409630 }, { "epoch": 0.8274987172598246, "grad_norm": 409.64898681640625, "learning_rate": 9.623141231487904e-07, "loss": 21.2946, "step": 409640 }, { "epoch": 0.8275189178925084, "grad_norm": 195.8665313720703, "learning_rate": 9.621082481292309e-07, "loss": 10.9201, "step": 409650 }, { "epoch": 0.8275391185251922, "grad_norm": 455.36669921875, "learning_rate": 9.61902392789864e-07, "loss": 26.1455, "step": 409660 }, { "epoch": 0.8275593191578761, "grad_norm": 438.5113220214844, "learning_rate": 9.616965571316956e-07, "loss": 14.672, "step": 409670 }, { "epoch": 0.8275795197905599, "grad_norm": 316.32159423828125, "learning_rate": 9.6149074115573e-07, "loss": 18.2571, "step": 409680 }, { "epoch": 0.8275997204232437, "grad_norm": 309.5303649902344, "learning_rate": 9.61284944862968e-07, "loss": 20.4558, "step": 409690 }, { "epoch": 0.8276199210559275, "grad_norm": 255.615966796875, "learning_rate": 9.610791682544123e-07, "loss": 5.0855, "step": 409700 }, { "epoch": 0.8276401216886113, "grad_norm": 434.9753112792969, "learning_rate": 9.608734113310685e-07, "loss": 32.8699, "step": 409710 }, { "epoch": 0.827660322321295, "grad_norm": 94.48103332519531, "learning_rate": 9.606676740939375e-07, "loss": 21.7466, "step": 409720 }, { "epoch": 0.8276805229539789, "grad_norm": 0.0, "learning_rate": 9.60461956544021e-07, "loss": 7.2681, "step": 409730 }, { "epoch": 0.8277007235866627, "grad_norm": 144.42807006835938, "learning_rate": 9.602562586823232e-07, "loss": 51.75, "step": 409740 }, { "epoch": 0.8277209242193465, "grad_norm": 113.88189697265625, "learning_rate": 9.600505805098486e-07, "loss": 4.793, "step": 409750 }, { "epoch": 0.8277411248520303, "grad_norm": 205.02870178222656, "learning_rate": 9.59844922027595e-07, "loss": 17.6645, "step": 409760 }, { "epoch": 0.8277613254847141, "grad_norm": 335.8193359375, "learning_rate": 9.596392832365676e-07, "loss": 19.6117, "step": 409770 }, { "epoch": 0.827781526117398, "grad_norm": 224.22412109375, "learning_rate": 9.594336641377695e-07, "loss": 13.1641, "step": 409780 }, { "epoch": 0.8278017267500818, "grad_norm": 321.8125915527344, "learning_rate": 9.592280647322015e-07, "loss": 26.003, "step": 409790 }, { "epoch": 0.8278219273827656, "grad_norm": 1043.1671142578125, "learning_rate": 9.590224850208645e-07, "loss": 33.8985, "step": 409800 }, { "epoch": 0.8278421280154494, "grad_norm": 327.7283630371094, "learning_rate": 9.588169250047624e-07, "loss": 11.2007, "step": 409810 }, { "epoch": 0.8278623286481332, "grad_norm": 422.17431640625, "learning_rate": 9.586113846848982e-07, "loss": 17.1714, "step": 409820 }, { "epoch": 0.8278825292808171, "grad_norm": 252.18179321289062, "learning_rate": 9.584058640622702e-07, "loss": 14.0754, "step": 409830 }, { "epoch": 0.8279027299135009, "grad_norm": 239.5164794921875, "learning_rate": 9.58200363137881e-07, "loss": 20.2893, "step": 409840 }, { "epoch": 0.8279229305461847, "grad_norm": 140.21127319335938, "learning_rate": 9.57994881912735e-07, "loss": 18.8065, "step": 409850 }, { "epoch": 0.8279431311788685, "grad_norm": 512.3609619140625, "learning_rate": 9.577894203878313e-07, "loss": 30.1831, "step": 409860 }, { "epoch": 0.8279633318115523, "grad_norm": 129.37081909179688, "learning_rate": 9.575839785641706e-07, "loss": 34.1444, "step": 409870 }, { "epoch": 0.8279835324442362, "grad_norm": 384.0455627441406, "learning_rate": 9.573785564427563e-07, "loss": 10.0706, "step": 409880 }, { "epoch": 0.82800373307692, "grad_norm": 327.7283630371094, "learning_rate": 9.571731540245887e-07, "loss": 14.9142, "step": 409890 }, { "epoch": 0.8280239337096038, "grad_norm": 307.9181823730469, "learning_rate": 9.569677713106673e-07, "loss": 19.5872, "step": 409900 }, { "epoch": 0.8280441343422876, "grad_norm": 225.7193145751953, "learning_rate": 9.567624083019966e-07, "loss": 10.0013, "step": 409910 }, { "epoch": 0.8280643349749714, "grad_norm": 290.69085693359375, "learning_rate": 9.565570649995736e-07, "loss": 21.9275, "step": 409920 }, { "epoch": 0.8280845356076553, "grad_norm": 106.05171203613281, "learning_rate": 9.563517414044028e-07, "loss": 10.3378, "step": 409930 }, { "epoch": 0.8281047362403391, "grad_norm": 139.64109802246094, "learning_rate": 9.561464375174827e-07, "loss": 10.5329, "step": 409940 }, { "epoch": 0.8281249368730229, "grad_norm": 313.7691650390625, "learning_rate": 9.559411533398139e-07, "loss": 13.5532, "step": 409950 }, { "epoch": 0.8281451375057067, "grad_norm": 254.87405395507812, "learning_rate": 9.557358888723977e-07, "loss": 9.8802, "step": 409960 }, { "epoch": 0.8281653381383905, "grad_norm": 216.95628356933594, "learning_rate": 9.555306441162337e-07, "loss": 12.9033, "step": 409970 }, { "epoch": 0.8281855387710743, "grad_norm": 501.4894104003906, "learning_rate": 9.553254190723239e-07, "loss": 29.9019, "step": 409980 }, { "epoch": 0.8282057394037581, "grad_norm": 243.97128295898438, "learning_rate": 9.55120213741666e-07, "loss": 12.0684, "step": 409990 }, { "epoch": 0.8282259400364419, "grad_norm": 36.584564208984375, "learning_rate": 9.549150281252633e-07, "loss": 13.4329, "step": 410000 }, { "epoch": 0.8282461406691257, "grad_norm": 262.5791015625, "learning_rate": 9.54709862224114e-07, "loss": 17.5964, "step": 410010 }, { "epoch": 0.8282663413018095, "grad_norm": 60.09251403808594, "learning_rate": 9.545047160392169e-07, "loss": 14.2294, "step": 410020 }, { "epoch": 0.8282865419344934, "grad_norm": 245.7078399658203, "learning_rate": 9.54299589571574e-07, "loss": 20.3928, "step": 410030 }, { "epoch": 0.8283067425671772, "grad_norm": 268.2927551269531, "learning_rate": 9.540944828221848e-07, "loss": 15.2373, "step": 410040 }, { "epoch": 0.828326943199861, "grad_norm": 1.1214512586593628, "learning_rate": 9.538893957920464e-07, "loss": 5.3185, "step": 410050 }, { "epoch": 0.8283471438325448, "grad_norm": 3.7599427700042725, "learning_rate": 9.536843284821612e-07, "loss": 6.8195, "step": 410060 }, { "epoch": 0.8283673444652286, "grad_norm": 49.542354583740234, "learning_rate": 9.534792808935284e-07, "loss": 10.1195, "step": 410070 }, { "epoch": 0.8283875450979125, "grad_norm": 32.80672073364258, "learning_rate": 9.532742530271471e-07, "loss": 13.0705, "step": 410080 }, { "epoch": 0.8284077457305963, "grad_norm": 311.0405578613281, "learning_rate": 9.530692448840151e-07, "loss": 18.8114, "step": 410090 }, { "epoch": 0.8284279463632801, "grad_norm": 536.824951171875, "learning_rate": 9.528642564651341e-07, "loss": 32.619, "step": 410100 }, { "epoch": 0.8284481469959639, "grad_norm": 182.45458984375, "learning_rate": 9.526592877715019e-07, "loss": 15.7676, "step": 410110 }, { "epoch": 0.8284683476286477, "grad_norm": 165.89573669433594, "learning_rate": 9.524543388041157e-07, "loss": 53.3673, "step": 410120 }, { "epoch": 0.8284885482613316, "grad_norm": 147.05892944335938, "learning_rate": 9.522494095639762e-07, "loss": 12.7387, "step": 410130 }, { "epoch": 0.8285087488940154, "grad_norm": 6.714029788970947, "learning_rate": 9.52044500052084e-07, "loss": 8.1583, "step": 410140 }, { "epoch": 0.8285289495266992, "grad_norm": 218.7359619140625, "learning_rate": 9.518396102694355e-07, "loss": 32.9237, "step": 410150 }, { "epoch": 0.828549150159383, "grad_norm": 257.1134948730469, "learning_rate": 9.516347402170284e-07, "loss": 16.1984, "step": 410160 }, { "epoch": 0.8285693507920668, "grad_norm": 330.681396484375, "learning_rate": 9.514298898958641e-07, "loss": 28.6677, "step": 410170 }, { "epoch": 0.8285895514247507, "grad_norm": 516.0695190429688, "learning_rate": 9.512250593069394e-07, "loss": 13.8821, "step": 410180 }, { "epoch": 0.8286097520574345, "grad_norm": 380.1822204589844, "learning_rate": 9.510202484512516e-07, "loss": 12.9193, "step": 410190 }, { "epoch": 0.8286299526901183, "grad_norm": 102.53484344482422, "learning_rate": 9.508154573298012e-07, "loss": 18.2395, "step": 410200 }, { "epoch": 0.8286501533228021, "grad_norm": 110.82614135742188, "learning_rate": 9.506106859435838e-07, "loss": 15.0357, "step": 410210 }, { "epoch": 0.8286703539554859, "grad_norm": 194.42674255371094, "learning_rate": 9.504059342936001e-07, "loss": 15.7436, "step": 410220 }, { "epoch": 0.8286905545881696, "grad_norm": 176.99172973632812, "learning_rate": 9.502012023808466e-07, "loss": 21.3947, "step": 410230 }, { "epoch": 0.8287107552208535, "grad_norm": 217.5649871826172, "learning_rate": 9.499964902063203e-07, "loss": 13.4836, "step": 410240 }, { "epoch": 0.8287309558535373, "grad_norm": 365.7051086425781, "learning_rate": 9.497917977710208e-07, "loss": 21.7936, "step": 410250 }, { "epoch": 0.8287511564862211, "grad_norm": 159.91734313964844, "learning_rate": 9.495871250759437e-07, "loss": 17.1623, "step": 410260 }, { "epoch": 0.8287713571189049, "grad_norm": 328.34375, "learning_rate": 9.49382472122089e-07, "loss": 22.868, "step": 410270 }, { "epoch": 0.8287915577515887, "grad_norm": 72.84598541259766, "learning_rate": 9.491778389104511e-07, "loss": 69.1836, "step": 410280 }, { "epoch": 0.8288117583842726, "grad_norm": 63.695457458496094, "learning_rate": 9.489732254420315e-07, "loss": 11.9858, "step": 410290 }, { "epoch": 0.8288319590169564, "grad_norm": 148.08123779296875, "learning_rate": 9.487686317178241e-07, "loss": 11.014, "step": 410300 }, { "epoch": 0.8288521596496402, "grad_norm": 108.00569915771484, "learning_rate": 9.48564057738826e-07, "loss": 13.8051, "step": 410310 }, { "epoch": 0.828872360282324, "grad_norm": 68.67027282714844, "learning_rate": 9.483595035060367e-07, "loss": 11.1223, "step": 410320 }, { "epoch": 0.8288925609150078, "grad_norm": 293.92486572265625, "learning_rate": 9.481549690204517e-07, "loss": 15.5717, "step": 410330 }, { "epoch": 0.8289127615476917, "grad_norm": 240.76226806640625, "learning_rate": 9.47950454283067e-07, "loss": 26.255, "step": 410340 }, { "epoch": 0.8289329621803755, "grad_norm": 80.6728515625, "learning_rate": 9.477459592948796e-07, "loss": 13.5701, "step": 410350 }, { "epoch": 0.8289531628130593, "grad_norm": 251.0513916015625, "learning_rate": 9.475414840568903e-07, "loss": 9.2371, "step": 410360 }, { "epoch": 0.8289733634457431, "grad_norm": 314.894287109375, "learning_rate": 9.473370285700889e-07, "loss": 17.923, "step": 410370 }, { "epoch": 0.828993564078427, "grad_norm": 31.855649948120117, "learning_rate": 9.471325928354758e-07, "loss": 16.0794, "step": 410380 }, { "epoch": 0.8290137647111108, "grad_norm": 349.7760314941406, "learning_rate": 9.469281768540484e-07, "loss": 22.2016, "step": 410390 }, { "epoch": 0.8290339653437946, "grad_norm": 273.1689453125, "learning_rate": 9.467237806268009e-07, "loss": 12.9165, "step": 410400 }, { "epoch": 0.8290541659764784, "grad_norm": 472.6404724121094, "learning_rate": 9.465194041547294e-07, "loss": 13.4341, "step": 410410 }, { "epoch": 0.8290743666091622, "grad_norm": 347.4904479980469, "learning_rate": 9.463150474388305e-07, "loss": 21.257, "step": 410420 }, { "epoch": 0.829094567241846, "grad_norm": 36.35457229614258, "learning_rate": 9.461107104801026e-07, "loss": 13.5956, "step": 410430 }, { "epoch": 0.8291147678745299, "grad_norm": 6.013411998748779, "learning_rate": 9.45906393279537e-07, "loss": 14.5089, "step": 410440 }, { "epoch": 0.8291349685072137, "grad_norm": 414.74853515625, "learning_rate": 9.457020958381324e-07, "loss": 22.4405, "step": 410450 }, { "epoch": 0.8291551691398975, "grad_norm": 225.8941650390625, "learning_rate": 9.454978181568847e-07, "loss": 11.0425, "step": 410460 }, { "epoch": 0.8291753697725813, "grad_norm": 386.0409240722656, "learning_rate": 9.452935602367897e-07, "loss": 13.3936, "step": 410470 }, { "epoch": 0.8291955704052651, "grad_norm": 355.720703125, "learning_rate": 9.450893220788399e-07, "loss": 17.4569, "step": 410480 }, { "epoch": 0.8292157710379489, "grad_norm": 210.11192321777344, "learning_rate": 9.448851036840345e-07, "loss": 26.528, "step": 410490 }, { "epoch": 0.8292359716706327, "grad_norm": 287.39227294921875, "learning_rate": 9.446809050533679e-07, "loss": 10.4552, "step": 410500 }, { "epoch": 0.8292561723033165, "grad_norm": 102.47966003417969, "learning_rate": 9.444767261878329e-07, "loss": 23.9491, "step": 410510 }, { "epoch": 0.8292763729360003, "grad_norm": 175.15866088867188, "learning_rate": 9.442725670884278e-07, "loss": 12.1121, "step": 410520 }, { "epoch": 0.8292965735686841, "grad_norm": 312.2544250488281, "learning_rate": 9.440684277561452e-07, "loss": 17.3933, "step": 410530 }, { "epoch": 0.829316774201368, "grad_norm": 401.8553466796875, "learning_rate": 9.438643081919818e-07, "loss": 26.2286, "step": 410540 }, { "epoch": 0.8293369748340518, "grad_norm": 856.045654296875, "learning_rate": 9.436602083969326e-07, "loss": 31.6391, "step": 410550 }, { "epoch": 0.8293571754667356, "grad_norm": 851.8276977539062, "learning_rate": 9.434561283719901e-07, "loss": 22.8242, "step": 410560 }, { "epoch": 0.8293773760994194, "grad_norm": 53.68601608276367, "learning_rate": 9.432520681181512e-07, "loss": 14.8198, "step": 410570 }, { "epoch": 0.8293975767321032, "grad_norm": 177.16592407226562, "learning_rate": 9.430480276364091e-07, "loss": 11.9092, "step": 410580 }, { "epoch": 0.8294177773647871, "grad_norm": 96.69482421875, "learning_rate": 9.428440069277595e-07, "loss": 15.3038, "step": 410590 }, { "epoch": 0.8294379779974709, "grad_norm": 6.7825164794921875, "learning_rate": 9.426400059931956e-07, "loss": 16.8528, "step": 410600 }, { "epoch": 0.8294581786301547, "grad_norm": 649.8616943359375, "learning_rate": 9.424360248337128e-07, "loss": 19.7849, "step": 410610 }, { "epoch": 0.8294783792628385, "grad_norm": 333.5193176269531, "learning_rate": 9.422320634503052e-07, "loss": 15.8497, "step": 410620 }, { "epoch": 0.8294985798955223, "grad_norm": 12.165814399719238, "learning_rate": 9.420281218439648e-07, "loss": 14.759, "step": 410630 }, { "epoch": 0.8295187805282062, "grad_norm": 146.24635314941406, "learning_rate": 9.418242000156886e-07, "loss": 21.3349, "step": 410640 }, { "epoch": 0.82953898116089, "grad_norm": 258.538818359375, "learning_rate": 9.41620297966469e-07, "loss": 7.4266, "step": 410650 }, { "epoch": 0.8295591817935738, "grad_norm": 0.0, "learning_rate": 9.414164156972982e-07, "loss": 17.7462, "step": 410660 }, { "epoch": 0.8295793824262576, "grad_norm": 86.33041381835938, "learning_rate": 9.41212553209172e-07, "loss": 17.7832, "step": 410670 }, { "epoch": 0.8295995830589414, "grad_norm": 209.195556640625, "learning_rate": 9.410087105030846e-07, "loss": 21.6568, "step": 410680 }, { "epoch": 0.8296197836916253, "grad_norm": 296.2096862792969, "learning_rate": 9.408048875800286e-07, "loss": 11.2683, "step": 410690 }, { "epoch": 0.8296399843243091, "grad_norm": 231.32177734375, "learning_rate": 9.406010844409957e-07, "loss": 14.9906, "step": 410700 }, { "epoch": 0.8296601849569929, "grad_norm": 2.3841521739959717, "learning_rate": 9.403973010869826e-07, "loss": 10.5271, "step": 410710 }, { "epoch": 0.8296803855896767, "grad_norm": 231.14926147460938, "learning_rate": 9.401935375189802e-07, "loss": 12.5556, "step": 410720 }, { "epoch": 0.8297005862223605, "grad_norm": 824.7369995117188, "learning_rate": 9.39989793737981e-07, "loss": 26.4877, "step": 410730 }, { "epoch": 0.8297207868550442, "grad_norm": 194.06727600097656, "learning_rate": 9.39786069744979e-07, "loss": 9.1835, "step": 410740 }, { "epoch": 0.8297409874877281, "grad_norm": 119.08084106445312, "learning_rate": 9.395823655409686e-07, "loss": 13.3664, "step": 410750 }, { "epoch": 0.8297611881204119, "grad_norm": 369.6117858886719, "learning_rate": 9.393786811269418e-07, "loss": 25.5274, "step": 410760 }, { "epoch": 0.8297813887530957, "grad_norm": 218.77713012695312, "learning_rate": 9.391750165038887e-07, "loss": 22.0267, "step": 410770 }, { "epoch": 0.8298015893857795, "grad_norm": 475.15203857421875, "learning_rate": 9.38971371672806e-07, "loss": 10.7547, "step": 410780 }, { "epoch": 0.8298217900184633, "grad_norm": 156.6975555419922, "learning_rate": 9.387677466346839e-07, "loss": 12.8292, "step": 410790 }, { "epoch": 0.8298419906511472, "grad_norm": 222.63597106933594, "learning_rate": 9.385641413905139e-07, "loss": 10.7548, "step": 410800 }, { "epoch": 0.829862191283831, "grad_norm": 520.4935913085938, "learning_rate": 9.383605559412911e-07, "loss": 24.5541, "step": 410810 }, { "epoch": 0.8298823919165148, "grad_norm": 230.3719940185547, "learning_rate": 9.38156990288005e-07, "loss": 16.7129, "step": 410820 }, { "epoch": 0.8299025925491986, "grad_norm": 286.4466247558594, "learning_rate": 9.379534444316507e-07, "loss": 24.6331, "step": 410830 }, { "epoch": 0.8299227931818824, "grad_norm": 270.3553161621094, "learning_rate": 9.37749918373218e-07, "loss": 8.0356, "step": 410840 }, { "epoch": 0.8299429938145663, "grad_norm": 342.84991455078125, "learning_rate": 9.375464121136984e-07, "loss": 10.1839, "step": 410850 }, { "epoch": 0.8299631944472501, "grad_norm": 214.2918701171875, "learning_rate": 9.373429256540866e-07, "loss": 20.2067, "step": 410860 }, { "epoch": 0.8299833950799339, "grad_norm": 72.42308044433594, "learning_rate": 9.371394589953714e-07, "loss": 11.7814, "step": 410870 }, { "epoch": 0.8300035957126177, "grad_norm": 524.4677734375, "learning_rate": 9.369360121385463e-07, "loss": 17.439, "step": 410880 }, { "epoch": 0.8300237963453015, "grad_norm": 479.42840576171875, "learning_rate": 9.367325850846015e-07, "loss": 12.9637, "step": 410890 }, { "epoch": 0.8300439969779854, "grad_norm": 411.5046081542969, "learning_rate": 9.365291778345303e-07, "loss": 15.7591, "step": 410900 }, { "epoch": 0.8300641976106692, "grad_norm": 314.82537841796875, "learning_rate": 9.363257903893235e-07, "loss": 14.4305, "step": 410910 }, { "epoch": 0.830084398243353, "grad_norm": 272.7388000488281, "learning_rate": 9.361224227499704e-07, "loss": 15.8626, "step": 410920 }, { "epoch": 0.8301045988760368, "grad_norm": 355.81671142578125, "learning_rate": 9.359190749174645e-07, "loss": 33.5128, "step": 410930 }, { "epoch": 0.8301247995087206, "grad_norm": 327.7281494140625, "learning_rate": 9.357157468927969e-07, "loss": 25.7719, "step": 410940 }, { "epoch": 0.8301450001414045, "grad_norm": 492.9275207519531, "learning_rate": 9.355124386769559e-07, "loss": 21.9312, "step": 410950 }, { "epoch": 0.8301652007740883, "grad_norm": 124.33065032958984, "learning_rate": 9.353091502709349e-07, "loss": 20.5142, "step": 410960 }, { "epoch": 0.8301854014067721, "grad_norm": 159.9413604736328, "learning_rate": 9.351058816757264e-07, "loss": 13.3696, "step": 410970 }, { "epoch": 0.8302056020394559, "grad_norm": 224.1479949951172, "learning_rate": 9.349026328923161e-07, "loss": 14.7056, "step": 410980 }, { "epoch": 0.8302258026721397, "grad_norm": 301.3833923339844, "learning_rate": 9.346994039216972e-07, "loss": 9.8421, "step": 410990 }, { "epoch": 0.8302460033048235, "grad_norm": 342.03240966796875, "learning_rate": 9.344961947648624e-07, "loss": 6.5016, "step": 411000 }, { "epoch": 0.8302662039375073, "grad_norm": 137.11825561523438, "learning_rate": 9.342930054227994e-07, "loss": 17.4337, "step": 411010 }, { "epoch": 0.8302864045701911, "grad_norm": 432.76715087890625, "learning_rate": 9.340898358964978e-07, "loss": 9.5961, "step": 411020 }, { "epoch": 0.8303066052028749, "grad_norm": 127.70624542236328, "learning_rate": 9.338866861869494e-07, "loss": 20.5379, "step": 411030 }, { "epoch": 0.8303268058355587, "grad_norm": 207.30862426757812, "learning_rate": 9.336835562951468e-07, "loss": 7.6163, "step": 411040 }, { "epoch": 0.8303470064682426, "grad_norm": 156.91061401367188, "learning_rate": 9.334804462220748e-07, "loss": 15.8361, "step": 411050 }, { "epoch": 0.8303672071009264, "grad_norm": 303.18023681640625, "learning_rate": 9.332773559687258e-07, "loss": 20.3374, "step": 411060 }, { "epoch": 0.8303874077336102, "grad_norm": 224.73666381835938, "learning_rate": 9.330742855360914e-07, "loss": 25.0015, "step": 411070 }, { "epoch": 0.830407608366294, "grad_norm": 469.3471984863281, "learning_rate": 9.32871234925159e-07, "loss": 23.3163, "step": 411080 }, { "epoch": 0.8304278089989778, "grad_norm": 17.375762939453125, "learning_rate": 9.326682041369178e-07, "loss": 19.8786, "step": 411090 }, { "epoch": 0.8304480096316617, "grad_norm": 273.0326232910156, "learning_rate": 9.3246519317236e-07, "loss": 11.8191, "step": 411100 }, { "epoch": 0.8304682102643455, "grad_norm": 245.9439697265625, "learning_rate": 9.322622020324734e-07, "loss": 17.8062, "step": 411110 }, { "epoch": 0.8304884108970293, "grad_norm": 12.72941780090332, "learning_rate": 9.320592307182463e-07, "loss": 27.3794, "step": 411120 }, { "epoch": 0.8305086115297131, "grad_norm": 297.28643798828125, "learning_rate": 9.318562792306707e-07, "loss": 17.7099, "step": 411130 }, { "epoch": 0.8305288121623969, "grad_norm": 570.4725341796875, "learning_rate": 9.316533475707324e-07, "loss": 14.3911, "step": 411140 }, { "epoch": 0.8305490127950808, "grad_norm": 178.91732788085938, "learning_rate": 9.314504357394233e-07, "loss": 20.0979, "step": 411150 }, { "epoch": 0.8305692134277646, "grad_norm": 318.291259765625, "learning_rate": 9.312475437377322e-07, "loss": 22.2655, "step": 411160 }, { "epoch": 0.8305894140604484, "grad_norm": 329.31256103515625, "learning_rate": 9.310446715666449e-07, "loss": 14.4944, "step": 411170 }, { "epoch": 0.8306096146931322, "grad_norm": 413.19342041015625, "learning_rate": 9.30841819227154e-07, "loss": 15.2204, "step": 411180 }, { "epoch": 0.830629815325816, "grad_norm": 235.3437042236328, "learning_rate": 9.306389867202454e-07, "loss": 20.8483, "step": 411190 }, { "epoch": 0.8306500159584999, "grad_norm": 265.0558166503906, "learning_rate": 9.304361740469103e-07, "loss": 11.7237, "step": 411200 }, { "epoch": 0.8306702165911837, "grad_norm": 417.8369445800781, "learning_rate": 9.302333812081338e-07, "loss": 22.845, "step": 411210 }, { "epoch": 0.8306904172238675, "grad_norm": 77.45326232910156, "learning_rate": 9.300306082049082e-07, "loss": 15.8332, "step": 411220 }, { "epoch": 0.8307106178565513, "grad_norm": 141.16854858398438, "learning_rate": 9.298278550382189e-07, "loss": 7.568, "step": 411230 }, { "epoch": 0.8307308184892351, "grad_norm": 317.944091796875, "learning_rate": 9.296251217090546e-07, "loss": 18.0116, "step": 411240 }, { "epoch": 0.8307510191219188, "grad_norm": 211.9301300048828, "learning_rate": 9.294224082184045e-07, "loss": 21.3034, "step": 411250 }, { "epoch": 0.8307712197546027, "grad_norm": 249.1339874267578, "learning_rate": 9.29219714567256e-07, "loss": 6.3479, "step": 411260 }, { "epoch": 0.8307914203872865, "grad_norm": 129.55760192871094, "learning_rate": 9.290170407565957e-07, "loss": 16.5448, "step": 411270 }, { "epoch": 0.8308116210199703, "grad_norm": 318.5682678222656, "learning_rate": 9.288143867874127e-07, "loss": 23.9776, "step": 411280 }, { "epoch": 0.8308318216526541, "grad_norm": 220.9269561767578, "learning_rate": 9.286117526606958e-07, "loss": 14.5967, "step": 411290 }, { "epoch": 0.8308520222853379, "grad_norm": 264.6311340332031, "learning_rate": 9.284091383774313e-07, "loss": 16.1553, "step": 411300 }, { "epoch": 0.8308722229180218, "grad_norm": 239.48046875, "learning_rate": 9.282065439386057e-07, "loss": 17.614, "step": 411310 }, { "epoch": 0.8308924235507056, "grad_norm": 10.523683547973633, "learning_rate": 9.280039693452086e-07, "loss": 10.6724, "step": 411320 }, { "epoch": 0.8309126241833894, "grad_norm": 188.47384643554688, "learning_rate": 9.278014145982261e-07, "loss": 11.0976, "step": 411330 }, { "epoch": 0.8309328248160732, "grad_norm": 189.58709716796875, "learning_rate": 9.275988796986451e-07, "loss": 17.5269, "step": 411340 }, { "epoch": 0.830953025448757, "grad_norm": 362.7837219238281, "learning_rate": 9.273963646474527e-07, "loss": 17.1241, "step": 411350 }, { "epoch": 0.8309732260814409, "grad_norm": 98.236083984375, "learning_rate": 9.271938694456378e-07, "loss": 15.1582, "step": 411360 }, { "epoch": 0.8309934267141247, "grad_norm": 289.61065673828125, "learning_rate": 9.26991394094186e-07, "loss": 21.4113, "step": 411370 }, { "epoch": 0.8310136273468085, "grad_norm": 591.3011474609375, "learning_rate": 9.267889385940826e-07, "loss": 32.5456, "step": 411380 }, { "epoch": 0.8310338279794923, "grad_norm": 30.476734161376953, "learning_rate": 9.265865029463178e-07, "loss": 23.7093, "step": 411390 }, { "epoch": 0.8310540286121761, "grad_norm": 599.0242919921875, "learning_rate": 9.263840871518759e-07, "loss": 27.0061, "step": 411400 }, { "epoch": 0.83107422924486, "grad_norm": 214.10906982421875, "learning_rate": 9.261816912117428e-07, "loss": 25.594, "step": 411410 }, { "epoch": 0.8310944298775438, "grad_norm": 178.16827392578125, "learning_rate": 9.259793151269075e-07, "loss": 20.8952, "step": 411420 }, { "epoch": 0.8311146305102276, "grad_norm": 2.3764688968658447, "learning_rate": 9.257769588983533e-07, "loss": 21.5293, "step": 411430 }, { "epoch": 0.8311348311429114, "grad_norm": 303.5603942871094, "learning_rate": 9.255746225270689e-07, "loss": 16.6554, "step": 411440 }, { "epoch": 0.8311550317755952, "grad_norm": 112.90088653564453, "learning_rate": 9.253723060140407e-07, "loss": 13.087, "step": 411450 }, { "epoch": 0.8311752324082791, "grad_norm": 366.8277893066406, "learning_rate": 9.251700093602517e-07, "loss": 30.8089, "step": 411460 }, { "epoch": 0.8311954330409629, "grad_norm": 154.587646484375, "learning_rate": 9.249677325666912e-07, "loss": 16.0865, "step": 411470 }, { "epoch": 0.8312156336736467, "grad_norm": 337.88568115234375, "learning_rate": 9.247654756343427e-07, "loss": 24.7234, "step": 411480 }, { "epoch": 0.8312358343063305, "grad_norm": 100.53938293457031, "learning_rate": 9.24563238564194e-07, "loss": 14.5205, "step": 411490 }, { "epoch": 0.8312560349390143, "grad_norm": 324.0766906738281, "learning_rate": 9.243610213572285e-07, "loss": 26.2477, "step": 411500 }, { "epoch": 0.831276235571698, "grad_norm": 346.0293273925781, "learning_rate": 9.241588240144345e-07, "loss": 24.1318, "step": 411510 }, { "epoch": 0.8312964362043819, "grad_norm": 52.525142669677734, "learning_rate": 9.23956646536796e-07, "loss": 23.8312, "step": 411520 }, { "epoch": 0.8313166368370657, "grad_norm": 556.91796875, "learning_rate": 9.237544889252969e-07, "loss": 20.6802, "step": 411530 }, { "epoch": 0.8313368374697495, "grad_norm": 151.1796875, "learning_rate": 9.235523511809258e-07, "loss": 16.7178, "step": 411540 }, { "epoch": 0.8313570381024333, "grad_norm": 373.0771484375, "learning_rate": 9.233502333046662e-07, "loss": 18.695, "step": 411550 }, { "epoch": 0.8313772387351172, "grad_norm": 446.68450927734375, "learning_rate": 9.231481352975014e-07, "loss": 17.1666, "step": 411560 }, { "epoch": 0.831397439367801, "grad_norm": 168.5589599609375, "learning_rate": 9.229460571604182e-07, "loss": 34.2046, "step": 411570 }, { "epoch": 0.8314176400004848, "grad_norm": 543.7969970703125, "learning_rate": 9.227439988944042e-07, "loss": 18.9667, "step": 411580 }, { "epoch": 0.8314378406331686, "grad_norm": 375.8150939941406, "learning_rate": 9.225419605004387e-07, "loss": 36.1783, "step": 411590 }, { "epoch": 0.8314580412658524, "grad_norm": 299.0545959472656, "learning_rate": 9.223399419795093e-07, "loss": 31.3696, "step": 411600 }, { "epoch": 0.8314782418985363, "grad_norm": 710.476318359375, "learning_rate": 9.221379433326017e-07, "loss": 28.3065, "step": 411610 }, { "epoch": 0.8314984425312201, "grad_norm": 361.71075439453125, "learning_rate": 9.21935964560699e-07, "loss": 17.8031, "step": 411620 }, { "epoch": 0.8315186431639039, "grad_norm": 88.85189056396484, "learning_rate": 9.217340056647844e-07, "loss": 9.9401, "step": 411630 }, { "epoch": 0.8315388437965877, "grad_norm": 910.599365234375, "learning_rate": 9.215320666458438e-07, "loss": 20.3631, "step": 411640 }, { "epoch": 0.8315590444292715, "grad_norm": 931.801025390625, "learning_rate": 9.213301475048642e-07, "loss": 28.2905, "step": 411650 }, { "epoch": 0.8315792450619554, "grad_norm": 229.4099884033203, "learning_rate": 9.211282482428241e-07, "loss": 11.2278, "step": 411660 }, { "epoch": 0.8315994456946392, "grad_norm": 323.3447265625, "learning_rate": 9.209263688607095e-07, "loss": 17.2399, "step": 411670 }, { "epoch": 0.831619646327323, "grad_norm": 258.5351257324219, "learning_rate": 9.207245093595068e-07, "loss": 10.1602, "step": 411680 }, { "epoch": 0.8316398469600068, "grad_norm": 98.12583923339844, "learning_rate": 9.205226697401981e-07, "loss": 18.9354, "step": 411690 }, { "epoch": 0.8316600475926906, "grad_norm": 183.52688598632812, "learning_rate": 9.203208500037664e-07, "loss": 16.5627, "step": 411700 }, { "epoch": 0.8316802482253745, "grad_norm": 16.94515609741211, "learning_rate": 9.201190501511964e-07, "loss": 6.6081, "step": 411710 }, { "epoch": 0.8317004488580583, "grad_norm": 384.93231201171875, "learning_rate": 9.199172701834718e-07, "loss": 13.8169, "step": 411720 }, { "epoch": 0.8317206494907421, "grad_norm": 245.5651092529297, "learning_rate": 9.197155101015742e-07, "loss": 11.324, "step": 411730 }, { "epoch": 0.8317408501234259, "grad_norm": 459.01348876953125, "learning_rate": 9.195137699064899e-07, "loss": 16.8517, "step": 411740 }, { "epoch": 0.8317610507561097, "grad_norm": 276.58734130859375, "learning_rate": 9.193120495991986e-07, "loss": 26.2357, "step": 411750 }, { "epoch": 0.8317812513887936, "grad_norm": 791.8773193359375, "learning_rate": 9.191103491806875e-07, "loss": 32.1368, "step": 411760 }, { "epoch": 0.8318014520214773, "grad_norm": 157.01980590820312, "learning_rate": 9.189086686519361e-07, "loss": 10.7995, "step": 411770 }, { "epoch": 0.8318216526541611, "grad_norm": 335.3235168457031, "learning_rate": 9.187070080139299e-07, "loss": 17.8915, "step": 411780 }, { "epoch": 0.8318418532868449, "grad_norm": 148.98536682128906, "learning_rate": 9.185053672676508e-07, "loss": 18.6652, "step": 411790 }, { "epoch": 0.8318620539195287, "grad_norm": 154.6010284423828, "learning_rate": 9.183037464140804e-07, "loss": 44.4547, "step": 411800 }, { "epoch": 0.8318822545522125, "grad_norm": 251.26669311523438, "learning_rate": 9.181021454542033e-07, "loss": 12.9493, "step": 411810 }, { "epoch": 0.8319024551848964, "grad_norm": 152.32383728027344, "learning_rate": 9.179005643890005e-07, "loss": 15.3635, "step": 411820 }, { "epoch": 0.8319226558175802, "grad_norm": 284.79229736328125, "learning_rate": 9.176990032194566e-07, "loss": 17.1892, "step": 411830 }, { "epoch": 0.831942856450264, "grad_norm": 258.5731201171875, "learning_rate": 9.174974619465521e-07, "loss": 15.7157, "step": 411840 }, { "epoch": 0.8319630570829478, "grad_norm": 76.55590057373047, "learning_rate": 9.17295940571269e-07, "loss": 18.6424, "step": 411850 }, { "epoch": 0.8319832577156316, "grad_norm": 354.7213134765625, "learning_rate": 9.170944390945918e-07, "loss": 20.2372, "step": 411860 }, { "epoch": 0.8320034583483155, "grad_norm": 1.4333564043045044, "learning_rate": 9.168929575175006e-07, "loss": 21.1692, "step": 411870 }, { "epoch": 0.8320236589809993, "grad_norm": 151.2689971923828, "learning_rate": 9.166914958409767e-07, "loss": 20.4791, "step": 411880 }, { "epoch": 0.8320438596136831, "grad_norm": 59.702354431152344, "learning_rate": 9.164900540660032e-07, "loss": 13.3553, "step": 411890 }, { "epoch": 0.8320640602463669, "grad_norm": 281.0140075683594, "learning_rate": 9.162886321935632e-07, "loss": 11.8082, "step": 411900 }, { "epoch": 0.8320842608790507, "grad_norm": 668.9278564453125, "learning_rate": 9.160872302246376e-07, "loss": 21.6644, "step": 411910 }, { "epoch": 0.8321044615117346, "grad_norm": 219.61668395996094, "learning_rate": 9.158858481602057e-07, "loss": 9.6473, "step": 411920 }, { "epoch": 0.8321246621444184, "grad_norm": 203.65528869628906, "learning_rate": 9.15684486001252e-07, "loss": 24.9497, "step": 411930 }, { "epoch": 0.8321448627771022, "grad_norm": 436.2219543457031, "learning_rate": 9.154831437487571e-07, "loss": 35.2718, "step": 411940 }, { "epoch": 0.832165063409786, "grad_norm": 239.51417541503906, "learning_rate": 9.152818214037007e-07, "loss": 34.3037, "step": 411950 }, { "epoch": 0.8321852640424698, "grad_norm": 323.2263488769531, "learning_rate": 9.150805189670653e-07, "loss": 16.6825, "step": 411960 }, { "epoch": 0.8322054646751537, "grad_norm": 221.3290252685547, "learning_rate": 9.148792364398328e-07, "loss": 12.5264, "step": 411970 }, { "epoch": 0.8322256653078375, "grad_norm": 395.4976806640625, "learning_rate": 9.146779738229838e-07, "loss": 31.9479, "step": 411980 }, { "epoch": 0.8322458659405213, "grad_norm": 230.811279296875, "learning_rate": 9.144767311174979e-07, "loss": 34.8361, "step": 411990 }, { "epoch": 0.8322660665732051, "grad_norm": 322.2712707519531, "learning_rate": 9.142755083243577e-07, "loss": 24.9979, "step": 412000 }, { "epoch": 0.8322862672058889, "grad_norm": 226.15841674804688, "learning_rate": 9.140743054445434e-07, "loss": 24.8086, "step": 412010 }, { "epoch": 0.8323064678385727, "grad_norm": 251.577880859375, "learning_rate": 9.138731224790337e-07, "loss": 18.6506, "step": 412020 }, { "epoch": 0.8323266684712565, "grad_norm": 451.17730712890625, "learning_rate": 9.136719594288124e-07, "loss": 23.1196, "step": 412030 }, { "epoch": 0.8323468691039403, "grad_norm": 192.0828399658203, "learning_rate": 9.134708162948575e-07, "loss": 19.2363, "step": 412040 }, { "epoch": 0.8323670697366241, "grad_norm": 246.48403930664062, "learning_rate": 9.132696930781509e-07, "loss": 18.6824, "step": 412050 }, { "epoch": 0.8323872703693079, "grad_norm": 297.5436096191406, "learning_rate": 9.130685897796721e-07, "loss": 18.3743, "step": 412060 }, { "epoch": 0.8324074710019918, "grad_norm": 275.0685119628906, "learning_rate": 9.128675064004006e-07, "loss": 14.841, "step": 412070 }, { "epoch": 0.8324276716346756, "grad_norm": 150.78086853027344, "learning_rate": 9.126664429413179e-07, "loss": 16.685, "step": 412080 }, { "epoch": 0.8324478722673594, "grad_norm": 291.4930725097656, "learning_rate": 9.124653994034022e-07, "loss": 15.4038, "step": 412090 }, { "epoch": 0.8324680729000432, "grad_norm": 277.358154296875, "learning_rate": 9.122643757876354e-07, "loss": 8.7487, "step": 412100 }, { "epoch": 0.832488273532727, "grad_norm": 234.3372039794922, "learning_rate": 9.120633720949951e-07, "loss": 19.6536, "step": 412110 }, { "epoch": 0.8325084741654108, "grad_norm": 347.8551025390625, "learning_rate": 9.118623883264633e-07, "loss": 16.7644, "step": 412120 }, { "epoch": 0.8325286747980947, "grad_norm": 135.3734130859375, "learning_rate": 9.116614244830186e-07, "loss": 12.0352, "step": 412130 }, { "epoch": 0.8325488754307785, "grad_norm": 288.440185546875, "learning_rate": 9.11460480565639e-07, "loss": 22.1301, "step": 412140 }, { "epoch": 0.8325690760634623, "grad_norm": 255.90582275390625, "learning_rate": 9.112595565753063e-07, "loss": 23.8462, "step": 412150 }, { "epoch": 0.8325892766961461, "grad_norm": 288.17706298828125, "learning_rate": 9.110586525129988e-07, "loss": 10.4492, "step": 412160 }, { "epoch": 0.83260947732883, "grad_norm": 115.60209655761719, "learning_rate": 9.108577683796938e-07, "loss": 12.44, "step": 412170 }, { "epoch": 0.8326296779615138, "grad_norm": 173.71881103515625, "learning_rate": 9.106569041763725e-07, "loss": 18.1898, "step": 412180 }, { "epoch": 0.8326498785941976, "grad_norm": 264.0632019042969, "learning_rate": 9.104560599040158e-07, "loss": 11.0732, "step": 412190 }, { "epoch": 0.8326700792268814, "grad_norm": 524.9210205078125, "learning_rate": 9.10255235563598e-07, "loss": 9.0536, "step": 412200 }, { "epoch": 0.8326902798595652, "grad_norm": 332.8991394042969, "learning_rate": 9.100544311561e-07, "loss": 14.9676, "step": 412210 }, { "epoch": 0.832710480492249, "grad_norm": 149.10218811035156, "learning_rate": 9.098536466825014e-07, "loss": 21.6883, "step": 412220 }, { "epoch": 0.8327306811249329, "grad_norm": 452.45361328125, "learning_rate": 9.096528821437806e-07, "loss": 19.6112, "step": 412230 }, { "epoch": 0.8327508817576167, "grad_norm": 274.8936767578125, "learning_rate": 9.094521375409143e-07, "loss": 22.4528, "step": 412240 }, { "epoch": 0.8327710823903005, "grad_norm": 77.36784362792969, "learning_rate": 9.09251412874882e-07, "loss": 27.8912, "step": 412250 }, { "epoch": 0.8327912830229843, "grad_norm": 224.7070770263672, "learning_rate": 9.090507081466648e-07, "loss": 19.2184, "step": 412260 }, { "epoch": 0.8328114836556681, "grad_norm": 50.71030807495117, "learning_rate": 9.088500233572356e-07, "loss": 7.7483, "step": 412270 }, { "epoch": 0.8328316842883519, "grad_norm": 405.861328125, "learning_rate": 9.086493585075757e-07, "loss": 23.1779, "step": 412280 }, { "epoch": 0.8328518849210357, "grad_norm": 153.75119018554688, "learning_rate": 9.08448713598663e-07, "loss": 24.8805, "step": 412290 }, { "epoch": 0.8328720855537195, "grad_norm": 442.55438232421875, "learning_rate": 9.08248088631476e-07, "loss": 15.9903, "step": 412300 }, { "epoch": 0.8328922861864033, "grad_norm": 307.6817321777344, "learning_rate": 9.080474836069896e-07, "loss": 12.1104, "step": 412310 }, { "epoch": 0.8329124868190871, "grad_norm": 351.05706787109375, "learning_rate": 9.078468985261851e-07, "loss": 10.0132, "step": 412320 }, { "epoch": 0.832932687451771, "grad_norm": 157.57220458984375, "learning_rate": 9.076463333900382e-07, "loss": 12.9428, "step": 412330 }, { "epoch": 0.8329528880844548, "grad_norm": 41.71080780029297, "learning_rate": 9.074457881995252e-07, "loss": 22.8596, "step": 412340 }, { "epoch": 0.8329730887171386, "grad_norm": 366.6396789550781, "learning_rate": 9.072452629556272e-07, "loss": 18.126, "step": 412350 }, { "epoch": 0.8329932893498224, "grad_norm": 242.27835083007812, "learning_rate": 9.070447576593172e-07, "loss": 29.0946, "step": 412360 }, { "epoch": 0.8330134899825062, "grad_norm": 387.7580261230469, "learning_rate": 9.068442723115766e-07, "loss": 28.8666, "step": 412370 }, { "epoch": 0.8330336906151901, "grad_norm": 620.4930419921875, "learning_rate": 9.066438069133787e-07, "loss": 26.3067, "step": 412380 }, { "epoch": 0.8330538912478739, "grad_norm": 207.75900268554688, "learning_rate": 9.064433614657042e-07, "loss": 13.0702, "step": 412390 }, { "epoch": 0.8330740918805577, "grad_norm": 344.1200256347656, "learning_rate": 9.06242935969528e-07, "loss": 17.2269, "step": 412400 }, { "epoch": 0.8330942925132415, "grad_norm": 166.13816833496094, "learning_rate": 9.060425304258263e-07, "loss": 15.0197, "step": 412410 }, { "epoch": 0.8331144931459253, "grad_norm": 417.953369140625, "learning_rate": 9.058421448355775e-07, "loss": 15.0879, "step": 412420 }, { "epoch": 0.8331346937786092, "grad_norm": 266.64788818359375, "learning_rate": 9.056417791997568e-07, "loss": 12.0577, "step": 412430 }, { "epoch": 0.833154894411293, "grad_norm": 118.12489318847656, "learning_rate": 9.054414335193424e-07, "loss": 16.7493, "step": 412440 }, { "epoch": 0.8331750950439768, "grad_norm": 292.557861328125, "learning_rate": 9.052411077953099e-07, "loss": 19.0807, "step": 412450 }, { "epoch": 0.8331952956766606, "grad_norm": 409.37091064453125, "learning_rate": 9.050408020286344e-07, "loss": 17.5583, "step": 412460 }, { "epoch": 0.8332154963093444, "grad_norm": 354.3134765625, "learning_rate": 9.048405162202944e-07, "loss": 20.2171, "step": 412470 }, { "epoch": 0.8332356969420283, "grad_norm": 312.1829528808594, "learning_rate": 9.046402503712653e-07, "loss": 22.1662, "step": 412480 }, { "epoch": 0.8332558975747121, "grad_norm": 0.0, "learning_rate": 9.044400044825219e-07, "loss": 21.8373, "step": 412490 }, { "epoch": 0.8332760982073959, "grad_norm": 258.4537048339844, "learning_rate": 9.042397785550405e-07, "loss": 18.6718, "step": 412500 }, { "epoch": 0.8332962988400797, "grad_norm": 127.43373107910156, "learning_rate": 9.04039572589801e-07, "loss": 25.5437, "step": 412510 }, { "epoch": 0.8333164994727635, "grad_norm": 386.5230407714844, "learning_rate": 9.038393865877725e-07, "loss": 21.1883, "step": 412520 }, { "epoch": 0.8333367001054472, "grad_norm": 362.5669250488281, "learning_rate": 9.036392205499344e-07, "loss": 13.2472, "step": 412530 }, { "epoch": 0.8333569007381311, "grad_norm": 156.44659423828125, "learning_rate": 9.034390744772637e-07, "loss": 22.0577, "step": 412540 }, { "epoch": 0.8333771013708149, "grad_norm": 28.910980224609375, "learning_rate": 9.032389483707332e-07, "loss": 22.0371, "step": 412550 }, { "epoch": 0.8333973020034987, "grad_norm": 527.949462890625, "learning_rate": 9.030388422313185e-07, "loss": 25.0462, "step": 412560 }, { "epoch": 0.8334175026361825, "grad_norm": 33.165714263916016, "learning_rate": 9.028387560599955e-07, "loss": 16.1664, "step": 412570 }, { "epoch": 0.8334377032688663, "grad_norm": 189.52769470214844, "learning_rate": 9.026386898577417e-07, "loss": 16.9787, "step": 412580 }, { "epoch": 0.8334579039015502, "grad_norm": 192.579833984375, "learning_rate": 9.024386436255278e-07, "loss": 18.974, "step": 412590 }, { "epoch": 0.833478104534234, "grad_norm": 95.60814666748047, "learning_rate": 9.022386173643305e-07, "loss": 10.7693, "step": 412600 }, { "epoch": 0.8334983051669178, "grad_norm": 43.70400619506836, "learning_rate": 9.020386110751267e-07, "loss": 10.8637, "step": 412610 }, { "epoch": 0.8335185057996016, "grad_norm": 198.74070739746094, "learning_rate": 9.018386247588901e-07, "loss": 8.3321, "step": 412620 }, { "epoch": 0.8335387064322854, "grad_norm": 610.537841796875, "learning_rate": 9.016386584165932e-07, "loss": 35.9469, "step": 412630 }, { "epoch": 0.8335589070649693, "grad_norm": 310.95111083984375, "learning_rate": 9.014387120492141e-07, "loss": 15.5189, "step": 412640 }, { "epoch": 0.8335791076976531, "grad_norm": 274.9654235839844, "learning_rate": 9.012387856577238e-07, "loss": 20.2978, "step": 412650 }, { "epoch": 0.8335993083303369, "grad_norm": 307.10931396484375, "learning_rate": 9.010388792431002e-07, "loss": 17.7969, "step": 412660 }, { "epoch": 0.8336195089630207, "grad_norm": 86.99068450927734, "learning_rate": 9.008389928063161e-07, "loss": 19.2728, "step": 412670 }, { "epoch": 0.8336397095957045, "grad_norm": 149.92062377929688, "learning_rate": 9.006391263483438e-07, "loss": 6.7, "step": 412680 }, { "epoch": 0.8336599102283884, "grad_norm": 19.083406448364258, "learning_rate": 9.004392798701605e-07, "loss": 7.6801, "step": 412690 }, { "epoch": 0.8336801108610722, "grad_norm": 115.79694366455078, "learning_rate": 9.002394533727382e-07, "loss": 12.5117, "step": 412700 }, { "epoch": 0.833700311493756, "grad_norm": 213.28221130371094, "learning_rate": 9.000396468570527e-07, "loss": 20.7953, "step": 412710 }, { "epoch": 0.8337205121264398, "grad_norm": 873.066650390625, "learning_rate": 8.998398603240755e-07, "loss": 31.7136, "step": 412720 }, { "epoch": 0.8337407127591236, "grad_norm": 400.9732360839844, "learning_rate": 8.996400937747823e-07, "loss": 16.5624, "step": 412730 }, { "epoch": 0.8337609133918075, "grad_norm": 315.30999755859375, "learning_rate": 8.994403472101465e-07, "loss": 12.7594, "step": 412740 }, { "epoch": 0.8337811140244913, "grad_norm": 130.17044067382812, "learning_rate": 8.992406206311394e-07, "loss": 10.9296, "step": 412750 }, { "epoch": 0.8338013146571751, "grad_norm": 301.9109191894531, "learning_rate": 8.990409140387374e-07, "loss": 18.6728, "step": 412760 }, { "epoch": 0.8338215152898589, "grad_norm": 314.758056640625, "learning_rate": 8.988412274339131e-07, "loss": 25.2493, "step": 412770 }, { "epoch": 0.8338417159225427, "grad_norm": 331.45880126953125, "learning_rate": 8.986415608176375e-07, "loss": 16.8022, "step": 412780 }, { "epoch": 0.8338619165552265, "grad_norm": 464.63690185546875, "learning_rate": 8.984419141908857e-07, "loss": 19.5087, "step": 412790 }, { "epoch": 0.8338821171879103, "grad_norm": 894.884521484375, "learning_rate": 8.982422875546332e-07, "loss": 18.3637, "step": 412800 }, { "epoch": 0.8339023178205941, "grad_norm": 197.435546875, "learning_rate": 8.980426809098475e-07, "loss": 16.7934, "step": 412810 }, { "epoch": 0.8339225184532779, "grad_norm": 36.75721740722656, "learning_rate": 8.978430942575045e-07, "loss": 16.0683, "step": 412820 }, { "epoch": 0.8339427190859617, "grad_norm": 1023.606201171875, "learning_rate": 8.976435275985779e-07, "loss": 22.7153, "step": 412830 }, { "epoch": 0.8339629197186456, "grad_norm": 111.61725616455078, "learning_rate": 8.974439809340391e-07, "loss": 13.2987, "step": 412840 }, { "epoch": 0.8339831203513294, "grad_norm": 534.7609252929688, "learning_rate": 8.972444542648595e-07, "loss": 15.765, "step": 412850 }, { "epoch": 0.8340033209840132, "grad_norm": 129.47967529296875, "learning_rate": 8.970449475920129e-07, "loss": 21.1342, "step": 412860 }, { "epoch": 0.834023521616697, "grad_norm": 499.8721923828125, "learning_rate": 8.968454609164745e-07, "loss": 37.1606, "step": 412870 }, { "epoch": 0.8340437222493808, "grad_norm": 350.1219787597656, "learning_rate": 8.966459942392108e-07, "loss": 14.2011, "step": 412880 }, { "epoch": 0.8340639228820647, "grad_norm": 167.04541015625, "learning_rate": 8.964465475611967e-07, "loss": 11.2451, "step": 412890 }, { "epoch": 0.8340841235147485, "grad_norm": 129.08018493652344, "learning_rate": 8.962471208834056e-07, "loss": 24.8392, "step": 412900 }, { "epoch": 0.8341043241474323, "grad_norm": 302.9072265625, "learning_rate": 8.960477142068085e-07, "loss": 13.0826, "step": 412910 }, { "epoch": 0.8341245247801161, "grad_norm": 426.129638671875, "learning_rate": 8.958483275323759e-07, "loss": 28.527, "step": 412920 }, { "epoch": 0.8341447254127999, "grad_norm": 75.61614227294922, "learning_rate": 8.956489608610825e-07, "loss": 11.6145, "step": 412930 }, { "epoch": 0.8341649260454838, "grad_norm": 469.23895263671875, "learning_rate": 8.954496141938973e-07, "loss": 27.9991, "step": 412940 }, { "epoch": 0.8341851266781676, "grad_norm": 334.9708251953125, "learning_rate": 8.95250287531792e-07, "loss": 15.1293, "step": 412950 }, { "epoch": 0.8342053273108514, "grad_norm": 269.94146728515625, "learning_rate": 8.950509808757408e-07, "loss": 31.8621, "step": 412960 }, { "epoch": 0.8342255279435352, "grad_norm": 127.4312515258789, "learning_rate": 8.94851694226711e-07, "loss": 12.3814, "step": 412970 }, { "epoch": 0.834245728576219, "grad_norm": 255.662841796875, "learning_rate": 8.946524275856783e-07, "loss": 19.022, "step": 412980 }, { "epoch": 0.8342659292089029, "grad_norm": 187.6837615966797, "learning_rate": 8.9445318095361e-07, "loss": 11.7037, "step": 412990 }, { "epoch": 0.8342861298415867, "grad_norm": 193.17514038085938, "learning_rate": 8.942539543314799e-07, "loss": 24.9263, "step": 413000 }, { "epoch": 0.8343063304742705, "grad_norm": 187.07073974609375, "learning_rate": 8.940547477202588e-07, "loss": 14.3232, "step": 413010 }, { "epoch": 0.8343265311069543, "grad_norm": 439.3637390136719, "learning_rate": 8.938555611209149e-07, "loss": 21.9024, "step": 413020 }, { "epoch": 0.8343467317396381, "grad_norm": 227.25631713867188, "learning_rate": 8.936563945344229e-07, "loss": 9.7824, "step": 413030 }, { "epoch": 0.8343669323723218, "grad_norm": 340.919677734375, "learning_rate": 8.934572479617498e-07, "loss": 10.9799, "step": 413040 }, { "epoch": 0.8343871330050057, "grad_norm": 259.6977233886719, "learning_rate": 8.932581214038693e-07, "loss": 19.8122, "step": 413050 }, { "epoch": 0.8344073336376895, "grad_norm": 279.10198974609375, "learning_rate": 8.930590148617513e-07, "loss": 17.8266, "step": 413060 }, { "epoch": 0.8344275342703733, "grad_norm": 321.66790771484375, "learning_rate": 8.928599283363637e-07, "loss": 11.6333, "step": 413070 }, { "epoch": 0.8344477349030571, "grad_norm": 133.40652465820312, "learning_rate": 8.926608618286797e-07, "loss": 12.242, "step": 413080 }, { "epoch": 0.834467935535741, "grad_norm": 149.44602966308594, "learning_rate": 8.924618153396691e-07, "loss": 11.6056, "step": 413090 }, { "epoch": 0.8344881361684248, "grad_norm": 218.36961364746094, "learning_rate": 8.922627888703e-07, "loss": 12.9567, "step": 413100 }, { "epoch": 0.8345083368011086, "grad_norm": 366.2778015136719, "learning_rate": 8.920637824215433e-07, "loss": 17.1104, "step": 413110 }, { "epoch": 0.8345285374337924, "grad_norm": 404.5040283203125, "learning_rate": 8.918647959943727e-07, "loss": 19.5129, "step": 413120 }, { "epoch": 0.8345487380664762, "grad_norm": 17.79667854309082, "learning_rate": 8.916658295897523e-07, "loss": 20.8276, "step": 413130 }, { "epoch": 0.83456893869916, "grad_norm": 247.7096405029297, "learning_rate": 8.914668832086543e-07, "loss": 11.0947, "step": 413140 }, { "epoch": 0.8345891393318439, "grad_norm": 404.6821594238281, "learning_rate": 8.912679568520494e-07, "loss": 16.0631, "step": 413150 }, { "epoch": 0.8346093399645277, "grad_norm": 428.7170104980469, "learning_rate": 8.910690505209063e-07, "loss": 18.2515, "step": 413160 }, { "epoch": 0.8346295405972115, "grad_norm": 405.0267639160156, "learning_rate": 8.908701642161927e-07, "loss": 15.6834, "step": 413170 }, { "epoch": 0.8346497412298953, "grad_norm": 288.65509033203125, "learning_rate": 8.906712979388799e-07, "loss": 20.9157, "step": 413180 }, { "epoch": 0.8346699418625791, "grad_norm": 453.7732849121094, "learning_rate": 8.904724516899394e-07, "loss": 17.1009, "step": 413190 }, { "epoch": 0.834690142495263, "grad_norm": 274.40673828125, "learning_rate": 8.902736254703347e-07, "loss": 19.3447, "step": 413200 }, { "epoch": 0.8347103431279468, "grad_norm": 129.41476440429688, "learning_rate": 8.900748192810387e-07, "loss": 12.3541, "step": 413210 }, { "epoch": 0.8347305437606306, "grad_norm": 69.88551330566406, "learning_rate": 8.898760331230206e-07, "loss": 17.0083, "step": 413220 }, { "epoch": 0.8347507443933144, "grad_norm": 211.6928253173828, "learning_rate": 8.896772669972475e-07, "loss": 14.4656, "step": 413230 }, { "epoch": 0.8347709450259982, "grad_norm": 266.93133544921875, "learning_rate": 8.894785209046886e-07, "loss": 28.4402, "step": 413240 }, { "epoch": 0.8347911456586821, "grad_norm": 442.09576416015625, "learning_rate": 8.892797948463134e-07, "loss": 19.9675, "step": 413250 }, { "epoch": 0.8348113462913659, "grad_norm": 379.21649169921875, "learning_rate": 8.8908108882309e-07, "loss": 29.8716, "step": 413260 }, { "epoch": 0.8348315469240497, "grad_norm": 213.49110412597656, "learning_rate": 8.888824028359855e-07, "loss": 11.29, "step": 413270 }, { "epoch": 0.8348517475567335, "grad_norm": 167.77769470214844, "learning_rate": 8.886837368859713e-07, "loss": 11.9455, "step": 413280 }, { "epoch": 0.8348719481894173, "grad_norm": 63.819610595703125, "learning_rate": 8.884850909740123e-07, "loss": 15.3651, "step": 413290 }, { "epoch": 0.834892148822101, "grad_norm": 176.69622802734375, "learning_rate": 8.882864651010798e-07, "loss": 30.0849, "step": 413300 }, { "epoch": 0.8349123494547849, "grad_norm": 193.23312377929688, "learning_rate": 8.880878592681386e-07, "loss": 16.2767, "step": 413310 }, { "epoch": 0.8349325500874687, "grad_norm": 113.10722351074219, "learning_rate": 8.878892734761602e-07, "loss": 13.8889, "step": 413320 }, { "epoch": 0.8349527507201525, "grad_norm": 209.2921905517578, "learning_rate": 8.876907077261093e-07, "loss": 7.1366, "step": 413330 }, { "epoch": 0.8349729513528363, "grad_norm": 15.959732055664062, "learning_rate": 8.874921620189564e-07, "loss": 16.959, "step": 413340 }, { "epoch": 0.8349931519855202, "grad_norm": 3.5665526390075684, "learning_rate": 8.872936363556678e-07, "loss": 13.8882, "step": 413350 }, { "epoch": 0.835013352618204, "grad_norm": 45.434326171875, "learning_rate": 8.8709513073721e-07, "loss": 12.6826, "step": 413360 }, { "epoch": 0.8350335532508878, "grad_norm": 301.7564392089844, "learning_rate": 8.868966451645533e-07, "loss": 17.2946, "step": 413370 }, { "epoch": 0.8350537538835716, "grad_norm": 425.86932373046875, "learning_rate": 8.866981796386631e-07, "loss": 30.2565, "step": 413380 }, { "epoch": 0.8350739545162554, "grad_norm": 156.07806396484375, "learning_rate": 8.864997341605059e-07, "loss": 12.5026, "step": 413390 }, { "epoch": 0.8350941551489393, "grad_norm": 6.209137439727783, "learning_rate": 8.863013087310502e-07, "loss": 18.9101, "step": 413400 }, { "epoch": 0.8351143557816231, "grad_norm": 40.78044509887695, "learning_rate": 8.861029033512652e-07, "loss": 18.9816, "step": 413410 }, { "epoch": 0.8351345564143069, "grad_norm": 192.6046600341797, "learning_rate": 8.859045180221137e-07, "loss": 15.9683, "step": 413420 }, { "epoch": 0.8351547570469907, "grad_norm": 319.6946716308594, "learning_rate": 8.857061527445643e-07, "loss": 14.0145, "step": 413430 }, { "epoch": 0.8351749576796745, "grad_norm": 404.8847961425781, "learning_rate": 8.85507807519585e-07, "loss": 24.7391, "step": 413440 }, { "epoch": 0.8351951583123584, "grad_norm": 139.3520050048828, "learning_rate": 8.853094823481423e-07, "loss": 17.5142, "step": 413450 }, { "epoch": 0.8352153589450422, "grad_norm": 593.1425170898438, "learning_rate": 8.851111772312004e-07, "loss": 30.387, "step": 413460 }, { "epoch": 0.835235559577726, "grad_norm": 532.3296508789062, "learning_rate": 8.849128921697276e-07, "loss": 19.2349, "step": 413470 }, { "epoch": 0.8352557602104098, "grad_norm": 128.8043670654297, "learning_rate": 8.847146271646928e-07, "loss": 20.4427, "step": 413480 }, { "epoch": 0.8352759608430936, "grad_norm": 525.5438842773438, "learning_rate": 8.845163822170577e-07, "loss": 15.6037, "step": 413490 }, { "epoch": 0.8352961614757775, "grad_norm": 435.2064514160156, "learning_rate": 8.843181573277904e-07, "loss": 13.6694, "step": 413500 }, { "epoch": 0.8353163621084613, "grad_norm": 143.55349731445312, "learning_rate": 8.841199524978583e-07, "loss": 12.7829, "step": 413510 }, { "epoch": 0.8353365627411451, "grad_norm": 197.24652099609375, "learning_rate": 8.839217677282264e-07, "loss": 18.5668, "step": 413520 }, { "epoch": 0.8353567633738289, "grad_norm": 223.94488525390625, "learning_rate": 8.837236030198593e-07, "loss": 22.7118, "step": 413530 }, { "epoch": 0.8353769640065127, "grad_norm": 234.2515106201172, "learning_rate": 8.835254583737251e-07, "loss": 29.6582, "step": 413540 }, { "epoch": 0.8353971646391966, "grad_norm": 264.32427978515625, "learning_rate": 8.833273337907888e-07, "loss": 17.1786, "step": 413550 }, { "epoch": 0.8354173652718803, "grad_norm": 362.5565185546875, "learning_rate": 8.831292292720151e-07, "loss": 11.0104, "step": 413560 }, { "epoch": 0.8354375659045641, "grad_norm": 737.795166015625, "learning_rate": 8.829311448183708e-07, "loss": 27.4012, "step": 413570 }, { "epoch": 0.8354577665372479, "grad_norm": 329.68194580078125, "learning_rate": 8.827330804308199e-07, "loss": 11.2578, "step": 413580 }, { "epoch": 0.8354779671699317, "grad_norm": 169.51951599121094, "learning_rate": 8.825350361103291e-07, "loss": 13.2886, "step": 413590 }, { "epoch": 0.8354981678026155, "grad_norm": 163.74740600585938, "learning_rate": 8.823370118578628e-07, "loss": 19.3062, "step": 413600 }, { "epoch": 0.8355183684352994, "grad_norm": 158.4401092529297, "learning_rate": 8.821390076743874e-07, "loss": 12.2418, "step": 413610 }, { "epoch": 0.8355385690679832, "grad_norm": 252.3665008544922, "learning_rate": 8.819410235608666e-07, "loss": 20.1108, "step": 413620 }, { "epoch": 0.835558769700667, "grad_norm": 383.4457092285156, "learning_rate": 8.817430595182652e-07, "loss": 9.7022, "step": 413630 }, { "epoch": 0.8355789703333508, "grad_norm": 105.75942993164062, "learning_rate": 8.815451155475496e-07, "loss": 12.7713, "step": 413640 }, { "epoch": 0.8355991709660346, "grad_norm": 341.0040588378906, "learning_rate": 8.813471916496824e-07, "loss": 11.7804, "step": 413650 }, { "epoch": 0.8356193715987185, "grad_norm": 231.97000122070312, "learning_rate": 8.811492878256306e-07, "loss": 22.2059, "step": 413660 }, { "epoch": 0.8356395722314023, "grad_norm": 473.5282897949219, "learning_rate": 8.809514040763578e-07, "loss": 17.4466, "step": 413670 }, { "epoch": 0.8356597728640861, "grad_norm": 154.89828491210938, "learning_rate": 8.807535404028267e-07, "loss": 62.5148, "step": 413680 }, { "epoch": 0.8356799734967699, "grad_norm": 209.90728759765625, "learning_rate": 8.805556968060047e-07, "loss": 10.9259, "step": 413690 }, { "epoch": 0.8357001741294537, "grad_norm": 331.815185546875, "learning_rate": 8.803578732868545e-07, "loss": 12.7226, "step": 413700 }, { "epoch": 0.8357203747621376, "grad_norm": 214.2098846435547, "learning_rate": 8.801600698463397e-07, "loss": 11.6859, "step": 413710 }, { "epoch": 0.8357405753948214, "grad_norm": 151.37193298339844, "learning_rate": 8.799622864854246e-07, "loss": 18.3774, "step": 413720 }, { "epoch": 0.8357607760275052, "grad_norm": 167.34091186523438, "learning_rate": 8.797645232050761e-07, "loss": 14.3168, "step": 413730 }, { "epoch": 0.835780976660189, "grad_norm": 391.0142822265625, "learning_rate": 8.795667800062529e-07, "loss": 22.2468, "step": 413740 }, { "epoch": 0.8358011772928728, "grad_norm": 183.8346710205078, "learning_rate": 8.793690568899216e-07, "loss": 8.8957, "step": 413750 }, { "epoch": 0.8358213779255567, "grad_norm": 66.4180679321289, "learning_rate": 8.791713538570474e-07, "loss": 12.9661, "step": 413760 }, { "epoch": 0.8358415785582405, "grad_norm": 33.19932174682617, "learning_rate": 8.789736709085917e-07, "loss": 17.7938, "step": 413770 }, { "epoch": 0.8358617791909243, "grad_norm": 344.3016662597656, "learning_rate": 8.787760080455171e-07, "loss": 14.7978, "step": 413780 }, { "epoch": 0.8358819798236081, "grad_norm": 499.5542907714844, "learning_rate": 8.78578365268789e-07, "loss": 24.341, "step": 413790 }, { "epoch": 0.8359021804562919, "grad_norm": 321.3106384277344, "learning_rate": 8.783807425793722e-07, "loss": 12.9674, "step": 413800 }, { "epoch": 0.8359223810889757, "grad_norm": 157.91650390625, "learning_rate": 8.781831399782254e-07, "loss": 13.2854, "step": 413810 }, { "epoch": 0.8359425817216595, "grad_norm": 315.389404296875, "learning_rate": 8.779855574663138e-07, "loss": 19.29, "step": 413820 }, { "epoch": 0.8359627823543433, "grad_norm": 366.7017822265625, "learning_rate": 8.777879950446022e-07, "loss": 19.8913, "step": 413830 }, { "epoch": 0.8359829829870271, "grad_norm": 330.5120849609375, "learning_rate": 8.775904527140522e-07, "loss": 22.7396, "step": 413840 }, { "epoch": 0.8360031836197109, "grad_norm": 189.5631866455078, "learning_rate": 8.773929304756246e-07, "loss": 25.5619, "step": 413850 }, { "epoch": 0.8360233842523948, "grad_norm": 302.5855712890625, "learning_rate": 8.771954283302852e-07, "loss": 15.2727, "step": 413860 }, { "epoch": 0.8360435848850786, "grad_norm": 7.903520107269287, "learning_rate": 8.769979462789957e-07, "loss": 8.6139, "step": 413870 }, { "epoch": 0.8360637855177624, "grad_norm": 20.694547653198242, "learning_rate": 8.768004843227162e-07, "loss": 9.0887, "step": 413880 }, { "epoch": 0.8360839861504462, "grad_norm": 0.0, "learning_rate": 8.766030424624117e-07, "loss": 16.1197, "step": 413890 }, { "epoch": 0.83610418678313, "grad_norm": 106.90850067138672, "learning_rate": 8.764056206990446e-07, "loss": 13.819, "step": 413900 }, { "epoch": 0.8361243874158139, "grad_norm": 219.71388244628906, "learning_rate": 8.762082190335763e-07, "loss": 20.8292, "step": 413910 }, { "epoch": 0.8361445880484977, "grad_norm": 298.3263854980469, "learning_rate": 8.760108374669679e-07, "loss": 18.1902, "step": 413920 }, { "epoch": 0.8361647886811815, "grad_norm": 422.6291198730469, "learning_rate": 8.75813476000184e-07, "loss": 9.6727, "step": 413930 }, { "epoch": 0.8361849893138653, "grad_norm": 124.7054443359375, "learning_rate": 8.756161346341851e-07, "loss": 13.6625, "step": 413940 }, { "epoch": 0.8362051899465491, "grad_norm": 280.4652404785156, "learning_rate": 8.754188133699316e-07, "loss": 22.6385, "step": 413950 }, { "epoch": 0.836225390579233, "grad_norm": 306.5396728515625, "learning_rate": 8.752215122083874e-07, "loss": 19.4553, "step": 413960 }, { "epoch": 0.8362455912119168, "grad_norm": 218.40274047851562, "learning_rate": 8.750242311505125e-07, "loss": 16.2668, "step": 413970 }, { "epoch": 0.8362657918446006, "grad_norm": 260.2209167480469, "learning_rate": 8.7482697019727e-07, "loss": 28.3043, "step": 413980 }, { "epoch": 0.8362859924772844, "grad_norm": 274.47003173828125, "learning_rate": 8.746297293496209e-07, "loss": 15.6778, "step": 413990 }, { "epoch": 0.8363061931099682, "grad_norm": 209.7880401611328, "learning_rate": 8.744325086085248e-07, "loss": 15.5746, "step": 414000 }, { "epoch": 0.836326393742652, "grad_norm": 233.43719482421875, "learning_rate": 8.74235307974945e-07, "loss": 24.5785, "step": 414010 }, { "epoch": 0.8363465943753359, "grad_norm": 150.94580078125, "learning_rate": 8.740381274498427e-07, "loss": 17.6099, "step": 414020 }, { "epoch": 0.8363667950080197, "grad_norm": 183.77345275878906, "learning_rate": 8.738409670341764e-07, "loss": 19.8493, "step": 414030 }, { "epoch": 0.8363869956407035, "grad_norm": 280.5299987792969, "learning_rate": 8.736438267289088e-07, "loss": 16.6112, "step": 414040 }, { "epoch": 0.8364071962733873, "grad_norm": 58.861270904541016, "learning_rate": 8.734467065350022e-07, "loss": 25.3317, "step": 414050 }, { "epoch": 0.8364273969060712, "grad_norm": 546.0576782226562, "learning_rate": 8.732496064534163e-07, "loss": 29.9853, "step": 414060 }, { "epoch": 0.8364475975387549, "grad_norm": 26.80593490600586, "learning_rate": 8.730525264851092e-07, "loss": 10.7344, "step": 414070 }, { "epoch": 0.8364677981714387, "grad_norm": 203.58743286132812, "learning_rate": 8.728554666310441e-07, "loss": 15.471, "step": 414080 }, { "epoch": 0.8364879988041225, "grad_norm": 536.3981323242188, "learning_rate": 8.726584268921829e-07, "loss": 14.6587, "step": 414090 }, { "epoch": 0.8365081994368063, "grad_norm": 234.03761291503906, "learning_rate": 8.72461407269482e-07, "loss": 11.5718, "step": 414100 }, { "epoch": 0.8365284000694901, "grad_norm": 332.7059631347656, "learning_rate": 8.722644077639031e-07, "loss": 27.6905, "step": 414110 }, { "epoch": 0.836548600702174, "grad_norm": 254.392822265625, "learning_rate": 8.720674283764086e-07, "loss": 17.282, "step": 414120 }, { "epoch": 0.8365688013348578, "grad_norm": 18.005281448364258, "learning_rate": 8.718704691079566e-07, "loss": 15.2275, "step": 414130 }, { "epoch": 0.8365890019675416, "grad_norm": 110.33973693847656, "learning_rate": 8.716735299595059e-07, "loss": 12.4104, "step": 414140 }, { "epoch": 0.8366092026002254, "grad_norm": 85.01341247558594, "learning_rate": 8.714766109320188e-07, "loss": 16.0275, "step": 414150 }, { "epoch": 0.8366294032329092, "grad_norm": 323.1333312988281, "learning_rate": 8.712797120264543e-07, "loss": 16.4371, "step": 414160 }, { "epoch": 0.8366496038655931, "grad_norm": 80.76183319091797, "learning_rate": 8.710828332437704e-07, "loss": 15.2803, "step": 414170 }, { "epoch": 0.8366698044982769, "grad_norm": 163.00003051757812, "learning_rate": 8.70885974584929e-07, "loss": 15.7303, "step": 414180 }, { "epoch": 0.8366900051309607, "grad_norm": 107.52113342285156, "learning_rate": 8.706891360508874e-07, "loss": 14.9579, "step": 414190 }, { "epoch": 0.8367102057636445, "grad_norm": 197.36692810058594, "learning_rate": 8.704923176426072e-07, "loss": 13.0244, "step": 414200 }, { "epoch": 0.8367304063963283, "grad_norm": 191.49429321289062, "learning_rate": 8.702955193610457e-07, "loss": 10.9146, "step": 414210 }, { "epoch": 0.8367506070290122, "grad_norm": 243.19407653808594, "learning_rate": 8.700987412071643e-07, "loss": 14.9434, "step": 414220 }, { "epoch": 0.836770807661696, "grad_norm": 225.71658325195312, "learning_rate": 8.699019831819206e-07, "loss": 10.6056, "step": 414230 }, { "epoch": 0.8367910082943798, "grad_norm": 43.118133544921875, "learning_rate": 8.697052452862726e-07, "loss": 11.0322, "step": 414240 }, { "epoch": 0.8368112089270636, "grad_norm": 140.6959991455078, "learning_rate": 8.695085275211812e-07, "loss": 23.6498, "step": 414250 }, { "epoch": 0.8368314095597474, "grad_norm": 181.30079650878906, "learning_rate": 8.69311829887603e-07, "loss": 13.9085, "step": 414260 }, { "epoch": 0.8368516101924313, "grad_norm": 56.2595329284668, "learning_rate": 8.691151523864993e-07, "loss": 19.484, "step": 414270 }, { "epoch": 0.8368718108251151, "grad_norm": 315.16436767578125, "learning_rate": 8.689184950188279e-07, "loss": 23.1867, "step": 414280 }, { "epoch": 0.8368920114577989, "grad_norm": 174.95767211914062, "learning_rate": 8.687218577855444e-07, "loss": 18.4427, "step": 414290 }, { "epoch": 0.8369122120904827, "grad_norm": 281.5946044921875, "learning_rate": 8.685252406876116e-07, "loss": 20.3793, "step": 414300 }, { "epoch": 0.8369324127231665, "grad_norm": 195.04408264160156, "learning_rate": 8.683286437259852e-07, "loss": 17.0489, "step": 414310 }, { "epoch": 0.8369526133558503, "grad_norm": 318.3096923828125, "learning_rate": 8.68132066901623e-07, "loss": 22.772, "step": 414320 }, { "epoch": 0.8369728139885341, "grad_norm": 399.84332275390625, "learning_rate": 8.679355102154841e-07, "loss": 15.6137, "step": 414330 }, { "epoch": 0.8369930146212179, "grad_norm": 191.85360717773438, "learning_rate": 8.677389736685271e-07, "loss": 13.8592, "step": 414340 }, { "epoch": 0.8370132152539017, "grad_norm": 252.5229949951172, "learning_rate": 8.675424572617092e-07, "loss": 8.6366, "step": 414350 }, { "epoch": 0.8370334158865855, "grad_norm": 541.6778564453125, "learning_rate": 8.673459609959872e-07, "loss": 18.6444, "step": 414360 }, { "epoch": 0.8370536165192694, "grad_norm": 284.8799743652344, "learning_rate": 8.671494848723211e-07, "loss": 17.1924, "step": 414370 }, { "epoch": 0.8370738171519532, "grad_norm": 256.8577575683594, "learning_rate": 8.669530288916667e-07, "loss": 17.2641, "step": 414380 }, { "epoch": 0.837094017784637, "grad_norm": 161.31915283203125, "learning_rate": 8.667565930549809e-07, "loss": 20.2442, "step": 414390 }, { "epoch": 0.8371142184173208, "grad_norm": 385.49468994140625, "learning_rate": 8.665601773632226e-07, "loss": 12.0254, "step": 414400 }, { "epoch": 0.8371344190500046, "grad_norm": 430.1437683105469, "learning_rate": 8.663637818173504e-07, "loss": 16.278, "step": 414410 }, { "epoch": 0.8371546196826885, "grad_norm": 196.4607391357422, "learning_rate": 8.661674064183179e-07, "loss": 16.9478, "step": 414420 }, { "epoch": 0.8371748203153723, "grad_norm": 479.87506103515625, "learning_rate": 8.659710511670838e-07, "loss": 17.9661, "step": 414430 }, { "epoch": 0.8371950209480561, "grad_norm": 417.27471923828125, "learning_rate": 8.657747160646068e-07, "loss": 17.8909, "step": 414440 }, { "epoch": 0.8372152215807399, "grad_norm": 322.0943603515625, "learning_rate": 8.655784011118424e-07, "loss": 22.3142, "step": 414450 }, { "epoch": 0.8372354222134237, "grad_norm": 570.3749389648438, "learning_rate": 8.653821063097462e-07, "loss": 25.6249, "step": 414460 }, { "epoch": 0.8372556228461076, "grad_norm": 167.9744415283203, "learning_rate": 8.65185831659277e-07, "loss": 11.3715, "step": 414470 }, { "epoch": 0.8372758234787914, "grad_norm": 270.15673828125, "learning_rate": 8.649895771613909e-07, "loss": 22.8874, "step": 414480 }, { "epoch": 0.8372960241114752, "grad_norm": 370.10162353515625, "learning_rate": 8.64793342817043e-07, "loss": 23.1194, "step": 414490 }, { "epoch": 0.837316224744159, "grad_norm": 131.26419067382812, "learning_rate": 8.645971286271903e-07, "loss": 16.4128, "step": 414500 }, { "epoch": 0.8373364253768428, "grad_norm": 500.9389953613281, "learning_rate": 8.644009345927912e-07, "loss": 22.4138, "step": 414510 }, { "epoch": 0.8373566260095267, "grad_norm": 443.0558166503906, "learning_rate": 8.642047607148008e-07, "loss": 18.391, "step": 414520 }, { "epoch": 0.8373768266422105, "grad_norm": 407.6045227050781, "learning_rate": 8.640086069941727e-07, "loss": 18.0497, "step": 414530 }, { "epoch": 0.8373970272748943, "grad_norm": 480.2431945800781, "learning_rate": 8.638124734318664e-07, "loss": 22.4925, "step": 414540 }, { "epoch": 0.8374172279075781, "grad_norm": 179.4869842529297, "learning_rate": 8.636163600288372e-07, "loss": 17.757, "step": 414550 }, { "epoch": 0.8374374285402619, "grad_norm": 336.7057800292969, "learning_rate": 8.634202667860381e-07, "loss": 23.9821, "step": 414560 }, { "epoch": 0.8374576291729458, "grad_norm": 439.4710388183594, "learning_rate": 8.632241937044283e-07, "loss": 15.3036, "step": 414570 }, { "epoch": 0.8374778298056295, "grad_norm": 25.490495681762695, "learning_rate": 8.630281407849612e-07, "loss": 11.172, "step": 414580 }, { "epoch": 0.8374980304383133, "grad_norm": 282.605712890625, "learning_rate": 8.628321080285945e-07, "loss": 19.1504, "step": 414590 }, { "epoch": 0.8375182310709971, "grad_norm": 252.8698272705078, "learning_rate": 8.626360954362817e-07, "loss": 11.8641, "step": 414600 }, { "epoch": 0.8375384317036809, "grad_norm": 277.7889404296875, "learning_rate": 8.62440103008978e-07, "loss": 23.6256, "step": 414610 }, { "epoch": 0.8375586323363647, "grad_norm": 596.74853515625, "learning_rate": 8.622441307476404e-07, "loss": 28.0368, "step": 414620 }, { "epoch": 0.8375788329690486, "grad_norm": 336.85687255859375, "learning_rate": 8.62048178653223e-07, "loss": 11.3265, "step": 414630 }, { "epoch": 0.8375990336017324, "grad_norm": 131.66384887695312, "learning_rate": 8.618522467266799e-07, "loss": 13.0395, "step": 414640 }, { "epoch": 0.8376192342344162, "grad_norm": 9.178425788879395, "learning_rate": 8.616563349689672e-07, "loss": 17.3413, "step": 414650 }, { "epoch": 0.8376394348671, "grad_norm": 11.545313835144043, "learning_rate": 8.614604433810408e-07, "loss": 12.4469, "step": 414660 }, { "epoch": 0.8376596354997838, "grad_norm": 347.11419677734375, "learning_rate": 8.612645719638541e-07, "loss": 8.1575, "step": 414670 }, { "epoch": 0.8376798361324677, "grad_norm": 402.7702941894531, "learning_rate": 8.610687207183604e-07, "loss": 28.0914, "step": 414680 }, { "epoch": 0.8377000367651515, "grad_norm": 168.47830200195312, "learning_rate": 8.608728896455177e-07, "loss": 9.4222, "step": 414690 }, { "epoch": 0.8377202373978353, "grad_norm": 453.04620361328125, "learning_rate": 8.606770787462776e-07, "loss": 26.189, "step": 414700 }, { "epoch": 0.8377404380305191, "grad_norm": 466.7608947753906, "learning_rate": 8.604812880215946e-07, "loss": 22.7566, "step": 414710 }, { "epoch": 0.8377606386632029, "grad_norm": 259.7493591308594, "learning_rate": 8.60285517472424e-07, "loss": 20.6435, "step": 414720 }, { "epoch": 0.8377808392958868, "grad_norm": 14.335615158081055, "learning_rate": 8.600897670997205e-07, "loss": 12.8334, "step": 414730 }, { "epoch": 0.8378010399285706, "grad_norm": 164.2548828125, "learning_rate": 8.598940369044378e-07, "loss": 22.1528, "step": 414740 }, { "epoch": 0.8378212405612544, "grad_norm": 333.78106689453125, "learning_rate": 8.596983268875281e-07, "loss": 10.9789, "step": 414750 }, { "epoch": 0.8378414411939382, "grad_norm": 1787.5018310546875, "learning_rate": 8.595026370499477e-07, "loss": 19.8841, "step": 414760 }, { "epoch": 0.837861641826622, "grad_norm": 123.53643035888672, "learning_rate": 8.59306967392649e-07, "loss": 12.7642, "step": 414770 }, { "epoch": 0.8378818424593059, "grad_norm": 81.70970916748047, "learning_rate": 8.59111317916585e-07, "loss": 15.7224, "step": 414780 }, { "epoch": 0.8379020430919897, "grad_norm": 379.9226379394531, "learning_rate": 8.589156886227112e-07, "loss": 15.2916, "step": 414790 }, { "epoch": 0.8379222437246735, "grad_norm": 234.67413330078125, "learning_rate": 8.587200795119793e-07, "loss": 6.8874, "step": 414800 }, { "epoch": 0.8379424443573573, "grad_norm": 150.3189697265625, "learning_rate": 8.585244905853446e-07, "loss": 43.6563, "step": 414810 }, { "epoch": 0.8379626449900411, "grad_norm": 217.57150268554688, "learning_rate": 8.583289218437574e-07, "loss": 39.709, "step": 414820 }, { "epoch": 0.837982845622725, "grad_norm": 145.3787078857422, "learning_rate": 8.581333732881747e-07, "loss": 9.1687, "step": 414830 }, { "epoch": 0.8380030462554087, "grad_norm": 378.66497802734375, "learning_rate": 8.579378449195469e-07, "loss": 16.0829, "step": 414840 }, { "epoch": 0.8380232468880925, "grad_norm": 525.75439453125, "learning_rate": 8.577423367388271e-07, "loss": 27.0119, "step": 414850 }, { "epoch": 0.8380434475207763, "grad_norm": 132.99227905273438, "learning_rate": 8.575468487469696e-07, "loss": 22.3966, "step": 414860 }, { "epoch": 0.8380636481534601, "grad_norm": 31.263547897338867, "learning_rate": 8.573513809449252e-07, "loss": 13.8671, "step": 414870 }, { "epoch": 0.838083848786144, "grad_norm": 825.5869140625, "learning_rate": 8.571559333336488e-07, "loss": 21.7527, "step": 414880 }, { "epoch": 0.8381040494188278, "grad_norm": 190.6361846923828, "learning_rate": 8.569605059140923e-07, "loss": 19.9845, "step": 414890 }, { "epoch": 0.8381242500515116, "grad_norm": 196.1671142578125, "learning_rate": 8.567650986872061e-07, "loss": 20.4749, "step": 414900 }, { "epoch": 0.8381444506841954, "grad_norm": 952.746337890625, "learning_rate": 8.565697116539462e-07, "loss": 27.8025, "step": 414910 }, { "epoch": 0.8381646513168792, "grad_norm": 286.4110107421875, "learning_rate": 8.563743448152623e-07, "loss": 17.9973, "step": 414920 }, { "epoch": 0.838184851949563, "grad_norm": 223.2017364501953, "learning_rate": 8.561789981721064e-07, "loss": 12.8659, "step": 414930 }, { "epoch": 0.8382050525822469, "grad_norm": 317.06500244140625, "learning_rate": 8.559836717254316e-07, "loss": 29.7436, "step": 414940 }, { "epoch": 0.8382252532149307, "grad_norm": 353.8282775878906, "learning_rate": 8.557883654761906e-07, "loss": 36.2793, "step": 414950 }, { "epoch": 0.8382454538476145, "grad_norm": 320.6378479003906, "learning_rate": 8.555930794253347e-07, "loss": 10.8001, "step": 414960 }, { "epoch": 0.8382656544802983, "grad_norm": 5.422746181488037, "learning_rate": 8.553978135738139e-07, "loss": 11.0617, "step": 414970 }, { "epoch": 0.8382858551129821, "grad_norm": 211.48233032226562, "learning_rate": 8.552025679225834e-07, "loss": 11.7528, "step": 414980 }, { "epoch": 0.838306055745666, "grad_norm": 146.37513732910156, "learning_rate": 8.550073424725924e-07, "loss": 16.3507, "step": 414990 }, { "epoch": 0.8383262563783498, "grad_norm": 135.98483276367188, "learning_rate": 8.54812137224792e-07, "loss": 16.4662, "step": 415000 }, { "epoch": 0.8383464570110336, "grad_norm": 140.6303253173828, "learning_rate": 8.54616952180134e-07, "loss": 23.8522, "step": 415010 }, { "epoch": 0.8383666576437174, "grad_norm": 114.8542709350586, "learning_rate": 8.544217873395727e-07, "loss": 23.6581, "step": 415020 }, { "epoch": 0.8383868582764012, "grad_norm": 13.554418563842773, "learning_rate": 8.542266427040546e-07, "loss": 8.2832, "step": 415030 }, { "epoch": 0.8384070589090851, "grad_norm": 291.3658447265625, "learning_rate": 8.540315182745329e-07, "loss": 15.8585, "step": 415040 }, { "epoch": 0.8384272595417689, "grad_norm": 214.11924743652344, "learning_rate": 8.5383641405196e-07, "loss": 23.9343, "step": 415050 }, { "epoch": 0.8384474601744527, "grad_norm": 342.9591369628906, "learning_rate": 8.536413300372859e-07, "loss": 9.7775, "step": 415060 }, { "epoch": 0.8384676608071365, "grad_norm": 232.239990234375, "learning_rate": 8.534462662314597e-07, "loss": 18.7387, "step": 415070 }, { "epoch": 0.8384878614398203, "grad_norm": 283.0412902832031, "learning_rate": 8.532512226354345e-07, "loss": 14.5672, "step": 415080 }, { "epoch": 0.8385080620725041, "grad_norm": 302.8951721191406, "learning_rate": 8.530561992501596e-07, "loss": 23.541, "step": 415090 }, { "epoch": 0.8385282627051879, "grad_norm": 195.97979736328125, "learning_rate": 8.528611960765853e-07, "loss": 20.869, "step": 415100 }, { "epoch": 0.8385484633378717, "grad_norm": 211.0697479248047, "learning_rate": 8.526662131156621e-07, "loss": 12.5433, "step": 415110 }, { "epoch": 0.8385686639705555, "grad_norm": 215.28033447265625, "learning_rate": 8.524712503683419e-07, "loss": 15.3362, "step": 415120 }, { "epoch": 0.8385888646032393, "grad_norm": 321.8779602050781, "learning_rate": 8.522763078355739e-07, "loss": 11.3497, "step": 415130 }, { "epoch": 0.8386090652359232, "grad_norm": 327.73175048828125, "learning_rate": 8.520813855183069e-07, "loss": 9.7973, "step": 415140 }, { "epoch": 0.838629265868607, "grad_norm": 209.07139587402344, "learning_rate": 8.518864834174939e-07, "loss": 12.0724, "step": 415150 }, { "epoch": 0.8386494665012908, "grad_norm": 606.3671264648438, "learning_rate": 8.516916015340826e-07, "loss": 24.7763, "step": 415160 }, { "epoch": 0.8386696671339746, "grad_norm": 179.931396484375, "learning_rate": 8.514967398690215e-07, "loss": 16.136, "step": 415170 }, { "epoch": 0.8386898677666584, "grad_norm": 142.1053466796875, "learning_rate": 8.513018984232641e-07, "loss": 19.5031, "step": 415180 }, { "epoch": 0.8387100683993423, "grad_norm": 322.91265869140625, "learning_rate": 8.511070771977569e-07, "loss": 17.573, "step": 415190 }, { "epoch": 0.8387302690320261, "grad_norm": 551.8099365234375, "learning_rate": 8.509122761934519e-07, "loss": 17.7988, "step": 415200 }, { "epoch": 0.8387504696647099, "grad_norm": 458.09405517578125, "learning_rate": 8.507174954112968e-07, "loss": 17.8225, "step": 415210 }, { "epoch": 0.8387706702973937, "grad_norm": 346.6591796875, "learning_rate": 8.505227348522404e-07, "loss": 32.0316, "step": 415220 }, { "epoch": 0.8387908709300775, "grad_norm": 445.4053649902344, "learning_rate": 8.503279945172338e-07, "loss": 22.0309, "step": 415230 }, { "epoch": 0.8388110715627614, "grad_norm": 280.9601135253906, "learning_rate": 8.501332744072255e-07, "loss": 13.3432, "step": 415240 }, { "epoch": 0.8388312721954452, "grad_norm": 185.4805145263672, "learning_rate": 8.499385745231631e-07, "loss": 13.6384, "step": 415250 }, { "epoch": 0.838851472828129, "grad_norm": 362.6714172363281, "learning_rate": 8.497438948659969e-07, "loss": 21.4054, "step": 415260 }, { "epoch": 0.8388716734608128, "grad_norm": 204.6597137451172, "learning_rate": 8.495492354366764e-07, "loss": 8.6514, "step": 415270 }, { "epoch": 0.8388918740934966, "grad_norm": 390.95526123046875, "learning_rate": 8.493545962361499e-07, "loss": 23.9221, "step": 415280 }, { "epoch": 0.8389120747261805, "grad_norm": 775.6529541015625, "learning_rate": 8.491599772653647e-07, "loss": 23.4602, "step": 415290 }, { "epoch": 0.8389322753588643, "grad_norm": 232.59945678710938, "learning_rate": 8.489653785252711e-07, "loss": 22.492, "step": 415300 }, { "epoch": 0.8389524759915481, "grad_norm": 314.16375732421875, "learning_rate": 8.487708000168166e-07, "loss": 18.8281, "step": 415310 }, { "epoch": 0.8389726766242319, "grad_norm": 344.5976867675781, "learning_rate": 8.485762417409488e-07, "loss": 25.5404, "step": 415320 }, { "epoch": 0.8389928772569157, "grad_norm": 92.3617935180664, "learning_rate": 8.483817036986169e-07, "loss": 13.2673, "step": 415330 }, { "epoch": 0.8390130778895996, "grad_norm": 186.29055786132812, "learning_rate": 8.481871858907703e-07, "loss": 17.2819, "step": 415340 }, { "epoch": 0.8390332785222833, "grad_norm": 197.59742736816406, "learning_rate": 8.479926883183559e-07, "loss": 16.5889, "step": 415350 }, { "epoch": 0.8390534791549671, "grad_norm": 293.3792724609375, "learning_rate": 8.477982109823202e-07, "loss": 19.6252, "step": 415360 }, { "epoch": 0.8390736797876509, "grad_norm": 46.032405853271484, "learning_rate": 8.476037538836134e-07, "loss": 12.6984, "step": 415370 }, { "epoch": 0.8390938804203347, "grad_norm": 282.4099426269531, "learning_rate": 8.474093170231828e-07, "loss": 14.5155, "step": 415380 }, { "epoch": 0.8391140810530185, "grad_norm": 382.4593505859375, "learning_rate": 8.472149004019742e-07, "loss": 13.1272, "step": 415390 }, { "epoch": 0.8391342816857024, "grad_norm": 308.9916076660156, "learning_rate": 8.470205040209362e-07, "loss": 18.197, "step": 415400 }, { "epoch": 0.8391544823183862, "grad_norm": 223.21714782714844, "learning_rate": 8.46826127881018e-07, "loss": 14.9543, "step": 415410 }, { "epoch": 0.83917468295107, "grad_norm": 302.3641357421875, "learning_rate": 8.466317719831657e-07, "loss": 19.7706, "step": 415420 }, { "epoch": 0.8391948835837538, "grad_norm": 128.34242248535156, "learning_rate": 8.464374363283245e-07, "loss": 18.8862, "step": 415430 }, { "epoch": 0.8392150842164376, "grad_norm": 233.16934204101562, "learning_rate": 8.462431209174454e-07, "loss": 14.2022, "step": 415440 }, { "epoch": 0.8392352848491215, "grad_norm": 568.4068603515625, "learning_rate": 8.460488257514731e-07, "loss": 34.3173, "step": 415450 }, { "epoch": 0.8392554854818053, "grad_norm": 279.92254638671875, "learning_rate": 8.458545508313543e-07, "loss": 9.9893, "step": 415460 }, { "epoch": 0.8392756861144891, "grad_norm": 207.1848907470703, "learning_rate": 8.456602961580374e-07, "loss": 16.2524, "step": 415470 }, { "epoch": 0.8392958867471729, "grad_norm": 461.9085693359375, "learning_rate": 8.454660617324672e-07, "loss": 23.8592, "step": 415480 }, { "epoch": 0.8393160873798567, "grad_norm": 37.96525192260742, "learning_rate": 8.452718475555927e-07, "loss": 14.8712, "step": 415490 }, { "epoch": 0.8393362880125406, "grad_norm": 97.9795913696289, "learning_rate": 8.450776536283594e-07, "loss": 12.6921, "step": 415500 }, { "epoch": 0.8393564886452244, "grad_norm": 219.89158630371094, "learning_rate": 8.448834799517125e-07, "loss": 19.5282, "step": 415510 }, { "epoch": 0.8393766892779082, "grad_norm": 116.95333099365234, "learning_rate": 8.446893265266005e-07, "loss": 13.982, "step": 415520 }, { "epoch": 0.839396889910592, "grad_norm": 93.02816009521484, "learning_rate": 8.444951933539691e-07, "loss": 15.5802, "step": 415530 }, { "epoch": 0.8394170905432758, "grad_norm": 312.5517578125, "learning_rate": 8.443010804347629e-07, "loss": 10.8696, "step": 415540 }, { "epoch": 0.8394372911759597, "grad_norm": 283.6369934082031, "learning_rate": 8.441069877699287e-07, "loss": 28.5093, "step": 415550 }, { "epoch": 0.8394574918086435, "grad_norm": 166.25437927246094, "learning_rate": 8.439129153604148e-07, "loss": 19.8871, "step": 415560 }, { "epoch": 0.8394776924413273, "grad_norm": 448.29290771484375, "learning_rate": 8.437188632071652e-07, "loss": 22.7707, "step": 415570 }, { "epoch": 0.8394978930740111, "grad_norm": 283.9698486328125, "learning_rate": 8.435248313111244e-07, "loss": 18.2204, "step": 415580 }, { "epoch": 0.839518093706695, "grad_norm": 172.68319702148438, "learning_rate": 8.433308196732403e-07, "loss": 11.5939, "step": 415590 }, { "epoch": 0.8395382943393787, "grad_norm": 0.0, "learning_rate": 8.431368282944585e-07, "loss": 20.33, "step": 415600 }, { "epoch": 0.8395584949720625, "grad_norm": 370.36212158203125, "learning_rate": 8.42942857175722e-07, "loss": 11.8863, "step": 415610 }, { "epoch": 0.8395786956047463, "grad_norm": 426.9788818359375, "learning_rate": 8.427489063179778e-07, "loss": 21.381, "step": 415620 }, { "epoch": 0.8395988962374301, "grad_norm": 306.0714416503906, "learning_rate": 8.425549757221734e-07, "loss": 21.8705, "step": 415630 }, { "epoch": 0.8396190968701139, "grad_norm": 201.43809509277344, "learning_rate": 8.423610653892494e-07, "loss": 19.7011, "step": 415640 }, { "epoch": 0.8396392975027978, "grad_norm": 45.57717514038086, "learning_rate": 8.421671753201538e-07, "loss": 16.8929, "step": 415650 }, { "epoch": 0.8396594981354816, "grad_norm": 223.25119018554688, "learning_rate": 8.419733055158319e-07, "loss": 12.5993, "step": 415660 }, { "epoch": 0.8396796987681654, "grad_norm": 414.7265625, "learning_rate": 8.41779455977228e-07, "loss": 14.2236, "step": 415670 }, { "epoch": 0.8396998994008492, "grad_norm": 250.3630828857422, "learning_rate": 8.415856267052852e-07, "loss": 13.0204, "step": 415680 }, { "epoch": 0.839720100033533, "grad_norm": 483.5674133300781, "learning_rate": 8.413918177009512e-07, "loss": 15.0705, "step": 415690 }, { "epoch": 0.8397403006662169, "grad_norm": 338.7718811035156, "learning_rate": 8.411980289651689e-07, "loss": 17.5926, "step": 415700 }, { "epoch": 0.8397605012989007, "grad_norm": 351.5898742675781, "learning_rate": 8.410042604988822e-07, "loss": 20.5201, "step": 415710 }, { "epoch": 0.8397807019315845, "grad_norm": 224.1457977294922, "learning_rate": 8.408105123030358e-07, "loss": 15.434, "step": 415720 }, { "epoch": 0.8398009025642683, "grad_norm": 662.4718017578125, "learning_rate": 8.406167843785762e-07, "loss": 18.361, "step": 415730 }, { "epoch": 0.8398211031969521, "grad_norm": 385.6331787109375, "learning_rate": 8.404230767264454e-07, "loss": 16.9584, "step": 415740 }, { "epoch": 0.839841303829636, "grad_norm": 178.1551513671875, "learning_rate": 8.402293893475872e-07, "loss": 29.176, "step": 415750 }, { "epoch": 0.8398615044623198, "grad_norm": 358.7070007324219, "learning_rate": 8.400357222429473e-07, "loss": 6.3388, "step": 415760 }, { "epoch": 0.8398817050950036, "grad_norm": 246.84339904785156, "learning_rate": 8.39842075413469e-07, "loss": 20.3625, "step": 415770 }, { "epoch": 0.8399019057276874, "grad_norm": 753.1893920898438, "learning_rate": 8.396484488600948e-07, "loss": 27.4793, "step": 415780 }, { "epoch": 0.8399221063603712, "grad_norm": 13.945518493652344, "learning_rate": 8.394548425837706e-07, "loss": 12.5758, "step": 415790 }, { "epoch": 0.839942306993055, "grad_norm": 257.33404541015625, "learning_rate": 8.392612565854374e-07, "loss": 17.9956, "step": 415800 }, { "epoch": 0.8399625076257389, "grad_norm": 254.9739532470703, "learning_rate": 8.390676908660417e-07, "loss": 21.9159, "step": 415810 }, { "epoch": 0.8399827082584227, "grad_norm": 163.40708923339844, "learning_rate": 8.388741454265254e-07, "loss": 29.7866, "step": 415820 }, { "epoch": 0.8400029088911065, "grad_norm": 852.9907836914062, "learning_rate": 8.386806202678305e-07, "loss": 34.6078, "step": 415830 }, { "epoch": 0.8400231095237903, "grad_norm": 286.45904541015625, "learning_rate": 8.384871153909025e-07, "loss": 30.0547, "step": 415840 }, { "epoch": 0.8400433101564742, "grad_norm": 600.2648315429688, "learning_rate": 8.382936307966838e-07, "loss": 14.7042, "step": 415850 }, { "epoch": 0.8400635107891579, "grad_norm": 238.147216796875, "learning_rate": 8.381001664861161e-07, "loss": 17.0011, "step": 415860 }, { "epoch": 0.8400837114218417, "grad_norm": 267.9955139160156, "learning_rate": 8.379067224601433e-07, "loss": 22.0742, "step": 415870 }, { "epoch": 0.8401039120545255, "grad_norm": 527.9053955078125, "learning_rate": 8.3771329871971e-07, "loss": 26.9575, "step": 415880 }, { "epoch": 0.8401241126872093, "grad_norm": 2.894015312194824, "learning_rate": 8.375198952657565e-07, "loss": 16.206, "step": 415890 }, { "epoch": 0.8401443133198931, "grad_norm": 166.35169982910156, "learning_rate": 8.373265120992252e-07, "loss": 16.0475, "step": 415900 }, { "epoch": 0.840164513952577, "grad_norm": 143.21441650390625, "learning_rate": 8.371331492210611e-07, "loss": 25.259, "step": 415910 }, { "epoch": 0.8401847145852608, "grad_norm": 345.2638854980469, "learning_rate": 8.369398066322049e-07, "loss": 18.6365, "step": 415920 }, { "epoch": 0.8402049152179446, "grad_norm": 215.2998046875, "learning_rate": 8.367464843335981e-07, "loss": 18.198, "step": 415930 }, { "epoch": 0.8402251158506284, "grad_norm": 277.3001403808594, "learning_rate": 8.365531823261841e-07, "loss": 18.4931, "step": 415940 }, { "epoch": 0.8402453164833122, "grad_norm": 784.74462890625, "learning_rate": 8.363599006109057e-07, "loss": 27.0408, "step": 415950 }, { "epoch": 0.8402655171159961, "grad_norm": 169.82662963867188, "learning_rate": 8.361666391887047e-07, "loss": 16.422, "step": 415960 }, { "epoch": 0.8402857177486799, "grad_norm": 569.0744018554688, "learning_rate": 8.359733980605211e-07, "loss": 16.4352, "step": 415970 }, { "epoch": 0.8403059183813637, "grad_norm": 154.89569091796875, "learning_rate": 8.357801772272988e-07, "loss": 17.9488, "step": 415980 }, { "epoch": 0.8403261190140475, "grad_norm": 161.10255432128906, "learning_rate": 8.355869766899793e-07, "loss": 22.8508, "step": 415990 }, { "epoch": 0.8403463196467313, "grad_norm": 581.5272216796875, "learning_rate": 8.353937964495029e-07, "loss": 16.3175, "step": 416000 }, { "epoch": 0.8403665202794152, "grad_norm": 272.4446716308594, "learning_rate": 8.352006365068116e-07, "loss": 19.4851, "step": 416010 }, { "epoch": 0.840386720912099, "grad_norm": 390.79071044921875, "learning_rate": 8.350074968628486e-07, "loss": 11.443, "step": 416020 }, { "epoch": 0.8404069215447828, "grad_norm": 901.3424072265625, "learning_rate": 8.348143775185536e-07, "loss": 29.6974, "step": 416030 }, { "epoch": 0.8404271221774666, "grad_norm": 210.54656982421875, "learning_rate": 8.346212784748676e-07, "loss": 9.7308, "step": 416040 }, { "epoch": 0.8404473228101504, "grad_norm": 269.81658935546875, "learning_rate": 8.344281997327331e-07, "loss": 16.9395, "step": 416050 }, { "epoch": 0.8404675234428343, "grad_norm": 235.6056365966797, "learning_rate": 8.342351412930899e-07, "loss": 19.3126, "step": 416060 }, { "epoch": 0.8404877240755181, "grad_norm": 31.081138610839844, "learning_rate": 8.340421031568791e-07, "loss": 22.9181, "step": 416070 }, { "epoch": 0.8405079247082019, "grad_norm": 154.01858520507812, "learning_rate": 8.338490853250425e-07, "loss": 17.7353, "step": 416080 }, { "epoch": 0.8405281253408857, "grad_norm": 234.2892608642578, "learning_rate": 8.336560877985189e-07, "loss": 18.4745, "step": 416090 }, { "epoch": 0.8405483259735695, "grad_norm": 13.171346664428711, "learning_rate": 8.334631105782515e-07, "loss": 20.1426, "step": 416100 }, { "epoch": 0.8405685266062533, "grad_norm": 153.8193817138672, "learning_rate": 8.332701536651794e-07, "loss": 11.9751, "step": 416110 }, { "epoch": 0.8405887272389371, "grad_norm": 179.3269500732422, "learning_rate": 8.330772170602424e-07, "loss": 18.4301, "step": 416120 }, { "epoch": 0.8406089278716209, "grad_norm": 227.1163330078125, "learning_rate": 8.328843007643828e-07, "loss": 8.1332, "step": 416130 }, { "epoch": 0.8406291285043047, "grad_norm": 0.0, "learning_rate": 8.326914047785395e-07, "loss": 12.582, "step": 416140 }, { "epoch": 0.8406493291369885, "grad_norm": 456.7984313964844, "learning_rate": 8.324985291036513e-07, "loss": 22.064, "step": 416150 }, { "epoch": 0.8406695297696724, "grad_norm": 228.72694396972656, "learning_rate": 8.323056737406604e-07, "loss": 26.7164, "step": 416160 }, { "epoch": 0.8406897304023562, "grad_norm": 339.3033142089844, "learning_rate": 8.32112838690507e-07, "loss": 38.1767, "step": 416170 }, { "epoch": 0.84070993103504, "grad_norm": 330.8023376464844, "learning_rate": 8.319200239541303e-07, "loss": 15.1, "step": 416180 }, { "epoch": 0.8407301316677238, "grad_norm": 292.2325439453125, "learning_rate": 8.317272295324691e-07, "loss": 26.432, "step": 416190 }, { "epoch": 0.8407503323004076, "grad_norm": 106.44873809814453, "learning_rate": 8.315344554264643e-07, "loss": 9.5934, "step": 416200 }, { "epoch": 0.8407705329330915, "grad_norm": 245.3540496826172, "learning_rate": 8.313417016370557e-07, "loss": 12.6829, "step": 416210 }, { "epoch": 0.8407907335657753, "grad_norm": 538.5496826171875, "learning_rate": 8.311489681651803e-07, "loss": 21.0759, "step": 416220 }, { "epoch": 0.8408109341984591, "grad_norm": 218.37852478027344, "learning_rate": 8.309562550117789e-07, "loss": 16.8421, "step": 416230 }, { "epoch": 0.8408311348311429, "grad_norm": 79.12779998779297, "learning_rate": 8.307635621777943e-07, "loss": 16.9748, "step": 416240 }, { "epoch": 0.8408513354638267, "grad_norm": 515.6277465820312, "learning_rate": 8.305708896641596e-07, "loss": 24.2234, "step": 416250 }, { "epoch": 0.8408715360965106, "grad_norm": 437.4981689453125, "learning_rate": 8.303782374718167e-07, "loss": 18.3015, "step": 416260 }, { "epoch": 0.8408917367291944, "grad_norm": 342.31292724609375, "learning_rate": 8.30185605601706e-07, "loss": 15.7057, "step": 416270 }, { "epoch": 0.8409119373618782, "grad_norm": 495.4305114746094, "learning_rate": 8.299929940547646e-07, "loss": 22.288, "step": 416280 }, { "epoch": 0.840932137994562, "grad_norm": 182.36294555664062, "learning_rate": 8.298004028319306e-07, "loss": 19.4468, "step": 416290 }, { "epoch": 0.8409523386272458, "grad_norm": 286.6905822753906, "learning_rate": 8.296078319341444e-07, "loss": 24.5549, "step": 416300 }, { "epoch": 0.8409725392599297, "grad_norm": 277.4048156738281, "learning_rate": 8.294152813623446e-07, "loss": 20.0957, "step": 416310 }, { "epoch": 0.8409927398926135, "grad_norm": 1208.8223876953125, "learning_rate": 8.292227511174671e-07, "loss": 26.8361, "step": 416320 }, { "epoch": 0.8410129405252973, "grad_norm": 22.846601486206055, "learning_rate": 8.29030241200452e-07, "loss": 14.6221, "step": 416330 }, { "epoch": 0.8410331411579811, "grad_norm": 220.26394653320312, "learning_rate": 8.288377516122393e-07, "loss": 23.0588, "step": 416340 }, { "epoch": 0.8410533417906649, "grad_norm": 250.53338623046875, "learning_rate": 8.286452823537649e-07, "loss": 13.2585, "step": 416350 }, { "epoch": 0.8410735424233488, "grad_norm": 5.4355974197387695, "learning_rate": 8.284528334259667e-07, "loss": 14.0041, "step": 416360 }, { "epoch": 0.8410937430560325, "grad_norm": 167.6811981201172, "learning_rate": 8.282604048297848e-07, "loss": 11.6128, "step": 416370 }, { "epoch": 0.8411139436887163, "grad_norm": 443.85406494140625, "learning_rate": 8.280679965661554e-07, "loss": 17.165, "step": 416380 }, { "epoch": 0.8411341443214001, "grad_norm": 398.3467102050781, "learning_rate": 8.278756086360157e-07, "loss": 17.1773, "step": 416390 }, { "epoch": 0.8411543449540839, "grad_norm": 225.92886352539062, "learning_rate": 8.276832410403051e-07, "loss": 20.6166, "step": 416400 }, { "epoch": 0.8411745455867677, "grad_norm": 569.8917846679688, "learning_rate": 8.274908937799592e-07, "loss": 19.1262, "step": 416410 }, { "epoch": 0.8411947462194516, "grad_norm": 567.8353881835938, "learning_rate": 8.27298566855918e-07, "loss": 19.2651, "step": 416420 }, { "epoch": 0.8412149468521354, "grad_norm": 211.9987030029297, "learning_rate": 8.271062602691171e-07, "loss": 41.0871, "step": 416430 }, { "epoch": 0.8412351474848192, "grad_norm": 381.0174560546875, "learning_rate": 8.269139740204935e-07, "loss": 19.9504, "step": 416440 }, { "epoch": 0.841255348117503, "grad_norm": 180.1284637451172, "learning_rate": 8.267217081109863e-07, "loss": 23.8699, "step": 416450 }, { "epoch": 0.8412755487501868, "grad_norm": 210.49949645996094, "learning_rate": 8.265294625415299e-07, "loss": 13.3772, "step": 416460 }, { "epoch": 0.8412957493828707, "grad_norm": 171.65863037109375, "learning_rate": 8.263372373130635e-07, "loss": 22.0054, "step": 416470 }, { "epoch": 0.8413159500155545, "grad_norm": 72.53048706054688, "learning_rate": 8.261450324265225e-07, "loss": 14.0243, "step": 416480 }, { "epoch": 0.8413361506482383, "grad_norm": 131.6386260986328, "learning_rate": 8.259528478828455e-07, "loss": 24.0992, "step": 416490 }, { "epoch": 0.8413563512809221, "grad_norm": 328.5111083984375, "learning_rate": 8.25760683682968e-07, "loss": 28.6846, "step": 416500 }, { "epoch": 0.8413765519136059, "grad_norm": 508.3952941894531, "learning_rate": 8.255685398278257e-07, "loss": 22.9963, "step": 416510 }, { "epoch": 0.8413967525462898, "grad_norm": 449.0733642578125, "learning_rate": 8.25376416318357e-07, "loss": 16.2011, "step": 416520 }, { "epoch": 0.8414169531789736, "grad_norm": 0.0, "learning_rate": 8.25184313155497e-07, "loss": 11.8838, "step": 416530 }, { "epoch": 0.8414371538116574, "grad_norm": 632.0184936523438, "learning_rate": 8.249922303401814e-07, "loss": 14.1475, "step": 416540 }, { "epoch": 0.8414573544443412, "grad_norm": 464.84503173828125, "learning_rate": 8.248001678733475e-07, "loss": 20.4043, "step": 416550 }, { "epoch": 0.841477555077025, "grad_norm": 279.06195068359375, "learning_rate": 8.246081257559324e-07, "loss": 28.984, "step": 416560 }, { "epoch": 0.8414977557097089, "grad_norm": 236.7400665283203, "learning_rate": 8.244161039888709e-07, "loss": 10.4742, "step": 416570 }, { "epoch": 0.8415179563423927, "grad_norm": 180.23191833496094, "learning_rate": 8.242241025730974e-07, "loss": 20.2387, "step": 416580 }, { "epoch": 0.8415381569750765, "grad_norm": 212.406494140625, "learning_rate": 8.240321215095504e-07, "loss": 26.1684, "step": 416590 }, { "epoch": 0.8415583576077603, "grad_norm": 135.87879943847656, "learning_rate": 8.238401607991647e-07, "loss": 21.6275, "step": 416600 }, { "epoch": 0.8415785582404441, "grad_norm": 352.84722900390625, "learning_rate": 8.236482204428737e-07, "loss": 14.9866, "step": 416610 }, { "epoch": 0.841598758873128, "grad_norm": 362.3521423339844, "learning_rate": 8.234563004416151e-07, "loss": 7.3022, "step": 416620 }, { "epoch": 0.8416189595058117, "grad_norm": 462.068115234375, "learning_rate": 8.232644007963253e-07, "loss": 39.4302, "step": 416630 }, { "epoch": 0.8416391601384955, "grad_norm": 511.39337158203125, "learning_rate": 8.230725215079383e-07, "loss": 18.1727, "step": 416640 }, { "epoch": 0.8416593607711793, "grad_norm": 340.6414489746094, "learning_rate": 8.228806625773878e-07, "loss": 13.1622, "step": 416650 }, { "epoch": 0.8416795614038631, "grad_norm": 159.79379272460938, "learning_rate": 8.226888240056114e-07, "loss": 13.4764, "step": 416660 }, { "epoch": 0.841699762036547, "grad_norm": 489.9790344238281, "learning_rate": 8.224970057935433e-07, "loss": 13.4472, "step": 416670 }, { "epoch": 0.8417199626692308, "grad_norm": 325.3725891113281, "learning_rate": 8.223052079421167e-07, "loss": 19.6244, "step": 416680 }, { "epoch": 0.8417401633019146, "grad_norm": 544.5877075195312, "learning_rate": 8.221134304522694e-07, "loss": 19.5634, "step": 416690 }, { "epoch": 0.8417603639345984, "grad_norm": 313.67694091796875, "learning_rate": 8.21921673324933e-07, "loss": 12.8849, "step": 416700 }, { "epoch": 0.8417805645672822, "grad_norm": 389.61370849609375, "learning_rate": 8.217299365610448e-07, "loss": 16.3484, "step": 416710 }, { "epoch": 0.841800765199966, "grad_norm": 189.2657012939453, "learning_rate": 8.215382201615379e-07, "loss": 17.73, "step": 416720 }, { "epoch": 0.8418209658326499, "grad_norm": 297.194580078125, "learning_rate": 8.213465241273461e-07, "loss": 8.3405, "step": 416730 }, { "epoch": 0.8418411664653337, "grad_norm": 161.0182342529297, "learning_rate": 8.211548484594057e-07, "loss": 20.1885, "step": 416740 }, { "epoch": 0.8418613670980175, "grad_norm": 270.6279602050781, "learning_rate": 8.209631931586499e-07, "loss": 15.3418, "step": 416750 }, { "epoch": 0.8418815677307013, "grad_norm": 131.4957733154297, "learning_rate": 8.207715582260112e-07, "loss": 26.1803, "step": 416760 }, { "epoch": 0.8419017683633852, "grad_norm": 190.56053161621094, "learning_rate": 8.205799436624251e-07, "loss": 9.7997, "step": 416770 }, { "epoch": 0.841921968996069, "grad_norm": 574.0950927734375, "learning_rate": 8.203883494688264e-07, "loss": 31.8117, "step": 416780 }, { "epoch": 0.8419421696287528, "grad_norm": 332.5416564941406, "learning_rate": 8.201967756461482e-07, "loss": 14.1505, "step": 416790 }, { "epoch": 0.8419623702614366, "grad_norm": 278.66064453125, "learning_rate": 8.200052221953231e-07, "loss": 18.9455, "step": 416800 }, { "epoch": 0.8419825708941204, "grad_norm": 108.77664184570312, "learning_rate": 8.198136891172864e-07, "loss": 18.3172, "step": 416810 }, { "epoch": 0.8420027715268043, "grad_norm": 244.78622436523438, "learning_rate": 8.196221764129708e-07, "loss": 18.8413, "step": 416820 }, { "epoch": 0.8420229721594881, "grad_norm": 416.7197570800781, "learning_rate": 8.194306840833083e-07, "loss": 30.0423, "step": 416830 }, { "epoch": 0.8420431727921719, "grad_norm": 512.8495483398438, "learning_rate": 8.192392121292336e-07, "loss": 26.7855, "step": 416840 }, { "epoch": 0.8420633734248557, "grad_norm": 115.77965545654297, "learning_rate": 8.190477605516828e-07, "loss": 18.4727, "step": 416850 }, { "epoch": 0.8420835740575395, "grad_norm": 264.286376953125, "learning_rate": 8.188563293515834e-07, "loss": 31.2557, "step": 416860 }, { "epoch": 0.8421037746902234, "grad_norm": 257.04852294921875, "learning_rate": 8.186649185298712e-07, "loss": 13.0479, "step": 416870 }, { "epoch": 0.8421239753229071, "grad_norm": 536.2850952148438, "learning_rate": 8.184735280874801e-07, "loss": 19.4256, "step": 416880 }, { "epoch": 0.8421441759555909, "grad_norm": 475.94158935546875, "learning_rate": 8.182821580253425e-07, "loss": 19.7305, "step": 416890 }, { "epoch": 0.8421643765882747, "grad_norm": 97.86799621582031, "learning_rate": 8.180908083443884e-07, "loss": 14.897, "step": 416900 }, { "epoch": 0.8421845772209585, "grad_norm": 375.2889404296875, "learning_rate": 8.178994790455541e-07, "loss": 28.2616, "step": 416910 }, { "epoch": 0.8422047778536423, "grad_norm": 91.23908233642578, "learning_rate": 8.177081701297706e-07, "loss": 18.147, "step": 416920 }, { "epoch": 0.8422249784863262, "grad_norm": 347.74603271484375, "learning_rate": 8.175168815979689e-07, "loss": 15.7763, "step": 416930 }, { "epoch": 0.84224517911901, "grad_norm": 365.4523010253906, "learning_rate": 8.173256134510827e-07, "loss": 13.617, "step": 416940 }, { "epoch": 0.8422653797516938, "grad_norm": 525.0000610351562, "learning_rate": 8.171343656900455e-07, "loss": 19.2889, "step": 416950 }, { "epoch": 0.8422855803843776, "grad_norm": 187.98133850097656, "learning_rate": 8.169431383157877e-07, "loss": 14.0866, "step": 416960 }, { "epoch": 0.8423057810170614, "grad_norm": 91.7159652709961, "learning_rate": 8.16751931329241e-07, "loss": 18.1176, "step": 416970 }, { "epoch": 0.8423259816497453, "grad_norm": 152.12509155273438, "learning_rate": 8.16560744731339e-07, "loss": 10.5476, "step": 416980 }, { "epoch": 0.8423461822824291, "grad_norm": 169.0170440673828, "learning_rate": 8.163695785230125e-07, "loss": 22.6367, "step": 416990 }, { "epoch": 0.8423663829151129, "grad_norm": 200.45126342773438, "learning_rate": 8.161784327051919e-07, "loss": 13.8543, "step": 417000 }, { "epoch": 0.8423865835477967, "grad_norm": 29.255558013916016, "learning_rate": 8.159873072788116e-07, "loss": 10.6084, "step": 417010 }, { "epoch": 0.8424067841804805, "grad_norm": 365.3880310058594, "learning_rate": 8.157962022448001e-07, "loss": 11.9844, "step": 417020 }, { "epoch": 0.8424269848131644, "grad_norm": 928.1983032226562, "learning_rate": 8.156051176040919e-07, "loss": 18.4349, "step": 417030 }, { "epoch": 0.8424471854458482, "grad_norm": 309.2545471191406, "learning_rate": 8.154140533576171e-07, "loss": 17.2742, "step": 417040 }, { "epoch": 0.842467386078532, "grad_norm": 175.72640991210938, "learning_rate": 8.152230095063051e-07, "loss": 24.0631, "step": 417050 }, { "epoch": 0.8424875867112158, "grad_norm": 211.6483917236328, "learning_rate": 8.150319860510903e-07, "loss": 13.3728, "step": 417060 }, { "epoch": 0.8425077873438996, "grad_norm": 254.79193115234375, "learning_rate": 8.148409829929005e-07, "loss": 10.8926, "step": 417070 }, { "epoch": 0.8425279879765835, "grad_norm": 108.91744995117188, "learning_rate": 8.14650000332669e-07, "loss": 8.3972, "step": 417080 }, { "epoch": 0.8425481886092673, "grad_norm": 537.0311889648438, "learning_rate": 8.144590380713252e-07, "loss": 13.1333, "step": 417090 }, { "epoch": 0.8425683892419511, "grad_norm": 222.94715881347656, "learning_rate": 8.142680962098016e-07, "loss": 14.637, "step": 417100 }, { "epoch": 0.8425885898746349, "grad_norm": 24.780733108520508, "learning_rate": 8.140771747490273e-07, "loss": 12.7297, "step": 417110 }, { "epoch": 0.8426087905073187, "grad_norm": 353.41839599609375, "learning_rate": 8.138862736899317e-07, "loss": 17.5354, "step": 417120 }, { "epoch": 0.8426289911400026, "grad_norm": 425.8451232910156, "learning_rate": 8.136953930334484e-07, "loss": 12.9214, "step": 417130 }, { "epoch": 0.8426491917726863, "grad_norm": 220.97088623046875, "learning_rate": 8.135045327805058e-07, "loss": 17.9575, "step": 417140 }, { "epoch": 0.8426693924053701, "grad_norm": 69.35327911376953, "learning_rate": 8.133136929320329e-07, "loss": 20.5659, "step": 417150 }, { "epoch": 0.8426895930380539, "grad_norm": 77.07563781738281, "learning_rate": 8.131228734889618e-07, "loss": 10.4113, "step": 417160 }, { "epoch": 0.8427097936707377, "grad_norm": 316.2534484863281, "learning_rate": 8.12932074452224e-07, "loss": 14.3827, "step": 417170 }, { "epoch": 0.8427299943034215, "grad_norm": 328.6934509277344, "learning_rate": 8.127412958227454e-07, "loss": 16.7839, "step": 417180 }, { "epoch": 0.8427501949361054, "grad_norm": 2.4409639835357666, "learning_rate": 8.125505376014576e-07, "loss": 18.3461, "step": 417190 }, { "epoch": 0.8427703955687892, "grad_norm": 96.01769256591797, "learning_rate": 8.123597997892918e-07, "loss": 20.7124, "step": 417200 }, { "epoch": 0.842790596201473, "grad_norm": 184.1000213623047, "learning_rate": 8.121690823871764e-07, "loss": 19.9601, "step": 417210 }, { "epoch": 0.8428107968341568, "grad_norm": 353.1975402832031, "learning_rate": 8.119783853960401e-07, "loss": 25.1228, "step": 417220 }, { "epoch": 0.8428309974668406, "grad_norm": 64.52377319335938, "learning_rate": 8.11787708816813e-07, "loss": 19.1177, "step": 417230 }, { "epoch": 0.8428511980995245, "grad_norm": 356.2233581542969, "learning_rate": 8.115970526504258e-07, "loss": 17.035, "step": 417240 }, { "epoch": 0.8428713987322083, "grad_norm": 226.4406280517578, "learning_rate": 8.114064168978064e-07, "loss": 12.2009, "step": 417250 }, { "epoch": 0.8428915993648921, "grad_norm": 537.150634765625, "learning_rate": 8.112158015598832e-07, "loss": 20.0987, "step": 417260 }, { "epoch": 0.8429117999975759, "grad_norm": 41.989990234375, "learning_rate": 8.110252066375873e-07, "loss": 11.7688, "step": 417270 }, { "epoch": 0.8429320006302597, "grad_norm": 112.29536437988281, "learning_rate": 8.108346321318467e-07, "loss": 11.3195, "step": 417280 }, { "epoch": 0.8429522012629436, "grad_norm": 548.8794555664062, "learning_rate": 8.106440780435881e-07, "loss": 13.095, "step": 417290 }, { "epoch": 0.8429724018956274, "grad_norm": 95.47740936279297, "learning_rate": 8.104535443737438e-07, "loss": 16.2874, "step": 417300 }, { "epoch": 0.8429926025283112, "grad_norm": 261.355712890625, "learning_rate": 8.102630311232395e-07, "loss": 20.4205, "step": 417310 }, { "epoch": 0.843012803160995, "grad_norm": 454.4987487792969, "learning_rate": 8.100725382930064e-07, "loss": 21.5333, "step": 417320 }, { "epoch": 0.8430330037936788, "grad_norm": 557.5668334960938, "learning_rate": 8.098820658839718e-07, "loss": 19.7518, "step": 417330 }, { "epoch": 0.8430532044263627, "grad_norm": 280.5194091796875, "learning_rate": 8.096916138970623e-07, "loss": 18.4529, "step": 417340 }, { "epoch": 0.8430734050590465, "grad_norm": 408.3387451171875, "learning_rate": 8.095011823332089e-07, "loss": 17.602, "step": 417350 }, { "epoch": 0.8430936056917303, "grad_norm": 158.57940673828125, "learning_rate": 8.093107711933385e-07, "loss": 6.591, "step": 417360 }, { "epoch": 0.8431138063244141, "grad_norm": 311.4345397949219, "learning_rate": 8.091203804783776e-07, "loss": 15.3923, "step": 417370 }, { "epoch": 0.843134006957098, "grad_norm": 75.57437896728516, "learning_rate": 8.089300101892561e-07, "loss": 14.1673, "step": 417380 }, { "epoch": 0.8431542075897817, "grad_norm": 204.05343627929688, "learning_rate": 8.087396603269027e-07, "loss": 10.9325, "step": 417390 }, { "epoch": 0.8431744082224655, "grad_norm": 126.0959243774414, "learning_rate": 8.085493308922432e-07, "loss": 15.9081, "step": 417400 }, { "epoch": 0.8431946088551493, "grad_norm": 246.98411560058594, "learning_rate": 8.083590218862053e-07, "loss": 20.4266, "step": 417410 }, { "epoch": 0.8432148094878331, "grad_norm": 327.4219055175781, "learning_rate": 8.081687333097183e-07, "loss": 22.7116, "step": 417420 }, { "epoch": 0.8432350101205169, "grad_norm": 225.0398712158203, "learning_rate": 8.079784651637084e-07, "loss": 15.1217, "step": 417430 }, { "epoch": 0.8432552107532008, "grad_norm": 307.7953186035156, "learning_rate": 8.077882174491014e-07, "loss": 16.109, "step": 417440 }, { "epoch": 0.8432754113858846, "grad_norm": 366.5951232910156, "learning_rate": 8.075979901668269e-07, "loss": 23.2215, "step": 417450 }, { "epoch": 0.8432956120185684, "grad_norm": 337.32269287109375, "learning_rate": 8.074077833178135e-07, "loss": 13.607, "step": 417460 }, { "epoch": 0.8433158126512522, "grad_norm": 87.312744140625, "learning_rate": 8.072175969029832e-07, "loss": 10.297, "step": 417470 }, { "epoch": 0.843336013283936, "grad_norm": 373.5894470214844, "learning_rate": 8.070274309232662e-07, "loss": 16.6932, "step": 417480 }, { "epoch": 0.8433562139166199, "grad_norm": 14.395997047424316, "learning_rate": 8.068372853795903e-07, "loss": 10.8317, "step": 417490 }, { "epoch": 0.8433764145493037, "grad_norm": 174.0565643310547, "learning_rate": 8.066471602728804e-07, "loss": 13.9431, "step": 417500 }, { "epoch": 0.8433966151819875, "grad_norm": 0.0, "learning_rate": 8.064570556040629e-07, "loss": 7.3829, "step": 417510 }, { "epoch": 0.8434168158146713, "grad_norm": 473.8428039550781, "learning_rate": 8.06266971374065e-07, "loss": 69.2349, "step": 417520 }, { "epoch": 0.8434370164473551, "grad_norm": 243.30958557128906, "learning_rate": 8.060769075838154e-07, "loss": 21.3427, "step": 417530 }, { "epoch": 0.843457217080039, "grad_norm": 319.12017822265625, "learning_rate": 8.058868642342366e-07, "loss": 22.7645, "step": 417540 }, { "epoch": 0.8434774177127228, "grad_norm": 354.8446960449219, "learning_rate": 8.056968413262555e-07, "loss": 19.6838, "step": 417550 }, { "epoch": 0.8434976183454066, "grad_norm": 450.49285888671875, "learning_rate": 8.055068388608011e-07, "loss": 18.4876, "step": 417560 }, { "epoch": 0.8435178189780904, "grad_norm": 0.0, "learning_rate": 8.053168568387976e-07, "loss": 19.2499, "step": 417570 }, { "epoch": 0.8435380196107742, "grad_norm": 241.9593048095703, "learning_rate": 8.051268952611696e-07, "loss": 19.4098, "step": 417580 }, { "epoch": 0.8435582202434581, "grad_norm": 178.7821807861328, "learning_rate": 8.04936954128846e-07, "loss": 24.0981, "step": 417590 }, { "epoch": 0.8435784208761419, "grad_norm": 286.7995910644531, "learning_rate": 8.047470334427504e-07, "loss": 24.2255, "step": 417600 }, { "epoch": 0.8435986215088257, "grad_norm": 181.5628662109375, "learning_rate": 8.045571332038082e-07, "loss": 13.613, "step": 417610 }, { "epoch": 0.8436188221415095, "grad_norm": 119.30364990234375, "learning_rate": 8.043672534129465e-07, "loss": 23.6132, "step": 417620 }, { "epoch": 0.8436390227741933, "grad_norm": 386.001953125, "learning_rate": 8.041773940710884e-07, "loss": 19.0254, "step": 417630 }, { "epoch": 0.8436592234068772, "grad_norm": 375.1712951660156, "learning_rate": 8.039875551791626e-07, "loss": 19.5697, "step": 417640 }, { "epoch": 0.8436794240395609, "grad_norm": 35.66468811035156, "learning_rate": 8.037977367380922e-07, "loss": 15.7012, "step": 417650 }, { "epoch": 0.8436996246722447, "grad_norm": 73.52401733398438, "learning_rate": 8.036079387488016e-07, "loss": 10.4436, "step": 417660 }, { "epoch": 0.8437198253049285, "grad_norm": 170.89988708496094, "learning_rate": 8.034181612122183e-07, "loss": 19.2288, "step": 417670 }, { "epoch": 0.8437400259376123, "grad_norm": 384.93896484375, "learning_rate": 8.032284041292649e-07, "loss": 22.6525, "step": 417680 }, { "epoch": 0.8437602265702961, "grad_norm": 502.8800354003906, "learning_rate": 8.030386675008678e-07, "loss": 23.0976, "step": 417690 }, { "epoch": 0.84378042720298, "grad_norm": 442.6712341308594, "learning_rate": 8.028489513279503e-07, "loss": 23.3129, "step": 417700 }, { "epoch": 0.8438006278356638, "grad_norm": 649.2350463867188, "learning_rate": 8.026592556114393e-07, "loss": 31.9002, "step": 417710 }, { "epoch": 0.8438208284683476, "grad_norm": 266.51507568359375, "learning_rate": 8.02469580352258e-07, "loss": 19.7998, "step": 417720 }, { "epoch": 0.8438410291010314, "grad_norm": 205.36984252929688, "learning_rate": 8.022799255513297e-07, "loss": 15.3339, "step": 417730 }, { "epoch": 0.8438612297337152, "grad_norm": 111.22181701660156, "learning_rate": 8.020902912095807e-07, "loss": 10.6801, "step": 417740 }, { "epoch": 0.8438814303663991, "grad_norm": 391.03997802734375, "learning_rate": 8.019006773279348e-07, "loss": 23.1067, "step": 417750 }, { "epoch": 0.8439016309990829, "grad_norm": 225.76760864257812, "learning_rate": 8.01711083907315e-07, "loss": 10.5187, "step": 417760 }, { "epoch": 0.8439218316317667, "grad_norm": 521.1165771484375, "learning_rate": 8.015215109486457e-07, "loss": 13.9432, "step": 417770 }, { "epoch": 0.8439420322644505, "grad_norm": 309.98321533203125, "learning_rate": 8.013319584528539e-07, "loss": 25.0889, "step": 417780 }, { "epoch": 0.8439622328971343, "grad_norm": 266.58740234375, "learning_rate": 8.011424264208584e-07, "loss": 13.1176, "step": 417790 }, { "epoch": 0.8439824335298182, "grad_norm": 11.70690631866455, "learning_rate": 8.009529148535855e-07, "loss": 25.3439, "step": 417800 }, { "epoch": 0.844002634162502, "grad_norm": 301.5768737792969, "learning_rate": 8.007634237519595e-07, "loss": 20.4889, "step": 417810 }, { "epoch": 0.8440228347951858, "grad_norm": 507.21112060546875, "learning_rate": 8.005739531169044e-07, "loss": 15.694, "step": 417820 }, { "epoch": 0.8440430354278696, "grad_norm": 303.1771545410156, "learning_rate": 8.003845029493407e-07, "loss": 12.5995, "step": 417830 }, { "epoch": 0.8440632360605534, "grad_norm": 370.5364685058594, "learning_rate": 8.001950732501934e-07, "loss": 13.2588, "step": 417840 }, { "epoch": 0.8440834366932373, "grad_norm": 0.0, "learning_rate": 8.000056640203885e-07, "loss": 16.1487, "step": 417850 }, { "epoch": 0.8441036373259211, "grad_norm": 304.07415771484375, "learning_rate": 7.99816275260844e-07, "loss": 22.8371, "step": 417860 }, { "epoch": 0.8441238379586049, "grad_norm": 213.01974487304688, "learning_rate": 7.996269069724861e-07, "loss": 19.4586, "step": 417870 }, { "epoch": 0.8441440385912887, "grad_norm": 258.6704406738281, "learning_rate": 7.994375591562376e-07, "loss": 11.2081, "step": 417880 }, { "epoch": 0.8441642392239725, "grad_norm": 69.58675384521484, "learning_rate": 7.992482318130218e-07, "loss": 13.2909, "step": 417890 }, { "epoch": 0.8441844398566564, "grad_norm": 530.9094848632812, "learning_rate": 7.990589249437591e-07, "loss": 15.7657, "step": 417900 }, { "epoch": 0.8442046404893401, "grad_norm": 223.39622497558594, "learning_rate": 7.988696385493744e-07, "loss": 15.9642, "step": 417910 }, { "epoch": 0.8442248411220239, "grad_norm": 192.00222778320312, "learning_rate": 7.986803726307901e-07, "loss": 15.281, "step": 417920 }, { "epoch": 0.8442450417547077, "grad_norm": 422.16455078125, "learning_rate": 7.984911271889267e-07, "loss": 19.39, "step": 417930 }, { "epoch": 0.8442652423873915, "grad_norm": 266.6922607421875, "learning_rate": 7.983019022247096e-07, "loss": 10.9239, "step": 417940 }, { "epoch": 0.8442854430200754, "grad_norm": 528.60498046875, "learning_rate": 7.98112697739058e-07, "loss": 21.9627, "step": 417950 }, { "epoch": 0.8443056436527592, "grad_norm": 54.88615417480469, "learning_rate": 7.979235137328961e-07, "loss": 7.2034, "step": 417960 }, { "epoch": 0.844325844285443, "grad_norm": 28.351037979125977, "learning_rate": 7.97734350207145e-07, "loss": 18.4054, "step": 417970 }, { "epoch": 0.8443460449181268, "grad_norm": 221.80360412597656, "learning_rate": 7.975452071627277e-07, "loss": 16.6061, "step": 417980 }, { "epoch": 0.8443662455508106, "grad_norm": 33.2858772277832, "learning_rate": 7.973560846005646e-07, "loss": 37.2365, "step": 417990 }, { "epoch": 0.8443864461834945, "grad_norm": 230.93865966796875, "learning_rate": 7.971669825215789e-07, "loss": 11.9403, "step": 418000 }, { "epoch": 0.8444066468161783, "grad_norm": 137.49484252929688, "learning_rate": 7.969779009266915e-07, "loss": 11.4415, "step": 418010 }, { "epoch": 0.8444268474488621, "grad_norm": 228.41421508789062, "learning_rate": 7.967888398168233e-07, "loss": 16.9954, "step": 418020 }, { "epoch": 0.8444470480815459, "grad_norm": 89.01964569091797, "learning_rate": 7.965997991928975e-07, "loss": 22.7601, "step": 418030 }, { "epoch": 0.8444672487142297, "grad_norm": 27.896099090576172, "learning_rate": 7.964107790558345e-07, "loss": 10.7563, "step": 418040 }, { "epoch": 0.8444874493469136, "grad_norm": 259.76312255859375, "learning_rate": 7.962217794065547e-07, "loss": 15.3442, "step": 418050 }, { "epoch": 0.8445076499795974, "grad_norm": 41.69110107421875, "learning_rate": 7.960328002459794e-07, "loss": 26.4213, "step": 418060 }, { "epoch": 0.8445278506122812, "grad_norm": 1506.3770751953125, "learning_rate": 7.958438415750331e-07, "loss": 19.3705, "step": 418070 }, { "epoch": 0.844548051244965, "grad_norm": 419.8367004394531, "learning_rate": 7.956549033946314e-07, "loss": 9.6227, "step": 418080 }, { "epoch": 0.8445682518776488, "grad_norm": 22.47806739807129, "learning_rate": 7.954659857056984e-07, "loss": 23.0181, "step": 418090 }, { "epoch": 0.8445884525103327, "grad_norm": 150.64083862304688, "learning_rate": 7.952770885091548e-07, "loss": 11.5915, "step": 418100 }, { "epoch": 0.8446086531430165, "grad_norm": 280.8748474121094, "learning_rate": 7.950882118059211e-07, "loss": 15.9849, "step": 418110 }, { "epoch": 0.8446288537757003, "grad_norm": 1.775250792503357, "learning_rate": 7.948993555969159e-07, "loss": 19.1233, "step": 418120 }, { "epoch": 0.8446490544083841, "grad_norm": 168.44053649902344, "learning_rate": 7.947105198830612e-07, "loss": 15.6858, "step": 418130 }, { "epoch": 0.8446692550410679, "grad_norm": 492.3616027832031, "learning_rate": 7.945217046652804e-07, "loss": 19.0548, "step": 418140 }, { "epoch": 0.8446894556737518, "grad_norm": 172.2943572998047, "learning_rate": 7.94332909944488e-07, "loss": 27.8747, "step": 418150 }, { "epoch": 0.8447096563064355, "grad_norm": 362.9201965332031, "learning_rate": 7.941441357216068e-07, "loss": 12.8676, "step": 418160 }, { "epoch": 0.8447298569391193, "grad_norm": 726.9605102539062, "learning_rate": 7.939553819975582e-07, "loss": 33.5452, "step": 418170 }, { "epoch": 0.8447500575718031, "grad_norm": 316.0181579589844, "learning_rate": 7.937666487732609e-07, "loss": 18.9639, "step": 418180 }, { "epoch": 0.8447702582044869, "grad_norm": 242.84410095214844, "learning_rate": 7.935779360496337e-07, "loss": 16.4849, "step": 418190 }, { "epoch": 0.8447904588371707, "grad_norm": 160.95989990234375, "learning_rate": 7.933892438275987e-07, "loss": 19.6859, "step": 418200 }, { "epoch": 0.8448106594698546, "grad_norm": 207.83961486816406, "learning_rate": 7.932005721080738e-07, "loss": 15.6383, "step": 418210 }, { "epoch": 0.8448308601025384, "grad_norm": 292.7705993652344, "learning_rate": 7.930119208919784e-07, "loss": 7.8671, "step": 418220 }, { "epoch": 0.8448510607352222, "grad_norm": 409.3346252441406, "learning_rate": 7.92823290180234e-07, "loss": 15.9231, "step": 418230 }, { "epoch": 0.844871261367906, "grad_norm": 388.8248291015625, "learning_rate": 7.926346799737572e-07, "loss": 21.3437, "step": 418240 }, { "epoch": 0.8448914620005898, "grad_norm": 243.6528778076172, "learning_rate": 7.924460902734698e-07, "loss": 33.8353, "step": 418250 }, { "epoch": 0.8449116626332737, "grad_norm": 266.9147033691406, "learning_rate": 7.922575210802896e-07, "loss": 17.6398, "step": 418260 }, { "epoch": 0.8449318632659575, "grad_norm": 265.2134094238281, "learning_rate": 7.920689723951353e-07, "loss": 16.5091, "step": 418270 }, { "epoch": 0.8449520638986413, "grad_norm": 30.463001251220703, "learning_rate": 7.918804442189271e-07, "loss": 22.2017, "step": 418280 }, { "epoch": 0.8449722645313251, "grad_norm": 301.3177795410156, "learning_rate": 7.916919365525827e-07, "loss": 8.4603, "step": 418290 }, { "epoch": 0.8449924651640089, "grad_norm": 158.36184692382812, "learning_rate": 7.91503449397022e-07, "loss": 22.1436, "step": 418300 }, { "epoch": 0.8450126657966928, "grad_norm": 361.0083923339844, "learning_rate": 7.913149827531619e-07, "loss": 18.0479, "step": 418310 }, { "epoch": 0.8450328664293766, "grad_norm": 223.05116271972656, "learning_rate": 7.911265366219234e-07, "loss": 16.068, "step": 418320 }, { "epoch": 0.8450530670620604, "grad_norm": 121.52783966064453, "learning_rate": 7.909381110042241e-07, "loss": 9.1954, "step": 418330 }, { "epoch": 0.8450732676947442, "grad_norm": 471.07623291015625, "learning_rate": 7.907497059009806e-07, "loss": 25.1484, "step": 418340 }, { "epoch": 0.845093468327428, "grad_norm": 75.26834106445312, "learning_rate": 7.90561321313113e-07, "loss": 10.9123, "step": 418350 }, { "epoch": 0.8451136689601119, "grad_norm": 207.7906494140625, "learning_rate": 7.903729572415397e-07, "loss": 14.5202, "step": 418360 }, { "epoch": 0.8451338695927957, "grad_norm": 136.6814422607422, "learning_rate": 7.901846136871766e-07, "loss": 11.5344, "step": 418370 }, { "epoch": 0.8451540702254795, "grad_norm": 210.18292236328125, "learning_rate": 7.899962906509434e-07, "loss": 10.3495, "step": 418380 }, { "epoch": 0.8451742708581633, "grad_norm": 318.5513000488281, "learning_rate": 7.898079881337594e-07, "loss": 27.3364, "step": 418390 }, { "epoch": 0.8451944714908471, "grad_norm": 22.192745208740234, "learning_rate": 7.89619706136539e-07, "loss": 14.4309, "step": 418400 }, { "epoch": 0.845214672123531, "grad_norm": 315.7263488769531, "learning_rate": 7.894314446602013e-07, "loss": 28.3204, "step": 418410 }, { "epoch": 0.8452348727562147, "grad_norm": 480.8759460449219, "learning_rate": 7.892432037056652e-07, "loss": 17.5233, "step": 418420 }, { "epoch": 0.8452550733888985, "grad_norm": 221.65704345703125, "learning_rate": 7.890549832738465e-07, "loss": 21.0742, "step": 418430 }, { "epoch": 0.8452752740215823, "grad_norm": 210.5298309326172, "learning_rate": 7.888667833656627e-07, "loss": 16.948, "step": 418440 }, { "epoch": 0.8452954746542661, "grad_norm": 112.18061828613281, "learning_rate": 7.88678603982031e-07, "loss": 19.9811, "step": 418450 }, { "epoch": 0.84531567528695, "grad_norm": 89.71009063720703, "learning_rate": 7.884904451238712e-07, "loss": 8.5408, "step": 418460 }, { "epoch": 0.8453358759196338, "grad_norm": 291.0071105957031, "learning_rate": 7.883023067920964e-07, "loss": 11.101, "step": 418470 }, { "epoch": 0.8453560765523176, "grad_norm": 203.8237762451172, "learning_rate": 7.881141889876248e-07, "loss": 12.762, "step": 418480 }, { "epoch": 0.8453762771850014, "grad_norm": 0.0, "learning_rate": 7.879260917113751e-07, "loss": 14.9486, "step": 418490 }, { "epoch": 0.8453964778176852, "grad_norm": 135.83151245117188, "learning_rate": 7.877380149642628e-07, "loss": 11.3867, "step": 418500 }, { "epoch": 0.845416678450369, "grad_norm": 205.89895629882812, "learning_rate": 7.875499587472035e-07, "loss": 5.0876, "step": 418510 }, { "epoch": 0.8454368790830529, "grad_norm": 32.48879623413086, "learning_rate": 7.873619230611157e-07, "loss": 10.2425, "step": 418520 }, { "epoch": 0.8454570797157367, "grad_norm": 413.2861022949219, "learning_rate": 7.871739079069152e-07, "loss": 16.8644, "step": 418530 }, { "epoch": 0.8454772803484205, "grad_norm": 309.8213195800781, "learning_rate": 7.869859132855168e-07, "loss": 22.5572, "step": 418540 }, { "epoch": 0.8454974809811043, "grad_norm": 58.5374755859375, "learning_rate": 7.867979391978398e-07, "loss": 22.1598, "step": 418550 }, { "epoch": 0.8455176816137882, "grad_norm": 313.8316650390625, "learning_rate": 7.866099856447968e-07, "loss": 12.3486, "step": 418560 }, { "epoch": 0.845537882246472, "grad_norm": 255.20057678222656, "learning_rate": 7.864220526273069e-07, "loss": 22.2058, "step": 418570 }, { "epoch": 0.8455580828791558, "grad_norm": 183.54238891601562, "learning_rate": 7.862341401462842e-07, "loss": 19.3766, "step": 418580 }, { "epoch": 0.8455782835118396, "grad_norm": 415.2626647949219, "learning_rate": 7.86046248202646e-07, "loss": 21.5693, "step": 418590 }, { "epoch": 0.8455984841445234, "grad_norm": 275.727294921875, "learning_rate": 7.858583767973071e-07, "loss": 9.2826, "step": 418600 }, { "epoch": 0.8456186847772073, "grad_norm": 295.2415466308594, "learning_rate": 7.856705259311826e-07, "loss": 15.407, "step": 418610 }, { "epoch": 0.8456388854098911, "grad_norm": 623.8258056640625, "learning_rate": 7.854826956051897e-07, "loss": 22.3412, "step": 418620 }, { "epoch": 0.8456590860425749, "grad_norm": 503.14129638671875, "learning_rate": 7.852948858202419e-07, "loss": 36.1466, "step": 418630 }, { "epoch": 0.8456792866752587, "grad_norm": 414.8552551269531, "learning_rate": 7.851070965772572e-07, "loss": 26.9905, "step": 418640 }, { "epoch": 0.8456994873079425, "grad_norm": 317.86376953125, "learning_rate": 7.849193278771489e-07, "loss": 16.9348, "step": 418650 }, { "epoch": 0.8457196879406264, "grad_norm": 99.41777801513672, "learning_rate": 7.847315797208316e-07, "loss": 18.4496, "step": 418660 }, { "epoch": 0.8457398885733101, "grad_norm": 318.8612365722656, "learning_rate": 7.845438521092213e-07, "loss": 13.1501, "step": 418670 }, { "epoch": 0.8457600892059939, "grad_norm": 114.52713775634766, "learning_rate": 7.843561450432352e-07, "loss": 11.411, "step": 418680 }, { "epoch": 0.8457802898386777, "grad_norm": 206.9661102294922, "learning_rate": 7.841684585237836e-07, "loss": 19.7493, "step": 418690 }, { "epoch": 0.8458004904713615, "grad_norm": 370.2718505859375, "learning_rate": 7.839807925517834e-07, "loss": 16.8029, "step": 418700 }, { "epoch": 0.8458206911040453, "grad_norm": 188.53329467773438, "learning_rate": 7.837931471281513e-07, "loss": 10.9134, "step": 418710 }, { "epoch": 0.8458408917367292, "grad_norm": 360.9389953613281, "learning_rate": 7.836055222537997e-07, "loss": 10.1335, "step": 418720 }, { "epoch": 0.845861092369413, "grad_norm": 367.79119873046875, "learning_rate": 7.834179179296419e-07, "loss": 15.5781, "step": 418730 }, { "epoch": 0.8458812930020968, "grad_norm": 143.9136505126953, "learning_rate": 7.832303341565938e-07, "loss": 15.9415, "step": 418740 }, { "epoch": 0.8459014936347806, "grad_norm": 557.5685424804688, "learning_rate": 7.830427709355726e-07, "loss": 11.2197, "step": 418750 }, { "epoch": 0.8459216942674644, "grad_norm": 382.8085021972656, "learning_rate": 7.828552282674867e-07, "loss": 26.4081, "step": 418760 }, { "epoch": 0.8459418949001483, "grad_norm": 523.3223266601562, "learning_rate": 7.826677061532528e-07, "loss": 19.1081, "step": 418770 }, { "epoch": 0.8459620955328321, "grad_norm": 294.28997802734375, "learning_rate": 7.824802045937863e-07, "loss": 27.2879, "step": 418780 }, { "epoch": 0.8459822961655159, "grad_norm": 371.58319091796875, "learning_rate": 7.822927235900001e-07, "loss": 11.3237, "step": 418790 }, { "epoch": 0.8460024967981997, "grad_norm": 125.66659545898438, "learning_rate": 7.821052631428061e-07, "loss": 10.9287, "step": 418800 }, { "epoch": 0.8460226974308835, "grad_norm": 294.57086181640625, "learning_rate": 7.819178232531205e-07, "loss": 11.4494, "step": 418810 }, { "epoch": 0.8460428980635674, "grad_norm": 142.6394805908203, "learning_rate": 7.81730403921856e-07, "loss": 20.1645, "step": 418820 }, { "epoch": 0.8460630986962512, "grad_norm": 164.38522338867188, "learning_rate": 7.815430051499251e-07, "loss": 18.3891, "step": 418830 }, { "epoch": 0.846083299328935, "grad_norm": 145.92263793945312, "learning_rate": 7.813556269382427e-07, "loss": 18.282, "step": 418840 }, { "epoch": 0.8461034999616188, "grad_norm": 179.31971740722656, "learning_rate": 7.811682692877204e-07, "loss": 15.6195, "step": 418850 }, { "epoch": 0.8461237005943026, "grad_norm": 296.96502685546875, "learning_rate": 7.809809321992729e-07, "loss": 18.4453, "step": 418860 }, { "epoch": 0.8461439012269865, "grad_norm": 223.33470153808594, "learning_rate": 7.807936156738133e-07, "loss": 11.2644, "step": 418870 }, { "epoch": 0.8461641018596703, "grad_norm": 20.11041831970215, "learning_rate": 7.80606319712252e-07, "loss": 11.9764, "step": 418880 }, { "epoch": 0.8461843024923541, "grad_norm": 236.30360412597656, "learning_rate": 7.804190443155057e-07, "loss": 8.7645, "step": 418890 }, { "epoch": 0.8462045031250379, "grad_norm": 393.8714904785156, "learning_rate": 7.802317894844835e-07, "loss": 14.177, "step": 418900 }, { "epoch": 0.8462247037577217, "grad_norm": 102.66844177246094, "learning_rate": 7.800445552201014e-07, "loss": 22.5148, "step": 418910 }, { "epoch": 0.8462449043904056, "grad_norm": 292.3620300292969, "learning_rate": 7.798573415232686e-07, "loss": 13.6744, "step": 418920 }, { "epoch": 0.8462651050230893, "grad_norm": 168.82101440429688, "learning_rate": 7.79670148394901e-07, "loss": 20.507, "step": 418930 }, { "epoch": 0.8462853056557731, "grad_norm": 81.64151763916016, "learning_rate": 7.794829758359085e-07, "loss": 17.4314, "step": 418940 }, { "epoch": 0.8463055062884569, "grad_norm": 355.2381286621094, "learning_rate": 7.792958238472037e-07, "loss": 17.5293, "step": 418950 }, { "epoch": 0.8463257069211407, "grad_norm": 245.11447143554688, "learning_rate": 7.791086924296998e-07, "loss": 19.7603, "step": 418960 }, { "epoch": 0.8463459075538246, "grad_norm": 416.0592346191406, "learning_rate": 7.789215815843082e-07, "loss": 28.4353, "step": 418970 }, { "epoch": 0.8463661081865084, "grad_norm": 254.7152862548828, "learning_rate": 7.787344913119399e-07, "loss": 10.1425, "step": 418980 }, { "epoch": 0.8463863088191922, "grad_norm": 166.51541137695312, "learning_rate": 7.785474216135081e-07, "loss": 5.4226, "step": 418990 }, { "epoch": 0.846406509451876, "grad_norm": 261.1175537109375, "learning_rate": 7.783603724899258e-07, "loss": 7.5904, "step": 419000 }, { "epoch": 0.8464267100845598, "grad_norm": 183.1525115966797, "learning_rate": 7.781733439421013e-07, "loss": 16.9643, "step": 419010 }, { "epoch": 0.8464469107172437, "grad_norm": 235.56138610839844, "learning_rate": 7.779863359709472e-07, "loss": 14.2526, "step": 419020 }, { "epoch": 0.8464671113499275, "grad_norm": 267.1103820800781, "learning_rate": 7.777993485773771e-07, "loss": 7.1728, "step": 419030 }, { "epoch": 0.8464873119826113, "grad_norm": 239.46324157714844, "learning_rate": 7.776123817623011e-07, "loss": 19.2, "step": 419040 }, { "epoch": 0.8465075126152951, "grad_norm": 275.8646240234375, "learning_rate": 7.774254355266287e-07, "loss": 7.2621, "step": 419050 }, { "epoch": 0.8465277132479789, "grad_norm": 124.61710357666016, "learning_rate": 7.772385098712731e-07, "loss": 29.3966, "step": 419060 }, { "epoch": 0.8465479138806628, "grad_norm": 68.98516845703125, "learning_rate": 7.770516047971466e-07, "loss": 10.5067, "step": 419070 }, { "epoch": 0.8465681145133466, "grad_norm": 159.63670349121094, "learning_rate": 7.768647203051566e-07, "loss": 17.4997, "step": 419080 }, { "epoch": 0.8465883151460304, "grad_norm": 189.43275451660156, "learning_rate": 7.766778563962152e-07, "loss": 14.4695, "step": 419090 }, { "epoch": 0.8466085157787142, "grad_norm": 335.21990966796875, "learning_rate": 7.76491013071235e-07, "loss": 29.3792, "step": 419100 }, { "epoch": 0.846628716411398, "grad_norm": 230.95803833007812, "learning_rate": 7.763041903311258e-07, "loss": 21.7272, "step": 419110 }, { "epoch": 0.8466489170440819, "grad_norm": 193.43482971191406, "learning_rate": 7.761173881767958e-07, "loss": 17.8074, "step": 419120 }, { "epoch": 0.8466691176767657, "grad_norm": 276.3856201171875, "learning_rate": 7.759306066091593e-07, "loss": 13.9534, "step": 419130 }, { "epoch": 0.8466893183094495, "grad_norm": 32.589385986328125, "learning_rate": 7.757438456291245e-07, "loss": 15.6735, "step": 419140 }, { "epoch": 0.8467095189421333, "grad_norm": 562.0736694335938, "learning_rate": 7.755571052376004e-07, "loss": 26.1156, "step": 419150 }, { "epoch": 0.8467297195748171, "grad_norm": 192.48678588867188, "learning_rate": 7.753703854354999e-07, "loss": 13.4708, "step": 419160 }, { "epoch": 0.846749920207501, "grad_norm": 294.00750732421875, "learning_rate": 7.751836862237305e-07, "loss": 22.9229, "step": 419170 }, { "epoch": 0.8467701208401847, "grad_norm": 195.16390991210938, "learning_rate": 7.749970076032048e-07, "loss": 11.2372, "step": 419180 }, { "epoch": 0.8467903214728685, "grad_norm": 295.3279113769531, "learning_rate": 7.748103495748299e-07, "loss": 15.2764, "step": 419190 }, { "epoch": 0.8468105221055523, "grad_norm": 357.1981506347656, "learning_rate": 7.746237121395184e-07, "loss": 16.6917, "step": 419200 }, { "epoch": 0.8468307227382361, "grad_norm": 281.7844543457031, "learning_rate": 7.744370952981778e-07, "loss": 14.9912, "step": 419210 }, { "epoch": 0.8468509233709199, "grad_norm": 191.52902221679688, "learning_rate": 7.742504990517174e-07, "loss": 19.5492, "step": 419220 }, { "epoch": 0.8468711240036038, "grad_norm": 227.82151794433594, "learning_rate": 7.740639234010488e-07, "loss": 14.5077, "step": 419230 }, { "epoch": 0.8468913246362876, "grad_norm": 212.77410888671875, "learning_rate": 7.73877368347079e-07, "loss": 20.7518, "step": 419240 }, { "epoch": 0.8469115252689714, "grad_norm": 0.6228466033935547, "learning_rate": 7.736908338907195e-07, "loss": 23.3167, "step": 419250 }, { "epoch": 0.8469317259016552, "grad_norm": 319.5693054199219, "learning_rate": 7.735043200328784e-07, "loss": 12.517, "step": 419260 }, { "epoch": 0.846951926534339, "grad_norm": 84.47393035888672, "learning_rate": 7.733178267744634e-07, "loss": 22.4205, "step": 419270 }, { "epoch": 0.8469721271670229, "grad_norm": 251.12008666992188, "learning_rate": 7.73131354116386e-07, "loss": 18.7598, "step": 419280 }, { "epoch": 0.8469923277997067, "grad_norm": 278.4853820800781, "learning_rate": 7.729449020595531e-07, "loss": 21.4272, "step": 419290 }, { "epoch": 0.8470125284323905, "grad_norm": 128.65440368652344, "learning_rate": 7.727584706048735e-07, "loss": 11.0083, "step": 419300 }, { "epoch": 0.8470327290650743, "grad_norm": 273.97418212890625, "learning_rate": 7.72572059753256e-07, "loss": 24.5506, "step": 419310 }, { "epoch": 0.8470529296977581, "grad_norm": 217.05433654785156, "learning_rate": 7.723856695056109e-07, "loss": 10.9804, "step": 419320 }, { "epoch": 0.847073130330442, "grad_norm": 213.7815399169922, "learning_rate": 7.721992998628452e-07, "loss": 15.6746, "step": 419330 }, { "epoch": 0.8470933309631258, "grad_norm": 157.66583251953125, "learning_rate": 7.720129508258667e-07, "loss": 11.8468, "step": 419340 }, { "epoch": 0.8471135315958096, "grad_norm": 132.3120574951172, "learning_rate": 7.71826622395585e-07, "loss": 12.3597, "step": 419350 }, { "epoch": 0.8471337322284934, "grad_norm": 37.68947982788086, "learning_rate": 7.716403145729073e-07, "loss": 24.1461, "step": 419360 }, { "epoch": 0.8471539328611772, "grad_norm": 464.0668029785156, "learning_rate": 7.714540273587412e-07, "loss": 14.1437, "step": 419370 }, { "epoch": 0.8471741334938611, "grad_norm": 440.4002685546875, "learning_rate": 7.712677607539948e-07, "loss": 18.4602, "step": 419380 }, { "epoch": 0.8471943341265449, "grad_norm": 175.86553955078125, "learning_rate": 7.710815147595779e-07, "loss": 13.6648, "step": 419390 }, { "epoch": 0.8472145347592287, "grad_norm": 407.2461242675781, "learning_rate": 7.708952893763972e-07, "loss": 21.645, "step": 419400 }, { "epoch": 0.8472347353919125, "grad_norm": 99.37748718261719, "learning_rate": 7.707090846053577e-07, "loss": 36.8502, "step": 419410 }, { "epoch": 0.8472549360245963, "grad_norm": 0.0, "learning_rate": 7.705229004473713e-07, "loss": 11.9214, "step": 419420 }, { "epoch": 0.8472751366572802, "grad_norm": 288.2516784667969, "learning_rate": 7.703367369033432e-07, "loss": 18.2918, "step": 419430 }, { "epoch": 0.8472953372899639, "grad_norm": 104.54570007324219, "learning_rate": 7.701505939741793e-07, "loss": 15.5991, "step": 419440 }, { "epoch": 0.8473155379226477, "grad_norm": 348.97833251953125, "learning_rate": 7.699644716607896e-07, "loss": 16.0717, "step": 419450 }, { "epoch": 0.8473357385553315, "grad_norm": 66.58470153808594, "learning_rate": 7.697783699640793e-07, "loss": 13.2711, "step": 419460 }, { "epoch": 0.8473559391880153, "grad_norm": 139.7555694580078, "learning_rate": 7.695922888849566e-07, "loss": 44.969, "step": 419470 }, { "epoch": 0.8473761398206991, "grad_norm": 205.95394897460938, "learning_rate": 7.694062284243287e-07, "loss": 16.8198, "step": 419480 }, { "epoch": 0.847396340453383, "grad_norm": 253.16851806640625, "learning_rate": 7.692201885831002e-07, "loss": 37.8906, "step": 419490 }, { "epoch": 0.8474165410860668, "grad_norm": 274.63623046875, "learning_rate": 7.690341693621805e-07, "loss": 17.3523, "step": 419500 }, { "epoch": 0.8474367417187506, "grad_norm": 190.86514282226562, "learning_rate": 7.68848170762474e-07, "loss": 18.6979, "step": 419510 }, { "epoch": 0.8474569423514344, "grad_norm": 475.6833190917969, "learning_rate": 7.686621927848898e-07, "loss": 17.6981, "step": 419520 }, { "epoch": 0.8474771429841182, "grad_norm": 291.0531311035156, "learning_rate": 7.684762354303316e-07, "loss": 31.0168, "step": 419530 }, { "epoch": 0.8474973436168021, "grad_norm": 202.85459899902344, "learning_rate": 7.682902986997076e-07, "loss": 8.2894, "step": 419540 }, { "epoch": 0.8475175442494859, "grad_norm": 111.10723114013672, "learning_rate": 7.681043825939238e-07, "loss": 11.5746, "step": 419550 }, { "epoch": 0.8475377448821697, "grad_norm": 182.5100555419922, "learning_rate": 7.679184871138851e-07, "loss": 11.4575, "step": 419560 }, { "epoch": 0.8475579455148535, "grad_norm": 345.07769775390625, "learning_rate": 7.677326122604995e-07, "loss": 41.2544, "step": 419570 }, { "epoch": 0.8475781461475373, "grad_norm": 593.8002319335938, "learning_rate": 7.675467580346719e-07, "loss": 14.9949, "step": 419580 }, { "epoch": 0.8475983467802212, "grad_norm": 422.88824462890625, "learning_rate": 7.673609244373065e-07, "loss": 20.6717, "step": 419590 }, { "epoch": 0.847618547412905, "grad_norm": 165.75279235839844, "learning_rate": 7.671751114693104e-07, "loss": 22.239, "step": 419600 }, { "epoch": 0.8476387480455888, "grad_norm": 234.065673828125, "learning_rate": 7.669893191315924e-07, "loss": 17.9109, "step": 419610 }, { "epoch": 0.8476589486782726, "grad_norm": 343.1844482421875, "learning_rate": 7.668035474250523e-07, "loss": 33.5137, "step": 419620 }, { "epoch": 0.8476791493109564, "grad_norm": 241.33856201171875, "learning_rate": 7.666177963505989e-07, "loss": 9.1083, "step": 419630 }, { "epoch": 0.8476993499436403, "grad_norm": 356.629150390625, "learning_rate": 7.664320659091373e-07, "loss": 28.1051, "step": 419640 }, { "epoch": 0.8477195505763241, "grad_norm": 547.921875, "learning_rate": 7.662463561015726e-07, "loss": 16.6537, "step": 419650 }, { "epoch": 0.8477397512090079, "grad_norm": 118.60931396484375, "learning_rate": 7.66060666928809e-07, "loss": 12.7615, "step": 419660 }, { "epoch": 0.8477599518416917, "grad_norm": 323.72265625, "learning_rate": 7.658749983917512e-07, "loss": 22.8208, "step": 419670 }, { "epoch": 0.8477801524743755, "grad_norm": 67.41857147216797, "learning_rate": 7.656893504913082e-07, "loss": 10.1983, "step": 419680 }, { "epoch": 0.8478003531070594, "grad_norm": 729.8850708007812, "learning_rate": 7.655037232283791e-07, "loss": 29.0754, "step": 419690 }, { "epoch": 0.8478205537397431, "grad_norm": 279.55413818359375, "learning_rate": 7.653181166038715e-07, "loss": 27.4217, "step": 419700 }, { "epoch": 0.8478407543724269, "grad_norm": 312.2660217285156, "learning_rate": 7.651325306186908e-07, "loss": 24.9283, "step": 419710 }, { "epoch": 0.8478609550051107, "grad_norm": 217.29945373535156, "learning_rate": 7.649469652737407e-07, "loss": 20.7779, "step": 419720 }, { "epoch": 0.8478811556377945, "grad_norm": 362.56494140625, "learning_rate": 7.647614205699244e-07, "loss": 20.1727, "step": 419730 }, { "epoch": 0.8479013562704784, "grad_norm": 17.462560653686523, "learning_rate": 7.645758965081478e-07, "loss": 22.9335, "step": 419740 }, { "epoch": 0.8479215569031622, "grad_norm": 406.2235107421875, "learning_rate": 7.643903930893154e-07, "loss": 15.82, "step": 419750 }, { "epoch": 0.847941757535846, "grad_norm": 493.501953125, "learning_rate": 7.64204910314329e-07, "loss": 22.1433, "step": 419760 }, { "epoch": 0.8479619581685298, "grad_norm": 510.3091125488281, "learning_rate": 7.640194481840951e-07, "loss": 14.9808, "step": 419770 }, { "epoch": 0.8479821588012136, "grad_norm": 194.7623748779297, "learning_rate": 7.638340066995154e-07, "loss": 12.7146, "step": 419780 }, { "epoch": 0.8480023594338975, "grad_norm": 255.74661254882812, "learning_rate": 7.636485858614962e-07, "loss": 20.3722, "step": 419790 }, { "epoch": 0.8480225600665813, "grad_norm": 23.644744873046875, "learning_rate": 7.63463185670939e-07, "loss": 34.1561, "step": 419800 }, { "epoch": 0.8480427606992651, "grad_norm": 176.94729614257812, "learning_rate": 7.632778061287494e-07, "loss": 10.4886, "step": 419810 }, { "epoch": 0.8480629613319489, "grad_norm": 0.23744302988052368, "learning_rate": 7.630924472358304e-07, "loss": 15.2712, "step": 419820 }, { "epoch": 0.8480831619646327, "grad_norm": 168.18275451660156, "learning_rate": 7.629071089930834e-07, "loss": 17.2235, "step": 419830 }, { "epoch": 0.8481033625973166, "grad_norm": 173.34823608398438, "learning_rate": 7.62721791401414e-07, "loss": 24.3232, "step": 419840 }, { "epoch": 0.8481235632300004, "grad_norm": 278.3913269042969, "learning_rate": 7.625364944617242e-07, "loss": 26.1698, "step": 419850 }, { "epoch": 0.8481437638626842, "grad_norm": 244.33982849121094, "learning_rate": 7.623512181749182e-07, "loss": 13.8549, "step": 419860 }, { "epoch": 0.848163964495368, "grad_norm": 394.82794189453125, "learning_rate": 7.621659625418987e-07, "loss": 24.5833, "step": 419870 }, { "epoch": 0.8481841651280518, "grad_norm": 293.8733215332031, "learning_rate": 7.619807275635672e-07, "loss": 16.7539, "step": 419880 }, { "epoch": 0.8482043657607357, "grad_norm": 237.5089111328125, "learning_rate": 7.617955132408289e-07, "loss": 8.9772, "step": 419890 }, { "epoch": 0.8482245663934195, "grad_norm": 226.7406768798828, "learning_rate": 7.61610319574585e-07, "loss": 11.9357, "step": 419900 }, { "epoch": 0.8482447670261033, "grad_norm": 486.04156494140625, "learning_rate": 7.614251465657374e-07, "loss": 19.6871, "step": 419910 }, { "epoch": 0.8482649676587871, "grad_norm": 7.608290672302246, "learning_rate": 7.612399942151894e-07, "loss": 12.5991, "step": 419920 }, { "epoch": 0.8482851682914709, "grad_norm": 265.33343505859375, "learning_rate": 7.610548625238445e-07, "loss": 15.584, "step": 419930 }, { "epoch": 0.8483053689241548, "grad_norm": 418.29498291015625, "learning_rate": 7.608697514926045e-07, "loss": 13.2026, "step": 419940 }, { "epoch": 0.8483255695568385, "grad_norm": 300.98663330078125, "learning_rate": 7.606846611223695e-07, "loss": 10.8739, "step": 419950 }, { "epoch": 0.8483457701895223, "grad_norm": 292.3473205566406, "learning_rate": 7.60499591414045e-07, "loss": 35.9929, "step": 419960 }, { "epoch": 0.8483659708222061, "grad_norm": 26.240802764892578, "learning_rate": 7.60314542368531e-07, "loss": 15.6928, "step": 419970 }, { "epoch": 0.8483861714548899, "grad_norm": 163.01925659179688, "learning_rate": 7.601295139867287e-07, "loss": 17.6743, "step": 419980 }, { "epoch": 0.8484063720875737, "grad_norm": 349.1629638671875, "learning_rate": 7.599445062695404e-07, "loss": 21.1763, "step": 419990 }, { "epoch": 0.8484265727202576, "grad_norm": 205.2567138671875, "learning_rate": 7.597595192178702e-07, "loss": 7.8189, "step": 420000 }, { "epoch": 0.8484467733529414, "grad_norm": 143.18463134765625, "learning_rate": 7.595745528326176e-07, "loss": 32.6341, "step": 420010 }, { "epoch": 0.8484669739856252, "grad_norm": 240.4794921875, "learning_rate": 7.593896071146828e-07, "loss": 12.1568, "step": 420020 }, { "epoch": 0.848487174618309, "grad_norm": 353.1625061035156, "learning_rate": 7.592046820649706e-07, "loss": 13.0547, "step": 420030 }, { "epoch": 0.8485073752509928, "grad_norm": 183.46510314941406, "learning_rate": 7.5901977768438e-07, "loss": 14.1486, "step": 420040 }, { "epoch": 0.8485275758836767, "grad_norm": 314.8819885253906, "learning_rate": 7.588348939738116e-07, "loss": 12.7561, "step": 420050 }, { "epoch": 0.8485477765163605, "grad_norm": 4.625089168548584, "learning_rate": 7.586500309341682e-07, "loss": 13.5346, "step": 420060 }, { "epoch": 0.8485679771490443, "grad_norm": 298.79095458984375, "learning_rate": 7.584651885663497e-07, "loss": 8.6733, "step": 420070 }, { "epoch": 0.8485881777817281, "grad_norm": 323.5435485839844, "learning_rate": 7.582803668712579e-07, "loss": 12.3314, "step": 420080 }, { "epoch": 0.848608378414412, "grad_norm": 139.5428009033203, "learning_rate": 7.580955658497924e-07, "loss": 20.1745, "step": 420090 }, { "epoch": 0.8486285790470958, "grad_norm": 200.63113403320312, "learning_rate": 7.579107855028562e-07, "loss": 18.4388, "step": 420100 }, { "epoch": 0.8486487796797796, "grad_norm": 332.6578674316406, "learning_rate": 7.577260258313474e-07, "loss": 23.8662, "step": 420110 }, { "epoch": 0.8486689803124634, "grad_norm": 339.2360534667969, "learning_rate": 7.57541286836167e-07, "loss": 14.9669, "step": 420120 }, { "epoch": 0.8486891809451472, "grad_norm": 249.14703369140625, "learning_rate": 7.573565685182166e-07, "loss": 16.6554, "step": 420130 }, { "epoch": 0.848709381577831, "grad_norm": 69.66209411621094, "learning_rate": 7.571718708783948e-07, "loss": 12.1606, "step": 420140 }, { "epoch": 0.8487295822105149, "grad_norm": 360.481689453125, "learning_rate": 7.569871939176037e-07, "loss": 20.4303, "step": 420150 }, { "epoch": 0.8487497828431987, "grad_norm": 212.6834259033203, "learning_rate": 7.568025376367422e-07, "loss": 11.8966, "step": 420160 }, { "epoch": 0.8487699834758825, "grad_norm": 320.1981506347656, "learning_rate": 7.566179020367098e-07, "loss": 18.0786, "step": 420170 }, { "epoch": 0.8487901841085663, "grad_norm": 300.99017333984375, "learning_rate": 7.564332871184077e-07, "loss": 21.4287, "step": 420180 }, { "epoch": 0.8488103847412501, "grad_norm": 500.338134765625, "learning_rate": 7.562486928827356e-07, "loss": 24.5273, "step": 420190 }, { "epoch": 0.848830585373934, "grad_norm": 448.1721496582031, "learning_rate": 7.560641193305912e-07, "loss": 26.6969, "step": 420200 }, { "epoch": 0.8488507860066177, "grad_norm": 371.9790954589844, "learning_rate": 7.55879566462876e-07, "loss": 21.3599, "step": 420210 }, { "epoch": 0.8488709866393015, "grad_norm": 260.514892578125, "learning_rate": 7.556950342804908e-07, "loss": 13.8027, "step": 420220 }, { "epoch": 0.8488911872719853, "grad_norm": 445.916015625, "learning_rate": 7.555105227843312e-07, "loss": 22.9075, "step": 420230 }, { "epoch": 0.8489113879046691, "grad_norm": 157.91195678710938, "learning_rate": 7.553260319752986e-07, "loss": 12.0151, "step": 420240 }, { "epoch": 0.848931588537353, "grad_norm": 167.00160217285156, "learning_rate": 7.551415618542928e-07, "loss": 9.3074, "step": 420250 }, { "epoch": 0.8489517891700368, "grad_norm": 352.47314453125, "learning_rate": 7.549571124222127e-07, "loss": 25.1924, "step": 420260 }, { "epoch": 0.8489719898027206, "grad_norm": 142.6769561767578, "learning_rate": 7.547726836799551e-07, "loss": 13.6861, "step": 420270 }, { "epoch": 0.8489921904354044, "grad_norm": 278.2664489746094, "learning_rate": 7.545882756284212e-07, "loss": 8.349, "step": 420280 }, { "epoch": 0.8490123910680882, "grad_norm": 334.56671142578125, "learning_rate": 7.544038882685112e-07, "loss": 26.9028, "step": 420290 }, { "epoch": 0.849032591700772, "grad_norm": 355.2579650878906, "learning_rate": 7.542195216011188e-07, "loss": 18.8921, "step": 420300 }, { "epoch": 0.8490527923334559, "grad_norm": 244.13587951660156, "learning_rate": 7.540351756271464e-07, "loss": 17.2238, "step": 420310 }, { "epoch": 0.8490729929661397, "grad_norm": 296.42291259765625, "learning_rate": 7.538508503474923e-07, "loss": 17.2842, "step": 420320 }, { "epoch": 0.8490931935988235, "grad_norm": 169.27049255371094, "learning_rate": 7.536665457630544e-07, "loss": 20.7123, "step": 420330 }, { "epoch": 0.8491133942315073, "grad_norm": 237.7201690673828, "learning_rate": 7.534822618747289e-07, "loss": 34.8462, "step": 420340 }, { "epoch": 0.8491335948641912, "grad_norm": 399.43304443359375, "learning_rate": 7.532979986834177e-07, "loss": 26.8091, "step": 420350 }, { "epoch": 0.849153795496875, "grad_norm": 265.9767150878906, "learning_rate": 7.53113756190017e-07, "loss": 10.1793, "step": 420360 }, { "epoch": 0.8491739961295588, "grad_norm": 165.91192626953125, "learning_rate": 7.529295343954229e-07, "loss": 5.8625, "step": 420370 }, { "epoch": 0.8491941967622426, "grad_norm": 327.5436096191406, "learning_rate": 7.527453333005368e-07, "loss": 14.9604, "step": 420380 }, { "epoch": 0.8492143973949264, "grad_norm": 487.19476318359375, "learning_rate": 7.525611529062538e-07, "loss": 16.9882, "step": 420390 }, { "epoch": 0.8492345980276103, "grad_norm": 276.6081848144531, "learning_rate": 7.523769932134739e-07, "loss": 16.2134, "step": 420400 }, { "epoch": 0.8492547986602941, "grad_norm": 90.24506378173828, "learning_rate": 7.521928542230916e-07, "loss": 18.9382, "step": 420410 }, { "epoch": 0.8492749992929779, "grad_norm": 210.87503051757812, "learning_rate": 7.520087359360073e-07, "loss": 7.2551, "step": 420420 }, { "epoch": 0.8492951999256617, "grad_norm": 236.0486602783203, "learning_rate": 7.51824638353118e-07, "loss": 17.1328, "step": 420430 }, { "epoch": 0.8493154005583455, "grad_norm": 25.791648864746094, "learning_rate": 7.51640561475318e-07, "loss": 14.2958, "step": 420440 }, { "epoch": 0.8493356011910294, "grad_norm": 89.52472686767578, "learning_rate": 7.514565053035083e-07, "loss": 10.9837, "step": 420450 }, { "epoch": 0.8493558018237131, "grad_norm": 348.5061950683594, "learning_rate": 7.512724698385831e-07, "loss": 12.7353, "step": 420460 }, { "epoch": 0.8493760024563969, "grad_norm": 168.38575744628906, "learning_rate": 7.510884550814418e-07, "loss": 14.3834, "step": 420470 }, { "epoch": 0.8493962030890807, "grad_norm": 459.49310302734375, "learning_rate": 7.509044610329803e-07, "loss": 29.4848, "step": 420480 }, { "epoch": 0.8494164037217645, "grad_norm": 345.7757263183594, "learning_rate": 7.507204876940938e-07, "loss": 14.1724, "step": 420490 }, { "epoch": 0.8494366043544483, "grad_norm": 184.4434356689453, "learning_rate": 7.505365350656813e-07, "loss": 12.8978, "step": 420500 }, { "epoch": 0.8494568049871322, "grad_norm": 433.6654357910156, "learning_rate": 7.50352603148638e-07, "loss": 25.0021, "step": 420510 }, { "epoch": 0.849477005619816, "grad_norm": 239.95843505859375, "learning_rate": 7.5016869194386e-07, "loss": 8.2559, "step": 420520 }, { "epoch": 0.8494972062524998, "grad_norm": 117.70085906982422, "learning_rate": 7.499848014522443e-07, "loss": 18.0787, "step": 420530 }, { "epoch": 0.8495174068851836, "grad_norm": 142.1168670654297, "learning_rate": 7.498009316746879e-07, "loss": 28.6552, "step": 420540 }, { "epoch": 0.8495376075178674, "grad_norm": 398.52874755859375, "learning_rate": 7.496170826120869e-07, "loss": 19.4175, "step": 420550 }, { "epoch": 0.8495578081505513, "grad_norm": 640.2725219726562, "learning_rate": 7.494332542653349e-07, "loss": 17.8616, "step": 420560 }, { "epoch": 0.8495780087832351, "grad_norm": 237.51490783691406, "learning_rate": 7.492494466353317e-07, "loss": 23.6668, "step": 420570 }, { "epoch": 0.8495982094159189, "grad_norm": 333.6739196777344, "learning_rate": 7.490656597229707e-07, "loss": 38.5586, "step": 420580 }, { "epoch": 0.8496184100486027, "grad_norm": 269.1446228027344, "learning_rate": 7.488818935291465e-07, "loss": 25.3678, "step": 420590 }, { "epoch": 0.8496386106812865, "grad_norm": 264.438720703125, "learning_rate": 7.486981480547567e-07, "loss": 31.9247, "step": 420600 }, { "epoch": 0.8496588113139704, "grad_norm": 150.271240234375, "learning_rate": 7.48514423300698e-07, "loss": 16.3825, "step": 420610 }, { "epoch": 0.8496790119466542, "grad_norm": 44.59429931640625, "learning_rate": 7.48330719267864e-07, "loss": 10.0314, "step": 420620 }, { "epoch": 0.849699212579338, "grad_norm": 426.0372009277344, "learning_rate": 7.481470359571497e-07, "loss": 15.0743, "step": 420630 }, { "epoch": 0.8497194132120218, "grad_norm": 180.54214477539062, "learning_rate": 7.479633733694519e-07, "loss": 14.0011, "step": 420640 }, { "epoch": 0.8497396138447056, "grad_norm": 499.466552734375, "learning_rate": 7.477797315056645e-07, "loss": 21.3051, "step": 420650 }, { "epoch": 0.8497598144773895, "grad_norm": 290.8893127441406, "learning_rate": 7.475961103666824e-07, "loss": 18.4215, "step": 420660 }, { "epoch": 0.8497800151100733, "grad_norm": 198.52719116210938, "learning_rate": 7.474125099534019e-07, "loss": 18.0158, "step": 420670 }, { "epoch": 0.8498002157427571, "grad_norm": 102.0829849243164, "learning_rate": 7.472289302667163e-07, "loss": 13.2004, "step": 420680 }, { "epoch": 0.8498204163754409, "grad_norm": 219.46578979492188, "learning_rate": 7.470453713075215e-07, "loss": 23.4809, "step": 420690 }, { "epoch": 0.8498406170081247, "grad_norm": 119.0762710571289, "learning_rate": 7.468618330767114e-07, "loss": 11.8714, "step": 420700 }, { "epoch": 0.8498608176408086, "grad_norm": 373.8700256347656, "learning_rate": 7.466783155751816e-07, "loss": 15.1683, "step": 420710 }, { "epoch": 0.8498810182734923, "grad_norm": 244.5621337890625, "learning_rate": 7.464948188038262e-07, "loss": 15.3436, "step": 420720 }, { "epoch": 0.8499012189061761, "grad_norm": 484.18206787109375, "learning_rate": 7.463113427635376e-07, "loss": 24.0569, "step": 420730 }, { "epoch": 0.8499214195388599, "grad_norm": 150.07470703125, "learning_rate": 7.461278874552131e-07, "loss": 8.4789, "step": 420740 }, { "epoch": 0.8499416201715437, "grad_norm": 170.98745727539062, "learning_rate": 7.459444528797438e-07, "loss": 10.2145, "step": 420750 }, { "epoch": 0.8499618208042276, "grad_norm": 225.51129150390625, "learning_rate": 7.457610390380265e-07, "loss": 16.562, "step": 420760 }, { "epoch": 0.8499820214369114, "grad_norm": 307.02899169921875, "learning_rate": 7.455776459309538e-07, "loss": 15.5736, "step": 420770 }, { "epoch": 0.8500022220695952, "grad_norm": 205.0052947998047, "learning_rate": 7.453942735594189e-07, "loss": 29.7461, "step": 420780 }, { "epoch": 0.850022422702279, "grad_norm": 91.76826477050781, "learning_rate": 7.452109219243175e-07, "loss": 14.1393, "step": 420790 }, { "epoch": 0.8500426233349628, "grad_norm": 110.80081939697266, "learning_rate": 7.450275910265415e-07, "loss": 29.49, "step": 420800 }, { "epoch": 0.8500628239676467, "grad_norm": 442.0294494628906, "learning_rate": 7.448442808669842e-07, "loss": 14.5325, "step": 420810 }, { "epoch": 0.8500830246003305, "grad_norm": 274.3088073730469, "learning_rate": 7.446609914465397e-07, "loss": 19.1061, "step": 420820 }, { "epoch": 0.8501032252330143, "grad_norm": 166.27975463867188, "learning_rate": 7.444777227661037e-07, "loss": 19.6636, "step": 420830 }, { "epoch": 0.8501234258656981, "grad_norm": 366.283203125, "learning_rate": 7.442944748265651e-07, "loss": 16.053, "step": 420840 }, { "epoch": 0.8501436264983819, "grad_norm": 197.868408203125, "learning_rate": 7.441112476288187e-07, "loss": 23.3065, "step": 420850 }, { "epoch": 0.8501638271310658, "grad_norm": 214.66725158691406, "learning_rate": 7.439280411737592e-07, "loss": 12.0663, "step": 420860 }, { "epoch": 0.8501840277637496, "grad_norm": 159.5771026611328, "learning_rate": 7.437448554622783e-07, "loss": 6.4042, "step": 420870 }, { "epoch": 0.8502042283964334, "grad_norm": 280.25396728515625, "learning_rate": 7.435616904952675e-07, "loss": 19.2452, "step": 420880 }, { "epoch": 0.8502244290291172, "grad_norm": 389.5762939453125, "learning_rate": 7.433785462736209e-07, "loss": 19.7389, "step": 420890 }, { "epoch": 0.850244629661801, "grad_norm": 401.09918212890625, "learning_rate": 7.43195422798233e-07, "loss": 20.0947, "step": 420900 }, { "epoch": 0.8502648302944849, "grad_norm": 865.834228515625, "learning_rate": 7.430123200699924e-07, "loss": 32.5864, "step": 420910 }, { "epoch": 0.8502850309271687, "grad_norm": 391.0254821777344, "learning_rate": 7.428292380897933e-07, "loss": 17.4661, "step": 420920 }, { "epoch": 0.8503052315598525, "grad_norm": 251.45692443847656, "learning_rate": 7.426461768585291e-07, "loss": 23.0858, "step": 420930 }, { "epoch": 0.8503254321925363, "grad_norm": 66.18639373779297, "learning_rate": 7.424631363770912e-07, "loss": 13.5784, "step": 420940 }, { "epoch": 0.8503456328252201, "grad_norm": 273.02703857421875, "learning_rate": 7.422801166463706e-07, "loss": 19.4245, "step": 420950 }, { "epoch": 0.850365833457904, "grad_norm": 132.9244384765625, "learning_rate": 7.420971176672614e-07, "loss": 17.1853, "step": 420960 }, { "epoch": 0.8503860340905878, "grad_norm": 238.39646911621094, "learning_rate": 7.419141394406543e-07, "loss": 18.5046, "step": 420970 }, { "epoch": 0.8504062347232715, "grad_norm": 259.7991638183594, "learning_rate": 7.4173118196744e-07, "loss": 33.2685, "step": 420980 }, { "epoch": 0.8504264353559553, "grad_norm": 175.85108947753906, "learning_rate": 7.415482452485129e-07, "loss": 19.9167, "step": 420990 }, { "epoch": 0.8504466359886391, "grad_norm": 241.6034698486328, "learning_rate": 7.413653292847617e-07, "loss": 14.3266, "step": 421000 }, { "epoch": 0.8504668366213229, "grad_norm": 393.329345703125, "learning_rate": 7.411824340770813e-07, "loss": 21.5633, "step": 421010 }, { "epoch": 0.8504870372540068, "grad_norm": 7.533237934112549, "learning_rate": 7.409995596263591e-07, "loss": 9.5544, "step": 421020 }, { "epoch": 0.8505072378866906, "grad_norm": 351.93182373046875, "learning_rate": 7.408167059334897e-07, "loss": 35.8951, "step": 421030 }, { "epoch": 0.8505274385193744, "grad_norm": 145.52764892578125, "learning_rate": 7.40633872999364e-07, "loss": 13.7638, "step": 421040 }, { "epoch": 0.8505476391520582, "grad_norm": 313.92559814453125, "learning_rate": 7.4045106082487e-07, "loss": 26.5784, "step": 421050 }, { "epoch": 0.850567839784742, "grad_norm": 180.81326293945312, "learning_rate": 7.402682694109026e-07, "loss": 17.3721, "step": 421060 }, { "epoch": 0.8505880404174259, "grad_norm": 217.1885986328125, "learning_rate": 7.4008549875835e-07, "loss": 14.7441, "step": 421070 }, { "epoch": 0.8506082410501097, "grad_norm": 452.09124755859375, "learning_rate": 7.399027488681049e-07, "loss": 29.7664, "step": 421080 }, { "epoch": 0.8506284416827935, "grad_norm": 440.850341796875, "learning_rate": 7.39720019741057e-07, "loss": 27.2273, "step": 421090 }, { "epoch": 0.8506486423154773, "grad_norm": 313.632568359375, "learning_rate": 7.395373113780962e-07, "loss": 17.5803, "step": 421100 }, { "epoch": 0.8506688429481611, "grad_norm": 241.4678955078125, "learning_rate": 7.393546237801147e-07, "loss": 19.4375, "step": 421110 }, { "epoch": 0.850689043580845, "grad_norm": 3.194425582885742, "learning_rate": 7.391719569480021e-07, "loss": 12.7599, "step": 421120 }, { "epoch": 0.8507092442135288, "grad_norm": 344.6592712402344, "learning_rate": 7.389893108826473e-07, "loss": 23.5103, "step": 421130 }, { "epoch": 0.8507294448462126, "grad_norm": 94.7010498046875, "learning_rate": 7.388066855849418e-07, "loss": 12.9681, "step": 421140 }, { "epoch": 0.8507496454788964, "grad_norm": 211.59535217285156, "learning_rate": 7.386240810557771e-07, "loss": 28.2957, "step": 421150 }, { "epoch": 0.8507698461115802, "grad_norm": 1358.984130859375, "learning_rate": 7.384414972960419e-07, "loss": 29.0185, "step": 421160 }, { "epoch": 0.8507900467442641, "grad_norm": 309.71270751953125, "learning_rate": 7.382589343066243e-07, "loss": 15.0344, "step": 421170 }, { "epoch": 0.8508102473769479, "grad_norm": 488.8822326660156, "learning_rate": 7.380763920884171e-07, "loss": 18.2589, "step": 421180 }, { "epoch": 0.8508304480096317, "grad_norm": 97.44561004638672, "learning_rate": 7.378938706423089e-07, "loss": 13.1631, "step": 421190 }, { "epoch": 0.8508506486423155, "grad_norm": 57.355247497558594, "learning_rate": 7.377113699691879e-07, "loss": 15.6864, "step": 421200 }, { "epoch": 0.8508708492749993, "grad_norm": 286.2916259765625, "learning_rate": 7.375288900699445e-07, "loss": 10.1951, "step": 421210 }, { "epoch": 0.8508910499076832, "grad_norm": 104.08824157714844, "learning_rate": 7.373464309454698e-07, "loss": 22.4203, "step": 421220 }, { "epoch": 0.8509112505403669, "grad_norm": 101.53482818603516, "learning_rate": 7.371639925966512e-07, "loss": 11.1957, "step": 421230 }, { "epoch": 0.8509314511730507, "grad_norm": 0.0, "learning_rate": 7.369815750243769e-07, "loss": 9.5287, "step": 421240 }, { "epoch": 0.8509516518057345, "grad_norm": 0.0, "learning_rate": 7.367991782295392e-07, "loss": 21.7929, "step": 421250 }, { "epoch": 0.8509718524384183, "grad_norm": 449.0422668457031, "learning_rate": 7.366168022130249e-07, "loss": 22.1363, "step": 421260 }, { "epoch": 0.8509920530711022, "grad_norm": 409.2569274902344, "learning_rate": 7.364344469757223e-07, "loss": 21.8383, "step": 421270 }, { "epoch": 0.851012253703786, "grad_norm": 324.7436828613281, "learning_rate": 7.362521125185218e-07, "loss": 16.7726, "step": 421280 }, { "epoch": 0.8510324543364698, "grad_norm": 204.50772094726562, "learning_rate": 7.360697988423105e-07, "loss": 13.0349, "step": 421290 }, { "epoch": 0.8510526549691536, "grad_norm": 147.9385223388672, "learning_rate": 7.358875059479792e-07, "loss": 13.7852, "step": 421300 }, { "epoch": 0.8510728556018374, "grad_norm": 406.71234130859375, "learning_rate": 7.357052338364134e-07, "loss": 15.4422, "step": 421310 }, { "epoch": 0.8510930562345213, "grad_norm": 7.824883460998535, "learning_rate": 7.355229825085047e-07, "loss": 10.0945, "step": 421320 }, { "epoch": 0.8511132568672051, "grad_norm": 317.2963562011719, "learning_rate": 7.353407519651395e-07, "loss": 25.9794, "step": 421330 }, { "epoch": 0.8511334574998889, "grad_norm": 354.7696838378906, "learning_rate": 7.351585422072049e-07, "loss": 12.8454, "step": 421340 }, { "epoch": 0.8511536581325727, "grad_norm": 266.3562316894531, "learning_rate": 7.349763532355919e-07, "loss": 13.0762, "step": 421350 }, { "epoch": 0.8511738587652565, "grad_norm": 38.6658935546875, "learning_rate": 7.347941850511853e-07, "loss": 21.8872, "step": 421360 }, { "epoch": 0.8511940593979404, "grad_norm": 364.42510986328125, "learning_rate": 7.34612037654876e-07, "loss": 12.2892, "step": 421370 }, { "epoch": 0.8512142600306242, "grad_norm": 209.03140258789062, "learning_rate": 7.344299110475506e-07, "loss": 21.4583, "step": 421380 }, { "epoch": 0.851234460663308, "grad_norm": 214.24380493164062, "learning_rate": 7.342478052300945e-07, "loss": 21.37, "step": 421390 }, { "epoch": 0.8512546612959918, "grad_norm": 344.2757263183594, "learning_rate": 7.34065720203399e-07, "loss": 12.0375, "step": 421400 }, { "epoch": 0.8512748619286756, "grad_norm": 126.75021362304688, "learning_rate": 7.338836559683493e-07, "loss": 6.9576, "step": 421410 }, { "epoch": 0.8512950625613595, "grad_norm": 248.9415283203125, "learning_rate": 7.337016125258323e-07, "loss": 17.6076, "step": 421420 }, { "epoch": 0.8513152631940433, "grad_norm": 294.7452087402344, "learning_rate": 7.335195898767367e-07, "loss": 17.3172, "step": 421430 }, { "epoch": 0.8513354638267271, "grad_norm": 216.88279724121094, "learning_rate": 7.333375880219507e-07, "loss": 52.5011, "step": 421440 }, { "epoch": 0.8513556644594109, "grad_norm": 8.080552101135254, "learning_rate": 7.33155606962358e-07, "loss": 24.598, "step": 421450 }, { "epoch": 0.8513758650920947, "grad_norm": 396.85626220703125, "learning_rate": 7.329736466988469e-07, "loss": 11.212, "step": 421460 }, { "epoch": 0.8513960657247786, "grad_norm": 138.68101501464844, "learning_rate": 7.327917072323065e-07, "loss": 29.2493, "step": 421470 }, { "epoch": 0.8514162663574624, "grad_norm": 125.24776458740234, "learning_rate": 7.326097885636214e-07, "loss": 21.586, "step": 421480 }, { "epoch": 0.8514364669901461, "grad_norm": 174.2076873779297, "learning_rate": 7.324278906936771e-07, "loss": 19.386, "step": 421490 }, { "epoch": 0.8514566676228299, "grad_norm": 612.1419067382812, "learning_rate": 7.322460136233622e-07, "loss": 27.7387, "step": 421500 }, { "epoch": 0.8514768682555137, "grad_norm": 170.1300811767578, "learning_rate": 7.320641573535647e-07, "loss": 8.62, "step": 421510 }, { "epoch": 0.8514970688881975, "grad_norm": 307.4579162597656, "learning_rate": 7.318823218851668e-07, "loss": 27.0224, "step": 421520 }, { "epoch": 0.8515172695208814, "grad_norm": 362.7073974609375, "learning_rate": 7.31700507219057e-07, "loss": 18.0192, "step": 421530 }, { "epoch": 0.8515374701535652, "grad_norm": 223.84349060058594, "learning_rate": 7.315187133561219e-07, "loss": 15.0062, "step": 421540 }, { "epoch": 0.851557670786249, "grad_norm": 12.543330192565918, "learning_rate": 7.31336940297247e-07, "loss": 16.1014, "step": 421550 }, { "epoch": 0.8515778714189328, "grad_norm": 239.0854949951172, "learning_rate": 7.311551880433171e-07, "loss": 19.9421, "step": 421560 }, { "epoch": 0.8515980720516166, "grad_norm": 360.0005798339844, "learning_rate": 7.309734565952198e-07, "loss": 15.6721, "step": 421570 }, { "epoch": 0.8516182726843005, "grad_norm": 219.37277221679688, "learning_rate": 7.307917459538405e-07, "loss": 22.07, "step": 421580 }, { "epoch": 0.8516384733169843, "grad_norm": 328.8840026855469, "learning_rate": 7.30610056120063e-07, "loss": 24.5609, "step": 421590 }, { "epoch": 0.8516586739496681, "grad_norm": 47.774227142333984, "learning_rate": 7.304283870947748e-07, "loss": 6.9133, "step": 421600 }, { "epoch": 0.8516788745823519, "grad_norm": 346.6888122558594, "learning_rate": 7.302467388788614e-07, "loss": 21.3566, "step": 421610 }, { "epoch": 0.8516990752150357, "grad_norm": 316.48431396484375, "learning_rate": 7.300651114732077e-07, "loss": 19.3937, "step": 421620 }, { "epoch": 0.8517192758477196, "grad_norm": 179.59140014648438, "learning_rate": 7.298835048786979e-07, "loss": 15.0749, "step": 421630 }, { "epoch": 0.8517394764804034, "grad_norm": 266.63946533203125, "learning_rate": 7.29701919096219e-07, "loss": 16.4188, "step": 421640 }, { "epoch": 0.8517596771130872, "grad_norm": 388.5896301269531, "learning_rate": 7.295203541266549e-07, "loss": 22.2077, "step": 421650 }, { "epoch": 0.851779877745771, "grad_norm": 154.12948608398438, "learning_rate": 7.293388099708892e-07, "loss": 17.1119, "step": 421660 }, { "epoch": 0.8518000783784548, "grad_norm": 165.70606994628906, "learning_rate": 7.291572866298102e-07, "loss": 21.2053, "step": 421670 }, { "epoch": 0.8518202790111387, "grad_norm": 60.134315490722656, "learning_rate": 7.289757841042988e-07, "loss": 23.3809, "step": 421680 }, { "epoch": 0.8518404796438225, "grad_norm": 266.1582946777344, "learning_rate": 7.287943023952426e-07, "loss": 14.6149, "step": 421690 }, { "epoch": 0.8518606802765063, "grad_norm": 424.4773254394531, "learning_rate": 7.286128415035249e-07, "loss": 19.9244, "step": 421700 }, { "epoch": 0.8518808809091901, "grad_norm": 330.6685791015625, "learning_rate": 7.284314014300292e-07, "loss": 14.8661, "step": 421710 }, { "epoch": 0.8519010815418739, "grad_norm": 341.5940246582031, "learning_rate": 7.282499821756417e-07, "loss": 11.3875, "step": 421720 }, { "epoch": 0.8519212821745578, "grad_norm": 169.5347900390625, "learning_rate": 7.28068583741246e-07, "loss": 25.3427, "step": 421730 }, { "epoch": 0.8519414828072415, "grad_norm": 498.1751403808594, "learning_rate": 7.278872061277248e-07, "loss": 23.8784, "step": 421740 }, { "epoch": 0.8519616834399253, "grad_norm": 439.14599609375, "learning_rate": 7.277058493359629e-07, "loss": 28.2358, "step": 421750 }, { "epoch": 0.8519818840726091, "grad_norm": 293.0405578613281, "learning_rate": 7.275245133668457e-07, "loss": 20.6621, "step": 421760 }, { "epoch": 0.8520020847052929, "grad_norm": 174.77377319335938, "learning_rate": 7.273431982212559e-07, "loss": 14.6315, "step": 421770 }, { "epoch": 0.8520222853379767, "grad_norm": 320.4103088378906, "learning_rate": 7.27161903900076e-07, "loss": 15.2102, "step": 421780 }, { "epoch": 0.8520424859706606, "grad_norm": 212.31796264648438, "learning_rate": 7.269806304041915e-07, "loss": 11.6942, "step": 421790 }, { "epoch": 0.8520626866033444, "grad_norm": 411.11383056640625, "learning_rate": 7.267993777344856e-07, "loss": 22.8013, "step": 421800 }, { "epoch": 0.8520828872360282, "grad_norm": 267.9853820800781, "learning_rate": 7.266181458918403e-07, "loss": 34.6362, "step": 421810 }, { "epoch": 0.852103087868712, "grad_norm": 207.4847412109375, "learning_rate": 7.264369348771394e-07, "loss": 17.2793, "step": 421820 }, { "epoch": 0.8521232885013958, "grad_norm": 272.58392333984375, "learning_rate": 7.262557446912693e-07, "loss": 10.2373, "step": 421830 }, { "epoch": 0.8521434891340797, "grad_norm": 244.28067016601562, "learning_rate": 7.260745753351078e-07, "loss": 13.4829, "step": 421840 }, { "epoch": 0.8521636897667635, "grad_norm": 278.79168701171875, "learning_rate": 7.258934268095402e-07, "loss": 12.1236, "step": 421850 }, { "epoch": 0.8521838903994473, "grad_norm": 434.40142822265625, "learning_rate": 7.257122991154514e-07, "loss": 23.3375, "step": 421860 }, { "epoch": 0.8522040910321311, "grad_norm": 370.8033752441406, "learning_rate": 7.255311922537217e-07, "loss": 21.3806, "step": 421870 }, { "epoch": 0.852224291664815, "grad_norm": 9.067605018615723, "learning_rate": 7.253501062252338e-07, "loss": 17.9573, "step": 421880 }, { "epoch": 0.8522444922974988, "grad_norm": 91.92496490478516, "learning_rate": 7.251690410308726e-07, "loss": 21.9577, "step": 421890 }, { "epoch": 0.8522646929301826, "grad_norm": 176.9308319091797, "learning_rate": 7.249879966715174e-07, "loss": 12.7566, "step": 421900 }, { "epoch": 0.8522848935628664, "grad_norm": 232.9305877685547, "learning_rate": 7.248069731480533e-07, "loss": 11.7812, "step": 421910 }, { "epoch": 0.8523050941955502, "grad_norm": 336.79071044921875, "learning_rate": 7.246259704613606e-07, "loss": 19.2704, "step": 421920 }, { "epoch": 0.852325294828234, "grad_norm": 213.23631286621094, "learning_rate": 7.244449886123233e-07, "loss": 21.2992, "step": 421930 }, { "epoch": 0.8523454954609179, "grad_norm": 164.0391387939453, "learning_rate": 7.242640276018226e-07, "loss": 14.1989, "step": 421940 }, { "epoch": 0.8523656960936017, "grad_norm": 18.23884391784668, "learning_rate": 7.240830874307392e-07, "loss": 21.7085, "step": 421950 }, { "epoch": 0.8523858967262855, "grad_norm": 117.4522933959961, "learning_rate": 7.239021680999575e-07, "loss": 17.7126, "step": 421960 }, { "epoch": 0.8524060973589693, "grad_norm": 306.1466979980469, "learning_rate": 7.237212696103568e-07, "loss": 18.9851, "step": 421970 }, { "epoch": 0.8524262979916531, "grad_norm": 11.861287117004395, "learning_rate": 7.235403919628214e-07, "loss": 9.2666, "step": 421980 }, { "epoch": 0.852446498624337, "grad_norm": 172.203125, "learning_rate": 7.233595351582313e-07, "loss": 8.9995, "step": 421990 }, { "epoch": 0.8524666992570207, "grad_norm": 95.42937469482422, "learning_rate": 7.23178699197467e-07, "loss": 20.1615, "step": 422000 }, { "epoch": 0.8524868998897045, "grad_norm": 62.41229248046875, "learning_rate": 7.229978840814122e-07, "loss": 24.9385, "step": 422010 }, { "epoch": 0.8525071005223883, "grad_norm": 263.0828857421875, "learning_rate": 7.228170898109465e-07, "loss": 10.9628, "step": 422020 }, { "epoch": 0.8525273011550721, "grad_norm": 43.96076965332031, "learning_rate": 7.22636316386951e-07, "loss": 16.8388, "step": 422030 }, { "epoch": 0.852547501787756, "grad_norm": 169.77389526367188, "learning_rate": 7.22455563810307e-07, "loss": 18.0018, "step": 422040 }, { "epoch": 0.8525677024204398, "grad_norm": 147.6004638671875, "learning_rate": 7.222748320818984e-07, "loss": 7.4699, "step": 422050 }, { "epoch": 0.8525879030531236, "grad_norm": 98.68069458007812, "learning_rate": 7.220941212026005e-07, "loss": 13.1359, "step": 422060 }, { "epoch": 0.8526081036858074, "grad_norm": 347.5632019042969, "learning_rate": 7.219134311732978e-07, "loss": 16.8121, "step": 422070 }, { "epoch": 0.8526283043184912, "grad_norm": 360.92425537109375, "learning_rate": 7.217327619948705e-07, "loss": 11.1647, "step": 422080 }, { "epoch": 0.8526485049511751, "grad_norm": 215.7709197998047, "learning_rate": 7.215521136681997e-07, "loss": 13.1727, "step": 422090 }, { "epoch": 0.8526687055838589, "grad_norm": 227.2262725830078, "learning_rate": 7.213714861941628e-07, "loss": 21.1539, "step": 422100 }, { "epoch": 0.8526889062165427, "grad_norm": 192.76121520996094, "learning_rate": 7.211908795736433e-07, "loss": 14.9538, "step": 422110 }, { "epoch": 0.8527091068492265, "grad_norm": 246.01089477539062, "learning_rate": 7.210102938075225e-07, "loss": 13.461, "step": 422120 }, { "epoch": 0.8527293074819103, "grad_norm": 636.9603881835938, "learning_rate": 7.20829728896676e-07, "loss": 31.8066, "step": 422130 }, { "epoch": 0.8527495081145942, "grad_norm": 320.2397155761719, "learning_rate": 7.206491848419867e-07, "loss": 13.6174, "step": 422140 }, { "epoch": 0.852769708747278, "grad_norm": 208.51138305664062, "learning_rate": 7.204686616443352e-07, "loss": 23.4678, "step": 422150 }, { "epoch": 0.8527899093799618, "grad_norm": 183.90054321289062, "learning_rate": 7.202881593046002e-07, "loss": 17.5226, "step": 422160 }, { "epoch": 0.8528101100126456, "grad_norm": 197.4811248779297, "learning_rate": 7.20107677823661e-07, "loss": 29.9472, "step": 422170 }, { "epoch": 0.8528303106453294, "grad_norm": 580.2433471679688, "learning_rate": 7.199272172023986e-07, "loss": 29.312, "step": 422180 }, { "epoch": 0.8528505112780133, "grad_norm": 7.971329212188721, "learning_rate": 7.197467774416921e-07, "loss": 17.3739, "step": 422190 }, { "epoch": 0.8528707119106971, "grad_norm": 336.0027160644531, "learning_rate": 7.195663585424195e-07, "loss": 11.411, "step": 422200 }, { "epoch": 0.8528909125433809, "grad_norm": 130.11996459960938, "learning_rate": 7.193859605054615e-07, "loss": 18.222, "step": 422210 }, { "epoch": 0.8529111131760647, "grad_norm": 168.29141235351562, "learning_rate": 7.19205583331698e-07, "loss": 11.4336, "step": 422220 }, { "epoch": 0.8529313138087485, "grad_norm": 266.99658203125, "learning_rate": 7.190252270220071e-07, "loss": 10.7985, "step": 422230 }, { "epoch": 0.8529515144414324, "grad_norm": 128.3966522216797, "learning_rate": 7.188448915772673e-07, "loss": 12.6436, "step": 422240 }, { "epoch": 0.8529717150741161, "grad_norm": 11.934555053710938, "learning_rate": 7.186645769983591e-07, "loss": 14.0058, "step": 422250 }, { "epoch": 0.8529919157067999, "grad_norm": 61.79963684082031, "learning_rate": 7.18484283286161e-07, "loss": 19.3607, "step": 422260 }, { "epoch": 0.8530121163394837, "grad_norm": 296.8731384277344, "learning_rate": 7.183040104415495e-07, "loss": 10.9771, "step": 422270 }, { "epoch": 0.8530323169721675, "grad_norm": 27.604801177978516, "learning_rate": 7.181237584654066e-07, "loss": 9.6931, "step": 422280 }, { "epoch": 0.8530525176048513, "grad_norm": 392.4912414550781, "learning_rate": 7.179435273586078e-07, "loss": 12.0542, "step": 422290 }, { "epoch": 0.8530727182375352, "grad_norm": 62.817718505859375, "learning_rate": 7.177633171220339e-07, "loss": 19.1299, "step": 422300 }, { "epoch": 0.853092918870219, "grad_norm": 0.0, "learning_rate": 7.17583127756562e-07, "loss": 37.4762, "step": 422310 }, { "epoch": 0.8531131195029028, "grad_norm": 210.4340057373047, "learning_rate": 7.1740295926307e-07, "loss": 17.0518, "step": 422320 }, { "epoch": 0.8531333201355866, "grad_norm": 107.98930358886719, "learning_rate": 7.172228116424374e-07, "loss": 22.7958, "step": 422330 }, { "epoch": 0.8531535207682704, "grad_norm": 221.2230224609375, "learning_rate": 7.170426848955408e-07, "loss": 12.9899, "step": 422340 }, { "epoch": 0.8531737214009543, "grad_norm": 539.4725952148438, "learning_rate": 7.168625790232586e-07, "loss": 18.9985, "step": 422350 }, { "epoch": 0.8531939220336381, "grad_norm": 1025.2767333984375, "learning_rate": 7.166824940264683e-07, "loss": 26.3441, "step": 422360 }, { "epoch": 0.8532141226663219, "grad_norm": 262.8194274902344, "learning_rate": 7.165024299060486e-07, "loss": 11.7782, "step": 422370 }, { "epoch": 0.8532343232990057, "grad_norm": 453.4293518066406, "learning_rate": 7.163223866628771e-07, "loss": 15.8715, "step": 422380 }, { "epoch": 0.8532545239316895, "grad_norm": 451.14459228515625, "learning_rate": 7.161423642978299e-07, "loss": 18.8672, "step": 422390 }, { "epoch": 0.8532747245643734, "grad_norm": 111.09237670898438, "learning_rate": 7.159623628117856e-07, "loss": 12.8857, "step": 422400 }, { "epoch": 0.8532949251970572, "grad_norm": 185.869873046875, "learning_rate": 7.157823822056214e-07, "loss": 11.1398, "step": 422410 }, { "epoch": 0.853315125829741, "grad_norm": 165.09800720214844, "learning_rate": 7.156024224802139e-07, "loss": 14.6144, "step": 422420 }, { "epoch": 0.8533353264624248, "grad_norm": 523.0018920898438, "learning_rate": 7.154224836364398e-07, "loss": 23.4361, "step": 422430 }, { "epoch": 0.8533555270951086, "grad_norm": 204.12245178222656, "learning_rate": 7.152425656751794e-07, "loss": 13.7914, "step": 422440 }, { "epoch": 0.8533757277277925, "grad_norm": 419.858642578125, "learning_rate": 7.150626685973045e-07, "loss": 15.6425, "step": 422450 }, { "epoch": 0.8533959283604763, "grad_norm": 80.26962280273438, "learning_rate": 7.148827924036944e-07, "loss": 17.9933, "step": 422460 }, { "epoch": 0.8534161289931601, "grad_norm": 266.3953857421875, "learning_rate": 7.147029370952274e-07, "loss": 25.6853, "step": 422470 }, { "epoch": 0.8534363296258439, "grad_norm": 179.11001586914062, "learning_rate": 7.145231026727783e-07, "loss": 19.0045, "step": 422480 }, { "epoch": 0.8534565302585277, "grad_norm": 542.6293334960938, "learning_rate": 7.143432891372226e-07, "loss": 15.4474, "step": 422490 }, { "epoch": 0.8534767308912116, "grad_norm": 477.26751708984375, "learning_rate": 7.141634964894389e-07, "loss": 18.5066, "step": 422500 }, { "epoch": 0.8534969315238953, "grad_norm": 291.8429260253906, "learning_rate": 7.139837247303027e-07, "loss": 13.9174, "step": 422510 }, { "epoch": 0.8535171321565791, "grad_norm": 166.04727172851562, "learning_rate": 7.138039738606894e-07, "loss": 32.3205, "step": 422520 }, { "epoch": 0.8535373327892629, "grad_norm": 177.7947540283203, "learning_rate": 7.13624243881475e-07, "loss": 21.2513, "step": 422530 }, { "epoch": 0.8535575334219467, "grad_norm": 425.36773681640625, "learning_rate": 7.134445347935376e-07, "loss": 16.2399, "step": 422540 }, { "epoch": 0.8535777340546306, "grad_norm": 425.3691711425781, "learning_rate": 7.132648465977515e-07, "loss": 11.8947, "step": 422550 }, { "epoch": 0.8535979346873144, "grad_norm": 14.243558883666992, "learning_rate": 7.130851792949916e-07, "loss": 11.6833, "step": 422560 }, { "epoch": 0.8536181353199982, "grad_norm": 229.00413513183594, "learning_rate": 7.129055328861356e-07, "loss": 15.1281, "step": 422570 }, { "epoch": 0.853638335952682, "grad_norm": 77.62419128417969, "learning_rate": 7.127259073720571e-07, "loss": 17.7049, "step": 422580 }, { "epoch": 0.8536585365853658, "grad_norm": 284.9870300292969, "learning_rate": 7.125463027536334e-07, "loss": 23.6866, "step": 422590 }, { "epoch": 0.8536787372180497, "grad_norm": 173.53244018554688, "learning_rate": 7.123667190317396e-07, "loss": 16.4071, "step": 422600 }, { "epoch": 0.8536989378507335, "grad_norm": 175.91168212890625, "learning_rate": 7.121871562072486e-07, "loss": 27.7758, "step": 422610 }, { "epoch": 0.8537191384834173, "grad_norm": 178.91546630859375, "learning_rate": 7.12007614281039e-07, "loss": 22.4176, "step": 422620 }, { "epoch": 0.8537393391161011, "grad_norm": 376.6715393066406, "learning_rate": 7.11828093253984e-07, "loss": 15.8543, "step": 422630 }, { "epoch": 0.8537595397487849, "grad_norm": 521.6480102539062, "learning_rate": 7.116485931269573e-07, "loss": 17.6456, "step": 422640 }, { "epoch": 0.8537797403814688, "grad_norm": 234.2122344970703, "learning_rate": 7.114691139008356e-07, "loss": 12.6867, "step": 422650 }, { "epoch": 0.8537999410141526, "grad_norm": 289.37091064453125, "learning_rate": 7.112896555764943e-07, "loss": 13.7824, "step": 422660 }, { "epoch": 0.8538201416468364, "grad_norm": 493.8099365234375, "learning_rate": 7.111102181548074e-07, "loss": 22.7064, "step": 422670 }, { "epoch": 0.8538403422795202, "grad_norm": 242.32406616210938, "learning_rate": 7.109308016366473e-07, "loss": 9.8361, "step": 422680 }, { "epoch": 0.853860542912204, "grad_norm": 79.6270751953125, "learning_rate": 7.107514060228921e-07, "loss": 11.2522, "step": 422690 }, { "epoch": 0.8538807435448879, "grad_norm": 393.1864318847656, "learning_rate": 7.105720313144143e-07, "loss": 9.7228, "step": 422700 }, { "epoch": 0.8539009441775717, "grad_norm": 201.590087890625, "learning_rate": 7.103926775120867e-07, "loss": 32.6815, "step": 422710 }, { "epoch": 0.8539211448102555, "grad_norm": 371.31463623046875, "learning_rate": 7.102133446167847e-07, "loss": 19.7571, "step": 422720 }, { "epoch": 0.8539413454429393, "grad_norm": 126.90672302246094, "learning_rate": 7.100340326293853e-07, "loss": 10.3946, "step": 422730 }, { "epoch": 0.8539615460756231, "grad_norm": 60.477325439453125, "learning_rate": 7.098547415507572e-07, "loss": 11.4186, "step": 422740 }, { "epoch": 0.853981746708307, "grad_norm": 226.64784240722656, "learning_rate": 7.096754713817771e-07, "loss": 23.1537, "step": 422750 }, { "epoch": 0.8540019473409908, "grad_norm": 12.693187713623047, "learning_rate": 7.094962221233192e-07, "loss": 12.7288, "step": 422760 }, { "epoch": 0.8540221479736745, "grad_norm": 414.6326599121094, "learning_rate": 7.093169937762562e-07, "loss": 18.8912, "step": 422770 }, { "epoch": 0.8540423486063583, "grad_norm": 26.593154907226562, "learning_rate": 7.091377863414611e-07, "loss": 13.203, "step": 422780 }, { "epoch": 0.8540625492390421, "grad_norm": 202.7290496826172, "learning_rate": 7.08958599819809e-07, "loss": 15.4635, "step": 422790 }, { "epoch": 0.8540827498717259, "grad_norm": 202.21673583984375, "learning_rate": 7.087794342121724e-07, "loss": 10.0292, "step": 422800 }, { "epoch": 0.8541029505044098, "grad_norm": 233.0935821533203, "learning_rate": 7.086002895194227e-07, "loss": 14.3608, "step": 422810 }, { "epoch": 0.8541231511370936, "grad_norm": 472.7007751464844, "learning_rate": 7.08421165742435e-07, "loss": 25.4754, "step": 422820 }, { "epoch": 0.8541433517697774, "grad_norm": 73.6697769165039, "learning_rate": 7.08242062882083e-07, "loss": 22.1243, "step": 422830 }, { "epoch": 0.8541635524024612, "grad_norm": 469.64703369140625, "learning_rate": 7.080629809392392e-07, "loss": 17.5183, "step": 422840 }, { "epoch": 0.854183753035145, "grad_norm": 117.58245849609375, "learning_rate": 7.078839199147741e-07, "loss": 22.3515, "step": 422850 }, { "epoch": 0.8542039536678289, "grad_norm": 481.6075439453125, "learning_rate": 7.077048798095637e-07, "loss": 26.7704, "step": 422860 }, { "epoch": 0.8542241543005127, "grad_norm": 0.6864868998527527, "learning_rate": 7.07525860624479e-07, "loss": 8.767, "step": 422870 }, { "epoch": 0.8542443549331965, "grad_norm": 432.57745361328125, "learning_rate": 7.073468623603918e-07, "loss": 21.2184, "step": 422880 }, { "epoch": 0.8542645555658803, "grad_norm": 322.0793151855469, "learning_rate": 7.071678850181762e-07, "loss": 23.1704, "step": 422890 }, { "epoch": 0.8542847561985641, "grad_norm": 10.186527252197266, "learning_rate": 7.069889285987025e-07, "loss": 22.2997, "step": 422900 }, { "epoch": 0.854304956831248, "grad_norm": 30.24995994567871, "learning_rate": 7.068099931028449e-07, "loss": 12.5749, "step": 422910 }, { "epoch": 0.8543251574639318, "grad_norm": 356.6954345703125, "learning_rate": 7.066310785314756e-07, "loss": 14.4012, "step": 422920 }, { "epoch": 0.8543453580966156, "grad_norm": 611.784912109375, "learning_rate": 7.064521848854639e-07, "loss": 39.5429, "step": 422930 }, { "epoch": 0.8543655587292994, "grad_norm": 195.5189971923828, "learning_rate": 7.062733121656845e-07, "loss": 17.5024, "step": 422940 }, { "epoch": 0.8543857593619832, "grad_norm": 321.9447326660156, "learning_rate": 7.060944603730086e-07, "loss": 10.3179, "step": 422950 }, { "epoch": 0.8544059599946671, "grad_norm": 167.77120971679688, "learning_rate": 7.059156295083064e-07, "loss": 17.4644, "step": 422960 }, { "epoch": 0.8544261606273509, "grad_norm": 0.0, "learning_rate": 7.057368195724506e-07, "loss": 27.8772, "step": 422970 }, { "epoch": 0.8544463612600347, "grad_norm": 35.62004852294922, "learning_rate": 7.055580305663135e-07, "loss": 11.1785, "step": 422980 }, { "epoch": 0.8544665618927185, "grad_norm": 87.44515228271484, "learning_rate": 7.053792624907662e-07, "loss": 13.9931, "step": 422990 }, { "epoch": 0.8544867625254023, "grad_norm": 285.41497802734375, "learning_rate": 7.052005153466779e-07, "loss": 20.6078, "step": 423000 }, { "epoch": 0.8545069631580862, "grad_norm": 51.798980712890625, "learning_rate": 7.050217891349226e-07, "loss": 15.6915, "step": 423010 }, { "epoch": 0.8545271637907699, "grad_norm": 1351.5948486328125, "learning_rate": 7.048430838563708e-07, "loss": 31.2372, "step": 423020 }, { "epoch": 0.8545473644234537, "grad_norm": 247.1757049560547, "learning_rate": 7.046643995118913e-07, "loss": 16.505, "step": 423030 }, { "epoch": 0.8545675650561375, "grad_norm": 17.80440902709961, "learning_rate": 7.04485736102356e-07, "loss": 25.8629, "step": 423040 }, { "epoch": 0.8545877656888213, "grad_norm": 229.20510864257812, "learning_rate": 7.043070936286395e-07, "loss": 9.3663, "step": 423050 }, { "epoch": 0.8546079663215052, "grad_norm": 222.9828338623047, "learning_rate": 7.041284720916064e-07, "loss": 13.8725, "step": 423060 }, { "epoch": 0.854628166954189, "grad_norm": 190.86056518554688, "learning_rate": 7.0394987149213e-07, "loss": 17.0278, "step": 423070 }, { "epoch": 0.8546483675868728, "grad_norm": 187.04701232910156, "learning_rate": 7.037712918310818e-07, "loss": 20.86, "step": 423080 }, { "epoch": 0.8546685682195566, "grad_norm": 690.170654296875, "learning_rate": 7.035927331093318e-07, "loss": 19.8455, "step": 423090 }, { "epoch": 0.8546887688522404, "grad_norm": 235.44935607910156, "learning_rate": 7.034141953277484e-07, "loss": 13.6837, "step": 423100 }, { "epoch": 0.8547089694849243, "grad_norm": 113.28761291503906, "learning_rate": 7.032356784872035e-07, "loss": 24.8232, "step": 423110 }, { "epoch": 0.8547291701176081, "grad_norm": 314.83636474609375, "learning_rate": 7.030571825885685e-07, "loss": 10.4736, "step": 423120 }, { "epoch": 0.8547493707502919, "grad_norm": 181.3869171142578, "learning_rate": 7.028787076327093e-07, "loss": 22.3134, "step": 423130 }, { "epoch": 0.8547695713829757, "grad_norm": 387.256591796875, "learning_rate": 7.027002536204986e-07, "loss": 15.7749, "step": 423140 }, { "epoch": 0.8547897720156595, "grad_norm": 106.8989028930664, "learning_rate": 7.025218205528061e-07, "loss": 13.6919, "step": 423150 }, { "epoch": 0.8548099726483434, "grad_norm": 168.5610809326172, "learning_rate": 7.02343408430502e-07, "loss": 11.6839, "step": 423160 }, { "epoch": 0.8548301732810272, "grad_norm": 365.92218017578125, "learning_rate": 7.021650172544531e-07, "loss": 13.3508, "step": 423170 }, { "epoch": 0.854850373913711, "grad_norm": 328.9350280761719, "learning_rate": 7.019866470255315e-07, "loss": 24.3665, "step": 423180 }, { "epoch": 0.8548705745463948, "grad_norm": 254.15921020507812, "learning_rate": 7.018082977446061e-07, "loss": 12.6612, "step": 423190 }, { "epoch": 0.8548907751790786, "grad_norm": 0.0, "learning_rate": 7.01629969412545e-07, "loss": 12.7893, "step": 423200 }, { "epoch": 0.8549109758117625, "grad_norm": 160.4410400390625, "learning_rate": 7.014516620302186e-07, "loss": 8.5363, "step": 423210 }, { "epoch": 0.8549311764444463, "grad_norm": 194.77122497558594, "learning_rate": 7.012733755984946e-07, "loss": 9.312, "step": 423220 }, { "epoch": 0.8549513770771301, "grad_norm": 204.65611267089844, "learning_rate": 7.010951101182439e-07, "loss": 16.3291, "step": 423230 }, { "epoch": 0.8549715777098139, "grad_norm": 2.822115182876587, "learning_rate": 7.009168655903342e-07, "loss": 14.8745, "step": 423240 }, { "epoch": 0.8549917783424977, "grad_norm": 105.02120971679688, "learning_rate": 7.007386420156332e-07, "loss": 15.3415, "step": 423250 }, { "epoch": 0.8550119789751816, "grad_norm": 260.77850341796875, "learning_rate": 7.005604393950116e-07, "loss": 9.1493, "step": 423260 }, { "epoch": 0.8550321796078654, "grad_norm": 170.01780700683594, "learning_rate": 7.003822577293362e-07, "loss": 12.8393, "step": 423270 }, { "epoch": 0.8550523802405491, "grad_norm": 291.23760986328125, "learning_rate": 7.002040970194768e-07, "loss": 12.2394, "step": 423280 }, { "epoch": 0.8550725808732329, "grad_norm": 518.68115234375, "learning_rate": 7.000259572663004e-07, "loss": 35.0356, "step": 423290 }, { "epoch": 0.8550927815059167, "grad_norm": 326.2279357910156, "learning_rate": 6.99847838470677e-07, "loss": 8.9791, "step": 423300 }, { "epoch": 0.8551129821386005, "grad_norm": 443.4004211425781, "learning_rate": 6.996697406334735e-07, "loss": 9.3229, "step": 423310 }, { "epoch": 0.8551331827712844, "grad_norm": 188.73171997070312, "learning_rate": 6.994916637555571e-07, "loss": 16.0772, "step": 423320 }, { "epoch": 0.8551533834039682, "grad_norm": 302.5928955078125, "learning_rate": 6.993136078377965e-07, "loss": 10.4423, "step": 423330 }, { "epoch": 0.855173584036652, "grad_norm": 344.1739196777344, "learning_rate": 6.991355728810623e-07, "loss": 26.7812, "step": 423340 }, { "epoch": 0.8551937846693358, "grad_norm": 361.71649169921875, "learning_rate": 6.989575588862174e-07, "loss": 18.9623, "step": 423350 }, { "epoch": 0.8552139853020196, "grad_norm": 219.8914337158203, "learning_rate": 6.987795658541319e-07, "loss": 14.8403, "step": 423360 }, { "epoch": 0.8552341859347035, "grad_norm": 288.67132568359375, "learning_rate": 6.986015937856743e-07, "loss": 23.9568, "step": 423370 }, { "epoch": 0.8552543865673873, "grad_norm": 373.989501953125, "learning_rate": 6.984236426817104e-07, "loss": 30.0804, "step": 423380 }, { "epoch": 0.8552745872000711, "grad_norm": 496.7789001464844, "learning_rate": 6.982457125431069e-07, "loss": 31.4124, "step": 423390 }, { "epoch": 0.8552947878327549, "grad_norm": 483.5967712402344, "learning_rate": 6.980678033707333e-07, "loss": 18.7176, "step": 423400 }, { "epoch": 0.8553149884654387, "grad_norm": 479.885498046875, "learning_rate": 6.978899151654556e-07, "loss": 22.4568, "step": 423410 }, { "epoch": 0.8553351890981226, "grad_norm": 311.95025634765625, "learning_rate": 6.977120479281396e-07, "loss": 18.9853, "step": 423420 }, { "epoch": 0.8553553897308064, "grad_norm": 205.40097045898438, "learning_rate": 6.975342016596531e-07, "loss": 23.6384, "step": 423430 }, { "epoch": 0.8553755903634902, "grad_norm": 184.26663208007812, "learning_rate": 6.973563763608643e-07, "loss": 17.1032, "step": 423440 }, { "epoch": 0.855395790996174, "grad_norm": 256.02398681640625, "learning_rate": 6.971785720326385e-07, "loss": 16.908, "step": 423450 }, { "epoch": 0.8554159916288578, "grad_norm": 212.7161102294922, "learning_rate": 6.970007886758412e-07, "loss": 15.3301, "step": 423460 }, { "epoch": 0.8554361922615417, "grad_norm": 294.705322265625, "learning_rate": 6.968230262913417e-07, "loss": 22.1643, "step": 423470 }, { "epoch": 0.8554563928942255, "grad_norm": 278.9425964355469, "learning_rate": 6.966452848800043e-07, "loss": 17.536, "step": 423480 }, { "epoch": 0.8554765935269093, "grad_norm": 353.4668884277344, "learning_rate": 6.964675644426955e-07, "loss": 19.0159, "step": 423490 }, { "epoch": 0.8554967941595931, "grad_norm": 5.04774808883667, "learning_rate": 6.962898649802824e-07, "loss": 16.2927, "step": 423500 }, { "epoch": 0.8555169947922769, "grad_norm": 614.4879150390625, "learning_rate": 6.961121864936294e-07, "loss": 21.0884, "step": 423510 }, { "epoch": 0.8555371954249608, "grad_norm": 377.22772216796875, "learning_rate": 6.95934528983605e-07, "loss": 11.0351, "step": 423520 }, { "epoch": 0.8555573960576445, "grad_norm": 204.89877319335938, "learning_rate": 6.957568924510733e-07, "loss": 11.6271, "step": 423530 }, { "epoch": 0.8555775966903283, "grad_norm": 346.3027038574219, "learning_rate": 6.955792768969e-07, "loss": 17.2245, "step": 423540 }, { "epoch": 0.8555977973230121, "grad_norm": 538.1143798828125, "learning_rate": 6.954016823219517e-07, "loss": 15.2407, "step": 423550 }, { "epoch": 0.8556179979556959, "grad_norm": 178.46185302734375, "learning_rate": 6.952241087270938e-07, "loss": 13.5602, "step": 423560 }, { "epoch": 0.8556381985883798, "grad_norm": 338.64031982421875, "learning_rate": 6.950465561131903e-07, "loss": 15.5635, "step": 423570 }, { "epoch": 0.8556583992210636, "grad_norm": 290.6140441894531, "learning_rate": 6.948690244811079e-07, "loss": 23.7052, "step": 423580 }, { "epoch": 0.8556785998537474, "grad_norm": 270.82904052734375, "learning_rate": 6.946915138317129e-07, "loss": 12.9421, "step": 423590 }, { "epoch": 0.8556988004864312, "grad_norm": 277.2585754394531, "learning_rate": 6.945140241658688e-07, "loss": 17.0889, "step": 423600 }, { "epoch": 0.855719001119115, "grad_norm": 236.5377960205078, "learning_rate": 6.943365554844406e-07, "loss": 16.1848, "step": 423610 }, { "epoch": 0.8557392017517989, "grad_norm": 130.89395141601562, "learning_rate": 6.941591077882948e-07, "loss": 27.2633, "step": 423620 }, { "epoch": 0.8557594023844827, "grad_norm": 207.0721893310547, "learning_rate": 6.939816810782952e-07, "loss": 22.6192, "step": 423630 }, { "epoch": 0.8557796030171665, "grad_norm": 411.8767395019531, "learning_rate": 6.938042753553054e-07, "loss": 33.0819, "step": 423640 }, { "epoch": 0.8557998036498503, "grad_norm": 456.506103515625, "learning_rate": 6.936268906201915e-07, "loss": 14.6687, "step": 423650 }, { "epoch": 0.8558200042825341, "grad_norm": 511.1331787109375, "learning_rate": 6.934495268738195e-07, "loss": 19.5431, "step": 423660 }, { "epoch": 0.855840204915218, "grad_norm": 344.2784423828125, "learning_rate": 6.932721841170503e-07, "loss": 12.1683, "step": 423670 }, { "epoch": 0.8558604055479018, "grad_norm": 296.3919982910156, "learning_rate": 6.930948623507505e-07, "loss": 17.1141, "step": 423680 }, { "epoch": 0.8558806061805856, "grad_norm": 535.4503784179688, "learning_rate": 6.92917561575785e-07, "loss": 22.8237, "step": 423690 }, { "epoch": 0.8559008068132694, "grad_norm": 335.8371887207031, "learning_rate": 6.927402817930168e-07, "loss": 17.9307, "step": 423700 }, { "epoch": 0.8559210074459532, "grad_norm": 213.9453582763672, "learning_rate": 6.925630230033087e-07, "loss": 24.1029, "step": 423710 }, { "epoch": 0.855941208078637, "grad_norm": 551.3712158203125, "learning_rate": 6.923857852075261e-07, "loss": 17.4713, "step": 423720 }, { "epoch": 0.8559614087113209, "grad_norm": 303.08935546875, "learning_rate": 6.922085684065349e-07, "loss": 20.5313, "step": 423730 }, { "epoch": 0.8559816093440047, "grad_norm": 83.74275970458984, "learning_rate": 6.920313726011945e-07, "loss": 9.113, "step": 423740 }, { "epoch": 0.8560018099766885, "grad_norm": 179.73873901367188, "learning_rate": 6.918541977923709e-07, "loss": 23.3962, "step": 423750 }, { "epoch": 0.8560220106093723, "grad_norm": 302.4434509277344, "learning_rate": 6.916770439809283e-07, "loss": 18.7612, "step": 423760 }, { "epoch": 0.8560422112420562, "grad_norm": 208.94137573242188, "learning_rate": 6.914999111677295e-07, "loss": 16.5773, "step": 423770 }, { "epoch": 0.85606241187474, "grad_norm": 160.61373901367188, "learning_rate": 6.913227993536364e-07, "loss": 22.4776, "step": 423780 }, { "epoch": 0.8560826125074237, "grad_norm": 275.61907958984375, "learning_rate": 6.911457085395146e-07, "loss": 12.3802, "step": 423790 }, { "epoch": 0.8561028131401075, "grad_norm": 249.42898559570312, "learning_rate": 6.909686387262255e-07, "loss": 12.3299, "step": 423800 }, { "epoch": 0.8561230137727913, "grad_norm": 104.07897186279297, "learning_rate": 6.907915899146322e-07, "loss": 14.0462, "step": 423810 }, { "epoch": 0.8561432144054751, "grad_norm": 391.08917236328125, "learning_rate": 6.906145621055987e-07, "loss": 17.7155, "step": 423820 }, { "epoch": 0.856163415038159, "grad_norm": 133.88150024414062, "learning_rate": 6.904375552999859e-07, "loss": 14.4247, "step": 423830 }, { "epoch": 0.8561836156708428, "grad_norm": 240.05923461914062, "learning_rate": 6.902605694986592e-07, "loss": 27.7457, "step": 423840 }, { "epoch": 0.8562038163035266, "grad_norm": 102.07037353515625, "learning_rate": 6.9008360470248e-07, "loss": 9.8815, "step": 423850 }, { "epoch": 0.8562240169362104, "grad_norm": 22.271902084350586, "learning_rate": 6.89906660912309e-07, "loss": 18.5037, "step": 423860 }, { "epoch": 0.8562442175688942, "grad_norm": 276.6137390136719, "learning_rate": 6.897297381290113e-07, "loss": 13.3876, "step": 423870 }, { "epoch": 0.8562644182015781, "grad_norm": 54.55327606201172, "learning_rate": 6.895528363534476e-07, "loss": 19.4396, "step": 423880 }, { "epoch": 0.8562846188342619, "grad_norm": 213.33628845214844, "learning_rate": 6.89375955586481e-07, "loss": 9.2699, "step": 423890 }, { "epoch": 0.8563048194669457, "grad_norm": 269.0273742675781, "learning_rate": 6.891990958289724e-07, "loss": 15.5632, "step": 423900 }, { "epoch": 0.8563250200996295, "grad_norm": 188.8880157470703, "learning_rate": 6.890222570817856e-07, "loss": 19.4988, "step": 423910 }, { "epoch": 0.8563452207323133, "grad_norm": 359.8377380371094, "learning_rate": 6.888454393457817e-07, "loss": 21.2789, "step": 423920 }, { "epoch": 0.8563654213649972, "grad_norm": 142.61070251464844, "learning_rate": 6.886686426218209e-07, "loss": 16.467, "step": 423930 }, { "epoch": 0.856385621997681, "grad_norm": 526.3711547851562, "learning_rate": 6.884918669107671e-07, "loss": 15.4915, "step": 423940 }, { "epoch": 0.8564058226303648, "grad_norm": 162.6741943359375, "learning_rate": 6.883151122134812e-07, "loss": 13.6569, "step": 423950 }, { "epoch": 0.8564260232630486, "grad_norm": 38.595970153808594, "learning_rate": 6.881383785308232e-07, "loss": 14.0991, "step": 423960 }, { "epoch": 0.8564462238957324, "grad_norm": 304.71112060546875, "learning_rate": 6.879616658636562e-07, "loss": 18.2779, "step": 423970 }, { "epoch": 0.8564664245284163, "grad_norm": 212.12672424316406, "learning_rate": 6.877849742128423e-07, "loss": 13.2207, "step": 423980 }, { "epoch": 0.8564866251611001, "grad_norm": 306.0458984375, "learning_rate": 6.876083035792408e-07, "loss": 13.0411, "step": 423990 }, { "epoch": 0.8565068257937839, "grad_norm": 542.5480346679688, "learning_rate": 6.874316539637127e-07, "loss": 25.6927, "step": 424000 }, { "epoch": 0.8565270264264677, "grad_norm": 395.2640686035156, "learning_rate": 6.872550253671207e-07, "loss": 23.6256, "step": 424010 }, { "epoch": 0.8565472270591515, "grad_norm": 1355.140625, "learning_rate": 6.870784177903244e-07, "loss": 32.2784, "step": 424020 }, { "epoch": 0.8565674276918354, "grad_norm": 127.0737533569336, "learning_rate": 6.869018312341841e-07, "loss": 13.1837, "step": 424030 }, { "epoch": 0.8565876283245192, "grad_norm": 36.54533767700195, "learning_rate": 6.86725265699561e-07, "loss": 16.7496, "step": 424040 }, { "epoch": 0.8566078289572029, "grad_norm": 471.26873779296875, "learning_rate": 6.865487211873167e-07, "loss": 19.949, "step": 424050 }, { "epoch": 0.8566280295898867, "grad_norm": 342.75933837890625, "learning_rate": 6.863721976983112e-07, "loss": 24.4788, "step": 424060 }, { "epoch": 0.8566482302225705, "grad_norm": 656.39453125, "learning_rate": 6.861956952334031e-07, "loss": 19.6866, "step": 424070 }, { "epoch": 0.8566684308552543, "grad_norm": 198.0663299560547, "learning_rate": 6.860192137934552e-07, "loss": 17.8723, "step": 424080 }, { "epoch": 0.8566886314879382, "grad_norm": 217.781982421875, "learning_rate": 6.858427533793261e-07, "loss": 12.8337, "step": 424090 }, { "epoch": 0.856708832120622, "grad_norm": 436.1506652832031, "learning_rate": 6.856663139918751e-07, "loss": 11.4889, "step": 424100 }, { "epoch": 0.8567290327533058, "grad_norm": 229.63490295410156, "learning_rate": 6.854898956319644e-07, "loss": 25.8869, "step": 424110 }, { "epoch": 0.8567492333859896, "grad_norm": 290.39178466796875, "learning_rate": 6.853134983004517e-07, "loss": 8.3607, "step": 424120 }, { "epoch": 0.8567694340186734, "grad_norm": 339.2444152832031, "learning_rate": 6.851371219981989e-07, "loss": 16.9643, "step": 424130 }, { "epoch": 0.8567896346513573, "grad_norm": 270.17657470703125, "learning_rate": 6.849607667260643e-07, "loss": 26.7416, "step": 424140 }, { "epoch": 0.8568098352840411, "grad_norm": 309.4647216796875, "learning_rate": 6.847844324849062e-07, "loss": 36.7673, "step": 424150 }, { "epoch": 0.8568300359167249, "grad_norm": 27.725566864013672, "learning_rate": 6.846081192755871e-07, "loss": 14.3943, "step": 424160 }, { "epoch": 0.8568502365494087, "grad_norm": 513.9178466796875, "learning_rate": 6.844318270989631e-07, "loss": 17.2254, "step": 424170 }, { "epoch": 0.8568704371820925, "grad_norm": 280.4992980957031, "learning_rate": 6.842555559558961e-07, "loss": 11.4802, "step": 424180 }, { "epoch": 0.8568906378147764, "grad_norm": 255.67034912109375, "learning_rate": 6.840793058472434e-07, "loss": 12.5791, "step": 424190 }, { "epoch": 0.8569108384474602, "grad_norm": 0.0, "learning_rate": 6.839030767738653e-07, "loss": 13.0745, "step": 424200 }, { "epoch": 0.856931039080144, "grad_norm": 570.7583618164062, "learning_rate": 6.837268687366199e-07, "loss": 24.4529, "step": 424210 }, { "epoch": 0.8569512397128278, "grad_norm": 310.4892883300781, "learning_rate": 6.835506817363657e-07, "loss": 13.3413, "step": 424220 }, { "epoch": 0.8569714403455116, "grad_norm": 21.324195861816406, "learning_rate": 6.83374515773963e-07, "loss": 23.01, "step": 424230 }, { "epoch": 0.8569916409781955, "grad_norm": 235.50726318359375, "learning_rate": 6.831983708502693e-07, "loss": 12.8942, "step": 424240 }, { "epoch": 0.8570118416108793, "grad_norm": 315.3659973144531, "learning_rate": 6.830222469661419e-07, "loss": 12.0015, "step": 424250 }, { "epoch": 0.8570320422435631, "grad_norm": 279.39208984375, "learning_rate": 6.828461441224405e-07, "loss": 14.5823, "step": 424260 }, { "epoch": 0.8570522428762469, "grad_norm": 141.75877380371094, "learning_rate": 6.826700623200255e-07, "loss": 13.2339, "step": 424270 }, { "epoch": 0.8570724435089307, "grad_norm": 268.73138427734375, "learning_rate": 6.824940015597514e-07, "loss": 15.8064, "step": 424280 }, { "epoch": 0.8570926441416146, "grad_norm": 66.41516876220703, "learning_rate": 6.823179618424774e-07, "loss": 7.1832, "step": 424290 }, { "epoch": 0.8571128447742983, "grad_norm": 131.48736572265625, "learning_rate": 6.821419431690629e-07, "loss": 12.0447, "step": 424300 }, { "epoch": 0.8571330454069821, "grad_norm": 110.49434661865234, "learning_rate": 6.819659455403654e-07, "loss": 12.5332, "step": 424310 }, { "epoch": 0.8571532460396659, "grad_norm": 460.3204345703125, "learning_rate": 6.817899689572405e-07, "loss": 17.4025, "step": 424320 }, { "epoch": 0.8571734466723497, "grad_norm": 298.1443176269531, "learning_rate": 6.816140134205479e-07, "loss": 15.143, "step": 424330 }, { "epoch": 0.8571936473050336, "grad_norm": 313.5130920410156, "learning_rate": 6.81438078931147e-07, "loss": 13.8155, "step": 424340 }, { "epoch": 0.8572138479377174, "grad_norm": 8.115134239196777, "learning_rate": 6.81262165489891e-07, "loss": 13.0447, "step": 424350 }, { "epoch": 0.8572340485704012, "grad_norm": 13.400871276855469, "learning_rate": 6.810862730976392e-07, "loss": 12.5523, "step": 424360 }, { "epoch": 0.857254249203085, "grad_norm": 300.0811462402344, "learning_rate": 6.809104017552503e-07, "loss": 10.1747, "step": 424370 }, { "epoch": 0.8572744498357688, "grad_norm": 104.60641479492188, "learning_rate": 6.807345514635805e-07, "loss": 16.5223, "step": 424380 }, { "epoch": 0.8572946504684527, "grad_norm": 284.20074462890625, "learning_rate": 6.80558722223485e-07, "loss": 13.5617, "step": 424390 }, { "epoch": 0.8573148511011365, "grad_norm": 176.078857421875, "learning_rate": 6.803829140358237e-07, "loss": 16.442, "step": 424400 }, { "epoch": 0.8573350517338203, "grad_norm": 495.5717468261719, "learning_rate": 6.802071269014527e-07, "loss": 20.9467, "step": 424410 }, { "epoch": 0.8573552523665041, "grad_norm": 232.489501953125, "learning_rate": 6.800313608212261e-07, "loss": 17.0568, "step": 424420 }, { "epoch": 0.8573754529991879, "grad_norm": 238.38931274414062, "learning_rate": 6.798556157960046e-07, "loss": 11.8175, "step": 424430 }, { "epoch": 0.8573956536318718, "grad_norm": 196.57803344726562, "learning_rate": 6.796798918266417e-07, "loss": 20.7263, "step": 424440 }, { "epoch": 0.8574158542645556, "grad_norm": 15.518020629882812, "learning_rate": 6.795041889139958e-07, "loss": 15.2344, "step": 424450 }, { "epoch": 0.8574360548972394, "grad_norm": 256.23760986328125, "learning_rate": 6.793285070589229e-07, "loss": 17.4303, "step": 424460 }, { "epoch": 0.8574562555299232, "grad_norm": 298.38067626953125, "learning_rate": 6.79152846262277e-07, "loss": 27.6497, "step": 424470 }, { "epoch": 0.857476456162607, "grad_norm": 383.74127197265625, "learning_rate": 6.789772065249178e-07, "loss": 12.2496, "step": 424480 }, { "epoch": 0.8574966567952909, "grad_norm": 257.5712585449219, "learning_rate": 6.788015878476983e-07, "loss": 13.5638, "step": 424490 }, { "epoch": 0.8575168574279747, "grad_norm": 113.62663269042969, "learning_rate": 6.786259902314768e-07, "loss": 11.9673, "step": 424500 }, { "epoch": 0.8575370580606585, "grad_norm": 265.4411315917969, "learning_rate": 6.784504136771075e-07, "loss": 25.6099, "step": 424510 }, { "epoch": 0.8575572586933423, "grad_norm": 281.8363952636719, "learning_rate": 6.782748581854471e-07, "loss": 14.7695, "step": 424520 }, { "epoch": 0.8575774593260261, "grad_norm": 15.098465919494629, "learning_rate": 6.780993237573513e-07, "loss": 9.7567, "step": 424530 }, { "epoch": 0.85759765995871, "grad_norm": 284.7987365722656, "learning_rate": 6.779238103936742e-07, "loss": 15.353, "step": 424540 }, { "epoch": 0.8576178605913938, "grad_norm": 154.20411682128906, "learning_rate": 6.777483180952732e-07, "loss": 14.5259, "step": 424550 }, { "epoch": 0.8576380612240775, "grad_norm": 78.2432632446289, "learning_rate": 6.775728468630027e-07, "loss": 20.6444, "step": 424560 }, { "epoch": 0.8576582618567613, "grad_norm": 298.0225830078125, "learning_rate": 6.773973966977165e-07, "loss": 15.1713, "step": 424570 }, { "epoch": 0.8576784624894451, "grad_norm": 301.41937255859375, "learning_rate": 6.772219676002717e-07, "loss": 17.4354, "step": 424580 }, { "epoch": 0.857698663122129, "grad_norm": 208.0994110107422, "learning_rate": 6.770465595715231e-07, "loss": 14.8364, "step": 424590 }, { "epoch": 0.8577188637548128, "grad_norm": 186.12237548828125, "learning_rate": 6.768711726123261e-07, "loss": 10.0088, "step": 424600 }, { "epoch": 0.8577390643874966, "grad_norm": 385.00579833984375, "learning_rate": 6.76695806723533e-07, "loss": 14.7199, "step": 424610 }, { "epoch": 0.8577592650201804, "grad_norm": 253.53329467773438, "learning_rate": 6.765204619060012e-07, "loss": 19.1844, "step": 424620 }, { "epoch": 0.8577794656528642, "grad_norm": 269.28094482421875, "learning_rate": 6.763451381605846e-07, "loss": 15.1143, "step": 424630 }, { "epoch": 0.857799666285548, "grad_norm": 25.068782806396484, "learning_rate": 6.761698354881363e-07, "loss": 27.9622, "step": 424640 }, { "epoch": 0.8578198669182319, "grad_norm": 477.1641845703125, "learning_rate": 6.759945538895119e-07, "loss": 22.6392, "step": 424650 }, { "epoch": 0.8578400675509157, "grad_norm": 243.30755615234375, "learning_rate": 6.758192933655667e-07, "loss": 23.6183, "step": 424660 }, { "epoch": 0.8578602681835995, "grad_norm": 260.8081970214844, "learning_rate": 6.756440539171533e-07, "loss": 17.6033, "step": 424670 }, { "epoch": 0.8578804688162833, "grad_norm": 212.6053009033203, "learning_rate": 6.754688355451256e-07, "loss": 16.9901, "step": 424680 }, { "epoch": 0.8579006694489671, "grad_norm": 306.0473327636719, "learning_rate": 6.752936382503394e-07, "loss": 14.9527, "step": 424690 }, { "epoch": 0.857920870081651, "grad_norm": 279.94818115234375, "learning_rate": 6.751184620336471e-07, "loss": 19.3849, "step": 424700 }, { "epoch": 0.8579410707143348, "grad_norm": 225.95159912109375, "learning_rate": 6.749433068959022e-07, "loss": 8.6361, "step": 424710 }, { "epoch": 0.8579612713470186, "grad_norm": 215.16275024414062, "learning_rate": 6.747681728379601e-07, "loss": 18.9797, "step": 424720 }, { "epoch": 0.8579814719797024, "grad_norm": 318.85797119140625, "learning_rate": 6.745930598606721e-07, "loss": 18.8878, "step": 424730 }, { "epoch": 0.8580016726123862, "grad_norm": 293.9589538574219, "learning_rate": 6.744179679648943e-07, "loss": 14.6065, "step": 424740 }, { "epoch": 0.8580218732450701, "grad_norm": 232.8202667236328, "learning_rate": 6.742428971514786e-07, "loss": 13.9461, "step": 424750 }, { "epoch": 0.8580420738777539, "grad_norm": 308.7608947753906, "learning_rate": 6.74067847421277e-07, "loss": 31.7157, "step": 424760 }, { "epoch": 0.8580622745104377, "grad_norm": 279.5561218261719, "learning_rate": 6.738928187751454e-07, "loss": 17.1414, "step": 424770 }, { "epoch": 0.8580824751431215, "grad_norm": 471.01397705078125, "learning_rate": 6.737178112139342e-07, "loss": 17.0854, "step": 424780 }, { "epoch": 0.8581026757758053, "grad_norm": 155.662841796875, "learning_rate": 6.735428247384989e-07, "loss": 26.9572, "step": 424790 }, { "epoch": 0.8581228764084892, "grad_norm": 217.6192169189453, "learning_rate": 6.733678593496901e-07, "loss": 14.6221, "step": 424800 }, { "epoch": 0.8581430770411729, "grad_norm": 259.729248046875, "learning_rate": 6.731929150483624e-07, "loss": 13.059, "step": 424810 }, { "epoch": 0.8581632776738567, "grad_norm": 312.6268310546875, "learning_rate": 6.73017991835368e-07, "loss": 12.951, "step": 424820 }, { "epoch": 0.8581834783065405, "grad_norm": 282.1890563964844, "learning_rate": 6.728430897115578e-07, "loss": 19.1124, "step": 424830 }, { "epoch": 0.8582036789392243, "grad_norm": 273.0890808105469, "learning_rate": 6.726682086777869e-07, "loss": 14.1467, "step": 424840 }, { "epoch": 0.8582238795719082, "grad_norm": 122.47418212890625, "learning_rate": 6.724933487349061e-07, "loss": 15.8845, "step": 424850 }, { "epoch": 0.858244080204592, "grad_norm": 112.8675765991211, "learning_rate": 6.723185098837665e-07, "loss": 8.5903, "step": 424860 }, { "epoch": 0.8582642808372758, "grad_norm": 146.3318634033203, "learning_rate": 6.721436921252223e-07, "loss": 26.0564, "step": 424870 }, { "epoch": 0.8582844814699596, "grad_norm": 86.2370834350586, "learning_rate": 6.719688954601266e-07, "loss": 15.4216, "step": 424880 }, { "epoch": 0.8583046821026434, "grad_norm": 254.19744873046875, "learning_rate": 6.717941198893274e-07, "loss": 14.0053, "step": 424890 }, { "epoch": 0.8583248827353273, "grad_norm": 297.9875183105469, "learning_rate": 6.716193654136788e-07, "loss": 25.1894, "step": 424900 }, { "epoch": 0.8583450833680111, "grad_norm": 64.05819702148438, "learning_rate": 6.714446320340334e-07, "loss": 11.539, "step": 424910 }, { "epoch": 0.8583652840006949, "grad_norm": 592.8305053710938, "learning_rate": 6.712699197512418e-07, "loss": 10.5414, "step": 424920 }, { "epoch": 0.8583854846333787, "grad_norm": 5.57110595703125, "learning_rate": 6.710952285661549e-07, "loss": 22.1617, "step": 424930 }, { "epoch": 0.8584056852660625, "grad_norm": 379.78790283203125, "learning_rate": 6.709205584796241e-07, "loss": 14.0794, "step": 424940 }, { "epoch": 0.8584258858987464, "grad_norm": 131.6482696533203, "learning_rate": 6.707459094925045e-07, "loss": 10.1108, "step": 424950 }, { "epoch": 0.8584460865314302, "grad_norm": 216.09938049316406, "learning_rate": 6.705712816056415e-07, "loss": 15.0898, "step": 424960 }, { "epoch": 0.858466287164114, "grad_norm": 278.9071350097656, "learning_rate": 6.703966748198892e-07, "loss": 12.0141, "step": 424970 }, { "epoch": 0.8584864877967978, "grad_norm": 110.3006820678711, "learning_rate": 6.702220891360994e-07, "loss": 9.9564, "step": 424980 }, { "epoch": 0.8585066884294816, "grad_norm": 48.94906997680664, "learning_rate": 6.700475245551218e-07, "loss": 9.6111, "step": 424990 }, { "epoch": 0.8585268890621655, "grad_norm": 309.4346618652344, "learning_rate": 6.698729810778065e-07, "loss": 20.069, "step": 425000 }, { "epoch": 0.8585470896948493, "grad_norm": 392.30596923828125, "learning_rate": 6.696984587050065e-07, "loss": 11.179, "step": 425010 }, { "epoch": 0.8585672903275331, "grad_norm": 245.6202850341797, "learning_rate": 6.695239574375706e-07, "loss": 8.6832, "step": 425020 }, { "epoch": 0.8585874909602169, "grad_norm": 107.75462341308594, "learning_rate": 6.693494772763487e-07, "loss": 12.9651, "step": 425030 }, { "epoch": 0.8586076915929007, "grad_norm": 174.42953491210938, "learning_rate": 6.691750182221935e-07, "loss": 22.0103, "step": 425040 }, { "epoch": 0.8586278922255846, "grad_norm": 257.4122009277344, "learning_rate": 6.69000580275953e-07, "loss": 13.8525, "step": 425050 }, { "epoch": 0.8586480928582684, "grad_norm": 3.2712879180908203, "learning_rate": 6.688261634384791e-07, "loss": 18.0985, "step": 425060 }, { "epoch": 0.8586682934909521, "grad_norm": 55.044979095458984, "learning_rate": 6.686517677106214e-07, "loss": 13.2182, "step": 425070 }, { "epoch": 0.8586884941236359, "grad_norm": 83.34304809570312, "learning_rate": 6.684773930932281e-07, "loss": 15.1368, "step": 425080 }, { "epoch": 0.8587086947563197, "grad_norm": 51.55891418457031, "learning_rate": 6.683030395871526e-07, "loss": 32.3659, "step": 425090 }, { "epoch": 0.8587288953890035, "grad_norm": 140.5757293701172, "learning_rate": 6.681287071932408e-07, "loss": 22.7444, "step": 425100 }, { "epoch": 0.8587490960216874, "grad_norm": 348.9292907714844, "learning_rate": 6.679543959123458e-07, "loss": 19.0408, "step": 425110 }, { "epoch": 0.8587692966543712, "grad_norm": 264.0926818847656, "learning_rate": 6.677801057453143e-07, "loss": 17.0035, "step": 425120 }, { "epoch": 0.858789497287055, "grad_norm": 312.43890380859375, "learning_rate": 6.676058366929988e-07, "loss": 12.3295, "step": 425130 }, { "epoch": 0.8588096979197388, "grad_norm": 260.50286865234375, "learning_rate": 6.674315887562466e-07, "loss": 16.7676, "step": 425140 }, { "epoch": 0.8588298985524226, "grad_norm": 246.723388671875, "learning_rate": 6.672573619359063e-07, "loss": 20.2221, "step": 425150 }, { "epoch": 0.8588500991851065, "grad_norm": 150.3035430908203, "learning_rate": 6.67083156232829e-07, "loss": 14.6619, "step": 425160 }, { "epoch": 0.8588702998177903, "grad_norm": 295.06048583984375, "learning_rate": 6.669089716478627e-07, "loss": 24.0341, "step": 425170 }, { "epoch": 0.8588905004504741, "grad_norm": 138.03895568847656, "learning_rate": 6.667348081818559e-07, "loss": 10.1707, "step": 425180 }, { "epoch": 0.8589107010831579, "grad_norm": 520.5072631835938, "learning_rate": 6.665606658356583e-07, "loss": 17.2871, "step": 425190 }, { "epoch": 0.8589309017158417, "grad_norm": 30.284364700317383, "learning_rate": 6.663865446101192e-07, "loss": 11.9157, "step": 425200 }, { "epoch": 0.8589511023485256, "grad_norm": 169.39254760742188, "learning_rate": 6.662124445060863e-07, "loss": 15.764, "step": 425210 }, { "epoch": 0.8589713029812094, "grad_norm": 1701.76171875, "learning_rate": 6.660383655244074e-07, "loss": 11.6654, "step": 425220 }, { "epoch": 0.8589915036138932, "grad_norm": 62.773502349853516, "learning_rate": 6.658643076659327e-07, "loss": 18.329, "step": 425230 }, { "epoch": 0.859011704246577, "grad_norm": 424.9864807128906, "learning_rate": 6.6569027093151e-07, "loss": 22.7725, "step": 425240 }, { "epoch": 0.8590319048792608, "grad_norm": 532.7898559570312, "learning_rate": 6.655162553219862e-07, "loss": 31.5603, "step": 425250 }, { "epoch": 0.8590521055119447, "grad_norm": 411.4581298828125, "learning_rate": 6.653422608382105e-07, "loss": 28.9217, "step": 425260 }, { "epoch": 0.8590723061446285, "grad_norm": 137.09744262695312, "learning_rate": 6.651682874810317e-07, "loss": 10.206, "step": 425270 }, { "epoch": 0.8590925067773123, "grad_norm": 362.9930419921875, "learning_rate": 6.649943352512972e-07, "loss": 19.2364, "step": 425280 }, { "epoch": 0.8591127074099961, "grad_norm": 313.9099426269531, "learning_rate": 6.648204041498534e-07, "loss": 19.1666, "step": 425290 }, { "epoch": 0.8591329080426799, "grad_norm": 218.5279083251953, "learning_rate": 6.646464941775499e-07, "loss": 11.514, "step": 425300 }, { "epoch": 0.8591531086753638, "grad_norm": 265.6346435546875, "learning_rate": 6.64472605335234e-07, "loss": 14.4691, "step": 425310 }, { "epoch": 0.8591733093080475, "grad_norm": 84.3724365234375, "learning_rate": 6.642987376237514e-07, "loss": 30.2023, "step": 425320 }, { "epoch": 0.8591935099407313, "grad_norm": 241.3916473388672, "learning_rate": 6.641248910439518e-07, "loss": 12.6862, "step": 425330 }, { "epoch": 0.8592137105734151, "grad_norm": 330.7503662109375, "learning_rate": 6.639510655966813e-07, "loss": 14.9626, "step": 425340 }, { "epoch": 0.8592339112060989, "grad_norm": 192.02622985839844, "learning_rate": 6.637772612827881e-07, "loss": 12.9559, "step": 425350 }, { "epoch": 0.8592541118387828, "grad_norm": 24.877470016479492, "learning_rate": 6.636034781031181e-07, "loss": 13.4675, "step": 425360 }, { "epoch": 0.8592743124714666, "grad_norm": 316.19451904296875, "learning_rate": 6.634297160585184e-07, "loss": 17.2891, "step": 425370 }, { "epoch": 0.8592945131041504, "grad_norm": 216.85694885253906, "learning_rate": 6.632559751498369e-07, "loss": 15.8184, "step": 425380 }, { "epoch": 0.8593147137368342, "grad_norm": 355.86871337890625, "learning_rate": 6.630822553779193e-07, "loss": 27.9583, "step": 425390 }, { "epoch": 0.859334914369518, "grad_norm": 207.43653869628906, "learning_rate": 6.629085567436133e-07, "loss": 26.3952, "step": 425400 }, { "epoch": 0.8593551150022019, "grad_norm": 277.5365295410156, "learning_rate": 6.627348792477639e-07, "loss": 19.8084, "step": 425410 }, { "epoch": 0.8593753156348857, "grad_norm": 15.553412437438965, "learning_rate": 6.625612228912199e-07, "loss": 29.9787, "step": 425420 }, { "epoch": 0.8593955162675695, "grad_norm": 202.20901489257812, "learning_rate": 6.623875876748265e-07, "loss": 19.5246, "step": 425430 }, { "epoch": 0.8594157169002533, "grad_norm": 419.8519592285156, "learning_rate": 6.622139735994288e-07, "loss": 17.6903, "step": 425440 }, { "epoch": 0.8594359175329371, "grad_norm": 301.2924499511719, "learning_rate": 6.620403806658754e-07, "loss": 9.176, "step": 425450 }, { "epoch": 0.859456118165621, "grad_norm": 241.92251586914062, "learning_rate": 6.618668088750107e-07, "loss": 21.5056, "step": 425460 }, { "epoch": 0.8594763187983048, "grad_norm": 151.03933715820312, "learning_rate": 6.616932582276798e-07, "loss": 12.2095, "step": 425470 }, { "epoch": 0.8594965194309886, "grad_norm": 128.7923583984375, "learning_rate": 6.615197287247299e-07, "loss": 13.9771, "step": 425480 }, { "epoch": 0.8595167200636724, "grad_norm": 849.5505981445312, "learning_rate": 6.61346220367009e-07, "loss": 9.1107, "step": 425490 }, { "epoch": 0.8595369206963562, "grad_norm": 566.014892578125, "learning_rate": 6.611727331553585e-07, "loss": 22.3019, "step": 425500 }, { "epoch": 0.85955712132904, "grad_norm": 401.216796875, "learning_rate": 6.609992670906251e-07, "loss": 13.9003, "step": 425510 }, { "epoch": 0.8595773219617239, "grad_norm": 368.768798828125, "learning_rate": 6.608258221736568e-07, "loss": 16.6687, "step": 425520 }, { "epoch": 0.8595975225944077, "grad_norm": 299.59320068359375, "learning_rate": 6.60652398405297e-07, "loss": 26.3777, "step": 425530 }, { "epoch": 0.8596177232270915, "grad_norm": 263.8229675292969, "learning_rate": 6.604789957863899e-07, "loss": 14.5886, "step": 425540 }, { "epoch": 0.8596379238597753, "grad_norm": 432.37298583984375, "learning_rate": 6.603056143177817e-07, "loss": 15.311, "step": 425550 }, { "epoch": 0.8596581244924592, "grad_norm": 0.0, "learning_rate": 6.601322540003202e-07, "loss": 26.5307, "step": 425560 }, { "epoch": 0.859678325125143, "grad_norm": 212.65748596191406, "learning_rate": 6.599589148348451e-07, "loss": 18.4903, "step": 425570 }, { "epoch": 0.8596985257578267, "grad_norm": 483.0402526855469, "learning_rate": 6.597855968222038e-07, "loss": 13.8314, "step": 425580 }, { "epoch": 0.8597187263905105, "grad_norm": 102.29916381835938, "learning_rate": 6.596122999632426e-07, "loss": 21.5249, "step": 425590 }, { "epoch": 0.8597389270231943, "grad_norm": 93.99577331542969, "learning_rate": 6.594390242588044e-07, "loss": 12.53, "step": 425600 }, { "epoch": 0.8597591276558781, "grad_norm": 3.3775837421417236, "learning_rate": 6.592657697097333e-07, "loss": 20.3366, "step": 425610 }, { "epoch": 0.859779328288562, "grad_norm": 512.7830200195312, "learning_rate": 6.590925363168749e-07, "loss": 23.7357, "step": 425620 }, { "epoch": 0.8597995289212458, "grad_norm": 384.3551025390625, "learning_rate": 6.589193240810732e-07, "loss": 30.2439, "step": 425630 }, { "epoch": 0.8598197295539296, "grad_norm": 1.1212437152862549, "learning_rate": 6.587461330031714e-07, "loss": 15.3178, "step": 425640 }, { "epoch": 0.8598399301866134, "grad_norm": 226.32542419433594, "learning_rate": 6.585729630840149e-07, "loss": 17.0284, "step": 425650 }, { "epoch": 0.8598601308192972, "grad_norm": 174.52976989746094, "learning_rate": 6.583998143244463e-07, "loss": 13.3965, "step": 425660 }, { "epoch": 0.8598803314519811, "grad_norm": 115.52770233154297, "learning_rate": 6.582266867253118e-07, "loss": 8.3107, "step": 425670 }, { "epoch": 0.8599005320846649, "grad_norm": 177.9041290283203, "learning_rate": 6.580535802874538e-07, "loss": 15.6579, "step": 425680 }, { "epoch": 0.8599207327173487, "grad_norm": 277.89324951171875, "learning_rate": 6.578804950117146e-07, "loss": 14.2583, "step": 425690 }, { "epoch": 0.8599409333500325, "grad_norm": 272.5760192871094, "learning_rate": 6.577074308989406e-07, "loss": 20.3047, "step": 425700 }, { "epoch": 0.8599611339827163, "grad_norm": 21.752309799194336, "learning_rate": 6.575343879499729e-07, "loss": 16.8871, "step": 425710 }, { "epoch": 0.8599813346154002, "grad_norm": 290.4754333496094, "learning_rate": 6.57361366165657e-07, "loss": 18.9391, "step": 425720 }, { "epoch": 0.860001535248084, "grad_norm": 284.32489013671875, "learning_rate": 6.571883655468336e-07, "loss": 22.1051, "step": 425730 }, { "epoch": 0.8600217358807678, "grad_norm": 203.83856201171875, "learning_rate": 6.57015386094349e-07, "loss": 18.6123, "step": 425740 }, { "epoch": 0.8600419365134516, "grad_norm": 173.1763153076172, "learning_rate": 6.568424278090446e-07, "loss": 17.9551, "step": 425750 }, { "epoch": 0.8600621371461354, "grad_norm": 356.8979797363281, "learning_rate": 6.56669490691762e-07, "loss": 17.9615, "step": 425760 }, { "epoch": 0.8600823377788193, "grad_norm": 468.1793212890625, "learning_rate": 6.564965747433472e-07, "loss": 28.7568, "step": 425770 }, { "epoch": 0.8601025384115031, "grad_norm": 1004.8388061523438, "learning_rate": 6.563236799646405e-07, "loss": 16.6156, "step": 425780 }, { "epoch": 0.8601227390441869, "grad_norm": 442.2641296386719, "learning_rate": 6.561508063564847e-07, "loss": 14.7327, "step": 425790 }, { "epoch": 0.8601429396768707, "grad_norm": 308.0071716308594, "learning_rate": 6.559779539197231e-07, "loss": 28.2071, "step": 425800 }, { "epoch": 0.8601631403095545, "grad_norm": 878.6277465820312, "learning_rate": 6.558051226551992e-07, "loss": 19.1349, "step": 425810 }, { "epoch": 0.8601833409422384, "grad_norm": 252.41970825195312, "learning_rate": 6.556323125637542e-07, "loss": 11.9119, "step": 425820 }, { "epoch": 0.8602035415749222, "grad_norm": 3.001262903213501, "learning_rate": 6.554595236462291e-07, "loss": 16.0101, "step": 425830 }, { "epoch": 0.8602237422076059, "grad_norm": 280.01397705078125, "learning_rate": 6.552867559034687e-07, "loss": 37.686, "step": 425840 }, { "epoch": 0.8602439428402897, "grad_norm": 202.0115203857422, "learning_rate": 6.551140093363135e-07, "loss": 17.1321, "step": 425850 }, { "epoch": 0.8602641434729735, "grad_norm": 294.38616943359375, "learning_rate": 6.549412839456048e-07, "loss": 14.5224, "step": 425860 }, { "epoch": 0.8602843441056574, "grad_norm": 407.4664611816406, "learning_rate": 6.547685797321851e-07, "loss": 20.1108, "step": 425870 }, { "epoch": 0.8603045447383412, "grad_norm": 20.88553810119629, "learning_rate": 6.545958966968974e-07, "loss": 9.0207, "step": 425880 }, { "epoch": 0.860324745371025, "grad_norm": 390.5597229003906, "learning_rate": 6.544232348405821e-07, "loss": 18.4068, "step": 425890 }, { "epoch": 0.8603449460037088, "grad_norm": 128.73533630371094, "learning_rate": 6.542505941640803e-07, "loss": 26.7374, "step": 425900 }, { "epoch": 0.8603651466363926, "grad_norm": 414.9536437988281, "learning_rate": 6.540779746682346e-07, "loss": 28.0502, "step": 425910 }, { "epoch": 0.8603853472690765, "grad_norm": 280.2080993652344, "learning_rate": 6.53905376353886e-07, "loss": 18.9999, "step": 425920 }, { "epoch": 0.8604055479017603, "grad_norm": 367.2652893066406, "learning_rate": 6.537327992218745e-07, "loss": 15.2771, "step": 425930 }, { "epoch": 0.8604257485344441, "grad_norm": 66.76766967773438, "learning_rate": 6.535602432730432e-07, "loss": 14.1451, "step": 425940 }, { "epoch": 0.8604459491671279, "grad_norm": 112.82831573486328, "learning_rate": 6.533877085082307e-07, "loss": 12.8695, "step": 425950 }, { "epoch": 0.8604661497998117, "grad_norm": 205.36558532714844, "learning_rate": 6.532151949282811e-07, "loss": 16.2951, "step": 425960 }, { "epoch": 0.8604863504324956, "grad_norm": 121.46744537353516, "learning_rate": 6.53042702534033e-07, "loss": 11.2585, "step": 425970 }, { "epoch": 0.8605065510651794, "grad_norm": 277.3069763183594, "learning_rate": 6.528702313263264e-07, "loss": 17.367, "step": 425980 }, { "epoch": 0.8605267516978632, "grad_norm": 322.7912292480469, "learning_rate": 6.526977813060042e-07, "loss": 13.0788, "step": 425990 }, { "epoch": 0.860546952330547, "grad_norm": 188.09046936035156, "learning_rate": 6.52525352473905e-07, "loss": 12.747, "step": 426000 }, { "epoch": 0.8605671529632308, "grad_norm": 577.46630859375, "learning_rate": 6.523529448308708e-07, "loss": 18.9068, "step": 426010 }, { "epoch": 0.8605873535959147, "grad_norm": 419.2146911621094, "learning_rate": 6.521805583777396e-07, "loss": 16.0069, "step": 426020 }, { "epoch": 0.8606075542285985, "grad_norm": 259.5483703613281, "learning_rate": 6.520081931153544e-07, "loss": 17.9167, "step": 426030 }, { "epoch": 0.8606277548612823, "grad_norm": 564.1424560546875, "learning_rate": 6.518358490445542e-07, "loss": 20.2891, "step": 426040 }, { "epoch": 0.8606479554939661, "grad_norm": 196.95143127441406, "learning_rate": 6.516635261661775e-07, "loss": 16.5217, "step": 426050 }, { "epoch": 0.8606681561266499, "grad_norm": 206.94544982910156, "learning_rate": 6.514912244810662e-07, "loss": 9.7611, "step": 426060 }, { "epoch": 0.8606883567593338, "grad_norm": 224.210205078125, "learning_rate": 6.513189439900591e-07, "loss": 16.0879, "step": 426070 }, { "epoch": 0.8607085573920176, "grad_norm": 307.1372985839844, "learning_rate": 6.511466846939956e-07, "loss": 17.7233, "step": 426080 }, { "epoch": 0.8607287580247013, "grad_norm": 32.34778594970703, "learning_rate": 6.509744465937151e-07, "loss": 17.4611, "step": 426090 }, { "epoch": 0.8607489586573851, "grad_norm": 294.2017517089844, "learning_rate": 6.508022296900601e-07, "loss": 23.336, "step": 426100 }, { "epoch": 0.8607691592900689, "grad_norm": 32.162803649902344, "learning_rate": 6.506300339838656e-07, "loss": 6.6925, "step": 426110 }, { "epoch": 0.8607893599227527, "grad_norm": 122.28897857666016, "learning_rate": 6.504578594759725e-07, "loss": 13.2891, "step": 426120 }, { "epoch": 0.8608095605554366, "grad_norm": 172.11935424804688, "learning_rate": 6.502857061672213e-07, "loss": 24.6641, "step": 426130 }, { "epoch": 0.8608297611881204, "grad_norm": 99.5322036743164, "learning_rate": 6.501135740584502e-07, "loss": 18.3852, "step": 426140 }, { "epoch": 0.8608499618208042, "grad_norm": 181.11062622070312, "learning_rate": 6.499414631504969e-07, "loss": 26.8739, "step": 426150 }, { "epoch": 0.860870162453488, "grad_norm": 196.2686004638672, "learning_rate": 6.497693734442007e-07, "loss": 16.5601, "step": 426160 }, { "epoch": 0.8608903630861718, "grad_norm": 454.75830078125, "learning_rate": 6.495973049404037e-07, "loss": 14.189, "step": 426170 }, { "epoch": 0.8609105637188557, "grad_norm": 85.71923065185547, "learning_rate": 6.494252576399395e-07, "loss": 15.6298, "step": 426180 }, { "epoch": 0.8609307643515395, "grad_norm": 129.8979034423828, "learning_rate": 6.49253231543649e-07, "loss": 20.9725, "step": 426190 }, { "epoch": 0.8609509649842233, "grad_norm": 42.13568878173828, "learning_rate": 6.490812266523716e-07, "loss": 15.4088, "step": 426200 }, { "epoch": 0.8609711656169071, "grad_norm": 147.41806030273438, "learning_rate": 6.489092429669447e-07, "loss": 12.642, "step": 426210 }, { "epoch": 0.8609913662495909, "grad_norm": 535.2359619140625, "learning_rate": 6.487372804882053e-07, "loss": 17.0153, "step": 426220 }, { "epoch": 0.8610115668822748, "grad_norm": 418.01416015625, "learning_rate": 6.485653392169938e-07, "loss": 22.6266, "step": 426230 }, { "epoch": 0.8610317675149586, "grad_norm": 167.09933471679688, "learning_rate": 6.483934191541469e-07, "loss": 14.8782, "step": 426240 }, { "epoch": 0.8610519681476424, "grad_norm": 227.04527282714844, "learning_rate": 6.482215203005016e-07, "loss": 17.4681, "step": 426250 }, { "epoch": 0.8610721687803262, "grad_norm": 281.4587707519531, "learning_rate": 6.480496426568983e-07, "loss": 34.5975, "step": 426260 }, { "epoch": 0.86109236941301, "grad_norm": 180.57151794433594, "learning_rate": 6.478777862241714e-07, "loss": 12.0356, "step": 426270 }, { "epoch": 0.8611125700456939, "grad_norm": 51.872047424316406, "learning_rate": 6.477059510031619e-07, "loss": 18.1245, "step": 426280 }, { "epoch": 0.8611327706783777, "grad_norm": 286.2590637207031, "learning_rate": 6.475341369947047e-07, "loss": 17.6567, "step": 426290 }, { "epoch": 0.8611529713110615, "grad_norm": 283.1455078125, "learning_rate": 6.47362344199639e-07, "loss": 10.539, "step": 426300 }, { "epoch": 0.8611731719437453, "grad_norm": 261.40655517578125, "learning_rate": 6.471905726188015e-07, "loss": 16.6252, "step": 426310 }, { "epoch": 0.8611933725764291, "grad_norm": 343.0833740234375, "learning_rate": 6.470188222530282e-07, "loss": 16.7142, "step": 426320 }, { "epoch": 0.861213573209113, "grad_norm": 400.0595703125, "learning_rate": 6.468470931031584e-07, "loss": 27.5833, "step": 426330 }, { "epoch": 0.8612337738417968, "grad_norm": 279.7911071777344, "learning_rate": 6.466753851700264e-07, "loss": 15.3073, "step": 426340 }, { "epoch": 0.8612539744744805, "grad_norm": 348.87896728515625, "learning_rate": 6.465036984544721e-07, "loss": 27.4831, "step": 426350 }, { "epoch": 0.8612741751071643, "grad_norm": 185.6573486328125, "learning_rate": 6.463320329573303e-07, "loss": 10.596, "step": 426360 }, { "epoch": 0.8612943757398481, "grad_norm": 179.58221435546875, "learning_rate": 6.46160388679437e-07, "loss": 13.5588, "step": 426370 }, { "epoch": 0.861314576372532, "grad_norm": 174.3289031982422, "learning_rate": 6.459887656216318e-07, "loss": 19.3493, "step": 426380 }, { "epoch": 0.8613347770052158, "grad_norm": 380.564208984375, "learning_rate": 6.458171637847488e-07, "loss": 12.3408, "step": 426390 }, { "epoch": 0.8613549776378996, "grad_norm": 140.37326049804688, "learning_rate": 6.456455831696234e-07, "loss": 15.1851, "step": 426400 }, { "epoch": 0.8613751782705834, "grad_norm": 23.614601135253906, "learning_rate": 6.454740237770934e-07, "loss": 20.7154, "step": 426410 }, { "epoch": 0.8613953789032672, "grad_norm": 117.17767333984375, "learning_rate": 6.453024856079976e-07, "loss": 10.1878, "step": 426420 }, { "epoch": 0.861415579535951, "grad_norm": 66.0915756225586, "learning_rate": 6.451309686631668e-07, "loss": 14.5932, "step": 426430 }, { "epoch": 0.8614357801686349, "grad_norm": 115.52134704589844, "learning_rate": 6.449594729434394e-07, "loss": 23.6677, "step": 426440 }, { "epoch": 0.8614559808013187, "grad_norm": 200.53895568847656, "learning_rate": 6.447879984496525e-07, "loss": 22.4611, "step": 426450 }, { "epoch": 0.8614761814340025, "grad_norm": 55.7606201171875, "learning_rate": 6.446165451826409e-07, "loss": 14.9015, "step": 426460 }, { "epoch": 0.8614963820666863, "grad_norm": 367.1582946777344, "learning_rate": 6.444451131432383e-07, "loss": 25.4361, "step": 426470 }, { "epoch": 0.8615165826993701, "grad_norm": 477.4083557128906, "learning_rate": 6.442737023322826e-07, "loss": 16.5428, "step": 426480 }, { "epoch": 0.861536783332054, "grad_norm": 640.9025268554688, "learning_rate": 6.441023127506096e-07, "loss": 22.848, "step": 426490 }, { "epoch": 0.8615569839647378, "grad_norm": 593.6867065429688, "learning_rate": 6.439309443990532e-07, "loss": 17.403, "step": 426500 }, { "epoch": 0.8615771845974216, "grad_norm": 128.9621124267578, "learning_rate": 6.437595972784483e-07, "loss": 14.719, "step": 426510 }, { "epoch": 0.8615973852301054, "grad_norm": 102.8722915649414, "learning_rate": 6.435882713896319e-07, "loss": 21.571, "step": 426520 }, { "epoch": 0.8616175858627892, "grad_norm": 324.9844970703125, "learning_rate": 6.434169667334378e-07, "loss": 13.7523, "step": 426530 }, { "epoch": 0.8616377864954731, "grad_norm": 164.3453826904297, "learning_rate": 6.432456833106998e-07, "loss": 29.9197, "step": 426540 }, { "epoch": 0.8616579871281569, "grad_norm": 290.2485046386719, "learning_rate": 6.43074421122255e-07, "loss": 13.7085, "step": 426550 }, { "epoch": 0.8616781877608407, "grad_norm": 162.91885375976562, "learning_rate": 6.429031801689362e-07, "loss": 23.4219, "step": 426560 }, { "epoch": 0.8616983883935245, "grad_norm": 152.35107421875, "learning_rate": 6.427319604515797e-07, "loss": 11.5542, "step": 426570 }, { "epoch": 0.8617185890262083, "grad_norm": 72.03697204589844, "learning_rate": 6.425607619710195e-07, "loss": 13.8766, "step": 426580 }, { "epoch": 0.8617387896588922, "grad_norm": 410.8104553222656, "learning_rate": 6.423895847280881e-07, "loss": 16.488, "step": 426590 }, { "epoch": 0.8617589902915759, "grad_norm": 369.3254699707031, "learning_rate": 6.422184287236227e-07, "loss": 16.7072, "step": 426600 }, { "epoch": 0.8617791909242597, "grad_norm": 258.9056701660156, "learning_rate": 6.420472939584549e-07, "loss": 16.8887, "step": 426610 }, { "epoch": 0.8617993915569435, "grad_norm": 283.20623779296875, "learning_rate": 6.418761804334212e-07, "loss": 23.0235, "step": 426620 }, { "epoch": 0.8618195921896273, "grad_norm": 229.67816162109375, "learning_rate": 6.417050881493536e-07, "loss": 23.8269, "step": 426630 }, { "epoch": 0.8618397928223112, "grad_norm": 298.65057373046875, "learning_rate": 6.415340171070877e-07, "loss": 13.5707, "step": 426640 }, { "epoch": 0.861859993454995, "grad_norm": 189.23648071289062, "learning_rate": 6.413629673074562e-07, "loss": 10.3198, "step": 426650 }, { "epoch": 0.8618801940876788, "grad_norm": 482.4182434082031, "learning_rate": 6.411919387512922e-07, "loss": 15.7056, "step": 426660 }, { "epoch": 0.8619003947203626, "grad_norm": 316.34271240234375, "learning_rate": 6.410209314394305e-07, "loss": 36.8376, "step": 426670 }, { "epoch": 0.8619205953530464, "grad_norm": 269.3982849121094, "learning_rate": 6.408499453727046e-07, "loss": 33.9866, "step": 426680 }, { "epoch": 0.8619407959857303, "grad_norm": 109.6086196899414, "learning_rate": 6.406789805519464e-07, "loss": 12.0312, "step": 426690 }, { "epoch": 0.8619609966184141, "grad_norm": 271.09686279296875, "learning_rate": 6.405080369779898e-07, "loss": 15.8506, "step": 426700 }, { "epoch": 0.8619811972510979, "grad_norm": 332.6595764160156, "learning_rate": 6.403371146516707e-07, "loss": 20.6489, "step": 426710 }, { "epoch": 0.8620013978837817, "grad_norm": 20.008983612060547, "learning_rate": 6.401662135738174e-07, "loss": 14.4732, "step": 426720 }, { "epoch": 0.8620215985164655, "grad_norm": 477.0677185058594, "learning_rate": 6.399953337452652e-07, "loss": 20.5175, "step": 426730 }, { "epoch": 0.8620417991491494, "grad_norm": 240.05218505859375, "learning_rate": 6.398244751668481e-07, "loss": 14.0642, "step": 426740 }, { "epoch": 0.8620619997818332, "grad_norm": 179.6294403076172, "learning_rate": 6.396536378393975e-07, "loss": 29.609, "step": 426750 }, { "epoch": 0.862082200414517, "grad_norm": 256.0860900878906, "learning_rate": 6.394828217637455e-07, "loss": 13.8209, "step": 426760 }, { "epoch": 0.8621024010472008, "grad_norm": 427.2187805175781, "learning_rate": 6.393120269407249e-07, "loss": 12.9257, "step": 426770 }, { "epoch": 0.8621226016798846, "grad_norm": 734.8992309570312, "learning_rate": 6.391412533711711e-07, "loss": 25.0278, "step": 426780 }, { "epoch": 0.8621428023125685, "grad_norm": 194.74415588378906, "learning_rate": 6.389705010559117e-07, "loss": 17.5674, "step": 426790 }, { "epoch": 0.8621630029452523, "grad_norm": 78.94375610351562, "learning_rate": 6.387997699957815e-07, "loss": 15.628, "step": 426800 }, { "epoch": 0.8621832035779361, "grad_norm": 178.01707458496094, "learning_rate": 6.386290601916129e-07, "loss": 11.1852, "step": 426810 }, { "epoch": 0.8622034042106199, "grad_norm": 331.4090881347656, "learning_rate": 6.384583716442371e-07, "loss": 28.6777, "step": 426820 }, { "epoch": 0.8622236048433037, "grad_norm": 314.58819580078125, "learning_rate": 6.382877043544855e-07, "loss": 7.6431, "step": 426830 }, { "epoch": 0.8622438054759876, "grad_norm": 733.0621337890625, "learning_rate": 6.381170583231916e-07, "loss": 22.413, "step": 426840 }, { "epoch": 0.8622640061086714, "grad_norm": 0.0, "learning_rate": 6.379464335511859e-07, "loss": 27.8916, "step": 426850 }, { "epoch": 0.8622842067413551, "grad_norm": 646.348388671875, "learning_rate": 6.377758300392994e-07, "loss": 21.2316, "step": 426860 }, { "epoch": 0.8623044073740389, "grad_norm": 218.392822265625, "learning_rate": 6.376052477883655e-07, "loss": 18.2486, "step": 426870 }, { "epoch": 0.8623246080067227, "grad_norm": 199.77964782714844, "learning_rate": 6.374346867992138e-07, "loss": 10.8684, "step": 426880 }, { "epoch": 0.8623448086394065, "grad_norm": 355.8582763671875, "learning_rate": 6.372641470726765e-07, "loss": 20.5506, "step": 426890 }, { "epoch": 0.8623650092720904, "grad_norm": 223.13693237304688, "learning_rate": 6.370936286095842e-07, "loss": 10.9489, "step": 426900 }, { "epoch": 0.8623852099047742, "grad_norm": 261.5447998046875, "learning_rate": 6.369231314107693e-07, "loss": 21.3465, "step": 426910 }, { "epoch": 0.862405410537458, "grad_norm": 166.69236755371094, "learning_rate": 6.36752655477062e-07, "loss": 21.7841, "step": 426920 }, { "epoch": 0.8624256111701418, "grad_norm": 1603.099853515625, "learning_rate": 6.36582200809292e-07, "loss": 30.7529, "step": 426930 }, { "epoch": 0.8624458118028256, "grad_norm": 296.5548400878906, "learning_rate": 6.36411767408292e-07, "loss": 18.8484, "step": 426940 }, { "epoch": 0.8624660124355095, "grad_norm": 248.10475158691406, "learning_rate": 6.362413552748908e-07, "loss": 34.0839, "step": 426950 }, { "epoch": 0.8624862130681933, "grad_norm": 375.36016845703125, "learning_rate": 6.360709644099211e-07, "loss": 19.8768, "step": 426960 }, { "epoch": 0.8625064137008771, "grad_norm": 316.27777099609375, "learning_rate": 6.359005948142122e-07, "loss": 14.5303, "step": 426970 }, { "epoch": 0.8625266143335609, "grad_norm": 248.04403686523438, "learning_rate": 6.357302464885934e-07, "loss": 14.8746, "step": 426980 }, { "epoch": 0.8625468149662447, "grad_norm": 516.7435913085938, "learning_rate": 6.355599194338974e-07, "loss": 28.9377, "step": 426990 }, { "epoch": 0.8625670155989286, "grad_norm": 120.26335144042969, "learning_rate": 6.353896136509524e-07, "loss": 11.3951, "step": 427000 }, { "epoch": 0.8625872162316124, "grad_norm": 42.786903381347656, "learning_rate": 6.352193291405884e-07, "loss": 7.7517, "step": 427010 }, { "epoch": 0.8626074168642962, "grad_norm": 533.2800903320312, "learning_rate": 6.350490659036362e-07, "loss": 25.1994, "step": 427020 }, { "epoch": 0.86262761749698, "grad_norm": 232.50086975097656, "learning_rate": 6.348788239409271e-07, "loss": 10.8617, "step": 427030 }, { "epoch": 0.8626478181296638, "grad_norm": 56.26165008544922, "learning_rate": 6.347086032532873e-07, "loss": 12.3419, "step": 427040 }, { "epoch": 0.8626680187623477, "grad_norm": 276.5984191894531, "learning_rate": 6.345384038415486e-07, "loss": 13.2447, "step": 427050 }, { "epoch": 0.8626882193950315, "grad_norm": 138.6144256591797, "learning_rate": 6.343682257065408e-07, "loss": 15.331, "step": 427060 }, { "epoch": 0.8627084200277153, "grad_norm": 546.62939453125, "learning_rate": 6.341980688490934e-07, "loss": 36.8719, "step": 427070 }, { "epoch": 0.8627286206603991, "grad_norm": 750.87451171875, "learning_rate": 6.340279332700333e-07, "loss": 23.4612, "step": 427080 }, { "epoch": 0.862748821293083, "grad_norm": 248.04440307617188, "learning_rate": 6.338578189701921e-07, "loss": 11.5574, "step": 427090 }, { "epoch": 0.8627690219257668, "grad_norm": 429.3407287597656, "learning_rate": 6.336877259504004e-07, "loss": 14.9368, "step": 427100 }, { "epoch": 0.8627892225584506, "grad_norm": 296.7677001953125, "learning_rate": 6.335176542114829e-07, "loss": 20.3626, "step": 427110 }, { "epoch": 0.8628094231911343, "grad_norm": 383.21343994140625, "learning_rate": 6.333476037542707e-07, "loss": 20.7349, "step": 427120 }, { "epoch": 0.8628296238238181, "grad_norm": 161.70468139648438, "learning_rate": 6.331775745795937e-07, "loss": 24.1944, "step": 427130 }, { "epoch": 0.8628498244565019, "grad_norm": 558.531005859375, "learning_rate": 6.330075666882795e-07, "loss": 21.6414, "step": 427140 }, { "epoch": 0.8628700250891858, "grad_norm": 429.7414855957031, "learning_rate": 6.328375800811559e-07, "loss": 19.0061, "step": 427150 }, { "epoch": 0.8628902257218696, "grad_norm": 613.4002075195312, "learning_rate": 6.326676147590533e-07, "loss": 20.4218, "step": 427160 }, { "epoch": 0.8629104263545534, "grad_norm": 324.2467041015625, "learning_rate": 6.324976707227993e-07, "loss": 6.6113, "step": 427170 }, { "epoch": 0.8629306269872372, "grad_norm": 0.0, "learning_rate": 6.323277479732203e-07, "loss": 13.0625, "step": 427180 }, { "epoch": 0.862950827619921, "grad_norm": 705.1793212890625, "learning_rate": 6.321578465111478e-07, "loss": 24.1333, "step": 427190 }, { "epoch": 0.8629710282526049, "grad_norm": 453.9203186035156, "learning_rate": 6.319879663374068e-07, "loss": 22.468, "step": 427200 }, { "epoch": 0.8629912288852887, "grad_norm": 239.30142211914062, "learning_rate": 6.318181074528279e-07, "loss": 25.7906, "step": 427210 }, { "epoch": 0.8630114295179725, "grad_norm": 391.6506652832031, "learning_rate": 6.316482698582365e-07, "loss": 9.0698, "step": 427220 }, { "epoch": 0.8630316301506563, "grad_norm": 57.62110137939453, "learning_rate": 6.314784535544627e-07, "loss": 12.9475, "step": 427230 }, { "epoch": 0.8630518307833401, "grad_norm": 92.63350677490234, "learning_rate": 6.313086585423316e-07, "loss": 14.8664, "step": 427240 }, { "epoch": 0.863072031416024, "grad_norm": 255.9056854248047, "learning_rate": 6.311388848226741e-07, "loss": 20.5877, "step": 427250 }, { "epoch": 0.8630922320487078, "grad_norm": 173.49542236328125, "learning_rate": 6.309691323963152e-07, "loss": 21.9859, "step": 427260 }, { "epoch": 0.8631124326813916, "grad_norm": 1932.9476318359375, "learning_rate": 6.307994012640822e-07, "loss": 27.2474, "step": 427270 }, { "epoch": 0.8631326333140754, "grad_norm": 410.6073303222656, "learning_rate": 6.30629691426804e-07, "loss": 17.2769, "step": 427280 }, { "epoch": 0.8631528339467592, "grad_norm": 466.3726501464844, "learning_rate": 6.304600028853065e-07, "loss": 16.4853, "step": 427290 }, { "epoch": 0.863173034579443, "grad_norm": 1031.7923583984375, "learning_rate": 6.302903356404161e-07, "loss": 19.3864, "step": 427300 }, { "epoch": 0.8631932352121269, "grad_norm": 114.4725341796875, "learning_rate": 6.301206896929607e-07, "loss": 11.8854, "step": 427310 }, { "epoch": 0.8632134358448107, "grad_norm": 333.98724365234375, "learning_rate": 6.29951065043769e-07, "loss": 17.1401, "step": 427320 }, { "epoch": 0.8632336364774945, "grad_norm": 187.3678436279297, "learning_rate": 6.297814616936637e-07, "loss": 18.1092, "step": 427330 }, { "epoch": 0.8632538371101783, "grad_norm": 267.3026428222656, "learning_rate": 6.296118796434735e-07, "loss": 14.566, "step": 427340 }, { "epoch": 0.8632740377428622, "grad_norm": 216.7257537841797, "learning_rate": 6.294423188940263e-07, "loss": 9.6769, "step": 427350 }, { "epoch": 0.863294238375546, "grad_norm": 264.22955322265625, "learning_rate": 6.292727794461468e-07, "loss": 21.8646, "step": 427360 }, { "epoch": 0.8633144390082297, "grad_norm": 341.1600341796875, "learning_rate": 6.291032613006604e-07, "loss": 23.4711, "step": 427370 }, { "epoch": 0.8633346396409135, "grad_norm": 241.09535217285156, "learning_rate": 6.289337644583949e-07, "loss": 11.735, "step": 427380 }, { "epoch": 0.8633548402735973, "grad_norm": 340.7505187988281, "learning_rate": 6.287642889201783e-07, "loss": 16.4057, "step": 427390 }, { "epoch": 0.8633750409062811, "grad_norm": 267.3469543457031, "learning_rate": 6.28594834686832e-07, "loss": 17.5358, "step": 427400 }, { "epoch": 0.863395241538965, "grad_norm": 103.78594970703125, "learning_rate": 6.284254017591845e-07, "loss": 13.5318, "step": 427410 }, { "epoch": 0.8634154421716488, "grad_norm": 490.9798583984375, "learning_rate": 6.282559901380625e-07, "loss": 16.8245, "step": 427420 }, { "epoch": 0.8634356428043326, "grad_norm": 297.1769104003906, "learning_rate": 6.280865998242908e-07, "loss": 16.6822, "step": 427430 }, { "epoch": 0.8634558434370164, "grad_norm": 249.63978576660156, "learning_rate": 6.279172308186931e-07, "loss": 17.7045, "step": 427440 }, { "epoch": 0.8634760440697002, "grad_norm": 219.16079711914062, "learning_rate": 6.277478831220979e-07, "loss": 20.2264, "step": 427450 }, { "epoch": 0.8634962447023841, "grad_norm": 120.80721282958984, "learning_rate": 6.275785567353293e-07, "loss": 14.207, "step": 427460 }, { "epoch": 0.8635164453350679, "grad_norm": 152.2388458251953, "learning_rate": 6.274092516592111e-07, "loss": 9.7045, "step": 427470 }, { "epoch": 0.8635366459677517, "grad_norm": 282.41778564453125, "learning_rate": 6.272399678945712e-07, "loss": 10.4501, "step": 427480 }, { "epoch": 0.8635568466004355, "grad_norm": 539.1661376953125, "learning_rate": 6.27070705442232e-07, "loss": 20.9257, "step": 427490 }, { "epoch": 0.8635770472331193, "grad_norm": 397.8507385253906, "learning_rate": 6.269014643030214e-07, "loss": 20.8538, "step": 427500 }, { "epoch": 0.8635972478658032, "grad_norm": 242.8325653076172, "learning_rate": 6.267322444777612e-07, "loss": 14.1207, "step": 427510 }, { "epoch": 0.863617448498487, "grad_norm": 347.0693359375, "learning_rate": 6.265630459672789e-07, "loss": 10.4487, "step": 427520 }, { "epoch": 0.8636376491311708, "grad_norm": 114.38526153564453, "learning_rate": 6.263938687723981e-07, "loss": 19.362, "step": 427530 }, { "epoch": 0.8636578497638546, "grad_norm": 223.4969482421875, "learning_rate": 6.262247128939414e-07, "loss": 10.003, "step": 427540 }, { "epoch": 0.8636780503965384, "grad_norm": 290.72357177734375, "learning_rate": 6.260555783327366e-07, "loss": 20.083, "step": 427550 }, { "epoch": 0.8636982510292223, "grad_norm": 2.7025504112243652, "learning_rate": 6.258864650896051e-07, "loss": 10.6744, "step": 427560 }, { "epoch": 0.8637184516619061, "grad_norm": 568.3017578125, "learning_rate": 6.257173731653738e-07, "loss": 22.8239, "step": 427570 }, { "epoch": 0.8637386522945899, "grad_norm": 264.5793151855469, "learning_rate": 6.25548302560865e-07, "loss": 20.8718, "step": 427580 }, { "epoch": 0.8637588529272737, "grad_norm": 425.9575500488281, "learning_rate": 6.253792532769026e-07, "loss": 13.1263, "step": 427590 }, { "epoch": 0.8637790535599575, "grad_norm": 428.3517761230469, "learning_rate": 6.252102253143122e-07, "loss": 24.8398, "step": 427600 }, { "epoch": 0.8637992541926414, "grad_norm": 350.8314208984375, "learning_rate": 6.250412186739163e-07, "loss": 15.2756, "step": 427610 }, { "epoch": 0.8638194548253252, "grad_norm": 480.5556945800781, "learning_rate": 6.248722333565377e-07, "loss": 19.9842, "step": 427620 }, { "epoch": 0.8638396554580089, "grad_norm": 45.072906494140625, "learning_rate": 6.247032693630012e-07, "loss": 22.4469, "step": 427630 }, { "epoch": 0.8638598560906927, "grad_norm": 325.568359375, "learning_rate": 6.245343266941328e-07, "loss": 17.0111, "step": 427640 }, { "epoch": 0.8638800567233765, "grad_norm": 29.840423583984375, "learning_rate": 6.243654053507515e-07, "loss": 10.7177, "step": 427650 }, { "epoch": 0.8639002573560604, "grad_norm": 824.6751708984375, "learning_rate": 6.241965053336818e-07, "loss": 21.3112, "step": 427660 }, { "epoch": 0.8639204579887442, "grad_norm": 420.8976745605469, "learning_rate": 6.24027626643749e-07, "loss": 20.7418, "step": 427670 }, { "epoch": 0.863940658621428, "grad_norm": 387.7466735839844, "learning_rate": 6.238587692817749e-07, "loss": 11.6445, "step": 427680 }, { "epoch": 0.8639608592541118, "grad_norm": 350.6593933105469, "learning_rate": 6.236899332485813e-07, "loss": 16.1901, "step": 427690 }, { "epoch": 0.8639810598867956, "grad_norm": 340.4710693359375, "learning_rate": 6.235211185449919e-07, "loss": 12.8927, "step": 427700 }, { "epoch": 0.8640012605194795, "grad_norm": 278.5534973144531, "learning_rate": 6.233523251718321e-07, "loss": 15.8561, "step": 427710 }, { "epoch": 0.8640214611521633, "grad_norm": 253.22122192382812, "learning_rate": 6.231835531299202e-07, "loss": 21.5683, "step": 427720 }, { "epoch": 0.8640416617848471, "grad_norm": 315.7308654785156, "learning_rate": 6.23014802420081e-07, "loss": 18.8528, "step": 427730 }, { "epoch": 0.8640618624175309, "grad_norm": 118.27812957763672, "learning_rate": 6.228460730431374e-07, "loss": 11.3726, "step": 427740 }, { "epoch": 0.8640820630502147, "grad_norm": 78.91473388671875, "learning_rate": 6.226773649999113e-07, "loss": 11.204, "step": 427750 }, { "epoch": 0.8641022636828986, "grad_norm": 241.45413208007812, "learning_rate": 6.225086782912237e-07, "loss": 20.8489, "step": 427760 }, { "epoch": 0.8641224643155824, "grad_norm": 147.634765625, "learning_rate": 6.223400129178992e-07, "loss": 10.5424, "step": 427770 }, { "epoch": 0.8641426649482662, "grad_norm": 196.81192016601562, "learning_rate": 6.221713688807585e-07, "loss": 30.416, "step": 427780 }, { "epoch": 0.86416286558095, "grad_norm": 49.05225372314453, "learning_rate": 6.220027461806222e-07, "loss": 8.995, "step": 427790 }, { "epoch": 0.8641830662136338, "grad_norm": 287.02032470703125, "learning_rate": 6.218341448183141e-07, "loss": 24.6338, "step": 427800 }, { "epoch": 0.8642032668463177, "grad_norm": 224.36782836914062, "learning_rate": 6.216655647946556e-07, "loss": 14.2381, "step": 427810 }, { "epoch": 0.8642234674790015, "grad_norm": 148.0165557861328, "learning_rate": 6.214970061104686e-07, "loss": 29.8094, "step": 427820 }, { "epoch": 0.8642436681116853, "grad_norm": 323.8502502441406, "learning_rate": 6.213284687665733e-07, "loss": 24.0944, "step": 427830 }, { "epoch": 0.8642638687443691, "grad_norm": 350.33251953125, "learning_rate": 6.21159952763793e-07, "loss": 13.3549, "step": 427840 }, { "epoch": 0.8642840693770529, "grad_norm": 196.468017578125, "learning_rate": 6.209914581029474e-07, "loss": 24.0399, "step": 427850 }, { "epoch": 0.8643042700097368, "grad_norm": 234.9713592529297, "learning_rate": 6.20822984784858e-07, "loss": 14.5471, "step": 427860 }, { "epoch": 0.8643244706424206, "grad_norm": 250.5386199951172, "learning_rate": 6.20654532810347e-07, "loss": 18.132, "step": 427870 }, { "epoch": 0.8643446712751043, "grad_norm": 156.40628051757812, "learning_rate": 6.204861021802333e-07, "loss": 9.2212, "step": 427880 }, { "epoch": 0.8643648719077881, "grad_norm": 282.8708190917969, "learning_rate": 6.203176928953403e-07, "loss": 16.3524, "step": 427890 }, { "epoch": 0.8643850725404719, "grad_norm": 752.982421875, "learning_rate": 6.201493049564883e-07, "loss": 14.544, "step": 427900 }, { "epoch": 0.8644052731731557, "grad_norm": 416.86932373046875, "learning_rate": 6.199809383644956e-07, "loss": 11.0486, "step": 427910 }, { "epoch": 0.8644254738058396, "grad_norm": 222.2848358154297, "learning_rate": 6.198125931201848e-07, "loss": 19.7606, "step": 427920 }, { "epoch": 0.8644456744385234, "grad_norm": 133.27310180664062, "learning_rate": 6.196442692243787e-07, "loss": 21.9536, "step": 427930 }, { "epoch": 0.8644658750712072, "grad_norm": 599.850830078125, "learning_rate": 6.194759666778927e-07, "loss": 20.2955, "step": 427940 }, { "epoch": 0.864486075703891, "grad_norm": 325.1663513183594, "learning_rate": 6.193076854815494e-07, "loss": 18.8125, "step": 427950 }, { "epoch": 0.8645062763365748, "grad_norm": 276.27618408203125, "learning_rate": 6.191394256361699e-07, "loss": 21.4611, "step": 427960 }, { "epoch": 0.8645264769692587, "grad_norm": 255.7522430419922, "learning_rate": 6.189711871425741e-07, "loss": 14.2042, "step": 427970 }, { "epoch": 0.8645466776019425, "grad_norm": 314.2364501953125, "learning_rate": 6.188029700015802e-07, "loss": 13.2705, "step": 427980 }, { "epoch": 0.8645668782346263, "grad_norm": 315.06170654296875, "learning_rate": 6.186347742140092e-07, "loss": 30.0308, "step": 427990 }, { "epoch": 0.8645870788673101, "grad_norm": 787.111328125, "learning_rate": 6.184665997806832e-07, "loss": 19.6346, "step": 428000 }, { "epoch": 0.8646072794999939, "grad_norm": 374.6004638671875, "learning_rate": 6.182984467024173e-07, "loss": 19.146, "step": 428010 }, { "epoch": 0.8646274801326778, "grad_norm": 391.1521911621094, "learning_rate": 6.181303149800333e-07, "loss": 20.1257, "step": 428020 }, { "epoch": 0.8646476807653616, "grad_norm": 101.3083267211914, "learning_rate": 6.179622046143513e-07, "loss": 20.4961, "step": 428030 }, { "epoch": 0.8646678813980454, "grad_norm": 271.12725830078125, "learning_rate": 6.177941156061906e-07, "loss": 9.0437, "step": 428040 }, { "epoch": 0.8646880820307292, "grad_norm": 105.76915740966797, "learning_rate": 6.17626047956369e-07, "loss": 15.4698, "step": 428050 }, { "epoch": 0.864708282663413, "grad_norm": 37.18621826171875, "learning_rate": 6.174580016657073e-07, "loss": 11.5056, "step": 428060 }, { "epoch": 0.8647284832960969, "grad_norm": 721.3698120117188, "learning_rate": 6.172899767350238e-07, "loss": 17.9816, "step": 428070 }, { "epoch": 0.8647486839287807, "grad_norm": 278.7106018066406, "learning_rate": 6.171219731651362e-07, "loss": 23.648, "step": 428080 }, { "epoch": 0.8647688845614645, "grad_norm": 372.8246765136719, "learning_rate": 6.169539909568656e-07, "loss": 19.188, "step": 428090 }, { "epoch": 0.8647890851941483, "grad_norm": 313.6341247558594, "learning_rate": 6.167860301110284e-07, "loss": 22.3156, "step": 428100 }, { "epoch": 0.8648092858268321, "grad_norm": 133.35025024414062, "learning_rate": 6.166180906284458e-07, "loss": 23.5627, "step": 428110 }, { "epoch": 0.864829486459516, "grad_norm": 80.14603424072266, "learning_rate": 6.164501725099342e-07, "loss": 20.9143, "step": 428120 }, { "epoch": 0.8648496870921998, "grad_norm": 236.74549865722656, "learning_rate": 6.162822757563136e-07, "loss": 15.9507, "step": 428130 }, { "epoch": 0.8648698877248835, "grad_norm": 366.94036865234375, "learning_rate": 6.161144003684017e-07, "loss": 20.8792, "step": 428140 }, { "epoch": 0.8648900883575673, "grad_norm": 60.14889907836914, "learning_rate": 6.159465463470149e-07, "loss": 11.6091, "step": 428150 }, { "epoch": 0.8649102889902511, "grad_norm": 298.1804504394531, "learning_rate": 6.157787136929743e-07, "loss": 13.0195, "step": 428160 }, { "epoch": 0.864930489622935, "grad_norm": 63.309974670410156, "learning_rate": 6.156109024070955e-07, "loss": 19.1855, "step": 428170 }, { "epoch": 0.8649506902556188, "grad_norm": 387.5739440917969, "learning_rate": 6.154431124901983e-07, "loss": 9.8993, "step": 428180 }, { "epoch": 0.8649708908883026, "grad_norm": 644.6115112304688, "learning_rate": 6.152753439430997e-07, "loss": 15.9906, "step": 428190 }, { "epoch": 0.8649910915209864, "grad_norm": 351.52520751953125, "learning_rate": 6.151075967666165e-07, "loss": 20.633, "step": 428200 }, { "epoch": 0.8650112921536702, "grad_norm": 261.1536865234375, "learning_rate": 6.149398709615678e-07, "loss": 15.0145, "step": 428210 }, { "epoch": 0.865031492786354, "grad_norm": 256.3823547363281, "learning_rate": 6.147721665287703e-07, "loss": 8.0933, "step": 428220 }, { "epoch": 0.8650516934190379, "grad_norm": 15.405685424804688, "learning_rate": 6.146044834690401e-07, "loss": 8.8332, "step": 428230 }, { "epoch": 0.8650718940517217, "grad_norm": 199.33229064941406, "learning_rate": 6.144368217831965e-07, "loss": 7.77, "step": 428240 }, { "epoch": 0.8650920946844055, "grad_norm": 215.8980712890625, "learning_rate": 6.142691814720575e-07, "loss": 20.3456, "step": 428250 }, { "epoch": 0.8651122953170893, "grad_norm": 255.4296417236328, "learning_rate": 6.141015625364366e-07, "loss": 8.5226, "step": 428260 }, { "epoch": 0.8651324959497732, "grad_norm": 225.79835510253906, "learning_rate": 6.139339649771525e-07, "loss": 16.9742, "step": 428270 }, { "epoch": 0.865152696582457, "grad_norm": 139.10226440429688, "learning_rate": 6.137663887950235e-07, "loss": 13.3792, "step": 428280 }, { "epoch": 0.8651728972151408, "grad_norm": 816.4263305664062, "learning_rate": 6.135988339908655e-07, "loss": 21.1025, "step": 428290 }, { "epoch": 0.8651930978478246, "grad_norm": 396.3490905761719, "learning_rate": 6.134313005654929e-07, "loss": 17.1094, "step": 428300 }, { "epoch": 0.8652132984805084, "grad_norm": 302.6025390625, "learning_rate": 6.132637885197251e-07, "loss": 20.2784, "step": 428310 }, { "epoch": 0.8652334991131923, "grad_norm": 144.55792236328125, "learning_rate": 6.130962978543792e-07, "loss": 11.4848, "step": 428320 }, { "epoch": 0.8652536997458761, "grad_norm": 254.57180786132812, "learning_rate": 6.129288285702672e-07, "loss": 13.2996, "step": 428330 }, { "epoch": 0.8652739003785599, "grad_norm": 144.44224548339844, "learning_rate": 6.127613806682087e-07, "loss": 21.9786, "step": 428340 }, { "epoch": 0.8652941010112437, "grad_norm": 210.07656860351562, "learning_rate": 6.1259395414902e-07, "loss": 11.6278, "step": 428350 }, { "epoch": 0.8653143016439275, "grad_norm": 202.88331604003906, "learning_rate": 6.124265490135161e-07, "loss": 19.1447, "step": 428360 }, { "epoch": 0.8653345022766114, "grad_norm": 374.83978271484375, "learning_rate": 6.122591652625126e-07, "loss": 13.9947, "step": 428370 }, { "epoch": 0.8653547029092952, "grad_norm": 1035.8675537109375, "learning_rate": 6.120918028968265e-07, "loss": 15.8294, "step": 428380 }, { "epoch": 0.8653749035419789, "grad_norm": 681.4384155273438, "learning_rate": 6.119244619172727e-07, "loss": 20.7848, "step": 428390 }, { "epoch": 0.8653951041746627, "grad_norm": 382.2265930175781, "learning_rate": 6.117571423246655e-07, "loss": 16.582, "step": 428400 }, { "epoch": 0.8654153048073465, "grad_norm": 252.7736358642578, "learning_rate": 6.11589844119822e-07, "loss": 8.6321, "step": 428410 }, { "epoch": 0.8654355054400303, "grad_norm": 1083.947998046875, "learning_rate": 6.114225673035584e-07, "loss": 21.8293, "step": 428420 }, { "epoch": 0.8654557060727142, "grad_norm": 152.75503540039062, "learning_rate": 6.112553118766889e-07, "loss": 22.3239, "step": 428430 }, { "epoch": 0.865475906705398, "grad_norm": 91.88584899902344, "learning_rate": 6.110880778400275e-07, "loss": 17.9463, "step": 428440 }, { "epoch": 0.8654961073380818, "grad_norm": 205.8636932373047, "learning_rate": 6.109208651943921e-07, "loss": 19.4268, "step": 428450 }, { "epoch": 0.8655163079707656, "grad_norm": 12.514071464538574, "learning_rate": 6.107536739405956e-07, "loss": 18.0592, "step": 428460 }, { "epoch": 0.8655365086034494, "grad_norm": 41.81550598144531, "learning_rate": 6.105865040794523e-07, "loss": 20.8292, "step": 428470 }, { "epoch": 0.8655567092361333, "grad_norm": 390.52593994140625, "learning_rate": 6.104193556117793e-07, "loss": 24.3912, "step": 428480 }, { "epoch": 0.8655769098688171, "grad_norm": 324.08642578125, "learning_rate": 6.102522285383888e-07, "loss": 11.2023, "step": 428490 }, { "epoch": 0.8655971105015009, "grad_norm": 257.45123291015625, "learning_rate": 6.100851228600974e-07, "loss": 30.6622, "step": 428500 }, { "epoch": 0.8656173111341847, "grad_norm": 381.84051513671875, "learning_rate": 6.099180385777192e-07, "loss": 18.7013, "step": 428510 }, { "epoch": 0.8656375117668685, "grad_norm": 375.1788635253906, "learning_rate": 6.097509756920667e-07, "loss": 25.043, "step": 428520 }, { "epoch": 0.8656577123995524, "grad_norm": 195.8188934326172, "learning_rate": 6.095839342039561e-07, "loss": 11.9097, "step": 428530 }, { "epoch": 0.8656779130322362, "grad_norm": 551.5490112304688, "learning_rate": 6.094169141142014e-07, "loss": 19.8383, "step": 428540 }, { "epoch": 0.86569811366492, "grad_norm": 33.70741653442383, "learning_rate": 6.092499154236148e-07, "loss": 15.3881, "step": 428550 }, { "epoch": 0.8657183142976038, "grad_norm": 307.7119140625, "learning_rate": 6.090829381330116e-07, "loss": 21.007, "step": 428560 }, { "epoch": 0.8657385149302876, "grad_norm": 235.68484497070312, "learning_rate": 6.089159822432073e-07, "loss": 15.3744, "step": 428570 }, { "epoch": 0.8657587155629715, "grad_norm": 263.1143493652344, "learning_rate": 6.087490477550129e-07, "loss": 20.2029, "step": 428580 }, { "epoch": 0.8657789161956553, "grad_norm": 235.42340087890625, "learning_rate": 6.085821346692427e-07, "loss": 14.6232, "step": 428590 }, { "epoch": 0.8657991168283391, "grad_norm": 162.44549560546875, "learning_rate": 6.084152429867113e-07, "loss": 13.1799, "step": 428600 }, { "epoch": 0.8658193174610229, "grad_norm": 219.5083770751953, "learning_rate": 6.082483727082317e-07, "loss": 19.9044, "step": 428610 }, { "epoch": 0.8658395180937067, "grad_norm": 18.062034606933594, "learning_rate": 6.080815238346155e-07, "loss": 11.8987, "step": 428620 }, { "epoch": 0.8658597187263906, "grad_norm": 176.3072052001953, "learning_rate": 6.079146963666777e-07, "loss": 20.2592, "step": 428630 }, { "epoch": 0.8658799193590744, "grad_norm": 196.42071533203125, "learning_rate": 6.077478903052314e-07, "loss": 9.7356, "step": 428640 }, { "epoch": 0.8659001199917581, "grad_norm": 156.17787170410156, "learning_rate": 6.075811056510894e-07, "loss": 19.1281, "step": 428650 }, { "epoch": 0.8659203206244419, "grad_norm": 282.8064880371094, "learning_rate": 6.074143424050638e-07, "loss": 11.9913, "step": 428660 }, { "epoch": 0.8659405212571257, "grad_norm": 197.72618103027344, "learning_rate": 6.072476005679684e-07, "loss": 17.2073, "step": 428670 }, { "epoch": 0.8659607218898095, "grad_norm": 118.91537475585938, "learning_rate": 6.070808801406158e-07, "loss": 14.8192, "step": 428680 }, { "epoch": 0.8659809225224934, "grad_norm": 325.2646789550781, "learning_rate": 6.069141811238166e-07, "loss": 24.0653, "step": 428690 }, { "epoch": 0.8660011231551772, "grad_norm": 277.614501953125, "learning_rate": 6.067475035183862e-07, "loss": 18.0918, "step": 428700 }, { "epoch": 0.866021323787861, "grad_norm": 157.31773376464844, "learning_rate": 6.06580847325135e-07, "loss": 24.915, "step": 428710 }, { "epoch": 0.8660415244205448, "grad_norm": 269.2095642089844, "learning_rate": 6.064142125448763e-07, "loss": 13.1471, "step": 428720 }, { "epoch": 0.8660617250532286, "grad_norm": 101.45182800292969, "learning_rate": 6.062475991784211e-07, "loss": 7.803, "step": 428730 }, { "epoch": 0.8660819256859125, "grad_norm": 337.7289123535156, "learning_rate": 6.060810072265833e-07, "loss": 16.5348, "step": 428740 }, { "epoch": 0.8661021263185963, "grad_norm": 143.8042755126953, "learning_rate": 6.059144366901737e-07, "loss": 25.8053, "step": 428750 }, { "epoch": 0.8661223269512801, "grad_norm": 247.76853942871094, "learning_rate": 6.057478875700035e-07, "loss": 18.8556, "step": 428760 }, { "epoch": 0.8661425275839639, "grad_norm": 244.16229248046875, "learning_rate": 6.055813598668853e-07, "loss": 27.3512, "step": 428770 }, { "epoch": 0.8661627282166477, "grad_norm": 577.4889526367188, "learning_rate": 6.054148535816301e-07, "loss": 14.9934, "step": 428780 }, { "epoch": 0.8661829288493316, "grad_norm": 150.03895568847656, "learning_rate": 6.052483687150512e-07, "loss": 27.7736, "step": 428790 }, { "epoch": 0.8662031294820154, "grad_norm": 507.7596435546875, "learning_rate": 6.050819052679585e-07, "loss": 18.6382, "step": 428800 }, { "epoch": 0.8662233301146992, "grad_norm": 388.2171936035156, "learning_rate": 6.049154632411625e-07, "loss": 17.4887, "step": 428810 }, { "epoch": 0.866243530747383, "grad_norm": 451.86663818359375, "learning_rate": 6.047490426354763e-07, "loss": 36.7194, "step": 428820 }, { "epoch": 0.8662637313800668, "grad_norm": 526.371826171875, "learning_rate": 6.045826434517104e-07, "loss": 15.8983, "step": 428830 }, { "epoch": 0.8662839320127507, "grad_norm": 326.0290832519531, "learning_rate": 6.044162656906744e-07, "loss": 21.5066, "step": 428840 }, { "epoch": 0.8663041326454345, "grad_norm": 162.91583251953125, "learning_rate": 6.042499093531806e-07, "loss": 12.7883, "step": 428850 }, { "epoch": 0.8663243332781183, "grad_norm": 134.56124877929688, "learning_rate": 6.040835744400403e-07, "loss": 13.7821, "step": 428860 }, { "epoch": 0.8663445339108021, "grad_norm": 303.36114501953125, "learning_rate": 6.039172609520639e-07, "loss": 19.4464, "step": 428870 }, { "epoch": 0.866364734543486, "grad_norm": 183.8982391357422, "learning_rate": 6.037509688900606e-07, "loss": 16.5134, "step": 428880 }, { "epoch": 0.8663849351761698, "grad_norm": 228.36627197265625, "learning_rate": 6.035846982548427e-07, "loss": 12.6935, "step": 428890 }, { "epoch": 0.8664051358088536, "grad_norm": 462.7586669921875, "learning_rate": 6.034184490472195e-07, "loss": 21.9789, "step": 428900 }, { "epoch": 0.8664253364415373, "grad_norm": 352.4219665527344, "learning_rate": 6.032522212680009e-07, "loss": 22.4572, "step": 428910 }, { "epoch": 0.8664455370742211, "grad_norm": 310.3164978027344, "learning_rate": 6.030860149179973e-07, "loss": 29.9658, "step": 428920 }, { "epoch": 0.8664657377069049, "grad_norm": 452.3450622558594, "learning_rate": 6.029198299980216e-07, "loss": 18.4972, "step": 428930 }, { "epoch": 0.8664859383395888, "grad_norm": 215.80081176757812, "learning_rate": 6.027536665088795e-07, "loss": 15.1169, "step": 428940 }, { "epoch": 0.8665061389722726, "grad_norm": 553.4271850585938, "learning_rate": 6.025875244513824e-07, "loss": 22.0291, "step": 428950 }, { "epoch": 0.8665263396049564, "grad_norm": 201.82196044921875, "learning_rate": 6.024214038263415e-07, "loss": 16.4352, "step": 428960 }, { "epoch": 0.8665465402376402, "grad_norm": 415.3941650390625, "learning_rate": 6.022553046345647e-07, "loss": 14.2913, "step": 428970 }, { "epoch": 0.866566740870324, "grad_norm": 242.05711364746094, "learning_rate": 6.020892268768619e-07, "loss": 15.1337, "step": 428980 }, { "epoch": 0.8665869415030079, "grad_norm": 328.94195556640625, "learning_rate": 6.019231705540435e-07, "loss": 28.022, "step": 428990 }, { "epoch": 0.8666071421356917, "grad_norm": 280.80255126953125, "learning_rate": 6.017571356669183e-07, "loss": 17.7552, "step": 429000 }, { "epoch": 0.8666273427683755, "grad_norm": 910.9259643554688, "learning_rate": 6.015911222162946e-07, "loss": 32.5377, "step": 429010 }, { "epoch": 0.8666475434010593, "grad_norm": 199.94970703125, "learning_rate": 6.014251302029817e-07, "loss": 13.5446, "step": 429020 }, { "epoch": 0.8666677440337431, "grad_norm": 558.7119750976562, "learning_rate": 6.012591596277906e-07, "loss": 23.016, "step": 429030 }, { "epoch": 0.866687944666427, "grad_norm": 146.7935028076172, "learning_rate": 6.01093210491529e-07, "loss": 11.2062, "step": 429040 }, { "epoch": 0.8667081452991108, "grad_norm": 381.5577697753906, "learning_rate": 6.009272827950042e-07, "loss": 24.3796, "step": 429050 }, { "epoch": 0.8667283459317946, "grad_norm": 342.403076171875, "learning_rate": 6.007613765390274e-07, "loss": 20.4655, "step": 429060 }, { "epoch": 0.8667485465644784, "grad_norm": 295.2065734863281, "learning_rate": 6.005954917244062e-07, "loss": 19.4918, "step": 429070 }, { "epoch": 0.8667687471971622, "grad_norm": 463.19683837890625, "learning_rate": 6.004296283519478e-07, "loss": 11.7546, "step": 429080 }, { "epoch": 0.8667889478298461, "grad_norm": 61.19007873535156, "learning_rate": 6.002637864224631e-07, "loss": 23.4715, "step": 429090 }, { "epoch": 0.8668091484625299, "grad_norm": 859.3980102539062, "learning_rate": 6.000979659367579e-07, "loss": 28.7117, "step": 429100 }, { "epoch": 0.8668293490952137, "grad_norm": 394.579833984375, "learning_rate": 5.999321668956425e-07, "loss": 34.4005, "step": 429110 }, { "epoch": 0.8668495497278975, "grad_norm": 18.669734954833984, "learning_rate": 5.997663892999239e-07, "loss": 15.9816, "step": 429120 }, { "epoch": 0.8668697503605813, "grad_norm": 24.459758758544922, "learning_rate": 5.996006331504095e-07, "loss": 9.6649, "step": 429130 }, { "epoch": 0.8668899509932652, "grad_norm": 282.9619140625, "learning_rate": 5.994348984479092e-07, "loss": 9.8981, "step": 429140 }, { "epoch": 0.866910151625949, "grad_norm": 252.2718505859375, "learning_rate": 5.992691851932292e-07, "loss": 15.6859, "step": 429150 }, { "epoch": 0.8669303522586327, "grad_norm": 630.28125, "learning_rate": 5.991034933871764e-07, "loss": 27.2728, "step": 429160 }, { "epoch": 0.8669505528913165, "grad_norm": 322.893310546875, "learning_rate": 5.989378230305592e-07, "loss": 16.9584, "step": 429170 }, { "epoch": 0.8669707535240003, "grad_norm": 592.335693359375, "learning_rate": 5.987721741241864e-07, "loss": 16.7208, "step": 429180 }, { "epoch": 0.8669909541566841, "grad_norm": 223.1149444580078, "learning_rate": 5.986065466688645e-07, "loss": 8.3642, "step": 429190 }, { "epoch": 0.867011154789368, "grad_norm": 315.1728820800781, "learning_rate": 5.98440940665399e-07, "loss": 7.3146, "step": 429200 }, { "epoch": 0.8670313554220518, "grad_norm": 263.8191833496094, "learning_rate": 5.982753561145999e-07, "loss": 12.7024, "step": 429210 }, { "epoch": 0.8670515560547356, "grad_norm": 391.21453857421875, "learning_rate": 5.981097930172725e-07, "loss": 12.3601, "step": 429220 }, { "epoch": 0.8670717566874194, "grad_norm": 81.99147033691406, "learning_rate": 5.979442513742234e-07, "loss": 16.3302, "step": 429230 }, { "epoch": 0.8670919573201032, "grad_norm": 91.91902923583984, "learning_rate": 5.977787311862598e-07, "loss": 29.2279, "step": 429240 }, { "epoch": 0.8671121579527871, "grad_norm": 290.50225830078125, "learning_rate": 5.9761323245419e-07, "loss": 12.1331, "step": 429250 }, { "epoch": 0.8671323585854709, "grad_norm": 386.3379821777344, "learning_rate": 5.974477551788194e-07, "loss": 11.5684, "step": 429260 }, { "epoch": 0.8671525592181547, "grad_norm": 8.265846252441406, "learning_rate": 5.972822993609534e-07, "loss": 19.5983, "step": 429270 }, { "epoch": 0.8671727598508385, "grad_norm": 323.8424072265625, "learning_rate": 5.971168650014008e-07, "loss": 17.9333, "step": 429280 }, { "epoch": 0.8671929604835223, "grad_norm": 573.0355834960938, "learning_rate": 5.969514521009662e-07, "loss": 18.3358, "step": 429290 }, { "epoch": 0.8672131611162062, "grad_norm": 368.9503173828125, "learning_rate": 5.967860606604553e-07, "loss": 9.7872, "step": 429300 }, { "epoch": 0.86723336174889, "grad_norm": 634.7498168945312, "learning_rate": 5.966206906806748e-07, "loss": 22.7565, "step": 429310 }, { "epoch": 0.8672535623815738, "grad_norm": 189.865478515625, "learning_rate": 5.964553421624325e-07, "loss": 11.0884, "step": 429320 }, { "epoch": 0.8672737630142576, "grad_norm": 229.6313934326172, "learning_rate": 5.962900151065326e-07, "loss": 13.2101, "step": 429330 }, { "epoch": 0.8672939636469414, "grad_norm": 147.2005615234375, "learning_rate": 5.961247095137795e-07, "loss": 11.2871, "step": 429340 }, { "epoch": 0.8673141642796253, "grad_norm": 219.44606018066406, "learning_rate": 5.959594253849821e-07, "loss": 21.7122, "step": 429350 }, { "epoch": 0.8673343649123091, "grad_norm": 391.3631591796875, "learning_rate": 5.95794162720944e-07, "loss": 22.3558, "step": 429360 }, { "epoch": 0.8673545655449929, "grad_norm": 332.0225830078125, "learning_rate": 5.956289215224703e-07, "loss": 5.7098, "step": 429370 }, { "epoch": 0.8673747661776767, "grad_norm": 146.01632690429688, "learning_rate": 5.95463701790368e-07, "loss": 10.8803, "step": 429380 }, { "epoch": 0.8673949668103605, "grad_norm": 195.25027465820312, "learning_rate": 5.9529850352544e-07, "loss": 5.1518, "step": 429390 }, { "epoch": 0.8674151674430444, "grad_norm": 222.6243133544922, "learning_rate": 5.951333267284942e-07, "loss": 17.026, "step": 429400 }, { "epoch": 0.8674353680757282, "grad_norm": 353.83624267578125, "learning_rate": 5.949681714003347e-07, "loss": 16.2362, "step": 429410 }, { "epoch": 0.8674555687084119, "grad_norm": 417.9424133300781, "learning_rate": 5.948030375417646e-07, "loss": 18.7633, "step": 429420 }, { "epoch": 0.8674757693410957, "grad_norm": 157.91519165039062, "learning_rate": 5.946379251535911e-07, "loss": 16.1146, "step": 429430 }, { "epoch": 0.8674959699737795, "grad_norm": 1.0067952871322632, "learning_rate": 5.944728342366179e-07, "loss": 16.6756, "step": 429440 }, { "epoch": 0.8675161706064634, "grad_norm": 355.3564758300781, "learning_rate": 5.943077647916496e-07, "loss": 14.706, "step": 429450 }, { "epoch": 0.8675363712391472, "grad_norm": 320.314453125, "learning_rate": 5.941427168194902e-07, "loss": 20.1117, "step": 429460 }, { "epoch": 0.867556571871831, "grad_norm": 350.0729064941406, "learning_rate": 5.93977690320946e-07, "loss": 22.0971, "step": 429470 }, { "epoch": 0.8675767725045148, "grad_norm": 410.6178894042969, "learning_rate": 5.938126852968201e-07, "loss": 11.3771, "step": 429480 }, { "epoch": 0.8675969731371986, "grad_norm": 353.26922607421875, "learning_rate": 5.936477017479158e-07, "loss": 12.4809, "step": 429490 }, { "epoch": 0.8676171737698825, "grad_norm": 100.64823150634766, "learning_rate": 5.934827396750392e-07, "loss": 26.9218, "step": 429500 }, { "epoch": 0.8676373744025663, "grad_norm": 194.20530700683594, "learning_rate": 5.933177990789934e-07, "loss": 11.9476, "step": 429510 }, { "epoch": 0.8676575750352501, "grad_norm": 345.63226318359375, "learning_rate": 5.931528799605813e-07, "loss": 20.6152, "step": 429520 }, { "epoch": 0.8676777756679339, "grad_norm": 494.8049621582031, "learning_rate": 5.92987982320607e-07, "loss": 27.0089, "step": 429530 }, { "epoch": 0.8676979763006177, "grad_norm": 0.0, "learning_rate": 5.928231061598772e-07, "loss": 14.0555, "step": 429540 }, { "epoch": 0.8677181769333016, "grad_norm": 249.08834838867188, "learning_rate": 5.926582514791912e-07, "loss": 15.6392, "step": 429550 }, { "epoch": 0.8677383775659854, "grad_norm": 352.59246826171875, "learning_rate": 5.92493418279354e-07, "loss": 17.8044, "step": 429560 }, { "epoch": 0.8677585781986692, "grad_norm": 3.3819949626922607, "learning_rate": 5.923286065611705e-07, "loss": 10.717, "step": 429570 }, { "epoch": 0.867778778831353, "grad_norm": 401.1866455078125, "learning_rate": 5.921638163254423e-07, "loss": 26.1984, "step": 429580 }, { "epoch": 0.8677989794640368, "grad_norm": 331.5385437011719, "learning_rate": 5.919990475729725e-07, "loss": 20.3919, "step": 429590 }, { "epoch": 0.8678191800967207, "grad_norm": 181.6807403564453, "learning_rate": 5.918343003045656e-07, "loss": 6.8367, "step": 429600 }, { "epoch": 0.8678393807294045, "grad_norm": 0.0, "learning_rate": 5.916695745210238e-07, "loss": 11.5288, "step": 429610 }, { "epoch": 0.8678595813620883, "grad_norm": 455.24578857421875, "learning_rate": 5.915048702231491e-07, "loss": 15.6658, "step": 429620 }, { "epoch": 0.8678797819947721, "grad_norm": 199.43023681640625, "learning_rate": 5.913401874117447e-07, "loss": 13.4704, "step": 429630 }, { "epoch": 0.8678999826274559, "grad_norm": 383.42352294921875, "learning_rate": 5.911755260876145e-07, "loss": 10.191, "step": 429640 }, { "epoch": 0.8679201832601398, "grad_norm": 3.932265281677246, "learning_rate": 5.910108862515596e-07, "loss": 18.7308, "step": 429650 }, { "epoch": 0.8679403838928236, "grad_norm": 317.3648376464844, "learning_rate": 5.908462679043825e-07, "loss": 17.859, "step": 429660 }, { "epoch": 0.8679605845255073, "grad_norm": 756.0564575195312, "learning_rate": 5.906816710468866e-07, "loss": 18.1549, "step": 429670 }, { "epoch": 0.8679807851581911, "grad_norm": 171.04759216308594, "learning_rate": 5.905170956798739e-07, "loss": 18.7377, "step": 429680 }, { "epoch": 0.8680009857908749, "grad_norm": 316.7099304199219, "learning_rate": 5.903525418041445e-07, "loss": 19.8947, "step": 429690 }, { "epoch": 0.8680211864235587, "grad_norm": 454.18304443359375, "learning_rate": 5.901880094205037e-07, "loss": 25.8028, "step": 429700 }, { "epoch": 0.8680413870562426, "grad_norm": 155.0983428955078, "learning_rate": 5.900234985297498e-07, "loss": 18.6755, "step": 429710 }, { "epoch": 0.8680615876889264, "grad_norm": 251.7493438720703, "learning_rate": 5.898590091326884e-07, "loss": 11.8688, "step": 429720 }, { "epoch": 0.8680817883216102, "grad_norm": 270.7951965332031, "learning_rate": 5.896945412301186e-07, "loss": 19.235, "step": 429730 }, { "epoch": 0.868101988954294, "grad_norm": 352.2210388183594, "learning_rate": 5.895300948228421e-07, "loss": 19.2813, "step": 429740 }, { "epoch": 0.8681221895869778, "grad_norm": 80.35448455810547, "learning_rate": 5.893656699116618e-07, "loss": 11.6999, "step": 429750 }, { "epoch": 0.8681423902196617, "grad_norm": 90.20613098144531, "learning_rate": 5.892012664973784e-07, "loss": 17.4248, "step": 429760 }, { "epoch": 0.8681625908523455, "grad_norm": 21.49197769165039, "learning_rate": 5.89036884580792e-07, "loss": 11.329, "step": 429770 }, { "epoch": 0.8681827914850293, "grad_norm": 106.82868957519531, "learning_rate": 5.888725241627047e-07, "loss": 26.8885, "step": 429780 }, { "epoch": 0.8682029921177131, "grad_norm": 514.7811279296875, "learning_rate": 5.887081852439186e-07, "loss": 16.6248, "step": 429790 }, { "epoch": 0.8682231927503969, "grad_norm": 229.88723754882812, "learning_rate": 5.885438678252342e-07, "loss": 7.963, "step": 429800 }, { "epoch": 0.8682433933830808, "grad_norm": 109.64239501953125, "learning_rate": 5.883795719074509e-07, "loss": 12.7741, "step": 429810 }, { "epoch": 0.8682635940157646, "grad_norm": 144.66868591308594, "learning_rate": 5.882152974913713e-07, "loss": 13.0036, "step": 429820 }, { "epoch": 0.8682837946484484, "grad_norm": 333.4307556152344, "learning_rate": 5.880510445777954e-07, "loss": 25.0166, "step": 429830 }, { "epoch": 0.8683039952811322, "grad_norm": 199.61651611328125, "learning_rate": 5.878868131675225e-07, "loss": 11.6992, "step": 429840 }, { "epoch": 0.868324195913816, "grad_norm": 50.70903015136719, "learning_rate": 5.877226032613542e-07, "loss": 15.0655, "step": 429850 }, { "epoch": 0.8683443965464999, "grad_norm": 103.51262664794922, "learning_rate": 5.875584148600916e-07, "loss": 11.3752, "step": 429860 }, { "epoch": 0.8683645971791837, "grad_norm": 44.83159255981445, "learning_rate": 5.873942479645345e-07, "loss": 18.3231, "step": 429870 }, { "epoch": 0.8683847978118675, "grad_norm": 338.2083435058594, "learning_rate": 5.872301025754812e-07, "loss": 12.4635, "step": 429880 }, { "epoch": 0.8684049984445513, "grad_norm": 374.15869140625, "learning_rate": 5.870659786937344e-07, "loss": 13.0173, "step": 429890 }, { "epoch": 0.8684251990772351, "grad_norm": 57.11210632324219, "learning_rate": 5.869018763200929e-07, "loss": 24.6361, "step": 429900 }, { "epoch": 0.868445399709919, "grad_norm": 58.70858383178711, "learning_rate": 5.867377954553555e-07, "loss": 29.2412, "step": 429910 }, { "epoch": 0.8684656003426028, "grad_norm": 292.7398681640625, "learning_rate": 5.865737361003226e-07, "loss": 16.5664, "step": 429920 }, { "epoch": 0.8684858009752865, "grad_norm": 217.97293090820312, "learning_rate": 5.864096982557949e-07, "loss": 21.1409, "step": 429930 }, { "epoch": 0.8685060016079703, "grad_norm": 613.9413452148438, "learning_rate": 5.862456819225715e-07, "loss": 19.6642, "step": 429940 }, { "epoch": 0.8685262022406541, "grad_norm": 627.24072265625, "learning_rate": 5.860816871014496e-07, "loss": 18.9572, "step": 429950 }, { "epoch": 0.868546402873338, "grad_norm": 354.0593566894531, "learning_rate": 5.859177137932315e-07, "loss": 20.9379, "step": 429960 }, { "epoch": 0.8685666035060218, "grad_norm": 430.450927734375, "learning_rate": 5.857537619987152e-07, "loss": 14.9089, "step": 429970 }, { "epoch": 0.8685868041387056, "grad_norm": 211.65225219726562, "learning_rate": 5.855898317186992e-07, "loss": 13.5329, "step": 429980 }, { "epoch": 0.8686070047713894, "grad_norm": 142.22438049316406, "learning_rate": 5.854259229539833e-07, "loss": 20.4537, "step": 429990 }, { "epoch": 0.8686272054040732, "grad_norm": 397.9911804199219, "learning_rate": 5.852620357053651e-07, "loss": 20.0013, "step": 430000 }, { "epoch": 0.868647406036757, "grad_norm": 211.5362091064453, "learning_rate": 5.850981699736453e-07, "loss": 30.0219, "step": 430010 }, { "epoch": 0.8686676066694409, "grad_norm": 334.8950500488281, "learning_rate": 5.849343257596218e-07, "loss": 15.0287, "step": 430020 }, { "epoch": 0.8686878073021247, "grad_norm": 406.8493347167969, "learning_rate": 5.847705030640915e-07, "loss": 22.8116, "step": 430030 }, { "epoch": 0.8687080079348085, "grad_norm": 266.61248779296875, "learning_rate": 5.84606701887856e-07, "loss": 14.4064, "step": 430040 }, { "epoch": 0.8687282085674923, "grad_norm": 3.96455717086792, "learning_rate": 5.844429222317111e-07, "loss": 5.1193, "step": 430050 }, { "epoch": 0.8687484092001762, "grad_norm": 405.39666748046875, "learning_rate": 5.842791640964551e-07, "loss": 22.1602, "step": 430060 }, { "epoch": 0.86876860983286, "grad_norm": 109.13443756103516, "learning_rate": 5.841154274828869e-07, "loss": 14.5743, "step": 430070 }, { "epoch": 0.8687888104655438, "grad_norm": 357.2668762207031, "learning_rate": 5.839517123918059e-07, "loss": 16.6738, "step": 430080 }, { "epoch": 0.8688090110982276, "grad_norm": 161.05580139160156, "learning_rate": 5.83788018824008e-07, "loss": 11.4569, "step": 430090 }, { "epoch": 0.8688292117309114, "grad_norm": 260.228271484375, "learning_rate": 5.836243467802915e-07, "loss": 16.5401, "step": 430100 }, { "epoch": 0.8688494123635953, "grad_norm": 113.09424591064453, "learning_rate": 5.834606962614548e-07, "loss": 7.3593, "step": 430110 }, { "epoch": 0.8688696129962791, "grad_norm": 264.3537902832031, "learning_rate": 5.832970672682948e-07, "loss": 30.4637, "step": 430120 }, { "epoch": 0.8688898136289629, "grad_norm": 748.5376586914062, "learning_rate": 5.831334598016086e-07, "loss": 21.3425, "step": 430130 }, { "epoch": 0.8689100142616467, "grad_norm": 207.56895446777344, "learning_rate": 5.829698738621941e-07, "loss": 21.533, "step": 430140 }, { "epoch": 0.8689302148943305, "grad_norm": 250.3556671142578, "learning_rate": 5.828063094508507e-07, "loss": 22.9276, "step": 430150 }, { "epoch": 0.8689504155270144, "grad_norm": 236.70950317382812, "learning_rate": 5.826427665683715e-07, "loss": 9.2027, "step": 430160 }, { "epoch": 0.8689706161596982, "grad_norm": 214.5115203857422, "learning_rate": 5.824792452155558e-07, "loss": 15.084, "step": 430170 }, { "epoch": 0.868990816792382, "grad_norm": 366.3526916503906, "learning_rate": 5.823157453932015e-07, "loss": 26.1997, "step": 430180 }, { "epoch": 0.8690110174250657, "grad_norm": 9.377196311950684, "learning_rate": 5.821522671021041e-07, "loss": 18.4233, "step": 430190 }, { "epoch": 0.8690312180577495, "grad_norm": 493.40399169921875, "learning_rate": 5.819888103430598e-07, "loss": 14.3293, "step": 430200 }, { "epoch": 0.8690514186904333, "grad_norm": 587.1106567382812, "learning_rate": 5.818253751168679e-07, "loss": 16.7316, "step": 430210 }, { "epoch": 0.8690716193231172, "grad_norm": 181.7979278564453, "learning_rate": 5.816619614243224e-07, "loss": 16.6902, "step": 430220 }, { "epoch": 0.869091819955801, "grad_norm": 221.2801055908203, "learning_rate": 5.814985692662201e-07, "loss": 29.7676, "step": 430230 }, { "epoch": 0.8691120205884848, "grad_norm": 205.5895538330078, "learning_rate": 5.81335198643358e-07, "loss": 10.7289, "step": 430240 }, { "epoch": 0.8691322212211686, "grad_norm": 170.81640625, "learning_rate": 5.811718495565327e-07, "loss": 13.0024, "step": 430250 }, { "epoch": 0.8691524218538524, "grad_norm": 643.1643676757812, "learning_rate": 5.810085220065404e-07, "loss": 43.2111, "step": 430260 }, { "epoch": 0.8691726224865363, "grad_norm": 365.8726806640625, "learning_rate": 5.808452159941752e-07, "loss": 14.3514, "step": 430270 }, { "epoch": 0.8691928231192201, "grad_norm": 131.76368713378906, "learning_rate": 5.806819315202361e-07, "loss": 15.9404, "step": 430280 }, { "epoch": 0.8692130237519039, "grad_norm": 213.5436248779297, "learning_rate": 5.805186685855163e-07, "loss": 9.1838, "step": 430290 }, { "epoch": 0.8692332243845877, "grad_norm": 116.876220703125, "learning_rate": 5.803554271908124e-07, "loss": 15.4149, "step": 430300 }, { "epoch": 0.8692534250172715, "grad_norm": 422.8739318847656, "learning_rate": 5.801922073369203e-07, "loss": 14.9275, "step": 430310 }, { "epoch": 0.8692736256499554, "grad_norm": 323.6262512207031, "learning_rate": 5.800290090246346e-07, "loss": 11.0438, "step": 430320 }, { "epoch": 0.8692938262826392, "grad_norm": 235.24993896484375, "learning_rate": 5.798658322547529e-07, "loss": 16.0167, "step": 430330 }, { "epoch": 0.869314026915323, "grad_norm": 190.68240356445312, "learning_rate": 5.797026770280683e-07, "loss": 6.4879, "step": 430340 }, { "epoch": 0.8693342275480068, "grad_norm": 4.9392924308776855, "learning_rate": 5.795395433453765e-07, "loss": 13.5663, "step": 430350 }, { "epoch": 0.8693544281806906, "grad_norm": 124.97154998779297, "learning_rate": 5.793764312074735e-07, "loss": 16.773, "step": 430360 }, { "epoch": 0.8693746288133745, "grad_norm": 196.126220703125, "learning_rate": 5.792133406151523e-07, "loss": 13.8697, "step": 430370 }, { "epoch": 0.8693948294460583, "grad_norm": 293.3162536621094, "learning_rate": 5.790502715692104e-07, "loss": 23.1517, "step": 430380 }, { "epoch": 0.8694150300787421, "grad_norm": 188.63087463378906, "learning_rate": 5.788872240704402e-07, "loss": 9.6314, "step": 430390 }, { "epoch": 0.8694352307114259, "grad_norm": 467.492431640625, "learning_rate": 5.787241981196384e-07, "loss": 24.1223, "step": 430400 }, { "epoch": 0.8694554313441097, "grad_norm": 379.414794921875, "learning_rate": 5.785611937175989e-07, "loss": 12.4083, "step": 430410 }, { "epoch": 0.8694756319767936, "grad_norm": 36.301517486572266, "learning_rate": 5.783982108651148e-07, "loss": 14.7152, "step": 430420 }, { "epoch": 0.8694958326094774, "grad_norm": 358.90313720703125, "learning_rate": 5.782352495629822e-07, "loss": 24.9715, "step": 430430 }, { "epoch": 0.8695160332421611, "grad_norm": 23.102815628051758, "learning_rate": 5.780723098119951e-07, "loss": 23.9682, "step": 430440 }, { "epoch": 0.8695362338748449, "grad_norm": 267.063720703125, "learning_rate": 5.779093916129464e-07, "loss": 18.9859, "step": 430450 }, { "epoch": 0.8695564345075287, "grad_norm": 198.9434051513672, "learning_rate": 5.777464949666306e-07, "loss": 33.027, "step": 430460 }, { "epoch": 0.8695766351402126, "grad_norm": 112.8031234741211, "learning_rate": 5.775836198738427e-07, "loss": 49.3809, "step": 430470 }, { "epoch": 0.8695968357728964, "grad_norm": 86.52124786376953, "learning_rate": 5.774207663353765e-07, "loss": 17.5758, "step": 430480 }, { "epoch": 0.8696170364055802, "grad_norm": 138.0382843017578, "learning_rate": 5.772579343520241e-07, "loss": 27.4977, "step": 430490 }, { "epoch": 0.869637237038264, "grad_norm": 160.53768920898438, "learning_rate": 5.770951239245803e-07, "loss": 20.9604, "step": 430500 }, { "epoch": 0.8696574376709478, "grad_norm": 573.6380615234375, "learning_rate": 5.769323350538391e-07, "loss": 26.4926, "step": 430510 }, { "epoch": 0.8696776383036317, "grad_norm": 221.80967712402344, "learning_rate": 5.767695677405921e-07, "loss": 23.4499, "step": 430520 }, { "epoch": 0.8696978389363155, "grad_norm": 75.57762908935547, "learning_rate": 5.766068219856341e-07, "loss": 25.7678, "step": 430530 }, { "epoch": 0.8697180395689993, "grad_norm": 231.1885223388672, "learning_rate": 5.764440977897584e-07, "loss": 21.1663, "step": 430540 }, { "epoch": 0.8697382402016831, "grad_norm": 212.92323303222656, "learning_rate": 5.762813951537582e-07, "loss": 10.6941, "step": 430550 }, { "epoch": 0.8697584408343669, "grad_norm": 170.4713592529297, "learning_rate": 5.761187140784247e-07, "loss": 13.3565, "step": 430560 }, { "epoch": 0.8697786414670508, "grad_norm": 189.28836059570312, "learning_rate": 5.759560545645527e-07, "loss": 12.5459, "step": 430570 }, { "epoch": 0.8697988420997346, "grad_norm": 190.5909881591797, "learning_rate": 5.757934166129347e-07, "loss": 11.0884, "step": 430580 }, { "epoch": 0.8698190427324184, "grad_norm": 355.5611877441406, "learning_rate": 5.756308002243622e-07, "loss": 22.4758, "step": 430590 }, { "epoch": 0.8698392433651022, "grad_norm": 286.4625549316406, "learning_rate": 5.754682053996291e-07, "loss": 13.3158, "step": 430600 }, { "epoch": 0.869859443997786, "grad_norm": 263.8946533203125, "learning_rate": 5.753056321395267e-07, "loss": 13.0268, "step": 430610 }, { "epoch": 0.8698796446304699, "grad_norm": 394.4566955566406, "learning_rate": 5.751430804448488e-07, "loss": 16.362, "step": 430620 }, { "epoch": 0.8698998452631537, "grad_norm": 232.8829803466797, "learning_rate": 5.749805503163869e-07, "loss": 12.2157, "step": 430630 }, { "epoch": 0.8699200458958375, "grad_norm": 28.242095947265625, "learning_rate": 5.748180417549321e-07, "loss": 9.1866, "step": 430640 }, { "epoch": 0.8699402465285213, "grad_norm": 146.7534942626953, "learning_rate": 5.746555547612781e-07, "loss": 22.0129, "step": 430650 }, { "epoch": 0.8699604471612051, "grad_norm": 537.522705078125, "learning_rate": 5.744930893362166e-07, "loss": 23.2022, "step": 430660 }, { "epoch": 0.869980647793889, "grad_norm": 299.1604309082031, "learning_rate": 5.74330645480538e-07, "loss": 16.9488, "step": 430670 }, { "epoch": 0.8700008484265728, "grad_norm": 251.43258666992188, "learning_rate": 5.741682231950351e-07, "loss": 19.7603, "step": 430680 }, { "epoch": 0.8700210490592566, "grad_norm": 169.90646362304688, "learning_rate": 5.740058224805001e-07, "loss": 30.0848, "step": 430690 }, { "epoch": 0.8700412496919403, "grad_norm": 8.344276428222656, "learning_rate": 5.738434433377244e-07, "loss": 17.9386, "step": 430700 }, { "epoch": 0.8700614503246241, "grad_norm": 240.35447692871094, "learning_rate": 5.736810857674979e-07, "loss": 30.5829, "step": 430710 }, { "epoch": 0.8700816509573079, "grad_norm": 625.955078125, "learning_rate": 5.735187497706135e-07, "loss": 25.5631, "step": 430720 }, { "epoch": 0.8701018515899918, "grad_norm": 303.2730407714844, "learning_rate": 5.733564353478622e-07, "loss": 13.2096, "step": 430730 }, { "epoch": 0.8701220522226756, "grad_norm": 0.0, "learning_rate": 5.731941425000337e-07, "loss": 26.3781, "step": 430740 }, { "epoch": 0.8701422528553594, "grad_norm": 170.39614868164062, "learning_rate": 5.730318712279203e-07, "loss": 8.2164, "step": 430750 }, { "epoch": 0.8701624534880432, "grad_norm": 369.62908935546875, "learning_rate": 5.728696215323143e-07, "loss": 13.3138, "step": 430760 }, { "epoch": 0.870182654120727, "grad_norm": 4.718437194824219, "learning_rate": 5.727073934140026e-07, "loss": 17.5682, "step": 430770 }, { "epoch": 0.8702028547534109, "grad_norm": 15.770259857177734, "learning_rate": 5.725451868737786e-07, "loss": 10.4337, "step": 430780 }, { "epoch": 0.8702230553860947, "grad_norm": 57.28717041015625, "learning_rate": 5.723830019124332e-07, "loss": 10.0944, "step": 430790 }, { "epoch": 0.8702432560187785, "grad_norm": 337.8901672363281, "learning_rate": 5.722208385307559e-07, "loss": 10.8083, "step": 430800 }, { "epoch": 0.8702634566514623, "grad_norm": 271.6981506347656, "learning_rate": 5.720586967295366e-07, "loss": 10.8632, "step": 430810 }, { "epoch": 0.8702836572841461, "grad_norm": 930.3674926757812, "learning_rate": 5.718965765095669e-07, "loss": 17.5699, "step": 430820 }, { "epoch": 0.87030385791683, "grad_norm": 76.50482177734375, "learning_rate": 5.717344778716361e-07, "loss": 11.3183, "step": 430830 }, { "epoch": 0.8703240585495138, "grad_norm": 262.0967712402344, "learning_rate": 5.715724008165335e-07, "loss": 18.3484, "step": 430840 }, { "epoch": 0.8703442591821976, "grad_norm": 253.44366455078125, "learning_rate": 5.714103453450498e-07, "loss": 15.9579, "step": 430850 }, { "epoch": 0.8703644598148814, "grad_norm": 283.9560241699219, "learning_rate": 5.712483114579758e-07, "loss": 20.7914, "step": 430860 }, { "epoch": 0.8703846604475652, "grad_norm": 158.31321716308594, "learning_rate": 5.710862991561006e-07, "loss": 13.6228, "step": 430870 }, { "epoch": 0.8704048610802491, "grad_norm": 201.527587890625, "learning_rate": 5.709243084402128e-07, "loss": 12.1568, "step": 430880 }, { "epoch": 0.8704250617129329, "grad_norm": 3.624937057495117, "learning_rate": 5.70762339311104e-07, "loss": 15.7504, "step": 430890 }, { "epoch": 0.8704452623456167, "grad_norm": 351.1962890625, "learning_rate": 5.706003917695619e-07, "loss": 25.6406, "step": 430900 }, { "epoch": 0.8704654629783005, "grad_norm": 728.9517211914062, "learning_rate": 5.704384658163748e-07, "loss": 20.4086, "step": 430910 }, { "epoch": 0.8704856636109843, "grad_norm": 509.3713073730469, "learning_rate": 5.702765614523354e-07, "loss": 28.421, "step": 430920 }, { "epoch": 0.8705058642436682, "grad_norm": 411.2549743652344, "learning_rate": 5.701146786782291e-07, "loss": 22.6689, "step": 430930 }, { "epoch": 0.870526064876352, "grad_norm": 541.7310791015625, "learning_rate": 5.699528174948477e-07, "loss": 19.4127, "step": 430940 }, { "epoch": 0.8705462655090357, "grad_norm": 407.40045166015625, "learning_rate": 5.697909779029786e-07, "loss": 16.2929, "step": 430950 }, { "epoch": 0.8705664661417195, "grad_norm": 226.20111083984375, "learning_rate": 5.696291599034104e-07, "loss": 33.7135, "step": 430960 }, { "epoch": 0.8705866667744033, "grad_norm": 212.94078063964844, "learning_rate": 5.694673634969334e-07, "loss": 8.0405, "step": 430970 }, { "epoch": 0.8706068674070871, "grad_norm": 305.7719421386719, "learning_rate": 5.693055886843341e-07, "loss": 22.9908, "step": 430980 }, { "epoch": 0.870627068039771, "grad_norm": 152.94149780273438, "learning_rate": 5.691438354664031e-07, "loss": 21.9984, "step": 430990 }, { "epoch": 0.8706472686724548, "grad_norm": 34.74179458618164, "learning_rate": 5.689821038439264e-07, "loss": 20.4876, "step": 431000 }, { "epoch": 0.8706674693051386, "grad_norm": 41.64984893798828, "learning_rate": 5.688203938176945e-07, "loss": 16.412, "step": 431010 }, { "epoch": 0.8706876699378224, "grad_norm": 222.1383056640625, "learning_rate": 5.686587053884946e-07, "loss": 9.7577, "step": 431020 }, { "epoch": 0.8707078705705062, "grad_norm": 222.21275329589844, "learning_rate": 5.684970385571137e-07, "loss": 14.1611, "step": 431030 }, { "epoch": 0.8707280712031901, "grad_norm": 361.74908447265625, "learning_rate": 5.683353933243418e-07, "loss": 28.9463, "step": 431040 }, { "epoch": 0.8707482718358739, "grad_norm": 118.4416275024414, "learning_rate": 5.681737696909656e-07, "loss": 16.3694, "step": 431050 }, { "epoch": 0.8707684724685577, "grad_norm": 393.716796875, "learning_rate": 5.680121676577721e-07, "loss": 12.5934, "step": 431060 }, { "epoch": 0.8707886731012415, "grad_norm": 510.8108215332031, "learning_rate": 5.678505872255496e-07, "loss": 22.5018, "step": 431070 }, { "epoch": 0.8708088737339253, "grad_norm": 211.59291076660156, "learning_rate": 5.676890283950881e-07, "loss": 12.1117, "step": 431080 }, { "epoch": 0.8708290743666092, "grad_norm": 241.6061553955078, "learning_rate": 5.675274911671702e-07, "loss": 13.4908, "step": 431090 }, { "epoch": 0.870849274999293, "grad_norm": 20.293502807617188, "learning_rate": 5.673659755425859e-07, "loss": 30.7483, "step": 431100 }, { "epoch": 0.8708694756319768, "grad_norm": 216.94862365722656, "learning_rate": 5.672044815221234e-07, "loss": 17.7979, "step": 431110 }, { "epoch": 0.8708896762646606, "grad_norm": 189.7763671875, "learning_rate": 5.670430091065682e-07, "loss": 11.0608, "step": 431120 }, { "epoch": 0.8709098768973444, "grad_norm": 445.8848571777344, "learning_rate": 5.668815582967074e-07, "loss": 17.2823, "step": 431130 }, { "epoch": 0.8709300775300283, "grad_norm": 261.6067810058594, "learning_rate": 5.667201290933278e-07, "loss": 11.3204, "step": 431140 }, { "epoch": 0.8709502781627121, "grad_norm": 100.90369415283203, "learning_rate": 5.665587214972173e-07, "loss": 17.1483, "step": 431150 }, { "epoch": 0.8709704787953959, "grad_norm": 190.43043518066406, "learning_rate": 5.663973355091624e-07, "loss": 22.1384, "step": 431160 }, { "epoch": 0.8709906794280797, "grad_norm": 22.338645935058594, "learning_rate": 5.662359711299481e-07, "loss": 20.7694, "step": 431170 }, { "epoch": 0.8710108800607635, "grad_norm": 785.1405029296875, "learning_rate": 5.66074628360363e-07, "loss": 13.5858, "step": 431180 }, { "epoch": 0.8710310806934474, "grad_norm": 298.8449401855469, "learning_rate": 5.659133072011919e-07, "loss": 9.4031, "step": 431190 }, { "epoch": 0.8710512813261312, "grad_norm": 337.2384338378906, "learning_rate": 5.657520076532208e-07, "loss": 22.3716, "step": 431200 }, { "epoch": 0.8710714819588149, "grad_norm": 405.7594299316406, "learning_rate": 5.655907297172375e-07, "loss": 18.8737, "step": 431210 }, { "epoch": 0.8710916825914987, "grad_norm": 286.75213623046875, "learning_rate": 5.654294733940263e-07, "loss": 18.9708, "step": 431220 }, { "epoch": 0.8711118832241825, "grad_norm": 127.1952133178711, "learning_rate": 5.65268238684375e-07, "loss": 13.1412, "step": 431230 }, { "epoch": 0.8711320838568664, "grad_norm": 256.9698181152344, "learning_rate": 5.651070255890689e-07, "loss": 14.3125, "step": 431240 }, { "epoch": 0.8711522844895502, "grad_norm": 62.01204299926758, "learning_rate": 5.649458341088915e-07, "loss": 13.6529, "step": 431250 }, { "epoch": 0.871172485122234, "grad_norm": 275.6658020019531, "learning_rate": 5.647846642446314e-07, "loss": 18.8037, "step": 431260 }, { "epoch": 0.8711926857549178, "grad_norm": 8.213630676269531, "learning_rate": 5.646235159970731e-07, "loss": 16.7657, "step": 431270 }, { "epoch": 0.8712128863876016, "grad_norm": 339.7079162597656, "learning_rate": 5.64462389367001e-07, "loss": 10.0411, "step": 431280 }, { "epoch": 0.8712330870202855, "grad_norm": 299.1888122558594, "learning_rate": 5.64301284355201e-07, "loss": 28.3854, "step": 431290 }, { "epoch": 0.8712532876529693, "grad_norm": 697.38671875, "learning_rate": 5.641402009624591e-07, "loss": 14.7147, "step": 431300 }, { "epoch": 0.8712734882856531, "grad_norm": 106.74177551269531, "learning_rate": 5.639791391895605e-07, "loss": 30.6131, "step": 431310 }, { "epoch": 0.8712936889183369, "grad_norm": 30.877931594848633, "learning_rate": 5.638180990372882e-07, "loss": 15.0457, "step": 431320 }, { "epoch": 0.8713138895510207, "grad_norm": 304.257568359375, "learning_rate": 5.6365708050643e-07, "loss": 15.3636, "step": 431330 }, { "epoch": 0.8713340901837046, "grad_norm": 267.9161682128906, "learning_rate": 5.634960835977688e-07, "loss": 11.9889, "step": 431340 }, { "epoch": 0.8713542908163884, "grad_norm": 11.02646541595459, "learning_rate": 5.633351083120886e-07, "loss": 14.1122, "step": 431350 }, { "epoch": 0.8713744914490722, "grad_norm": 25.108898162841797, "learning_rate": 5.631741546501746e-07, "loss": 13.3595, "step": 431360 }, { "epoch": 0.871394692081756, "grad_norm": 526.7843627929688, "learning_rate": 5.630132226128143e-07, "loss": 13.9046, "step": 431370 }, { "epoch": 0.8714148927144398, "grad_norm": 264.2872009277344, "learning_rate": 5.628523122007867e-07, "loss": 17.9338, "step": 431380 }, { "epoch": 0.8714350933471237, "grad_norm": 719.3394165039062, "learning_rate": 5.626914234148794e-07, "loss": 15.2507, "step": 431390 }, { "epoch": 0.8714552939798075, "grad_norm": 296.2950134277344, "learning_rate": 5.625305562558764e-07, "loss": 18.7133, "step": 431400 }, { "epoch": 0.8714754946124913, "grad_norm": 280.3843688964844, "learning_rate": 5.623697107245619e-07, "loss": 19.4132, "step": 431410 }, { "epoch": 0.8714956952451751, "grad_norm": 176.71728515625, "learning_rate": 5.622088868217179e-07, "loss": 9.178, "step": 431420 }, { "epoch": 0.8715158958778589, "grad_norm": 202.95556640625, "learning_rate": 5.620480845481291e-07, "loss": 14.7262, "step": 431430 }, { "epoch": 0.8715360965105428, "grad_norm": 244.365478515625, "learning_rate": 5.618873039045825e-07, "loss": 12.8064, "step": 431440 }, { "epoch": 0.8715562971432266, "grad_norm": 137.4836883544922, "learning_rate": 5.617265448918563e-07, "loss": 17.5753, "step": 431450 }, { "epoch": 0.8715764977759103, "grad_norm": 169.54965209960938, "learning_rate": 5.615658075107366e-07, "loss": 18.1799, "step": 431460 }, { "epoch": 0.8715966984085941, "grad_norm": 285.4529113769531, "learning_rate": 5.614050917620084e-07, "loss": 13.4641, "step": 431470 }, { "epoch": 0.8716168990412779, "grad_norm": 91.17354583740234, "learning_rate": 5.612443976464527e-07, "loss": 16.9061, "step": 431480 }, { "epoch": 0.8716370996739617, "grad_norm": 187.19305419921875, "learning_rate": 5.610837251648532e-07, "loss": 18.3684, "step": 431490 }, { "epoch": 0.8716573003066456, "grad_norm": 221.01214599609375, "learning_rate": 5.609230743179939e-07, "loss": 13.5701, "step": 431500 }, { "epoch": 0.8716775009393294, "grad_norm": 332.5506591796875, "learning_rate": 5.607624451066568e-07, "loss": 14.1987, "step": 431510 }, { "epoch": 0.8716977015720132, "grad_norm": 206.41293334960938, "learning_rate": 5.606018375316246e-07, "loss": 13.4488, "step": 431520 }, { "epoch": 0.871717902204697, "grad_norm": 276.80419921875, "learning_rate": 5.604412515936814e-07, "loss": 15.5291, "step": 431530 }, { "epoch": 0.8717381028373808, "grad_norm": 231.42120361328125, "learning_rate": 5.602806872936079e-07, "loss": 19.2064, "step": 431540 }, { "epoch": 0.8717583034700647, "grad_norm": 480.11328125, "learning_rate": 5.601201446321891e-07, "loss": 11.7327, "step": 431550 }, { "epoch": 0.8717785041027485, "grad_norm": 294.0952453613281, "learning_rate": 5.599596236102068e-07, "loss": 21.1699, "step": 431560 }, { "epoch": 0.8717987047354323, "grad_norm": 163.67523193359375, "learning_rate": 5.597991242284407e-07, "loss": 18.4039, "step": 431570 }, { "epoch": 0.8718189053681161, "grad_norm": 306.99981689453125, "learning_rate": 5.596386464876769e-07, "loss": 23.6068, "step": 431580 }, { "epoch": 0.8718391060008, "grad_norm": 324.1244812011719, "learning_rate": 5.594781903886942e-07, "loss": 15.7907, "step": 431590 }, { "epoch": 0.8718593066334838, "grad_norm": 341.48944091796875, "learning_rate": 5.593177559322776e-07, "loss": 41.7083, "step": 431600 }, { "epoch": 0.8718795072661676, "grad_norm": 171.02854919433594, "learning_rate": 5.591573431192066e-07, "loss": 15.9038, "step": 431610 }, { "epoch": 0.8718997078988514, "grad_norm": 260.77545166015625, "learning_rate": 5.589969519502652e-07, "loss": 18.2895, "step": 431620 }, { "epoch": 0.8719199085315352, "grad_norm": 316.0719909667969, "learning_rate": 5.588365824262343e-07, "loss": 14.1478, "step": 431630 }, { "epoch": 0.871940109164219, "grad_norm": 307.8163146972656, "learning_rate": 5.586762345478935e-07, "loss": 11.6938, "step": 431640 }, { "epoch": 0.8719603097969029, "grad_norm": 262.48516845703125, "learning_rate": 5.585159083160274e-07, "loss": 18.0119, "step": 431650 }, { "epoch": 0.8719805104295867, "grad_norm": 337.4559020996094, "learning_rate": 5.583556037314164e-07, "loss": 11.2033, "step": 431660 }, { "epoch": 0.8720007110622705, "grad_norm": 370.2848815917969, "learning_rate": 5.581953207948404e-07, "loss": 12.8963, "step": 431670 }, { "epoch": 0.8720209116949543, "grad_norm": 218.72433471679688, "learning_rate": 5.58035059507081e-07, "loss": 12.2936, "step": 431680 }, { "epoch": 0.8720411123276381, "grad_norm": 207.97972106933594, "learning_rate": 5.578748198689226e-07, "loss": 16.1314, "step": 431690 }, { "epoch": 0.872061312960322, "grad_norm": 369.6402893066406, "learning_rate": 5.577146018811419e-07, "loss": 23.1407, "step": 431700 }, { "epoch": 0.8720815135930058, "grad_norm": 316.8023986816406, "learning_rate": 5.575544055445209e-07, "loss": 18.9181, "step": 431710 }, { "epoch": 0.8721017142256895, "grad_norm": 333.3664245605469, "learning_rate": 5.573942308598418e-07, "loss": 14.8765, "step": 431720 }, { "epoch": 0.8721219148583733, "grad_norm": 0.0, "learning_rate": 5.572340778278845e-07, "loss": 22.3963, "step": 431730 }, { "epoch": 0.8721421154910571, "grad_norm": 142.32965087890625, "learning_rate": 5.570739464494284e-07, "loss": 24.0365, "step": 431740 }, { "epoch": 0.872162316123741, "grad_norm": 211.55694580078125, "learning_rate": 5.569138367252553e-07, "loss": 7.9193, "step": 431750 }, { "epoch": 0.8721825167564248, "grad_norm": 211.8068389892578, "learning_rate": 5.567537486561476e-07, "loss": 10.1956, "step": 431760 }, { "epoch": 0.8722027173891086, "grad_norm": 172.6950225830078, "learning_rate": 5.565936822428808e-07, "loss": 12.9672, "step": 431770 }, { "epoch": 0.8722229180217924, "grad_norm": 128.5299530029297, "learning_rate": 5.564336374862373e-07, "loss": 13.7229, "step": 431780 }, { "epoch": 0.8722431186544762, "grad_norm": 308.6335754394531, "learning_rate": 5.562736143869984e-07, "loss": 14.2043, "step": 431790 }, { "epoch": 0.87226331928716, "grad_norm": 286.74591064453125, "learning_rate": 5.561136129459432e-07, "loss": 11.8774, "step": 431800 }, { "epoch": 0.8722835199198439, "grad_norm": 107.65582275390625, "learning_rate": 5.559536331638498e-07, "loss": 15.646, "step": 431810 }, { "epoch": 0.8723037205525277, "grad_norm": 259.1940002441406, "learning_rate": 5.557936750415011e-07, "loss": 10.7716, "step": 431820 }, { "epoch": 0.8723239211852115, "grad_norm": 307.3171691894531, "learning_rate": 5.556337385796734e-07, "loss": 10.6385, "step": 431830 }, { "epoch": 0.8723441218178953, "grad_norm": 158.9854278564453, "learning_rate": 5.55473823779149e-07, "loss": 17.9587, "step": 431840 }, { "epoch": 0.8723643224505792, "grad_norm": 44.122806549072266, "learning_rate": 5.553139306407062e-07, "loss": 21.5023, "step": 431850 }, { "epoch": 0.872384523083263, "grad_norm": 276.1822814941406, "learning_rate": 5.551540591651234e-07, "loss": 7.8274, "step": 431860 }, { "epoch": 0.8724047237159468, "grad_norm": 404.6112060546875, "learning_rate": 5.549942093531812e-07, "loss": 24.195, "step": 431870 }, { "epoch": 0.8724249243486306, "grad_norm": 843.1182250976562, "learning_rate": 5.548343812056584e-07, "loss": 20.9582, "step": 431880 }, { "epoch": 0.8724451249813144, "grad_norm": 184.09068298339844, "learning_rate": 5.546745747233323e-07, "loss": 9.6226, "step": 431890 }, { "epoch": 0.8724653256139983, "grad_norm": 117.47186279296875, "learning_rate": 5.545147899069836e-07, "loss": 11.6845, "step": 431900 }, { "epoch": 0.8724855262466821, "grad_norm": 707.5753784179688, "learning_rate": 5.543550267573916e-07, "loss": 18.8461, "step": 431910 }, { "epoch": 0.8725057268793659, "grad_norm": 172.70492553710938, "learning_rate": 5.541952852753341e-07, "loss": 11.6597, "step": 431920 }, { "epoch": 0.8725259275120497, "grad_norm": 126.88516998291016, "learning_rate": 5.540355654615881e-07, "loss": 14.0196, "step": 431930 }, { "epoch": 0.8725461281447335, "grad_norm": 138.84596252441406, "learning_rate": 5.538758673169348e-07, "loss": 12.0072, "step": 431940 }, { "epoch": 0.8725663287774174, "grad_norm": 415.06658935546875, "learning_rate": 5.537161908421512e-07, "loss": 19.3967, "step": 431950 }, { "epoch": 0.8725865294101012, "grad_norm": 128.38372802734375, "learning_rate": 5.535565360380146e-07, "loss": 36.6789, "step": 431960 }, { "epoch": 0.872606730042785, "grad_norm": 229.2674102783203, "learning_rate": 5.533969029053043e-07, "loss": 8.6572, "step": 431970 }, { "epoch": 0.8726269306754687, "grad_norm": 375.0791320800781, "learning_rate": 5.532372914448003e-07, "loss": 16.2197, "step": 431980 }, { "epoch": 0.8726471313081525, "grad_norm": 191.91627502441406, "learning_rate": 5.530777016572763e-07, "loss": 20.0092, "step": 431990 }, { "epoch": 0.8726673319408363, "grad_norm": 126.85926055908203, "learning_rate": 5.529181335435124e-07, "loss": 15.4848, "step": 432000 }, { "epoch": 0.8726875325735202, "grad_norm": 192.4467010498047, "learning_rate": 5.527585871042867e-07, "loss": 26.0368, "step": 432010 }, { "epoch": 0.872707733206204, "grad_norm": 6.8492255210876465, "learning_rate": 5.525990623403765e-07, "loss": 21.4868, "step": 432020 }, { "epoch": 0.8727279338388878, "grad_norm": 593.39404296875, "learning_rate": 5.524395592525584e-07, "loss": 12.9314, "step": 432030 }, { "epoch": 0.8727481344715716, "grad_norm": 155.49880981445312, "learning_rate": 5.522800778416099e-07, "loss": 34.3999, "step": 432040 }, { "epoch": 0.8727683351042554, "grad_norm": 338.1510314941406, "learning_rate": 5.521206181083111e-07, "loss": 22.7508, "step": 432050 }, { "epoch": 0.8727885357369393, "grad_norm": 108.6231689453125, "learning_rate": 5.519611800534347e-07, "loss": 3.8747, "step": 432060 }, { "epoch": 0.8728087363696231, "grad_norm": 209.6058807373047, "learning_rate": 5.518017636777606e-07, "loss": 12.9019, "step": 432070 }, { "epoch": 0.8728289370023069, "grad_norm": 258.5714416503906, "learning_rate": 5.516423689820655e-07, "loss": 21.6524, "step": 432080 }, { "epoch": 0.8728491376349907, "grad_norm": 304.92889404296875, "learning_rate": 5.514829959671264e-07, "loss": 14.3826, "step": 432090 }, { "epoch": 0.8728693382676745, "grad_norm": 279.6145935058594, "learning_rate": 5.51323644633719e-07, "loss": 6.3146, "step": 432100 }, { "epoch": 0.8728895389003584, "grad_norm": 224.16091918945312, "learning_rate": 5.511643149826206e-07, "loss": 25.1425, "step": 432110 }, { "epoch": 0.8729097395330422, "grad_norm": 270.8456115722656, "learning_rate": 5.510050070146083e-07, "loss": 14.7547, "step": 432120 }, { "epoch": 0.872929940165726, "grad_norm": 384.7947998046875, "learning_rate": 5.508457207304574e-07, "loss": 22.2386, "step": 432130 }, { "epoch": 0.8729501407984098, "grad_norm": 289.92950439453125, "learning_rate": 5.506864561309455e-07, "loss": 12.8796, "step": 432140 }, { "epoch": 0.8729703414310936, "grad_norm": 148.26498413085938, "learning_rate": 5.505272132168471e-07, "loss": 12.9029, "step": 432150 }, { "epoch": 0.8729905420637775, "grad_norm": 294.7095642089844, "learning_rate": 5.503679919889404e-07, "loss": 17.0287, "step": 432160 }, { "epoch": 0.8730107426964613, "grad_norm": 165.29808044433594, "learning_rate": 5.502087924480005e-07, "loss": 24.1768, "step": 432170 }, { "epoch": 0.8730309433291451, "grad_norm": 197.2328338623047, "learning_rate": 5.50049614594802e-07, "loss": 15.0723, "step": 432180 }, { "epoch": 0.8730511439618289, "grad_norm": 257.3417053222656, "learning_rate": 5.498904584301235e-07, "loss": 21.5337, "step": 432190 }, { "epoch": 0.8730713445945127, "grad_norm": 197.20399475097656, "learning_rate": 5.497313239547374e-07, "loss": 14.8936, "step": 432200 }, { "epoch": 0.8730915452271966, "grad_norm": 221.76002502441406, "learning_rate": 5.49572211169423e-07, "loss": 23.2643, "step": 432210 }, { "epoch": 0.8731117458598804, "grad_norm": 269.3241271972656, "learning_rate": 5.49413120074952e-07, "loss": 20.478, "step": 432220 }, { "epoch": 0.8731319464925641, "grad_norm": 567.644775390625, "learning_rate": 5.492540506721033e-07, "loss": 16.5414, "step": 432230 }, { "epoch": 0.8731521471252479, "grad_norm": 0.8764551281929016, "learning_rate": 5.490950029616504e-07, "loss": 19.1325, "step": 432240 }, { "epoch": 0.8731723477579317, "grad_norm": 431.1417236328125, "learning_rate": 5.489359769443675e-07, "loss": 21.3286, "step": 432250 }, { "epoch": 0.8731925483906156, "grad_norm": 611.9507446289062, "learning_rate": 5.487769726210318e-07, "loss": 17.4648, "step": 432260 }, { "epoch": 0.8732127490232994, "grad_norm": 367.40814208984375, "learning_rate": 5.486179899924171e-07, "loss": 11.3435, "step": 432270 }, { "epoch": 0.8732329496559832, "grad_norm": 85.75486755371094, "learning_rate": 5.484590290592979e-07, "loss": 18.5802, "step": 432280 }, { "epoch": 0.873253150288667, "grad_norm": 125.14513397216797, "learning_rate": 5.483000898224494e-07, "loss": 11.8362, "step": 432290 }, { "epoch": 0.8732733509213508, "grad_norm": 279.9031677246094, "learning_rate": 5.48141172282648e-07, "loss": 58.6816, "step": 432300 }, { "epoch": 0.8732935515540347, "grad_norm": 318.6170959472656, "learning_rate": 5.479822764406645e-07, "loss": 22.523, "step": 432310 }, { "epoch": 0.8733137521867185, "grad_norm": 291.5913391113281, "learning_rate": 5.478234022972756e-07, "loss": 12.7422, "step": 432320 }, { "epoch": 0.8733339528194023, "grad_norm": 260.2528076171875, "learning_rate": 5.476645498532567e-07, "loss": 11.3449, "step": 432330 }, { "epoch": 0.8733541534520861, "grad_norm": 408.50860595703125, "learning_rate": 5.475057191093808e-07, "loss": 16.1845, "step": 432340 }, { "epoch": 0.8733743540847699, "grad_norm": 132.4649658203125, "learning_rate": 5.473469100664208e-07, "loss": 18.9693, "step": 432350 }, { "epoch": 0.8733945547174538, "grad_norm": 383.5670471191406, "learning_rate": 5.471881227251518e-07, "loss": 13.5977, "step": 432360 }, { "epoch": 0.8734147553501376, "grad_norm": 350.06976318359375, "learning_rate": 5.470293570863499e-07, "loss": 20.193, "step": 432370 }, { "epoch": 0.8734349559828214, "grad_norm": 140.1104278564453, "learning_rate": 5.46870613150785e-07, "loss": 3.5753, "step": 432380 }, { "epoch": 0.8734551566155052, "grad_norm": 290.8729248046875, "learning_rate": 5.467118909192326e-07, "loss": 23.7194, "step": 432390 }, { "epoch": 0.873475357248189, "grad_norm": 366.9217529296875, "learning_rate": 5.46553190392467e-07, "loss": 17.7457, "step": 432400 }, { "epoch": 0.8734955578808729, "grad_norm": 294.6042175292969, "learning_rate": 5.46394511571261e-07, "loss": 10.6936, "step": 432410 }, { "epoch": 0.8735157585135567, "grad_norm": 139.17315673828125, "learning_rate": 5.462358544563873e-07, "loss": 16.0233, "step": 432420 }, { "epoch": 0.8735359591462405, "grad_norm": 99.56596374511719, "learning_rate": 5.460772190486208e-07, "loss": 14.7335, "step": 432430 }, { "epoch": 0.8735561597789243, "grad_norm": 80.13524627685547, "learning_rate": 5.459186053487336e-07, "loss": 13.6393, "step": 432440 }, { "epoch": 0.8735763604116081, "grad_norm": 540.6686401367188, "learning_rate": 5.457600133574987e-07, "loss": 22.7271, "step": 432450 }, { "epoch": 0.873596561044292, "grad_norm": 190.26438903808594, "learning_rate": 5.456014430756895e-07, "loss": 16.4118, "step": 432460 }, { "epoch": 0.8736167616769758, "grad_norm": 189.78236389160156, "learning_rate": 5.454428945040774e-07, "loss": 12.8243, "step": 432470 }, { "epoch": 0.8736369623096596, "grad_norm": 232.68580627441406, "learning_rate": 5.452843676434377e-07, "loss": 16.0286, "step": 432480 }, { "epoch": 0.8736571629423433, "grad_norm": 111.18245697021484, "learning_rate": 5.45125862494541e-07, "loss": 19.2267, "step": 432490 }, { "epoch": 0.8736773635750271, "grad_norm": 126.48763275146484, "learning_rate": 5.449673790581611e-07, "loss": 25.2206, "step": 432500 }, { "epoch": 0.8736975642077109, "grad_norm": 405.0051574707031, "learning_rate": 5.448089173350696e-07, "loss": 9.0047, "step": 432510 }, { "epoch": 0.8737177648403948, "grad_norm": 128.05975341796875, "learning_rate": 5.446504773260386e-07, "loss": 36.9651, "step": 432520 }, { "epoch": 0.8737379654730786, "grad_norm": 0.0, "learning_rate": 5.44492059031842e-07, "loss": 14.3221, "step": 432530 }, { "epoch": 0.8737581661057624, "grad_norm": 451.4018859863281, "learning_rate": 5.443336624532492e-07, "loss": 15.6212, "step": 432540 }, { "epoch": 0.8737783667384462, "grad_norm": 452.4760437011719, "learning_rate": 5.44175287591035e-07, "loss": 17.0667, "step": 432550 }, { "epoch": 0.87379856737113, "grad_norm": 183.5372314453125, "learning_rate": 5.440169344459701e-07, "loss": 29.9927, "step": 432560 }, { "epoch": 0.8738187680038139, "grad_norm": 41.35247039794922, "learning_rate": 5.438586030188247e-07, "loss": 8.9583, "step": 432570 }, { "epoch": 0.8738389686364977, "grad_norm": 183.9976806640625, "learning_rate": 5.437002933103724e-07, "loss": 18.7638, "step": 432580 }, { "epoch": 0.8738591692691815, "grad_norm": 116.30809020996094, "learning_rate": 5.435420053213863e-07, "loss": 16.6084, "step": 432590 }, { "epoch": 0.8738793699018653, "grad_norm": 87.30075073242188, "learning_rate": 5.433837390526341e-07, "loss": 14.0521, "step": 432600 }, { "epoch": 0.8738995705345491, "grad_norm": 169.73892211914062, "learning_rate": 5.432254945048887e-07, "loss": 9.207, "step": 432610 }, { "epoch": 0.873919771167233, "grad_norm": 75.16477966308594, "learning_rate": 5.430672716789232e-07, "loss": 10.9678, "step": 432620 }, { "epoch": 0.8739399717999168, "grad_norm": 281.18536376953125, "learning_rate": 5.429090705755069e-07, "loss": 10.1701, "step": 432630 }, { "epoch": 0.8739601724326006, "grad_norm": 303.79815673828125, "learning_rate": 5.427508911954105e-07, "loss": 13.4879, "step": 432640 }, { "epoch": 0.8739803730652844, "grad_norm": 246.65687561035156, "learning_rate": 5.425927335394054e-07, "loss": 18.9741, "step": 432650 }, { "epoch": 0.8740005736979682, "grad_norm": 272.3833312988281, "learning_rate": 5.424345976082645e-07, "loss": 16.4858, "step": 432660 }, { "epoch": 0.8740207743306521, "grad_norm": 305.1922912597656, "learning_rate": 5.42276483402755e-07, "loss": 38.6647, "step": 432670 }, { "epoch": 0.8740409749633359, "grad_norm": 414.1039123535156, "learning_rate": 5.421183909236494e-07, "loss": 12.9707, "step": 432680 }, { "epoch": 0.8740611755960197, "grad_norm": 112.91834259033203, "learning_rate": 5.419603201717189e-07, "loss": 14.2765, "step": 432690 }, { "epoch": 0.8740813762287035, "grad_norm": 14.11253547668457, "learning_rate": 5.418022711477333e-07, "loss": 19.9561, "step": 432700 }, { "epoch": 0.8741015768613873, "grad_norm": 221.5771484375, "learning_rate": 5.416442438524616e-07, "loss": 33.8349, "step": 432710 }, { "epoch": 0.8741217774940712, "grad_norm": 268.58001708984375, "learning_rate": 5.414862382866759e-07, "loss": 13.9833, "step": 432720 }, { "epoch": 0.874141978126755, "grad_norm": 159.95399475097656, "learning_rate": 5.413282544511455e-07, "loss": 13.2408, "step": 432730 }, { "epoch": 0.8741621787594387, "grad_norm": 201.13697814941406, "learning_rate": 5.4117029234664e-07, "loss": 14.5072, "step": 432740 }, { "epoch": 0.8741823793921225, "grad_norm": 13.50367546081543, "learning_rate": 5.410123519739302e-07, "loss": 12.3527, "step": 432750 }, { "epoch": 0.8742025800248063, "grad_norm": 463.3029479980469, "learning_rate": 5.408544333337845e-07, "loss": 15.4947, "step": 432760 }, { "epoch": 0.8742227806574902, "grad_norm": 169.9464111328125, "learning_rate": 5.406965364269745e-07, "loss": 14.3322, "step": 432770 }, { "epoch": 0.874242981290174, "grad_norm": 374.7083740234375, "learning_rate": 5.405386612542685e-07, "loss": 8.4789, "step": 432780 }, { "epoch": 0.8742631819228578, "grad_norm": 122.0746841430664, "learning_rate": 5.403808078164358e-07, "loss": 9.1264, "step": 432790 }, { "epoch": 0.8742833825555416, "grad_norm": 255.9857940673828, "learning_rate": 5.402229761142464e-07, "loss": 15.9121, "step": 432800 }, { "epoch": 0.8743035831882254, "grad_norm": 111.83055877685547, "learning_rate": 5.400651661484684e-07, "loss": 8.633, "step": 432810 }, { "epoch": 0.8743237838209093, "grad_norm": 305.3992004394531, "learning_rate": 5.399073779198732e-07, "loss": 14.5601, "step": 432820 }, { "epoch": 0.8743439844535931, "grad_norm": 17.42432975769043, "learning_rate": 5.397496114292278e-07, "loss": 27.667, "step": 432830 }, { "epoch": 0.8743641850862769, "grad_norm": 271.02520751953125, "learning_rate": 5.395918666773026e-07, "loss": 18.6716, "step": 432840 }, { "epoch": 0.8743843857189607, "grad_norm": 242.5654754638672, "learning_rate": 5.394341436648653e-07, "loss": 22.1673, "step": 432850 }, { "epoch": 0.8744045863516445, "grad_norm": 113.94843292236328, "learning_rate": 5.392764423926844e-07, "loss": 21.2269, "step": 432860 }, { "epoch": 0.8744247869843284, "grad_norm": 180.2521209716797, "learning_rate": 5.391187628615296e-07, "loss": 16.6291, "step": 432870 }, { "epoch": 0.8744449876170122, "grad_norm": 431.97552490234375, "learning_rate": 5.389611050721694e-07, "loss": 19.8089, "step": 432880 }, { "epoch": 0.874465188249696, "grad_norm": 374.6367492675781, "learning_rate": 5.388034690253701e-07, "loss": 9.5884, "step": 432890 }, { "epoch": 0.8744853888823798, "grad_norm": 116.84526062011719, "learning_rate": 5.386458547219026e-07, "loss": 22.0699, "step": 432900 }, { "epoch": 0.8745055895150636, "grad_norm": 177.0876922607422, "learning_rate": 5.384882621625353e-07, "loss": 12.8417, "step": 432910 }, { "epoch": 0.8745257901477475, "grad_norm": 1729.6988525390625, "learning_rate": 5.383306913480335e-07, "loss": 34.3606, "step": 432920 }, { "epoch": 0.8745459907804313, "grad_norm": 370.0172424316406, "learning_rate": 5.381731422791664e-07, "loss": 23.8157, "step": 432930 }, { "epoch": 0.8745661914131151, "grad_norm": 659.2686767578125, "learning_rate": 5.380156149567034e-07, "loss": 18.0229, "step": 432940 }, { "epoch": 0.8745863920457989, "grad_norm": 233.52113342285156, "learning_rate": 5.378581093814112e-07, "loss": 10.3576, "step": 432950 }, { "epoch": 0.8746065926784827, "grad_norm": 266.9483337402344, "learning_rate": 5.377006255540562e-07, "loss": 20.8076, "step": 432960 }, { "epoch": 0.8746267933111666, "grad_norm": 127.8639144897461, "learning_rate": 5.375431634754074e-07, "loss": 17.1147, "step": 432970 }, { "epoch": 0.8746469939438504, "grad_norm": 361.4924011230469, "learning_rate": 5.373857231462337e-07, "loss": 21.1542, "step": 432980 }, { "epoch": 0.8746671945765342, "grad_norm": 1143.997802734375, "learning_rate": 5.372283045672994e-07, "loss": 29.7198, "step": 432990 }, { "epoch": 0.8746873952092179, "grad_norm": 44.34175491333008, "learning_rate": 5.370709077393721e-07, "loss": 25.0875, "step": 433000 }, { "epoch": 0.8747075958419017, "grad_norm": 197.0361785888672, "learning_rate": 5.369135326632219e-07, "loss": 25.3473, "step": 433010 }, { "epoch": 0.8747277964745855, "grad_norm": 46.178123474121094, "learning_rate": 5.367561793396132e-07, "loss": 13.566, "step": 433020 }, { "epoch": 0.8747479971072694, "grad_norm": 182.61131286621094, "learning_rate": 5.365988477693124e-07, "loss": 18.0747, "step": 433030 }, { "epoch": 0.8747681977399532, "grad_norm": 254.03570556640625, "learning_rate": 5.364415379530891e-07, "loss": 24.1083, "step": 433040 }, { "epoch": 0.874788398372637, "grad_norm": 198.87677001953125, "learning_rate": 5.362842498917081e-07, "loss": 11.2923, "step": 433050 }, { "epoch": 0.8748085990053208, "grad_norm": 150.2322998046875, "learning_rate": 5.36126983585935e-07, "loss": 15.313, "step": 433060 }, { "epoch": 0.8748287996380046, "grad_norm": 495.1058044433594, "learning_rate": 5.359697390365387e-07, "loss": 17.2233, "step": 433070 }, { "epoch": 0.8748490002706885, "grad_norm": 332.6954650878906, "learning_rate": 5.35812516244284e-07, "loss": 18.017, "step": 433080 }, { "epoch": 0.8748692009033723, "grad_norm": 500.8349609375, "learning_rate": 5.356553152099381e-07, "loss": 26.3494, "step": 433090 }, { "epoch": 0.8748894015360561, "grad_norm": 255.11306762695312, "learning_rate": 5.354981359342659e-07, "loss": 17.2023, "step": 433100 }, { "epoch": 0.8749096021687399, "grad_norm": 180.84852600097656, "learning_rate": 5.353409784180352e-07, "loss": 5.9042, "step": 433110 }, { "epoch": 0.8749298028014237, "grad_norm": 24.889875411987305, "learning_rate": 5.35183842662011e-07, "loss": 16.8743, "step": 433120 }, { "epoch": 0.8749500034341076, "grad_norm": 275.861083984375, "learning_rate": 5.350267286669585e-07, "loss": 9.4849, "step": 433130 }, { "epoch": 0.8749702040667914, "grad_norm": 897.2681274414062, "learning_rate": 5.348696364336448e-07, "loss": 17.6884, "step": 433140 }, { "epoch": 0.8749904046994752, "grad_norm": 435.4981994628906, "learning_rate": 5.347125659628344e-07, "loss": 19.0055, "step": 433150 }, { "epoch": 0.875010605332159, "grad_norm": 34.28255844116211, "learning_rate": 5.345555172552941e-07, "loss": 29.3929, "step": 433160 }, { "epoch": 0.8750308059648428, "grad_norm": 430.943115234375, "learning_rate": 5.343984903117889e-07, "loss": 11.3227, "step": 433170 }, { "epoch": 0.8750510065975267, "grad_norm": 369.8266906738281, "learning_rate": 5.342414851330824e-07, "loss": 24.4882, "step": 433180 }, { "epoch": 0.8750712072302105, "grad_norm": 137.533935546875, "learning_rate": 5.340845017199425e-07, "loss": 22.292, "step": 433190 }, { "epoch": 0.8750914078628943, "grad_norm": 203.81004333496094, "learning_rate": 5.339275400731331e-07, "loss": 23.8607, "step": 433200 }, { "epoch": 0.8751116084955781, "grad_norm": 219.90093994140625, "learning_rate": 5.337706001934184e-07, "loss": 12.4077, "step": 433210 }, { "epoch": 0.8751318091282619, "grad_norm": 127.35514068603516, "learning_rate": 5.33613682081564e-07, "loss": 14.9779, "step": 433220 }, { "epoch": 0.8751520097609458, "grad_norm": 471.42706298828125, "learning_rate": 5.334567857383354e-07, "loss": 27.8361, "step": 433230 }, { "epoch": 0.8751722103936296, "grad_norm": 354.9244079589844, "learning_rate": 5.332999111644971e-07, "loss": 16.9136, "step": 433240 }, { "epoch": 0.8751924110263134, "grad_norm": 182.89178466796875, "learning_rate": 5.331430583608122e-07, "loss": 16.3957, "step": 433250 }, { "epoch": 0.8752126116589971, "grad_norm": 450.6783752441406, "learning_rate": 5.329862273280462e-07, "loss": 25.9119, "step": 433260 }, { "epoch": 0.8752328122916809, "grad_norm": 7.982193946838379, "learning_rate": 5.328294180669658e-07, "loss": 15.3313, "step": 433270 }, { "epoch": 0.8752530129243647, "grad_norm": 130.57077026367188, "learning_rate": 5.326726305783308e-07, "loss": 23.8535, "step": 433280 }, { "epoch": 0.8752732135570486, "grad_norm": 17.09382438659668, "learning_rate": 5.325158648629075e-07, "loss": 16.2155, "step": 433290 }, { "epoch": 0.8752934141897324, "grad_norm": 190.3108673095703, "learning_rate": 5.323591209214612e-07, "loss": 20.9664, "step": 433300 }, { "epoch": 0.8753136148224162, "grad_norm": 183.39068603515625, "learning_rate": 5.322023987547547e-07, "loss": 13.8173, "step": 433310 }, { "epoch": 0.8753338154551, "grad_norm": 115.43177795410156, "learning_rate": 5.320456983635508e-07, "loss": 15.826, "step": 433320 }, { "epoch": 0.8753540160877838, "grad_norm": 463.8359069824219, "learning_rate": 5.318890197486154e-07, "loss": 20.1505, "step": 433330 }, { "epoch": 0.8753742167204677, "grad_norm": 145.56065368652344, "learning_rate": 5.317323629107108e-07, "loss": 11.3221, "step": 433340 }, { "epoch": 0.8753944173531515, "grad_norm": 219.30812072753906, "learning_rate": 5.315757278505995e-07, "loss": 18.8657, "step": 433350 }, { "epoch": 0.8754146179858353, "grad_norm": 95.59041595458984, "learning_rate": 5.314191145690473e-07, "loss": 20.0257, "step": 433360 }, { "epoch": 0.8754348186185191, "grad_norm": 283.51708984375, "learning_rate": 5.312625230668155e-07, "loss": 22.2627, "step": 433370 }, { "epoch": 0.875455019251203, "grad_norm": 205.8687744140625, "learning_rate": 5.311059533446694e-07, "loss": 11.2702, "step": 433380 }, { "epoch": 0.8754752198838868, "grad_norm": 209.12066650390625, "learning_rate": 5.309494054033704e-07, "loss": 11.8368, "step": 433390 }, { "epoch": 0.8754954205165706, "grad_norm": 377.4494323730469, "learning_rate": 5.307928792436812e-07, "loss": 22.0285, "step": 433400 }, { "epoch": 0.8755156211492544, "grad_norm": 508.06512451171875, "learning_rate": 5.306363748663668e-07, "loss": 29.6034, "step": 433410 }, { "epoch": 0.8755358217819382, "grad_norm": 273.0721130371094, "learning_rate": 5.304798922721871e-07, "loss": 15.6005, "step": 433420 }, { "epoch": 0.875556022414622, "grad_norm": 223.41030883789062, "learning_rate": 5.303234314619071e-07, "loss": 10.0523, "step": 433430 }, { "epoch": 0.8755762230473059, "grad_norm": 342.0409240722656, "learning_rate": 5.301669924362884e-07, "loss": 24.0745, "step": 433440 }, { "epoch": 0.8755964236799897, "grad_norm": 268.7024230957031, "learning_rate": 5.300105751960943e-07, "loss": 18.359, "step": 433450 }, { "epoch": 0.8756166243126735, "grad_norm": 127.2983169555664, "learning_rate": 5.298541797420864e-07, "loss": 12.788, "step": 433460 }, { "epoch": 0.8756368249453573, "grad_norm": 361.1843566894531, "learning_rate": 5.296978060750257e-07, "loss": 16.1108, "step": 433470 }, { "epoch": 0.8756570255780411, "grad_norm": 294.2516784667969, "learning_rate": 5.295414541956773e-07, "loss": 21.2914, "step": 433480 }, { "epoch": 0.875677226210725, "grad_norm": 137.48095703125, "learning_rate": 5.293851241048015e-07, "loss": 11.4306, "step": 433490 }, { "epoch": 0.8756974268434088, "grad_norm": 106.34599304199219, "learning_rate": 5.292288158031595e-07, "loss": 15.4627, "step": 433500 }, { "epoch": 0.8757176274760925, "grad_norm": 152.8980712890625, "learning_rate": 5.290725292915138e-07, "loss": 16.1661, "step": 433510 }, { "epoch": 0.8757378281087763, "grad_norm": 80.7688217163086, "learning_rate": 5.28916264570628e-07, "loss": 19.8095, "step": 433520 }, { "epoch": 0.8757580287414601, "grad_norm": 138.82008361816406, "learning_rate": 5.287600216412609e-07, "loss": 24.6289, "step": 433530 }, { "epoch": 0.875778229374144, "grad_norm": 509.1368103027344, "learning_rate": 5.286038005041744e-07, "loss": 23.4458, "step": 433540 }, { "epoch": 0.8757984300068278, "grad_norm": 62.8992805480957, "learning_rate": 5.28447601160132e-07, "loss": 21.7229, "step": 433550 }, { "epoch": 0.8758186306395116, "grad_norm": 324.73089599609375, "learning_rate": 5.28291423609894e-07, "loss": 19.2802, "step": 433560 }, { "epoch": 0.8758388312721954, "grad_norm": 322.7063903808594, "learning_rate": 5.281352678542195e-07, "loss": 15.1388, "step": 433570 }, { "epoch": 0.8758590319048792, "grad_norm": 36.00373840332031, "learning_rate": 5.279791338938717e-07, "loss": 23.9129, "step": 433580 }, { "epoch": 0.8758792325375631, "grad_norm": 233.86868286132812, "learning_rate": 5.278230217296132e-07, "loss": 26.1835, "step": 433590 }, { "epoch": 0.8758994331702469, "grad_norm": 339.3141174316406, "learning_rate": 5.276669313622013e-07, "loss": 19.1924, "step": 433600 }, { "epoch": 0.8759196338029307, "grad_norm": 332.3990783691406, "learning_rate": 5.275108627923975e-07, "loss": 18.69, "step": 433610 }, { "epoch": 0.8759398344356145, "grad_norm": 212.42300415039062, "learning_rate": 5.273548160209651e-07, "loss": 20.9045, "step": 433620 }, { "epoch": 0.8759600350682983, "grad_norm": 318.3375244140625, "learning_rate": 5.271987910486625e-07, "loss": 21.3603, "step": 433630 }, { "epoch": 0.8759802357009822, "grad_norm": 455.908447265625, "learning_rate": 5.270427878762496e-07, "loss": 17.5253, "step": 433640 }, { "epoch": 0.876000436333666, "grad_norm": 355.2138366699219, "learning_rate": 5.268868065044886e-07, "loss": 18.036, "step": 433650 }, { "epoch": 0.8760206369663498, "grad_norm": 169.66409301757812, "learning_rate": 5.267308469341387e-07, "loss": 19.3718, "step": 433660 }, { "epoch": 0.8760408375990336, "grad_norm": 0.0, "learning_rate": 5.265749091659589e-07, "loss": 14.599, "step": 433670 }, { "epoch": 0.8760610382317174, "grad_norm": 185.46743774414062, "learning_rate": 5.264189932007119e-07, "loss": 14.404, "step": 433680 }, { "epoch": 0.8760812388644013, "grad_norm": 342.9581298828125, "learning_rate": 5.262630990391549e-07, "loss": 18.9033, "step": 433690 }, { "epoch": 0.8761014394970851, "grad_norm": 367.83795166015625, "learning_rate": 5.2610722668205e-07, "loss": 20.94, "step": 433700 }, { "epoch": 0.8761216401297689, "grad_norm": 334.108642578125, "learning_rate": 5.259513761301549e-07, "loss": 19.7453, "step": 433710 }, { "epoch": 0.8761418407624527, "grad_norm": 285.80804443359375, "learning_rate": 5.257955473842314e-07, "loss": 12.9342, "step": 433720 }, { "epoch": 0.8761620413951365, "grad_norm": 207.70826721191406, "learning_rate": 5.25639740445037e-07, "loss": 20.3354, "step": 433730 }, { "epoch": 0.8761822420278204, "grad_norm": 351.1932373046875, "learning_rate": 5.254839553133312e-07, "loss": 10.8909, "step": 433740 }, { "epoch": 0.8762024426605042, "grad_norm": 3.709616184234619, "learning_rate": 5.253281919898751e-07, "loss": 32.6381, "step": 433750 }, { "epoch": 0.876222643293188, "grad_norm": 195.4125213623047, "learning_rate": 5.251724504754258e-07, "loss": 9.7482, "step": 433760 }, { "epoch": 0.8762428439258717, "grad_norm": 455.1700134277344, "learning_rate": 5.250167307707437e-07, "loss": 19.7197, "step": 433770 }, { "epoch": 0.8762630445585555, "grad_norm": 357.58306884765625, "learning_rate": 5.24861032876588e-07, "loss": 20.1256, "step": 433780 }, { "epoch": 0.8762832451912393, "grad_norm": 264.9433288574219, "learning_rate": 5.247053567937155e-07, "loss": 20.2208, "step": 433790 }, { "epoch": 0.8763034458239232, "grad_norm": 93.81202697753906, "learning_rate": 5.245497025228874e-07, "loss": 32.6254, "step": 433800 }, { "epoch": 0.876323646456607, "grad_norm": 574.3193359375, "learning_rate": 5.243940700648609e-07, "loss": 14.8747, "step": 433810 }, { "epoch": 0.8763438470892908, "grad_norm": 114.02771759033203, "learning_rate": 5.242384594203942e-07, "loss": 19.4134, "step": 433820 }, { "epoch": 0.8763640477219746, "grad_norm": 220.89328002929688, "learning_rate": 5.240828705902462e-07, "loss": 6.8964, "step": 433830 }, { "epoch": 0.8763842483546584, "grad_norm": 213.7437744140625, "learning_rate": 5.239273035751763e-07, "loss": 30.1331, "step": 433840 }, { "epoch": 0.8764044489873423, "grad_norm": 580.44189453125, "learning_rate": 5.237717583759421e-07, "loss": 16.9098, "step": 433850 }, { "epoch": 0.8764246496200261, "grad_norm": 283.7548828125, "learning_rate": 5.236162349933005e-07, "loss": 16.622, "step": 433860 }, { "epoch": 0.8764448502527099, "grad_norm": 517.726318359375, "learning_rate": 5.234607334280117e-07, "loss": 12.846, "step": 433870 }, { "epoch": 0.8764650508853937, "grad_norm": 401.5696716308594, "learning_rate": 5.23305253680832e-07, "loss": 19.2747, "step": 433880 }, { "epoch": 0.8764852515180775, "grad_norm": 517.2724609375, "learning_rate": 5.231497957525184e-07, "loss": 16.5344, "step": 433890 }, { "epoch": 0.8765054521507614, "grad_norm": 118.29544067382812, "learning_rate": 5.229943596438297e-07, "loss": 17.5756, "step": 433900 }, { "epoch": 0.8765256527834452, "grad_norm": 280.3024597167969, "learning_rate": 5.22838945355525e-07, "loss": 14.8495, "step": 433910 }, { "epoch": 0.876545853416129, "grad_norm": 107.56294250488281, "learning_rate": 5.2268355288836e-07, "loss": 7.7354, "step": 433920 }, { "epoch": 0.8765660540488128, "grad_norm": 171.7476043701172, "learning_rate": 5.225281822430911e-07, "loss": 17.65, "step": 433930 }, { "epoch": 0.8765862546814966, "grad_norm": 568.8677368164062, "learning_rate": 5.22372833420478e-07, "loss": 31.2041, "step": 433940 }, { "epoch": 0.8766064553141805, "grad_norm": 139.4942169189453, "learning_rate": 5.222175064212764e-07, "loss": 9.4249, "step": 433950 }, { "epoch": 0.8766266559468643, "grad_norm": 631.3897094726562, "learning_rate": 5.220622012462429e-07, "loss": 24.6887, "step": 433960 }, { "epoch": 0.8766468565795481, "grad_norm": 144.6816864013672, "learning_rate": 5.219069178961361e-07, "loss": 16.9325, "step": 433970 }, { "epoch": 0.8766670572122319, "grad_norm": 431.0509948730469, "learning_rate": 5.217516563717107e-07, "loss": 28.0626, "step": 433980 }, { "epoch": 0.8766872578449157, "grad_norm": 1093.6778564453125, "learning_rate": 5.215964166737258e-07, "loss": 23.2797, "step": 433990 }, { "epoch": 0.8767074584775996, "grad_norm": 351.3003845214844, "learning_rate": 5.214411988029355e-07, "loss": 13.4805, "step": 434000 }, { "epoch": 0.8767276591102834, "grad_norm": 317.1842041015625, "learning_rate": 5.212860027600986e-07, "loss": 13.3169, "step": 434010 }, { "epoch": 0.8767478597429671, "grad_norm": 549.303466796875, "learning_rate": 5.21130828545971e-07, "loss": 9.3565, "step": 434020 }, { "epoch": 0.8767680603756509, "grad_norm": 245.1389923095703, "learning_rate": 5.209756761613072e-07, "loss": 20.0705, "step": 434030 }, { "epoch": 0.8767882610083347, "grad_norm": 56.18154525756836, "learning_rate": 5.208205456068655e-07, "loss": 20.1889, "step": 434040 }, { "epoch": 0.8768084616410186, "grad_norm": 357.11993408203125, "learning_rate": 5.206654368834002e-07, "loss": 28.9057, "step": 434050 }, { "epoch": 0.8768286622737024, "grad_norm": 261.443115234375, "learning_rate": 5.205103499916697e-07, "loss": 10.3796, "step": 434060 }, { "epoch": 0.8768488629063862, "grad_norm": 862.103515625, "learning_rate": 5.203552849324284e-07, "loss": 18.4036, "step": 434070 }, { "epoch": 0.87686906353907, "grad_norm": 325.0450439453125, "learning_rate": 5.202002417064306e-07, "loss": 28.2807, "step": 434080 }, { "epoch": 0.8768892641717538, "grad_norm": 343.0401611328125, "learning_rate": 5.200452203144352e-07, "loss": 17.4518, "step": 434090 }, { "epoch": 0.8769094648044377, "grad_norm": 277.0809326171875, "learning_rate": 5.198902207571955e-07, "loss": 12.2072, "step": 434100 }, { "epoch": 0.8769296654371215, "grad_norm": 293.6759033203125, "learning_rate": 5.197352430354669e-07, "loss": 19.1621, "step": 434110 }, { "epoch": 0.8769498660698053, "grad_norm": 180.9759979248047, "learning_rate": 5.19580287150005e-07, "loss": 25.1278, "step": 434120 }, { "epoch": 0.8769700667024891, "grad_norm": 772.21435546875, "learning_rate": 5.194253531015675e-07, "loss": 16.2817, "step": 434130 }, { "epoch": 0.8769902673351729, "grad_norm": 180.82733154296875, "learning_rate": 5.192704408909055e-07, "loss": 17.4808, "step": 434140 }, { "epoch": 0.8770104679678568, "grad_norm": 152.46719360351562, "learning_rate": 5.191155505187756e-07, "loss": 17.088, "step": 434150 }, { "epoch": 0.8770306686005406, "grad_norm": 28.198760986328125, "learning_rate": 5.189606819859344e-07, "loss": 10.0622, "step": 434160 }, { "epoch": 0.8770508692332244, "grad_norm": 316.7867431640625, "learning_rate": 5.188058352931352e-07, "loss": 20.1294, "step": 434170 }, { "epoch": 0.8770710698659082, "grad_norm": 655.7664794921875, "learning_rate": 5.186510104411319e-07, "loss": 20.0069, "step": 434180 }, { "epoch": 0.877091270498592, "grad_norm": 229.0889129638672, "learning_rate": 5.184962074306798e-07, "loss": 17.3417, "step": 434190 }, { "epoch": 0.8771114711312759, "grad_norm": 303.02496337890625, "learning_rate": 5.183414262625364e-07, "loss": 13.3623, "step": 434200 }, { "epoch": 0.8771316717639597, "grad_norm": 140.8537139892578, "learning_rate": 5.181866669374507e-07, "loss": 12.6561, "step": 434210 }, { "epoch": 0.8771518723966435, "grad_norm": 695.171875, "learning_rate": 5.180319294561797e-07, "loss": 21.0463, "step": 434220 }, { "epoch": 0.8771720730293273, "grad_norm": 147.87380981445312, "learning_rate": 5.178772138194782e-07, "loss": 15.1771, "step": 434230 }, { "epoch": 0.8771922736620111, "grad_norm": 681.9638061523438, "learning_rate": 5.177225200281e-07, "loss": 18.6806, "step": 434240 }, { "epoch": 0.877212474294695, "grad_norm": 274.4325256347656, "learning_rate": 5.175678480827972e-07, "loss": 15.7013, "step": 434250 }, { "epoch": 0.8772326749273788, "grad_norm": 392.75830078125, "learning_rate": 5.174131979843266e-07, "loss": 20.9433, "step": 434260 }, { "epoch": 0.8772528755600626, "grad_norm": 167.98194885253906, "learning_rate": 5.172585697334398e-07, "loss": 20.6973, "step": 434270 }, { "epoch": 0.8772730761927463, "grad_norm": 369.706298828125, "learning_rate": 5.171039633308905e-07, "loss": 14.5283, "step": 434280 }, { "epoch": 0.8772932768254301, "grad_norm": 217.7832794189453, "learning_rate": 5.169493787774338e-07, "loss": 14.155, "step": 434290 }, { "epoch": 0.8773134774581139, "grad_norm": 301.4135437011719, "learning_rate": 5.167948160738206e-07, "loss": 10.2434, "step": 434300 }, { "epoch": 0.8773336780907978, "grad_norm": 12.187145233154297, "learning_rate": 5.166402752208071e-07, "loss": 7.3482, "step": 434310 }, { "epoch": 0.8773538787234816, "grad_norm": 310.41082763671875, "learning_rate": 5.164857562191439e-07, "loss": 15.5349, "step": 434320 }, { "epoch": 0.8773740793561654, "grad_norm": 570.3519897460938, "learning_rate": 5.163312590695869e-07, "loss": 20.4465, "step": 434330 }, { "epoch": 0.8773942799888492, "grad_norm": 214.05459594726562, "learning_rate": 5.161767837728871e-07, "loss": 21.3943, "step": 434340 }, { "epoch": 0.877414480621533, "grad_norm": 7.265511989593506, "learning_rate": 5.160223303297967e-07, "loss": 12.4559, "step": 434350 }, { "epoch": 0.8774346812542169, "grad_norm": 231.39935302734375, "learning_rate": 5.15867898741071e-07, "loss": 17.0576, "step": 434360 }, { "epoch": 0.8774548818869007, "grad_norm": 164.5531463623047, "learning_rate": 5.1571348900746e-07, "loss": 17.2305, "step": 434370 }, { "epoch": 0.8774750825195845, "grad_norm": 292.3380432128906, "learning_rate": 5.155591011297184e-07, "loss": 28.4496, "step": 434380 }, { "epoch": 0.8774952831522683, "grad_norm": 108.02655029296875, "learning_rate": 5.154047351085983e-07, "loss": 12.7543, "step": 434390 }, { "epoch": 0.8775154837849521, "grad_norm": 410.6449279785156, "learning_rate": 5.152503909448503e-07, "loss": 19.7891, "step": 434400 }, { "epoch": 0.877535684417636, "grad_norm": 595.8047485351562, "learning_rate": 5.150960686392293e-07, "loss": 21.3929, "step": 434410 }, { "epoch": 0.8775558850503198, "grad_norm": 420.0340881347656, "learning_rate": 5.149417681924856e-07, "loss": 20.7275, "step": 434420 }, { "epoch": 0.8775760856830036, "grad_norm": 73.7215576171875, "learning_rate": 5.147874896053711e-07, "loss": 15.5586, "step": 434430 }, { "epoch": 0.8775962863156874, "grad_norm": 202.3787841796875, "learning_rate": 5.146332328786386e-07, "loss": 19.9977, "step": 434440 }, { "epoch": 0.8776164869483712, "grad_norm": 172.36013793945312, "learning_rate": 5.144789980130404e-07, "loss": 14.8833, "step": 434450 }, { "epoch": 0.8776366875810551, "grad_norm": 486.5126647949219, "learning_rate": 5.143247850093274e-07, "loss": 23.1488, "step": 434460 }, { "epoch": 0.8776568882137389, "grad_norm": 287.8289489746094, "learning_rate": 5.141705938682506e-07, "loss": 19.85, "step": 434470 }, { "epoch": 0.8776770888464227, "grad_norm": 246.57994079589844, "learning_rate": 5.140164245905633e-07, "loss": 14.6019, "step": 434480 }, { "epoch": 0.8776972894791065, "grad_norm": 321.09375, "learning_rate": 5.138622771770157e-07, "loss": 20.4411, "step": 434490 }, { "epoch": 0.8777174901117903, "grad_norm": 283.07440185546875, "learning_rate": 5.137081516283582e-07, "loss": 15.5579, "step": 434500 }, { "epoch": 0.8777376907444742, "grad_norm": 343.1944580078125, "learning_rate": 5.135540479453432e-07, "loss": 16.5614, "step": 434510 }, { "epoch": 0.877757891377158, "grad_norm": 273.73388671875, "learning_rate": 5.133999661287226e-07, "loss": 26.7135, "step": 434520 }, { "epoch": 0.8777780920098417, "grad_norm": 213.76499938964844, "learning_rate": 5.13245906179246e-07, "loss": 34.9487, "step": 434530 }, { "epoch": 0.8777982926425255, "grad_norm": 108.06747436523438, "learning_rate": 5.130918680976643e-07, "loss": 12.7697, "step": 434540 }, { "epoch": 0.8778184932752093, "grad_norm": 184.45631408691406, "learning_rate": 5.129378518847295e-07, "loss": 21.9418, "step": 434550 }, { "epoch": 0.8778386939078932, "grad_norm": 334.7638244628906, "learning_rate": 5.127838575411908e-07, "loss": 13.8198, "step": 434560 }, { "epoch": 0.877858894540577, "grad_norm": 224.79470825195312, "learning_rate": 5.126298850677991e-07, "loss": 13.9634, "step": 434570 }, { "epoch": 0.8778790951732608, "grad_norm": 226.68106079101562, "learning_rate": 5.124759344653057e-07, "loss": 8.0073, "step": 434580 }, { "epoch": 0.8778992958059446, "grad_norm": 336.3865966796875, "learning_rate": 5.123220057344597e-07, "loss": 21.1141, "step": 434590 }, { "epoch": 0.8779194964386284, "grad_norm": 348.7720947265625, "learning_rate": 5.121680988760125e-07, "loss": 19.3948, "step": 434600 }, { "epoch": 0.8779396970713123, "grad_norm": 139.48974609375, "learning_rate": 5.120142138907131e-07, "loss": 15.3754, "step": 434610 }, { "epoch": 0.8779598977039961, "grad_norm": 451.96209716796875, "learning_rate": 5.11860350779313e-07, "loss": 19.5513, "step": 434620 }, { "epoch": 0.8779800983366799, "grad_norm": 367.45257568359375, "learning_rate": 5.11706509542561e-07, "loss": 15.1161, "step": 434630 }, { "epoch": 0.8780002989693637, "grad_norm": 0.0, "learning_rate": 5.115526901812062e-07, "loss": 18.2227, "step": 434640 }, { "epoch": 0.8780204996020475, "grad_norm": 395.2568359375, "learning_rate": 5.113988926960001e-07, "loss": 16.2354, "step": 434650 }, { "epoch": 0.8780407002347314, "grad_norm": 362.00653076171875, "learning_rate": 5.112451170876903e-07, "loss": 18.4036, "step": 434660 }, { "epoch": 0.8780609008674152, "grad_norm": 210.4418487548828, "learning_rate": 5.110913633570286e-07, "loss": 29.6425, "step": 434670 }, { "epoch": 0.878081101500099, "grad_norm": 278.4564514160156, "learning_rate": 5.109376315047632e-07, "loss": 12.1836, "step": 434680 }, { "epoch": 0.8781013021327828, "grad_norm": 403.2332458496094, "learning_rate": 5.107839215316424e-07, "loss": 26.7823, "step": 434690 }, { "epoch": 0.8781215027654666, "grad_norm": 302.29144287109375, "learning_rate": 5.106302334384172e-07, "loss": 16.1126, "step": 434700 }, { "epoch": 0.8781417033981505, "grad_norm": 496.5931701660156, "learning_rate": 5.104765672258355e-07, "loss": 23.4361, "step": 434710 }, { "epoch": 0.8781619040308343, "grad_norm": 290.95648193359375, "learning_rate": 5.103229228946455e-07, "loss": 12.0244, "step": 434720 }, { "epoch": 0.8781821046635181, "grad_norm": 125.8919906616211, "learning_rate": 5.101693004455977e-07, "loss": 10.4043, "step": 434730 }, { "epoch": 0.8782023052962019, "grad_norm": 319.6761474609375, "learning_rate": 5.100156998794415e-07, "loss": 17.449, "step": 434740 }, { "epoch": 0.8782225059288857, "grad_norm": 253.58807373046875, "learning_rate": 5.098621211969224e-07, "loss": 19.8291, "step": 434750 }, { "epoch": 0.8782427065615696, "grad_norm": 12.38747787475586, "learning_rate": 5.09708564398791e-07, "loss": 12.9324, "step": 434760 }, { "epoch": 0.8782629071942534, "grad_norm": 227.8798065185547, "learning_rate": 5.095550294857959e-07, "loss": 13.6221, "step": 434770 }, { "epoch": 0.8782831078269372, "grad_norm": 225.2969970703125, "learning_rate": 5.094015164586852e-07, "loss": 15.3277, "step": 434780 }, { "epoch": 0.8783033084596209, "grad_norm": 224.01995849609375, "learning_rate": 5.092480253182058e-07, "loss": 21.3851, "step": 434790 }, { "epoch": 0.8783235090923047, "grad_norm": 125.3165512084961, "learning_rate": 5.090945560651073e-07, "loss": 16.2378, "step": 434800 }, { "epoch": 0.8783437097249885, "grad_norm": 240.43637084960938, "learning_rate": 5.08941108700139e-07, "loss": 8.1387, "step": 434810 }, { "epoch": 0.8783639103576724, "grad_norm": 198.0110626220703, "learning_rate": 5.087876832240446e-07, "loss": 22.9592, "step": 434820 }, { "epoch": 0.8783841109903562, "grad_norm": 131.37515258789062, "learning_rate": 5.086342796375749e-07, "loss": 6.9379, "step": 434830 }, { "epoch": 0.87840431162304, "grad_norm": 170.5103302001953, "learning_rate": 5.084808979414779e-07, "loss": 15.2344, "step": 434840 }, { "epoch": 0.8784245122557238, "grad_norm": 158.17288208007812, "learning_rate": 5.083275381364999e-07, "loss": 24.6042, "step": 434850 }, { "epoch": 0.8784447128884076, "grad_norm": 281.2638244628906, "learning_rate": 5.081742002233881e-07, "loss": 13.3936, "step": 434860 }, { "epoch": 0.8784649135210915, "grad_norm": 300.9057922363281, "learning_rate": 5.080208842028911e-07, "loss": 25.3561, "step": 434870 }, { "epoch": 0.8784851141537753, "grad_norm": 394.9960021972656, "learning_rate": 5.078675900757557e-07, "loss": 21.546, "step": 434880 }, { "epoch": 0.8785053147864591, "grad_norm": 309.2256774902344, "learning_rate": 5.07714317842728e-07, "loss": 12.8756, "step": 434890 }, { "epoch": 0.8785255154191429, "grad_norm": 348.014892578125, "learning_rate": 5.075610675045567e-07, "loss": 16.8524, "step": 434900 }, { "epoch": 0.8785457160518267, "grad_norm": 316.774169921875, "learning_rate": 5.074078390619869e-07, "loss": 15.8469, "step": 434910 }, { "epoch": 0.8785659166845106, "grad_norm": 376.3726806640625, "learning_rate": 5.072546325157673e-07, "loss": 12.6933, "step": 434920 }, { "epoch": 0.8785861173171944, "grad_norm": 225.87057495117188, "learning_rate": 5.071014478666425e-07, "loss": 15.8447, "step": 434930 }, { "epoch": 0.8786063179498782, "grad_norm": 293.9652404785156, "learning_rate": 5.069482851153618e-07, "loss": 14.8278, "step": 434940 }, { "epoch": 0.878626518582562, "grad_norm": 228.35572814941406, "learning_rate": 5.0679514426267e-07, "loss": 16.2477, "step": 434950 }, { "epoch": 0.8786467192152458, "grad_norm": 12.565250396728516, "learning_rate": 5.06642025309313e-07, "loss": 10.6839, "step": 434960 }, { "epoch": 0.8786669198479297, "grad_norm": 613.4114990234375, "learning_rate": 5.064889282560382e-07, "loss": 22.458, "step": 434970 }, { "epoch": 0.8786871204806135, "grad_norm": 112.433837890625, "learning_rate": 5.063358531035906e-07, "loss": 13.6767, "step": 434980 }, { "epoch": 0.8787073211132973, "grad_norm": 289.1630554199219, "learning_rate": 5.06182799852718e-07, "loss": 13.6845, "step": 434990 }, { "epoch": 0.8787275217459811, "grad_norm": 907.2689819335938, "learning_rate": 5.06029768504166e-07, "loss": 23.3365, "step": 435000 }, { "epoch": 0.8787477223786649, "grad_norm": 188.04006958007812, "learning_rate": 5.058767590586783e-07, "loss": 15.6127, "step": 435010 }, { "epoch": 0.8787679230113488, "grad_norm": 290.1544189453125, "learning_rate": 5.057237715170032e-07, "loss": 15.7476, "step": 435020 }, { "epoch": 0.8787881236440326, "grad_norm": 391.63677978515625, "learning_rate": 5.055708058798853e-07, "loss": 22.9555, "step": 435030 }, { "epoch": 0.8788083242767164, "grad_norm": 388.8981628417969, "learning_rate": 5.054178621480694e-07, "loss": 16.7432, "step": 435040 }, { "epoch": 0.8788285249094001, "grad_norm": 307.1207275390625, "learning_rate": 5.052649403223015e-07, "loss": 20.3882, "step": 435050 }, { "epoch": 0.8788487255420839, "grad_norm": 291.4189147949219, "learning_rate": 5.051120404033283e-07, "loss": 17.1048, "step": 435060 }, { "epoch": 0.8788689261747678, "grad_norm": 41.89212417602539, "learning_rate": 5.049591623918937e-07, "loss": 25.6163, "step": 435070 }, { "epoch": 0.8788891268074516, "grad_norm": 299.0412902832031, "learning_rate": 5.04806306288742e-07, "loss": 20.8267, "step": 435080 }, { "epoch": 0.8789093274401354, "grad_norm": 344.2184143066406, "learning_rate": 5.046534720946206e-07, "loss": 17.9315, "step": 435090 }, { "epoch": 0.8789295280728192, "grad_norm": 13.170239448547363, "learning_rate": 5.045006598102725e-07, "loss": 23.2014, "step": 435100 }, { "epoch": 0.878949728705503, "grad_norm": 234.60589599609375, "learning_rate": 5.043478694364423e-07, "loss": 15.3005, "step": 435110 }, { "epoch": 0.8789699293381869, "grad_norm": 296.8214416503906, "learning_rate": 5.04195100973875e-07, "loss": 15.956, "step": 435120 }, { "epoch": 0.8789901299708707, "grad_norm": 175.46246337890625, "learning_rate": 5.040423544233164e-07, "loss": 15.195, "step": 435130 }, { "epoch": 0.8790103306035545, "grad_norm": 334.2615661621094, "learning_rate": 5.0388962978551e-07, "loss": 8.3599, "step": 435140 }, { "epoch": 0.8790305312362383, "grad_norm": 187.0152130126953, "learning_rate": 5.037369270611997e-07, "loss": 15.5271, "step": 435150 }, { "epoch": 0.8790507318689221, "grad_norm": 99.10022735595703, "learning_rate": 5.035842462511309e-07, "loss": 31.9883, "step": 435160 }, { "epoch": 0.879070932501606, "grad_norm": 145.28305053710938, "learning_rate": 5.034315873560475e-07, "loss": 15.9212, "step": 435170 }, { "epoch": 0.8790911331342898, "grad_norm": 272.4645080566406, "learning_rate": 5.032789503766922e-07, "loss": 19.081, "step": 435180 }, { "epoch": 0.8791113337669736, "grad_norm": 275.4917907714844, "learning_rate": 5.031263353138105e-07, "loss": 24.3935, "step": 435190 }, { "epoch": 0.8791315343996574, "grad_norm": 224.77224731445312, "learning_rate": 5.029737421681446e-07, "loss": 26.4198, "step": 435200 }, { "epoch": 0.8791517350323412, "grad_norm": 267.1885070800781, "learning_rate": 5.028211709404407e-07, "loss": 24.4465, "step": 435210 }, { "epoch": 0.879171935665025, "grad_norm": 468.73748779296875, "learning_rate": 5.026686216314397e-07, "loss": 14.3564, "step": 435220 }, { "epoch": 0.8791921362977089, "grad_norm": 160.945556640625, "learning_rate": 5.025160942418872e-07, "loss": 16.11, "step": 435230 }, { "epoch": 0.8792123369303927, "grad_norm": 464.5129089355469, "learning_rate": 5.023635887725259e-07, "loss": 18.668, "step": 435240 }, { "epoch": 0.8792325375630765, "grad_norm": 263.7014465332031, "learning_rate": 5.022111052240985e-07, "loss": 19.4779, "step": 435250 }, { "epoch": 0.8792527381957603, "grad_norm": 104.21866607666016, "learning_rate": 5.020586435973491e-07, "loss": 20.5533, "step": 435260 }, { "epoch": 0.8792729388284442, "grad_norm": 420.8669738769531, "learning_rate": 5.019062038930195e-07, "loss": 21.5184, "step": 435270 }, { "epoch": 0.879293139461128, "grad_norm": 229.93014526367188, "learning_rate": 5.017537861118543e-07, "loss": 15.564, "step": 435280 }, { "epoch": 0.8793133400938118, "grad_norm": 79.67237854003906, "learning_rate": 5.016013902545957e-07, "loss": 13.9606, "step": 435290 }, { "epoch": 0.8793335407264955, "grad_norm": 224.75228881835938, "learning_rate": 5.014490163219854e-07, "loss": 18.3317, "step": 435300 }, { "epoch": 0.8793537413591793, "grad_norm": 92.64762115478516, "learning_rate": 5.012966643147682e-07, "loss": 10.4126, "step": 435310 }, { "epoch": 0.8793739419918631, "grad_norm": 328.7536926269531, "learning_rate": 5.011443342336852e-07, "loss": 9.2842, "step": 435320 }, { "epoch": 0.879394142624547, "grad_norm": 136.69464111328125, "learning_rate": 5.009920260794782e-07, "loss": 10.1657, "step": 435330 }, { "epoch": 0.8794143432572308, "grad_norm": 213.4883270263672, "learning_rate": 5.008397398528903e-07, "loss": 11.9219, "step": 435340 }, { "epoch": 0.8794345438899146, "grad_norm": 344.8109436035156, "learning_rate": 5.006874755546654e-07, "loss": 16.0257, "step": 435350 }, { "epoch": 0.8794547445225984, "grad_norm": 716.1160278320312, "learning_rate": 5.005352331855423e-07, "loss": 18.9226, "step": 435360 }, { "epoch": 0.8794749451552822, "grad_norm": 349.8487854003906, "learning_rate": 5.00383012746265e-07, "loss": 30.1648, "step": 435370 }, { "epoch": 0.8794951457879661, "grad_norm": 137.87156677246094, "learning_rate": 5.002308142375762e-07, "loss": 13.6947, "step": 435380 }, { "epoch": 0.8795153464206499, "grad_norm": 49.19867706298828, "learning_rate": 5.000786376602162e-07, "loss": 13.3448, "step": 435390 }, { "epoch": 0.8795355470533337, "grad_norm": 270.9266052246094, "learning_rate": 4.99926483014927e-07, "loss": 13.8228, "step": 435400 }, { "epoch": 0.8795557476860175, "grad_norm": 215.8948516845703, "learning_rate": 4.997743503024494e-07, "loss": 18.7824, "step": 435410 }, { "epoch": 0.8795759483187013, "grad_norm": 124.60919952392578, "learning_rate": 4.996222395235283e-07, "loss": 22.5475, "step": 435420 }, { "epoch": 0.8795961489513852, "grad_norm": 195.76988220214844, "learning_rate": 4.994701506789007e-07, "loss": 16.7493, "step": 435430 }, { "epoch": 0.879616349584069, "grad_norm": 357.6434020996094, "learning_rate": 4.99318083769309e-07, "loss": 13.8251, "step": 435440 }, { "epoch": 0.8796365502167528, "grad_norm": 467.0783386230469, "learning_rate": 4.991660387954967e-07, "loss": 21.4056, "step": 435450 }, { "epoch": 0.8796567508494366, "grad_norm": 240.75360107421875, "learning_rate": 4.990140157582036e-07, "loss": 13.7297, "step": 435460 }, { "epoch": 0.8796769514821204, "grad_norm": 85.39241027832031, "learning_rate": 4.988620146581685e-07, "loss": 15.4104, "step": 435470 }, { "epoch": 0.8796971521148043, "grad_norm": 165.2989044189453, "learning_rate": 4.987100354961355e-07, "loss": 15.2054, "step": 435480 }, { "epoch": 0.8797173527474881, "grad_norm": 404.2278137207031, "learning_rate": 4.985580782728433e-07, "loss": 18.9878, "step": 435490 }, { "epoch": 0.8797375533801719, "grad_norm": 328.2271423339844, "learning_rate": 4.984061429890324e-07, "loss": 13.1969, "step": 435500 }, { "epoch": 0.8797577540128557, "grad_norm": 207.3946075439453, "learning_rate": 4.98254229645444e-07, "loss": 23.9554, "step": 435510 }, { "epoch": 0.8797779546455395, "grad_norm": 303.2662048339844, "learning_rate": 4.981023382428196e-07, "loss": 26.0353, "step": 435520 }, { "epoch": 0.8797981552782234, "grad_norm": 275.2373962402344, "learning_rate": 4.979504687818987e-07, "loss": 21.5372, "step": 435530 }, { "epoch": 0.8798183559109072, "grad_norm": 281.1122131347656, "learning_rate": 4.977986212634195e-07, "loss": 19.5761, "step": 435540 }, { "epoch": 0.879838556543591, "grad_norm": 407.7804260253906, "learning_rate": 4.976467956881254e-07, "loss": 19.3831, "step": 435550 }, { "epoch": 0.8798587571762747, "grad_norm": 180.14730834960938, "learning_rate": 4.97494992056754e-07, "loss": 18.2869, "step": 435560 }, { "epoch": 0.8798789578089585, "grad_norm": 247.82423400878906, "learning_rate": 4.973432103700454e-07, "loss": 26.2631, "step": 435570 }, { "epoch": 0.8798991584416423, "grad_norm": 193.4473419189453, "learning_rate": 4.971914506287407e-07, "loss": 22.6933, "step": 435580 }, { "epoch": 0.8799193590743262, "grad_norm": 227.37025451660156, "learning_rate": 4.97039712833578e-07, "loss": 29.4937, "step": 435590 }, { "epoch": 0.87993955970701, "grad_norm": 331.55841064453125, "learning_rate": 4.968879969852985e-07, "loss": 20.8808, "step": 435600 }, { "epoch": 0.8799597603396938, "grad_norm": 24.882539749145508, "learning_rate": 4.967363030846406e-07, "loss": 21.8269, "step": 435610 }, { "epoch": 0.8799799609723776, "grad_norm": 351.8355712890625, "learning_rate": 4.965846311323431e-07, "loss": 24.9885, "step": 435620 }, { "epoch": 0.8800001616050614, "grad_norm": 178.48428344726562, "learning_rate": 4.964329811291463e-07, "loss": 10.7361, "step": 435630 }, { "epoch": 0.8800203622377453, "grad_norm": 39.56497573852539, "learning_rate": 4.962813530757893e-07, "loss": 15.5416, "step": 435640 }, { "epoch": 0.8800405628704291, "grad_norm": 33.96110916137695, "learning_rate": 4.961297469730097e-07, "loss": 14.3991, "step": 435650 }, { "epoch": 0.8800607635031129, "grad_norm": 327.0130310058594, "learning_rate": 4.959781628215476e-07, "loss": 18.6497, "step": 435660 }, { "epoch": 0.8800809641357967, "grad_norm": 71.4016342163086, "learning_rate": 4.95826600622143e-07, "loss": 16.6944, "step": 435670 }, { "epoch": 0.8801011647684805, "grad_norm": 6.107741355895996, "learning_rate": 4.956750603755328e-07, "loss": 13.4702, "step": 435680 }, { "epoch": 0.8801213654011644, "grad_norm": 260.7806701660156, "learning_rate": 4.95523542082455e-07, "loss": 15.5015, "step": 435690 }, { "epoch": 0.8801415660338482, "grad_norm": 563.1996459960938, "learning_rate": 4.9537204574365e-07, "loss": 25.0301, "step": 435700 }, { "epoch": 0.880161766666532, "grad_norm": 576.7715454101562, "learning_rate": 4.952205713598557e-07, "loss": 22.6019, "step": 435710 }, { "epoch": 0.8801819672992158, "grad_norm": 203.0591278076172, "learning_rate": 4.950691189318086e-07, "loss": 12.8879, "step": 435720 }, { "epoch": 0.8802021679318996, "grad_norm": 191.82142639160156, "learning_rate": 4.949176884602486e-07, "loss": 40.915, "step": 435730 }, { "epoch": 0.8802223685645835, "grad_norm": 185.26254272460938, "learning_rate": 4.947662799459152e-07, "loss": 18.2691, "step": 435740 }, { "epoch": 0.8802425691972673, "grad_norm": 203.30308532714844, "learning_rate": 4.946148933895423e-07, "loss": 23.5285, "step": 435750 }, { "epoch": 0.8802627698299511, "grad_norm": 70.34530639648438, "learning_rate": 4.944635287918703e-07, "loss": 9.6555, "step": 435760 }, { "epoch": 0.8802829704626349, "grad_norm": 172.27745056152344, "learning_rate": 4.943121861536376e-07, "loss": 19.9147, "step": 435770 }, { "epoch": 0.8803031710953187, "grad_norm": 197.68466186523438, "learning_rate": 4.941608654755808e-07, "loss": 10.9635, "step": 435780 }, { "epoch": 0.8803233717280026, "grad_norm": 509.8621826171875, "learning_rate": 4.940095667584366e-07, "loss": 21.1964, "step": 435790 }, { "epoch": 0.8803435723606864, "grad_norm": 197.72409057617188, "learning_rate": 4.938582900029437e-07, "loss": 20.0291, "step": 435800 }, { "epoch": 0.8803637729933701, "grad_norm": 461.6060485839844, "learning_rate": 4.937070352098384e-07, "loss": 23.1458, "step": 435810 }, { "epoch": 0.8803839736260539, "grad_norm": 458.0393981933594, "learning_rate": 4.935558023798592e-07, "loss": 18.0468, "step": 435820 }, { "epoch": 0.8804041742587377, "grad_norm": 129.13357543945312, "learning_rate": 4.934045915137419e-07, "loss": 8.1902, "step": 435830 }, { "epoch": 0.8804243748914216, "grad_norm": 549.9965209960938, "learning_rate": 4.932534026122249e-07, "loss": 11.7202, "step": 435840 }, { "epoch": 0.8804445755241054, "grad_norm": 248.71759033203125, "learning_rate": 4.931022356760439e-07, "loss": 24.2159, "step": 435850 }, { "epoch": 0.8804647761567892, "grad_norm": 260.09674072265625, "learning_rate": 4.929510907059354e-07, "loss": 19.5019, "step": 435860 }, { "epoch": 0.880484976789473, "grad_norm": 16.353187561035156, "learning_rate": 4.927999677026374e-07, "loss": 14.9975, "step": 435870 }, { "epoch": 0.8805051774221568, "grad_norm": 2.6839330196380615, "learning_rate": 4.926488666668844e-07, "loss": 12.1912, "step": 435880 }, { "epoch": 0.8805253780548407, "grad_norm": 370.10015869140625, "learning_rate": 4.924977875994159e-07, "loss": 17.5826, "step": 435890 }, { "epoch": 0.8805455786875245, "grad_norm": 219.7120819091797, "learning_rate": 4.92346730500966e-07, "loss": 8.7014, "step": 435900 }, { "epoch": 0.8805657793202083, "grad_norm": 147.40560913085938, "learning_rate": 4.921956953722701e-07, "loss": 10.3238, "step": 435910 }, { "epoch": 0.8805859799528921, "grad_norm": 225.93197631835938, "learning_rate": 4.920446822140673e-07, "loss": 10.7507, "step": 435920 }, { "epoch": 0.8806061805855759, "grad_norm": 371.28826904296875, "learning_rate": 4.918936910270916e-07, "loss": 11.0804, "step": 435930 }, { "epoch": 0.8806263812182598, "grad_norm": 154.0360565185547, "learning_rate": 4.917427218120785e-07, "loss": 22.1393, "step": 435940 }, { "epoch": 0.8806465818509436, "grad_norm": 220.62680053710938, "learning_rate": 4.915917745697645e-07, "loss": 21.1326, "step": 435950 }, { "epoch": 0.8806667824836274, "grad_norm": 273.08770751953125, "learning_rate": 4.914408493008871e-07, "loss": 26.6632, "step": 435960 }, { "epoch": 0.8806869831163112, "grad_norm": 73.09130859375, "learning_rate": 4.912899460061787e-07, "loss": 13.6529, "step": 435970 }, { "epoch": 0.880707183748995, "grad_norm": 258.6308288574219, "learning_rate": 4.911390646863757e-07, "loss": 11.7623, "step": 435980 }, { "epoch": 0.8807273843816789, "grad_norm": 500.3720703125, "learning_rate": 4.909882053422154e-07, "loss": 17.3371, "step": 435990 }, { "epoch": 0.8807475850143627, "grad_norm": 126.74539184570312, "learning_rate": 4.908373679744316e-07, "loss": 17.5183, "step": 436000 }, { "epoch": 0.8807677856470465, "grad_norm": 88.03468322753906, "learning_rate": 4.90686552583759e-07, "loss": 16.3623, "step": 436010 }, { "epoch": 0.8807879862797303, "grad_norm": 169.06326293945312, "learning_rate": 4.905357591709325e-07, "loss": 43.2152, "step": 436020 }, { "epoch": 0.8808081869124141, "grad_norm": 191.628662109375, "learning_rate": 4.9038498773669e-07, "loss": 16.7811, "step": 436030 }, { "epoch": 0.880828387545098, "grad_norm": 302.7685546875, "learning_rate": 4.902342382817626e-07, "loss": 33.722, "step": 436040 }, { "epoch": 0.8808485881777818, "grad_norm": 218.401611328125, "learning_rate": 4.900835108068863e-07, "loss": 9.8418, "step": 436050 }, { "epoch": 0.8808687888104656, "grad_norm": 253.24652099609375, "learning_rate": 4.899328053127966e-07, "loss": 20.1009, "step": 436060 }, { "epoch": 0.8808889894431493, "grad_norm": 271.9893493652344, "learning_rate": 4.89782121800228e-07, "loss": 14.6151, "step": 436070 }, { "epoch": 0.8809091900758331, "grad_norm": 162.2383575439453, "learning_rate": 4.896314602699126e-07, "loss": 15.5056, "step": 436080 }, { "epoch": 0.880929390708517, "grad_norm": 372.5563659667969, "learning_rate": 4.894808207225882e-07, "loss": 22.343, "step": 436090 }, { "epoch": 0.8809495913412008, "grad_norm": 243.0578155517578, "learning_rate": 4.893302031589864e-07, "loss": 14.2327, "step": 436100 }, { "epoch": 0.8809697919738846, "grad_norm": 303.23931884765625, "learning_rate": 4.891796075798416e-07, "loss": 41.0122, "step": 436110 }, { "epoch": 0.8809899926065684, "grad_norm": 368.97467041015625, "learning_rate": 4.890290339858883e-07, "loss": 14.1473, "step": 436120 }, { "epoch": 0.8810101932392522, "grad_norm": 220.75567626953125, "learning_rate": 4.888784823778614e-07, "loss": 14.5418, "step": 436130 }, { "epoch": 0.881030393871936, "grad_norm": 265.8623352050781, "learning_rate": 4.887279527564936e-07, "loss": 8.7599, "step": 436140 }, { "epoch": 0.8810505945046199, "grad_norm": 377.706787109375, "learning_rate": 4.885774451225178e-07, "loss": 17.3379, "step": 436150 }, { "epoch": 0.8810707951373037, "grad_norm": 299.96881103515625, "learning_rate": 4.884269594766689e-07, "loss": 17.8336, "step": 436160 }, { "epoch": 0.8810909957699875, "grad_norm": 175.4420928955078, "learning_rate": 4.8827649581968e-07, "loss": 18.5962, "step": 436170 }, { "epoch": 0.8811111964026713, "grad_norm": 351.2290344238281, "learning_rate": 4.881260541522831e-07, "loss": 27.2167, "step": 436180 }, { "epoch": 0.8811313970353551, "grad_norm": 373.7426452636719, "learning_rate": 4.87975634475214e-07, "loss": 22.5026, "step": 436190 }, { "epoch": 0.881151597668039, "grad_norm": 329.24462890625, "learning_rate": 4.878252367892033e-07, "loss": 16.8049, "step": 436200 }, { "epoch": 0.8811717983007228, "grad_norm": 270.5513610839844, "learning_rate": 4.87674861094986e-07, "loss": 11.1077, "step": 436210 }, { "epoch": 0.8811919989334066, "grad_norm": 193.58004760742188, "learning_rate": 4.875245073932944e-07, "loss": 10.6014, "step": 436220 }, { "epoch": 0.8812121995660904, "grad_norm": 215.2318115234375, "learning_rate": 4.873741756848594e-07, "loss": 22.3199, "step": 436230 }, { "epoch": 0.8812324001987742, "grad_norm": 210.8828125, "learning_rate": 4.87223865970417e-07, "loss": 12.7562, "step": 436240 }, { "epoch": 0.8812526008314581, "grad_norm": 309.1809997558594, "learning_rate": 4.87073578250698e-07, "loss": 12.5274, "step": 436250 }, { "epoch": 0.8812728014641419, "grad_norm": 217.73388671875, "learning_rate": 4.869233125264339e-07, "loss": 9.0682, "step": 436260 }, { "epoch": 0.8812930020968257, "grad_norm": 276.2313537597656, "learning_rate": 4.867730687983585e-07, "loss": 11.0931, "step": 436270 }, { "epoch": 0.8813132027295095, "grad_norm": 293.5137023925781, "learning_rate": 4.866228470672041e-07, "loss": 11.4537, "step": 436280 }, { "epoch": 0.8813334033621933, "grad_norm": 161.2014923095703, "learning_rate": 4.864726473337034e-07, "loss": 20.0816, "step": 436290 }, { "epoch": 0.8813536039948772, "grad_norm": 303.9931640625, "learning_rate": 4.863224695985858e-07, "loss": 18.5378, "step": 436300 }, { "epoch": 0.881373804627561, "grad_norm": 268.39166259765625, "learning_rate": 4.861723138625862e-07, "loss": 16.5742, "step": 436310 }, { "epoch": 0.8813940052602448, "grad_norm": 335.0809631347656, "learning_rate": 4.860221801264358e-07, "loss": 17.8246, "step": 436320 }, { "epoch": 0.8814142058929285, "grad_norm": 234.76483154296875, "learning_rate": 4.858720683908646e-07, "loss": 21.3723, "step": 436330 }, { "epoch": 0.8814344065256123, "grad_norm": 373.08587646484375, "learning_rate": 4.857219786566053e-07, "loss": 22.7518, "step": 436340 }, { "epoch": 0.8814546071582962, "grad_norm": 366.5385437011719, "learning_rate": 4.855719109243917e-07, "loss": 22.0361, "step": 436350 }, { "epoch": 0.88147480779098, "grad_norm": 180.44496154785156, "learning_rate": 4.85421865194951e-07, "loss": 26.9665, "step": 436360 }, { "epoch": 0.8814950084236638, "grad_norm": 551.3493041992188, "learning_rate": 4.852718414690166e-07, "loss": 12.4436, "step": 436370 }, { "epoch": 0.8815152090563476, "grad_norm": 233.517578125, "learning_rate": 4.851218397473206e-07, "loss": 34.3805, "step": 436380 }, { "epoch": 0.8815354096890314, "grad_norm": 461.6867980957031, "learning_rate": 4.84971860030593e-07, "loss": 23.0374, "step": 436390 }, { "epoch": 0.8815556103217153, "grad_norm": 128.4858856201172, "learning_rate": 4.848219023195644e-07, "loss": 9.9658, "step": 436400 }, { "epoch": 0.8815758109543991, "grad_norm": 190.66598510742188, "learning_rate": 4.846719666149668e-07, "loss": 15.6482, "step": 436410 }, { "epoch": 0.8815960115870829, "grad_norm": 330.7430725097656, "learning_rate": 4.845220529175304e-07, "loss": 16.6642, "step": 436420 }, { "epoch": 0.8816162122197667, "grad_norm": 388.33587646484375, "learning_rate": 4.84372161227985e-07, "loss": 13.6561, "step": 436430 }, { "epoch": 0.8816364128524505, "grad_norm": 1.1295737028121948, "learning_rate": 4.842222915470618e-07, "loss": 12.1377, "step": 436440 }, { "epoch": 0.8816566134851344, "grad_norm": 228.8750457763672, "learning_rate": 4.840724438754929e-07, "loss": 21.4074, "step": 436450 }, { "epoch": 0.8816768141178182, "grad_norm": 345.87420654296875, "learning_rate": 4.839226182140072e-07, "loss": 14.3642, "step": 436460 }, { "epoch": 0.881697014750502, "grad_norm": 318.98095703125, "learning_rate": 4.837728145633335e-07, "loss": 20.8921, "step": 436470 }, { "epoch": 0.8817172153831858, "grad_norm": 273.87939453125, "learning_rate": 4.836230329242042e-07, "loss": 21.3956, "step": 436480 }, { "epoch": 0.8817374160158696, "grad_norm": 107.8420181274414, "learning_rate": 4.83473273297348e-07, "loss": 18.0669, "step": 436490 }, { "epoch": 0.8817576166485535, "grad_norm": 210.99746704101562, "learning_rate": 4.833235356834959e-07, "loss": 16.3307, "step": 436500 }, { "epoch": 0.8817778172812373, "grad_norm": 315.83880615234375, "learning_rate": 4.831738200833775e-07, "loss": 14.4722, "step": 436510 }, { "epoch": 0.8817980179139211, "grad_norm": 388.5218505859375, "learning_rate": 4.830241264977209e-07, "loss": 19.1907, "step": 436520 }, { "epoch": 0.8818182185466049, "grad_norm": 235.53318786621094, "learning_rate": 4.828744549272579e-07, "loss": 21.654, "step": 436530 }, { "epoch": 0.8818384191792887, "grad_norm": 498.8838806152344, "learning_rate": 4.827248053727168e-07, "loss": 18.7685, "step": 436540 }, { "epoch": 0.8818586198119726, "grad_norm": 276.6015319824219, "learning_rate": 4.825751778348259e-07, "loss": 16.0047, "step": 436550 }, { "epoch": 0.8818788204446564, "grad_norm": 286.6764221191406, "learning_rate": 4.824255723143162e-07, "loss": 18.0164, "step": 436560 }, { "epoch": 0.8818990210773402, "grad_norm": 358.30426025390625, "learning_rate": 4.822759888119171e-07, "loss": 14.5864, "step": 436570 }, { "epoch": 0.8819192217100239, "grad_norm": 66.34335327148438, "learning_rate": 4.821264273283566e-07, "loss": 10.056, "step": 436580 }, { "epoch": 0.8819394223427077, "grad_norm": 0.0, "learning_rate": 4.819768878643633e-07, "loss": 19.3382, "step": 436590 }, { "epoch": 0.8819596229753915, "grad_norm": 172.5146484375, "learning_rate": 4.818273704206678e-07, "loss": 11.7529, "step": 436600 }, { "epoch": 0.8819798236080754, "grad_norm": 97.26643371582031, "learning_rate": 4.816778749979973e-07, "loss": 17.0861, "step": 436610 }, { "epoch": 0.8820000242407592, "grad_norm": 45.69021987915039, "learning_rate": 4.815284015970801e-07, "loss": 10.7153, "step": 436620 }, { "epoch": 0.882020224873443, "grad_norm": 490.1202697753906, "learning_rate": 4.813789502186456e-07, "loss": 12.2839, "step": 436630 }, { "epoch": 0.8820404255061268, "grad_norm": 163.06820678710938, "learning_rate": 4.812295208634238e-07, "loss": 19.1205, "step": 436640 }, { "epoch": 0.8820606261388106, "grad_norm": 229.81317138671875, "learning_rate": 4.810801135321391e-07, "loss": 26.0161, "step": 436650 }, { "epoch": 0.8820808267714945, "grad_norm": 256.9088439941406, "learning_rate": 4.809307282255221e-07, "loss": 12.7764, "step": 436660 }, { "epoch": 0.8821010274041783, "grad_norm": 494.6328125, "learning_rate": 4.807813649443016e-07, "loss": 24.5318, "step": 436670 }, { "epoch": 0.8821212280368621, "grad_norm": 661.3333740234375, "learning_rate": 4.806320236892048e-07, "loss": 24.8753, "step": 436680 }, { "epoch": 0.8821414286695459, "grad_norm": 251.65919494628906, "learning_rate": 4.804827044609578e-07, "loss": 16.3106, "step": 436690 }, { "epoch": 0.8821616293022297, "grad_norm": 275.04315185546875, "learning_rate": 4.803334072602917e-07, "loss": 14.7408, "step": 436700 }, { "epoch": 0.8821818299349136, "grad_norm": 592.0226440429688, "learning_rate": 4.801841320879319e-07, "loss": 21.9463, "step": 436710 }, { "epoch": 0.8822020305675974, "grad_norm": 362.9889831542969, "learning_rate": 4.800348789446058e-07, "loss": 15.9184, "step": 436720 }, { "epoch": 0.8822222312002812, "grad_norm": 105.56087493896484, "learning_rate": 4.798856478310409e-07, "loss": 18.2868, "step": 436730 }, { "epoch": 0.882242431832965, "grad_norm": 428.64385986328125, "learning_rate": 4.797364387479664e-07, "loss": 25.9225, "step": 436740 }, { "epoch": 0.8822626324656488, "grad_norm": 281.4597473144531, "learning_rate": 4.795872516961087e-07, "loss": 12.4037, "step": 436750 }, { "epoch": 0.8822828330983327, "grad_norm": 430.5715026855469, "learning_rate": 4.794380866761928e-07, "loss": 13.3098, "step": 436760 }, { "epoch": 0.8823030337310165, "grad_norm": 285.2254638671875, "learning_rate": 4.792889436889487e-07, "loss": 7.0888, "step": 436770 }, { "epoch": 0.8823232343637003, "grad_norm": 167.1251678466797, "learning_rate": 4.791398227351024e-07, "loss": 10.9766, "step": 436780 }, { "epoch": 0.8823434349963841, "grad_norm": 247.575439453125, "learning_rate": 4.789907238153785e-07, "loss": 21.4148, "step": 436790 }, { "epoch": 0.8823636356290679, "grad_norm": 445.097412109375, "learning_rate": 4.788416469305068e-07, "loss": 22.3645, "step": 436800 }, { "epoch": 0.8823838362617518, "grad_norm": 122.95352172851562, "learning_rate": 4.786925920812119e-07, "loss": 12.1629, "step": 436810 }, { "epoch": 0.8824040368944356, "grad_norm": 658.0155029296875, "learning_rate": 4.78543559268222e-07, "loss": 23.374, "step": 436820 }, { "epoch": 0.8824242375271194, "grad_norm": 192.72930908203125, "learning_rate": 4.78394548492262e-07, "loss": 18.5223, "step": 436830 }, { "epoch": 0.8824444381598031, "grad_norm": 466.5446472167969, "learning_rate": 4.782455597540576e-07, "loss": 30.9484, "step": 436840 }, { "epoch": 0.8824646387924869, "grad_norm": 350.6367492675781, "learning_rate": 4.780965930543369e-07, "loss": 13.1498, "step": 436850 }, { "epoch": 0.8824848394251708, "grad_norm": 283.62823486328125, "learning_rate": 4.779476483938251e-07, "loss": 13.2417, "step": 436860 }, { "epoch": 0.8825050400578546, "grad_norm": 564.380126953125, "learning_rate": 4.777987257732469e-07, "loss": 20.0031, "step": 436870 }, { "epoch": 0.8825252406905384, "grad_norm": 112.91577911376953, "learning_rate": 4.776498251933292e-07, "loss": 19.3378, "step": 436880 }, { "epoch": 0.8825454413232222, "grad_norm": 480.53521728515625, "learning_rate": 4.775009466547986e-07, "loss": 21.5935, "step": 436890 }, { "epoch": 0.882565641955906, "grad_norm": 91.14021301269531, "learning_rate": 4.773520901583801e-07, "loss": 19.3872, "step": 436900 }, { "epoch": 0.8825858425885899, "grad_norm": 446.8163757324219, "learning_rate": 4.772032557047984e-07, "loss": 23.1284, "step": 436910 }, { "epoch": 0.8826060432212737, "grad_norm": 435.9635314941406, "learning_rate": 4.770544432947799e-07, "loss": 15.7814, "step": 436920 }, { "epoch": 0.8826262438539575, "grad_norm": 419.1697692871094, "learning_rate": 4.769056529290495e-07, "loss": 16.4134, "step": 436930 }, { "epoch": 0.8826464444866413, "grad_norm": 353.28460693359375, "learning_rate": 4.7675688460833145e-07, "loss": 21.9125, "step": 436940 }, { "epoch": 0.8826666451193251, "grad_norm": 260.09649658203125, "learning_rate": 4.766081383333521e-07, "loss": 28.3239, "step": 436950 }, { "epoch": 0.882686845752009, "grad_norm": 375.9036560058594, "learning_rate": 4.7645941410483733e-07, "loss": 17.1259, "step": 436960 }, { "epoch": 0.8827070463846928, "grad_norm": 186.19960021972656, "learning_rate": 4.7631071192350943e-07, "loss": 19.0688, "step": 436970 }, { "epoch": 0.8827272470173766, "grad_norm": 89.21056365966797, "learning_rate": 4.7616203179009445e-07, "loss": 21.1033, "step": 436980 }, { "epoch": 0.8827474476500604, "grad_norm": 96.50436401367188, "learning_rate": 4.760133737053174e-07, "loss": 8.5029, "step": 436990 }, { "epoch": 0.8827676482827442, "grad_norm": 78.41710662841797, "learning_rate": 4.758647376699033e-07, "loss": 14.3095, "step": 437000 }, { "epoch": 0.882787848915428, "grad_norm": 168.3367919921875, "learning_rate": 4.757161236845742e-07, "loss": 13.7055, "step": 437010 }, { "epoch": 0.8828080495481119, "grad_norm": 1005.8809814453125, "learning_rate": 4.755675317500569e-07, "loss": 31.0843, "step": 437020 }, { "epoch": 0.8828282501807957, "grad_norm": 252.2699432373047, "learning_rate": 4.7541896186707517e-07, "loss": 19.0612, "step": 437030 }, { "epoch": 0.8828484508134795, "grad_norm": 394.3023986816406, "learning_rate": 4.752704140363512e-07, "loss": 25.8683, "step": 437040 }, { "epoch": 0.8828686514461633, "grad_norm": 297.669921875, "learning_rate": 4.751218882586106e-07, "loss": 10.4874, "step": 437050 }, { "epoch": 0.8828888520788472, "grad_norm": 200.75941467285156, "learning_rate": 4.749733845345783e-07, "loss": 17.5828, "step": 437060 }, { "epoch": 0.882909052711531, "grad_norm": 963.4557495117188, "learning_rate": 4.748249028649765e-07, "loss": 22.735, "step": 437070 }, { "epoch": 0.8829292533442148, "grad_norm": 240.87510681152344, "learning_rate": 4.7467644325052855e-07, "loss": 16.0939, "step": 437080 }, { "epoch": 0.8829494539768985, "grad_norm": 223.8863525390625, "learning_rate": 4.7452800569195987e-07, "loss": 13.908, "step": 437090 }, { "epoch": 0.8829696546095823, "grad_norm": 433.0531005859375, "learning_rate": 4.743795901899928e-07, "loss": 20.2243, "step": 437100 }, { "epoch": 0.8829898552422661, "grad_norm": 233.21742248535156, "learning_rate": 4.742311967453495e-07, "loss": 14.2396, "step": 437110 }, { "epoch": 0.88301005587495, "grad_norm": 329.5322570800781, "learning_rate": 4.7408282535875593e-07, "loss": 21.1513, "step": 437120 }, { "epoch": 0.8830302565076338, "grad_norm": 52.65104293823242, "learning_rate": 4.739344760309322e-07, "loss": 14.8902, "step": 437130 }, { "epoch": 0.8830504571403176, "grad_norm": 41.04833984375, "learning_rate": 4.737861487626039e-07, "loss": 11.3489, "step": 437140 }, { "epoch": 0.8830706577730014, "grad_norm": 256.1357421875, "learning_rate": 4.7363784355449303e-07, "loss": 19.813, "step": 437150 }, { "epoch": 0.8830908584056852, "grad_norm": 418.2151184082031, "learning_rate": 4.734895604073214e-07, "loss": 14.3052, "step": 437160 }, { "epoch": 0.8831110590383691, "grad_norm": 422.72125244140625, "learning_rate": 4.7334129932181283e-07, "loss": 31.8319, "step": 437170 }, { "epoch": 0.8831312596710529, "grad_norm": 189.4805145263672, "learning_rate": 4.731930602986906e-07, "loss": 10.4662, "step": 437180 }, { "epoch": 0.8831514603037367, "grad_norm": 305.8288879394531, "learning_rate": 4.730448433386764e-07, "loss": 14.6863, "step": 437190 }, { "epoch": 0.8831716609364205, "grad_norm": 363.3165283203125, "learning_rate": 4.728966484424913e-07, "loss": 27.3676, "step": 437200 }, { "epoch": 0.8831918615691043, "grad_norm": 323.6328125, "learning_rate": 4.727484756108602e-07, "loss": 15.2025, "step": 437210 }, { "epoch": 0.8832120622017882, "grad_norm": 195.5129852294922, "learning_rate": 4.726003248445038e-07, "loss": 17.5062, "step": 437220 }, { "epoch": 0.883232262834472, "grad_norm": 144.901611328125, "learning_rate": 4.724521961441436e-07, "loss": 16.6498, "step": 437230 }, { "epoch": 0.8832524634671558, "grad_norm": 9.249590873718262, "learning_rate": 4.723040895105019e-07, "loss": 19.7237, "step": 437240 }, { "epoch": 0.8832726640998396, "grad_norm": 296.315185546875, "learning_rate": 4.72156004944303e-07, "loss": 14.0651, "step": 437250 }, { "epoch": 0.8832928647325234, "grad_norm": 241.59246826171875, "learning_rate": 4.720079424462648e-07, "loss": 18.9439, "step": 437260 }, { "epoch": 0.8833130653652073, "grad_norm": 283.0418701171875, "learning_rate": 4.718599020171105e-07, "loss": 17.934, "step": 437270 }, { "epoch": 0.8833332659978911, "grad_norm": 101.30632781982422, "learning_rate": 4.7171188365756235e-07, "loss": 19.9709, "step": 437280 }, { "epoch": 0.8833534666305749, "grad_norm": 0.0, "learning_rate": 4.71563887368342e-07, "loss": 9.3875, "step": 437290 }, { "epoch": 0.8833736672632587, "grad_norm": 20.22136688232422, "learning_rate": 4.714159131501689e-07, "loss": 7.5542, "step": 437300 }, { "epoch": 0.8833938678959425, "grad_norm": 229.6137237548828, "learning_rate": 4.7126796100376625e-07, "loss": 15.3226, "step": 437310 }, { "epoch": 0.8834140685286264, "grad_norm": 3.1302549839019775, "learning_rate": 4.7112003092985414e-07, "loss": 14.4668, "step": 437320 }, { "epoch": 0.8834342691613102, "grad_norm": 221.1377716064453, "learning_rate": 4.7097212292915307e-07, "loss": 16.8592, "step": 437330 }, { "epoch": 0.883454469793994, "grad_norm": 258.5196533203125, "learning_rate": 4.7082423700238413e-07, "loss": 13.0835, "step": 437340 }, { "epoch": 0.8834746704266777, "grad_norm": 318.5444030761719, "learning_rate": 4.7067637315027005e-07, "loss": 12.993, "step": 437350 }, { "epoch": 0.8834948710593615, "grad_norm": 204.92083740234375, "learning_rate": 4.705285313735297e-07, "loss": 9.7958, "step": 437360 }, { "epoch": 0.8835150716920454, "grad_norm": 429.11932373046875, "learning_rate": 4.703807116728831e-07, "loss": 14.774, "step": 437370 }, { "epoch": 0.8835352723247292, "grad_norm": 182.92518615722656, "learning_rate": 4.7023291404905245e-07, "loss": 12.807, "step": 437380 }, { "epoch": 0.883555472957413, "grad_norm": 0.0, "learning_rate": 4.700851385027566e-07, "loss": 15.9494, "step": 437390 }, { "epoch": 0.8835756735900968, "grad_norm": 277.5395812988281, "learning_rate": 4.699373850347161e-07, "loss": 17.1926, "step": 437400 }, { "epoch": 0.8835958742227806, "grad_norm": 423.8408508300781, "learning_rate": 4.69789653645652e-07, "loss": 20.0163, "step": 437410 }, { "epoch": 0.8836160748554645, "grad_norm": 439.76751708984375, "learning_rate": 4.6964194433628317e-07, "loss": 11.1602, "step": 437420 }, { "epoch": 0.8836362754881483, "grad_norm": 369.44580078125, "learning_rate": 4.6949425710733076e-07, "loss": 8.9738, "step": 437430 }, { "epoch": 0.8836564761208321, "grad_norm": 239.2298126220703, "learning_rate": 4.693465919595136e-07, "loss": 10.1647, "step": 437440 }, { "epoch": 0.8836766767535159, "grad_norm": 292.31805419921875, "learning_rate": 4.691989488935511e-07, "loss": 16.8424, "step": 437450 }, { "epoch": 0.8836968773861997, "grad_norm": 199.2280731201172, "learning_rate": 4.690513279101638e-07, "loss": 13.3979, "step": 437460 }, { "epoch": 0.8837170780188836, "grad_norm": 0.0, "learning_rate": 4.689037290100712e-07, "loss": 16.1303, "step": 437470 }, { "epoch": 0.8837372786515674, "grad_norm": 319.63580322265625, "learning_rate": 4.687561521939915e-07, "loss": 14.6304, "step": 437480 }, { "epoch": 0.8837574792842512, "grad_norm": 86.21988677978516, "learning_rate": 4.686085974626442e-07, "loss": 16.5076, "step": 437490 }, { "epoch": 0.883777679916935, "grad_norm": 683.4754638671875, "learning_rate": 4.6846106481675035e-07, "loss": 36.1261, "step": 437500 }, { "epoch": 0.8837978805496188, "grad_norm": 14.934298515319824, "learning_rate": 4.683135542570277e-07, "loss": 14.7263, "step": 437510 }, { "epoch": 0.8838180811823027, "grad_norm": 414.68798828125, "learning_rate": 4.681660657841941e-07, "loss": 15.5596, "step": 437520 }, { "epoch": 0.8838382818149865, "grad_norm": 350.0975341796875, "learning_rate": 4.6801859939896997e-07, "loss": 20.273, "step": 437530 }, { "epoch": 0.8838584824476703, "grad_norm": 189.8002471923828, "learning_rate": 4.678711551020743e-07, "loss": 19.346, "step": 437540 }, { "epoch": 0.8838786830803541, "grad_norm": 1298.6451416015625, "learning_rate": 4.677237328942236e-07, "loss": 12.3551, "step": 437550 }, { "epoch": 0.8838988837130379, "grad_norm": 343.4154357910156, "learning_rate": 4.6757633277613734e-07, "loss": 14.7542, "step": 437560 }, { "epoch": 0.8839190843457218, "grad_norm": 411.26324462890625, "learning_rate": 4.674289547485367e-07, "loss": 18.4803, "step": 437570 }, { "epoch": 0.8839392849784056, "grad_norm": 543.5770874023438, "learning_rate": 4.672815988121354e-07, "loss": 20.6617, "step": 437580 }, { "epoch": 0.8839594856110894, "grad_norm": 30.991418838500977, "learning_rate": 4.6713426496765413e-07, "loss": 19.4316, "step": 437590 }, { "epoch": 0.8839796862437731, "grad_norm": 0.0, "learning_rate": 4.6698695321581165e-07, "loss": 20.5669, "step": 437600 }, { "epoch": 0.8839998868764569, "grad_norm": 265.6228942871094, "learning_rate": 4.6683966355732466e-07, "loss": 12.0727, "step": 437610 }, { "epoch": 0.8840200875091407, "grad_norm": 168.62765502929688, "learning_rate": 4.6669239599291093e-07, "loss": 20.7217, "step": 437620 }, { "epoch": 0.8840402881418246, "grad_norm": 334.17340087890625, "learning_rate": 4.665451505232882e-07, "loss": 15.5412, "step": 437630 }, { "epoch": 0.8840604887745084, "grad_norm": 299.30450439453125, "learning_rate": 4.663979271491764e-07, "loss": 21.0342, "step": 437640 }, { "epoch": 0.8840806894071922, "grad_norm": 317.87091064453125, "learning_rate": 4.662507258712895e-07, "loss": 11.1997, "step": 437650 }, { "epoch": 0.884100890039876, "grad_norm": 528.5407104492188, "learning_rate": 4.6610354669034686e-07, "loss": 24.1234, "step": 437660 }, { "epoch": 0.8841210906725598, "grad_norm": 506.4310302734375, "learning_rate": 4.6595638960706624e-07, "loss": 18.4001, "step": 437670 }, { "epoch": 0.8841412913052437, "grad_norm": 547.1437377929688, "learning_rate": 4.6580925462216487e-07, "loss": 21.1345, "step": 437680 }, { "epoch": 0.8841614919379275, "grad_norm": 210.5221710205078, "learning_rate": 4.656621417363577e-07, "loss": 31.4187, "step": 437690 }, { "epoch": 0.8841816925706113, "grad_norm": 25.963443756103516, "learning_rate": 4.655150509503642e-07, "loss": 7.2942, "step": 437700 }, { "epoch": 0.8842018932032951, "grad_norm": 337.7308044433594, "learning_rate": 4.65367982264901e-07, "loss": 12.6394, "step": 437710 }, { "epoch": 0.8842220938359789, "grad_norm": 339.14752197265625, "learning_rate": 4.6522093568068307e-07, "loss": 18.168, "step": 437720 }, { "epoch": 0.8842422944686628, "grad_norm": 231.02685546875, "learning_rate": 4.650739111984287e-07, "loss": 16.6801, "step": 437730 }, { "epoch": 0.8842624951013466, "grad_norm": 349.98480224609375, "learning_rate": 4.649269088188535e-07, "loss": 12.3236, "step": 437740 }, { "epoch": 0.8842826957340304, "grad_norm": 234.3077850341797, "learning_rate": 4.647799285426757e-07, "loss": 18.8477, "step": 437750 }, { "epoch": 0.8843028963667142, "grad_norm": 482.90771484375, "learning_rate": 4.646329703706104e-07, "loss": 38.1848, "step": 437760 }, { "epoch": 0.884323096999398, "grad_norm": 187.89315795898438, "learning_rate": 4.644860343033725e-07, "loss": 8.9286, "step": 437770 }, { "epoch": 0.8843432976320819, "grad_norm": 86.51445770263672, "learning_rate": 4.6433912034168083e-07, "loss": 10.5808, "step": 437780 }, { "epoch": 0.8843634982647657, "grad_norm": 283.5401611328125, "learning_rate": 4.6419222848624933e-07, "loss": 13.4795, "step": 437790 }, { "epoch": 0.8843836988974495, "grad_norm": 356.6964416503906, "learning_rate": 4.640453587377958e-07, "loss": 17.2985, "step": 437800 }, { "epoch": 0.8844038995301333, "grad_norm": 459.8682861328125, "learning_rate": 4.63898511097034e-07, "loss": 19.477, "step": 437810 }, { "epoch": 0.8844241001628171, "grad_norm": 171.66075134277344, "learning_rate": 4.6375168556468175e-07, "loss": 24.7088, "step": 437820 }, { "epoch": 0.884444300795501, "grad_norm": 596.6193237304688, "learning_rate": 4.636048821414535e-07, "loss": 16.6327, "step": 437830 }, { "epoch": 0.8844645014281848, "grad_norm": 359.46356201171875, "learning_rate": 4.6345810082806363e-07, "loss": 17.6766, "step": 437840 }, { "epoch": 0.8844847020608686, "grad_norm": 394.1361083984375, "learning_rate": 4.6331134162522994e-07, "loss": 14.17, "step": 437850 }, { "epoch": 0.8845049026935523, "grad_norm": 244.41360473632812, "learning_rate": 4.631646045336663e-07, "loss": 15.9952, "step": 437860 }, { "epoch": 0.8845251033262361, "grad_norm": 228.95028686523438, "learning_rate": 4.6301788955408765e-07, "loss": 22.1281, "step": 437870 }, { "epoch": 0.88454530395892, "grad_norm": 470.039306640625, "learning_rate": 4.62871196687209e-07, "loss": 18.5186, "step": 437880 }, { "epoch": 0.8845655045916038, "grad_norm": 213.72068786621094, "learning_rate": 4.6272452593374763e-07, "loss": 15.4759, "step": 437890 }, { "epoch": 0.8845857052242876, "grad_norm": 152.71107482910156, "learning_rate": 4.625778772944156e-07, "loss": 16.1006, "step": 437900 }, { "epoch": 0.8846059058569714, "grad_norm": 114.55770111083984, "learning_rate": 4.6243125076992857e-07, "loss": 9.6908, "step": 437910 }, { "epoch": 0.8846261064896552, "grad_norm": 112.05704498291016, "learning_rate": 4.62284646361002e-07, "loss": 13.8483, "step": 437920 }, { "epoch": 0.884646307122339, "grad_norm": 312.3384094238281, "learning_rate": 4.6213806406834926e-07, "loss": 24.441, "step": 437930 }, { "epoch": 0.8846665077550229, "grad_norm": 289.9527587890625, "learning_rate": 4.6199150389268476e-07, "loss": 13.0452, "step": 437940 }, { "epoch": 0.8846867083877067, "grad_norm": 366.7112731933594, "learning_rate": 4.6184496583472293e-07, "loss": 32.1492, "step": 437950 }, { "epoch": 0.8847069090203905, "grad_norm": 31.300445556640625, "learning_rate": 4.616984498951793e-07, "loss": 11.4439, "step": 437960 }, { "epoch": 0.8847271096530743, "grad_norm": 639.13623046875, "learning_rate": 4.6155195607476723e-07, "loss": 12.9683, "step": 437970 }, { "epoch": 0.8847473102857581, "grad_norm": 304.33856201171875, "learning_rate": 4.614054843741994e-07, "loss": 10.8003, "step": 437980 }, { "epoch": 0.884767510918442, "grad_norm": 62.580081939697266, "learning_rate": 4.61259034794192e-07, "loss": 12.7833, "step": 437990 }, { "epoch": 0.8847877115511258, "grad_norm": 411.7159118652344, "learning_rate": 4.6111260733545714e-07, "loss": 29.6107, "step": 438000 }, { "epoch": 0.8848079121838096, "grad_norm": 129.64031982421875, "learning_rate": 4.6096620199870824e-07, "loss": 9.6474, "step": 438010 }, { "epoch": 0.8848281128164934, "grad_norm": 163.2689208984375, "learning_rate": 4.6081981878466077e-07, "loss": 16.459, "step": 438020 }, { "epoch": 0.8848483134491772, "grad_norm": 327.9782409667969, "learning_rate": 4.606734576940253e-07, "loss": 17.2054, "step": 438030 }, { "epoch": 0.8848685140818611, "grad_norm": 0.0, "learning_rate": 4.6052711872751843e-07, "loss": 9.4346, "step": 438040 }, { "epoch": 0.8848887147145449, "grad_norm": 242.32127380371094, "learning_rate": 4.6038080188585135e-07, "loss": 22.6262, "step": 438050 }, { "epoch": 0.8849089153472287, "grad_norm": 167.4984893798828, "learning_rate": 4.602345071697373e-07, "loss": 12.9228, "step": 438060 }, { "epoch": 0.8849291159799125, "grad_norm": 128.27276611328125, "learning_rate": 4.600882345798902e-07, "loss": 23.6155, "step": 438070 }, { "epoch": 0.8849493166125963, "grad_norm": 116.59937286376953, "learning_rate": 4.599419841170216e-07, "loss": 14.5997, "step": 438080 }, { "epoch": 0.8849695172452802, "grad_norm": 353.3071594238281, "learning_rate": 4.5979575578184554e-07, "loss": 18.4459, "step": 438090 }, { "epoch": 0.884989717877964, "grad_norm": 451.44781494140625, "learning_rate": 4.5964954957507414e-07, "loss": 24.7281, "step": 438100 }, { "epoch": 0.8850099185106478, "grad_norm": 559.2554321289062, "learning_rate": 4.595033654974207e-07, "loss": 12.8119, "step": 438110 }, { "epoch": 0.8850301191433315, "grad_norm": 220.00465393066406, "learning_rate": 4.593572035495969e-07, "loss": 15.8552, "step": 438120 }, { "epoch": 0.8850503197760153, "grad_norm": 163.78689575195312, "learning_rate": 4.592110637323149e-07, "loss": 9.1616, "step": 438130 }, { "epoch": 0.8850705204086992, "grad_norm": 330.65069580078125, "learning_rate": 4.5906494604628816e-07, "loss": 17.3459, "step": 438140 }, { "epoch": 0.885090721041383, "grad_norm": 313.33856201171875, "learning_rate": 4.5891885049222815e-07, "loss": 10.0967, "step": 438150 }, { "epoch": 0.8851109216740668, "grad_norm": 459.80084228515625, "learning_rate": 4.587727770708461e-07, "loss": 12.1347, "step": 438160 }, { "epoch": 0.8851311223067506, "grad_norm": 378.82647705078125, "learning_rate": 4.5862672578285475e-07, "loss": 18.2424, "step": 438170 }, { "epoch": 0.8851513229394344, "grad_norm": 377.5517272949219, "learning_rate": 4.5848069662896786e-07, "loss": 18.4155, "step": 438180 }, { "epoch": 0.8851715235721183, "grad_norm": 230.7869110107422, "learning_rate": 4.5833468960989333e-07, "loss": 20.4322, "step": 438190 }, { "epoch": 0.8851917242048021, "grad_norm": 185.71571350097656, "learning_rate": 4.581887047263445e-07, "loss": 36.8069, "step": 438200 }, { "epoch": 0.8852119248374859, "grad_norm": 222.73947143554688, "learning_rate": 4.5804274197903396e-07, "loss": 10.4, "step": 438210 }, { "epoch": 0.8852321254701697, "grad_norm": 309.8128356933594, "learning_rate": 4.5789680136867245e-07, "loss": 13.4804, "step": 438220 }, { "epoch": 0.8852523261028535, "grad_norm": 131.28533935546875, "learning_rate": 4.577508828959698e-07, "loss": 11.2862, "step": 438230 }, { "epoch": 0.8852725267355374, "grad_norm": 464.95977783203125, "learning_rate": 4.5760498656163886e-07, "loss": 29.445, "step": 438240 }, { "epoch": 0.8852927273682212, "grad_norm": 556.691650390625, "learning_rate": 4.5745911236639186e-07, "loss": 24.9435, "step": 438250 }, { "epoch": 0.885312928000905, "grad_norm": 409.10546875, "learning_rate": 4.5731326031093645e-07, "loss": 18.3854, "step": 438260 }, { "epoch": 0.8853331286335888, "grad_norm": 270.2962341308594, "learning_rate": 4.57167430395985e-07, "loss": 17.4197, "step": 438270 }, { "epoch": 0.8853533292662726, "grad_norm": 513.6214599609375, "learning_rate": 4.5702162262224957e-07, "loss": 16.8293, "step": 438280 }, { "epoch": 0.8853735298989565, "grad_norm": 317.1377868652344, "learning_rate": 4.5687583699044027e-07, "loss": 19.0154, "step": 438290 }, { "epoch": 0.8853937305316403, "grad_norm": 459.9891052246094, "learning_rate": 4.567300735012653e-07, "loss": 17.5073, "step": 438300 }, { "epoch": 0.8854139311643241, "grad_norm": 335.3582458496094, "learning_rate": 4.565843321554386e-07, "loss": 7.7538, "step": 438310 }, { "epoch": 0.8854341317970079, "grad_norm": 335.785400390625, "learning_rate": 4.5643861295366854e-07, "loss": 21.6278, "step": 438320 }, { "epoch": 0.8854543324296917, "grad_norm": 595.5294799804688, "learning_rate": 4.562929158966645e-07, "loss": 19.6184, "step": 438330 }, { "epoch": 0.8854745330623756, "grad_norm": 570.8287963867188, "learning_rate": 4.561472409851386e-07, "loss": 28.1183, "step": 438340 }, { "epoch": 0.8854947336950594, "grad_norm": 185.64492797851562, "learning_rate": 4.5600158821979933e-07, "loss": 20.9304, "step": 438350 }, { "epoch": 0.8855149343277432, "grad_norm": 22.53156280517578, "learning_rate": 4.5585595760135825e-07, "loss": 12.0694, "step": 438360 }, { "epoch": 0.8855351349604269, "grad_norm": 179.72341918945312, "learning_rate": 4.557103491305237e-07, "loss": 23.1451, "step": 438370 }, { "epoch": 0.8855553355931107, "grad_norm": 263.9036865234375, "learning_rate": 4.555647628080051e-07, "loss": 8.5029, "step": 438380 }, { "epoch": 0.8855755362257945, "grad_norm": 118.56056213378906, "learning_rate": 4.554191986345136e-07, "loss": 18.9355, "step": 438390 }, { "epoch": 0.8855957368584784, "grad_norm": 185.22677612304688, "learning_rate": 4.552736566107563e-07, "loss": 10.1332, "step": 438400 }, { "epoch": 0.8856159374911622, "grad_norm": 14.288604736328125, "learning_rate": 4.551281367374455e-07, "loss": 8.2533, "step": 438410 }, { "epoch": 0.885636138123846, "grad_norm": 175.18568420410156, "learning_rate": 4.5498263901528784e-07, "loss": 13.3607, "step": 438420 }, { "epoch": 0.8856563387565298, "grad_norm": 251.22047424316406, "learning_rate": 4.548371634449944e-07, "loss": 12.3873, "step": 438430 }, { "epoch": 0.8856765393892136, "grad_norm": 319.089111328125, "learning_rate": 4.546917100272735e-07, "loss": 8.8985, "step": 438440 }, { "epoch": 0.8856967400218975, "grad_norm": 176.71875, "learning_rate": 4.5454627876283295e-07, "loss": 18.8316, "step": 438450 }, { "epoch": 0.8857169406545813, "grad_norm": 354.1561279296875, "learning_rate": 4.5440086965238326e-07, "loss": 13.3039, "step": 438460 }, { "epoch": 0.8857371412872651, "grad_norm": 706.0140991210938, "learning_rate": 4.542554826966328e-07, "loss": 14.5978, "step": 438470 }, { "epoch": 0.8857573419199489, "grad_norm": 330.8226623535156, "learning_rate": 4.541101178962887e-07, "loss": 20.6027, "step": 438480 }, { "epoch": 0.8857775425526327, "grad_norm": 191.33779907226562, "learning_rate": 4.539647752520604e-07, "loss": 14.3021, "step": 438490 }, { "epoch": 0.8857977431853166, "grad_norm": 78.56890869140625, "learning_rate": 4.538194547646574e-07, "loss": 31.2673, "step": 438500 }, { "epoch": 0.8858179438180004, "grad_norm": 304.31842041015625, "learning_rate": 4.5367415643478683e-07, "loss": 28.3919, "step": 438510 }, { "epoch": 0.8858381444506842, "grad_norm": 619.509033203125, "learning_rate": 4.5352888026315654e-07, "loss": 26.8117, "step": 438520 }, { "epoch": 0.885858345083368, "grad_norm": 136.1430206298828, "learning_rate": 4.533836262504759e-07, "loss": 15.978, "step": 438530 }, { "epoch": 0.8858785457160518, "grad_norm": 127.50112915039062, "learning_rate": 4.5323839439745163e-07, "loss": 43.1338, "step": 438540 }, { "epoch": 0.8858987463487357, "grad_norm": 525.2925415039062, "learning_rate": 4.5309318470479144e-07, "loss": 19.3131, "step": 438550 }, { "epoch": 0.8859189469814195, "grad_norm": 124.81487274169922, "learning_rate": 4.529479971732031e-07, "loss": 15.0068, "step": 438560 }, { "epoch": 0.8859391476141033, "grad_norm": 440.443603515625, "learning_rate": 4.528028318033961e-07, "loss": 18.4901, "step": 438570 }, { "epoch": 0.8859593482467871, "grad_norm": 203.86892700195312, "learning_rate": 4.526576885960765e-07, "loss": 12.414, "step": 438580 }, { "epoch": 0.885979548879471, "grad_norm": 482.45123291015625, "learning_rate": 4.5251256755195093e-07, "loss": 15.0141, "step": 438590 }, { "epoch": 0.8859997495121548, "grad_norm": 397.09820556640625, "learning_rate": 4.523674686717283e-07, "loss": 14.082, "step": 438600 }, { "epoch": 0.8860199501448386, "grad_norm": 184.38796997070312, "learning_rate": 4.522223919561153e-07, "loss": 8.2287, "step": 438610 }, { "epoch": 0.8860401507775224, "grad_norm": 179.822509765625, "learning_rate": 4.520773374058179e-07, "loss": 12.3596, "step": 438620 }, { "epoch": 0.8860603514102061, "grad_norm": 267.5096435546875, "learning_rate": 4.519323050215446e-07, "loss": 9.7496, "step": 438630 }, { "epoch": 0.8860805520428899, "grad_norm": 1529.614013671875, "learning_rate": 4.5178729480400084e-07, "loss": 43.2379, "step": 438640 }, { "epoch": 0.8861007526755738, "grad_norm": 303.8629455566406, "learning_rate": 4.51642306753895e-07, "loss": 11.7304, "step": 438650 }, { "epoch": 0.8861209533082576, "grad_norm": 57.34650421142578, "learning_rate": 4.514973408719331e-07, "loss": 19.2086, "step": 438660 }, { "epoch": 0.8861411539409414, "grad_norm": 291.76080322265625, "learning_rate": 4.513523971588202e-07, "loss": 14.6775, "step": 438670 }, { "epoch": 0.8861613545736252, "grad_norm": 784.0736694335938, "learning_rate": 4.512074756152651e-07, "loss": 80.4675, "step": 438680 }, { "epoch": 0.886181555206309, "grad_norm": 216.25804138183594, "learning_rate": 4.5106257624197237e-07, "loss": 16.916, "step": 438690 }, { "epoch": 0.8862017558389929, "grad_norm": 420.09442138671875, "learning_rate": 4.5091769903964965e-07, "loss": 18.0371, "step": 438700 }, { "epoch": 0.8862219564716767, "grad_norm": 21.776302337646484, "learning_rate": 4.5077284400900147e-07, "loss": 33.2878, "step": 438710 }, { "epoch": 0.8862421571043605, "grad_norm": 222.6472625732422, "learning_rate": 4.5062801115073607e-07, "loss": 17.3187, "step": 438720 }, { "epoch": 0.8862623577370443, "grad_norm": 164.23194885253906, "learning_rate": 4.504832004655574e-07, "loss": 10.0259, "step": 438730 }, { "epoch": 0.8862825583697281, "grad_norm": 326.33038330078125, "learning_rate": 4.503384119541709e-07, "loss": 12.61, "step": 438740 }, { "epoch": 0.886302759002412, "grad_norm": 243.9933624267578, "learning_rate": 4.501936456172845e-07, "loss": 24.4791, "step": 438750 }, { "epoch": 0.8863229596350958, "grad_norm": 432.9656677246094, "learning_rate": 4.50048901455602e-07, "loss": 19.3517, "step": 438760 }, { "epoch": 0.8863431602677796, "grad_norm": 174.36346435546875, "learning_rate": 4.4990417946982836e-07, "loss": 9.2157, "step": 438770 }, { "epoch": 0.8863633609004634, "grad_norm": 803.330322265625, "learning_rate": 4.4975947966067023e-07, "loss": 30.4556, "step": 438780 }, { "epoch": 0.8863835615331472, "grad_norm": 331.22705078125, "learning_rate": 4.4961480202883434e-07, "loss": 14.0299, "step": 438790 }, { "epoch": 0.886403762165831, "grad_norm": 437.1974182128906, "learning_rate": 4.494701465750217e-07, "loss": 25.7276, "step": 438800 }, { "epoch": 0.8864239627985149, "grad_norm": 129.12181091308594, "learning_rate": 4.4932551329994023e-07, "loss": 12.1632, "step": 438810 }, { "epoch": 0.8864441634311987, "grad_norm": 211.15261840820312, "learning_rate": 4.4918090220429476e-07, "loss": 10.5796, "step": 438820 }, { "epoch": 0.8864643640638825, "grad_norm": 124.55679321289062, "learning_rate": 4.490363132887904e-07, "loss": 10.6447, "step": 438830 }, { "epoch": 0.8864845646965663, "grad_norm": 690.0352783203125, "learning_rate": 4.4889174655412924e-07, "loss": 19.7262, "step": 438840 }, { "epoch": 0.8865047653292502, "grad_norm": 204.7061004638672, "learning_rate": 4.487472020010181e-07, "loss": 9.3996, "step": 438850 }, { "epoch": 0.886524965961934, "grad_norm": 252.89378356933594, "learning_rate": 4.4860267963016293e-07, "loss": 13.7279, "step": 438860 }, { "epoch": 0.8865451665946178, "grad_norm": 115.66488647460938, "learning_rate": 4.484581794422643e-07, "loss": 9.3315, "step": 438870 }, { "epoch": 0.8865653672273015, "grad_norm": 881.0204467773438, "learning_rate": 4.48313701438029e-07, "loss": 21.6291, "step": 438880 }, { "epoch": 0.8865855678599853, "grad_norm": 255.3397979736328, "learning_rate": 4.4816924561816076e-07, "loss": 16.0989, "step": 438890 }, { "epoch": 0.8866057684926691, "grad_norm": 393.5469055175781, "learning_rate": 4.480248119833641e-07, "loss": 12.8053, "step": 438900 }, { "epoch": 0.886625969125353, "grad_norm": 118.80579376220703, "learning_rate": 4.4788040053434124e-07, "loss": 14.7572, "step": 438910 }, { "epoch": 0.8866461697580368, "grad_norm": 154.90057373046875, "learning_rate": 4.477360112717982e-07, "loss": 20.8149, "step": 438920 }, { "epoch": 0.8866663703907206, "grad_norm": 412.5389709472656, "learning_rate": 4.475916441964379e-07, "loss": 18.8692, "step": 438930 }, { "epoch": 0.8866865710234044, "grad_norm": 312.3932189941406, "learning_rate": 4.474472993089629e-07, "loss": 15.959, "step": 438940 }, { "epoch": 0.8867067716560882, "grad_norm": 548.4207153320312, "learning_rate": 4.473029766100784e-07, "loss": 36.9365, "step": 438950 }, { "epoch": 0.8867269722887721, "grad_norm": 67.55258178710938, "learning_rate": 4.471586761004859e-07, "loss": 6.5442, "step": 438960 }, { "epoch": 0.8867471729214559, "grad_norm": 121.97498321533203, "learning_rate": 4.4701439778089105e-07, "loss": 18.2159, "step": 438970 }, { "epoch": 0.8867673735541397, "grad_norm": 12.259499549865723, "learning_rate": 4.4687014165199547e-07, "loss": 11.6851, "step": 438980 }, { "epoch": 0.8867875741868235, "grad_norm": 229.6267547607422, "learning_rate": 4.46725907714502e-07, "loss": 31.3248, "step": 438990 }, { "epoch": 0.8868077748195073, "grad_norm": 301.18377685546875, "learning_rate": 4.4658169596911493e-07, "loss": 18.4136, "step": 439000 }, { "epoch": 0.8868279754521912, "grad_norm": 295.84893798828125, "learning_rate": 4.464375064165355e-07, "loss": 17.0115, "step": 439010 }, { "epoch": 0.886848176084875, "grad_norm": 521.0673217773438, "learning_rate": 4.4629333905746864e-07, "loss": 20.9325, "step": 439020 }, { "epoch": 0.8868683767175588, "grad_norm": 626.5892333984375, "learning_rate": 4.461491938926144e-07, "loss": 23.6057, "step": 439030 }, { "epoch": 0.8868885773502426, "grad_norm": 107.52507781982422, "learning_rate": 4.4600507092267767e-07, "loss": 14.1313, "step": 439040 }, { "epoch": 0.8869087779829264, "grad_norm": 322.2767639160156, "learning_rate": 4.4586097014836017e-07, "loss": 9.7177, "step": 439050 }, { "epoch": 0.8869289786156103, "grad_norm": 217.96405029296875, "learning_rate": 4.4571689157036244e-07, "loss": 9.5048, "step": 439060 }, { "epoch": 0.8869491792482941, "grad_norm": 83.07560729980469, "learning_rate": 4.455728351893895e-07, "loss": 18.3758, "step": 439070 }, { "epoch": 0.8869693798809779, "grad_norm": 870.941650390625, "learning_rate": 4.454288010061425e-07, "loss": 33.5819, "step": 439080 }, { "epoch": 0.8869895805136617, "grad_norm": 263.697021484375, "learning_rate": 4.4528478902132187e-07, "loss": 11.3598, "step": 439090 }, { "epoch": 0.8870097811463455, "grad_norm": 109.19169616699219, "learning_rate": 4.4514079923563103e-07, "loss": 16.8128, "step": 439100 }, { "epoch": 0.8870299817790294, "grad_norm": 266.2052001953125, "learning_rate": 4.449968316497721e-07, "loss": 13.9623, "step": 439110 }, { "epoch": 0.8870501824117132, "grad_norm": 316.3713073730469, "learning_rate": 4.448528862644458e-07, "loss": 11.0006, "step": 439120 }, { "epoch": 0.887070383044397, "grad_norm": 274.6880187988281, "learning_rate": 4.447089630803536e-07, "loss": 25.449, "step": 439130 }, { "epoch": 0.8870905836770807, "grad_norm": 915.1109619140625, "learning_rate": 4.445650620981984e-07, "loss": 16.723, "step": 439140 }, { "epoch": 0.8871107843097645, "grad_norm": 226.83387756347656, "learning_rate": 4.444211833186807e-07, "loss": 16.3436, "step": 439150 }, { "epoch": 0.8871309849424484, "grad_norm": 470.22509765625, "learning_rate": 4.4427732674250045e-07, "loss": 20.8528, "step": 439160 }, { "epoch": 0.8871511855751322, "grad_norm": 277.1685485839844, "learning_rate": 4.4413349237036e-07, "loss": 14.1202, "step": 439170 }, { "epoch": 0.887171386207816, "grad_norm": 228.85626220703125, "learning_rate": 4.4398968020296143e-07, "loss": 17.8448, "step": 439180 }, { "epoch": 0.8871915868404998, "grad_norm": 489.93524169921875, "learning_rate": 4.4384589024100423e-07, "loss": 29.0556, "step": 439190 }, { "epoch": 0.8872117874731836, "grad_norm": 304.1772155761719, "learning_rate": 4.4370212248518895e-07, "loss": 15.8423, "step": 439200 }, { "epoch": 0.8872319881058675, "grad_norm": 461.1487731933594, "learning_rate": 4.4355837693621786e-07, "loss": 19.2313, "step": 439210 }, { "epoch": 0.8872521887385513, "grad_norm": 301.6158447265625, "learning_rate": 4.434146535947903e-07, "loss": 19.8144, "step": 439220 }, { "epoch": 0.8872723893712351, "grad_norm": 96.90778350830078, "learning_rate": 4.4327095246160636e-07, "loss": 16.7332, "step": 439230 }, { "epoch": 0.8872925900039189, "grad_norm": 243.5862579345703, "learning_rate": 4.4312727353736816e-07, "loss": 41.9337, "step": 439240 }, { "epoch": 0.8873127906366027, "grad_norm": 367.57672119140625, "learning_rate": 4.4298361682277355e-07, "loss": 18.4597, "step": 439250 }, { "epoch": 0.8873329912692866, "grad_norm": 388.3193359375, "learning_rate": 4.428399823185253e-07, "loss": 16.5243, "step": 439260 }, { "epoch": 0.8873531919019704, "grad_norm": 170.0860595703125, "learning_rate": 4.426963700253223e-07, "loss": 19.4929, "step": 439270 }, { "epoch": 0.8873733925346542, "grad_norm": 500.71966552734375, "learning_rate": 4.425527799438639e-07, "loss": 13.3557, "step": 439280 }, { "epoch": 0.887393593167338, "grad_norm": 320.1374816894531, "learning_rate": 4.4240921207485077e-07, "loss": 27.7214, "step": 439290 }, { "epoch": 0.8874137938000218, "grad_norm": 295.4293518066406, "learning_rate": 4.4226566641898173e-07, "loss": 9.7501, "step": 439300 }, { "epoch": 0.8874339944327057, "grad_norm": 33.95865249633789, "learning_rate": 4.421221429769579e-07, "loss": 13.4921, "step": 439310 }, { "epoch": 0.8874541950653895, "grad_norm": 363.8642883300781, "learning_rate": 4.4197864174947755e-07, "loss": 10.8632, "step": 439320 }, { "epoch": 0.8874743956980733, "grad_norm": 210.2339324951172, "learning_rate": 4.418351627372408e-07, "loss": 19.4186, "step": 439330 }, { "epoch": 0.8874945963307571, "grad_norm": 893.2373657226562, "learning_rate": 4.416917059409465e-07, "loss": 21.4893, "step": 439340 }, { "epoch": 0.8875147969634409, "grad_norm": 80.63335418701172, "learning_rate": 4.415482713612934e-07, "loss": 15.3269, "step": 439350 }, { "epoch": 0.8875349975961248, "grad_norm": 154.1669921875, "learning_rate": 4.414048589989822e-07, "loss": 13.8406, "step": 439360 }, { "epoch": 0.8875551982288086, "grad_norm": 251.88331604003906, "learning_rate": 4.4126146885471067e-07, "loss": 24.2011, "step": 439370 }, { "epoch": 0.8875753988614924, "grad_norm": 256.91546630859375, "learning_rate": 4.411181009291765e-07, "loss": 28.8828, "step": 439380 }, { "epoch": 0.8875955994941762, "grad_norm": 30.955636978149414, "learning_rate": 4.409747552230803e-07, "loss": 11.8671, "step": 439390 }, { "epoch": 0.8876158001268599, "grad_norm": 472.96087646484375, "learning_rate": 4.4083143173712207e-07, "loss": 16.3747, "step": 439400 }, { "epoch": 0.8876360007595437, "grad_norm": 366.6407165527344, "learning_rate": 4.406881304719962e-07, "loss": 15.4536, "step": 439410 }, { "epoch": 0.8876562013922276, "grad_norm": 16.067556381225586, "learning_rate": 4.405448514284039e-07, "loss": 33.2922, "step": 439420 }, { "epoch": 0.8876764020249114, "grad_norm": 382.92041015625, "learning_rate": 4.404015946070439e-07, "loss": 22.2967, "step": 439430 }, { "epoch": 0.8876966026575952, "grad_norm": 343.244140625, "learning_rate": 4.40258360008613e-07, "loss": 27.397, "step": 439440 }, { "epoch": 0.887716803290279, "grad_norm": 517.2798461914062, "learning_rate": 4.401151476338095e-07, "loss": 41.0492, "step": 439450 }, { "epoch": 0.8877370039229628, "grad_norm": 895.5753173828125, "learning_rate": 4.3997195748333113e-07, "loss": 10.4305, "step": 439460 }, { "epoch": 0.8877572045556467, "grad_norm": 11.679781913757324, "learning_rate": 4.3982878955787844e-07, "loss": 14.3762, "step": 439470 }, { "epoch": 0.8877774051883305, "grad_norm": 38.89082717895508, "learning_rate": 4.396856438581454e-07, "loss": 15.0276, "step": 439480 }, { "epoch": 0.8877976058210143, "grad_norm": 209.6109619140625, "learning_rate": 4.395425203848314e-07, "loss": 15.5432, "step": 439490 }, { "epoch": 0.8878178064536981, "grad_norm": 285.5658874511719, "learning_rate": 4.3939941913863525e-07, "loss": 17.9952, "step": 439500 }, { "epoch": 0.8878380070863819, "grad_norm": 131.74278259277344, "learning_rate": 4.392563401202526e-07, "loss": 27.507, "step": 439510 }, { "epoch": 0.8878582077190658, "grad_norm": 145.62322998046875, "learning_rate": 4.391132833303807e-07, "loss": 14.4187, "step": 439520 }, { "epoch": 0.8878784083517496, "grad_norm": 242.29408264160156, "learning_rate": 4.389702487697189e-07, "loss": 12.3936, "step": 439530 }, { "epoch": 0.8878986089844334, "grad_norm": 31.640588760375977, "learning_rate": 4.388272364389623e-07, "loss": 13.26, "step": 439540 }, { "epoch": 0.8879188096171172, "grad_norm": 241.95538330078125, "learning_rate": 4.38684246338808e-07, "loss": 17.4089, "step": 439550 }, { "epoch": 0.887939010249801, "grad_norm": 305.1247863769531, "learning_rate": 4.385412784699544e-07, "loss": 26.7816, "step": 439560 }, { "epoch": 0.8879592108824849, "grad_norm": 542.28271484375, "learning_rate": 4.3839833283309597e-07, "loss": 28.6824, "step": 439570 }, { "epoch": 0.8879794115151687, "grad_norm": 271.184326171875, "learning_rate": 4.3825540942893206e-07, "loss": 19.0649, "step": 439580 }, { "epoch": 0.8879996121478525, "grad_norm": 171.00033569335938, "learning_rate": 4.381125082581583e-07, "loss": 19.832, "step": 439590 }, { "epoch": 0.8880198127805363, "grad_norm": 387.65185546875, "learning_rate": 4.379696293214697e-07, "loss": 21.0981, "step": 439600 }, { "epoch": 0.8880400134132201, "grad_norm": 409.376953125, "learning_rate": 4.378267726195645e-07, "loss": 15.5741, "step": 439610 }, { "epoch": 0.888060214045904, "grad_norm": 470.7140808105469, "learning_rate": 4.3768393815313723e-07, "loss": 17.937, "step": 439620 }, { "epoch": 0.8880804146785878, "grad_norm": 86.91465759277344, "learning_rate": 4.375411259228868e-07, "loss": 13.9544, "step": 439630 }, { "epoch": 0.8881006153112716, "grad_norm": 152.19589233398438, "learning_rate": 4.373983359295059e-07, "loss": 18.4752, "step": 439640 }, { "epoch": 0.8881208159439553, "grad_norm": 101.01200103759766, "learning_rate": 4.372555681736934e-07, "loss": 22.6062, "step": 439650 }, { "epoch": 0.8881410165766391, "grad_norm": 378.2396240234375, "learning_rate": 4.3711282265614385e-07, "loss": 18.0084, "step": 439660 }, { "epoch": 0.888161217209323, "grad_norm": 195.88209533691406, "learning_rate": 4.369700993775522e-07, "loss": 11.7541, "step": 439670 }, { "epoch": 0.8881814178420068, "grad_norm": 106.99616241455078, "learning_rate": 4.368273983386157e-07, "loss": 24.8551, "step": 439680 }, { "epoch": 0.8882016184746906, "grad_norm": 80.48089599609375, "learning_rate": 4.3668471954002864e-07, "loss": 16.0395, "step": 439690 }, { "epoch": 0.8882218191073744, "grad_norm": 434.21148681640625, "learning_rate": 4.3654206298248625e-07, "loss": 17.1421, "step": 439700 }, { "epoch": 0.8882420197400582, "grad_norm": 172.45501708984375, "learning_rate": 4.363994286666845e-07, "loss": 20.3014, "step": 439710 }, { "epoch": 0.888262220372742, "grad_norm": 436.064697265625, "learning_rate": 4.3625681659331895e-07, "loss": 8.8036, "step": 439720 }, { "epoch": 0.8882824210054259, "grad_norm": 189.44009399414062, "learning_rate": 4.3611422676308413e-07, "loss": 18.125, "step": 439730 }, { "epoch": 0.8883026216381097, "grad_norm": 154.24684143066406, "learning_rate": 4.359716591766744e-07, "loss": 10.3431, "step": 439740 }, { "epoch": 0.8883228222707935, "grad_norm": 45.151947021484375, "learning_rate": 4.3582911383478646e-07, "loss": 22.5793, "step": 439750 }, { "epoch": 0.8883430229034773, "grad_norm": 101.4354248046875, "learning_rate": 4.3568659073811306e-07, "loss": 14.692, "step": 439760 }, { "epoch": 0.8883632235361612, "grad_norm": 332.0010681152344, "learning_rate": 4.355440898873492e-07, "loss": 8.9394, "step": 439770 }, { "epoch": 0.888383424168845, "grad_norm": 231.61651611328125, "learning_rate": 4.354016112831899e-07, "loss": 8.167, "step": 439780 }, { "epoch": 0.8884036248015288, "grad_norm": 618.9818725585938, "learning_rate": 4.352591549263302e-07, "loss": 14.1208, "step": 439790 }, { "epoch": 0.8884238254342126, "grad_norm": 507.2235107421875, "learning_rate": 4.3511672081746393e-07, "loss": 12.3601, "step": 439800 }, { "epoch": 0.8884440260668964, "grad_norm": 224.71241760253906, "learning_rate": 4.3497430895728444e-07, "loss": 13.6204, "step": 439810 }, { "epoch": 0.8884642266995803, "grad_norm": 222.20713806152344, "learning_rate": 4.348319193464867e-07, "loss": 28.523, "step": 439820 }, { "epoch": 0.8884844273322641, "grad_norm": 107.95604705810547, "learning_rate": 4.3468955198576524e-07, "loss": 8.9733, "step": 439830 }, { "epoch": 0.8885046279649479, "grad_norm": 290.3319091796875, "learning_rate": 4.3454720687581165e-07, "loss": 22.8961, "step": 439840 }, { "epoch": 0.8885248285976317, "grad_norm": 553.7430419921875, "learning_rate": 4.344048840173226e-07, "loss": 11.9736, "step": 439850 }, { "epoch": 0.8885450292303155, "grad_norm": 217.2595977783203, "learning_rate": 4.3426258341098925e-07, "loss": 6.9081, "step": 439860 }, { "epoch": 0.8885652298629994, "grad_norm": 445.9281005859375, "learning_rate": 4.341203050575077e-07, "loss": 19.6621, "step": 439870 }, { "epoch": 0.8885854304956832, "grad_norm": 0.0, "learning_rate": 4.3397804895756957e-07, "loss": 21.3665, "step": 439880 }, { "epoch": 0.888605631128367, "grad_norm": 14.04886531829834, "learning_rate": 4.338358151118677e-07, "loss": 7.1894, "step": 439890 }, { "epoch": 0.8886258317610508, "grad_norm": 244.17861938476562, "learning_rate": 4.33693603521097e-07, "loss": 8.8234, "step": 439900 }, { "epoch": 0.8886460323937345, "grad_norm": 162.6847381591797, "learning_rate": 4.3355141418594926e-07, "loss": 20.4846, "step": 439910 }, { "epoch": 0.8886662330264183, "grad_norm": 238.5950469970703, "learning_rate": 4.334092471071194e-07, "loss": 15.8418, "step": 439920 }, { "epoch": 0.8886864336591022, "grad_norm": 372.736083984375, "learning_rate": 4.3326710228529746e-07, "loss": 10.6589, "step": 439930 }, { "epoch": 0.888706634291786, "grad_norm": 485.205078125, "learning_rate": 4.3312497972117895e-07, "loss": 16.4431, "step": 439940 }, { "epoch": 0.8887268349244698, "grad_norm": 219.19924926757812, "learning_rate": 4.32982879415455e-07, "loss": 20.38, "step": 439950 }, { "epoch": 0.8887470355571536, "grad_norm": 171.46522521972656, "learning_rate": 4.3284080136881847e-07, "loss": 26.0539, "step": 439960 }, { "epoch": 0.8887672361898374, "grad_norm": 299.0748291015625, "learning_rate": 4.32698745581962e-07, "loss": 14.9703, "step": 439970 }, { "epoch": 0.8887874368225213, "grad_norm": 352.73876953125, "learning_rate": 4.325567120555785e-07, "loss": 10.8353, "step": 439980 }, { "epoch": 0.8888076374552051, "grad_norm": 268.17730712890625, "learning_rate": 4.324147007903584e-07, "loss": 16.2466, "step": 439990 }, { "epoch": 0.8888278380878889, "grad_norm": 114.64710998535156, "learning_rate": 4.322727117869951e-07, "loss": 9.285, "step": 440000 }, { "epoch": 0.8888480387205727, "grad_norm": 28.741056442260742, "learning_rate": 4.3213074504618256e-07, "loss": 12.5165, "step": 440010 }, { "epoch": 0.8888682393532565, "grad_norm": 334.08123779296875, "learning_rate": 4.31988800568609e-07, "loss": 22.8308, "step": 440020 }, { "epoch": 0.8888884399859404, "grad_norm": 30.762502670288086, "learning_rate": 4.3184687835496784e-07, "loss": 23.0419, "step": 440030 }, { "epoch": 0.8889086406186242, "grad_norm": 163.64968872070312, "learning_rate": 4.317049784059518e-07, "loss": 12.5823, "step": 440040 }, { "epoch": 0.888928841251308, "grad_norm": 113.00885772705078, "learning_rate": 4.315631007222515e-07, "loss": 15.331, "step": 440050 }, { "epoch": 0.8889490418839918, "grad_norm": 205.9576873779297, "learning_rate": 4.31421245304558e-07, "loss": 19.476, "step": 440060 }, { "epoch": 0.8889692425166756, "grad_norm": 227.61708068847656, "learning_rate": 4.3127941215356296e-07, "loss": 17.8654, "step": 440070 }, { "epoch": 0.8889894431493595, "grad_norm": 0.0, "learning_rate": 4.3113760126995974e-07, "loss": 18.4632, "step": 440080 }, { "epoch": 0.8890096437820433, "grad_norm": 160.78369140625, "learning_rate": 4.309958126544361e-07, "loss": 19.9924, "step": 440090 }, { "epoch": 0.8890298444147271, "grad_norm": 12.122901916503906, "learning_rate": 4.308540463076849e-07, "loss": 8.1445, "step": 440100 }, { "epoch": 0.8890500450474109, "grad_norm": 390.04315185546875, "learning_rate": 4.3071230223039774e-07, "loss": 20.182, "step": 440110 }, { "epoch": 0.8890702456800947, "grad_norm": 179.36801147460938, "learning_rate": 4.3057058042326407e-07, "loss": 18.0014, "step": 440120 }, { "epoch": 0.8890904463127786, "grad_norm": 143.66476440429688, "learning_rate": 4.30428880886975e-07, "loss": 9.581, "step": 440130 }, { "epoch": 0.8891106469454624, "grad_norm": 133.86337280273438, "learning_rate": 4.3028720362222166e-07, "loss": 20.8693, "step": 440140 }, { "epoch": 0.8891308475781462, "grad_norm": 314.28997802734375, "learning_rate": 4.301455486296946e-07, "loss": 19.0603, "step": 440150 }, { "epoch": 0.8891510482108299, "grad_norm": 341.1095275878906, "learning_rate": 4.300039159100827e-07, "loss": 16.8484, "step": 440160 }, { "epoch": 0.8891712488435137, "grad_norm": 292.7345886230469, "learning_rate": 4.298623054640788e-07, "loss": 13.9937, "step": 440170 }, { "epoch": 0.8891914494761975, "grad_norm": 326.391845703125, "learning_rate": 4.2972071729237065e-07, "loss": 10.7691, "step": 440180 }, { "epoch": 0.8892116501088814, "grad_norm": 6.493969917297363, "learning_rate": 4.295791513956504e-07, "loss": 18.682, "step": 440190 }, { "epoch": 0.8892318507415652, "grad_norm": 216.3207550048828, "learning_rate": 4.29437607774606e-07, "loss": 13.1934, "step": 440200 }, { "epoch": 0.889252051374249, "grad_norm": 411.6736145019531, "learning_rate": 4.2929608642992894e-07, "loss": 26.305, "step": 440210 }, { "epoch": 0.8892722520069328, "grad_norm": 487.83905029296875, "learning_rate": 4.291545873623087e-07, "loss": 16.1887, "step": 440220 }, { "epoch": 0.8892924526396166, "grad_norm": 248.68894958496094, "learning_rate": 4.2901311057243377e-07, "loss": 9.9636, "step": 440230 }, { "epoch": 0.8893126532723005, "grad_norm": 347.11932373046875, "learning_rate": 4.2887165606099513e-07, "loss": 18.2997, "step": 440240 }, { "epoch": 0.8893328539049843, "grad_norm": 180.7696990966797, "learning_rate": 4.2873022382868115e-07, "loss": 17.6919, "step": 440250 }, { "epoch": 0.8893530545376681, "grad_norm": 361.1531066894531, "learning_rate": 4.2858881387618235e-07, "loss": 17.0809, "step": 440260 }, { "epoch": 0.8893732551703519, "grad_norm": 309.6268005371094, "learning_rate": 4.284474262041871e-07, "loss": 22.6974, "step": 440270 }, { "epoch": 0.8893934558030357, "grad_norm": 241.9208221435547, "learning_rate": 4.283060608133843e-07, "loss": 17.4749, "step": 440280 }, { "epoch": 0.8894136564357196, "grad_norm": 363.3228759765625, "learning_rate": 4.2816471770446343e-07, "loss": 15.3348, "step": 440290 }, { "epoch": 0.8894338570684034, "grad_norm": 190.2127227783203, "learning_rate": 4.280233968781139e-07, "loss": 12.9646, "step": 440300 }, { "epoch": 0.8894540577010872, "grad_norm": 124.24407196044922, "learning_rate": 4.2788209833502237e-07, "loss": 15.3975, "step": 440310 }, { "epoch": 0.889474258333771, "grad_norm": 312.8065185546875, "learning_rate": 4.277408220758794e-07, "loss": 15.6786, "step": 440320 }, { "epoch": 0.8894944589664548, "grad_norm": 134.03599548339844, "learning_rate": 4.275995681013745e-07, "loss": 28.8227, "step": 440330 }, { "epoch": 0.8895146595991387, "grad_norm": 473.7370910644531, "learning_rate": 4.2745833641219317e-07, "loss": 16.5285, "step": 440340 }, { "epoch": 0.8895348602318225, "grad_norm": 75.65824127197266, "learning_rate": 4.273171270090254e-07, "loss": 8.0494, "step": 440350 }, { "epoch": 0.8895550608645063, "grad_norm": 310.4652404785156, "learning_rate": 4.271759398925601e-07, "loss": 14.2664, "step": 440360 }, { "epoch": 0.8895752614971901, "grad_norm": 300.664794921875, "learning_rate": 4.270347750634846e-07, "loss": 14.1265, "step": 440370 }, { "epoch": 0.889595462129874, "grad_norm": 379.99761962890625, "learning_rate": 4.2689363252248595e-07, "loss": 19.6016, "step": 440380 }, { "epoch": 0.8896156627625578, "grad_norm": 148.29335021972656, "learning_rate": 4.2675251227025315e-07, "loss": 17.414, "step": 440390 }, { "epoch": 0.8896358633952416, "grad_norm": 300.0272521972656, "learning_rate": 4.266114143074751e-07, "loss": 6.6113, "step": 440400 }, { "epoch": 0.8896560640279254, "grad_norm": 137.89877319335938, "learning_rate": 4.264703386348384e-07, "loss": 9.8072, "step": 440410 }, { "epoch": 0.8896762646606091, "grad_norm": 168.51852416992188, "learning_rate": 4.263292852530293e-07, "loss": 21.7177, "step": 440420 }, { "epoch": 0.8896964652932929, "grad_norm": 107.62776184082031, "learning_rate": 4.261882541627377e-07, "loss": 7.3128, "step": 440430 }, { "epoch": 0.8897166659259768, "grad_norm": 332.1779479980469, "learning_rate": 4.260472453646497e-07, "loss": 27.318, "step": 440440 }, { "epoch": 0.8897368665586606, "grad_norm": 286.0569152832031, "learning_rate": 4.2590625885945205e-07, "loss": 17.3097, "step": 440450 }, { "epoch": 0.8897570671913444, "grad_norm": 243.79681396484375, "learning_rate": 4.25765294647833e-07, "loss": 28.8181, "step": 440460 }, { "epoch": 0.8897772678240282, "grad_norm": 461.97052001953125, "learning_rate": 4.256243527304782e-07, "loss": 17.8837, "step": 440470 }, { "epoch": 0.889797468456712, "grad_norm": 232.52359008789062, "learning_rate": 4.2548343310807704e-07, "loss": 10.1855, "step": 440480 }, { "epoch": 0.8898176690893959, "grad_norm": 423.13519287109375, "learning_rate": 4.25342535781314e-07, "loss": 18.6517, "step": 440490 }, { "epoch": 0.8898378697220797, "grad_norm": 238.52865600585938, "learning_rate": 4.2520166075087635e-07, "loss": 14.1518, "step": 440500 }, { "epoch": 0.8898580703547635, "grad_norm": 369.58734130859375, "learning_rate": 4.250608080174512e-07, "loss": 19.9182, "step": 440510 }, { "epoch": 0.8898782709874473, "grad_norm": 257.09002685546875, "learning_rate": 4.249199775817242e-07, "loss": 15.1414, "step": 440520 }, { "epoch": 0.8898984716201311, "grad_norm": 66.41230773925781, "learning_rate": 4.247791694443837e-07, "loss": 14.5972, "step": 440530 }, { "epoch": 0.889918672252815, "grad_norm": 563.6635131835938, "learning_rate": 4.24638383606113e-07, "loss": 22.6768, "step": 440540 }, { "epoch": 0.8899388728854988, "grad_norm": 479.9045104980469, "learning_rate": 4.24497620067601e-07, "loss": 38.3795, "step": 440550 }, { "epoch": 0.8899590735181826, "grad_norm": 494.77960205078125, "learning_rate": 4.2435687882953327e-07, "loss": 17.7069, "step": 440560 }, { "epoch": 0.8899792741508664, "grad_norm": 126.67586517333984, "learning_rate": 4.242161598925937e-07, "loss": 16.1657, "step": 440570 }, { "epoch": 0.8899994747835502, "grad_norm": 265.52288818359375, "learning_rate": 4.240754632574706e-07, "loss": 9.9768, "step": 440580 }, { "epoch": 0.8900196754162341, "grad_norm": 64.7813491821289, "learning_rate": 4.239347889248485e-07, "loss": 13.0563, "step": 440590 }, { "epoch": 0.8900398760489179, "grad_norm": 128.74452209472656, "learning_rate": 4.237941368954124e-07, "loss": 22.1379, "step": 440600 }, { "epoch": 0.8900600766816017, "grad_norm": 9.655394554138184, "learning_rate": 4.236535071698489e-07, "loss": 16.6747, "step": 440610 }, { "epoch": 0.8900802773142855, "grad_norm": 334.9057922363281, "learning_rate": 4.2351289974884467e-07, "loss": 25.0396, "step": 440620 }, { "epoch": 0.8901004779469693, "grad_norm": 144.74742126464844, "learning_rate": 4.2337231463308147e-07, "loss": 16.4383, "step": 440630 }, { "epoch": 0.8901206785796532, "grad_norm": 564.4386596679688, "learning_rate": 4.2323175182324706e-07, "loss": 27.9278, "step": 440640 }, { "epoch": 0.890140879212337, "grad_norm": 235.91358947753906, "learning_rate": 4.2309121132002695e-07, "loss": 12.3268, "step": 440650 }, { "epoch": 0.8901610798450208, "grad_norm": 32.54255294799805, "learning_rate": 4.2295069312410455e-07, "loss": 14.3396, "step": 440660 }, { "epoch": 0.8901812804777045, "grad_norm": 18.373422622680664, "learning_rate": 4.228101972361648e-07, "loss": 6.3706, "step": 440670 }, { "epoch": 0.8902014811103883, "grad_norm": 241.1278533935547, "learning_rate": 4.226697236568933e-07, "loss": 16.4846, "step": 440680 }, { "epoch": 0.8902216817430721, "grad_norm": 343.79498291015625, "learning_rate": 4.225292723869762e-07, "loss": 19.9639, "step": 440690 }, { "epoch": 0.890241882375756, "grad_norm": 473.7337951660156, "learning_rate": 4.2238884342709397e-07, "loss": 24.4123, "step": 440700 }, { "epoch": 0.8902620830084398, "grad_norm": 259.67449951171875, "learning_rate": 4.222484367779334e-07, "loss": 35.9583, "step": 440710 }, { "epoch": 0.8902822836411236, "grad_norm": 284.6575927734375, "learning_rate": 4.2210805244017993e-07, "loss": 19.8381, "step": 440720 }, { "epoch": 0.8903024842738074, "grad_norm": 339.5075988769531, "learning_rate": 4.219676904145165e-07, "loss": 31.5925, "step": 440730 }, { "epoch": 0.8903226849064912, "grad_norm": 111.9180908203125, "learning_rate": 4.218273507016263e-07, "loss": 33.4121, "step": 440740 }, { "epoch": 0.8903428855391751, "grad_norm": 363.7783203125, "learning_rate": 4.2168703330219494e-07, "loss": 18.3535, "step": 440750 }, { "epoch": 0.8903630861718589, "grad_norm": 288.3460998535156, "learning_rate": 4.2154673821690585e-07, "loss": 22.4761, "step": 440760 }, { "epoch": 0.8903832868045427, "grad_norm": 241.97601318359375, "learning_rate": 4.2140646544644227e-07, "loss": 16.7243, "step": 440770 }, { "epoch": 0.8904034874372265, "grad_norm": 361.3790588378906, "learning_rate": 4.212662149914887e-07, "loss": 13.1733, "step": 440780 }, { "epoch": 0.8904236880699103, "grad_norm": 166.67642211914062, "learning_rate": 4.211259868527273e-07, "loss": 28.0201, "step": 440790 }, { "epoch": 0.8904438887025942, "grad_norm": 495.0302429199219, "learning_rate": 4.2098578103084376e-07, "loss": 21.8701, "step": 440800 }, { "epoch": 0.890464089335278, "grad_norm": 33.01707458496094, "learning_rate": 4.208455975265191e-07, "loss": 18.4406, "step": 440810 }, { "epoch": 0.8904842899679618, "grad_norm": 126.27222442626953, "learning_rate": 4.2070543634043834e-07, "loss": 9.5508, "step": 440820 }, { "epoch": 0.8905044906006456, "grad_norm": 429.9417419433594, "learning_rate": 4.205652974732838e-07, "loss": 18.9119, "step": 440830 }, { "epoch": 0.8905246912333294, "grad_norm": 257.1548767089844, "learning_rate": 4.2042518092573814e-07, "loss": 25.3358, "step": 440840 }, { "epoch": 0.8905448918660133, "grad_norm": 124.42353820800781, "learning_rate": 4.202850866984853e-07, "loss": 20.7962, "step": 440850 }, { "epoch": 0.8905650924986971, "grad_norm": 228.07293701171875, "learning_rate": 4.201450147922065e-07, "loss": 14.201, "step": 440860 }, { "epoch": 0.8905852931313809, "grad_norm": 159.50985717773438, "learning_rate": 4.200049652075866e-07, "loss": 15.3572, "step": 440870 }, { "epoch": 0.8906054937640647, "grad_norm": 300.2088317871094, "learning_rate": 4.198649379453068e-07, "loss": 12.1017, "step": 440880 }, { "epoch": 0.8906256943967485, "grad_norm": 347.2508239746094, "learning_rate": 4.1972493300604877e-07, "loss": 20.4435, "step": 440890 }, { "epoch": 0.8906458950294324, "grad_norm": 177.9487762451172, "learning_rate": 4.195849503904975e-07, "loss": 12.7254, "step": 440900 }, { "epoch": 0.8906660956621162, "grad_norm": 133.84117126464844, "learning_rate": 4.1944499009933303e-07, "loss": 8.6773, "step": 440910 }, { "epoch": 0.8906862962948, "grad_norm": 56.217750549316406, "learning_rate": 4.19305052133237e-07, "loss": 8.5564, "step": 440920 }, { "epoch": 0.8907064969274837, "grad_norm": 324.2454528808594, "learning_rate": 4.1916513649289334e-07, "loss": 17.785, "step": 440930 }, { "epoch": 0.8907266975601675, "grad_norm": 248.8067169189453, "learning_rate": 4.1902524317898427e-07, "loss": 17.8755, "step": 440940 }, { "epoch": 0.8907468981928514, "grad_norm": 194.91070556640625, "learning_rate": 4.188853721921893e-07, "loss": 18.099, "step": 440950 }, { "epoch": 0.8907670988255352, "grad_norm": 371.50653076171875, "learning_rate": 4.1874552353319107e-07, "loss": 21.8642, "step": 440960 }, { "epoch": 0.890787299458219, "grad_norm": 440.4020690917969, "learning_rate": 4.186056972026725e-07, "loss": 38.5145, "step": 440970 }, { "epoch": 0.8908075000909028, "grad_norm": 276.4840087890625, "learning_rate": 4.1846589320131415e-07, "loss": 15.2821, "step": 440980 }, { "epoch": 0.8908277007235866, "grad_norm": 531.48828125, "learning_rate": 4.1832611152979655e-07, "loss": 20.814, "step": 440990 }, { "epoch": 0.8908479013562705, "grad_norm": 295.4017639160156, "learning_rate": 4.1818635218880186e-07, "loss": 18.2705, "step": 441000 }, { "epoch": 0.8908681019889543, "grad_norm": 51.23957061767578, "learning_rate": 4.1804661517901244e-07, "loss": 41.4554, "step": 441010 }, { "epoch": 0.8908883026216381, "grad_norm": 0.0, "learning_rate": 4.179069005011066e-07, "loss": 11.2908, "step": 441020 }, { "epoch": 0.8909085032543219, "grad_norm": 697.160400390625, "learning_rate": 4.177672081557671e-07, "loss": 12.2288, "step": 441030 }, { "epoch": 0.8909287038870057, "grad_norm": 356.4187316894531, "learning_rate": 4.176275381436751e-07, "loss": 10.0519, "step": 441040 }, { "epoch": 0.8909489045196896, "grad_norm": 22.14446449279785, "learning_rate": 4.1748789046551055e-07, "loss": 21.3258, "step": 441050 }, { "epoch": 0.8909691051523734, "grad_norm": 172.09939575195312, "learning_rate": 4.173482651219535e-07, "loss": 13.7005, "step": 441060 }, { "epoch": 0.8909893057850572, "grad_norm": 384.9477233886719, "learning_rate": 4.1720866211368615e-07, "loss": 21.885, "step": 441070 }, { "epoch": 0.891009506417741, "grad_norm": 295.076171875, "learning_rate": 4.1706908144138804e-07, "loss": 12.893, "step": 441080 }, { "epoch": 0.8910297070504248, "grad_norm": 280.5708923339844, "learning_rate": 4.1692952310573854e-07, "loss": 21.7029, "step": 441090 }, { "epoch": 0.8910499076831087, "grad_norm": 366.3118591308594, "learning_rate": 4.1678998710741936e-07, "loss": 19.4737, "step": 441100 }, { "epoch": 0.8910701083157925, "grad_norm": 669.0502319335938, "learning_rate": 4.1665047344710887e-07, "loss": 26.9611, "step": 441110 }, { "epoch": 0.8910903089484763, "grad_norm": 194.59312438964844, "learning_rate": 4.1651098212548923e-07, "loss": 11.9774, "step": 441120 }, { "epoch": 0.8911105095811601, "grad_norm": 146.01962280273438, "learning_rate": 4.163715131432383e-07, "loss": 10.5464, "step": 441130 }, { "epoch": 0.8911307102138439, "grad_norm": 301.42669677734375, "learning_rate": 4.162320665010372e-07, "loss": 20.7333, "step": 441140 }, { "epoch": 0.8911509108465278, "grad_norm": 5.441578388214111, "learning_rate": 4.160926421995648e-07, "loss": 17.515, "step": 441150 }, { "epoch": 0.8911711114792116, "grad_norm": 526.412109375, "learning_rate": 4.159532402395011e-07, "loss": 27.2499, "step": 441160 }, { "epoch": 0.8911913121118954, "grad_norm": 1.964666485786438, "learning_rate": 4.158138606215256e-07, "loss": 8.5801, "step": 441170 }, { "epoch": 0.8912115127445792, "grad_norm": 0.0, "learning_rate": 4.1567450334631667e-07, "loss": 21.3488, "step": 441180 }, { "epoch": 0.8912317133772629, "grad_norm": 707.3838500976562, "learning_rate": 4.155351684145548e-07, "loss": 16.148, "step": 441190 }, { "epoch": 0.8912519140099467, "grad_norm": 286.784912109375, "learning_rate": 4.153958558269189e-07, "loss": 14.0193, "step": 441200 }, { "epoch": 0.8912721146426306, "grad_norm": 188.17913818359375, "learning_rate": 4.1525656558408624e-07, "loss": 11.3886, "step": 441210 }, { "epoch": 0.8912923152753144, "grad_norm": 395.32012939453125, "learning_rate": 4.151172976867374e-07, "loss": 14.509, "step": 441220 }, { "epoch": 0.8913125159079982, "grad_norm": 268.0252380371094, "learning_rate": 4.149780521355523e-07, "loss": 13.7314, "step": 441230 }, { "epoch": 0.891332716540682, "grad_norm": 167.34649658203125, "learning_rate": 4.1483882893120606e-07, "loss": 11.0642, "step": 441240 }, { "epoch": 0.8913529171733658, "grad_norm": 282.3281555175781, "learning_rate": 4.146996280743798e-07, "loss": 24.9861, "step": 441250 }, { "epoch": 0.8913731178060497, "grad_norm": 104.86113739013672, "learning_rate": 4.145604495657518e-07, "loss": 4.6084, "step": 441260 }, { "epoch": 0.8913933184387335, "grad_norm": 35.9450569152832, "learning_rate": 4.144212934060005e-07, "loss": 17.0768, "step": 441270 }, { "epoch": 0.8914135190714173, "grad_norm": 32.53382873535156, "learning_rate": 4.142821595958024e-07, "loss": 20.1245, "step": 441280 }, { "epoch": 0.8914337197041011, "grad_norm": 225.77322387695312, "learning_rate": 4.1414304813583663e-07, "loss": 25.8087, "step": 441290 }, { "epoch": 0.8914539203367849, "grad_norm": 74.63481903076172, "learning_rate": 4.140039590267836e-07, "loss": 21.3402, "step": 441300 }, { "epoch": 0.8914741209694688, "grad_norm": 31.45956039428711, "learning_rate": 4.1386489226931723e-07, "loss": 19.7698, "step": 441310 }, { "epoch": 0.8914943216021526, "grad_norm": 160.2866668701172, "learning_rate": 4.137258478641176e-07, "loss": 19.4852, "step": 441320 }, { "epoch": 0.8915145222348364, "grad_norm": 261.996826171875, "learning_rate": 4.135868258118625e-07, "loss": 12.7521, "step": 441330 }, { "epoch": 0.8915347228675202, "grad_norm": 41.85097885131836, "learning_rate": 4.1344782611322855e-07, "loss": 9.3961, "step": 441340 }, { "epoch": 0.891554923500204, "grad_norm": 211.323974609375, "learning_rate": 4.13308848768893e-07, "loss": 19.9661, "step": 441350 }, { "epoch": 0.8915751241328879, "grad_norm": 190.90200805664062, "learning_rate": 4.1316989377953477e-07, "loss": 8.9747, "step": 441360 }, { "epoch": 0.8915953247655717, "grad_norm": 70.90047454833984, "learning_rate": 4.1303096114583e-07, "loss": 15.8222, "step": 441370 }, { "epoch": 0.8916155253982555, "grad_norm": 345.0062561035156, "learning_rate": 4.128920508684553e-07, "loss": 16.628, "step": 441380 }, { "epoch": 0.8916357260309393, "grad_norm": 280.53509521484375, "learning_rate": 4.127531629480891e-07, "loss": 16.726, "step": 441390 }, { "epoch": 0.8916559266636231, "grad_norm": 578.4691772460938, "learning_rate": 4.1261429738540694e-07, "loss": 30.5171, "step": 441400 }, { "epoch": 0.891676127296307, "grad_norm": 389.1383056640625, "learning_rate": 4.1247545418108715e-07, "loss": 20.0731, "step": 441410 }, { "epoch": 0.8916963279289908, "grad_norm": 173.65341186523438, "learning_rate": 4.1233663333580474e-07, "loss": 12.1688, "step": 441420 }, { "epoch": 0.8917165285616746, "grad_norm": 350.42218017578125, "learning_rate": 4.121978348502381e-07, "loss": 27.4812, "step": 441430 }, { "epoch": 0.8917367291943583, "grad_norm": 235.80104064941406, "learning_rate": 4.1205905872506224e-07, "loss": 18.2688, "step": 441440 }, { "epoch": 0.8917569298270421, "grad_norm": 396.60870361328125, "learning_rate": 4.119203049609538e-07, "loss": 17.07, "step": 441450 }, { "epoch": 0.891777130459726, "grad_norm": 4.170270919799805, "learning_rate": 4.1178157355859005e-07, "loss": 13.0384, "step": 441460 }, { "epoch": 0.8917973310924098, "grad_norm": 277.4400329589844, "learning_rate": 4.1164286451864543e-07, "loss": 15.3368, "step": 441470 }, { "epoch": 0.8918175317250936, "grad_norm": 169.8954620361328, "learning_rate": 4.1150417784179776e-07, "loss": 20.122, "step": 441480 }, { "epoch": 0.8918377323577774, "grad_norm": 595.0029296875, "learning_rate": 4.1136551352872256e-07, "loss": 17.5802, "step": 441490 }, { "epoch": 0.8918579329904612, "grad_norm": 519.4209594726562, "learning_rate": 4.112268715800943e-07, "loss": 23.3711, "step": 441500 }, { "epoch": 0.891878133623145, "grad_norm": 114.85365295410156, "learning_rate": 4.1108825199659087e-07, "loss": 16.6366, "step": 441510 }, { "epoch": 0.8918983342558289, "grad_norm": 384.9375915527344, "learning_rate": 4.1094965477888605e-07, "loss": 19.2176, "step": 441520 }, { "epoch": 0.8919185348885127, "grad_norm": 127.04666900634766, "learning_rate": 4.1081107992765546e-07, "loss": 16.5187, "step": 441530 }, { "epoch": 0.8919387355211965, "grad_norm": 259.2323303222656, "learning_rate": 4.1067252744357524e-07, "loss": 16.7474, "step": 441540 }, { "epoch": 0.8919589361538803, "grad_norm": 260.97119140625, "learning_rate": 4.10533997327322e-07, "loss": 9.9041, "step": 441550 }, { "epoch": 0.8919791367865642, "grad_norm": 232.7197265625, "learning_rate": 4.1039548957956807e-07, "loss": 15.5778, "step": 441560 }, { "epoch": 0.891999337419248, "grad_norm": 57.10182571411133, "learning_rate": 4.102570042009896e-07, "loss": 18.1468, "step": 441570 }, { "epoch": 0.8920195380519318, "grad_norm": 248.76600646972656, "learning_rate": 4.101185411922626e-07, "loss": 14.0383, "step": 441580 }, { "epoch": 0.8920397386846156, "grad_norm": 70.76747131347656, "learning_rate": 4.099801005540616e-07, "loss": 15.9473, "step": 441590 }, { "epoch": 0.8920599393172994, "grad_norm": 300.2578430175781, "learning_rate": 4.0984168228705934e-07, "loss": 14.8233, "step": 441600 }, { "epoch": 0.8920801399499833, "grad_norm": 288.84088134765625, "learning_rate": 4.0970328639193255e-07, "loss": 17.224, "step": 441610 }, { "epoch": 0.8921003405826671, "grad_norm": 341.37408447265625, "learning_rate": 4.0956491286935687e-07, "loss": 42.7241, "step": 441620 }, { "epoch": 0.8921205412153509, "grad_norm": 716.47509765625, "learning_rate": 4.0942656172000273e-07, "loss": 18.3528, "step": 441630 }, { "epoch": 0.8921407418480347, "grad_norm": 211.32870483398438, "learning_rate": 4.0928823294454743e-07, "loss": 23.5649, "step": 441640 }, { "epoch": 0.8921609424807185, "grad_norm": 467.16729736328125, "learning_rate": 4.091499265436649e-07, "loss": 15.5355, "step": 441650 }, { "epoch": 0.8921811431134024, "grad_norm": 570.491455078125, "learning_rate": 4.0901164251802905e-07, "loss": 17.3162, "step": 441660 }, { "epoch": 0.8922013437460862, "grad_norm": 166.6530303955078, "learning_rate": 4.088733808683132e-07, "loss": 19.6374, "step": 441670 }, { "epoch": 0.89222154437877, "grad_norm": 326.330322265625, "learning_rate": 4.087351415951918e-07, "loss": 26.1327, "step": 441680 }, { "epoch": 0.8922417450114538, "grad_norm": 527.7655029296875, "learning_rate": 4.085969246993388e-07, "loss": 20.6709, "step": 441690 }, { "epoch": 0.8922619456441375, "grad_norm": 493.05120849609375, "learning_rate": 4.084587301814269e-07, "loss": 17.5031, "step": 441700 }, { "epoch": 0.8922821462768213, "grad_norm": 280.71697998046875, "learning_rate": 4.0832055804212957e-07, "loss": 12.9486, "step": 441710 }, { "epoch": 0.8923023469095052, "grad_norm": 18.699125289916992, "learning_rate": 4.081824082821223e-07, "loss": 11.9697, "step": 441720 }, { "epoch": 0.892322547542189, "grad_norm": 311.6136169433594, "learning_rate": 4.080442809020774e-07, "loss": 14.3622, "step": 441730 }, { "epoch": 0.8923427481748728, "grad_norm": 170.95773315429688, "learning_rate": 4.079061759026659e-07, "loss": 18.422, "step": 441740 }, { "epoch": 0.8923629488075566, "grad_norm": 278.82080078125, "learning_rate": 4.0776809328456455e-07, "loss": 12.0484, "step": 441750 }, { "epoch": 0.8923831494402404, "grad_norm": 412.0937805175781, "learning_rate": 4.0763003304844395e-07, "loss": 12.5635, "step": 441760 }, { "epoch": 0.8924033500729243, "grad_norm": 340.64300537109375, "learning_rate": 4.0749199519497686e-07, "loss": 13.7271, "step": 441770 }, { "epoch": 0.8924235507056081, "grad_norm": 208.55242919921875, "learning_rate": 4.073539797248377e-07, "loss": 21.6009, "step": 441780 }, { "epoch": 0.8924437513382919, "grad_norm": 89.96759033203125, "learning_rate": 4.0721598663869764e-07, "loss": 18.0797, "step": 441790 }, { "epoch": 0.8924639519709757, "grad_norm": 251.16778564453125, "learning_rate": 4.0707801593723006e-07, "loss": 32.1788, "step": 441800 }, { "epoch": 0.8924841526036595, "grad_norm": 251.69451904296875, "learning_rate": 4.069400676211077e-07, "loss": 20.4469, "step": 441810 }, { "epoch": 0.8925043532363434, "grad_norm": 183.2567138671875, "learning_rate": 4.0680214169100117e-07, "loss": 15.515, "step": 441820 }, { "epoch": 0.8925245538690272, "grad_norm": 547.2921142578125, "learning_rate": 4.0666423814758436e-07, "loss": 30.1629, "step": 441830 }, { "epoch": 0.892544754501711, "grad_norm": 125.34368896484375, "learning_rate": 4.065263569915301e-07, "loss": 16.8468, "step": 441840 }, { "epoch": 0.8925649551343948, "grad_norm": 188.44284057617188, "learning_rate": 4.063884982235078e-07, "loss": 21.6664, "step": 441850 }, { "epoch": 0.8925851557670786, "grad_norm": 150.7265167236328, "learning_rate": 4.062506618441908e-07, "loss": 14.6142, "step": 441860 }, { "epoch": 0.8926053563997625, "grad_norm": 239.54351806640625, "learning_rate": 4.06112847854252e-07, "loss": 30.2596, "step": 441870 }, { "epoch": 0.8926255570324463, "grad_norm": 310.3310852050781, "learning_rate": 4.059750562543618e-07, "loss": 41.8212, "step": 441880 }, { "epoch": 0.8926457576651301, "grad_norm": 230.776611328125, "learning_rate": 4.05837287045191e-07, "loss": 12.8379, "step": 441890 }, { "epoch": 0.8926659582978139, "grad_norm": 278.3397521972656, "learning_rate": 4.056995402274122e-07, "loss": 21.7668, "step": 441900 }, { "epoch": 0.8926861589304977, "grad_norm": 269.46240234375, "learning_rate": 4.0556181580169885e-07, "loss": 15.8528, "step": 441910 }, { "epoch": 0.8927063595631816, "grad_norm": 320.7786865234375, "learning_rate": 4.054241137687176e-07, "loss": 22.1815, "step": 441920 }, { "epoch": 0.8927265601958654, "grad_norm": 91.73960876464844, "learning_rate": 4.052864341291418e-07, "loss": 10.3726, "step": 441930 }, { "epoch": 0.8927467608285492, "grad_norm": 195.0965118408203, "learning_rate": 4.051487768836443e-07, "loss": 23.2232, "step": 441940 }, { "epoch": 0.8927669614612329, "grad_norm": 161.50064086914062, "learning_rate": 4.0501114203289395e-07, "loss": 15.2331, "step": 441950 }, { "epoch": 0.8927871620939167, "grad_norm": 282.3250427246094, "learning_rate": 4.048735295775608e-07, "loss": 17.1713, "step": 441960 }, { "epoch": 0.8928073627266006, "grad_norm": 280.8865661621094, "learning_rate": 4.0473593951831814e-07, "loss": 11.15, "step": 441970 }, { "epoch": 0.8928275633592844, "grad_norm": 420.7433776855469, "learning_rate": 4.0459837185583497e-07, "loss": 10.429, "step": 441980 }, { "epoch": 0.8928477639919682, "grad_norm": 452.8355712890625, "learning_rate": 4.044608265907807e-07, "loss": 26.0573, "step": 441990 }, { "epoch": 0.892867964624652, "grad_norm": 143.9297332763672, "learning_rate": 4.043233037238281e-07, "loss": 18.3739, "step": 442000 }, { "epoch": 0.8928881652573358, "grad_norm": 476.40283203125, "learning_rate": 4.041858032556456e-07, "loss": 18.2946, "step": 442010 }, { "epoch": 0.8929083658900197, "grad_norm": 242.421630859375, "learning_rate": 4.040483251869054e-07, "loss": 13.0868, "step": 442020 }, { "epoch": 0.8929285665227035, "grad_norm": 52.56117630004883, "learning_rate": 4.0391086951827474e-07, "loss": 31.286, "step": 442030 }, { "epoch": 0.8929487671553873, "grad_norm": 228.5247344970703, "learning_rate": 4.0377343625042587e-07, "loss": 13.4995, "step": 442040 }, { "epoch": 0.8929689677880711, "grad_norm": 114.69009399414062, "learning_rate": 4.0363602538402823e-07, "loss": 9.915, "step": 442050 }, { "epoch": 0.8929891684207549, "grad_norm": 537.7235107421875, "learning_rate": 4.034986369197502e-07, "loss": 26.735, "step": 442060 }, { "epoch": 0.8930093690534388, "grad_norm": 307.6634826660156, "learning_rate": 4.0336127085826294e-07, "loss": 23.8693, "step": 442070 }, { "epoch": 0.8930295696861226, "grad_norm": 304.4604187011719, "learning_rate": 4.032239272002347e-07, "loss": 13.2142, "step": 442080 }, { "epoch": 0.8930497703188064, "grad_norm": 394.4139099121094, "learning_rate": 4.030866059463362e-07, "loss": 25.4436, "step": 442090 }, { "epoch": 0.8930699709514902, "grad_norm": 234.47674560546875, "learning_rate": 4.029493070972362e-07, "loss": 12.9756, "step": 442100 }, { "epoch": 0.893090171584174, "grad_norm": 197.06240844726562, "learning_rate": 4.0281203065360265e-07, "loss": 16.0591, "step": 442110 }, { "epoch": 0.8931103722168579, "grad_norm": 34.24604415893555, "learning_rate": 4.026747766161071e-07, "loss": 16.2573, "step": 442120 }, { "epoch": 0.8931305728495417, "grad_norm": 9.292876243591309, "learning_rate": 4.025375449854163e-07, "loss": 6.0698, "step": 442130 }, { "epoch": 0.8931507734822255, "grad_norm": 363.0008544921875, "learning_rate": 4.0240033576219974e-07, "loss": 14.518, "step": 442140 }, { "epoch": 0.8931709741149093, "grad_norm": 394.5640869140625, "learning_rate": 4.022631489471257e-07, "loss": 20.8416, "step": 442150 }, { "epoch": 0.8931911747475931, "grad_norm": 129.547607421875, "learning_rate": 4.0212598454086596e-07, "loss": 17.2825, "step": 442160 }, { "epoch": 0.893211375380277, "grad_norm": 305.1719970703125, "learning_rate": 4.019888425440838e-07, "loss": 12.5452, "step": 442170 }, { "epoch": 0.8932315760129608, "grad_norm": 335.4129943847656, "learning_rate": 4.018517229574509e-07, "loss": 11.5626, "step": 442180 }, { "epoch": 0.8932517766456446, "grad_norm": 311.81048583984375, "learning_rate": 4.0171462578163624e-07, "loss": 17.4888, "step": 442190 }, { "epoch": 0.8932719772783284, "grad_norm": 236.4404296875, "learning_rate": 4.0157755101730645e-07, "loss": 14.4155, "step": 442200 }, { "epoch": 0.8932921779110121, "grad_norm": 326.996337890625, "learning_rate": 4.014404986651288e-07, "loss": 18.2013, "step": 442210 }, { "epoch": 0.8933123785436959, "grad_norm": 352.307373046875, "learning_rate": 4.013034687257727e-07, "loss": 22.2774, "step": 442220 }, { "epoch": 0.8933325791763798, "grad_norm": 277.7940673828125, "learning_rate": 4.011664611999072e-07, "loss": 30.8169, "step": 442230 }, { "epoch": 0.8933527798090636, "grad_norm": 538.7009887695312, "learning_rate": 4.010294760881972e-07, "loss": 24.6857, "step": 442240 }, { "epoch": 0.8933729804417474, "grad_norm": 176.78065490722656, "learning_rate": 4.0089251339131164e-07, "loss": 20.1488, "step": 442250 }, { "epoch": 0.8933931810744312, "grad_norm": 3.870227336883545, "learning_rate": 4.0075557310991886e-07, "loss": 17.3812, "step": 442260 }, { "epoch": 0.893413381707115, "grad_norm": 439.2266845703125, "learning_rate": 4.006186552446861e-07, "loss": 16.0918, "step": 442270 }, { "epoch": 0.8934335823397989, "grad_norm": 258.6006774902344, "learning_rate": 4.00481759796279e-07, "loss": 21.6943, "step": 442280 }, { "epoch": 0.8934537829724827, "grad_norm": 341.0641174316406, "learning_rate": 4.003448867653664e-07, "loss": 18.1336, "step": 442290 }, { "epoch": 0.8934739836051665, "grad_norm": 437.7100830078125, "learning_rate": 4.002080361526156e-07, "loss": 15.1572, "step": 442300 }, { "epoch": 0.8934941842378503, "grad_norm": 347.60882568359375, "learning_rate": 4.000712079586916e-07, "loss": 16.401, "step": 442310 }, { "epoch": 0.8935143848705341, "grad_norm": 480.7490539550781, "learning_rate": 3.999344021842627e-07, "loss": 24.8384, "step": 442320 }, { "epoch": 0.893534585503218, "grad_norm": 192.32810974121094, "learning_rate": 3.997976188299968e-07, "loss": 18.4758, "step": 442330 }, { "epoch": 0.8935547861359018, "grad_norm": 847.3984375, "learning_rate": 3.996608578965594e-07, "loss": 22.622, "step": 442340 }, { "epoch": 0.8935749867685856, "grad_norm": 223.96299743652344, "learning_rate": 3.9952411938461557e-07, "loss": 18.4748, "step": 442350 }, { "epoch": 0.8935951874012694, "grad_norm": 212.14646911621094, "learning_rate": 3.9938740329483473e-07, "loss": 18.2735, "step": 442360 }, { "epoch": 0.8936153880339532, "grad_norm": 359.57940673828125, "learning_rate": 3.992507096278814e-07, "loss": 25.5224, "step": 442370 }, { "epoch": 0.8936355886666371, "grad_norm": 271.06353759765625, "learning_rate": 3.991140383844211e-07, "loss": 9.497, "step": 442380 }, { "epoch": 0.8936557892993209, "grad_norm": 422.99774169921875, "learning_rate": 3.989773895651222e-07, "loss": 16.8614, "step": 442390 }, { "epoch": 0.8936759899320047, "grad_norm": 337.3374938964844, "learning_rate": 3.9884076317064813e-07, "loss": 20.38, "step": 442400 }, { "epoch": 0.8936961905646885, "grad_norm": 299.5712890625, "learning_rate": 3.9870415920166715e-07, "loss": 16.9914, "step": 442410 }, { "epoch": 0.8937163911973723, "grad_norm": 232.5764617919922, "learning_rate": 3.9856757765884436e-07, "loss": 10.033, "step": 442420 }, { "epoch": 0.8937365918300562, "grad_norm": 7.127708435058594, "learning_rate": 3.984310185428442e-07, "loss": 31.2197, "step": 442430 }, { "epoch": 0.89375679246274, "grad_norm": 354.21075439453125, "learning_rate": 3.9829448185433385e-07, "loss": 26.9723, "step": 442440 }, { "epoch": 0.8937769930954238, "grad_norm": 299.904541015625, "learning_rate": 3.9815796759397783e-07, "loss": 8.4131, "step": 442450 }, { "epoch": 0.8937971937281076, "grad_norm": 0.0, "learning_rate": 3.980214757624412e-07, "loss": 18.6585, "step": 442460 }, { "epoch": 0.8938173943607913, "grad_norm": 162.68905639648438, "learning_rate": 3.978850063603895e-07, "loss": 16.8996, "step": 442470 }, { "epoch": 0.8938375949934751, "grad_norm": 384.43865966796875, "learning_rate": 3.977485593884889e-07, "loss": 26.5161, "step": 442480 }, { "epoch": 0.893857795626159, "grad_norm": 272.01507568359375, "learning_rate": 3.9761213484740435e-07, "loss": 19.0648, "step": 442490 }, { "epoch": 0.8938779962588428, "grad_norm": 419.2401123046875, "learning_rate": 3.9747573273779816e-07, "loss": 14.2595, "step": 442500 }, { "epoch": 0.8938981968915266, "grad_norm": 171.304443359375, "learning_rate": 3.9733935306033756e-07, "loss": 16.8611, "step": 442510 }, { "epoch": 0.8939183975242104, "grad_norm": 401.70294189453125, "learning_rate": 3.9720299581568865e-07, "loss": 23.067, "step": 442520 }, { "epoch": 0.8939385981568942, "grad_norm": 301.2441711425781, "learning_rate": 3.970666610045121e-07, "loss": 15.8445, "step": 442530 }, { "epoch": 0.8939587987895781, "grad_norm": 430.2862548828125, "learning_rate": 3.969303486274745e-07, "loss": 14.9746, "step": 442540 }, { "epoch": 0.8939789994222619, "grad_norm": 252.0267791748047, "learning_rate": 3.967940586852409e-07, "loss": 11.8626, "step": 442550 }, { "epoch": 0.8939992000549457, "grad_norm": 84.86177062988281, "learning_rate": 3.966577911784747e-07, "loss": 19.9149, "step": 442560 }, { "epoch": 0.8940194006876295, "grad_norm": 500.92437744140625, "learning_rate": 3.965215461078392e-07, "loss": 15.9517, "step": 442570 }, { "epoch": 0.8940396013203133, "grad_norm": 490.483642578125, "learning_rate": 3.963853234740006e-07, "loss": 20.9653, "step": 442580 }, { "epoch": 0.8940598019529972, "grad_norm": 330.5580749511719, "learning_rate": 3.962491232776211e-07, "loss": 18.0036, "step": 442590 }, { "epoch": 0.894080002585681, "grad_norm": 343.3367919921875, "learning_rate": 3.961129455193641e-07, "loss": 32.6233, "step": 442600 }, { "epoch": 0.8941002032183648, "grad_norm": 118.35514068603516, "learning_rate": 3.959767901998957e-07, "loss": 12.4153, "step": 442610 }, { "epoch": 0.8941204038510486, "grad_norm": 51.02582931518555, "learning_rate": 3.958406573198764e-07, "loss": 16.2508, "step": 442620 }, { "epoch": 0.8941406044837324, "grad_norm": 259.5372314453125, "learning_rate": 3.957045468799725e-07, "loss": 20.5187, "step": 442630 }, { "epoch": 0.8941608051164163, "grad_norm": 215.82168579101562, "learning_rate": 3.955684588808456e-07, "loss": 31.4842, "step": 442640 }, { "epoch": 0.8941810057491001, "grad_norm": 117.93922424316406, "learning_rate": 3.954323933231602e-07, "loss": 22.4197, "step": 442650 }, { "epoch": 0.8942012063817839, "grad_norm": 405.8056945800781, "learning_rate": 3.952963502075791e-07, "loss": 22.5982, "step": 442660 }, { "epoch": 0.8942214070144677, "grad_norm": 454.1044616699219, "learning_rate": 3.951603295347639e-07, "loss": 18.579, "step": 442670 }, { "epoch": 0.8942416076471515, "grad_norm": 3.49399995803833, "learning_rate": 3.9502433130537977e-07, "loss": 11.4103, "step": 442680 }, { "epoch": 0.8942618082798354, "grad_norm": 365.5223693847656, "learning_rate": 3.9488835552008773e-07, "loss": 18.178, "step": 442690 }, { "epoch": 0.8942820089125192, "grad_norm": 42.808685302734375, "learning_rate": 3.947524021795518e-07, "loss": 17.3722, "step": 442700 }, { "epoch": 0.894302209545203, "grad_norm": 162.291259765625, "learning_rate": 3.946164712844347e-07, "loss": 11.6861, "step": 442710 }, { "epoch": 0.8943224101778867, "grad_norm": 207.251220703125, "learning_rate": 3.9448056283539704e-07, "loss": 18.4137, "step": 442720 }, { "epoch": 0.8943426108105705, "grad_norm": 285.8092346191406, "learning_rate": 3.9434467683310327e-07, "loss": 12.0819, "step": 442730 }, { "epoch": 0.8943628114432544, "grad_norm": 236.7385711669922, "learning_rate": 3.942088132782157e-07, "loss": 17.1582, "step": 442740 }, { "epoch": 0.8943830120759382, "grad_norm": 283.231689453125, "learning_rate": 3.9407297217139427e-07, "loss": 14.0678, "step": 442750 }, { "epoch": 0.894403212708622, "grad_norm": 245.2840576171875, "learning_rate": 3.9393715351330243e-07, "loss": 14.1923, "step": 442760 }, { "epoch": 0.8944234133413058, "grad_norm": 167.3682861328125, "learning_rate": 3.9380135730460347e-07, "loss": 11.5076, "step": 442770 }, { "epoch": 0.8944436139739896, "grad_norm": 377.32257080078125, "learning_rate": 3.9366558354595797e-07, "loss": 24.7849, "step": 442780 }, { "epoch": 0.8944638146066735, "grad_norm": 106.98843383789062, "learning_rate": 3.935298322380271e-07, "loss": 9.0127, "step": 442790 }, { "epoch": 0.8944840152393573, "grad_norm": 496.2410583496094, "learning_rate": 3.9339410338147363e-07, "loss": 14.5842, "step": 442800 }, { "epoch": 0.8945042158720411, "grad_norm": 261.22564697265625, "learning_rate": 3.9325839697695877e-07, "loss": 11.0385, "step": 442810 }, { "epoch": 0.8945244165047249, "grad_norm": 474.54119873046875, "learning_rate": 3.931227130251425e-07, "loss": 16.6529, "step": 442820 }, { "epoch": 0.8945446171374087, "grad_norm": 47.422218322753906, "learning_rate": 3.929870515266876e-07, "loss": 20.1841, "step": 442830 }, { "epoch": 0.8945648177700926, "grad_norm": 98.60272216796875, "learning_rate": 3.928514124822569e-07, "loss": 16.4046, "step": 442840 }, { "epoch": 0.8945850184027764, "grad_norm": 25.00154685974121, "learning_rate": 3.9271579589250817e-07, "loss": 26.1975, "step": 442850 }, { "epoch": 0.8946052190354602, "grad_norm": 351.44476318359375, "learning_rate": 3.925802017581032e-07, "loss": 14.8405, "step": 442860 }, { "epoch": 0.894625419668144, "grad_norm": 233.4315643310547, "learning_rate": 3.924446300797052e-07, "loss": 11.589, "step": 442870 }, { "epoch": 0.8946456203008278, "grad_norm": 166.7718048095703, "learning_rate": 3.923090808579727e-07, "loss": 12.9134, "step": 442880 }, { "epoch": 0.8946658209335117, "grad_norm": 341.9676818847656, "learning_rate": 3.9217355409356614e-07, "loss": 26.3224, "step": 442890 }, { "epoch": 0.8946860215661955, "grad_norm": 256.1241455078125, "learning_rate": 3.920380497871473e-07, "loss": 14.5901, "step": 442900 }, { "epoch": 0.8947062221988793, "grad_norm": 338.17987060546875, "learning_rate": 3.9190256793937675e-07, "loss": 20.7812, "step": 442910 }, { "epoch": 0.8947264228315631, "grad_norm": 214.90966796875, "learning_rate": 3.9176710855091283e-07, "loss": 10.1591, "step": 442920 }, { "epoch": 0.8947466234642469, "grad_norm": 208.99169921875, "learning_rate": 3.916316716224172e-07, "loss": 17.3368, "step": 442930 }, { "epoch": 0.8947668240969308, "grad_norm": 226.1060791015625, "learning_rate": 3.9149625715455107e-07, "loss": 20.4121, "step": 442940 }, { "epoch": 0.8947870247296146, "grad_norm": 94.91813659667969, "learning_rate": 3.913608651479733e-07, "loss": 25.107, "step": 442950 }, { "epoch": 0.8948072253622984, "grad_norm": 754.3956909179688, "learning_rate": 3.912254956033423e-07, "loss": 20.8499, "step": 442960 }, { "epoch": 0.8948274259949822, "grad_norm": 310.0697021484375, "learning_rate": 3.9109014852132035e-07, "loss": 11.3727, "step": 442970 }, { "epoch": 0.8948476266276659, "grad_norm": 413.1166687011719, "learning_rate": 3.9095482390256624e-07, "loss": 16.3572, "step": 442980 }, { "epoch": 0.8948678272603497, "grad_norm": 380.8415832519531, "learning_rate": 3.908195217477384e-07, "loss": 18.5484, "step": 442990 }, { "epoch": 0.8948880278930336, "grad_norm": 485.34539794921875, "learning_rate": 3.90684242057498e-07, "loss": 32.9813, "step": 443000 }, { "epoch": 0.8949082285257174, "grad_norm": 279.1822814941406, "learning_rate": 3.9054898483250224e-07, "loss": 14.1602, "step": 443010 }, { "epoch": 0.8949284291584012, "grad_norm": 396.0739440917969, "learning_rate": 3.904137500734129e-07, "loss": 22.0797, "step": 443020 }, { "epoch": 0.894948629791085, "grad_norm": 409.1693115234375, "learning_rate": 3.902785377808882e-07, "loss": 26.4177, "step": 443030 }, { "epoch": 0.8949688304237688, "grad_norm": 165.26966857910156, "learning_rate": 3.901433479555855e-07, "loss": 19.7893, "step": 443040 }, { "epoch": 0.8949890310564527, "grad_norm": 278.30572509765625, "learning_rate": 3.9000818059816593e-07, "loss": 24.8908, "step": 443050 }, { "epoch": 0.8950092316891365, "grad_norm": 140.10414123535156, "learning_rate": 3.898730357092878e-07, "loss": 20.9508, "step": 443060 }, { "epoch": 0.8950294323218203, "grad_norm": 583.90380859375, "learning_rate": 3.8973791328960786e-07, "loss": 17.115, "step": 443070 }, { "epoch": 0.8950496329545041, "grad_norm": 178.4078369140625, "learning_rate": 3.8960281333978667e-07, "loss": 18.1718, "step": 443080 }, { "epoch": 0.895069833587188, "grad_norm": 96.53193664550781, "learning_rate": 3.894677358604826e-07, "loss": 13.6481, "step": 443090 }, { "epoch": 0.8950900342198718, "grad_norm": 390.45806884765625, "learning_rate": 3.89332680852354e-07, "loss": 17.0324, "step": 443100 }, { "epoch": 0.8951102348525556, "grad_norm": 155.85145568847656, "learning_rate": 3.8919764831605754e-07, "loss": 9.2945, "step": 443110 }, { "epoch": 0.8951304354852394, "grad_norm": 243.15550231933594, "learning_rate": 3.890626382522539e-07, "loss": 21.3716, "step": 443120 }, { "epoch": 0.8951506361179232, "grad_norm": 902.5233764648438, "learning_rate": 3.889276506615991e-07, "loss": 14.31, "step": 443130 }, { "epoch": 0.895170836750607, "grad_norm": 323.9816589355469, "learning_rate": 3.88792685544751e-07, "loss": 23.5845, "step": 443140 }, { "epoch": 0.8951910373832909, "grad_norm": 144.73451232910156, "learning_rate": 3.88657742902368e-07, "loss": 12.5715, "step": 443150 }, { "epoch": 0.8952112380159747, "grad_norm": 317.0386657714844, "learning_rate": 3.88522822735109e-07, "loss": 28.2326, "step": 443160 }, { "epoch": 0.8952314386486585, "grad_norm": 189.03042602539062, "learning_rate": 3.8838792504363066e-07, "loss": 5.949, "step": 443170 }, { "epoch": 0.8952516392813423, "grad_norm": 712.6046142578125, "learning_rate": 3.882530498285886e-07, "loss": 15.2152, "step": 443180 }, { "epoch": 0.8952718399140261, "grad_norm": 190.4026641845703, "learning_rate": 3.8811819709064336e-07, "loss": 12.2439, "step": 443190 }, { "epoch": 0.89529204054671, "grad_norm": 193.0666046142578, "learning_rate": 3.879833668304506e-07, "loss": 27.4535, "step": 443200 }, { "epoch": 0.8953122411793938, "grad_norm": 254.59495544433594, "learning_rate": 3.8784855904866637e-07, "loss": 14.8993, "step": 443210 }, { "epoch": 0.8953324418120776, "grad_norm": 100.77217864990234, "learning_rate": 3.877137737459502e-07, "loss": 11.718, "step": 443220 }, { "epoch": 0.8953526424447613, "grad_norm": 303.43316650390625, "learning_rate": 3.875790109229566e-07, "loss": 26.1313, "step": 443230 }, { "epoch": 0.8953728430774451, "grad_norm": 54.86558532714844, "learning_rate": 3.8744427058034384e-07, "loss": 13.5601, "step": 443240 }, { "epoch": 0.895393043710129, "grad_norm": 377.93890380859375, "learning_rate": 3.8730955271876813e-07, "loss": 9.8107, "step": 443250 }, { "epoch": 0.8954132443428128, "grad_norm": 8.372718811035156, "learning_rate": 3.871748573388867e-07, "loss": 20.0752, "step": 443260 }, { "epoch": 0.8954334449754966, "grad_norm": 473.22430419921875, "learning_rate": 3.870401844413557e-07, "loss": 19.557, "step": 443270 }, { "epoch": 0.8954536456081804, "grad_norm": 374.2972106933594, "learning_rate": 3.8690553402683015e-07, "loss": 20.0601, "step": 443280 }, { "epoch": 0.8954738462408642, "grad_norm": 256.11004638671875, "learning_rate": 3.86770906095969e-07, "loss": 20.1961, "step": 443290 }, { "epoch": 0.895494046873548, "grad_norm": 355.7103576660156, "learning_rate": 3.866363006494256e-07, "loss": 17.2464, "step": 443300 }, { "epoch": 0.8955142475062319, "grad_norm": 207.41500854492188, "learning_rate": 3.8650171768785826e-07, "loss": 28.9698, "step": 443310 }, { "epoch": 0.8955344481389157, "grad_norm": 378.89422607421875, "learning_rate": 3.863671572119221e-07, "loss": 11.5894, "step": 443320 }, { "epoch": 0.8955546487715995, "grad_norm": 364.7547912597656, "learning_rate": 3.8623261922227204e-07, "loss": 26.4094, "step": 443330 }, { "epoch": 0.8955748494042833, "grad_norm": 231.82345581054688, "learning_rate": 3.8609810371956544e-07, "loss": 9.4566, "step": 443340 }, { "epoch": 0.8955950500369672, "grad_norm": 21.13140106201172, "learning_rate": 3.859636107044573e-07, "loss": 23.3379, "step": 443350 }, { "epoch": 0.895615250669651, "grad_norm": 290.96771240234375, "learning_rate": 3.8582914017760154e-07, "loss": 20.6907, "step": 443360 }, { "epoch": 0.8956354513023348, "grad_norm": 441.9970703125, "learning_rate": 3.856946921396554e-07, "loss": 14.0148, "step": 443370 }, { "epoch": 0.8956556519350186, "grad_norm": 223.53692626953125, "learning_rate": 3.8556026659127445e-07, "loss": 8.2351, "step": 443380 }, { "epoch": 0.8956758525677024, "grad_norm": 229.99073791503906, "learning_rate": 3.8542586353311264e-07, "loss": 9.8784, "step": 443390 }, { "epoch": 0.8956960532003863, "grad_norm": 234.25892639160156, "learning_rate": 3.85291482965825e-07, "loss": 15.4107, "step": 443400 }, { "epoch": 0.8957162538330701, "grad_norm": 431.2823486328125, "learning_rate": 3.851571248900676e-07, "loss": 15.1119, "step": 443410 }, { "epoch": 0.8957364544657539, "grad_norm": 353.539794921875, "learning_rate": 3.8502278930649506e-07, "loss": 19.6273, "step": 443420 }, { "epoch": 0.8957566550984377, "grad_norm": 141.57339477539062, "learning_rate": 3.8488847621576066e-07, "loss": 8.7348, "step": 443430 }, { "epoch": 0.8957768557311215, "grad_norm": 66.6593017578125, "learning_rate": 3.8475418561851996e-07, "loss": 45.1004, "step": 443440 }, { "epoch": 0.8957970563638054, "grad_norm": 424.2586975097656, "learning_rate": 3.846199175154297e-07, "loss": 16.1736, "step": 443450 }, { "epoch": 0.8958172569964892, "grad_norm": 98.96004486083984, "learning_rate": 3.8448567190713993e-07, "loss": 20.4798, "step": 443460 }, { "epoch": 0.895837457629173, "grad_norm": 177.68402099609375, "learning_rate": 3.843514487943079e-07, "loss": 18.4954, "step": 443470 }, { "epoch": 0.8958576582618568, "grad_norm": 93.97421264648438, "learning_rate": 3.8421724817758745e-07, "loss": 19.7879, "step": 443480 }, { "epoch": 0.8958778588945405, "grad_norm": 300.3408203125, "learning_rate": 3.84083070057632e-07, "loss": 23.0815, "step": 443490 }, { "epoch": 0.8958980595272243, "grad_norm": 266.1869201660156, "learning_rate": 3.8394891443509554e-07, "loss": 6.7015, "step": 443500 }, { "epoch": 0.8959182601599082, "grad_norm": 319.9082946777344, "learning_rate": 3.83814781310633e-07, "loss": 17.6728, "step": 443510 }, { "epoch": 0.895938460792592, "grad_norm": 24.413856506347656, "learning_rate": 3.8368067068489724e-07, "loss": 17.9294, "step": 443520 }, { "epoch": 0.8959586614252758, "grad_norm": 230.49240112304688, "learning_rate": 3.8354658255854105e-07, "loss": 18.6986, "step": 443530 }, { "epoch": 0.8959788620579596, "grad_norm": 369.7834777832031, "learning_rate": 3.8341251693221893e-07, "loss": 16.3494, "step": 443540 }, { "epoch": 0.8959990626906434, "grad_norm": 257.3564758300781, "learning_rate": 3.832784738065853e-07, "loss": 19.3482, "step": 443550 }, { "epoch": 0.8960192633233273, "grad_norm": 280.5361633300781, "learning_rate": 3.83144453182292e-07, "loss": 8.8472, "step": 443560 }, { "epoch": 0.8960394639560111, "grad_norm": 549.4288940429688, "learning_rate": 3.830104550599922e-07, "loss": 16.389, "step": 443570 }, { "epoch": 0.8960596645886949, "grad_norm": 44.95626449584961, "learning_rate": 3.8287647944034054e-07, "loss": 11.7476, "step": 443580 }, { "epoch": 0.8960798652213787, "grad_norm": 93.65108489990234, "learning_rate": 3.827425263239887e-07, "loss": 14.6472, "step": 443590 }, { "epoch": 0.8961000658540625, "grad_norm": 436.2303466796875, "learning_rate": 3.8260859571158883e-07, "loss": 22.2924, "step": 443600 }, { "epoch": 0.8961202664867464, "grad_norm": 212.3765411376953, "learning_rate": 3.824746876037955e-07, "loss": 14.0277, "step": 443610 }, { "epoch": 0.8961404671194302, "grad_norm": 432.84234619140625, "learning_rate": 3.8234080200125977e-07, "loss": 17.49, "step": 443620 }, { "epoch": 0.896160667752114, "grad_norm": 166.82823181152344, "learning_rate": 3.822069389046357e-07, "loss": 20.0879, "step": 443630 }, { "epoch": 0.8961808683847978, "grad_norm": 292.7633056640625, "learning_rate": 3.8207309831457485e-07, "loss": 12.7529, "step": 443640 }, { "epoch": 0.8962010690174816, "grad_norm": 156.07223510742188, "learning_rate": 3.8193928023172897e-07, "loss": 28.1126, "step": 443650 }, { "epoch": 0.8962212696501655, "grad_norm": 218.61766052246094, "learning_rate": 3.818054846567515e-07, "loss": 16.0304, "step": 443660 }, { "epoch": 0.8962414702828493, "grad_norm": 225.5504150390625, "learning_rate": 3.8167171159029405e-07, "loss": 16.0139, "step": 443670 }, { "epoch": 0.8962616709155331, "grad_norm": 214.22952270507812, "learning_rate": 3.815379610330078e-07, "loss": 14.4313, "step": 443680 }, { "epoch": 0.8962818715482169, "grad_norm": 270.0863037109375, "learning_rate": 3.814042329855455e-07, "loss": 17.5856, "step": 443690 }, { "epoch": 0.8963020721809007, "grad_norm": 436.82843017578125, "learning_rate": 3.812705274485595e-07, "loss": 19.8155, "step": 443700 }, { "epoch": 0.8963222728135846, "grad_norm": 393.1731262207031, "learning_rate": 3.811368444227009e-07, "loss": 17.6372, "step": 443710 }, { "epoch": 0.8963424734462684, "grad_norm": 292.5810241699219, "learning_rate": 3.8100318390862033e-07, "loss": 11.1767, "step": 443720 }, { "epoch": 0.8963626740789522, "grad_norm": 188.14743041992188, "learning_rate": 3.8086954590697057e-07, "loss": 15.2354, "step": 443730 }, { "epoch": 0.8963828747116359, "grad_norm": 292.0819396972656, "learning_rate": 3.8073593041840274e-07, "loss": 40.1812, "step": 443740 }, { "epoch": 0.8964030753443197, "grad_norm": 382.9385070800781, "learning_rate": 3.8060233744356634e-07, "loss": 16.6645, "step": 443750 }, { "epoch": 0.8964232759770036, "grad_norm": 17.047842025756836, "learning_rate": 3.804687669831142e-07, "loss": 12.4839, "step": 443760 }, { "epoch": 0.8964434766096874, "grad_norm": 199.1194610595703, "learning_rate": 3.80335219037698e-07, "loss": 15.6125, "step": 443770 }, { "epoch": 0.8964636772423712, "grad_norm": 389.44268798828125, "learning_rate": 3.802016936079678e-07, "loss": 22.794, "step": 443780 }, { "epoch": 0.896483877875055, "grad_norm": 181.8019561767578, "learning_rate": 3.8006819069457304e-07, "loss": 20.7453, "step": 443790 }, { "epoch": 0.8965040785077388, "grad_norm": 171.97613525390625, "learning_rate": 3.7993471029816653e-07, "loss": 10.4415, "step": 443800 }, { "epoch": 0.8965242791404227, "grad_norm": 272.59613037109375, "learning_rate": 3.798012524193978e-07, "loss": 30.8633, "step": 443810 }, { "epoch": 0.8965444797731065, "grad_norm": 126.36026000976562, "learning_rate": 3.7966781705891684e-07, "loss": 22.1035, "step": 443820 }, { "epoch": 0.8965646804057903, "grad_norm": 564.8611450195312, "learning_rate": 3.7953440421737433e-07, "loss": 18.0288, "step": 443830 }, { "epoch": 0.8965848810384741, "grad_norm": 155.26641845703125, "learning_rate": 3.794010138954213e-07, "loss": 19.7711, "step": 443840 }, { "epoch": 0.8966050816711579, "grad_norm": 631.1908569335938, "learning_rate": 3.792676460937078e-07, "loss": 23.0908, "step": 443850 }, { "epoch": 0.8966252823038418, "grad_norm": 11.287824630737305, "learning_rate": 3.791343008128823e-07, "loss": 11.334, "step": 443860 }, { "epoch": 0.8966454829365256, "grad_norm": 18.347753524780273, "learning_rate": 3.790009780535969e-07, "loss": 18.2939, "step": 443870 }, { "epoch": 0.8966656835692094, "grad_norm": 135.71646118164062, "learning_rate": 3.7886767781650016e-07, "loss": 23.7491, "step": 443880 }, { "epoch": 0.8966858842018932, "grad_norm": 137.08241271972656, "learning_rate": 3.787344001022408e-07, "loss": 11.8424, "step": 443890 }, { "epoch": 0.896706084834577, "grad_norm": 235.2169647216797, "learning_rate": 3.7860114491147017e-07, "loss": 15.3706, "step": 443900 }, { "epoch": 0.8967262854672609, "grad_norm": 170.87989807128906, "learning_rate": 3.784679122448365e-07, "loss": 28.9176, "step": 443910 }, { "epoch": 0.8967464860999447, "grad_norm": 7.418533802032471, "learning_rate": 3.783347021029904e-07, "loss": 10.0521, "step": 443920 }, { "epoch": 0.8967666867326285, "grad_norm": 32.72330856323242, "learning_rate": 3.782015144865808e-07, "loss": 17.3912, "step": 443930 }, { "epoch": 0.8967868873653123, "grad_norm": 132.7852325439453, "learning_rate": 3.780683493962556e-07, "loss": 12.4034, "step": 443940 }, { "epoch": 0.8968070879979961, "grad_norm": 128.07769775390625, "learning_rate": 3.779352068326653e-07, "loss": 9.9278, "step": 443950 }, { "epoch": 0.89682728863068, "grad_norm": 809.1480102539062, "learning_rate": 3.7780208679645826e-07, "loss": 24.2736, "step": 443960 }, { "epoch": 0.8968474892633638, "grad_norm": 35.31715774536133, "learning_rate": 3.776689892882823e-07, "loss": 20.8586, "step": 443970 }, { "epoch": 0.8968676898960476, "grad_norm": 446.0987854003906, "learning_rate": 3.77535914308787e-07, "loss": 16.3148, "step": 443980 }, { "epoch": 0.8968878905287314, "grad_norm": 262.0650939941406, "learning_rate": 3.774028618586217e-07, "loss": 27.6167, "step": 443990 }, { "epoch": 0.8969080911614151, "grad_norm": 382.62713623046875, "learning_rate": 3.772698319384349e-07, "loss": 12.7364, "step": 444000 }, { "epoch": 0.8969282917940989, "grad_norm": 45.584442138671875, "learning_rate": 3.7713682454887266e-07, "loss": 10.9779, "step": 444010 }, { "epoch": 0.8969484924267828, "grad_norm": 226.5741424560547, "learning_rate": 3.770038396905862e-07, "loss": 13.4693, "step": 444020 }, { "epoch": 0.8969686930594666, "grad_norm": 198.7794952392578, "learning_rate": 3.768708773642221e-07, "loss": 16.6698, "step": 444030 }, { "epoch": 0.8969888936921504, "grad_norm": 230.5293731689453, "learning_rate": 3.767379375704278e-07, "loss": 15.721, "step": 444040 }, { "epoch": 0.8970090943248342, "grad_norm": 85.8768081665039, "learning_rate": 3.7660502030985203e-07, "loss": 13.5907, "step": 444050 }, { "epoch": 0.897029294957518, "grad_norm": 278.4578552246094, "learning_rate": 3.7647212558314493e-07, "loss": 10.7476, "step": 444060 }, { "epoch": 0.8970494955902019, "grad_norm": 248.03053283691406, "learning_rate": 3.7633925339094936e-07, "loss": 4.6229, "step": 444070 }, { "epoch": 0.8970696962228857, "grad_norm": 298.5616455078125, "learning_rate": 3.762064037339158e-07, "loss": 26.9045, "step": 444080 }, { "epoch": 0.8970898968555695, "grad_norm": 288.4778747558594, "learning_rate": 3.760735766126927e-07, "loss": 17.8781, "step": 444090 }, { "epoch": 0.8971100974882533, "grad_norm": 234.10653686523438, "learning_rate": 3.759407720279257e-07, "loss": 15.303, "step": 444100 }, { "epoch": 0.8971302981209371, "grad_norm": 507.3984680175781, "learning_rate": 3.758079899802619e-07, "loss": 18.7645, "step": 444110 }, { "epoch": 0.897150498753621, "grad_norm": 251.6602020263672, "learning_rate": 3.756752304703498e-07, "loss": 18.8491, "step": 444120 }, { "epoch": 0.8971706993863048, "grad_norm": 217.68797302246094, "learning_rate": 3.755424934988355e-07, "loss": 9.1092, "step": 444130 }, { "epoch": 0.8971909000189886, "grad_norm": 35.48928451538086, "learning_rate": 3.7540977906636576e-07, "loss": 9.8827, "step": 444140 }, { "epoch": 0.8972111006516724, "grad_norm": 197.29443359375, "learning_rate": 3.752770871735878e-07, "loss": 10.2756, "step": 444150 }, { "epoch": 0.8972313012843562, "grad_norm": 134.35777282714844, "learning_rate": 3.751444178211494e-07, "loss": 8.3943, "step": 444160 }, { "epoch": 0.8972515019170401, "grad_norm": 148.85157775878906, "learning_rate": 3.7501177100969566e-07, "loss": 8.8188, "step": 444170 }, { "epoch": 0.8972717025497239, "grad_norm": 383.755615234375, "learning_rate": 3.748791467398732e-07, "loss": 12.4341, "step": 444180 }, { "epoch": 0.8972919031824077, "grad_norm": 566.0632934570312, "learning_rate": 3.747465450123294e-07, "loss": 23.8997, "step": 444190 }, { "epoch": 0.8973121038150915, "grad_norm": 174.40255737304688, "learning_rate": 3.7461396582771035e-07, "loss": 19.4514, "step": 444200 }, { "epoch": 0.8973323044477753, "grad_norm": 217.28640747070312, "learning_rate": 3.744814091866605e-07, "loss": 22.4372, "step": 444210 }, { "epoch": 0.8973525050804592, "grad_norm": 301.05712890625, "learning_rate": 3.7434887508982886e-07, "loss": 16.4227, "step": 444220 }, { "epoch": 0.897372705713143, "grad_norm": 95.06523132324219, "learning_rate": 3.7421636353785815e-07, "loss": 16.4544, "step": 444230 }, { "epoch": 0.8973929063458268, "grad_norm": 204.04043579101562, "learning_rate": 3.740838745313974e-07, "loss": 12.0667, "step": 444240 }, { "epoch": 0.8974131069785106, "grad_norm": 166.67311096191406, "learning_rate": 3.739514080710899e-07, "loss": 11.7908, "step": 444250 }, { "epoch": 0.8974333076111943, "grad_norm": 0.0, "learning_rate": 3.738189641575818e-07, "loss": 19.6684, "step": 444260 }, { "epoch": 0.8974535082438782, "grad_norm": 305.39263916015625, "learning_rate": 3.7368654279151985e-07, "loss": 45.9074, "step": 444270 }, { "epoch": 0.897473708876562, "grad_norm": 145.71751403808594, "learning_rate": 3.7355414397354796e-07, "loss": 10.9525, "step": 444280 }, { "epoch": 0.8974939095092458, "grad_norm": 221.46197509765625, "learning_rate": 3.7342176770431284e-07, "loss": 15.7742, "step": 444290 }, { "epoch": 0.8975141101419296, "grad_norm": 247.08724975585938, "learning_rate": 3.732894139844578e-07, "loss": 36.7726, "step": 444300 }, { "epoch": 0.8975343107746134, "grad_norm": 141.1046905517578, "learning_rate": 3.731570828146297e-07, "loss": 17.5182, "step": 444310 }, { "epoch": 0.8975545114072973, "grad_norm": 26.958621978759766, "learning_rate": 3.730247741954729e-07, "loss": 15.3155, "step": 444320 }, { "epoch": 0.8975747120399811, "grad_norm": 357.8409423828125, "learning_rate": 3.7289248812763137e-07, "loss": 9.8675, "step": 444330 }, { "epoch": 0.8975949126726649, "grad_norm": 534.7803344726562, "learning_rate": 3.727602246117518e-07, "loss": 20.979, "step": 444340 }, { "epoch": 0.8976151133053487, "grad_norm": 622.3795776367188, "learning_rate": 3.7262798364847753e-07, "loss": 26.6488, "step": 444350 }, { "epoch": 0.8976353139380325, "grad_norm": 172.6232147216797, "learning_rate": 3.72495765238452e-07, "loss": 15.5163, "step": 444360 }, { "epoch": 0.8976555145707164, "grad_norm": 271.0767517089844, "learning_rate": 3.723635693823213e-07, "loss": 19.8816, "step": 444370 }, { "epoch": 0.8976757152034002, "grad_norm": 241.2416229248047, "learning_rate": 3.7223139608073e-07, "loss": 18.3012, "step": 444380 }, { "epoch": 0.897695915836084, "grad_norm": 317.7682189941406, "learning_rate": 3.720992453343214e-07, "loss": 20.9782, "step": 444390 }, { "epoch": 0.8977161164687678, "grad_norm": 77.09332275390625, "learning_rate": 3.7196711714373947e-07, "loss": 11.7378, "step": 444400 }, { "epoch": 0.8977363171014516, "grad_norm": 137.31317138671875, "learning_rate": 3.7183501150962863e-07, "loss": 13.5225, "step": 444410 }, { "epoch": 0.8977565177341355, "grad_norm": 300.7694091796875, "learning_rate": 3.7170292843263347e-07, "loss": 22.7928, "step": 444420 }, { "epoch": 0.8977767183668193, "grad_norm": 207.14952087402344, "learning_rate": 3.715708679133956e-07, "loss": 9.4938, "step": 444430 }, { "epoch": 0.8977969189995031, "grad_norm": 202.46482849121094, "learning_rate": 3.714388299525595e-07, "loss": 11.2566, "step": 444440 }, { "epoch": 0.8978171196321869, "grad_norm": 234.36447143554688, "learning_rate": 3.713068145507709e-07, "loss": 11.378, "step": 444450 }, { "epoch": 0.8978373202648707, "grad_norm": 28.78504753112793, "learning_rate": 3.7117482170867083e-07, "loss": 12.4225, "step": 444460 }, { "epoch": 0.8978575208975546, "grad_norm": 402.2554016113281, "learning_rate": 3.710428514269027e-07, "loss": 12.8743, "step": 444470 }, { "epoch": 0.8978777215302384, "grad_norm": 594.9292602539062, "learning_rate": 3.7091090370611093e-07, "loss": 21.2218, "step": 444480 }, { "epoch": 0.8978979221629222, "grad_norm": 208.42092895507812, "learning_rate": 3.707789785469379e-07, "loss": 16.3208, "step": 444490 }, { "epoch": 0.897918122795606, "grad_norm": 215.97412109375, "learning_rate": 3.7064707595002636e-07, "loss": 19.2725, "step": 444500 }, { "epoch": 0.8979383234282897, "grad_norm": 59.36453628540039, "learning_rate": 3.705151959160197e-07, "loss": 18.0953, "step": 444510 }, { "epoch": 0.8979585240609735, "grad_norm": 330.7110900878906, "learning_rate": 3.703833384455602e-07, "loss": 19.0474, "step": 444520 }, { "epoch": 0.8979787246936574, "grad_norm": 249.13511657714844, "learning_rate": 3.702515035392912e-07, "loss": 16.3551, "step": 444530 }, { "epoch": 0.8979989253263412, "grad_norm": 0.0, "learning_rate": 3.7011969119785496e-07, "loss": 4.1604, "step": 444540 }, { "epoch": 0.898019125959025, "grad_norm": 363.7060241699219, "learning_rate": 3.6998790142189324e-07, "loss": 12.8501, "step": 444550 }, { "epoch": 0.8980393265917088, "grad_norm": 498.28857421875, "learning_rate": 3.698561342120499e-07, "loss": 18.1249, "step": 444560 }, { "epoch": 0.8980595272243926, "grad_norm": 136.7294921875, "learning_rate": 3.6972438956896563e-07, "loss": 10.7324, "step": 444570 }, { "epoch": 0.8980797278570765, "grad_norm": 400.9918518066406, "learning_rate": 3.695926674932826e-07, "loss": 34.5535, "step": 444580 }, { "epoch": 0.8980999284897603, "grad_norm": 366.4157409667969, "learning_rate": 3.694609679856431e-07, "loss": 27.4047, "step": 444590 }, { "epoch": 0.8981201291224441, "grad_norm": 477.45355224609375, "learning_rate": 3.693292910466906e-07, "loss": 21.5866, "step": 444600 }, { "epoch": 0.8981403297551279, "grad_norm": 244.50428771972656, "learning_rate": 3.69197636677065e-07, "loss": 13.6325, "step": 444610 }, { "epoch": 0.8981605303878117, "grad_norm": 117.06970977783203, "learning_rate": 3.690660048774075e-07, "loss": 12.3494, "step": 444620 }, { "epoch": 0.8981807310204956, "grad_norm": 240.0150909423828, "learning_rate": 3.6893439564836155e-07, "loss": 8.5246, "step": 444630 }, { "epoch": 0.8982009316531794, "grad_norm": 43.846248626708984, "learning_rate": 3.688028089905682e-07, "loss": 16.1283, "step": 444640 }, { "epoch": 0.8982211322858632, "grad_norm": 75.04704284667969, "learning_rate": 3.6867124490466697e-07, "loss": 15.8183, "step": 444650 }, { "epoch": 0.898241332918547, "grad_norm": 403.3542785644531, "learning_rate": 3.685397033913002e-07, "loss": 15.3458, "step": 444660 }, { "epoch": 0.8982615335512308, "grad_norm": 298.10205078125, "learning_rate": 3.6840818445111114e-07, "loss": 17.4278, "step": 444670 }, { "epoch": 0.8982817341839147, "grad_norm": 418.43408203125, "learning_rate": 3.6827668808473714e-07, "loss": 10.1699, "step": 444680 }, { "epoch": 0.8983019348165985, "grad_norm": 406.7113037109375, "learning_rate": 3.68145214292821e-07, "loss": 15.6826, "step": 444690 }, { "epoch": 0.8983221354492823, "grad_norm": 773.9559936523438, "learning_rate": 3.680137630760039e-07, "loss": 26.0593, "step": 444700 }, { "epoch": 0.8983423360819661, "grad_norm": 348.9058837890625, "learning_rate": 3.6788233443492583e-07, "loss": 21.1771, "step": 444710 }, { "epoch": 0.8983625367146499, "grad_norm": 135.27320861816406, "learning_rate": 3.6775092837022685e-07, "loss": 17.6119, "step": 444720 }, { "epoch": 0.8983827373473338, "grad_norm": 143.0601806640625, "learning_rate": 3.676195448825487e-07, "loss": 14.7294, "step": 444730 }, { "epoch": 0.8984029379800176, "grad_norm": 418.4273681640625, "learning_rate": 3.674881839725314e-07, "loss": 25.7207, "step": 444740 }, { "epoch": 0.8984231386127014, "grad_norm": 343.03704833984375, "learning_rate": 3.6735684564081385e-07, "loss": 18.8208, "step": 444750 }, { "epoch": 0.8984433392453852, "grad_norm": 83.89474487304688, "learning_rate": 3.672255298880367e-07, "loss": 32.372, "step": 444760 }, { "epoch": 0.8984635398780689, "grad_norm": 209.2916259765625, "learning_rate": 3.670942367148417e-07, "loss": 21.1825, "step": 444770 }, { "epoch": 0.8984837405107527, "grad_norm": 431.716796875, "learning_rate": 3.669629661218671e-07, "loss": 25.6026, "step": 444780 }, { "epoch": 0.8985039411434366, "grad_norm": 30.953857421875, "learning_rate": 3.66831718109753e-07, "loss": 14.6427, "step": 444790 }, { "epoch": 0.8985241417761204, "grad_norm": 259.8677673339844, "learning_rate": 3.6670049267913954e-07, "loss": 15.1308, "step": 444800 }, { "epoch": 0.8985443424088042, "grad_norm": 243.4873809814453, "learning_rate": 3.665692898306655e-07, "loss": 18.857, "step": 444810 }, { "epoch": 0.898564543041488, "grad_norm": 164.88819885253906, "learning_rate": 3.664381095649705e-07, "loss": 12.5417, "step": 444820 }, { "epoch": 0.8985847436741718, "grad_norm": 18.60142707824707, "learning_rate": 3.6630695188269505e-07, "loss": 10.9017, "step": 444830 }, { "epoch": 0.8986049443068557, "grad_norm": 168.12863159179688, "learning_rate": 3.6617581678447647e-07, "loss": 15.9924, "step": 444840 }, { "epoch": 0.8986251449395395, "grad_norm": 36.90435791015625, "learning_rate": 3.6604470427095587e-07, "loss": 8.4565, "step": 444850 }, { "epoch": 0.8986453455722233, "grad_norm": 238.035888671875, "learning_rate": 3.6591361434277105e-07, "loss": 16.2636, "step": 444860 }, { "epoch": 0.8986655462049071, "grad_norm": 193.6130828857422, "learning_rate": 3.6578254700056107e-07, "loss": 8.3253, "step": 444870 }, { "epoch": 0.898685746837591, "grad_norm": 323.912353515625, "learning_rate": 3.6565150224496525e-07, "loss": 7.1689, "step": 444880 }, { "epoch": 0.8987059474702748, "grad_norm": 150.40097045898438, "learning_rate": 3.65520480076621e-07, "loss": 12.8619, "step": 444890 }, { "epoch": 0.8987261481029586, "grad_norm": 278.6393127441406, "learning_rate": 3.6538948049616886e-07, "loss": 12.2635, "step": 444900 }, { "epoch": 0.8987463487356424, "grad_norm": 319.26910400390625, "learning_rate": 3.6525850350424554e-07, "loss": 8.0543, "step": 444910 }, { "epoch": 0.8987665493683262, "grad_norm": 612.7775268554688, "learning_rate": 3.651275491014905e-07, "loss": 27.059, "step": 444920 }, { "epoch": 0.89878675000101, "grad_norm": 155.68157958984375, "learning_rate": 3.649966172885422e-07, "loss": 18.8907, "step": 444930 }, { "epoch": 0.8988069506336939, "grad_norm": 174.67977905273438, "learning_rate": 3.648657080660373e-07, "loss": 15.0618, "step": 444940 }, { "epoch": 0.8988271512663777, "grad_norm": 232.21981811523438, "learning_rate": 3.6473482143461523e-07, "loss": 13.615, "step": 444950 }, { "epoch": 0.8988473518990615, "grad_norm": 184.6064453125, "learning_rate": 3.6460395739491337e-07, "loss": 7.788, "step": 444960 }, { "epoch": 0.8988675525317453, "grad_norm": 239.39715576171875, "learning_rate": 3.644731159475695e-07, "loss": 13.8403, "step": 444970 }, { "epoch": 0.8988877531644291, "grad_norm": 0.0, "learning_rate": 3.643422970932209e-07, "loss": 20.4715, "step": 444980 }, { "epoch": 0.898907953797113, "grad_norm": 177.03160095214844, "learning_rate": 3.6421150083250754e-07, "loss": 11.254, "step": 444990 }, { "epoch": 0.8989281544297968, "grad_norm": 0.0, "learning_rate": 3.6408072716606346e-07, "loss": 21.7134, "step": 445000 }, { "epoch": 0.8989483550624806, "grad_norm": 640.3748779296875, "learning_rate": 3.6394997609452755e-07, "loss": 16.0379, "step": 445010 }, { "epoch": 0.8989685556951643, "grad_norm": 279.4459228515625, "learning_rate": 3.6381924761853814e-07, "loss": 12.6227, "step": 445020 }, { "epoch": 0.8989887563278481, "grad_norm": 451.8597412109375, "learning_rate": 3.6368854173873094e-07, "loss": 20.9938, "step": 445030 }, { "epoch": 0.899008956960532, "grad_norm": 209.34207153320312, "learning_rate": 3.635578584557431e-07, "loss": 10.6094, "step": 445040 }, { "epoch": 0.8990291575932158, "grad_norm": 352.37432861328125, "learning_rate": 3.6342719777021194e-07, "loss": 23.8625, "step": 445050 }, { "epoch": 0.8990493582258996, "grad_norm": 145.17018127441406, "learning_rate": 3.6329655968277477e-07, "loss": 17.6126, "step": 445060 }, { "epoch": 0.8990695588585834, "grad_norm": 27.934141159057617, "learning_rate": 3.6316594419406826e-07, "loss": 13.5671, "step": 445070 }, { "epoch": 0.8990897594912672, "grad_norm": 271.7455139160156, "learning_rate": 3.6303535130472743e-07, "loss": 22.3715, "step": 445080 }, { "epoch": 0.8991099601239511, "grad_norm": 201.5966033935547, "learning_rate": 3.6290478101539073e-07, "loss": 11.2014, "step": 445090 }, { "epoch": 0.8991301607566349, "grad_norm": 255.765869140625, "learning_rate": 3.627742333266937e-07, "loss": 12.2225, "step": 445100 }, { "epoch": 0.8991503613893187, "grad_norm": 55.76103591918945, "learning_rate": 3.6264370823927196e-07, "loss": 17.4078, "step": 445110 }, { "epoch": 0.8991705620220025, "grad_norm": 205.4469451904297, "learning_rate": 3.6251320575376336e-07, "loss": 23.2812, "step": 445120 }, { "epoch": 0.8991907626546863, "grad_norm": 197.58169555664062, "learning_rate": 3.6238272587080183e-07, "loss": 16.2706, "step": 445130 }, { "epoch": 0.8992109632873702, "grad_norm": 220.38482666015625, "learning_rate": 3.6225226859102515e-07, "loss": 9.8516, "step": 445140 }, { "epoch": 0.899231163920054, "grad_norm": 474.5306091308594, "learning_rate": 3.621218339150684e-07, "loss": 14.372, "step": 445150 }, { "epoch": 0.8992513645527378, "grad_norm": 487.86395263671875, "learning_rate": 3.619914218435666e-07, "loss": 17.0953, "step": 445160 }, { "epoch": 0.8992715651854216, "grad_norm": 337.3041687011719, "learning_rate": 3.6186103237715706e-07, "loss": 12.5712, "step": 445170 }, { "epoch": 0.8992917658181054, "grad_norm": 514.4605102539062, "learning_rate": 3.617306655164743e-07, "loss": 36.5964, "step": 445180 }, { "epoch": 0.8993119664507893, "grad_norm": 373.0463562011719, "learning_rate": 3.6160032126215274e-07, "loss": 17.169, "step": 445190 }, { "epoch": 0.8993321670834731, "grad_norm": 625.6041259765625, "learning_rate": 3.614699996148285e-07, "loss": 24.1392, "step": 445200 }, { "epoch": 0.8993523677161569, "grad_norm": 210.83209228515625, "learning_rate": 3.613397005751379e-07, "loss": 24.6616, "step": 445210 }, { "epoch": 0.8993725683488407, "grad_norm": 311.2628479003906, "learning_rate": 3.612094241437153e-07, "loss": 19.9175, "step": 445220 }, { "epoch": 0.8993927689815245, "grad_norm": 179.17237854003906, "learning_rate": 3.610791703211941e-07, "loss": 14.6733, "step": 445230 }, { "epoch": 0.8994129696142084, "grad_norm": 308.4068298339844, "learning_rate": 3.6094893910821103e-07, "loss": 20.6028, "step": 445240 }, { "epoch": 0.8994331702468922, "grad_norm": 263.1321105957031, "learning_rate": 3.608187305054006e-07, "loss": 12.3121, "step": 445250 }, { "epoch": 0.899453370879576, "grad_norm": 400.6637878417969, "learning_rate": 3.606885445133962e-07, "loss": 11.6952, "step": 445260 }, { "epoch": 0.8994735715122598, "grad_norm": 203.16830444335938, "learning_rate": 3.605583811328328e-07, "loss": 21.5273, "step": 445270 }, { "epoch": 0.8994937721449435, "grad_norm": 341.5491638183594, "learning_rate": 3.604282403643472e-07, "loss": 21.8069, "step": 445280 }, { "epoch": 0.8995139727776273, "grad_norm": 515.5481567382812, "learning_rate": 3.6029812220857e-07, "loss": 18.6255, "step": 445290 }, { "epoch": 0.8995341734103112, "grad_norm": 54.038150787353516, "learning_rate": 3.601680266661367e-07, "loss": 16.0552, "step": 445300 }, { "epoch": 0.899554374042995, "grad_norm": 574.565185546875, "learning_rate": 3.6003795373768303e-07, "loss": 21.3307, "step": 445310 }, { "epoch": 0.8995745746756788, "grad_norm": 7.462080955505371, "learning_rate": 3.5990790342384117e-07, "loss": 18.7261, "step": 445320 }, { "epoch": 0.8995947753083626, "grad_norm": 373.57940673828125, "learning_rate": 3.5977787572524457e-07, "loss": 15.4283, "step": 445330 }, { "epoch": 0.8996149759410464, "grad_norm": 27.012331008911133, "learning_rate": 3.596478706425277e-07, "loss": 16.4097, "step": 445340 }, { "epoch": 0.8996351765737303, "grad_norm": 399.6962585449219, "learning_rate": 3.5951788817632615e-07, "loss": 14.6317, "step": 445350 }, { "epoch": 0.8996553772064141, "grad_norm": 307.5194396972656, "learning_rate": 3.5938792832726996e-07, "loss": 24.4639, "step": 445360 }, { "epoch": 0.8996755778390979, "grad_norm": 182.70657348632812, "learning_rate": 3.5925799109599426e-07, "loss": 16.0825, "step": 445370 }, { "epoch": 0.8996957784717817, "grad_norm": 243.04278564453125, "learning_rate": 3.5912807648313285e-07, "loss": 14.5699, "step": 445380 }, { "epoch": 0.8997159791044655, "grad_norm": 233.26922607421875, "learning_rate": 3.5899818448931865e-07, "loss": 20.6728, "step": 445390 }, { "epoch": 0.8997361797371494, "grad_norm": 230.99720764160156, "learning_rate": 3.5886831511518336e-07, "loss": 19.7656, "step": 445400 }, { "epoch": 0.8997563803698332, "grad_norm": 158.81932067871094, "learning_rate": 3.5873846836136204e-07, "loss": 22.3781, "step": 445410 }, { "epoch": 0.899776581002517, "grad_norm": 0.0, "learning_rate": 3.586086442284864e-07, "loss": 21.998, "step": 445420 }, { "epoch": 0.8997967816352008, "grad_norm": 236.00514221191406, "learning_rate": 3.5847884271718814e-07, "loss": 20.4317, "step": 445430 }, { "epoch": 0.8998169822678846, "grad_norm": 201.31605529785156, "learning_rate": 3.583490638281023e-07, "loss": 25.3305, "step": 445440 }, { "epoch": 0.8998371829005685, "grad_norm": 209.36764526367188, "learning_rate": 3.5821930756185894e-07, "loss": 16.4358, "step": 445450 }, { "epoch": 0.8998573835332523, "grad_norm": 239.61834716796875, "learning_rate": 3.5808957391909315e-07, "loss": 13.0421, "step": 445460 }, { "epoch": 0.8998775841659361, "grad_norm": 550.1686401367188, "learning_rate": 3.579598629004355e-07, "loss": 15.5555, "step": 445470 }, { "epoch": 0.8998977847986199, "grad_norm": 149.8254852294922, "learning_rate": 3.5783017450651714e-07, "loss": 11.5235, "step": 445480 }, { "epoch": 0.8999179854313037, "grad_norm": 457.91363525390625, "learning_rate": 3.5770050873797314e-07, "loss": 18.5262, "step": 445490 }, { "epoch": 0.8999381860639876, "grad_norm": 114.06350708007812, "learning_rate": 3.575708655954324e-07, "loss": 9.1562, "step": 445500 }, { "epoch": 0.8999583866966714, "grad_norm": 207.97767639160156, "learning_rate": 3.5744124507952895e-07, "loss": 15.5924, "step": 445510 }, { "epoch": 0.8999785873293552, "grad_norm": 83.53045654296875, "learning_rate": 3.573116471908933e-07, "loss": 24.5435, "step": 445520 }, { "epoch": 0.899998787962039, "grad_norm": 364.34765625, "learning_rate": 3.571820719301583e-07, "loss": 16.694, "step": 445530 }, { "epoch": 0.9000189885947227, "grad_norm": 270.1733093261719, "learning_rate": 3.570525192979546e-07, "loss": 11.4549, "step": 445540 }, { "epoch": 0.9000391892274066, "grad_norm": 417.6627197265625, "learning_rate": 3.569229892949133e-07, "loss": 21.2883, "step": 445550 }, { "epoch": 0.9000593898600904, "grad_norm": 310.5437316894531, "learning_rate": 3.5679348192166675e-07, "loss": 8.723, "step": 445560 }, { "epoch": 0.9000795904927742, "grad_norm": 0.0, "learning_rate": 3.5666399717884604e-07, "loss": 13.0665, "step": 445570 }, { "epoch": 0.900099791125458, "grad_norm": 282.0537414550781, "learning_rate": 3.565345350670807e-07, "loss": 12.7627, "step": 445580 }, { "epoch": 0.9001199917581418, "grad_norm": 365.40948486328125, "learning_rate": 3.56405095587003e-07, "loss": 9.9332, "step": 445590 }, { "epoch": 0.9001401923908257, "grad_norm": 101.62939453125, "learning_rate": 3.562756787392452e-07, "loss": 21.3777, "step": 445600 }, { "epoch": 0.9001603930235095, "grad_norm": 287.9040832519531, "learning_rate": 3.561462845244351e-07, "loss": 14.5309, "step": 445610 }, { "epoch": 0.9001805936561933, "grad_norm": 279.9335021972656, "learning_rate": 3.560169129432045e-07, "loss": 24.3167, "step": 445620 }, { "epoch": 0.9002007942888771, "grad_norm": 410.9738464355469, "learning_rate": 3.5588756399618507e-07, "loss": 10.6661, "step": 445630 }, { "epoch": 0.9002209949215609, "grad_norm": 157.7344970703125, "learning_rate": 3.557582376840063e-07, "loss": 12.3605, "step": 445640 }, { "epoch": 0.9002411955542448, "grad_norm": 285.1471252441406, "learning_rate": 3.556289340072977e-07, "loss": 9.8739, "step": 445650 }, { "epoch": 0.9002613961869286, "grad_norm": 268.43438720703125, "learning_rate": 3.55499652966691e-07, "loss": 16.0661, "step": 445660 }, { "epoch": 0.9002815968196124, "grad_norm": 237.16184997558594, "learning_rate": 3.5537039456281674e-07, "loss": 13.6825, "step": 445670 }, { "epoch": 0.9003017974522962, "grad_norm": 111.08230590820312, "learning_rate": 3.5524115879630225e-07, "loss": 7.6335, "step": 445680 }, { "epoch": 0.90032199808498, "grad_norm": 381.768798828125, "learning_rate": 3.551119456677793e-07, "loss": 14.7367, "step": 445690 }, { "epoch": 0.9003421987176639, "grad_norm": 246.447265625, "learning_rate": 3.5498275517787783e-07, "loss": 31.8115, "step": 445700 }, { "epoch": 0.9003623993503477, "grad_norm": 296.7474060058594, "learning_rate": 3.5485358732722743e-07, "loss": 17.3367, "step": 445710 }, { "epoch": 0.9003825999830315, "grad_norm": 454.2543029785156, "learning_rate": 3.547244421164564e-07, "loss": 20.8422, "step": 445720 }, { "epoch": 0.9004028006157153, "grad_norm": 333.75164794921875, "learning_rate": 3.545953195461954e-07, "loss": 21.1452, "step": 445730 }, { "epoch": 0.9004230012483991, "grad_norm": 204.9512939453125, "learning_rate": 3.5446621961707284e-07, "loss": 14.492, "step": 445740 }, { "epoch": 0.900443201881083, "grad_norm": 378.46734619140625, "learning_rate": 3.5433714232971927e-07, "loss": 24.8063, "step": 445750 }, { "epoch": 0.9004634025137668, "grad_norm": 273.31439208984375, "learning_rate": 3.5420808768476313e-07, "loss": 10.6374, "step": 445760 }, { "epoch": 0.9004836031464506, "grad_norm": 560.32763671875, "learning_rate": 3.540790556828327e-07, "loss": 15.3381, "step": 445770 }, { "epoch": 0.9005038037791344, "grad_norm": 168.69284057617188, "learning_rate": 3.539500463245582e-07, "loss": 44.2349, "step": 445780 }, { "epoch": 0.9005240044118181, "grad_norm": 246.83082580566406, "learning_rate": 3.5382105961056735e-07, "loss": 25.3657, "step": 445790 }, { "epoch": 0.9005442050445019, "grad_norm": 165.73048400878906, "learning_rate": 3.5369209554148854e-07, "loss": 11.9341, "step": 445800 }, { "epoch": 0.9005644056771858, "grad_norm": 169.6850128173828, "learning_rate": 3.535631541179507e-07, "loss": 13.8381, "step": 445810 }, { "epoch": 0.9005846063098696, "grad_norm": 7.769381523132324, "learning_rate": 3.534342353405834e-07, "loss": 12.1496, "step": 445820 }, { "epoch": 0.9006048069425534, "grad_norm": 248.3420867919922, "learning_rate": 3.533053392100144e-07, "loss": 24.7979, "step": 445830 }, { "epoch": 0.9006250075752372, "grad_norm": 357.61468505859375, "learning_rate": 3.531764657268705e-07, "loss": 13.5486, "step": 445840 }, { "epoch": 0.900645208207921, "grad_norm": 0.0, "learning_rate": 3.530476148917816e-07, "loss": 17.1647, "step": 445850 }, { "epoch": 0.9006654088406049, "grad_norm": 455.8564147949219, "learning_rate": 3.5291878670537516e-07, "loss": 21.4658, "step": 445860 }, { "epoch": 0.9006856094732887, "grad_norm": 261.73760986328125, "learning_rate": 3.5278998116827835e-07, "loss": 23.2449, "step": 445870 }, { "epoch": 0.9007058101059725, "grad_norm": 208.8277130126953, "learning_rate": 3.5266119828111953e-07, "loss": 20.0035, "step": 445880 }, { "epoch": 0.9007260107386563, "grad_norm": 399.3625183105469, "learning_rate": 3.525324380445277e-07, "loss": 14.0625, "step": 445890 }, { "epoch": 0.9007462113713401, "grad_norm": 318.3999328613281, "learning_rate": 3.524037004591274e-07, "loss": 16.3384, "step": 445900 }, { "epoch": 0.900766412004024, "grad_norm": 90.35254669189453, "learning_rate": 3.5227498552554805e-07, "loss": 18.7766, "step": 445910 }, { "epoch": 0.9007866126367078, "grad_norm": 308.0154113769531, "learning_rate": 3.5214629324441754e-07, "loss": 32.7715, "step": 445920 }, { "epoch": 0.9008068132693916, "grad_norm": 546.2141723632812, "learning_rate": 3.5201762361636195e-07, "loss": 12.3502, "step": 445930 }, { "epoch": 0.9008270139020754, "grad_norm": 356.170654296875, "learning_rate": 3.5188897664200804e-07, "loss": 17.1992, "step": 445940 }, { "epoch": 0.9008472145347592, "grad_norm": 272.68035888671875, "learning_rate": 3.5176035232198367e-07, "loss": 18.716, "step": 445950 }, { "epoch": 0.9008674151674431, "grad_norm": 448.6239013671875, "learning_rate": 3.516317506569172e-07, "loss": 23.2998, "step": 445960 }, { "epoch": 0.9008876158001269, "grad_norm": 247.167724609375, "learning_rate": 3.515031716474321e-07, "loss": 17.9454, "step": 445970 }, { "epoch": 0.9009078164328107, "grad_norm": 407.5674743652344, "learning_rate": 3.513746152941572e-07, "loss": 14.5001, "step": 445980 }, { "epoch": 0.9009280170654945, "grad_norm": 327.32696533203125, "learning_rate": 3.5124608159771864e-07, "loss": 15.3075, "step": 445990 }, { "epoch": 0.9009482176981783, "grad_norm": 184.2942352294922, "learning_rate": 3.511175705587433e-07, "loss": 20.2835, "step": 446000 }, { "epoch": 0.9009684183308622, "grad_norm": 489.49395751953125, "learning_rate": 3.509890821778561e-07, "loss": 17.2991, "step": 446010 }, { "epoch": 0.900988618963546, "grad_norm": 1080.24169921875, "learning_rate": 3.508606164556855e-07, "loss": 21.9367, "step": 446020 }, { "epoch": 0.9010088195962298, "grad_norm": 280.5447082519531, "learning_rate": 3.507321733928559e-07, "loss": 12.6716, "step": 446030 }, { "epoch": 0.9010290202289136, "grad_norm": 233.6311798095703, "learning_rate": 3.5060375298999303e-07, "loss": 24.0079, "step": 446040 }, { "epoch": 0.9010492208615973, "grad_norm": 321.3100891113281, "learning_rate": 3.5047535524772467e-07, "loss": 25.8466, "step": 446050 }, { "epoch": 0.9010694214942812, "grad_norm": 210.2325897216797, "learning_rate": 3.5034698016667423e-07, "loss": 12.6793, "step": 446060 }, { "epoch": 0.901089622126965, "grad_norm": 234.207275390625, "learning_rate": 3.5021862774747007e-07, "loss": 9.8604, "step": 446070 }, { "epoch": 0.9011098227596488, "grad_norm": 80.25869750976562, "learning_rate": 3.500902979907356e-07, "loss": 15.5287, "step": 446080 }, { "epoch": 0.9011300233923326, "grad_norm": 631.1233520507812, "learning_rate": 3.4996199089709695e-07, "loss": 14.0402, "step": 446090 }, { "epoch": 0.9011502240250164, "grad_norm": 324.9217529296875, "learning_rate": 3.498337064671803e-07, "loss": 20.8748, "step": 446100 }, { "epoch": 0.9011704246577003, "grad_norm": 180.3143310546875, "learning_rate": 3.4970544470160905e-07, "loss": 12.7918, "step": 446110 }, { "epoch": 0.9011906252903841, "grad_norm": 171.03756713867188, "learning_rate": 3.495772056010105e-07, "loss": 21.1228, "step": 446120 }, { "epoch": 0.9012108259230679, "grad_norm": 276.5911865234375, "learning_rate": 3.4944898916600743e-07, "loss": 25.7992, "step": 446130 }, { "epoch": 0.9012310265557517, "grad_norm": 402.3453369140625, "learning_rate": 3.493207953972272e-07, "loss": 13.0384, "step": 446140 }, { "epoch": 0.9012512271884355, "grad_norm": 578.5548706054688, "learning_rate": 3.491926242952931e-07, "loss": 20.0322, "step": 446150 }, { "epoch": 0.9012714278211194, "grad_norm": 396.3736877441406, "learning_rate": 3.4906447586082917e-07, "loss": 27.0589, "step": 446160 }, { "epoch": 0.9012916284538032, "grad_norm": 145.3114013671875, "learning_rate": 3.48936350094462e-07, "loss": 9.8804, "step": 446170 }, { "epoch": 0.901311829086487, "grad_norm": 717.904296875, "learning_rate": 3.488082469968146e-07, "loss": 25.9545, "step": 446180 }, { "epoch": 0.9013320297191708, "grad_norm": 750.8253784179688, "learning_rate": 3.4868016656851135e-07, "loss": 20.0636, "step": 446190 }, { "epoch": 0.9013522303518546, "grad_norm": 168.28651428222656, "learning_rate": 3.4855210881017675e-07, "loss": 17.5162, "step": 446200 }, { "epoch": 0.9013724309845385, "grad_norm": 342.64117431640625, "learning_rate": 3.4842407372243646e-07, "loss": 31.7302, "step": 446210 }, { "epoch": 0.9013926316172223, "grad_norm": 90.33460235595703, "learning_rate": 3.482960613059111e-07, "loss": 16.0992, "step": 446220 }, { "epoch": 0.9014128322499061, "grad_norm": 227.67636108398438, "learning_rate": 3.481680715612273e-07, "loss": 10.1211, "step": 446230 }, { "epoch": 0.9014330328825899, "grad_norm": 468.148681640625, "learning_rate": 3.480401044890086e-07, "loss": 11.4297, "step": 446240 }, { "epoch": 0.9014532335152737, "grad_norm": 254.1317901611328, "learning_rate": 3.479121600898777e-07, "loss": 24.1494, "step": 446250 }, { "epoch": 0.9014734341479576, "grad_norm": 399.9097900390625, "learning_rate": 3.477842383644586e-07, "loss": 14.8352, "step": 446260 }, { "epoch": 0.9014936347806414, "grad_norm": 293.8617248535156, "learning_rate": 3.476563393133747e-07, "loss": 18.3029, "step": 446270 }, { "epoch": 0.9015138354133252, "grad_norm": 208.68783569335938, "learning_rate": 3.475284629372511e-07, "loss": 23.6214, "step": 446280 }, { "epoch": 0.901534036046009, "grad_norm": 374.38604736328125, "learning_rate": 3.474006092367077e-07, "loss": 12.5321, "step": 446290 }, { "epoch": 0.9015542366786927, "grad_norm": 14.303327560424805, "learning_rate": 3.472727782123697e-07, "loss": 5.7181, "step": 446300 }, { "epoch": 0.9015744373113765, "grad_norm": 281.0022277832031, "learning_rate": 3.4714496986486045e-07, "loss": 15.1799, "step": 446310 }, { "epoch": 0.9015946379440604, "grad_norm": 320.8405456542969, "learning_rate": 3.470171841948022e-07, "loss": 16.6386, "step": 446320 }, { "epoch": 0.9016148385767442, "grad_norm": 221.64418029785156, "learning_rate": 3.468894212028173e-07, "loss": 25.7132, "step": 446330 }, { "epoch": 0.901635039209428, "grad_norm": 297.1494140625, "learning_rate": 3.467616808895302e-07, "loss": 15.8963, "step": 446340 }, { "epoch": 0.9016552398421118, "grad_norm": 207.5110626220703, "learning_rate": 3.4663396325556154e-07, "loss": 7.7352, "step": 446350 }, { "epoch": 0.9016754404747956, "grad_norm": 148.60226440429688, "learning_rate": 3.465062683015341e-07, "loss": 14.0815, "step": 446360 }, { "epoch": 0.9016956411074795, "grad_norm": 234.91012573242188, "learning_rate": 3.463785960280719e-07, "loss": 17.0193, "step": 446370 }, { "epoch": 0.9017158417401633, "grad_norm": 219.4169464111328, "learning_rate": 3.462509464357944e-07, "loss": 28.2836, "step": 446380 }, { "epoch": 0.9017360423728471, "grad_norm": 194.2111053466797, "learning_rate": 3.461233195253266e-07, "loss": 16.8256, "step": 446390 }, { "epoch": 0.9017562430055309, "grad_norm": 653.9534912109375, "learning_rate": 3.459957152972887e-07, "loss": 27.3666, "step": 446400 }, { "epoch": 0.9017764436382147, "grad_norm": 0.0, "learning_rate": 3.45868133752304e-07, "loss": 18.6734, "step": 446410 }, { "epoch": 0.9017966442708986, "grad_norm": 357.0810546875, "learning_rate": 3.45740574890992e-07, "loss": 17.6775, "step": 446420 }, { "epoch": 0.9018168449035824, "grad_norm": 52.81655502319336, "learning_rate": 3.456130387139778e-07, "loss": 41.3172, "step": 446430 }, { "epoch": 0.9018370455362662, "grad_norm": 195.17762756347656, "learning_rate": 3.454855252218803e-07, "loss": 21.7486, "step": 446440 }, { "epoch": 0.90185724616895, "grad_norm": 160.00689697265625, "learning_rate": 3.4535803441532125e-07, "loss": 14.2743, "step": 446450 }, { "epoch": 0.9018774468016338, "grad_norm": 197.13502502441406, "learning_rate": 3.4523056629492344e-07, "loss": 23.7805, "step": 446460 }, { "epoch": 0.9018976474343177, "grad_norm": 206.31163024902344, "learning_rate": 3.451031208613076e-07, "loss": 11.4748, "step": 446470 }, { "epoch": 0.9019178480670015, "grad_norm": 243.82015991210938, "learning_rate": 3.449756981150931e-07, "loss": 13.8016, "step": 446480 }, { "epoch": 0.9019380486996853, "grad_norm": 332.4928894042969, "learning_rate": 3.448482980569029e-07, "loss": 29.6933, "step": 446490 }, { "epoch": 0.9019582493323691, "grad_norm": 554.052978515625, "learning_rate": 3.4472092068735917e-07, "loss": 27.1281, "step": 446500 }, { "epoch": 0.9019784499650529, "grad_norm": 424.04071044921875, "learning_rate": 3.4459356600707925e-07, "loss": 9.0941, "step": 446510 }, { "epoch": 0.9019986505977368, "grad_norm": 439.9280090332031, "learning_rate": 3.44466234016686e-07, "loss": 14.4513, "step": 446520 }, { "epoch": 0.9020188512304206, "grad_norm": 211.07901000976562, "learning_rate": 3.443389247168e-07, "loss": 15.9431, "step": 446530 }, { "epoch": 0.9020390518631044, "grad_norm": 146.1761932373047, "learning_rate": 3.442116381080418e-07, "loss": 13.5847, "step": 446540 }, { "epoch": 0.9020592524957882, "grad_norm": 368.0498046875, "learning_rate": 3.4408437419103047e-07, "loss": 10.3202, "step": 446550 }, { "epoch": 0.9020794531284719, "grad_norm": 143.20919799804688, "learning_rate": 3.4395713296638713e-07, "loss": 15.2309, "step": 446560 }, { "epoch": 0.9020996537611558, "grad_norm": 154.44309997558594, "learning_rate": 3.4382991443473403e-07, "loss": 21.3466, "step": 446570 }, { "epoch": 0.9021198543938396, "grad_norm": 545.963134765625, "learning_rate": 3.437027185966868e-07, "loss": 13.0537, "step": 446580 }, { "epoch": 0.9021400550265234, "grad_norm": 160.8242645263672, "learning_rate": 3.4357554545286833e-07, "loss": 21.0006, "step": 446590 }, { "epoch": 0.9021602556592072, "grad_norm": 8.755069732666016, "learning_rate": 3.434483950038986e-07, "loss": 10.28, "step": 446600 }, { "epoch": 0.902180456291891, "grad_norm": 439.3963623046875, "learning_rate": 3.433212672503966e-07, "loss": 18.5469, "step": 446610 }, { "epoch": 0.9022006569245749, "grad_norm": 256.8570861816406, "learning_rate": 3.431941621929813e-07, "loss": 21.0754, "step": 446620 }, { "epoch": 0.9022208575572587, "grad_norm": 30.363508224487305, "learning_rate": 3.430670798322733e-07, "loss": 13.3923, "step": 446630 }, { "epoch": 0.9022410581899425, "grad_norm": 162.2301025390625, "learning_rate": 3.4294002016889206e-07, "loss": 15.3003, "step": 446640 }, { "epoch": 0.9022612588226263, "grad_norm": 167.9653778076172, "learning_rate": 3.428129832034549e-07, "loss": 19.8767, "step": 446650 }, { "epoch": 0.9022814594553101, "grad_norm": 346.23358154296875, "learning_rate": 3.426859689365836e-07, "loss": 13.0976, "step": 446660 }, { "epoch": 0.902301660087994, "grad_norm": 137.21295166015625, "learning_rate": 3.425589773688953e-07, "loss": 19.9106, "step": 446670 }, { "epoch": 0.9023218607206778, "grad_norm": 206.3722381591797, "learning_rate": 3.424320085010102e-07, "loss": 18.7182, "step": 446680 }, { "epoch": 0.9023420613533616, "grad_norm": 68.49281311035156, "learning_rate": 3.423050623335467e-07, "loss": 7.6911, "step": 446690 }, { "epoch": 0.9023622619860454, "grad_norm": 320.5769958496094, "learning_rate": 3.421781388671225e-07, "loss": 13.3725, "step": 446700 }, { "epoch": 0.9023824626187292, "grad_norm": 17.555816650390625, "learning_rate": 3.420512381023583e-07, "loss": 16.0823, "step": 446710 }, { "epoch": 0.902402663251413, "grad_norm": 48.743309020996094, "learning_rate": 3.419243600398703e-07, "loss": 11.5678, "step": 446720 }, { "epoch": 0.9024228638840969, "grad_norm": 184.26426696777344, "learning_rate": 3.4179750468027906e-07, "loss": 15.7388, "step": 446730 }, { "epoch": 0.9024430645167807, "grad_norm": 304.2300109863281, "learning_rate": 3.416706720242008e-07, "loss": 15.5561, "step": 446740 }, { "epoch": 0.9024632651494645, "grad_norm": 230.85507202148438, "learning_rate": 3.415438620722555e-07, "loss": 17.5529, "step": 446750 }, { "epoch": 0.9024834657821483, "grad_norm": 226.01370239257812, "learning_rate": 3.4141707482506056e-07, "loss": 18.1902, "step": 446760 }, { "epoch": 0.9025036664148322, "grad_norm": 993.4498291015625, "learning_rate": 3.412903102832327e-07, "loss": 11.4418, "step": 446770 }, { "epoch": 0.902523867047516, "grad_norm": 888.44091796875, "learning_rate": 3.4116356844739184e-07, "loss": 29.1596, "step": 446780 }, { "epoch": 0.9025440676801998, "grad_norm": 48.29187774658203, "learning_rate": 3.4103684931815483e-07, "loss": 16.8089, "step": 446790 }, { "epoch": 0.9025642683128836, "grad_norm": 186.04946899414062, "learning_rate": 3.409101528961378e-07, "loss": 4.0013, "step": 446800 }, { "epoch": 0.9025844689455673, "grad_norm": 142.0152130126953, "learning_rate": 3.407834791819603e-07, "loss": 16.3202, "step": 446810 }, { "epoch": 0.9026046695782511, "grad_norm": 200.44882202148438, "learning_rate": 3.4065682817624015e-07, "loss": 19.398, "step": 446820 }, { "epoch": 0.902624870210935, "grad_norm": 411.5492858886719, "learning_rate": 3.4053019987959234e-07, "loss": 27.2015, "step": 446830 }, { "epoch": 0.9026450708436188, "grad_norm": 496.2413024902344, "learning_rate": 3.404035942926348e-07, "loss": 15.3034, "step": 446840 }, { "epoch": 0.9026652714763026, "grad_norm": 363.2920227050781, "learning_rate": 3.402770114159859e-07, "loss": 24.9961, "step": 446850 }, { "epoch": 0.9026854721089864, "grad_norm": 233.4441375732422, "learning_rate": 3.401504512502618e-07, "loss": 18.1636, "step": 446860 }, { "epoch": 0.9027056727416702, "grad_norm": 291.9984130859375, "learning_rate": 3.4002391379607815e-07, "loss": 20.9636, "step": 446870 }, { "epoch": 0.9027258733743541, "grad_norm": 19.619997024536133, "learning_rate": 3.3989739905405326e-07, "loss": 11.5068, "step": 446880 }, { "epoch": 0.9027460740070379, "grad_norm": 302.156982421875, "learning_rate": 3.3977090702480455e-07, "loss": 21.4687, "step": 446890 }, { "epoch": 0.9027662746397217, "grad_norm": 475.95806884765625, "learning_rate": 3.396444377089453e-07, "loss": 18.9968, "step": 446900 }, { "epoch": 0.9027864752724055, "grad_norm": 319.4463195800781, "learning_rate": 3.395179911070945e-07, "loss": 19.7993, "step": 446910 }, { "epoch": 0.9028066759050893, "grad_norm": 132.29652404785156, "learning_rate": 3.3939156721986777e-07, "loss": 13.7618, "step": 446920 }, { "epoch": 0.9028268765377732, "grad_norm": 109.25587463378906, "learning_rate": 3.3926516604788185e-07, "loss": 22.8294, "step": 446930 }, { "epoch": 0.902847077170457, "grad_norm": 314.4452819824219, "learning_rate": 3.3913878759175124e-07, "loss": 25.3541, "step": 446940 }, { "epoch": 0.9028672778031408, "grad_norm": 455.28790283203125, "learning_rate": 3.3901243185209375e-07, "loss": 17.1267, "step": 446950 }, { "epoch": 0.9028874784358246, "grad_norm": 548.4476318359375, "learning_rate": 3.388860988295245e-07, "loss": 18.3909, "step": 446960 }, { "epoch": 0.9029076790685084, "grad_norm": 219.3155517578125, "learning_rate": 3.3875978852465795e-07, "loss": 14.5548, "step": 446970 }, { "epoch": 0.9029278797011923, "grad_norm": 82.13236999511719, "learning_rate": 3.3863350093811196e-07, "loss": 12.6254, "step": 446980 }, { "epoch": 0.9029480803338761, "grad_norm": 96.57003784179688, "learning_rate": 3.3850723607049994e-07, "loss": 13.3052, "step": 446990 }, { "epoch": 0.9029682809665599, "grad_norm": 132.06687927246094, "learning_rate": 3.3838099392243915e-07, "loss": 18.9448, "step": 447000 }, { "epoch": 0.9029884815992437, "grad_norm": 52.39521789550781, "learning_rate": 3.382547744945436e-07, "loss": 11.7903, "step": 447010 }, { "epoch": 0.9030086822319275, "grad_norm": 265.75286865234375, "learning_rate": 3.3812857778742935e-07, "loss": 11.8829, "step": 447020 }, { "epoch": 0.9030288828646114, "grad_norm": 353.17645263671875, "learning_rate": 3.3800240380171046e-07, "loss": 14.2112, "step": 447030 }, { "epoch": 0.9030490834972952, "grad_norm": 367.2689208984375, "learning_rate": 3.3787625253800247e-07, "loss": 13.5671, "step": 447040 }, { "epoch": 0.903069284129979, "grad_norm": 253.53717041015625, "learning_rate": 3.3775012399692055e-07, "loss": 23.6926, "step": 447050 }, { "epoch": 0.9030894847626628, "grad_norm": 591.800048828125, "learning_rate": 3.3762401817907795e-07, "loss": 19.7851, "step": 447060 }, { "epoch": 0.9031096853953465, "grad_norm": 185.76414489746094, "learning_rate": 3.374979350850921e-07, "loss": 23.7267, "step": 447070 }, { "epoch": 0.9031298860280303, "grad_norm": 752.8250732421875, "learning_rate": 3.373718747155752e-07, "loss": 24.4539, "step": 447080 }, { "epoch": 0.9031500866607142, "grad_norm": 116.03507232666016, "learning_rate": 3.372458370711412e-07, "loss": 11.2266, "step": 447090 }, { "epoch": 0.903170287293398, "grad_norm": 170.8887939453125, "learning_rate": 3.371198221524069e-07, "loss": 12.8361, "step": 447100 }, { "epoch": 0.9031904879260818, "grad_norm": 300.7256774902344, "learning_rate": 3.3699382995998455e-07, "loss": 13.9144, "step": 447110 }, { "epoch": 0.9032106885587656, "grad_norm": 189.86111450195312, "learning_rate": 3.368678604944886e-07, "loss": 13.0529, "step": 447120 }, { "epoch": 0.9032308891914494, "grad_norm": 287.09002685546875, "learning_rate": 3.3674191375653255e-07, "loss": 9.8071, "step": 447130 }, { "epoch": 0.9032510898241333, "grad_norm": 208.15184020996094, "learning_rate": 3.366159897467314e-07, "loss": 13.5541, "step": 447140 }, { "epoch": 0.9032712904568171, "grad_norm": 254.23193359375, "learning_rate": 3.364900884656991e-07, "loss": 16.9599, "step": 447150 }, { "epoch": 0.9032914910895009, "grad_norm": 297.40203857421875, "learning_rate": 3.3636420991404686e-07, "loss": 10.7206, "step": 447160 }, { "epoch": 0.9033116917221847, "grad_norm": 330.4181213378906, "learning_rate": 3.3623835409239023e-07, "loss": 21.283, "step": 447170 }, { "epoch": 0.9033318923548685, "grad_norm": 313.4491271972656, "learning_rate": 3.361125210013438e-07, "loss": 16.4498, "step": 447180 }, { "epoch": 0.9033520929875524, "grad_norm": 0.0, "learning_rate": 3.3598671064151767e-07, "loss": 17.463, "step": 447190 }, { "epoch": 0.9033722936202362, "grad_norm": 227.22650146484375, "learning_rate": 3.358609230135268e-07, "loss": 14.3232, "step": 447200 }, { "epoch": 0.90339249425292, "grad_norm": 128.48728942871094, "learning_rate": 3.357351581179846e-07, "loss": 23.0015, "step": 447210 }, { "epoch": 0.9034126948856038, "grad_norm": 221.73208618164062, "learning_rate": 3.35609415955504e-07, "loss": 15.211, "step": 447220 }, { "epoch": 0.9034328955182876, "grad_norm": 590.2010498046875, "learning_rate": 3.354836965266961e-07, "loss": 47.4309, "step": 447230 }, { "epoch": 0.9034530961509715, "grad_norm": 288.79150390625, "learning_rate": 3.35357999832176e-07, "loss": 14.823, "step": 447240 }, { "epoch": 0.9034732967836553, "grad_norm": 9.984078407287598, "learning_rate": 3.352323258725554e-07, "loss": 12.5962, "step": 447250 }, { "epoch": 0.9034934974163391, "grad_norm": 352.9945983886719, "learning_rate": 3.351066746484455e-07, "loss": 16.0944, "step": 447260 }, { "epoch": 0.9035136980490229, "grad_norm": 356.8675842285156, "learning_rate": 3.349810461604608e-07, "loss": 19.7702, "step": 447270 }, { "epoch": 0.9035338986817067, "grad_norm": 59.46029281616211, "learning_rate": 3.3485544040921194e-07, "loss": 30.8776, "step": 447280 }, { "epoch": 0.9035540993143906, "grad_norm": 437.4639892578125, "learning_rate": 3.347298573953128e-07, "loss": 13.3344, "step": 447290 }, { "epoch": 0.9035742999470744, "grad_norm": 21.0152587890625, "learning_rate": 3.3460429711937417e-07, "loss": 19.9867, "step": 447300 }, { "epoch": 0.9035945005797582, "grad_norm": 320.994140625, "learning_rate": 3.344787595820076e-07, "loss": 11.8049, "step": 447310 }, { "epoch": 0.903614701212442, "grad_norm": 364.42034912109375, "learning_rate": 3.343532447838266e-07, "loss": 17.2327, "step": 447320 }, { "epoch": 0.9036349018451257, "grad_norm": 274.0588684082031, "learning_rate": 3.3422775272544115e-07, "loss": 17.06, "step": 447330 }, { "epoch": 0.9036551024778096, "grad_norm": 255.96929931640625, "learning_rate": 3.3410228340746475e-07, "loss": 15.7417, "step": 447340 }, { "epoch": 0.9036753031104934, "grad_norm": 5.572038173675537, "learning_rate": 3.3397683683050685e-07, "loss": 26.1625, "step": 447350 }, { "epoch": 0.9036955037431772, "grad_norm": 327.1401062011719, "learning_rate": 3.338514129951809e-07, "loss": 6.1452, "step": 447360 }, { "epoch": 0.903715704375861, "grad_norm": 120.31031036376953, "learning_rate": 3.337260119020974e-07, "loss": 18.9752, "step": 447370 }, { "epoch": 0.9037359050085448, "grad_norm": 281.9804382324219, "learning_rate": 3.33600633551866e-07, "loss": 24.0222, "step": 447380 }, { "epoch": 0.9037561056412287, "grad_norm": 111.14518737792969, "learning_rate": 3.334752779451006e-07, "loss": 13.0984, "step": 447390 }, { "epoch": 0.9037763062739125, "grad_norm": 203.70059204101562, "learning_rate": 3.3334994508241013e-07, "loss": 18.8232, "step": 447400 }, { "epoch": 0.9037965069065963, "grad_norm": 634.7371826171875, "learning_rate": 3.332246349644058e-07, "loss": 31.133, "step": 447410 }, { "epoch": 0.9038167075392801, "grad_norm": 325.8391418457031, "learning_rate": 3.3309934759169825e-07, "loss": 19.8529, "step": 447420 }, { "epoch": 0.9038369081719639, "grad_norm": 235.7917938232422, "learning_rate": 3.3297408296489973e-07, "loss": 13.4451, "step": 447430 }, { "epoch": 0.9038571088046478, "grad_norm": 275.82525634765625, "learning_rate": 3.328488410846187e-07, "loss": 19.9762, "step": 447440 }, { "epoch": 0.9038773094373316, "grad_norm": 541.6697387695312, "learning_rate": 3.327236219514657e-07, "loss": 16.1105, "step": 447450 }, { "epoch": 0.9038975100700154, "grad_norm": 143.609130859375, "learning_rate": 3.325984255660525e-07, "loss": 16.8708, "step": 447460 }, { "epoch": 0.9039177107026992, "grad_norm": 310.2473449707031, "learning_rate": 3.324732519289886e-07, "loss": 14.6144, "step": 447470 }, { "epoch": 0.903937911335383, "grad_norm": 139.44161987304688, "learning_rate": 3.3234810104088356e-07, "loss": 16.5766, "step": 447480 }, { "epoch": 0.9039581119680669, "grad_norm": 401.2772521972656, "learning_rate": 3.322229729023474e-07, "loss": 11.4328, "step": 447490 }, { "epoch": 0.9039783126007507, "grad_norm": 358.89385986328125, "learning_rate": 3.320978675139919e-07, "loss": 20.5475, "step": 447500 }, { "epoch": 0.9039985132334345, "grad_norm": 10.338994979858398, "learning_rate": 3.319727848764237e-07, "loss": 18.9177, "step": 447510 }, { "epoch": 0.9040187138661183, "grad_norm": 386.9339294433594, "learning_rate": 3.318477249902541e-07, "loss": 13.5949, "step": 447520 }, { "epoch": 0.9040389144988021, "grad_norm": 29.801605224609375, "learning_rate": 3.317226878560931e-07, "loss": 15.1736, "step": 447530 }, { "epoch": 0.904059115131486, "grad_norm": 397.3448181152344, "learning_rate": 3.3159767347454963e-07, "loss": 27.8067, "step": 447540 }, { "epoch": 0.9040793157641698, "grad_norm": 227.94371032714844, "learning_rate": 3.3147268184623216e-07, "loss": 14.3855, "step": 447550 }, { "epoch": 0.9040995163968536, "grad_norm": 287.36328125, "learning_rate": 3.3134771297175127e-07, "loss": 21.9384, "step": 447560 }, { "epoch": 0.9041197170295374, "grad_norm": 217.54669189453125, "learning_rate": 3.3122276685171593e-07, "loss": 24.2679, "step": 447570 }, { "epoch": 0.9041399176622211, "grad_norm": 520.4777221679688, "learning_rate": 3.3109784348673293e-07, "loss": 16.5656, "step": 447580 }, { "epoch": 0.904160118294905, "grad_norm": 585.6632080078125, "learning_rate": 3.309729428774144e-07, "loss": 16.717, "step": 447590 }, { "epoch": 0.9041803189275888, "grad_norm": 181.5473175048828, "learning_rate": 3.3084806502436617e-07, "loss": 15.9249, "step": 447600 }, { "epoch": 0.9042005195602726, "grad_norm": 111.66215515136719, "learning_rate": 3.3072320992819875e-07, "loss": 20.6349, "step": 447610 }, { "epoch": 0.9042207201929564, "grad_norm": 123.65542602539062, "learning_rate": 3.3059837758951995e-07, "loss": 10.5056, "step": 447620 }, { "epoch": 0.9042409208256402, "grad_norm": 575.4657592773438, "learning_rate": 3.3047356800893826e-07, "loss": 23.5115, "step": 447630 }, { "epoch": 0.904261121458324, "grad_norm": 204.24893188476562, "learning_rate": 3.303487811870626e-07, "loss": 11.435, "step": 447640 }, { "epoch": 0.9042813220910079, "grad_norm": 167.03961181640625, "learning_rate": 3.3022401712450025e-07, "loss": 15.5288, "step": 447650 }, { "epoch": 0.9043015227236917, "grad_norm": 1584.814697265625, "learning_rate": 3.3009927582185965e-07, "loss": 35.3933, "step": 447660 }, { "epoch": 0.9043217233563755, "grad_norm": 43.52713394165039, "learning_rate": 3.2997455727974856e-07, "loss": 18.9435, "step": 447670 }, { "epoch": 0.9043419239890593, "grad_norm": 194.8103485107422, "learning_rate": 3.2984986149877554e-07, "loss": 7.7875, "step": 447680 }, { "epoch": 0.9043621246217431, "grad_norm": 1.3497366905212402, "learning_rate": 3.297251884795477e-07, "loss": 19.7871, "step": 447690 }, { "epoch": 0.904382325254427, "grad_norm": 382.6680603027344, "learning_rate": 3.2960053822267245e-07, "loss": 18.3611, "step": 447700 }, { "epoch": 0.9044025258871108, "grad_norm": 143.8529052734375, "learning_rate": 3.294759107287582e-07, "loss": 10.4991, "step": 447710 }, { "epoch": 0.9044227265197946, "grad_norm": 199.284423828125, "learning_rate": 3.293513059984121e-07, "loss": 15.4406, "step": 447720 }, { "epoch": 0.9044429271524784, "grad_norm": 339.30767822265625, "learning_rate": 3.2922672403224053e-07, "loss": 19.711, "step": 447730 }, { "epoch": 0.9044631277851622, "grad_norm": 454.2643737792969, "learning_rate": 3.2910216483085125e-07, "loss": 14.7664, "step": 447740 }, { "epoch": 0.9044833284178461, "grad_norm": 250.41156005859375, "learning_rate": 3.289776283948526e-07, "loss": 16.6035, "step": 447750 }, { "epoch": 0.9045035290505299, "grad_norm": 363.5282287597656, "learning_rate": 3.2885311472485025e-07, "loss": 14.1688, "step": 447760 }, { "epoch": 0.9045237296832137, "grad_norm": 178.89268493652344, "learning_rate": 3.287286238214504e-07, "loss": 11.6931, "step": 447770 }, { "epoch": 0.9045439303158975, "grad_norm": 147.0160675048828, "learning_rate": 3.286041556852615e-07, "loss": 28.4786, "step": 447780 }, { "epoch": 0.9045641309485813, "grad_norm": 131.66220092773438, "learning_rate": 3.2847971031688963e-07, "loss": 34.993, "step": 447790 }, { "epoch": 0.9045843315812652, "grad_norm": 220.76544189453125, "learning_rate": 3.283552877169399e-07, "loss": 17.8001, "step": 447800 }, { "epoch": 0.904604532213949, "grad_norm": 228.4512481689453, "learning_rate": 3.282308878860202e-07, "loss": 11.7146, "step": 447810 }, { "epoch": 0.9046247328466328, "grad_norm": 314.55743408203125, "learning_rate": 3.281065108247372e-07, "loss": 45.2426, "step": 447820 }, { "epoch": 0.9046449334793166, "grad_norm": 287.1359558105469, "learning_rate": 3.279821565336966e-07, "loss": 12.0542, "step": 447830 }, { "epoch": 0.9046651341120003, "grad_norm": 121.59862518310547, "learning_rate": 3.2785782501350284e-07, "loss": 19.8585, "step": 447840 }, { "epoch": 0.9046853347446842, "grad_norm": 222.90481567382812, "learning_rate": 3.277335162647649e-07, "loss": 32.5965, "step": 447850 }, { "epoch": 0.904705535377368, "grad_norm": 374.2373962402344, "learning_rate": 3.276092302880868e-07, "loss": 15.8904, "step": 447860 }, { "epoch": 0.9047257360100518, "grad_norm": 131.89588928222656, "learning_rate": 3.274849670840741e-07, "loss": 6.9801, "step": 447870 }, { "epoch": 0.9047459366427356, "grad_norm": 299.618408203125, "learning_rate": 3.2736072665333353e-07, "loss": 16.9254, "step": 447880 }, { "epoch": 0.9047661372754194, "grad_norm": 223.69972229003906, "learning_rate": 3.272365089964691e-07, "loss": 10.6168, "step": 447890 }, { "epoch": 0.9047863379081033, "grad_norm": 187.61891174316406, "learning_rate": 3.271123141140886e-07, "loss": 16.898, "step": 447900 }, { "epoch": 0.9048065385407871, "grad_norm": 530.3232421875, "learning_rate": 3.269881420067944e-07, "loss": 17.9211, "step": 447910 }, { "epoch": 0.9048267391734709, "grad_norm": 215.07627868652344, "learning_rate": 3.268639926751943e-07, "loss": 11.479, "step": 447920 }, { "epoch": 0.9048469398061547, "grad_norm": 485.24908447265625, "learning_rate": 3.267398661198923e-07, "loss": 25.1056, "step": 447930 }, { "epoch": 0.9048671404388385, "grad_norm": 272.2235412597656, "learning_rate": 3.2661576234149285e-07, "loss": 15.0733, "step": 447940 }, { "epoch": 0.9048873410715224, "grad_norm": 207.17344665527344, "learning_rate": 3.264916813406022e-07, "loss": 13.0652, "step": 447950 }, { "epoch": 0.9049075417042062, "grad_norm": 270.88641357421875, "learning_rate": 3.263676231178231e-07, "loss": 9.3535, "step": 447960 }, { "epoch": 0.90492774233689, "grad_norm": 4.275578498840332, "learning_rate": 3.262435876737624e-07, "loss": 17.6642, "step": 447970 }, { "epoch": 0.9049479429695738, "grad_norm": 303.15155029296875, "learning_rate": 3.2611957500902345e-07, "loss": 28.7785, "step": 447980 }, { "epoch": 0.9049681436022576, "grad_norm": 1.4317559003829956, "learning_rate": 3.2599558512421024e-07, "loss": 18.0041, "step": 447990 }, { "epoch": 0.9049883442349415, "grad_norm": 183.83926391601562, "learning_rate": 3.258716180199278e-07, "loss": 19.8937, "step": 448000 }, { "epoch": 0.9050085448676253, "grad_norm": 115.31336212158203, "learning_rate": 3.2574767369678073e-07, "loss": 15.0221, "step": 448010 }, { "epoch": 0.9050287455003091, "grad_norm": 276.0068054199219, "learning_rate": 3.2562375215537176e-07, "loss": 12.0676, "step": 448020 }, { "epoch": 0.9050489461329929, "grad_norm": 194.3147735595703, "learning_rate": 3.2549985339630606e-07, "loss": 24.008, "step": 448030 }, { "epoch": 0.9050691467656767, "grad_norm": 158.41131591796875, "learning_rate": 3.253759774201881e-07, "loss": 17.3652, "step": 448040 }, { "epoch": 0.9050893473983606, "grad_norm": 226.0045928955078, "learning_rate": 3.252521242276191e-07, "loss": 30.4765, "step": 448050 }, { "epoch": 0.9051095480310444, "grad_norm": 272.4423828125, "learning_rate": 3.2512829381920463e-07, "loss": 22.5964, "step": 448060 }, { "epoch": 0.9051297486637282, "grad_norm": 219.59657287597656, "learning_rate": 3.250044861955487e-07, "loss": 23.9859, "step": 448070 }, { "epoch": 0.905149949296412, "grad_norm": 13.538033485412598, "learning_rate": 3.248807013572536e-07, "loss": 18.8713, "step": 448080 }, { "epoch": 0.9051701499290957, "grad_norm": 245.32994079589844, "learning_rate": 3.2475693930492214e-07, "loss": 11.9465, "step": 448090 }, { "epoch": 0.9051903505617795, "grad_norm": 374.3828430175781, "learning_rate": 3.246332000391583e-07, "loss": 15.8, "step": 448100 }, { "epoch": 0.9052105511944634, "grad_norm": 555.6296997070312, "learning_rate": 3.245094835605667e-07, "loss": 15.4352, "step": 448110 }, { "epoch": 0.9052307518271472, "grad_norm": 69.77349090576172, "learning_rate": 3.2438578986974776e-07, "loss": 18.0138, "step": 448120 }, { "epoch": 0.905250952459831, "grad_norm": 689.2047119140625, "learning_rate": 3.242621189673051e-07, "loss": 35.6984, "step": 448130 }, { "epoch": 0.9052711530925148, "grad_norm": 241.93197631835938, "learning_rate": 3.2413847085384256e-07, "loss": 12.6856, "step": 448140 }, { "epoch": 0.9052913537251986, "grad_norm": 231.5634002685547, "learning_rate": 3.240148455299619e-07, "loss": 11.645, "step": 448150 }, { "epoch": 0.9053115543578825, "grad_norm": 260.03094482421875, "learning_rate": 3.2389124299626483e-07, "loss": 24.7901, "step": 448160 }, { "epoch": 0.9053317549905663, "grad_norm": 552.4435424804688, "learning_rate": 3.237676632533554e-07, "loss": 15.0893, "step": 448170 }, { "epoch": 0.9053519556232501, "grad_norm": 472.7244567871094, "learning_rate": 3.2364410630183587e-07, "loss": 12.8835, "step": 448180 }, { "epoch": 0.9053721562559339, "grad_norm": 173.71104431152344, "learning_rate": 3.2352057214230623e-07, "loss": 12.9068, "step": 448190 }, { "epoch": 0.9053923568886177, "grad_norm": 19.331586837768555, "learning_rate": 3.233970607753717e-07, "loss": 18.2913, "step": 448200 }, { "epoch": 0.9054125575213016, "grad_norm": 281.31011962890625, "learning_rate": 3.2327357220163116e-07, "loss": 14.0501, "step": 448210 }, { "epoch": 0.9054327581539854, "grad_norm": 265.1382141113281, "learning_rate": 3.231501064216891e-07, "loss": 28.7558, "step": 448220 }, { "epoch": 0.9054529587866692, "grad_norm": 129.23358154296875, "learning_rate": 3.2302666343614565e-07, "loss": 12.1181, "step": 448230 }, { "epoch": 0.905473159419353, "grad_norm": 181.31980895996094, "learning_rate": 3.2290324324560363e-07, "loss": 17.8086, "step": 448240 }, { "epoch": 0.9054933600520368, "grad_norm": 278.672119140625, "learning_rate": 3.227798458506637e-07, "loss": 23.1344, "step": 448250 }, { "epoch": 0.9055135606847207, "grad_norm": 344.4086608886719, "learning_rate": 3.22656471251927e-07, "loss": 20.6322, "step": 448260 }, { "epoch": 0.9055337613174045, "grad_norm": 235.47267150878906, "learning_rate": 3.225331194499964e-07, "loss": 10.4709, "step": 448270 }, { "epoch": 0.9055539619500883, "grad_norm": 168.4288330078125, "learning_rate": 3.2240979044547095e-07, "loss": 14.7583, "step": 448280 }, { "epoch": 0.9055741625827721, "grad_norm": 131.17369079589844, "learning_rate": 3.2228648423895335e-07, "loss": 18.4809, "step": 448290 }, { "epoch": 0.9055943632154559, "grad_norm": 227.6136016845703, "learning_rate": 3.2216320083104434e-07, "loss": 17.8105, "step": 448300 }, { "epoch": 0.9056145638481398, "grad_norm": 184.4230194091797, "learning_rate": 3.2203994022234396e-07, "loss": 18.6132, "step": 448310 }, { "epoch": 0.9056347644808236, "grad_norm": 266.4573974609375, "learning_rate": 3.2191670241345395e-07, "loss": 21.2703, "step": 448320 }, { "epoch": 0.9056549651135074, "grad_norm": 698.3164672851562, "learning_rate": 3.2179348740497494e-07, "loss": 15.9293, "step": 448330 }, { "epoch": 0.9056751657461912, "grad_norm": 304.7605285644531, "learning_rate": 3.216702951975059e-07, "loss": 15.8953, "step": 448340 }, { "epoch": 0.9056953663788749, "grad_norm": 113.5278549194336, "learning_rate": 3.2154712579164913e-07, "loss": 8.5069, "step": 448350 }, { "epoch": 0.9057155670115588, "grad_norm": 274.9203796386719, "learning_rate": 3.2142397918800416e-07, "loss": 19.7816, "step": 448360 }, { "epoch": 0.9057357676442426, "grad_norm": 158.28135681152344, "learning_rate": 3.213008553871716e-07, "loss": 12.7516, "step": 448370 }, { "epoch": 0.9057559682769264, "grad_norm": 272.28643798828125, "learning_rate": 3.2117775438975096e-07, "loss": 19.5911, "step": 448380 }, { "epoch": 0.9057761689096102, "grad_norm": 136.56214904785156, "learning_rate": 3.2105467619634234e-07, "loss": 14.3715, "step": 448390 }, { "epoch": 0.905796369542294, "grad_norm": 198.1589813232422, "learning_rate": 3.2093162080754634e-07, "loss": 10.9491, "step": 448400 }, { "epoch": 0.9058165701749779, "grad_norm": 187.0760040283203, "learning_rate": 3.208085882239614e-07, "loss": 11.7124, "step": 448410 }, { "epoch": 0.9058367708076617, "grad_norm": 120.32931518554688, "learning_rate": 3.206855784461876e-07, "loss": 15.4974, "step": 448420 }, { "epoch": 0.9058569714403455, "grad_norm": 175.50558471679688, "learning_rate": 3.205625914748256e-07, "loss": 22.6222, "step": 448430 }, { "epoch": 0.9058771720730293, "grad_norm": 405.4112548828125, "learning_rate": 3.2043962731047373e-07, "loss": 11.259, "step": 448440 }, { "epoch": 0.9058973727057131, "grad_norm": 1.81077241897583, "learning_rate": 3.20316685953731e-07, "loss": 27.5821, "step": 448450 }, { "epoch": 0.905917573338397, "grad_norm": 257.16375732421875, "learning_rate": 3.20193767405198e-07, "loss": 12.0861, "step": 448460 }, { "epoch": 0.9059377739710808, "grad_norm": 166.19174194335938, "learning_rate": 3.2007087166547325e-07, "loss": 10.5968, "step": 448470 }, { "epoch": 0.9059579746037646, "grad_norm": 147.11138916015625, "learning_rate": 3.199479987351545e-07, "loss": 14.8641, "step": 448480 }, { "epoch": 0.9059781752364484, "grad_norm": 346.37860107421875, "learning_rate": 3.1982514861484184e-07, "loss": 16.6575, "step": 448490 }, { "epoch": 0.9059983758691322, "grad_norm": 0.0, "learning_rate": 3.1970232130513365e-07, "loss": 7.7384, "step": 448500 }, { "epoch": 0.906018576501816, "grad_norm": 271.4297790527344, "learning_rate": 3.19579516806629e-07, "loss": 21.7959, "step": 448510 }, { "epoch": 0.9060387771344999, "grad_norm": 263.17095947265625, "learning_rate": 3.194567351199257e-07, "loss": 23.3497, "step": 448520 }, { "epoch": 0.9060589777671837, "grad_norm": 547.2625122070312, "learning_rate": 3.193339762456232e-07, "loss": 13.4204, "step": 448530 }, { "epoch": 0.9060791783998675, "grad_norm": 485.02685546875, "learning_rate": 3.1921124018431946e-07, "loss": 18.5841, "step": 448540 }, { "epoch": 0.9060993790325513, "grad_norm": 329.9028015136719, "learning_rate": 3.1908852693661116e-07, "loss": 26.0023, "step": 448550 }, { "epoch": 0.9061195796652352, "grad_norm": 283.8506774902344, "learning_rate": 3.1896583650309896e-07, "loss": 19.4961, "step": 448560 }, { "epoch": 0.906139780297919, "grad_norm": 449.8561706542969, "learning_rate": 3.188431688843785e-07, "loss": 20.3796, "step": 448570 }, { "epoch": 0.9061599809306028, "grad_norm": 428.76458740234375, "learning_rate": 3.187205240810493e-07, "loss": 19.7878, "step": 448580 }, { "epoch": 0.9061801815632866, "grad_norm": 226.718505859375, "learning_rate": 3.1859790209370855e-07, "loss": 18.1999, "step": 448590 }, { "epoch": 0.9062003821959704, "grad_norm": 33.923519134521484, "learning_rate": 3.1847530292295313e-07, "loss": 9.0033, "step": 448600 }, { "epoch": 0.9062205828286541, "grad_norm": 286.41119384765625, "learning_rate": 3.18352726569382e-07, "loss": 16.4444, "step": 448610 }, { "epoch": 0.906240783461338, "grad_norm": 281.3250732421875, "learning_rate": 3.1823017303359185e-07, "loss": 17.605, "step": 448620 }, { "epoch": 0.9062609840940218, "grad_norm": 464.6627197265625, "learning_rate": 3.181076423161794e-07, "loss": 16.2893, "step": 448630 }, { "epoch": 0.9062811847267056, "grad_norm": 285.9560852050781, "learning_rate": 3.179851344177426e-07, "loss": 8.4293, "step": 448640 }, { "epoch": 0.9063013853593894, "grad_norm": 287.4449157714844, "learning_rate": 3.1786264933887977e-07, "loss": 10.3244, "step": 448650 }, { "epoch": 0.9063215859920732, "grad_norm": 191.82052612304688, "learning_rate": 3.1774018708018493e-07, "loss": 13.5765, "step": 448660 }, { "epoch": 0.9063417866247571, "grad_norm": 270.4676208496094, "learning_rate": 3.176177476422565e-07, "loss": 11.8198, "step": 448670 }, { "epoch": 0.9063619872574409, "grad_norm": 336.0176086425781, "learning_rate": 3.1749533102569176e-07, "loss": 8.6977, "step": 448680 }, { "epoch": 0.9063821878901247, "grad_norm": 15.550987243652344, "learning_rate": 3.173729372310874e-07, "loss": 8.9225, "step": 448690 }, { "epoch": 0.9064023885228085, "grad_norm": 239.37109375, "learning_rate": 3.172505662590386e-07, "loss": 12.3695, "step": 448700 }, { "epoch": 0.9064225891554923, "grad_norm": 628.9953002929688, "learning_rate": 3.1712821811014205e-07, "loss": 17.098, "step": 448710 }, { "epoch": 0.9064427897881762, "grad_norm": 324.8562316894531, "learning_rate": 3.170058927849967e-07, "loss": 9.2761, "step": 448720 }, { "epoch": 0.90646299042086, "grad_norm": 219.28326416015625, "learning_rate": 3.168835902841949e-07, "loss": 10.7078, "step": 448730 }, { "epoch": 0.9064831910535438, "grad_norm": 188.46009826660156, "learning_rate": 3.167613106083345e-07, "loss": 16.3328, "step": 448740 }, { "epoch": 0.9065033916862276, "grad_norm": 245.0628204345703, "learning_rate": 3.166390537580122e-07, "loss": 16.5272, "step": 448750 }, { "epoch": 0.9065235923189114, "grad_norm": 3.24403715133667, "learning_rate": 3.165168197338231e-07, "loss": 27.1772, "step": 448760 }, { "epoch": 0.9065437929515953, "grad_norm": 415.12286376953125, "learning_rate": 3.1639460853636226e-07, "loss": 15.0703, "step": 448770 }, { "epoch": 0.9065639935842791, "grad_norm": 23.8918514251709, "learning_rate": 3.162724201662265e-07, "loss": 11.7758, "step": 448780 }, { "epoch": 0.9065841942169629, "grad_norm": 4.333435535430908, "learning_rate": 3.161502546240114e-07, "loss": 14.5456, "step": 448790 }, { "epoch": 0.9066043948496467, "grad_norm": 589.1670532226562, "learning_rate": 3.160281119103109e-07, "loss": 29.9515, "step": 448800 }, { "epoch": 0.9066245954823305, "grad_norm": 142.0504913330078, "learning_rate": 3.159059920257218e-07, "loss": 15.2165, "step": 448810 }, { "epoch": 0.9066447961150144, "grad_norm": 28.602466583251953, "learning_rate": 3.157838949708386e-07, "loss": 16.4465, "step": 448820 }, { "epoch": 0.9066649967476982, "grad_norm": 1572.9324951171875, "learning_rate": 3.1566182074625693e-07, "loss": 25.6184, "step": 448830 }, { "epoch": 0.906685197380382, "grad_norm": 426.5455627441406, "learning_rate": 3.155397693525708e-07, "loss": 25.8037, "step": 448840 }, { "epoch": 0.9067053980130658, "grad_norm": 320.0198669433594, "learning_rate": 3.1541774079037635e-07, "loss": 22.5102, "step": 448850 }, { "epoch": 0.9067255986457495, "grad_norm": 527.325439453125, "learning_rate": 3.1529573506026757e-07, "loss": 27.059, "step": 448860 }, { "epoch": 0.9067457992784334, "grad_norm": 8.045053482055664, "learning_rate": 3.151737521628384e-07, "loss": 11.6383, "step": 448870 }, { "epoch": 0.9067659999111172, "grad_norm": 207.8684539794922, "learning_rate": 3.150517920986851e-07, "loss": 11.5602, "step": 448880 }, { "epoch": 0.906786200543801, "grad_norm": 378.9325256347656, "learning_rate": 3.1492985486840044e-07, "loss": 11.034, "step": 448890 }, { "epoch": 0.9068064011764848, "grad_norm": 297.4162902832031, "learning_rate": 3.148079404725801e-07, "loss": 14.1207, "step": 448900 }, { "epoch": 0.9068266018091686, "grad_norm": 269.1322937011719, "learning_rate": 3.1468604891181755e-07, "loss": 9.0224, "step": 448910 }, { "epoch": 0.9068468024418525, "grad_norm": 417.515625, "learning_rate": 3.145641801867061e-07, "loss": 17.4701, "step": 448920 }, { "epoch": 0.9068670030745363, "grad_norm": 18.833566665649414, "learning_rate": 3.1444233429784145e-07, "loss": 16.3909, "step": 448930 }, { "epoch": 0.9068872037072201, "grad_norm": 286.1451416015625, "learning_rate": 3.14320511245817e-07, "loss": 17.3131, "step": 448940 }, { "epoch": 0.9069074043399039, "grad_norm": 208.767333984375, "learning_rate": 3.1419871103122447e-07, "loss": 15.315, "step": 448950 }, { "epoch": 0.9069276049725877, "grad_norm": 116.77249145507812, "learning_rate": 3.1407693365465954e-07, "loss": 20.7787, "step": 448960 }, { "epoch": 0.9069478056052716, "grad_norm": 431.48309326171875, "learning_rate": 3.1395517911671613e-07, "loss": 28.2554, "step": 448970 }, { "epoch": 0.9069680062379554, "grad_norm": 38.44464111328125, "learning_rate": 3.1383344741798716e-07, "loss": 19.8453, "step": 448980 }, { "epoch": 0.9069882068706392, "grad_norm": 291.7116394042969, "learning_rate": 3.137117385590643e-07, "loss": 22.2248, "step": 448990 }, { "epoch": 0.907008407503323, "grad_norm": 80.84439849853516, "learning_rate": 3.135900525405428e-07, "loss": 13.2023, "step": 449000 }, { "epoch": 0.9070286081360068, "grad_norm": 112.67292785644531, "learning_rate": 3.134683893630153e-07, "loss": 20.4081, "step": 449010 }, { "epoch": 0.9070488087686907, "grad_norm": 105.46914672851562, "learning_rate": 3.133467490270736e-07, "loss": 10.5356, "step": 449020 }, { "epoch": 0.9070690094013745, "grad_norm": 340.24774169921875, "learning_rate": 3.1322513153331124e-07, "loss": 19.299, "step": 449030 }, { "epoch": 0.9070892100340583, "grad_norm": 1019.2459716796875, "learning_rate": 3.1310353688232207e-07, "loss": 19.9877, "step": 449040 }, { "epoch": 0.9071094106667421, "grad_norm": 138.2771759033203, "learning_rate": 3.1298196507469737e-07, "loss": 25.8873, "step": 449050 }, { "epoch": 0.9071296112994259, "grad_norm": 150.6022491455078, "learning_rate": 3.128604161110299e-07, "loss": 8.4736, "step": 449060 }, { "epoch": 0.9071498119321098, "grad_norm": 222.2021942138672, "learning_rate": 3.1273888999191314e-07, "loss": 13.5926, "step": 449070 }, { "epoch": 0.9071700125647936, "grad_norm": 743.35302734375, "learning_rate": 3.126173867179383e-07, "loss": 24.3137, "step": 449080 }, { "epoch": 0.9071902131974774, "grad_norm": 131.61061096191406, "learning_rate": 3.1249590628969707e-07, "loss": 17.9359, "step": 449090 }, { "epoch": 0.9072104138301612, "grad_norm": 333.20806884765625, "learning_rate": 3.123744487077829e-07, "loss": 26.8429, "step": 449100 }, { "epoch": 0.907230614462845, "grad_norm": 159.0042266845703, "learning_rate": 3.122530139727864e-07, "loss": 20.8925, "step": 449110 }, { "epoch": 0.9072508150955287, "grad_norm": 210.1376953125, "learning_rate": 3.12131602085301e-07, "loss": 12.3458, "step": 449120 }, { "epoch": 0.9072710157282126, "grad_norm": 403.5615234375, "learning_rate": 3.1201021304591684e-07, "loss": 16.6434, "step": 449130 }, { "epoch": 0.9072912163608964, "grad_norm": 312.8066101074219, "learning_rate": 3.118888468552267e-07, "loss": 6.4285, "step": 449140 }, { "epoch": 0.9073114169935802, "grad_norm": 283.7262878417969, "learning_rate": 3.1176750351382235e-07, "loss": 15.0267, "step": 449150 }, { "epoch": 0.907331617626264, "grad_norm": 106.77458190917969, "learning_rate": 3.116461830222933e-07, "loss": 12.1946, "step": 449160 }, { "epoch": 0.9073518182589478, "grad_norm": 428.0980224609375, "learning_rate": 3.11524885381233e-07, "loss": 33.8796, "step": 449170 }, { "epoch": 0.9073720188916317, "grad_norm": 181.52322387695312, "learning_rate": 3.11403610591231e-07, "loss": 15.0143, "step": 449180 }, { "epoch": 0.9073922195243155, "grad_norm": 115.66758728027344, "learning_rate": 3.1128235865288013e-07, "loss": 16.0853, "step": 449190 }, { "epoch": 0.9074124201569993, "grad_norm": 170.62115478515625, "learning_rate": 3.1116112956677045e-07, "loss": 9.9213, "step": 449200 }, { "epoch": 0.9074326207896831, "grad_norm": 68.90569305419922, "learning_rate": 3.1103992333349153e-07, "loss": 14.7757, "step": 449210 }, { "epoch": 0.9074528214223669, "grad_norm": 350.0859680175781, "learning_rate": 3.1091873995363677e-07, "loss": 16.3275, "step": 449220 }, { "epoch": 0.9074730220550508, "grad_norm": 226.39288330078125, "learning_rate": 3.1079757942779453e-07, "loss": 17.1437, "step": 449230 }, { "epoch": 0.9074932226877346, "grad_norm": 276.9502258300781, "learning_rate": 3.106764417565561e-07, "loss": 10.0963, "step": 449240 }, { "epoch": 0.9075134233204184, "grad_norm": 319.51055908203125, "learning_rate": 3.105553269405115e-07, "loss": 18.2976, "step": 449250 }, { "epoch": 0.9075336239531022, "grad_norm": 328.3490295410156, "learning_rate": 3.1043423498025303e-07, "loss": 21.3379, "step": 449260 }, { "epoch": 0.907553824585786, "grad_norm": 432.4721374511719, "learning_rate": 3.1031316587636805e-07, "loss": 17.5255, "step": 449270 }, { "epoch": 0.9075740252184699, "grad_norm": 0.0, "learning_rate": 3.101921196294477e-07, "loss": 24.2896, "step": 449280 }, { "epoch": 0.9075942258511537, "grad_norm": 125.64295959472656, "learning_rate": 3.1007109624008326e-07, "loss": 28.2211, "step": 449290 }, { "epoch": 0.9076144264838375, "grad_norm": 320.15679931640625, "learning_rate": 3.0995009570886305e-07, "loss": 23.9613, "step": 449300 }, { "epoch": 0.9076346271165213, "grad_norm": 164.57333374023438, "learning_rate": 3.098291180363766e-07, "loss": 16.5072, "step": 449310 }, { "epoch": 0.9076548277492051, "grad_norm": 219.70770263671875, "learning_rate": 3.097081632232141e-07, "loss": 12.2796, "step": 449320 }, { "epoch": 0.907675028381889, "grad_norm": 225.4513397216797, "learning_rate": 3.095872312699666e-07, "loss": 10.1394, "step": 449330 }, { "epoch": 0.9076952290145728, "grad_norm": 317.7618713378906, "learning_rate": 3.094663221772209e-07, "loss": 18.2566, "step": 449340 }, { "epoch": 0.9077154296472566, "grad_norm": 214.27200317382812, "learning_rate": 3.093454359455672e-07, "loss": 18.4359, "step": 449350 }, { "epoch": 0.9077356302799404, "grad_norm": 138.70181274414062, "learning_rate": 3.09224572575596e-07, "loss": 20.3603, "step": 449360 }, { "epoch": 0.9077558309126241, "grad_norm": 485.7292785644531, "learning_rate": 3.091037320678947e-07, "loss": 20.2745, "step": 449370 }, { "epoch": 0.907776031545308, "grad_norm": 255.82957458496094, "learning_rate": 3.089829144230527e-07, "loss": 15.1267, "step": 449380 }, { "epoch": 0.9077962321779918, "grad_norm": 241.6278839111328, "learning_rate": 3.088621196416597e-07, "loss": 6.9126, "step": 449390 }, { "epoch": 0.9078164328106756, "grad_norm": 254.8242645263672, "learning_rate": 3.0874134772430344e-07, "loss": 8.0878, "step": 449400 }, { "epoch": 0.9078366334433594, "grad_norm": 928.5169067382812, "learning_rate": 3.0862059867157237e-07, "loss": 17.6812, "step": 449410 }, { "epoch": 0.9078568340760432, "grad_norm": 315.62933349609375, "learning_rate": 3.08499872484056e-07, "loss": 9.0443, "step": 449420 }, { "epoch": 0.907877034708727, "grad_norm": 249.79283142089844, "learning_rate": 3.0837916916234166e-07, "loss": 32.0876, "step": 449430 }, { "epoch": 0.9078972353414109, "grad_norm": 296.26904296875, "learning_rate": 3.0825848870701893e-07, "loss": 19.0261, "step": 449440 }, { "epoch": 0.9079174359740947, "grad_norm": 4.003561973571777, "learning_rate": 3.08137831118675e-07, "loss": 15.0675, "step": 449450 }, { "epoch": 0.9079376366067785, "grad_norm": 197.02659606933594, "learning_rate": 3.080171963978984e-07, "loss": 21.0176, "step": 449460 }, { "epoch": 0.9079578372394623, "grad_norm": 127.26113891601562, "learning_rate": 3.078965845452769e-07, "loss": 9.8642, "step": 449470 }, { "epoch": 0.9079780378721461, "grad_norm": 345.94903564453125, "learning_rate": 3.077759955613979e-07, "loss": 13.7259, "step": 449480 }, { "epoch": 0.90799823850483, "grad_norm": 188.60939025878906, "learning_rate": 3.0765542944685036e-07, "loss": 23.0865, "step": 449490 }, { "epoch": 0.9080184391375138, "grad_norm": 357.7233581542969, "learning_rate": 3.0753488620222037e-07, "loss": 29.3263, "step": 449500 }, { "epoch": 0.9080386397701976, "grad_norm": 13.091974258422852, "learning_rate": 3.07414365828097e-07, "loss": 14.683, "step": 449510 }, { "epoch": 0.9080588404028814, "grad_norm": 337.0787048339844, "learning_rate": 3.0729386832506647e-07, "loss": 21.3541, "step": 449520 }, { "epoch": 0.9080790410355652, "grad_norm": 631.6271362304688, "learning_rate": 3.07173393693716e-07, "loss": 23.8249, "step": 449530 }, { "epoch": 0.9080992416682491, "grad_norm": 551.5026245117188, "learning_rate": 3.0705294193463406e-07, "loss": 14.6544, "step": 449540 }, { "epoch": 0.9081194423009329, "grad_norm": 392.66705322265625, "learning_rate": 3.069325130484069e-07, "loss": 19.4949, "step": 449550 }, { "epoch": 0.9081396429336167, "grad_norm": 56.55012130737305, "learning_rate": 3.068121070356206e-07, "loss": 19.1171, "step": 449560 }, { "epoch": 0.9081598435663005, "grad_norm": 78.03938293457031, "learning_rate": 3.066917238968631e-07, "loss": 11.2309, "step": 449570 }, { "epoch": 0.9081800441989843, "grad_norm": 86.67057037353516, "learning_rate": 3.065713636327211e-07, "loss": 20.7654, "step": 449580 }, { "epoch": 0.9082002448316682, "grad_norm": 224.8217315673828, "learning_rate": 3.0645102624378144e-07, "loss": 16.2432, "step": 449590 }, { "epoch": 0.908220445464352, "grad_norm": 423.26861572265625, "learning_rate": 3.0633071173062966e-07, "loss": 12.1546, "step": 449600 }, { "epoch": 0.9082406460970358, "grad_norm": 68.96484375, "learning_rate": 3.0621042009385313e-07, "loss": 17.9391, "step": 449610 }, { "epoch": 0.9082608467297196, "grad_norm": 189.0087890625, "learning_rate": 3.0609015133403806e-07, "loss": 19.6735, "step": 449620 }, { "epoch": 0.9082810473624033, "grad_norm": 177.26031494140625, "learning_rate": 3.0596990545176895e-07, "loss": 15.3103, "step": 449630 }, { "epoch": 0.9083012479950872, "grad_norm": 235.55921936035156, "learning_rate": 3.058496824476337e-07, "loss": 9.5063, "step": 449640 }, { "epoch": 0.908321448627771, "grad_norm": 175.67483520507812, "learning_rate": 3.057294823222184e-07, "loss": 20.511, "step": 449650 }, { "epoch": 0.9083416492604548, "grad_norm": 210.60415649414062, "learning_rate": 3.056093050761083e-07, "loss": 14.8836, "step": 449660 }, { "epoch": 0.9083618498931386, "grad_norm": 255.8742218017578, "learning_rate": 3.0548915070988837e-07, "loss": 12.1927, "step": 449670 }, { "epoch": 0.9083820505258224, "grad_norm": 211.16329956054688, "learning_rate": 3.0536901922414543e-07, "loss": 21.3886, "step": 449680 }, { "epoch": 0.9084022511585063, "grad_norm": 361.5028991699219, "learning_rate": 3.052489106194645e-07, "loss": 29.5198, "step": 449690 }, { "epoch": 0.9084224517911901, "grad_norm": 465.8304443359375, "learning_rate": 3.051288248964307e-07, "loss": 22.9383, "step": 449700 }, { "epoch": 0.9084426524238739, "grad_norm": 68.58818817138672, "learning_rate": 3.050087620556302e-07, "loss": 7.1828, "step": 449710 }, { "epoch": 0.9084628530565577, "grad_norm": 0.0, "learning_rate": 3.0488872209764654e-07, "loss": 20.7759, "step": 449720 }, { "epoch": 0.9084830536892415, "grad_norm": 36.281620025634766, "learning_rate": 3.047687050230663e-07, "loss": 12.8341, "step": 449730 }, { "epoch": 0.9085032543219254, "grad_norm": 411.8639831542969, "learning_rate": 3.046487108324736e-07, "loss": 16.3744, "step": 449740 }, { "epoch": 0.9085234549546092, "grad_norm": 381.7320251464844, "learning_rate": 3.0452873952645455e-07, "loss": 12.6217, "step": 449750 }, { "epoch": 0.908543655587293, "grad_norm": 478.99676513671875, "learning_rate": 3.0440879110559263e-07, "loss": 31.7627, "step": 449760 }, { "epoch": 0.9085638562199768, "grad_norm": 40.081295013427734, "learning_rate": 3.0428886557047176e-07, "loss": 18.577, "step": 449770 }, { "epoch": 0.9085840568526606, "grad_norm": 374.45465087890625, "learning_rate": 3.0416896292167873e-07, "loss": 23.2313, "step": 449780 }, { "epoch": 0.9086042574853445, "grad_norm": 175.38536071777344, "learning_rate": 3.0404908315979587e-07, "loss": 20.0117, "step": 449790 }, { "epoch": 0.9086244581180283, "grad_norm": 372.4679870605469, "learning_rate": 3.0392922628540875e-07, "loss": 22.1593, "step": 449800 }, { "epoch": 0.9086446587507121, "grad_norm": 285.4126281738281, "learning_rate": 3.0380939229910087e-07, "loss": 22.7447, "step": 449810 }, { "epoch": 0.9086648593833959, "grad_norm": 367.1722717285156, "learning_rate": 3.036895812014556e-07, "loss": 14.5338, "step": 449820 }, { "epoch": 0.9086850600160797, "grad_norm": 357.52117919921875, "learning_rate": 3.0356979299305867e-07, "loss": 18.3037, "step": 449830 }, { "epoch": 0.9087052606487636, "grad_norm": 200.9291534423828, "learning_rate": 3.0345002767449337e-07, "loss": 14.4045, "step": 449840 }, { "epoch": 0.9087254612814474, "grad_norm": 659.1195068359375, "learning_rate": 3.0333028524634156e-07, "loss": 20.09, "step": 449850 }, { "epoch": 0.9087456619141312, "grad_norm": 9.049476623535156, "learning_rate": 3.0321056570918883e-07, "loss": 14.2662, "step": 449860 }, { "epoch": 0.908765862546815, "grad_norm": 711.8421630859375, "learning_rate": 3.030908690636192e-07, "loss": 25.4986, "step": 449870 }, { "epoch": 0.9087860631794987, "grad_norm": 201.71435546875, "learning_rate": 3.029711953102138e-07, "loss": 23.3851, "step": 449880 }, { "epoch": 0.9088062638121825, "grad_norm": 257.1253662109375, "learning_rate": 3.028515444495572e-07, "loss": 10.9446, "step": 449890 }, { "epoch": 0.9088264644448664, "grad_norm": 275.5248718261719, "learning_rate": 3.027319164822329e-07, "loss": 16.2062, "step": 449900 }, { "epoch": 0.9088466650775502, "grad_norm": 122.3408432006836, "learning_rate": 3.0261231140882363e-07, "loss": 30.1198, "step": 449910 }, { "epoch": 0.908866865710234, "grad_norm": 161.4365692138672, "learning_rate": 3.024927292299118e-07, "loss": 21.3074, "step": 449920 }, { "epoch": 0.9088870663429178, "grad_norm": 362.51983642578125, "learning_rate": 3.0237316994608025e-07, "loss": 15.4257, "step": 449930 }, { "epoch": 0.9089072669756016, "grad_norm": 353.9023132324219, "learning_rate": 3.02253633557914e-07, "loss": 18.5174, "step": 449940 }, { "epoch": 0.9089274676082855, "grad_norm": 203.5133056640625, "learning_rate": 3.0213412006599216e-07, "loss": 12.4842, "step": 449950 }, { "epoch": 0.9089476682409693, "grad_norm": 0.0, "learning_rate": 3.0201462947089865e-07, "loss": 23.1439, "step": 449960 }, { "epoch": 0.9089678688736531, "grad_norm": 518.57861328125, "learning_rate": 3.018951617732169e-07, "loss": 23.5071, "step": 449970 }, { "epoch": 0.9089880695063369, "grad_norm": 513.31787109375, "learning_rate": 3.01775716973528e-07, "loss": 35.2477, "step": 449980 }, { "epoch": 0.9090082701390207, "grad_norm": 264.6451721191406, "learning_rate": 3.0165629507241446e-07, "loss": 14.1119, "step": 449990 }, { "epoch": 0.9090284707717046, "grad_norm": 197.44154357910156, "learning_rate": 3.015368960704584e-07, "loss": 14.4538, "step": 450000 }, { "epoch": 0.9090486714043884, "grad_norm": 371.9814453125, "learning_rate": 3.014175199682418e-07, "loss": 15.6022, "step": 450010 }, { "epoch": 0.9090688720370722, "grad_norm": 427.51153564453125, "learning_rate": 3.012981667663456e-07, "loss": 14.1698, "step": 450020 }, { "epoch": 0.909089072669756, "grad_norm": 537.7033081054688, "learning_rate": 3.011788364653523e-07, "loss": 17.894, "step": 450030 }, { "epoch": 0.9091092733024398, "grad_norm": 263.43603515625, "learning_rate": 3.010595290658441e-07, "loss": 7.9788, "step": 450040 }, { "epoch": 0.9091294739351237, "grad_norm": 67.04159545898438, "learning_rate": 3.0094024456840176e-07, "loss": 12.2793, "step": 450050 }, { "epoch": 0.9091496745678075, "grad_norm": 422.98028564453125, "learning_rate": 3.008209829736064e-07, "loss": 19.8516, "step": 450060 }, { "epoch": 0.9091698752004913, "grad_norm": 129.8915557861328, "learning_rate": 3.007017442820398e-07, "loss": 21.2672, "step": 450070 }, { "epoch": 0.9091900758331751, "grad_norm": 549.937744140625, "learning_rate": 3.005825284942837e-07, "loss": 24.0331, "step": 450080 }, { "epoch": 0.909210276465859, "grad_norm": 8.076929092407227, "learning_rate": 3.004633356109171e-07, "loss": 24.6605, "step": 450090 }, { "epoch": 0.9092304770985428, "grad_norm": 276.8363952636719, "learning_rate": 3.003441656325229e-07, "loss": 13.11, "step": 450100 }, { "epoch": 0.9092506777312266, "grad_norm": 208.9092254638672, "learning_rate": 3.002250185596806e-07, "loss": 15.1485, "step": 450110 }, { "epoch": 0.9092708783639104, "grad_norm": 66.46244812011719, "learning_rate": 3.0010589439297245e-07, "loss": 31.4674, "step": 450120 }, { "epoch": 0.9092910789965942, "grad_norm": 286.9383850097656, "learning_rate": 2.9998679313297807e-07, "loss": 18.5123, "step": 450130 }, { "epoch": 0.9093112796292779, "grad_norm": 250.16802978515625, "learning_rate": 2.99867714780277e-07, "loss": 17.2191, "step": 450140 }, { "epoch": 0.9093314802619618, "grad_norm": 104.57228088378906, "learning_rate": 2.9974865933545207e-07, "loss": 15.2934, "step": 450150 }, { "epoch": 0.9093516808946456, "grad_norm": 123.68235778808594, "learning_rate": 2.996296267990817e-07, "loss": 16.0567, "step": 450160 }, { "epoch": 0.9093718815273294, "grad_norm": 224.68667602539062, "learning_rate": 2.9951061717174543e-07, "loss": 17.8656, "step": 450170 }, { "epoch": 0.9093920821600132, "grad_norm": 275.7343444824219, "learning_rate": 2.9939163045402456e-07, "loss": 7.4893, "step": 450180 }, { "epoch": 0.909412282792697, "grad_norm": 67.62259674072266, "learning_rate": 2.992726666464996e-07, "loss": 17.9176, "step": 450190 }, { "epoch": 0.9094324834253809, "grad_norm": 259.7737121582031, "learning_rate": 2.99153725749749e-07, "loss": 17.5629, "step": 450200 }, { "epoch": 0.9094526840580647, "grad_norm": 278.46795654296875, "learning_rate": 2.990348077643529e-07, "loss": 12.2591, "step": 450210 }, { "epoch": 0.9094728846907485, "grad_norm": 118.11419677734375, "learning_rate": 2.989159126908914e-07, "loss": 16.5497, "step": 450220 }, { "epoch": 0.9094930853234323, "grad_norm": 325.4405212402344, "learning_rate": 2.9879704052994395e-07, "loss": 11.928, "step": 450230 }, { "epoch": 0.9095132859561161, "grad_norm": 278.14739990234375, "learning_rate": 2.986781912820885e-07, "loss": 9.2348, "step": 450240 }, { "epoch": 0.9095334865888, "grad_norm": 533.7655639648438, "learning_rate": 2.9855936494790516e-07, "loss": 21.2803, "step": 450250 }, { "epoch": 0.9095536872214838, "grad_norm": 41.38686752319336, "learning_rate": 2.9844056152797505e-07, "loss": 15.8319, "step": 450260 }, { "epoch": 0.9095738878541676, "grad_norm": 178.795654296875, "learning_rate": 2.983217810228739e-07, "loss": 5.9369, "step": 450270 }, { "epoch": 0.9095940884868514, "grad_norm": 249.68287658691406, "learning_rate": 2.9820302343318177e-07, "loss": 19.1524, "step": 450280 }, { "epoch": 0.9096142891195352, "grad_norm": 249.4966583251953, "learning_rate": 2.9808428875947925e-07, "loss": 13.1276, "step": 450290 }, { "epoch": 0.909634489752219, "grad_norm": 6.903567790985107, "learning_rate": 2.9796557700234317e-07, "loss": 24.3792, "step": 450300 }, { "epoch": 0.9096546903849029, "grad_norm": 182.82485961914062, "learning_rate": 2.9784688816235194e-07, "loss": 19.4116, "step": 450310 }, { "epoch": 0.9096748910175867, "grad_norm": 173.53346252441406, "learning_rate": 2.9772822224008515e-07, "loss": 14.2293, "step": 450320 }, { "epoch": 0.9096950916502705, "grad_norm": 170.77554321289062, "learning_rate": 2.976095792361211e-07, "loss": 46.01, "step": 450330 }, { "epoch": 0.9097152922829543, "grad_norm": 276.0481872558594, "learning_rate": 2.9749095915103665e-07, "loss": 25.1989, "step": 450340 }, { "epoch": 0.9097354929156382, "grad_norm": 309.44805908203125, "learning_rate": 2.9737236198541077e-07, "loss": 28.9171, "step": 450350 }, { "epoch": 0.909755693548322, "grad_norm": 99.80960083007812, "learning_rate": 2.9725378773982295e-07, "loss": 16.5631, "step": 450360 }, { "epoch": 0.9097758941810058, "grad_norm": 277.0606384277344, "learning_rate": 2.971352364148494e-07, "loss": 15.4084, "step": 450370 }, { "epoch": 0.9097960948136896, "grad_norm": 14.412562370300293, "learning_rate": 2.970167080110675e-07, "loss": 10.3078, "step": 450380 }, { "epoch": 0.9098162954463734, "grad_norm": 117.36461639404297, "learning_rate": 2.968982025290568e-07, "loss": 20.2965, "step": 450390 }, { "epoch": 0.9098364960790571, "grad_norm": 42.38365936279297, "learning_rate": 2.967797199693928e-07, "loss": 23.8395, "step": 450400 }, { "epoch": 0.909856696711741, "grad_norm": 529.2208862304688, "learning_rate": 2.9666126033265517e-07, "loss": 18.0251, "step": 450410 }, { "epoch": 0.9098768973444248, "grad_norm": 134.37818908691406, "learning_rate": 2.9654282361941953e-07, "loss": 20.5174, "step": 450420 }, { "epoch": 0.9098970979771086, "grad_norm": 486.904052734375, "learning_rate": 2.9642440983026324e-07, "loss": 20.9586, "step": 450430 }, { "epoch": 0.9099172986097924, "grad_norm": 410.2285461425781, "learning_rate": 2.963060189657646e-07, "loss": 15.4766, "step": 450440 }, { "epoch": 0.9099374992424762, "grad_norm": 139.95008850097656, "learning_rate": 2.961876510264999e-07, "loss": 18.7565, "step": 450450 }, { "epoch": 0.9099576998751601, "grad_norm": 183.5253448486328, "learning_rate": 2.9606930601304595e-07, "loss": 16.7339, "step": 450460 }, { "epoch": 0.9099779005078439, "grad_norm": 28.533550262451172, "learning_rate": 2.9595098392597887e-07, "loss": 13.0484, "step": 450470 }, { "epoch": 0.9099981011405277, "grad_norm": 442.8360290527344, "learning_rate": 2.958326847658771e-07, "loss": 15.7757, "step": 450480 }, { "epoch": 0.9100183017732115, "grad_norm": 149.72361755371094, "learning_rate": 2.9571440853331634e-07, "loss": 20.018, "step": 450490 }, { "epoch": 0.9100385024058953, "grad_norm": 338.7470397949219, "learning_rate": 2.9559615522887275e-07, "loss": 11.6823, "step": 450500 }, { "epoch": 0.9100587030385792, "grad_norm": 156.58456420898438, "learning_rate": 2.954779248531231e-07, "loss": 13.7843, "step": 450510 }, { "epoch": 0.910078903671263, "grad_norm": 204.5081024169922, "learning_rate": 2.953597174066436e-07, "loss": 17.9999, "step": 450520 }, { "epoch": 0.9100991043039468, "grad_norm": 192.9317626953125, "learning_rate": 2.952415328900093e-07, "loss": 13.9295, "step": 450530 }, { "epoch": 0.9101193049366306, "grad_norm": 129.8246612548828, "learning_rate": 2.951233713037971e-07, "loss": 10.9243, "step": 450540 }, { "epoch": 0.9101395055693144, "grad_norm": 299.04266357421875, "learning_rate": 2.9500523264858473e-07, "loss": 15.7801, "step": 450550 }, { "epoch": 0.9101597062019983, "grad_norm": 707.8751831054688, "learning_rate": 2.948871169249451e-07, "loss": 24.1985, "step": 450560 }, { "epoch": 0.9101799068346821, "grad_norm": 310.42083740234375, "learning_rate": 2.9476902413345443e-07, "loss": 16.73, "step": 450570 }, { "epoch": 0.9102001074673659, "grad_norm": 123.99519348144531, "learning_rate": 2.946509542746895e-07, "loss": 10.4897, "step": 450580 }, { "epoch": 0.9102203081000497, "grad_norm": 291.90667724609375, "learning_rate": 2.9453290734922537e-07, "loss": 26.6431, "step": 450590 }, { "epoch": 0.9102405087327335, "grad_norm": 292.6350402832031, "learning_rate": 2.9441488335763656e-07, "loss": 32.7174, "step": 450600 }, { "epoch": 0.9102607093654174, "grad_norm": 7.0587897300720215, "learning_rate": 2.9429688230049934e-07, "loss": 13.6948, "step": 450610 }, { "epoch": 0.9102809099981012, "grad_norm": 393.41009521484375, "learning_rate": 2.941789041783888e-07, "loss": 9.8604, "step": 450620 }, { "epoch": 0.910301110630785, "grad_norm": 11.90262508392334, "learning_rate": 2.940609489918783e-07, "loss": 11.5078, "step": 450630 }, { "epoch": 0.9103213112634688, "grad_norm": 336.51275634765625, "learning_rate": 2.9394301674154413e-07, "loss": 15.6038, "step": 450640 }, { "epoch": 0.9103415118961525, "grad_norm": 206.1016845703125, "learning_rate": 2.938251074279619e-07, "loss": 16.0895, "step": 450650 }, { "epoch": 0.9103617125288364, "grad_norm": 543.8297119140625, "learning_rate": 2.9370722105170504e-07, "loss": 16.5557, "step": 450660 }, { "epoch": 0.9103819131615202, "grad_norm": 30.56165885925293, "learning_rate": 2.935893576133475e-07, "loss": 25.3035, "step": 450670 }, { "epoch": 0.910402113794204, "grad_norm": 67.37085723876953, "learning_rate": 2.9347151711346556e-07, "loss": 12.4123, "step": 450680 }, { "epoch": 0.9104223144268878, "grad_norm": 279.19879150390625, "learning_rate": 2.933536995526326e-07, "loss": 16.1284, "step": 450690 }, { "epoch": 0.9104425150595716, "grad_norm": 54.01791000366211, "learning_rate": 2.9323590493142206e-07, "loss": 16.3258, "step": 450700 }, { "epoch": 0.9104627156922555, "grad_norm": 204.2699737548828, "learning_rate": 2.931181332504096e-07, "loss": 15.5862, "step": 450710 }, { "epoch": 0.9104829163249393, "grad_norm": 162.3144989013672, "learning_rate": 2.930003845101681e-07, "loss": 15.9014, "step": 450720 }, { "epoch": 0.9105031169576231, "grad_norm": 227.80149841308594, "learning_rate": 2.9288265871127206e-07, "loss": 11.8717, "step": 450730 }, { "epoch": 0.9105233175903069, "grad_norm": 142.5568084716797, "learning_rate": 2.927649558542955e-07, "loss": 14.2517, "step": 450740 }, { "epoch": 0.9105435182229907, "grad_norm": 250.9642333984375, "learning_rate": 2.9264727593981024e-07, "loss": 15.1651, "step": 450750 }, { "epoch": 0.9105637188556746, "grad_norm": 179.55856323242188, "learning_rate": 2.9252961896839236e-07, "loss": 15.7246, "step": 450760 }, { "epoch": 0.9105839194883584, "grad_norm": 405.2208251953125, "learning_rate": 2.9241198494061427e-07, "loss": 20.4519, "step": 450770 }, { "epoch": 0.9106041201210422, "grad_norm": 47.20588302612305, "learning_rate": 2.922943738570483e-07, "loss": 13.0551, "step": 450780 }, { "epoch": 0.910624320753726, "grad_norm": 186.8336944580078, "learning_rate": 2.921767857182689e-07, "loss": 12.4455, "step": 450790 }, { "epoch": 0.9106445213864098, "grad_norm": 224.8080291748047, "learning_rate": 2.920592205248496e-07, "loss": 20.2467, "step": 450800 }, { "epoch": 0.9106647220190937, "grad_norm": 288.7509460449219, "learning_rate": 2.919416782773621e-07, "loss": 17.0196, "step": 450810 }, { "epoch": 0.9106849226517775, "grad_norm": 249.38348388671875, "learning_rate": 2.918241589763793e-07, "loss": 18.2203, "step": 450820 }, { "epoch": 0.9107051232844613, "grad_norm": 108.31822204589844, "learning_rate": 2.917066626224757e-07, "loss": 22.2649, "step": 450830 }, { "epoch": 0.9107253239171451, "grad_norm": 455.79339599609375, "learning_rate": 2.9158918921622205e-07, "loss": 28.5233, "step": 450840 }, { "epoch": 0.9107455245498289, "grad_norm": 205.9493865966797, "learning_rate": 2.914717387581917e-07, "loss": 27.7138, "step": 450850 }, { "epoch": 0.9107657251825128, "grad_norm": 259.10467529296875, "learning_rate": 2.913543112489564e-07, "loss": 23.003, "step": 450860 }, { "epoch": 0.9107859258151966, "grad_norm": 451.16400146484375, "learning_rate": 2.912369066890908e-07, "loss": 13.9283, "step": 450870 }, { "epoch": 0.9108061264478804, "grad_norm": 359.8724670410156, "learning_rate": 2.9111952507916375e-07, "loss": 25.6942, "step": 450880 }, { "epoch": 0.9108263270805642, "grad_norm": 158.18458557128906, "learning_rate": 2.910021664197493e-07, "loss": 21.1689, "step": 450890 }, { "epoch": 0.910846527713248, "grad_norm": 341.4246520996094, "learning_rate": 2.908848307114198e-07, "loss": 18.6615, "step": 450900 }, { "epoch": 0.9108667283459317, "grad_norm": 193.39120483398438, "learning_rate": 2.9076751795474647e-07, "loss": 13.6904, "step": 450910 }, { "epoch": 0.9108869289786156, "grad_norm": 191.11099243164062, "learning_rate": 2.9065022815030044e-07, "loss": 9.8379, "step": 450920 }, { "epoch": 0.9109071296112994, "grad_norm": 411.2284240722656, "learning_rate": 2.905329612986546e-07, "loss": 18.3774, "step": 450930 }, { "epoch": 0.9109273302439832, "grad_norm": 164.7104034423828, "learning_rate": 2.9041571740037967e-07, "loss": 15.9514, "step": 450940 }, { "epoch": 0.910947530876667, "grad_norm": 386.8396911621094, "learning_rate": 2.9029849645604735e-07, "loss": 19.9905, "step": 450950 }, { "epoch": 0.9109677315093508, "grad_norm": 232.3302459716797, "learning_rate": 2.9018129846622834e-07, "loss": 11.5451, "step": 450960 }, { "epoch": 0.9109879321420347, "grad_norm": 372.60186767578125, "learning_rate": 2.900641234314955e-07, "loss": 20.9357, "step": 450970 }, { "epoch": 0.9110081327747185, "grad_norm": 126.22235107421875, "learning_rate": 2.899469713524183e-07, "loss": 4.8332, "step": 450980 }, { "epoch": 0.9110283334074023, "grad_norm": 14.61534309387207, "learning_rate": 2.898298422295681e-07, "loss": 20.8277, "step": 450990 }, { "epoch": 0.9110485340400861, "grad_norm": 215.75051879882812, "learning_rate": 2.8971273606351656e-07, "loss": 16.382, "step": 451000 }, { "epoch": 0.9110687346727699, "grad_norm": 182.18350219726562, "learning_rate": 2.895956528548338e-07, "loss": 35.4606, "step": 451010 }, { "epoch": 0.9110889353054538, "grad_norm": 148.57958984375, "learning_rate": 2.8947859260408997e-07, "loss": 15.8084, "step": 451020 }, { "epoch": 0.9111091359381376, "grad_norm": 32.10409164428711, "learning_rate": 2.8936155531185675e-07, "loss": 30.7559, "step": 451030 }, { "epoch": 0.9111293365708214, "grad_norm": 227.17288208007812, "learning_rate": 2.892445409787037e-07, "loss": 31.6167, "step": 451040 }, { "epoch": 0.9111495372035052, "grad_norm": 85.67294311523438, "learning_rate": 2.891275496052015e-07, "loss": 16.6726, "step": 451050 }, { "epoch": 0.911169737836189, "grad_norm": 350.76080322265625, "learning_rate": 2.8901058119192026e-07, "loss": 16.188, "step": 451060 }, { "epoch": 0.9111899384688729, "grad_norm": 245.30093383789062, "learning_rate": 2.8889363573943006e-07, "loss": 12.0636, "step": 451070 }, { "epoch": 0.9112101391015567, "grad_norm": 290.5528259277344, "learning_rate": 2.8877671324829994e-07, "loss": 16.3702, "step": 451080 }, { "epoch": 0.9112303397342405, "grad_norm": 241.7861785888672, "learning_rate": 2.886598137191021e-07, "loss": 22.5154, "step": 451090 }, { "epoch": 0.9112505403669243, "grad_norm": 20.893571853637695, "learning_rate": 2.8854293715240455e-07, "loss": 20.9318, "step": 451100 }, { "epoch": 0.9112707409996081, "grad_norm": 121.3875732421875, "learning_rate": 2.884260835487768e-07, "loss": 13.9963, "step": 451110 }, { "epoch": 0.911290941632292, "grad_norm": 139.95486450195312, "learning_rate": 2.8830925290878997e-07, "loss": 20.3732, "step": 451120 }, { "epoch": 0.9113111422649758, "grad_norm": 137.38673400878906, "learning_rate": 2.8819244523301206e-07, "loss": 13.2261, "step": 451130 }, { "epoch": 0.9113313428976596, "grad_norm": 224.9229736328125, "learning_rate": 2.880756605220114e-07, "loss": 15.8526, "step": 451140 }, { "epoch": 0.9113515435303434, "grad_norm": 468.8535461425781, "learning_rate": 2.879588987763593e-07, "loss": 27.8865, "step": 451150 }, { "epoch": 0.9113717441630271, "grad_norm": 195.5144500732422, "learning_rate": 2.878421599966252e-07, "loss": 19.7361, "step": 451160 }, { "epoch": 0.911391944795711, "grad_norm": 250.16378784179688, "learning_rate": 2.877254441833754e-07, "loss": 34.297, "step": 451170 }, { "epoch": 0.9114121454283948, "grad_norm": 397.02337646484375, "learning_rate": 2.8760875133718003e-07, "loss": 43.6365, "step": 451180 }, { "epoch": 0.9114323460610786, "grad_norm": 735.8729858398438, "learning_rate": 2.8749208145860907e-07, "loss": 13.713, "step": 451190 }, { "epoch": 0.9114525466937624, "grad_norm": 160.88473510742188, "learning_rate": 2.8737543454822993e-07, "loss": 16.7952, "step": 451200 }, { "epoch": 0.9114727473264462, "grad_norm": 216.04766845703125, "learning_rate": 2.87258810606611e-07, "loss": 12.7574, "step": 451210 }, { "epoch": 0.91149294795913, "grad_norm": 256.7967834472656, "learning_rate": 2.8714220963432125e-07, "loss": 15.7725, "step": 451220 }, { "epoch": 0.9115131485918139, "grad_norm": 0.3572355806827545, "learning_rate": 2.870256316319292e-07, "loss": 24.3407, "step": 451230 }, { "epoch": 0.9115333492244977, "grad_norm": 305.8081359863281, "learning_rate": 2.8690907660000156e-07, "loss": 13.9204, "step": 451240 }, { "epoch": 0.9115535498571815, "grad_norm": 78.4308853149414, "learning_rate": 2.867925445391079e-07, "loss": 11.3374, "step": 451250 }, { "epoch": 0.9115737504898653, "grad_norm": 505.9815368652344, "learning_rate": 2.8667603544981604e-07, "loss": 18.6716, "step": 451260 }, { "epoch": 0.9115939511225492, "grad_norm": 448.60125732421875, "learning_rate": 2.8655954933269395e-07, "loss": 22.5912, "step": 451270 }, { "epoch": 0.911614151755233, "grad_norm": 346.9026794433594, "learning_rate": 2.8644308618830775e-07, "loss": 24.7697, "step": 451280 }, { "epoch": 0.9116343523879168, "grad_norm": 177.3780517578125, "learning_rate": 2.86326646017227e-07, "loss": 12.4185, "step": 451290 }, { "epoch": 0.9116545530206006, "grad_norm": 56.058929443359375, "learning_rate": 2.862102288200186e-07, "loss": 8.4533, "step": 451300 }, { "epoch": 0.9116747536532844, "grad_norm": 61.3682746887207, "learning_rate": 2.8609383459724915e-07, "loss": 10.9397, "step": 451310 }, { "epoch": 0.9116949542859683, "grad_norm": 295.5820617675781, "learning_rate": 2.8597746334948773e-07, "loss": 13.9304, "step": 451320 }, { "epoch": 0.9117151549186521, "grad_norm": 363.1051025390625, "learning_rate": 2.8586111507729887e-07, "loss": 17.3195, "step": 451330 }, { "epoch": 0.9117353555513359, "grad_norm": 201.4821319580078, "learning_rate": 2.8574478978125266e-07, "loss": 29.5149, "step": 451340 }, { "epoch": 0.9117555561840197, "grad_norm": 168.32260131835938, "learning_rate": 2.856284874619142e-07, "loss": 14.6948, "step": 451350 }, { "epoch": 0.9117757568167035, "grad_norm": 278.9552917480469, "learning_rate": 2.855122081198503e-07, "loss": 14.1625, "step": 451360 }, { "epoch": 0.9117959574493874, "grad_norm": 246.78610229492188, "learning_rate": 2.8539595175562817e-07, "loss": 14.2023, "step": 451370 }, { "epoch": 0.9118161580820712, "grad_norm": 283.81964111328125, "learning_rate": 2.852797183698147e-07, "loss": 25.7544, "step": 451380 }, { "epoch": 0.911836358714755, "grad_norm": 120.52972412109375, "learning_rate": 2.851635079629755e-07, "loss": 17.5716, "step": 451390 }, { "epoch": 0.9118565593474388, "grad_norm": 9.556193351745605, "learning_rate": 2.850473205356774e-07, "loss": 19.6392, "step": 451400 }, { "epoch": 0.9118767599801226, "grad_norm": 252.46270751953125, "learning_rate": 2.8493115608848764e-07, "loss": 23.096, "step": 451410 }, { "epoch": 0.9118969606128063, "grad_norm": 176.66888427734375, "learning_rate": 2.8481501462197137e-07, "loss": 12.809, "step": 451420 }, { "epoch": 0.9119171612454902, "grad_norm": 388.5914001464844, "learning_rate": 2.846988961366942e-07, "loss": 21.0153, "step": 451430 }, { "epoch": 0.911937361878174, "grad_norm": 210.03109741210938, "learning_rate": 2.8458280063322353e-07, "loss": 22.5919, "step": 451440 }, { "epoch": 0.9119575625108578, "grad_norm": 297.4843444824219, "learning_rate": 2.844667281121244e-07, "loss": 16.2806, "step": 451450 }, { "epoch": 0.9119777631435416, "grad_norm": 236.39234924316406, "learning_rate": 2.843506785739614e-07, "loss": 10.5966, "step": 451460 }, { "epoch": 0.9119979637762254, "grad_norm": 35.00349044799805, "learning_rate": 2.842346520193018e-07, "loss": 17.5844, "step": 451470 }, { "epoch": 0.9120181644089093, "grad_norm": 179.8292999267578, "learning_rate": 2.8411864844871184e-07, "loss": 15.5369, "step": 451480 }, { "epoch": 0.9120383650415931, "grad_norm": 186.2885284423828, "learning_rate": 2.8400266786275387e-07, "loss": 32.9115, "step": 451490 }, { "epoch": 0.9120585656742769, "grad_norm": 259.9469299316406, "learning_rate": 2.838867102619952e-07, "loss": 16.1085, "step": 451500 }, { "epoch": 0.9120787663069607, "grad_norm": 193.43089294433594, "learning_rate": 2.8377077564700094e-07, "loss": 10.3301, "step": 451510 }, { "epoch": 0.9120989669396445, "grad_norm": 311.5097961425781, "learning_rate": 2.8365486401833677e-07, "loss": 20.2049, "step": 451520 }, { "epoch": 0.9121191675723284, "grad_norm": 194.8396759033203, "learning_rate": 2.835389753765655e-07, "loss": 13.4595, "step": 451530 }, { "epoch": 0.9121393682050122, "grad_norm": 443.40216064453125, "learning_rate": 2.834231097222534e-07, "loss": 23.4322, "step": 451540 }, { "epoch": 0.912159568837696, "grad_norm": 445.602783203125, "learning_rate": 2.833072670559661e-07, "loss": 21.8692, "step": 451550 }, { "epoch": 0.9121797694703798, "grad_norm": 496.0737609863281, "learning_rate": 2.83191447378266e-07, "loss": 18.3607, "step": 451560 }, { "epoch": 0.9121999701030636, "grad_norm": 42.452980041503906, "learning_rate": 2.8307565068971867e-07, "loss": 17.8633, "step": 451570 }, { "epoch": 0.9122201707357475, "grad_norm": 284.5263671875, "learning_rate": 2.829598769908892e-07, "loss": 26.0513, "step": 451580 }, { "epoch": 0.9122403713684313, "grad_norm": 172.8795928955078, "learning_rate": 2.8284412628234117e-07, "loss": 11.8185, "step": 451590 }, { "epoch": 0.9122605720011151, "grad_norm": 14.350419044494629, "learning_rate": 2.8272839856463783e-07, "loss": 13.9188, "step": 451600 }, { "epoch": 0.9122807726337989, "grad_norm": 398.5994873046875, "learning_rate": 2.8261269383834497e-07, "loss": 19.949, "step": 451610 }, { "epoch": 0.9123009732664827, "grad_norm": 271.53192138671875, "learning_rate": 2.8249701210402603e-07, "loss": 20.3639, "step": 451620 }, { "epoch": 0.9123211738991666, "grad_norm": 352.9737854003906, "learning_rate": 2.823813533622438e-07, "loss": 15.1458, "step": 451630 }, { "epoch": 0.9123413745318504, "grad_norm": 438.1044616699219, "learning_rate": 2.822657176135629e-07, "loss": 17.7847, "step": 451640 }, { "epoch": 0.9123615751645342, "grad_norm": 383.8388366699219, "learning_rate": 2.821501048585462e-07, "loss": 62.2985, "step": 451650 }, { "epoch": 0.912381775797218, "grad_norm": 194.94883728027344, "learning_rate": 2.8203451509775825e-07, "loss": 12.9917, "step": 451660 }, { "epoch": 0.9124019764299018, "grad_norm": 2.216585874557495, "learning_rate": 2.819189483317625e-07, "loss": 16.7181, "step": 451670 }, { "epoch": 0.9124221770625855, "grad_norm": 514.4530029296875, "learning_rate": 2.818034045611201e-07, "loss": 20.4691, "step": 451680 }, { "epoch": 0.9124423776952694, "grad_norm": 280.14794921875, "learning_rate": 2.816878837863968e-07, "loss": 11.5293, "step": 451690 }, { "epoch": 0.9124625783279532, "grad_norm": 436.2810974121094, "learning_rate": 2.815723860081537e-07, "loss": 17.2589, "step": 451700 }, { "epoch": 0.912482778960637, "grad_norm": 122.03319549560547, "learning_rate": 2.8145691122695496e-07, "loss": 20.786, "step": 451710 }, { "epoch": 0.9125029795933208, "grad_norm": 261.3052673339844, "learning_rate": 2.8134145944336225e-07, "loss": 15.168, "step": 451720 }, { "epoch": 0.9125231802260046, "grad_norm": 209.5710906982422, "learning_rate": 2.812260306579401e-07, "loss": 20.8096, "step": 451730 }, { "epoch": 0.9125433808586885, "grad_norm": 430.4251403808594, "learning_rate": 2.811106248712497e-07, "loss": 22.5538, "step": 451740 }, { "epoch": 0.9125635814913723, "grad_norm": 333.8047180175781, "learning_rate": 2.8099524208385297e-07, "loss": 30.343, "step": 451750 }, { "epoch": 0.9125837821240561, "grad_norm": 219.22140502929688, "learning_rate": 2.8087988229631325e-07, "loss": 10.7576, "step": 451760 }, { "epoch": 0.9126039827567399, "grad_norm": 256.58636474609375, "learning_rate": 2.8076454550919397e-07, "loss": 20.6302, "step": 451770 }, { "epoch": 0.9126241833894237, "grad_norm": 582.3648071289062, "learning_rate": 2.8064923172305467e-07, "loss": 27.125, "step": 451780 }, { "epoch": 0.9126443840221076, "grad_norm": 168.98902893066406, "learning_rate": 2.8053394093845833e-07, "loss": 15.7785, "step": 451790 }, { "epoch": 0.9126645846547914, "grad_norm": 6.28079891204834, "learning_rate": 2.804186731559677e-07, "loss": 12.2514, "step": 451800 }, { "epoch": 0.9126847852874752, "grad_norm": 354.2558288574219, "learning_rate": 2.8030342837614466e-07, "loss": 16.6112, "step": 451810 }, { "epoch": 0.912704985920159, "grad_norm": 79.20686340332031, "learning_rate": 2.8018820659954927e-07, "loss": 14.0123, "step": 451820 }, { "epoch": 0.9127251865528428, "grad_norm": 65.2433853149414, "learning_rate": 2.800730078267444e-07, "loss": 12.3392, "step": 451830 }, { "epoch": 0.9127453871855267, "grad_norm": 9.308917045593262, "learning_rate": 2.7995783205829185e-07, "loss": 15.2853, "step": 451840 }, { "epoch": 0.9127655878182105, "grad_norm": 254.87281799316406, "learning_rate": 2.798426792947517e-07, "loss": 17.1233, "step": 451850 }, { "epoch": 0.9127857884508943, "grad_norm": 483.3761901855469, "learning_rate": 2.7972754953668524e-07, "loss": 17.6073, "step": 451860 }, { "epoch": 0.9128059890835781, "grad_norm": 222.07420349121094, "learning_rate": 2.796124427846553e-07, "loss": 9.4997, "step": 451870 }, { "epoch": 0.912826189716262, "grad_norm": 184.33493041992188, "learning_rate": 2.7949735903922195e-07, "loss": 17.244, "step": 451880 }, { "epoch": 0.9128463903489458, "grad_norm": 281.9110107421875, "learning_rate": 2.7938229830094475e-07, "loss": 13.8942, "step": 451890 }, { "epoch": 0.9128665909816296, "grad_norm": 586.2553100585938, "learning_rate": 2.792672605703867e-07, "loss": 24.6446, "step": 451900 }, { "epoch": 0.9128867916143134, "grad_norm": 375.984375, "learning_rate": 2.791522458481077e-07, "loss": 22.5588, "step": 451910 }, { "epoch": 0.9129069922469972, "grad_norm": 335.722412109375, "learning_rate": 2.79037254134667e-07, "loss": 25.6251, "step": 451920 }, { "epoch": 0.9129271928796809, "grad_norm": 231.74801635742188, "learning_rate": 2.7892228543062725e-07, "loss": 9.2204, "step": 451930 }, { "epoch": 0.9129473935123648, "grad_norm": 499.72247314453125, "learning_rate": 2.788073397365465e-07, "loss": 22.4502, "step": 451940 }, { "epoch": 0.9129675941450486, "grad_norm": 169.93235778808594, "learning_rate": 2.78692417052987e-07, "loss": 16.9182, "step": 451950 }, { "epoch": 0.9129877947777324, "grad_norm": 309.1972961425781, "learning_rate": 2.785775173805083e-07, "loss": 26.383, "step": 451960 }, { "epoch": 0.9130079954104162, "grad_norm": 432.0539855957031, "learning_rate": 2.784626407196689e-07, "loss": 18.6492, "step": 451970 }, { "epoch": 0.9130281960431, "grad_norm": 234.69717407226562, "learning_rate": 2.7834778707103104e-07, "loss": 13.5171, "step": 451980 }, { "epoch": 0.9130483966757839, "grad_norm": 9.804825782775879, "learning_rate": 2.782329564351532e-07, "loss": 28.9248, "step": 451990 }, { "epoch": 0.9130685973084677, "grad_norm": 436.0420227050781, "learning_rate": 2.7811814881259503e-07, "loss": 31.211, "step": 452000 }, { "epoch": 0.9130887979411515, "grad_norm": 506.11895751953125, "learning_rate": 2.7800336420391593e-07, "loss": 18.9072, "step": 452010 }, { "epoch": 0.9131089985738353, "grad_norm": 199.8274688720703, "learning_rate": 2.7788860260967665e-07, "loss": 21.4032, "step": 452020 }, { "epoch": 0.9131291992065191, "grad_norm": 369.046875, "learning_rate": 2.77773864030435e-07, "loss": 14.663, "step": 452030 }, { "epoch": 0.913149399839203, "grad_norm": 397.6197204589844, "learning_rate": 2.7765914846675067e-07, "loss": 26.5788, "step": 452040 }, { "epoch": 0.9131696004718868, "grad_norm": 214.2436065673828, "learning_rate": 2.775444559191837e-07, "loss": 16.6265, "step": 452050 }, { "epoch": 0.9131898011045706, "grad_norm": 298.3954162597656, "learning_rate": 2.774297863882919e-07, "loss": 20.599, "step": 452060 }, { "epoch": 0.9132100017372544, "grad_norm": 171.51646423339844, "learning_rate": 2.773151398746338e-07, "loss": 6.9806, "step": 452070 }, { "epoch": 0.9132302023699382, "grad_norm": 87.55284881591797, "learning_rate": 2.772005163787689e-07, "loss": 5.9049, "step": 452080 }, { "epoch": 0.9132504030026221, "grad_norm": 202.61053466796875, "learning_rate": 2.770859159012579e-07, "loss": 10.9852, "step": 452090 }, { "epoch": 0.9132706036353059, "grad_norm": 319.46868896484375, "learning_rate": 2.7697133844265535e-07, "loss": 17.7638, "step": 452100 }, { "epoch": 0.9132908042679897, "grad_norm": 36.19598388671875, "learning_rate": 2.768567840035219e-07, "loss": 28.3298, "step": 452110 }, { "epoch": 0.9133110049006735, "grad_norm": 324.45440673828125, "learning_rate": 2.76742252584416e-07, "loss": 10.1354, "step": 452120 }, { "epoch": 0.9133312055333573, "grad_norm": 19.367141723632812, "learning_rate": 2.7662774418589555e-07, "loss": 7.299, "step": 452130 }, { "epoch": 0.9133514061660412, "grad_norm": 217.58401489257812, "learning_rate": 2.765132588085184e-07, "loss": 21.0982, "step": 452140 }, { "epoch": 0.913371606798725, "grad_norm": 206.20396423339844, "learning_rate": 2.763987964528425e-07, "loss": 14.6621, "step": 452150 }, { "epoch": 0.9133918074314088, "grad_norm": 426.7931823730469, "learning_rate": 2.7628435711942737e-07, "loss": 20.1852, "step": 452160 }, { "epoch": 0.9134120080640926, "grad_norm": 2748.8115234375, "learning_rate": 2.7616994080882754e-07, "loss": 23.0796, "step": 452170 }, { "epoch": 0.9134322086967764, "grad_norm": 295.9453430175781, "learning_rate": 2.7605554752160256e-07, "loss": 20.84, "step": 452180 }, { "epoch": 0.9134524093294601, "grad_norm": 50.751956939697266, "learning_rate": 2.7594117725831096e-07, "loss": 7.0311, "step": 452190 }, { "epoch": 0.913472609962144, "grad_norm": 290.8677673339844, "learning_rate": 2.758268300195094e-07, "loss": 12.3131, "step": 452200 }, { "epoch": 0.9134928105948278, "grad_norm": 352.98046875, "learning_rate": 2.757125058057536e-07, "loss": 11.1176, "step": 452210 }, { "epoch": 0.9135130112275116, "grad_norm": 257.6129455566406, "learning_rate": 2.755982046176031e-07, "loss": 19.519, "step": 452220 }, { "epoch": 0.9135332118601954, "grad_norm": 180.9540252685547, "learning_rate": 2.754839264556136e-07, "loss": 6.2478, "step": 452230 }, { "epoch": 0.9135534124928792, "grad_norm": 153.62515258789062, "learning_rate": 2.7536967132034186e-07, "loss": 12.1595, "step": 452240 }, { "epoch": 0.9135736131255631, "grad_norm": 466.9817199707031, "learning_rate": 2.752554392123463e-07, "loss": 22.4284, "step": 452250 }, { "epoch": 0.9135938137582469, "grad_norm": 384.5799560546875, "learning_rate": 2.7514123013218153e-07, "loss": 21.8164, "step": 452260 }, { "epoch": 0.9136140143909307, "grad_norm": 119.99606323242188, "learning_rate": 2.750270440804065e-07, "loss": 11.328, "step": 452270 }, { "epoch": 0.9136342150236145, "grad_norm": 128.53558349609375, "learning_rate": 2.749128810575763e-07, "loss": 14.0993, "step": 452280 }, { "epoch": 0.9136544156562983, "grad_norm": 305.25958251953125, "learning_rate": 2.747987410642472e-07, "loss": 21.608, "step": 452290 }, { "epoch": 0.9136746162889822, "grad_norm": 198.42884826660156, "learning_rate": 2.746846241009765e-07, "loss": 11.7095, "step": 452300 }, { "epoch": 0.913694816921666, "grad_norm": 420.90496826171875, "learning_rate": 2.745705301683188e-07, "loss": 15.0137, "step": 452310 }, { "epoch": 0.9137150175543498, "grad_norm": 220.3802947998047, "learning_rate": 2.7445645926683253e-07, "loss": 21.6016, "step": 452320 }, { "epoch": 0.9137352181870336, "grad_norm": 223.92161560058594, "learning_rate": 2.7434241139707106e-07, "loss": 9.5915, "step": 452330 }, { "epoch": 0.9137554188197174, "grad_norm": 140.13250732421875, "learning_rate": 2.742283865595924e-07, "loss": 12.0885, "step": 452340 }, { "epoch": 0.9137756194524013, "grad_norm": 329.86859130859375, "learning_rate": 2.7411438475495155e-07, "loss": 16.692, "step": 452350 }, { "epoch": 0.9137958200850851, "grad_norm": 502.484130859375, "learning_rate": 2.740004059837031e-07, "loss": 19.292, "step": 452360 }, { "epoch": 0.9138160207177689, "grad_norm": 303.68548583984375, "learning_rate": 2.738864502464045e-07, "loss": 8.5126, "step": 452370 }, { "epoch": 0.9138362213504527, "grad_norm": 286.79205322265625, "learning_rate": 2.737725175436101e-07, "loss": 12.9968, "step": 452380 }, { "epoch": 0.9138564219831365, "grad_norm": 390.5729675292969, "learning_rate": 2.7365860787587405e-07, "loss": 11.5651, "step": 452390 }, { "epoch": 0.9138766226158204, "grad_norm": 29.03893280029297, "learning_rate": 2.735447212437531e-07, "loss": 26.2479, "step": 452400 }, { "epoch": 0.9138968232485042, "grad_norm": 0.7239755392074585, "learning_rate": 2.734308576478023e-07, "loss": 12.455, "step": 452410 }, { "epoch": 0.913917023881188, "grad_norm": 493.9660339355469, "learning_rate": 2.733170170885768e-07, "loss": 22.2287, "step": 452420 }, { "epoch": 0.9139372245138718, "grad_norm": 378.9903564453125, "learning_rate": 2.7320319956662957e-07, "loss": 18.7744, "step": 452430 }, { "epoch": 0.9139574251465555, "grad_norm": 359.2613830566406, "learning_rate": 2.730894050825178e-07, "loss": 7.5829, "step": 452440 }, { "epoch": 0.9139776257792394, "grad_norm": 48.83420181274414, "learning_rate": 2.72975633636795e-07, "loss": 10.8604, "step": 452450 }, { "epoch": 0.9139978264119232, "grad_norm": 263.2594299316406, "learning_rate": 2.728618852300147e-07, "loss": 10.6731, "step": 452460 }, { "epoch": 0.914018027044607, "grad_norm": 360.8517150878906, "learning_rate": 2.727481598627324e-07, "loss": 13.3083, "step": 452470 }, { "epoch": 0.9140382276772908, "grad_norm": 280.552978515625, "learning_rate": 2.7263445753550275e-07, "loss": 6.6467, "step": 452480 }, { "epoch": 0.9140584283099746, "grad_norm": 247.21461486816406, "learning_rate": 2.725207782488792e-07, "loss": 17.2739, "step": 452490 }, { "epoch": 0.9140786289426585, "grad_norm": 305.6524963378906, "learning_rate": 2.724071220034158e-07, "loss": 20.9711, "step": 452500 }, { "epoch": 0.9140988295753423, "grad_norm": 564.3761596679688, "learning_rate": 2.72293488799667e-07, "loss": 23.1078, "step": 452510 }, { "epoch": 0.9141190302080261, "grad_norm": 189.10385131835938, "learning_rate": 2.7217987863818684e-07, "loss": 19.2934, "step": 452520 }, { "epoch": 0.9141392308407099, "grad_norm": 134.81488037109375, "learning_rate": 2.7206629151952715e-07, "loss": 19.1449, "step": 452530 }, { "epoch": 0.9141594314733937, "grad_norm": 143.1569061279297, "learning_rate": 2.7195272744424405e-07, "loss": 16.5561, "step": 452540 }, { "epoch": 0.9141796321060776, "grad_norm": 330.8506164550781, "learning_rate": 2.7183918641288943e-07, "loss": 10.3609, "step": 452550 }, { "epoch": 0.9141998327387614, "grad_norm": 350.2809143066406, "learning_rate": 2.717256684260172e-07, "loss": 22.1234, "step": 452560 }, { "epoch": 0.9142200333714452, "grad_norm": 84.64785766601562, "learning_rate": 2.716121734841814e-07, "loss": 23.5274, "step": 452570 }, { "epoch": 0.914240234004129, "grad_norm": 136.28900146484375, "learning_rate": 2.714987015879328e-07, "loss": 19.7956, "step": 452580 }, { "epoch": 0.9142604346368128, "grad_norm": 428.3825988769531, "learning_rate": 2.7138525273782746e-07, "loss": 20.0656, "step": 452590 }, { "epoch": 0.9142806352694967, "grad_norm": 147.80023193359375, "learning_rate": 2.712718269344161e-07, "loss": 12.2215, "step": 452600 }, { "epoch": 0.9143008359021805, "grad_norm": 77.33128356933594, "learning_rate": 2.711584241782528e-07, "loss": 13.2031, "step": 452610 }, { "epoch": 0.9143210365348643, "grad_norm": 159.60865783691406, "learning_rate": 2.7104504446988867e-07, "loss": 20.644, "step": 452620 }, { "epoch": 0.9143412371675481, "grad_norm": 228.53274536132812, "learning_rate": 2.709316878098789e-07, "loss": 14.8703, "step": 452630 }, { "epoch": 0.9143614378002319, "grad_norm": 386.92388916015625, "learning_rate": 2.708183541987741e-07, "loss": 20.069, "step": 452640 }, { "epoch": 0.9143816384329158, "grad_norm": 188.64630126953125, "learning_rate": 2.707050436371267e-07, "loss": 17.4205, "step": 452650 }, { "epoch": 0.9144018390655996, "grad_norm": 142.79185485839844, "learning_rate": 2.7059175612548947e-07, "loss": 18.5848, "step": 452660 }, { "epoch": 0.9144220396982834, "grad_norm": 284.08392333984375, "learning_rate": 2.7047849166441487e-07, "loss": 25.1389, "step": 452670 }, { "epoch": 0.9144422403309672, "grad_norm": 389.1358337402344, "learning_rate": 2.703652502544535e-07, "loss": 24.7348, "step": 452680 }, { "epoch": 0.914462440963651, "grad_norm": 382.4786376953125, "learning_rate": 2.702520318961588e-07, "loss": 17.6049, "step": 452690 }, { "epoch": 0.9144826415963347, "grad_norm": 223.2645263671875, "learning_rate": 2.701388365900831e-07, "loss": 15.3738, "step": 452700 }, { "epoch": 0.9145028422290186, "grad_norm": 236.0268096923828, "learning_rate": 2.7002566433677547e-07, "loss": 16.6175, "step": 452710 }, { "epoch": 0.9145230428617024, "grad_norm": 98.96418762207031, "learning_rate": 2.699125151367893e-07, "loss": 17.9103, "step": 452720 }, { "epoch": 0.9145432434943862, "grad_norm": 177.8522491455078, "learning_rate": 2.697993889906764e-07, "loss": 14.3337, "step": 452730 }, { "epoch": 0.91456344412707, "grad_norm": 269.5322570800781, "learning_rate": 2.6968628589898735e-07, "loss": 7.494, "step": 452740 }, { "epoch": 0.9145836447597538, "grad_norm": 123.6279067993164, "learning_rate": 2.6957320586227354e-07, "loss": 24.4679, "step": 452750 }, { "epoch": 0.9146038453924377, "grad_norm": 5.928144454956055, "learning_rate": 2.694601488810855e-07, "loss": 20.4435, "step": 452760 }, { "epoch": 0.9146240460251215, "grad_norm": 353.43511962890625, "learning_rate": 2.6934711495597676e-07, "loss": 29.1884, "step": 452770 }, { "epoch": 0.9146442466578053, "grad_norm": 492.0124206542969, "learning_rate": 2.6923410408749516e-07, "loss": 16.7438, "step": 452780 }, { "epoch": 0.9146644472904891, "grad_norm": 220.84674072265625, "learning_rate": 2.6912111627619255e-07, "loss": 16.7628, "step": 452790 }, { "epoch": 0.9146846479231729, "grad_norm": 145.1833038330078, "learning_rate": 2.690081515226206e-07, "loss": 21.2504, "step": 452800 }, { "epoch": 0.9147048485558568, "grad_norm": 264.7141418457031, "learning_rate": 2.6889520982732897e-07, "loss": 11.6204, "step": 452810 }, { "epoch": 0.9147250491885406, "grad_norm": 103.72254180908203, "learning_rate": 2.6878229119086776e-07, "loss": 8.374, "step": 452820 }, { "epoch": 0.9147452498212244, "grad_norm": 144.16172790527344, "learning_rate": 2.6866939561378867e-07, "loss": 16.1434, "step": 452830 }, { "epoch": 0.9147654504539082, "grad_norm": 234.51316833496094, "learning_rate": 2.685565230966408e-07, "loss": 15.9326, "step": 452840 }, { "epoch": 0.914785651086592, "grad_norm": 421.41058349609375, "learning_rate": 2.684436736399737e-07, "loss": 15.4687, "step": 452850 }, { "epoch": 0.9148058517192759, "grad_norm": 87.86022186279297, "learning_rate": 2.6833084724433965e-07, "loss": 25.2908, "step": 452860 }, { "epoch": 0.9148260523519597, "grad_norm": 320.94854736328125, "learning_rate": 2.6821804391028603e-07, "loss": 19.5364, "step": 452870 }, { "epoch": 0.9148462529846435, "grad_norm": 63.65008544921875, "learning_rate": 2.681052636383641e-07, "loss": 13.9083, "step": 452880 }, { "epoch": 0.9148664536173273, "grad_norm": 685.5750732421875, "learning_rate": 2.679925064291239e-07, "loss": 24.0978, "step": 452890 }, { "epoch": 0.9148866542500111, "grad_norm": 83.85057830810547, "learning_rate": 2.6787977228311336e-07, "loss": 12.3388, "step": 452900 }, { "epoch": 0.914906854882695, "grad_norm": 245.42599487304688, "learning_rate": 2.677670612008837e-07, "loss": 25.9351, "step": 452910 }, { "epoch": 0.9149270555153788, "grad_norm": 267.9879455566406, "learning_rate": 2.676543731829823e-07, "loss": 22.9067, "step": 452920 }, { "epoch": 0.9149472561480626, "grad_norm": 465.3280029296875, "learning_rate": 2.6754170822996026e-07, "loss": 12.1629, "step": 452930 }, { "epoch": 0.9149674567807464, "grad_norm": 811.5821533203125, "learning_rate": 2.6742906634236564e-07, "loss": 16.7378, "step": 452940 }, { "epoch": 0.9149876574134301, "grad_norm": 279.7994689941406, "learning_rate": 2.6731644752074846e-07, "loss": 16.232, "step": 452950 }, { "epoch": 0.915007858046114, "grad_norm": 164.19772338867188, "learning_rate": 2.6720385176565664e-07, "loss": 10.9066, "step": 452960 }, { "epoch": 0.9150280586787978, "grad_norm": 604.0054931640625, "learning_rate": 2.6709127907763864e-07, "loss": 29.5591, "step": 452970 }, { "epoch": 0.9150482593114816, "grad_norm": 18.724149703979492, "learning_rate": 2.6697872945724455e-07, "loss": 14.1053, "step": 452980 }, { "epoch": 0.9150684599441654, "grad_norm": 279.6636047363281, "learning_rate": 2.668662029050217e-07, "loss": 8.1674, "step": 452990 }, { "epoch": 0.9150886605768492, "grad_norm": 237.82156372070312, "learning_rate": 2.6675369942151864e-07, "loss": 22.8053, "step": 453000 }, { "epoch": 0.915108861209533, "grad_norm": 422.0186767578125, "learning_rate": 2.666412190072837e-07, "loss": 28.2567, "step": 453010 }, { "epoch": 0.9151290618422169, "grad_norm": 394.2140197753906, "learning_rate": 2.665287616628659e-07, "loss": 10.5564, "step": 453020 }, { "epoch": 0.9151492624749007, "grad_norm": 183.66497802734375, "learning_rate": 2.6641632738881315e-07, "loss": 16.4966, "step": 453030 }, { "epoch": 0.9151694631075845, "grad_norm": 135.10885620117188, "learning_rate": 2.663039161856723e-07, "loss": 15.2203, "step": 453040 }, { "epoch": 0.9151896637402683, "grad_norm": 163.83843994140625, "learning_rate": 2.6619152805399286e-07, "loss": 19.0739, "step": 453050 }, { "epoch": 0.9152098643729522, "grad_norm": 206.0903778076172, "learning_rate": 2.660791629943216e-07, "loss": 27.0069, "step": 453060 }, { "epoch": 0.915230065005636, "grad_norm": 32.92919158935547, "learning_rate": 2.659668210072058e-07, "loss": 10.3376, "step": 453070 }, { "epoch": 0.9152502656383198, "grad_norm": 344.9319152832031, "learning_rate": 2.658545020931935e-07, "loss": 29.5457, "step": 453080 }, { "epoch": 0.9152704662710036, "grad_norm": 248.47747802734375, "learning_rate": 2.657422062528325e-07, "loss": 15.6671, "step": 453090 }, { "epoch": 0.9152906669036874, "grad_norm": 392.9204406738281, "learning_rate": 2.656299334866702e-07, "loss": 20.1031, "step": 453100 }, { "epoch": 0.9153108675363713, "grad_norm": 236.53485107421875, "learning_rate": 2.655176837952528e-07, "loss": 24.4267, "step": 453110 }, { "epoch": 0.9153310681690551, "grad_norm": 264.0382080078125, "learning_rate": 2.654054571791287e-07, "loss": 10.8446, "step": 453120 }, { "epoch": 0.9153512688017389, "grad_norm": 356.65625, "learning_rate": 2.6529325363884364e-07, "loss": 15.9139, "step": 453130 }, { "epoch": 0.9153714694344227, "grad_norm": 99.18729400634766, "learning_rate": 2.651810731749449e-07, "loss": 12.5715, "step": 453140 }, { "epoch": 0.9153916700671065, "grad_norm": 252.50363159179688, "learning_rate": 2.650689157879799e-07, "loss": 7.7056, "step": 453150 }, { "epoch": 0.9154118706997904, "grad_norm": 295.3934020996094, "learning_rate": 2.649567814784937e-07, "loss": 37.6581, "step": 453160 }, { "epoch": 0.9154320713324742, "grad_norm": 242.6433868408203, "learning_rate": 2.6484467024703476e-07, "loss": 18.3434, "step": 453170 }, { "epoch": 0.915452271965158, "grad_norm": 326.314697265625, "learning_rate": 2.647325820941488e-07, "loss": 15.2285, "step": 453180 }, { "epoch": 0.9154724725978418, "grad_norm": 351.17059326171875, "learning_rate": 2.6462051702038085e-07, "loss": 16.0792, "step": 453190 }, { "epoch": 0.9154926732305256, "grad_norm": 325.4627380371094, "learning_rate": 2.6450847502627883e-07, "loss": 24.9354, "step": 453200 }, { "epoch": 0.9155128738632093, "grad_norm": 263.8374938964844, "learning_rate": 2.6439645611238795e-07, "loss": 15.9869, "step": 453210 }, { "epoch": 0.9155330744958932, "grad_norm": 495.61895751953125, "learning_rate": 2.642844602792544e-07, "loss": 17.7545, "step": 453220 }, { "epoch": 0.915553275128577, "grad_norm": 90.01434326171875, "learning_rate": 2.6417248752742374e-07, "loss": 18.3905, "step": 453230 }, { "epoch": 0.9155734757612608, "grad_norm": 260.0401611328125, "learning_rate": 2.640605378574429e-07, "loss": 26.9076, "step": 453240 }, { "epoch": 0.9155936763939446, "grad_norm": 118.38766479492188, "learning_rate": 2.639486112698564e-07, "loss": 19.0116, "step": 453250 }, { "epoch": 0.9156138770266284, "grad_norm": 365.8596496582031, "learning_rate": 2.6383670776520933e-07, "loss": 25.7866, "step": 453260 }, { "epoch": 0.9156340776593123, "grad_norm": 200.04348754882812, "learning_rate": 2.637248273440479e-07, "loss": 19.1621, "step": 453270 }, { "epoch": 0.9156542782919961, "grad_norm": 321.8495788574219, "learning_rate": 2.6361297000691787e-07, "loss": 11.9167, "step": 453280 }, { "epoch": 0.9156744789246799, "grad_norm": 258.5777893066406, "learning_rate": 2.6350113575436266e-07, "loss": 9.2933, "step": 453290 }, { "epoch": 0.9156946795573637, "grad_norm": 309.27978515625, "learning_rate": 2.6338932458692847e-07, "loss": 13.4667, "step": 453300 }, { "epoch": 0.9157148801900475, "grad_norm": 919.72607421875, "learning_rate": 2.6327753650516205e-07, "loss": 48.5722, "step": 453310 }, { "epoch": 0.9157350808227314, "grad_norm": 356.7960510253906, "learning_rate": 2.631657715096048e-07, "loss": 10.7685, "step": 453320 }, { "epoch": 0.9157552814554152, "grad_norm": 71.45125579833984, "learning_rate": 2.630540296008027e-07, "loss": 4.6038, "step": 453330 }, { "epoch": 0.915775482088099, "grad_norm": 0.0, "learning_rate": 2.629423107793022e-07, "loss": 16.0654, "step": 453340 }, { "epoch": 0.9157956827207828, "grad_norm": 299.66754150390625, "learning_rate": 2.6283061504564553e-07, "loss": 13.7657, "step": 453350 }, { "epoch": 0.9158158833534666, "grad_norm": 367.063720703125, "learning_rate": 2.6271894240037785e-07, "loss": 18.1173, "step": 453360 }, { "epoch": 0.9158360839861505, "grad_norm": 374.85101318359375, "learning_rate": 2.626072928440432e-07, "loss": 16.9255, "step": 453370 }, { "epoch": 0.9158562846188343, "grad_norm": 164.22169494628906, "learning_rate": 2.6249566637718714e-07, "loss": 17.5023, "step": 453380 }, { "epoch": 0.9158764852515181, "grad_norm": 188.7342987060547, "learning_rate": 2.623840630003516e-07, "loss": 13.6479, "step": 453390 }, { "epoch": 0.9158966858842019, "grad_norm": 0.0, "learning_rate": 2.622724827140816e-07, "loss": 24.684, "step": 453400 }, { "epoch": 0.9159168865168857, "grad_norm": 271.82586669921875, "learning_rate": 2.6216092551892116e-07, "loss": 17.3753, "step": 453410 }, { "epoch": 0.9159370871495696, "grad_norm": 386.52496337890625, "learning_rate": 2.6204939141541376e-07, "loss": 18.1273, "step": 453420 }, { "epoch": 0.9159572877822534, "grad_norm": 215.76119995117188, "learning_rate": 2.6193788040410286e-07, "loss": 20.3966, "step": 453430 }, { "epoch": 0.9159774884149372, "grad_norm": 442.1326904296875, "learning_rate": 2.618263924855324e-07, "loss": 18.699, "step": 453440 }, { "epoch": 0.915997689047621, "grad_norm": 49.77064514160156, "learning_rate": 2.617149276602454e-07, "loss": 21.666, "step": 453450 }, { "epoch": 0.9160178896803048, "grad_norm": 4.305212497711182, "learning_rate": 2.616034859287847e-07, "loss": 11.1293, "step": 453460 }, { "epoch": 0.9160380903129886, "grad_norm": 145.03042602539062, "learning_rate": 2.614920672916943e-07, "loss": 12.7993, "step": 453470 }, { "epoch": 0.9160582909456724, "grad_norm": 165.0931396484375, "learning_rate": 2.61380671749516e-07, "loss": 12.0623, "step": 453480 }, { "epoch": 0.9160784915783562, "grad_norm": 65.10508728027344, "learning_rate": 2.6126929930279486e-07, "loss": 17.926, "step": 453490 }, { "epoch": 0.91609869221104, "grad_norm": 221.85946655273438, "learning_rate": 2.611579499520722e-07, "loss": 23.5891, "step": 453500 }, { "epoch": 0.9161188928437238, "grad_norm": 179.46832275390625, "learning_rate": 2.610466236978898e-07, "loss": 22.8867, "step": 453510 }, { "epoch": 0.9161390934764077, "grad_norm": 165.3672637939453, "learning_rate": 2.6093532054079276e-07, "loss": 18.5289, "step": 453520 }, { "epoch": 0.9161592941090915, "grad_norm": 136.2425537109375, "learning_rate": 2.6082404048132114e-07, "loss": 14.3849, "step": 453530 }, { "epoch": 0.9161794947417753, "grad_norm": 280.4484558105469, "learning_rate": 2.6071278352001904e-07, "loss": 15.8007, "step": 453540 }, { "epoch": 0.9161996953744591, "grad_norm": 91.72692108154297, "learning_rate": 2.606015496574277e-07, "loss": 20.0773, "step": 453550 }, { "epoch": 0.9162198960071429, "grad_norm": 232.214599609375, "learning_rate": 2.604903388940899e-07, "loss": 20.5344, "step": 453560 }, { "epoch": 0.9162400966398268, "grad_norm": 145.7259521484375, "learning_rate": 2.60379151230547e-07, "loss": 13.2599, "step": 453570 }, { "epoch": 0.9162602972725106, "grad_norm": 229.9413299560547, "learning_rate": 2.602679866673413e-07, "loss": 17.8836, "step": 453580 }, { "epoch": 0.9162804979051944, "grad_norm": 345.7672119140625, "learning_rate": 2.601568452050146e-07, "loss": 27.2276, "step": 453590 }, { "epoch": 0.9163006985378782, "grad_norm": 277.6082458496094, "learning_rate": 2.600457268441092e-07, "loss": 21.4439, "step": 453600 }, { "epoch": 0.916320899170562, "grad_norm": 171.35084533691406, "learning_rate": 2.599346315851653e-07, "loss": 19.7956, "step": 453610 }, { "epoch": 0.9163410998032459, "grad_norm": 1177.286865234375, "learning_rate": 2.598235594287246e-07, "loss": 46.3974, "step": 453620 }, { "epoch": 0.9163613004359297, "grad_norm": 5.111498832702637, "learning_rate": 2.597125103753301e-07, "loss": 17.405, "step": 453630 }, { "epoch": 0.9163815010686135, "grad_norm": 431.3167419433594, "learning_rate": 2.596014844255218e-07, "loss": 19.3773, "step": 453640 }, { "epoch": 0.9164017017012973, "grad_norm": 548.6310424804688, "learning_rate": 2.594904815798399e-07, "loss": 15.4044, "step": 453650 }, { "epoch": 0.9164219023339811, "grad_norm": 331.8158264160156, "learning_rate": 2.5937950183882734e-07, "loss": 21.1692, "step": 453660 }, { "epoch": 0.916442102966665, "grad_norm": 53.809814453125, "learning_rate": 2.5926854520302414e-07, "loss": 32.3545, "step": 453670 }, { "epoch": 0.9164623035993488, "grad_norm": 217.87533569335938, "learning_rate": 2.591576116729705e-07, "loss": 15.405, "step": 453680 }, { "epoch": 0.9164825042320326, "grad_norm": 187.37620544433594, "learning_rate": 2.590467012492076e-07, "loss": 21.7085, "step": 453690 }, { "epoch": 0.9165027048647164, "grad_norm": 281.9234619140625, "learning_rate": 2.589358139322767e-07, "loss": 22.0715, "step": 453700 }, { "epoch": 0.9165229054974002, "grad_norm": 397.6363220214844, "learning_rate": 2.5882494972271746e-07, "loss": 10.1635, "step": 453710 }, { "epoch": 0.9165431061300839, "grad_norm": 292.94915771484375, "learning_rate": 2.587141086210698e-07, "loss": 8.4192, "step": 453720 }, { "epoch": 0.9165633067627678, "grad_norm": 342.94427490234375, "learning_rate": 2.586032906278757e-07, "loss": 16.3067, "step": 453730 }, { "epoch": 0.9165835073954516, "grad_norm": 515.53271484375, "learning_rate": 2.584924957436735e-07, "loss": 17.0453, "step": 453740 }, { "epoch": 0.9166037080281354, "grad_norm": 363.7118225097656, "learning_rate": 2.583817239690034e-07, "loss": 12.1022, "step": 453750 }, { "epoch": 0.9166239086608192, "grad_norm": 140.2545623779297, "learning_rate": 2.5827097530440605e-07, "loss": 18.2588, "step": 453760 }, { "epoch": 0.916644109293503, "grad_norm": 216.16690063476562, "learning_rate": 2.581602497504204e-07, "loss": 13.8425, "step": 453770 }, { "epoch": 0.9166643099261869, "grad_norm": 170.5027618408203, "learning_rate": 2.580495473075878e-07, "loss": 12.6689, "step": 453780 }, { "epoch": 0.9166845105588707, "grad_norm": 374.31842041015625, "learning_rate": 2.579388679764455e-07, "loss": 20.037, "step": 453790 }, { "epoch": 0.9167047111915545, "grad_norm": 421.94769287109375, "learning_rate": 2.578282117575343e-07, "loss": 20.8775, "step": 453800 }, { "epoch": 0.9167249118242383, "grad_norm": 51.00823974609375, "learning_rate": 2.577175786513936e-07, "loss": 31.8737, "step": 453810 }, { "epoch": 0.9167451124569221, "grad_norm": 259.5227355957031, "learning_rate": 2.576069686585614e-07, "loss": 9.5977, "step": 453820 }, { "epoch": 0.916765313089606, "grad_norm": 507.0741882324219, "learning_rate": 2.5749638177957834e-07, "loss": 22.4468, "step": 453830 }, { "epoch": 0.9167855137222898, "grad_norm": 264.82342529296875, "learning_rate": 2.5738581801498187e-07, "loss": 26.934, "step": 453840 }, { "epoch": 0.9168057143549736, "grad_norm": 188.7342987060547, "learning_rate": 2.5727527736531256e-07, "loss": 15.6602, "step": 453850 }, { "epoch": 0.9168259149876574, "grad_norm": 338.59942626953125, "learning_rate": 2.571647598311089e-07, "loss": 26.7214, "step": 453860 }, { "epoch": 0.9168461156203412, "grad_norm": 212.51951599121094, "learning_rate": 2.5705426541290765e-07, "loss": 26.4734, "step": 453870 }, { "epoch": 0.9168663162530251, "grad_norm": 250.728271484375, "learning_rate": 2.56943794111249e-07, "loss": 13.9403, "step": 453880 }, { "epoch": 0.9168865168857089, "grad_norm": 256.1279296875, "learning_rate": 2.5683334592667195e-07, "loss": 18.4427, "step": 453890 }, { "epoch": 0.9169067175183927, "grad_norm": 517.9696655273438, "learning_rate": 2.5672292085971276e-07, "loss": 20.7408, "step": 453900 }, { "epoch": 0.9169269181510765, "grad_norm": 674.7428588867188, "learning_rate": 2.5661251891091087e-07, "loss": 27.3759, "step": 453910 }, { "epoch": 0.9169471187837603, "grad_norm": 44.62065124511719, "learning_rate": 2.5650214008080544e-07, "loss": 10.5466, "step": 453920 }, { "epoch": 0.9169673194164442, "grad_norm": 211.87799072265625, "learning_rate": 2.5639178436993205e-07, "loss": 6.9806, "step": 453930 }, { "epoch": 0.916987520049128, "grad_norm": 272.81170654296875, "learning_rate": 2.5628145177882926e-07, "loss": 15.2975, "step": 453940 }, { "epoch": 0.9170077206818118, "grad_norm": 165.90725708007812, "learning_rate": 2.561711423080365e-07, "loss": 15.7969, "step": 453950 }, { "epoch": 0.9170279213144956, "grad_norm": 400.4488220214844, "learning_rate": 2.5606085595809015e-07, "loss": 18.5943, "step": 453960 }, { "epoch": 0.9170481219471794, "grad_norm": 256.30218505859375, "learning_rate": 2.559505927295275e-07, "loss": 14.317, "step": 453970 }, { "epoch": 0.9170683225798631, "grad_norm": 300.9992370605469, "learning_rate": 2.5584035262288585e-07, "loss": 16.0037, "step": 453980 }, { "epoch": 0.917088523212547, "grad_norm": 518.5432739257812, "learning_rate": 2.557301356387043e-07, "loss": 16.5282, "step": 453990 }, { "epoch": 0.9171087238452308, "grad_norm": 549.3116455078125, "learning_rate": 2.556199417775174e-07, "loss": 20.3889, "step": 454000 }, { "epoch": 0.9171289244779146, "grad_norm": 397.176513671875, "learning_rate": 2.555097710398635e-07, "loss": 16.9556, "step": 454010 }, { "epoch": 0.9171491251105984, "grad_norm": 572.1047973632812, "learning_rate": 2.553996234262801e-07, "loss": 18.0201, "step": 454020 }, { "epoch": 0.9171693257432822, "grad_norm": 170.86328125, "learning_rate": 2.5528949893730393e-07, "loss": 12.7383, "step": 454030 }, { "epoch": 0.9171895263759661, "grad_norm": 530.5274658203125, "learning_rate": 2.551793975734701e-07, "loss": 22.9562, "step": 454040 }, { "epoch": 0.9172097270086499, "grad_norm": 643.59375, "learning_rate": 2.550693193353171e-07, "loss": 19.9046, "step": 454050 }, { "epoch": 0.9172299276413337, "grad_norm": 701.5162963867188, "learning_rate": 2.5495926422338115e-07, "loss": 28.291, "step": 454060 }, { "epoch": 0.9172501282740175, "grad_norm": 262.4273376464844, "learning_rate": 2.548492322381968e-07, "loss": 16.2135, "step": 454070 }, { "epoch": 0.9172703289067013, "grad_norm": 281.5733947753906, "learning_rate": 2.547392233803031e-07, "loss": 11.4153, "step": 454080 }, { "epoch": 0.9172905295393852, "grad_norm": 0.0, "learning_rate": 2.5462923765023404e-07, "loss": 20.6734, "step": 454090 }, { "epoch": 0.917310730172069, "grad_norm": 170.24087524414062, "learning_rate": 2.5451927504852757e-07, "loss": 15.0851, "step": 454100 }, { "epoch": 0.9173309308047528, "grad_norm": 202.82864379882812, "learning_rate": 2.544093355757171e-07, "loss": 11.4643, "step": 454110 }, { "epoch": 0.9173511314374366, "grad_norm": 210.89051818847656, "learning_rate": 2.5429941923234114e-07, "loss": 18.046, "step": 454120 }, { "epoch": 0.9173713320701204, "grad_norm": 712.810791015625, "learning_rate": 2.541895260189342e-07, "loss": 20.078, "step": 454130 }, { "epoch": 0.9173915327028043, "grad_norm": 353.95068359375, "learning_rate": 2.5407965593603147e-07, "loss": 9.7528, "step": 454140 }, { "epoch": 0.9174117333354881, "grad_norm": 23.561250686645508, "learning_rate": 2.539698089841691e-07, "loss": 27.3173, "step": 454150 }, { "epoch": 0.9174319339681719, "grad_norm": 252.623046875, "learning_rate": 2.538599851638818e-07, "loss": 10.0199, "step": 454160 }, { "epoch": 0.9174521346008557, "grad_norm": 300.888671875, "learning_rate": 2.537501844757062e-07, "loss": 11.7257, "step": 454170 }, { "epoch": 0.9174723352335395, "grad_norm": 495.1927795410156, "learning_rate": 2.5364040692017644e-07, "loss": 17.0178, "step": 454180 }, { "epoch": 0.9174925358662234, "grad_norm": 0.0, "learning_rate": 2.5353065249782647e-07, "loss": 18.3665, "step": 454190 }, { "epoch": 0.9175127364989072, "grad_norm": 56.5859375, "learning_rate": 2.534209212091937e-07, "loss": 19.8699, "step": 454200 }, { "epoch": 0.917532937131591, "grad_norm": 183.22332763671875, "learning_rate": 2.5331121305481154e-07, "loss": 13.1073, "step": 454210 }, { "epoch": 0.9175531377642748, "grad_norm": 450.3115234375, "learning_rate": 2.53201528035214e-07, "loss": 17.4463, "step": 454220 }, { "epoch": 0.9175733383969585, "grad_norm": 493.7592468261719, "learning_rate": 2.530918661509368e-07, "loss": 15.6409, "step": 454230 }, { "epoch": 0.9175935390296424, "grad_norm": 193.6795654296875, "learning_rate": 2.529822274025151e-07, "loss": 34.6854, "step": 454240 }, { "epoch": 0.9176137396623262, "grad_norm": 202.70217895507812, "learning_rate": 2.5287261179048117e-07, "loss": 13.2721, "step": 454250 }, { "epoch": 0.91763394029501, "grad_norm": 41.909339904785156, "learning_rate": 2.5276301931537015e-07, "loss": 11.1418, "step": 454260 }, { "epoch": 0.9176541409276938, "grad_norm": 281.51202392578125, "learning_rate": 2.5265344997771726e-07, "loss": 14.5339, "step": 454270 }, { "epoch": 0.9176743415603776, "grad_norm": 454.9624938964844, "learning_rate": 2.525439037780558e-07, "loss": 18.4829, "step": 454280 }, { "epoch": 0.9176945421930615, "grad_norm": 433.8101501464844, "learning_rate": 2.5243438071691826e-07, "loss": 10.5991, "step": 454290 }, { "epoch": 0.9177147428257453, "grad_norm": 178.11285400390625, "learning_rate": 2.523248807948403e-07, "loss": 17.7645, "step": 454300 }, { "epoch": 0.9177349434584291, "grad_norm": 75.87867736816406, "learning_rate": 2.522154040123559e-07, "loss": 30.4693, "step": 454310 }, { "epoch": 0.9177551440911129, "grad_norm": 78.83842468261719, "learning_rate": 2.521059503699974e-07, "loss": 50.0412, "step": 454320 }, { "epoch": 0.9177753447237967, "grad_norm": 419.480712890625, "learning_rate": 2.5199651986829777e-07, "loss": 11.1858, "step": 454330 }, { "epoch": 0.9177955453564806, "grad_norm": 446.3019104003906, "learning_rate": 2.518871125077926e-07, "loss": 25.6513, "step": 454340 }, { "epoch": 0.9178157459891644, "grad_norm": 466.9568786621094, "learning_rate": 2.5177772828901327e-07, "loss": 16.6025, "step": 454350 }, { "epoch": 0.9178359466218482, "grad_norm": 231.08163452148438, "learning_rate": 2.5166836721249254e-07, "loss": 22.5752, "step": 454360 }, { "epoch": 0.917856147254532, "grad_norm": 224.1905517578125, "learning_rate": 2.515590292787656e-07, "loss": 15.2264, "step": 454370 }, { "epoch": 0.9178763478872158, "grad_norm": 112.1280746459961, "learning_rate": 2.5144971448836263e-07, "loss": 19.4447, "step": 454380 }, { "epoch": 0.9178965485198997, "grad_norm": 330.6990966796875, "learning_rate": 2.5134042284181927e-07, "loss": 11.428, "step": 454390 }, { "epoch": 0.9179167491525835, "grad_norm": 127.68366241455078, "learning_rate": 2.5123115433966615e-07, "loss": 14.6926, "step": 454400 }, { "epoch": 0.9179369497852673, "grad_norm": 307.2967834472656, "learning_rate": 2.5112190898243627e-07, "loss": 24.1661, "step": 454410 }, { "epoch": 0.9179571504179511, "grad_norm": 176.82020568847656, "learning_rate": 2.5101268677066247e-07, "loss": 16.3142, "step": 454420 }, { "epoch": 0.9179773510506349, "grad_norm": 1019.8779907226562, "learning_rate": 2.5090348770487604e-07, "loss": 17.838, "step": 454430 }, { "epoch": 0.9179975516833188, "grad_norm": 17.085533142089844, "learning_rate": 2.50794311785611e-07, "loss": 17.5736, "step": 454440 }, { "epoch": 0.9180177523160026, "grad_norm": 397.52734375, "learning_rate": 2.5068515901339794e-07, "loss": 15.178, "step": 454450 }, { "epoch": 0.9180379529486864, "grad_norm": 303.3193054199219, "learning_rate": 2.505760293887699e-07, "loss": 10.8081, "step": 454460 }, { "epoch": 0.9180581535813702, "grad_norm": 173.09710693359375, "learning_rate": 2.5046692291225803e-07, "loss": 5.3988, "step": 454470 }, { "epoch": 0.918078354214054, "grad_norm": 294.963623046875, "learning_rate": 2.503578395843936e-07, "loss": 14.7642, "step": 454480 }, { "epoch": 0.9180985548467377, "grad_norm": 119.04206085205078, "learning_rate": 2.502487794057101e-07, "loss": 17.0179, "step": 454490 }, { "epoch": 0.9181187554794216, "grad_norm": 343.16058349609375, "learning_rate": 2.5013974237673824e-07, "loss": 19.2829, "step": 454500 }, { "epoch": 0.9181389561121054, "grad_norm": 0.0, "learning_rate": 2.50030728498008e-07, "loss": 18.3615, "step": 454510 }, { "epoch": 0.9181591567447892, "grad_norm": 197.31698608398438, "learning_rate": 2.499217377700519e-07, "loss": 14.0502, "step": 454520 }, { "epoch": 0.918179357377473, "grad_norm": 261.01910400390625, "learning_rate": 2.498127701934022e-07, "loss": 12.4974, "step": 454530 }, { "epoch": 0.9181995580101568, "grad_norm": 106.8160171508789, "learning_rate": 2.49703825768588e-07, "loss": 15.5191, "step": 454540 }, { "epoch": 0.9182197586428407, "grad_norm": 354.593017578125, "learning_rate": 2.49594904496141e-07, "loss": 22.9946, "step": 454550 }, { "epoch": 0.9182399592755245, "grad_norm": 262.3971252441406, "learning_rate": 2.494860063765936e-07, "loss": 30.4192, "step": 454560 }, { "epoch": 0.9182601599082083, "grad_norm": 167.65670776367188, "learning_rate": 2.493771314104743e-07, "loss": 9.0718, "step": 454570 }, { "epoch": 0.9182803605408921, "grad_norm": 720.4822998046875, "learning_rate": 2.492682795983148e-07, "loss": 27.2784, "step": 454580 }, { "epoch": 0.918300561173576, "grad_norm": 269.57147216796875, "learning_rate": 2.4915945094064476e-07, "loss": 20.2397, "step": 454590 }, { "epoch": 0.9183207618062598, "grad_norm": 455.30487060546875, "learning_rate": 2.4905064543799706e-07, "loss": 35.0193, "step": 454600 }, { "epoch": 0.9183409624389436, "grad_norm": 482.8698425292969, "learning_rate": 2.4894186309089906e-07, "loss": 30.1231, "step": 454610 }, { "epoch": 0.9183611630716274, "grad_norm": 209.74327087402344, "learning_rate": 2.48833103899882e-07, "loss": 8.6898, "step": 454620 }, { "epoch": 0.9183813637043112, "grad_norm": 127.46502685546875, "learning_rate": 2.487243678654772e-07, "loss": 23.6524, "step": 454630 }, { "epoch": 0.918401564336995, "grad_norm": 287.8888854980469, "learning_rate": 2.486156549882135e-07, "loss": 24.7229, "step": 454640 }, { "epoch": 0.9184217649696789, "grad_norm": 433.9644775390625, "learning_rate": 2.485069652686195e-07, "loss": 18.8414, "step": 454650 }, { "epoch": 0.9184419656023627, "grad_norm": 0.0, "learning_rate": 2.4839829870722756e-07, "loss": 10.8807, "step": 454660 }, { "epoch": 0.9184621662350465, "grad_norm": 46.72091293334961, "learning_rate": 2.482896553045661e-07, "loss": 14.1955, "step": 454670 }, { "epoch": 0.9184823668677303, "grad_norm": 419.1227111816406, "learning_rate": 2.4818103506116355e-07, "loss": 19.7585, "step": 454680 }, { "epoch": 0.9185025675004141, "grad_norm": 182.9734649658203, "learning_rate": 2.4807243797755064e-07, "loss": 17.2594, "step": 454690 }, { "epoch": 0.918522768133098, "grad_norm": 303.3350524902344, "learning_rate": 2.479638640542564e-07, "loss": 19.9566, "step": 454700 }, { "epoch": 0.9185429687657818, "grad_norm": 259.21917724609375, "learning_rate": 2.478553132918099e-07, "loss": 16.9143, "step": 454710 }, { "epoch": 0.9185631693984656, "grad_norm": 220.04345703125, "learning_rate": 2.477467856907401e-07, "loss": 21.5817, "step": 454720 }, { "epoch": 0.9185833700311494, "grad_norm": 250.58963012695312, "learning_rate": 2.4763828125157654e-07, "loss": 17.703, "step": 454730 }, { "epoch": 0.9186035706638332, "grad_norm": 231.0832061767578, "learning_rate": 2.4752979997484774e-07, "loss": 25.8946, "step": 454740 }, { "epoch": 0.918623771296517, "grad_norm": 191.92022705078125, "learning_rate": 2.474213418610816e-07, "loss": 10.4869, "step": 454750 }, { "epoch": 0.9186439719292008, "grad_norm": 118.54442596435547, "learning_rate": 2.4731290691080766e-07, "loss": 15.8718, "step": 454760 }, { "epoch": 0.9186641725618846, "grad_norm": 472.7878112792969, "learning_rate": 2.472044951245539e-07, "loss": 13.8702, "step": 454770 }, { "epoch": 0.9186843731945684, "grad_norm": 41.55654525756836, "learning_rate": 2.470961065028499e-07, "loss": 7.9889, "step": 454780 }, { "epoch": 0.9187045738272522, "grad_norm": 57.816062927246094, "learning_rate": 2.4698774104622235e-07, "loss": 17.0623, "step": 454790 }, { "epoch": 0.918724774459936, "grad_norm": 249.79714965820312, "learning_rate": 2.4687939875519984e-07, "loss": 12.9687, "step": 454800 }, { "epoch": 0.9187449750926199, "grad_norm": 1019.9679565429688, "learning_rate": 2.4677107963031134e-07, "loss": 23.4064, "step": 454810 }, { "epoch": 0.9187651757253037, "grad_norm": 15.089723587036133, "learning_rate": 2.4666278367208417e-07, "loss": 8.9919, "step": 454820 }, { "epoch": 0.9187853763579875, "grad_norm": 226.8956298828125, "learning_rate": 2.465545108810452e-07, "loss": 13.0729, "step": 454830 }, { "epoch": 0.9188055769906713, "grad_norm": 19.46324920654297, "learning_rate": 2.464462612577234e-07, "loss": 18.5806, "step": 454840 }, { "epoch": 0.9188257776233552, "grad_norm": 318.8092041015625, "learning_rate": 2.463380348026467e-07, "loss": 19.0252, "step": 454850 }, { "epoch": 0.918845978256039, "grad_norm": 317.57550048828125, "learning_rate": 2.4622983151634083e-07, "loss": 19.1349, "step": 454860 }, { "epoch": 0.9188661788887228, "grad_norm": 118.0604019165039, "learning_rate": 2.461216513993342e-07, "loss": 7.3784, "step": 454870 }, { "epoch": 0.9188863795214066, "grad_norm": 300.44110107421875, "learning_rate": 2.460134944521547e-07, "loss": 14.0023, "step": 454880 }, { "epoch": 0.9189065801540904, "grad_norm": 59.587764739990234, "learning_rate": 2.459053606753292e-07, "loss": 17.6721, "step": 454890 }, { "epoch": 0.9189267807867743, "grad_norm": 458.0155944824219, "learning_rate": 2.457972500693834e-07, "loss": 27.4783, "step": 454900 }, { "epoch": 0.9189469814194581, "grad_norm": 169.9490203857422, "learning_rate": 2.456891626348451e-07, "loss": 11.7725, "step": 454910 }, { "epoch": 0.9189671820521419, "grad_norm": 269.10235595703125, "learning_rate": 2.455810983722429e-07, "loss": 20.9167, "step": 454920 }, { "epoch": 0.9189873826848257, "grad_norm": 309.55657958984375, "learning_rate": 2.4547305728210015e-07, "loss": 19.0819, "step": 454930 }, { "epoch": 0.9190075833175095, "grad_norm": 28.484663009643555, "learning_rate": 2.453650393649448e-07, "loss": 17.6875, "step": 454940 }, { "epoch": 0.9190277839501934, "grad_norm": 145.0044708251953, "learning_rate": 2.4525704462130485e-07, "loss": 10.4286, "step": 454950 }, { "epoch": 0.9190479845828772, "grad_norm": 0.0, "learning_rate": 2.4514907305170476e-07, "loss": 11.6454, "step": 454960 }, { "epoch": 0.919068185215561, "grad_norm": 241.74693298339844, "learning_rate": 2.4504112465667085e-07, "loss": 16.4838, "step": 454970 }, { "epoch": 0.9190883858482448, "grad_norm": 88.93653869628906, "learning_rate": 2.44933199436731e-07, "loss": 11.3602, "step": 454980 }, { "epoch": 0.9191085864809286, "grad_norm": 89.9294662475586, "learning_rate": 2.448252973924087e-07, "loss": 17.4705, "step": 454990 }, { "epoch": 0.9191287871136123, "grad_norm": 78.2550048828125, "learning_rate": 2.447174185242324e-07, "loss": 15.4892, "step": 455000 }, { "epoch": 0.9191489877462962, "grad_norm": 72.71954345703125, "learning_rate": 2.446095628327261e-07, "loss": 21.7942, "step": 455010 }, { "epoch": 0.91916918837898, "grad_norm": 283.3192138671875, "learning_rate": 2.4450173031841607e-07, "loss": 19.0136, "step": 455020 }, { "epoch": 0.9191893890116638, "grad_norm": 182.2919464111328, "learning_rate": 2.4439392098182804e-07, "loss": 19.1115, "step": 455030 }, { "epoch": 0.9192095896443476, "grad_norm": 6.921985626220703, "learning_rate": 2.442861348234865e-07, "loss": 15.7835, "step": 455040 }, { "epoch": 0.9192297902770314, "grad_norm": 232.60023498535156, "learning_rate": 2.4417837184391833e-07, "loss": 18.8307, "step": 455050 }, { "epoch": 0.9192499909097153, "grad_norm": 146.5553741455078, "learning_rate": 2.4407063204364703e-07, "loss": 15.4173, "step": 455060 }, { "epoch": 0.9192701915423991, "grad_norm": 87.2750244140625, "learning_rate": 2.4396291542319985e-07, "loss": 9.5828, "step": 455070 }, { "epoch": 0.9192903921750829, "grad_norm": 324.0497741699219, "learning_rate": 2.438552219831003e-07, "loss": 19.6949, "step": 455080 }, { "epoch": 0.9193105928077667, "grad_norm": 119.35296630859375, "learning_rate": 2.43747551723873e-07, "loss": 14.4704, "step": 455090 }, { "epoch": 0.9193307934404505, "grad_norm": 38.295467376708984, "learning_rate": 2.4363990464604357e-07, "loss": 14.0668, "step": 455100 }, { "epoch": 0.9193509940731344, "grad_norm": 327.8507385253906, "learning_rate": 2.435322807501367e-07, "loss": 12.0949, "step": 455110 }, { "epoch": 0.9193711947058182, "grad_norm": 181.30738830566406, "learning_rate": 2.4342468003667576e-07, "loss": 11.6783, "step": 455120 }, { "epoch": 0.919391395338502, "grad_norm": 172.4878692626953, "learning_rate": 2.4331710250618647e-07, "loss": 14.4112, "step": 455130 }, { "epoch": 0.9194115959711858, "grad_norm": 207.41802978515625, "learning_rate": 2.432095481591934e-07, "loss": 15.1611, "step": 455140 }, { "epoch": 0.9194317966038696, "grad_norm": 105.40169525146484, "learning_rate": 2.4310201699621896e-07, "loss": 7.7742, "step": 455150 }, { "epoch": 0.9194519972365535, "grad_norm": 671.6128540039062, "learning_rate": 2.429945090177888e-07, "loss": 15.4885, "step": 455160 }, { "epoch": 0.9194721978692373, "grad_norm": 474.05694580078125, "learning_rate": 2.4288702422442633e-07, "loss": 20.5155, "step": 455170 }, { "epoch": 0.9194923985019211, "grad_norm": 305.14703369140625, "learning_rate": 2.4277956261665624e-07, "loss": 23.5529, "step": 455180 }, { "epoch": 0.9195125991346049, "grad_norm": 270.3517761230469, "learning_rate": 2.426721241950003e-07, "loss": 19.0494, "step": 455190 }, { "epoch": 0.9195327997672887, "grad_norm": 52.737709045410156, "learning_rate": 2.4256470895998363e-07, "loss": 10.0723, "step": 455200 }, { "epoch": 0.9195530003999726, "grad_norm": 494.6665344238281, "learning_rate": 2.4245731691213137e-07, "loss": 19.9762, "step": 455210 }, { "epoch": 0.9195732010326564, "grad_norm": 191.9027557373047, "learning_rate": 2.423499480519631e-07, "loss": 13.1072, "step": 455220 }, { "epoch": 0.9195934016653402, "grad_norm": 450.24920654296875, "learning_rate": 2.4224260238000454e-07, "loss": 15.7209, "step": 455230 }, { "epoch": 0.919613602298024, "grad_norm": 457.0672302246094, "learning_rate": 2.421352798967791e-07, "loss": 17.7888, "step": 455240 }, { "epoch": 0.9196338029307078, "grad_norm": 25.905193328857422, "learning_rate": 2.420279806028092e-07, "loss": 15.7499, "step": 455250 }, { "epoch": 0.9196540035633916, "grad_norm": 327.5270690917969, "learning_rate": 2.4192070449861717e-07, "loss": 19.2746, "step": 455260 }, { "epoch": 0.9196742041960754, "grad_norm": 322.00762939453125, "learning_rate": 2.418134515847276e-07, "loss": 14.7609, "step": 455270 }, { "epoch": 0.9196944048287592, "grad_norm": 283.40240478515625, "learning_rate": 2.417062218616617e-07, "loss": 18.1085, "step": 455280 }, { "epoch": 0.919714605461443, "grad_norm": 108.97318267822266, "learning_rate": 2.415990153299419e-07, "loss": 9.2425, "step": 455290 }, { "epoch": 0.9197348060941268, "grad_norm": 206.3502960205078, "learning_rate": 2.414918319900922e-07, "loss": 14.3758, "step": 455300 }, { "epoch": 0.9197550067268107, "grad_norm": 139.03282165527344, "learning_rate": 2.413846718426338e-07, "loss": 7.5249, "step": 455310 }, { "epoch": 0.9197752073594945, "grad_norm": 43.54553985595703, "learning_rate": 2.412775348880897e-07, "loss": 11.8814, "step": 455320 }, { "epoch": 0.9197954079921783, "grad_norm": 180.7390899658203, "learning_rate": 2.4117042112698107e-07, "loss": 22.3416, "step": 455330 }, { "epoch": 0.9198156086248621, "grad_norm": 370.84210205078125, "learning_rate": 2.410633305598309e-07, "loss": 10.3919, "step": 455340 }, { "epoch": 0.9198358092575459, "grad_norm": 460.71710205078125, "learning_rate": 2.4095626318716146e-07, "loss": 17.2197, "step": 455350 }, { "epoch": 0.9198560098902298, "grad_norm": 40.18421936035156, "learning_rate": 2.40849219009493e-07, "loss": 15.7161, "step": 455360 }, { "epoch": 0.9198762105229136, "grad_norm": 352.50836181640625, "learning_rate": 2.407421980273489e-07, "loss": 14.1091, "step": 455370 }, { "epoch": 0.9198964111555974, "grad_norm": 205.72628784179688, "learning_rate": 2.406352002412499e-07, "loss": 7.1285, "step": 455380 }, { "epoch": 0.9199166117882812, "grad_norm": 200.0044403076172, "learning_rate": 2.4052822565171775e-07, "loss": 36.7614, "step": 455390 }, { "epoch": 0.919936812420965, "grad_norm": 207.90643310546875, "learning_rate": 2.404212742592743e-07, "loss": 12.0063, "step": 455400 }, { "epoch": 0.9199570130536489, "grad_norm": 166.22991943359375, "learning_rate": 2.4031434606443914e-07, "loss": 17.9532, "step": 455410 }, { "epoch": 0.9199772136863327, "grad_norm": 689.3635864257812, "learning_rate": 2.4020744106773573e-07, "loss": 16.9967, "step": 455420 }, { "epoch": 0.9199974143190165, "grad_norm": 146.4396514892578, "learning_rate": 2.401005592696837e-07, "loss": 5.9636, "step": 455430 }, { "epoch": 0.9200176149517003, "grad_norm": 326.1533508300781, "learning_rate": 2.399937006708036e-07, "loss": 14.2867, "step": 455440 }, { "epoch": 0.9200378155843841, "grad_norm": 137.99993896484375, "learning_rate": 2.3988686527161686e-07, "loss": 9.7865, "step": 455450 }, { "epoch": 0.920058016217068, "grad_norm": 453.38641357421875, "learning_rate": 2.3978005307264517e-07, "loss": 14.568, "step": 455460 }, { "epoch": 0.9200782168497518, "grad_norm": 268.1697692871094, "learning_rate": 2.396732640744076e-07, "loss": 15.3073, "step": 455470 }, { "epoch": 0.9200984174824356, "grad_norm": 202.7796173095703, "learning_rate": 2.395664982774243e-07, "loss": 10.6338, "step": 455480 }, { "epoch": 0.9201186181151194, "grad_norm": 256.9638977050781, "learning_rate": 2.3945975568221814e-07, "loss": 13.3608, "step": 455490 }, { "epoch": 0.9201388187478032, "grad_norm": 298.02587890625, "learning_rate": 2.3935303628930705e-07, "loss": 19.9997, "step": 455500 }, { "epoch": 0.9201590193804869, "grad_norm": 51.55033874511719, "learning_rate": 2.392463400992112e-07, "loss": 26.3338, "step": 455510 }, { "epoch": 0.9201792200131708, "grad_norm": 608.9950561523438, "learning_rate": 2.3913966711245185e-07, "loss": 17.7827, "step": 455520 }, { "epoch": 0.9201994206458546, "grad_norm": 417.44873046875, "learning_rate": 2.390330173295491e-07, "loss": 18.4131, "step": 455530 }, { "epoch": 0.9202196212785384, "grad_norm": 193.98619079589844, "learning_rate": 2.389263907510209e-07, "loss": 13.5151, "step": 455540 }, { "epoch": 0.9202398219112222, "grad_norm": 455.22021484375, "learning_rate": 2.388197873773879e-07, "loss": 20.0675, "step": 455550 }, { "epoch": 0.920260022543906, "grad_norm": 185.51930236816406, "learning_rate": 2.387132072091708e-07, "loss": 13.7994, "step": 455560 }, { "epoch": 0.9202802231765899, "grad_norm": 472.70367431640625, "learning_rate": 2.3860665024688757e-07, "loss": 13.6387, "step": 455570 }, { "epoch": 0.9203004238092737, "grad_norm": 191.1717987060547, "learning_rate": 2.3850011649105774e-07, "loss": 16.7665, "step": 455580 }, { "epoch": 0.9203206244419575, "grad_norm": 397.5945739746094, "learning_rate": 2.3839360594220094e-07, "loss": 9.1233, "step": 455590 }, { "epoch": 0.9203408250746413, "grad_norm": 342.2540588378906, "learning_rate": 2.3828711860083676e-07, "loss": 12.2973, "step": 455600 }, { "epoch": 0.9203610257073251, "grad_norm": 223.0743408203125, "learning_rate": 2.3818065446748306e-07, "loss": 14.3779, "step": 455610 }, { "epoch": 0.920381226340009, "grad_norm": 454.0294189453125, "learning_rate": 2.380742135426589e-07, "loss": 27.183, "step": 455620 }, { "epoch": 0.9204014269726928, "grad_norm": 218.7557830810547, "learning_rate": 2.3796779582688444e-07, "loss": 17.7765, "step": 455630 }, { "epoch": 0.9204216276053766, "grad_norm": 418.7548522949219, "learning_rate": 2.3786140132067703e-07, "loss": 14.2868, "step": 455640 }, { "epoch": 0.9204418282380604, "grad_norm": 293.7866516113281, "learning_rate": 2.3775503002455514e-07, "loss": 15.8268, "step": 455650 }, { "epoch": 0.9204620288707442, "grad_norm": 301.7937316894531, "learning_rate": 2.3764868193903835e-07, "loss": 33.7642, "step": 455660 }, { "epoch": 0.9204822295034281, "grad_norm": 463.62994384765625, "learning_rate": 2.3754235706464457e-07, "loss": 14.4391, "step": 455670 }, { "epoch": 0.9205024301361119, "grad_norm": 430.42596435546875, "learning_rate": 2.3743605540189063e-07, "loss": 23.2213, "step": 455680 }, { "epoch": 0.9205226307687957, "grad_norm": 209.38778686523438, "learning_rate": 2.3732977695129612e-07, "loss": 16.8263, "step": 455690 }, { "epoch": 0.9205428314014795, "grad_norm": 154.90745544433594, "learning_rate": 2.3722352171337836e-07, "loss": 19.6068, "step": 455700 }, { "epoch": 0.9205630320341633, "grad_norm": 468.6112060546875, "learning_rate": 2.3711728968865643e-07, "loss": 20.4635, "step": 455710 }, { "epoch": 0.9205832326668472, "grad_norm": 424.70806884765625, "learning_rate": 2.3701108087764657e-07, "loss": 10.8851, "step": 455720 }, { "epoch": 0.920603433299531, "grad_norm": 267.2959289550781, "learning_rate": 2.3690489528086668e-07, "loss": 9.1479, "step": 455730 }, { "epoch": 0.9206236339322148, "grad_norm": 345.7020263671875, "learning_rate": 2.367987328988347e-07, "loss": 9.4651, "step": 455740 }, { "epoch": 0.9206438345648986, "grad_norm": 311.5840759277344, "learning_rate": 2.366925937320691e-07, "loss": 21.4974, "step": 455750 }, { "epoch": 0.9206640351975824, "grad_norm": 279.4144592285156, "learning_rate": 2.36586477781085e-07, "loss": 23.7127, "step": 455760 }, { "epoch": 0.9206842358302662, "grad_norm": 575.6854248046875, "learning_rate": 2.3648038504640036e-07, "loss": 17.166, "step": 455770 }, { "epoch": 0.92070443646295, "grad_norm": 601.4854125976562, "learning_rate": 2.3637431552853363e-07, "loss": 16.4343, "step": 455780 }, { "epoch": 0.9207246370956338, "grad_norm": 610.1043701171875, "learning_rate": 2.362682692280005e-07, "loss": 27.5912, "step": 455790 }, { "epoch": 0.9207448377283176, "grad_norm": 372.38665771484375, "learning_rate": 2.361622461453178e-07, "loss": 20.647, "step": 455800 }, { "epoch": 0.9207650383610014, "grad_norm": 182.55914306640625, "learning_rate": 2.3605624628100178e-07, "loss": 15.2259, "step": 455810 }, { "epoch": 0.9207852389936853, "grad_norm": 343.69329833984375, "learning_rate": 2.3595026963557145e-07, "loss": 17.3321, "step": 455820 }, { "epoch": 0.9208054396263691, "grad_norm": 263.2419738769531, "learning_rate": 2.3584431620954085e-07, "loss": 17.0018, "step": 455830 }, { "epoch": 0.9208256402590529, "grad_norm": 129.1766357421875, "learning_rate": 2.357383860034268e-07, "loss": 16.7368, "step": 455840 }, { "epoch": 0.9208458408917367, "grad_norm": 298.4427795410156, "learning_rate": 2.3563247901774666e-07, "loss": 11.9806, "step": 455850 }, { "epoch": 0.9208660415244205, "grad_norm": 232.2042999267578, "learning_rate": 2.3552659525301557e-07, "loss": 12.4643, "step": 455860 }, { "epoch": 0.9208862421571044, "grad_norm": 304.7831115722656, "learning_rate": 2.354207347097498e-07, "loss": 20.6315, "step": 455870 }, { "epoch": 0.9209064427897882, "grad_norm": 396.6595153808594, "learning_rate": 2.3531489738846613e-07, "loss": 29.7388, "step": 455880 }, { "epoch": 0.920926643422472, "grad_norm": 237.63577270507812, "learning_rate": 2.3520908328968027e-07, "loss": 23.1591, "step": 455890 }, { "epoch": 0.9209468440551558, "grad_norm": 525.749755859375, "learning_rate": 2.351032924139063e-07, "loss": 20.3525, "step": 455900 }, { "epoch": 0.9209670446878396, "grad_norm": 22.981351852416992, "learning_rate": 2.349975247616615e-07, "loss": 22.067, "step": 455910 }, { "epoch": 0.9209872453205235, "grad_norm": 10.424921035766602, "learning_rate": 2.3489178033345994e-07, "loss": 14.1302, "step": 455920 }, { "epoch": 0.9210074459532073, "grad_norm": 384.0202941894531, "learning_rate": 2.34786059129819e-07, "loss": 16.9353, "step": 455930 }, { "epoch": 0.9210276465858911, "grad_norm": 303.44439697265625, "learning_rate": 2.3468036115125215e-07, "loss": 14.637, "step": 455940 }, { "epoch": 0.9210478472185749, "grad_norm": 376.6944580078125, "learning_rate": 2.3457468639827563e-07, "loss": 22.1572, "step": 455950 }, { "epoch": 0.9210680478512587, "grad_norm": 133.5134735107422, "learning_rate": 2.344690348714046e-07, "loss": 13.4534, "step": 455960 }, { "epoch": 0.9210882484839426, "grad_norm": 264.57220458984375, "learning_rate": 2.3436340657115253e-07, "loss": 21.2485, "step": 455970 }, { "epoch": 0.9211084491166264, "grad_norm": 271.7082824707031, "learning_rate": 2.3425780149803623e-07, "loss": 7.5798, "step": 455980 }, { "epoch": 0.9211286497493102, "grad_norm": 312.15020751953125, "learning_rate": 2.3415221965256807e-07, "loss": 25.5096, "step": 455990 }, { "epoch": 0.921148850381994, "grad_norm": 6.742545127868652, "learning_rate": 2.3404666103526542e-07, "loss": 19.401, "step": 456000 }, { "epoch": 0.9211690510146778, "grad_norm": 75.15931701660156, "learning_rate": 2.3394112564664062e-07, "loss": 10.5847, "step": 456010 }, { "epoch": 0.9211892516473615, "grad_norm": 522.3594970703125, "learning_rate": 2.338356134872083e-07, "loss": 19.4345, "step": 456020 }, { "epoch": 0.9212094522800454, "grad_norm": 394.4520263671875, "learning_rate": 2.3373012455748356e-07, "loss": 22.1055, "step": 456030 }, { "epoch": 0.9212296529127292, "grad_norm": 343.9664611816406, "learning_rate": 2.3362465885798046e-07, "loss": 19.8991, "step": 456040 }, { "epoch": 0.921249853545413, "grad_norm": 0.0, "learning_rate": 2.3351921638921193e-07, "loss": 17.1099, "step": 456050 }, { "epoch": 0.9212700541780968, "grad_norm": 111.21340942382812, "learning_rate": 2.3341379715169254e-07, "loss": 9.5939, "step": 456060 }, { "epoch": 0.9212902548107806, "grad_norm": 159.31533813476562, "learning_rate": 2.33308401145938e-07, "loss": 6.9512, "step": 456070 }, { "epoch": 0.9213104554434645, "grad_norm": 552.3495483398438, "learning_rate": 2.3320302837245846e-07, "loss": 19.0945, "step": 456080 }, { "epoch": 0.9213306560761483, "grad_norm": 248.4833984375, "learning_rate": 2.3309767883176903e-07, "loss": 13.281, "step": 456090 }, { "epoch": 0.9213508567088321, "grad_norm": 290.37841796875, "learning_rate": 2.3299235252438434e-07, "loss": 33.2221, "step": 456100 }, { "epoch": 0.9213710573415159, "grad_norm": 402.7010803222656, "learning_rate": 2.3288704945081675e-07, "loss": 17.8335, "step": 456110 }, { "epoch": 0.9213912579741997, "grad_norm": 406.3812561035156, "learning_rate": 2.327817696115786e-07, "loss": 22.4548, "step": 456120 }, { "epoch": 0.9214114586068836, "grad_norm": 246.31358337402344, "learning_rate": 2.3267651300718397e-07, "loss": 12.0949, "step": 456130 }, { "epoch": 0.9214316592395674, "grad_norm": 322.2605285644531, "learning_rate": 2.325712796381474e-07, "loss": 9.4732, "step": 456140 }, { "epoch": 0.9214518598722512, "grad_norm": 855.1376953125, "learning_rate": 2.3246606950497851e-07, "loss": 16.9807, "step": 456150 }, { "epoch": 0.921472060504935, "grad_norm": 379.2497253417969, "learning_rate": 2.3236088260819188e-07, "loss": 17.8963, "step": 456160 }, { "epoch": 0.9214922611376188, "grad_norm": 177.31777954101562, "learning_rate": 2.3225571894830047e-07, "loss": 31.2069, "step": 456170 }, { "epoch": 0.9215124617703027, "grad_norm": 412.48968505859375, "learning_rate": 2.3215057852581712e-07, "loss": 11.0542, "step": 456180 }, { "epoch": 0.9215326624029865, "grad_norm": 343.11834716796875, "learning_rate": 2.3204546134125207e-07, "loss": 14.3232, "step": 456190 }, { "epoch": 0.9215528630356703, "grad_norm": 515.6344604492188, "learning_rate": 2.319403673951204e-07, "loss": 31.6484, "step": 456200 }, { "epoch": 0.9215730636683541, "grad_norm": 183.17941284179688, "learning_rate": 2.3183529668793282e-07, "loss": 15.7709, "step": 456210 }, { "epoch": 0.9215932643010379, "grad_norm": 585.870849609375, "learning_rate": 2.3173024922020114e-07, "loss": 28.472, "step": 456220 }, { "epoch": 0.9216134649337218, "grad_norm": 326.94207763671875, "learning_rate": 2.3162522499243833e-07, "loss": 17.6644, "step": 456230 }, { "epoch": 0.9216336655664056, "grad_norm": 14.029626846313477, "learning_rate": 2.3152022400515561e-07, "loss": 10.2781, "step": 456240 }, { "epoch": 0.9216538661990894, "grad_norm": 214.8909454345703, "learning_rate": 2.314152462588659e-07, "loss": 14.2531, "step": 456250 }, { "epoch": 0.9216740668317732, "grad_norm": 210.4156951904297, "learning_rate": 2.3131029175407883e-07, "loss": 16.5687, "step": 456260 }, { "epoch": 0.921694267464457, "grad_norm": 186.1177215576172, "learning_rate": 2.3120536049130727e-07, "loss": 14.2814, "step": 456270 }, { "epoch": 0.9217144680971407, "grad_norm": 497.6822509765625, "learning_rate": 2.3110045247106305e-07, "loss": 17.227, "step": 456280 }, { "epoch": 0.9217346687298246, "grad_norm": 189.3236083984375, "learning_rate": 2.3099556769385578e-07, "loss": 19.3881, "step": 456290 }, { "epoch": 0.9217548693625084, "grad_norm": 373.1511535644531, "learning_rate": 2.3089070616019838e-07, "loss": 24.6633, "step": 456300 }, { "epoch": 0.9217750699951922, "grad_norm": 139.13961791992188, "learning_rate": 2.3078586787060098e-07, "loss": 18.6484, "step": 456310 }, { "epoch": 0.921795270627876, "grad_norm": 191.7769775390625, "learning_rate": 2.306810528255754e-07, "loss": 20.177, "step": 456320 }, { "epoch": 0.9218154712605598, "grad_norm": 216.1976776123047, "learning_rate": 2.3057626102563125e-07, "loss": 18.3794, "step": 456330 }, { "epoch": 0.9218356718932437, "grad_norm": 235.76002502441406, "learning_rate": 2.3047149247127975e-07, "loss": 18.2839, "step": 456340 }, { "epoch": 0.9218558725259275, "grad_norm": 203.26577758789062, "learning_rate": 2.3036674716303277e-07, "loss": 10.951, "step": 456350 }, { "epoch": 0.9218760731586113, "grad_norm": 175.94342041015625, "learning_rate": 2.3026202510139928e-07, "loss": 9.0335, "step": 456360 }, { "epoch": 0.9218962737912951, "grad_norm": 116.4832992553711, "learning_rate": 2.3015732628688948e-07, "loss": 19.4607, "step": 456370 }, { "epoch": 0.921916474423979, "grad_norm": 109.354248046875, "learning_rate": 2.300526507200146e-07, "loss": 21.0007, "step": 456380 }, { "epoch": 0.9219366750566628, "grad_norm": 153.26980590820312, "learning_rate": 2.2994799840128533e-07, "loss": 12.0502, "step": 456390 }, { "epoch": 0.9219568756893466, "grad_norm": 277.7088623046875, "learning_rate": 2.2984336933121076e-07, "loss": 19.344, "step": 456400 }, { "epoch": 0.9219770763220304, "grad_norm": 222.34278869628906, "learning_rate": 2.2973876351030046e-07, "loss": 19.7468, "step": 456410 }, { "epoch": 0.9219972769547142, "grad_norm": 117.16346740722656, "learning_rate": 2.2963418093906453e-07, "loss": 11.0947, "step": 456420 }, { "epoch": 0.922017477587398, "grad_norm": 273.9718322753906, "learning_rate": 2.2952962161801485e-07, "loss": 20.6282, "step": 456430 }, { "epoch": 0.9220376782200819, "grad_norm": 423.72808837890625, "learning_rate": 2.2942508554765764e-07, "loss": 25.9765, "step": 456440 }, { "epoch": 0.9220578788527657, "grad_norm": 286.5534973144531, "learning_rate": 2.2932057272850416e-07, "loss": 20.3886, "step": 456450 }, { "epoch": 0.9220780794854495, "grad_norm": 265.9811706542969, "learning_rate": 2.2921608316106402e-07, "loss": 13.5875, "step": 456460 }, { "epoch": 0.9220982801181333, "grad_norm": 275.2646789550781, "learning_rate": 2.2911161684584626e-07, "loss": 15.3887, "step": 456470 }, { "epoch": 0.9221184807508171, "grad_norm": 626.2711791992188, "learning_rate": 2.290071737833588e-07, "loss": 12.1256, "step": 456480 }, { "epoch": 0.922138681383501, "grad_norm": 791.0081787109375, "learning_rate": 2.2890275397411288e-07, "loss": 26.3437, "step": 456490 }, { "epoch": 0.9221588820161848, "grad_norm": 203.3986358642578, "learning_rate": 2.287983574186159e-07, "loss": 17.2147, "step": 456500 }, { "epoch": 0.9221790826488686, "grad_norm": 125.64102935791016, "learning_rate": 2.2869398411737687e-07, "loss": 9.2064, "step": 456510 }, { "epoch": 0.9221992832815524, "grad_norm": 374.40863037109375, "learning_rate": 2.2858963407090484e-07, "loss": 11.6159, "step": 456520 }, { "epoch": 0.9222194839142362, "grad_norm": 126.5657958984375, "learning_rate": 2.2848530727970775e-07, "loss": 14.4298, "step": 456530 }, { "epoch": 0.92223968454692, "grad_norm": 201.17803955078125, "learning_rate": 2.2838100374429518e-07, "loss": 25.9431, "step": 456540 }, { "epoch": 0.9222598851796038, "grad_norm": 388.0455322265625, "learning_rate": 2.2827672346517448e-07, "loss": 18.5784, "step": 456550 }, { "epoch": 0.9222800858122876, "grad_norm": 192.1857452392578, "learning_rate": 2.2817246644285472e-07, "loss": 19.548, "step": 456560 }, { "epoch": 0.9223002864449714, "grad_norm": 357.21453857421875, "learning_rate": 2.2806823267784327e-07, "loss": 10.3948, "step": 456570 }, { "epoch": 0.9223204870776552, "grad_norm": 462.7223815917969, "learning_rate": 2.2796402217064806e-07, "loss": 42.9697, "step": 456580 }, { "epoch": 0.9223406877103391, "grad_norm": 94.40213775634766, "learning_rate": 2.2785983492177867e-07, "loss": 17.3635, "step": 456590 }, { "epoch": 0.9223608883430229, "grad_norm": 312.3440246582031, "learning_rate": 2.2775567093174022e-07, "loss": 37.7345, "step": 456600 }, { "epoch": 0.9223810889757067, "grad_norm": 353.4170227050781, "learning_rate": 2.2765153020104292e-07, "loss": 17.2546, "step": 456610 }, { "epoch": 0.9224012896083905, "grad_norm": 107.74503326416016, "learning_rate": 2.27547412730193e-07, "loss": 22.4956, "step": 456620 }, { "epoch": 0.9224214902410743, "grad_norm": 261.94525146484375, "learning_rate": 2.274433185196978e-07, "loss": 25.4751, "step": 456630 }, { "epoch": 0.9224416908737582, "grad_norm": 74.53716278076172, "learning_rate": 2.2733924757006531e-07, "loss": 17.1028, "step": 456640 }, { "epoch": 0.922461891506442, "grad_norm": 199.27627563476562, "learning_rate": 2.2723519988180232e-07, "loss": 24.8385, "step": 456650 }, { "epoch": 0.9224820921391258, "grad_norm": 261.4853515625, "learning_rate": 2.2713117545541618e-07, "loss": 8.0366, "step": 456660 }, { "epoch": 0.9225022927718096, "grad_norm": 293.28741455078125, "learning_rate": 2.270271742914132e-07, "loss": 22.0636, "step": 456670 }, { "epoch": 0.9225224934044934, "grad_norm": 206.2681121826172, "learning_rate": 2.269231963903018e-07, "loss": 13.1123, "step": 456680 }, { "epoch": 0.9225426940371773, "grad_norm": 242.92726135253906, "learning_rate": 2.2681924175258773e-07, "loss": 12.7175, "step": 456690 }, { "epoch": 0.9225628946698611, "grad_norm": 190.275390625, "learning_rate": 2.2671531037877724e-07, "loss": 7.0395, "step": 456700 }, { "epoch": 0.9225830953025449, "grad_norm": 184.04701232910156, "learning_rate": 2.2661140226937773e-07, "loss": 17.0297, "step": 456710 }, { "epoch": 0.9226032959352287, "grad_norm": 282.6993408203125, "learning_rate": 2.2650751742489542e-07, "loss": 17.5334, "step": 456720 }, { "epoch": 0.9226234965679125, "grad_norm": 478.75469970703125, "learning_rate": 2.2640365584583602e-07, "loss": 14.0422, "step": 456730 }, { "epoch": 0.9226436972005964, "grad_norm": 299.892333984375, "learning_rate": 2.2629981753270636e-07, "loss": 25.043, "step": 456740 }, { "epoch": 0.9226638978332802, "grad_norm": 441.7330322265625, "learning_rate": 2.2619600248601327e-07, "loss": 15.7347, "step": 456750 }, { "epoch": 0.922684098465964, "grad_norm": 244.79605102539062, "learning_rate": 2.2609221070626132e-07, "loss": 21.4933, "step": 456760 }, { "epoch": 0.9227042990986478, "grad_norm": 347.6989440917969, "learning_rate": 2.259884421939562e-07, "loss": 15.3209, "step": 456770 }, { "epoch": 0.9227244997313316, "grad_norm": 708.950439453125, "learning_rate": 2.2588469694960535e-07, "loss": 19.6421, "step": 456780 }, { "epoch": 0.9227447003640153, "grad_norm": 401.40875244140625, "learning_rate": 2.2578097497371333e-07, "loss": 13.2815, "step": 456790 }, { "epoch": 0.9227649009966992, "grad_norm": 156.8737335205078, "learning_rate": 2.2567727626678527e-07, "loss": 15.3242, "step": 456800 }, { "epoch": 0.922785101629383, "grad_norm": 314.9941101074219, "learning_rate": 2.2557360082932745e-07, "loss": 21.0692, "step": 456810 }, { "epoch": 0.9228053022620668, "grad_norm": 317.0625, "learning_rate": 2.2546994866184557e-07, "loss": 13.397, "step": 456820 }, { "epoch": 0.9228255028947506, "grad_norm": 192.15399169921875, "learning_rate": 2.253663197648426e-07, "loss": 20.4725, "step": 456830 }, { "epoch": 0.9228457035274344, "grad_norm": 204.11090087890625, "learning_rate": 2.2526271413882528e-07, "loss": 11.4744, "step": 456840 }, { "epoch": 0.9228659041601183, "grad_norm": 152.6605987548828, "learning_rate": 2.2515913178429937e-07, "loss": 15.3138, "step": 456850 }, { "epoch": 0.9228861047928021, "grad_norm": 496.0419921875, "learning_rate": 2.2505557270176837e-07, "loss": 21.9908, "step": 456860 }, { "epoch": 0.9229063054254859, "grad_norm": 282.78680419921875, "learning_rate": 2.249520368917374e-07, "loss": 16.0992, "step": 456870 }, { "epoch": 0.9229265060581697, "grad_norm": 581.7693481445312, "learning_rate": 2.2484852435471106e-07, "loss": 23.6697, "step": 456880 }, { "epoch": 0.9229467066908535, "grad_norm": 118.70586395263672, "learning_rate": 2.2474503509119394e-07, "loss": 16.9308, "step": 456890 }, { "epoch": 0.9229669073235374, "grad_norm": 466.8150939941406, "learning_rate": 2.2464156910168954e-07, "loss": 20.8201, "step": 456900 }, { "epoch": 0.9229871079562212, "grad_norm": 70.92750549316406, "learning_rate": 2.2453812638670413e-07, "loss": 15.4252, "step": 456910 }, { "epoch": 0.923007308588905, "grad_norm": 290.128173828125, "learning_rate": 2.2443470694673953e-07, "loss": 12.7111, "step": 456920 }, { "epoch": 0.9230275092215888, "grad_norm": 381.5926818847656, "learning_rate": 2.2433131078230196e-07, "loss": 28.5388, "step": 456930 }, { "epoch": 0.9230477098542726, "grad_norm": 381.0793151855469, "learning_rate": 2.242279378938944e-07, "loss": 19.7802, "step": 456940 }, { "epoch": 0.9230679104869565, "grad_norm": 345.0141296386719, "learning_rate": 2.2412458828201977e-07, "loss": 20.9494, "step": 456950 }, { "epoch": 0.9230881111196403, "grad_norm": 44.365257263183594, "learning_rate": 2.2402126194718322e-07, "loss": 14.3114, "step": 456960 }, { "epoch": 0.9231083117523241, "grad_norm": 308.3829345703125, "learning_rate": 2.2391795888988822e-07, "loss": 26.8005, "step": 456970 }, { "epoch": 0.9231285123850079, "grad_norm": 0.0, "learning_rate": 2.2381467911063658e-07, "loss": 29.0832, "step": 456980 }, { "epoch": 0.9231487130176917, "grad_norm": 233.3600311279297, "learning_rate": 2.237114226099335e-07, "loss": 39.1211, "step": 456990 }, { "epoch": 0.9231689136503756, "grad_norm": 336.5529479980469, "learning_rate": 2.2360818938828189e-07, "loss": 21.1256, "step": 457000 }, { "epoch": 0.9231891142830594, "grad_norm": 145.19363403320312, "learning_rate": 2.2350497944618466e-07, "loss": 15.1682, "step": 457010 }, { "epoch": 0.9232093149157432, "grad_norm": 334.2428894042969, "learning_rate": 2.234017927841442e-07, "loss": 20.9393, "step": 457020 }, { "epoch": 0.923229515548427, "grad_norm": 94.92315673828125, "learning_rate": 2.2329862940266511e-07, "loss": 19.3263, "step": 457030 }, { "epoch": 0.9232497161811108, "grad_norm": 169.41757202148438, "learning_rate": 2.2319548930224865e-07, "loss": 8.1505, "step": 457040 }, { "epoch": 0.9232699168137946, "grad_norm": 308.775390625, "learning_rate": 2.2309237248339776e-07, "loss": 17.2342, "step": 457050 }, { "epoch": 0.9232901174464784, "grad_norm": 247.99966430664062, "learning_rate": 2.2298927894661481e-07, "loss": 17.2972, "step": 457060 }, { "epoch": 0.9233103180791622, "grad_norm": 348.5246887207031, "learning_rate": 2.2288620869240384e-07, "loss": 22.9325, "step": 457070 }, { "epoch": 0.923330518711846, "grad_norm": 245.3953094482422, "learning_rate": 2.2278316172126612e-07, "loss": 16.3009, "step": 457080 }, { "epoch": 0.9233507193445298, "grad_norm": 1451.7257080078125, "learning_rate": 2.2268013803370292e-07, "loss": 29.4351, "step": 457090 }, { "epoch": 0.9233709199772137, "grad_norm": 274.67156982421875, "learning_rate": 2.2257713763021826e-07, "loss": 22.262, "step": 457100 }, { "epoch": 0.9233911206098975, "grad_norm": 184.2248992919922, "learning_rate": 2.2247416051131288e-07, "loss": 14.3081, "step": 457110 }, { "epoch": 0.9234113212425813, "grad_norm": 124.99649810791016, "learning_rate": 2.2237120667748856e-07, "loss": 14.8649, "step": 457120 }, { "epoch": 0.9234315218752651, "grad_norm": 208.64932250976562, "learning_rate": 2.2226827612924774e-07, "loss": 19.1672, "step": 457130 }, { "epoch": 0.9234517225079489, "grad_norm": 72.52130889892578, "learning_rate": 2.221653688670916e-07, "loss": 17.0607, "step": 457140 }, { "epoch": 0.9234719231406328, "grad_norm": 278.6165466308594, "learning_rate": 2.220624848915226e-07, "loss": 19.9385, "step": 457150 }, { "epoch": 0.9234921237733166, "grad_norm": 160.53811645507812, "learning_rate": 2.2195962420304083e-07, "loss": 15.4356, "step": 457160 }, { "epoch": 0.9235123244060004, "grad_norm": 363.5828857421875, "learning_rate": 2.2185678680214927e-07, "loss": 23.8777, "step": 457170 }, { "epoch": 0.9235325250386842, "grad_norm": 270.5760498046875, "learning_rate": 2.2175397268934807e-07, "loss": 20.7764, "step": 457180 }, { "epoch": 0.923552725671368, "grad_norm": 77.8431167602539, "learning_rate": 2.216511818651379e-07, "loss": 17.9534, "step": 457190 }, { "epoch": 0.9235729263040519, "grad_norm": 148.47154235839844, "learning_rate": 2.2154841433002062e-07, "loss": 12.5748, "step": 457200 }, { "epoch": 0.9235931269367357, "grad_norm": 401.5377197265625, "learning_rate": 2.2144567008449636e-07, "loss": 25.8941, "step": 457210 }, { "epoch": 0.9236133275694195, "grad_norm": 362.03363037109375, "learning_rate": 2.2134294912906696e-07, "loss": 24.5876, "step": 457220 }, { "epoch": 0.9236335282021033, "grad_norm": 390.0622253417969, "learning_rate": 2.2124025146423255e-07, "loss": 15.1685, "step": 457230 }, { "epoch": 0.9236537288347871, "grad_norm": 0.0, "learning_rate": 2.2113757709049277e-07, "loss": 10.7024, "step": 457240 }, { "epoch": 0.923673929467471, "grad_norm": 388.9189758300781, "learning_rate": 2.210349260083494e-07, "loss": 19.8318, "step": 457250 }, { "epoch": 0.9236941301001548, "grad_norm": 453.5459899902344, "learning_rate": 2.2093229821830263e-07, "loss": 12.7006, "step": 457260 }, { "epoch": 0.9237143307328386, "grad_norm": 196.50888061523438, "learning_rate": 2.208296937208515e-07, "loss": 8.2976, "step": 457270 }, { "epoch": 0.9237345313655224, "grad_norm": 334.31744384765625, "learning_rate": 2.2072711251649615e-07, "loss": 8.1786, "step": 457280 }, { "epoch": 0.9237547319982062, "grad_norm": 406.2618713378906, "learning_rate": 2.2062455460573838e-07, "loss": 20.883, "step": 457290 }, { "epoch": 0.9237749326308899, "grad_norm": 364.6490173339844, "learning_rate": 2.2052201998907673e-07, "loss": 15.8866, "step": 457300 }, { "epoch": 0.9237951332635738, "grad_norm": 363.32379150390625, "learning_rate": 2.2041950866701078e-07, "loss": 30.2554, "step": 457310 }, { "epoch": 0.9238153338962576, "grad_norm": 110.7328872680664, "learning_rate": 2.2031702064004067e-07, "loss": 12.139, "step": 457320 }, { "epoch": 0.9238355345289414, "grad_norm": 303.4820556640625, "learning_rate": 2.2021455590866546e-07, "loss": 26.7076, "step": 457330 }, { "epoch": 0.9238557351616252, "grad_norm": 38.45009994506836, "learning_rate": 2.2011211447338477e-07, "loss": 19.7323, "step": 457340 }, { "epoch": 0.923875935794309, "grad_norm": 771.2612915039062, "learning_rate": 2.200096963346976e-07, "loss": 23.089, "step": 457350 }, { "epoch": 0.9238961364269929, "grad_norm": 284.0710754394531, "learning_rate": 2.199073014931047e-07, "loss": 15.2532, "step": 457360 }, { "epoch": 0.9239163370596767, "grad_norm": 25.208005905151367, "learning_rate": 2.198049299491023e-07, "loss": 16.2739, "step": 457370 }, { "epoch": 0.9239365376923605, "grad_norm": 164.5516357421875, "learning_rate": 2.1970258170319114e-07, "loss": 7.5858, "step": 457380 }, { "epoch": 0.9239567383250443, "grad_norm": 163.2032012939453, "learning_rate": 2.1960025675587082e-07, "loss": 10.7046, "step": 457390 }, { "epoch": 0.9239769389577281, "grad_norm": 203.40997314453125, "learning_rate": 2.1949795510763872e-07, "loss": 14.4185, "step": 457400 }, { "epoch": 0.923997139590412, "grad_norm": 430.99896240234375, "learning_rate": 2.1939567675899333e-07, "loss": 25.7025, "step": 457410 }, { "epoch": 0.9240173402230958, "grad_norm": 367.4797058105469, "learning_rate": 2.1929342171043366e-07, "loss": 32.4341, "step": 457420 }, { "epoch": 0.9240375408557796, "grad_norm": 156.14569091796875, "learning_rate": 2.191911899624588e-07, "loss": 26.5189, "step": 457430 }, { "epoch": 0.9240577414884634, "grad_norm": 296.7413330078125, "learning_rate": 2.1908898151556502e-07, "loss": 16.9909, "step": 457440 }, { "epoch": 0.9240779421211472, "grad_norm": 399.5950012207031, "learning_rate": 2.189867963702519e-07, "loss": 17.4734, "step": 457450 }, { "epoch": 0.9240981427538311, "grad_norm": 264.9706115722656, "learning_rate": 2.188846345270179e-07, "loss": 16.9213, "step": 457460 }, { "epoch": 0.9241183433865149, "grad_norm": 265.282958984375, "learning_rate": 2.1878249598636047e-07, "loss": 13.9011, "step": 457470 }, { "epoch": 0.9241385440191987, "grad_norm": 45.12477493286133, "learning_rate": 2.186803807487764e-07, "loss": 13.6294, "step": 457480 }, { "epoch": 0.9241587446518825, "grad_norm": 412.4893798828125, "learning_rate": 2.1857828881476472e-07, "loss": 24.5977, "step": 457490 }, { "epoch": 0.9241789452845663, "grad_norm": 245.3011474609375, "learning_rate": 2.1847622018482283e-07, "loss": 18.715, "step": 457500 }, { "epoch": 0.9241991459172502, "grad_norm": 393.8460998535156, "learning_rate": 2.1837417485944755e-07, "loss": 20.525, "step": 457510 }, { "epoch": 0.924219346549934, "grad_norm": 929.6036376953125, "learning_rate": 2.1827215283913683e-07, "loss": 34.2877, "step": 457520 }, { "epoch": 0.9242395471826178, "grad_norm": 518.9688720703125, "learning_rate": 2.1817015412438692e-07, "loss": 30.2135, "step": 457530 }, { "epoch": 0.9242597478153016, "grad_norm": 453.6609191894531, "learning_rate": 2.1806817871569686e-07, "loss": 26.5946, "step": 457540 }, { "epoch": 0.9242799484479854, "grad_norm": 221.75369262695312, "learning_rate": 2.1796622661356238e-07, "loss": 25.0084, "step": 457550 }, { "epoch": 0.9243001490806692, "grad_norm": 132.42124938964844, "learning_rate": 2.1786429781847972e-07, "loss": 22.873, "step": 457560 }, { "epoch": 0.924320349713353, "grad_norm": 391.724365234375, "learning_rate": 2.1776239233094687e-07, "loss": 11.0618, "step": 457570 }, { "epoch": 0.9243405503460368, "grad_norm": 198.1086883544922, "learning_rate": 2.176605101514606e-07, "loss": 25.2933, "step": 457580 }, { "epoch": 0.9243607509787206, "grad_norm": 670.4555053710938, "learning_rate": 2.175586512805161e-07, "loss": 28.8527, "step": 457590 }, { "epoch": 0.9243809516114044, "grad_norm": 295.0966491699219, "learning_rate": 2.174568157186102e-07, "loss": 18.4053, "step": 457600 }, { "epoch": 0.9244011522440883, "grad_norm": 1169.2147216796875, "learning_rate": 2.1735500346624083e-07, "loss": 17.9884, "step": 457610 }, { "epoch": 0.9244213528767721, "grad_norm": 340.5728759765625, "learning_rate": 2.1725321452390314e-07, "loss": 15.4388, "step": 457620 }, { "epoch": 0.9244415535094559, "grad_norm": 235.09950256347656, "learning_rate": 2.1715144889209284e-07, "loss": 19.7492, "step": 457630 }, { "epoch": 0.9244617541421397, "grad_norm": 302.98150634765625, "learning_rate": 2.1704970657130675e-07, "loss": 13.1334, "step": 457640 }, { "epoch": 0.9244819547748235, "grad_norm": 224.8277587890625, "learning_rate": 2.1694798756204005e-07, "loss": 10.285, "step": 457650 }, { "epoch": 0.9245021554075074, "grad_norm": 438.7237854003906, "learning_rate": 2.1684629186478846e-07, "loss": 13.7399, "step": 457660 }, { "epoch": 0.9245223560401912, "grad_norm": 240.50230407714844, "learning_rate": 2.1674461948004766e-07, "loss": 15.1875, "step": 457670 }, { "epoch": 0.924542556672875, "grad_norm": 237.82205200195312, "learning_rate": 2.1664297040831394e-07, "loss": 12.1434, "step": 457680 }, { "epoch": 0.9245627573055588, "grad_norm": 8.642843246459961, "learning_rate": 2.1654134465008247e-07, "loss": 10.0281, "step": 457690 }, { "epoch": 0.9245829579382426, "grad_norm": 431.3695068359375, "learning_rate": 2.1643974220584729e-07, "loss": 24.1967, "step": 457700 }, { "epoch": 0.9246031585709265, "grad_norm": 150.09141540527344, "learning_rate": 2.1633816307610577e-07, "loss": 6.7439, "step": 457710 }, { "epoch": 0.9246233592036103, "grad_norm": 293.7038269042969, "learning_rate": 2.1623660726135197e-07, "loss": 19.2351, "step": 457720 }, { "epoch": 0.9246435598362941, "grad_norm": 79.47240447998047, "learning_rate": 2.161350747620794e-07, "loss": 13.3978, "step": 457730 }, { "epoch": 0.9246637604689779, "grad_norm": 193.63145446777344, "learning_rate": 2.1603356557878486e-07, "loss": 11.6504, "step": 457740 }, { "epoch": 0.9246839611016617, "grad_norm": 243.01817321777344, "learning_rate": 2.1593207971196296e-07, "loss": 15.138, "step": 457750 }, { "epoch": 0.9247041617343456, "grad_norm": 393.2857666015625, "learning_rate": 2.1583061716210774e-07, "loss": 11.4301, "step": 457760 }, { "epoch": 0.9247243623670294, "grad_norm": 105.41272735595703, "learning_rate": 2.1572917792971326e-07, "loss": 20.4093, "step": 457770 }, { "epoch": 0.9247445629997132, "grad_norm": 122.74444580078125, "learning_rate": 2.1562776201527525e-07, "loss": 12.8214, "step": 457780 }, { "epoch": 0.924764763632397, "grad_norm": 784.9725952148438, "learning_rate": 2.1552636941928717e-07, "loss": 17.4607, "step": 457790 }, { "epoch": 0.9247849642650808, "grad_norm": 472.62603759765625, "learning_rate": 2.154250001422431e-07, "loss": 19.6794, "step": 457800 }, { "epoch": 0.9248051648977647, "grad_norm": 208.9678192138672, "learning_rate": 2.1532365418463708e-07, "loss": 8.3243, "step": 457810 }, { "epoch": 0.9248253655304484, "grad_norm": 255.95599365234375, "learning_rate": 2.1522233154696314e-07, "loss": 11.1677, "step": 457820 }, { "epoch": 0.9248455661631322, "grad_norm": 196.7720489501953, "learning_rate": 2.151210322297159e-07, "loss": 20.5409, "step": 457830 }, { "epoch": 0.924865766795816, "grad_norm": 61.60475158691406, "learning_rate": 2.1501975623338833e-07, "loss": 12.5555, "step": 457840 }, { "epoch": 0.9248859674284998, "grad_norm": 143.92803955078125, "learning_rate": 2.1491850355847332e-07, "loss": 16.1914, "step": 457850 }, { "epoch": 0.9249061680611836, "grad_norm": 90.24784088134766, "learning_rate": 2.1481727420546605e-07, "loss": 7.4153, "step": 457860 }, { "epoch": 0.9249263686938675, "grad_norm": 307.8219909667969, "learning_rate": 2.147160681748589e-07, "loss": 20.5707, "step": 457870 }, { "epoch": 0.9249465693265513, "grad_norm": 204.69508361816406, "learning_rate": 2.1461488546714425e-07, "loss": 18.323, "step": 457880 }, { "epoch": 0.9249667699592351, "grad_norm": 204.1322021484375, "learning_rate": 2.1451372608281674e-07, "loss": 8.0543, "step": 457890 }, { "epoch": 0.9249869705919189, "grad_norm": 258.7735900878906, "learning_rate": 2.1441259002236924e-07, "loss": 17.6502, "step": 457900 }, { "epoch": 0.9250071712246027, "grad_norm": 178.43128967285156, "learning_rate": 2.1431147728629476e-07, "loss": 13.9585, "step": 457910 }, { "epoch": 0.9250273718572866, "grad_norm": 264.40045166015625, "learning_rate": 2.1421038787508508e-07, "loss": 13.8546, "step": 457920 }, { "epoch": 0.9250475724899704, "grad_norm": 193.4455108642578, "learning_rate": 2.1410932178923372e-07, "loss": 17.9401, "step": 457930 }, { "epoch": 0.9250677731226542, "grad_norm": 277.23077392578125, "learning_rate": 2.1400827902923304e-07, "loss": 12.5947, "step": 457940 }, { "epoch": 0.925087973755338, "grad_norm": 94.73963165283203, "learning_rate": 2.1390725959557546e-07, "loss": 15.1187, "step": 457950 }, { "epoch": 0.9251081743880218, "grad_norm": 228.7657012939453, "learning_rate": 2.1380626348875278e-07, "loss": 16.4715, "step": 457960 }, { "epoch": 0.9251283750207057, "grad_norm": 179.51947021484375, "learning_rate": 2.137052907092596e-07, "loss": 12.7447, "step": 457970 }, { "epoch": 0.9251485756533895, "grad_norm": 287.54949951171875, "learning_rate": 2.13604341257585e-07, "loss": 22.2403, "step": 457980 }, { "epoch": 0.9251687762860733, "grad_norm": 251.56422424316406, "learning_rate": 2.135034151342219e-07, "loss": 19.6641, "step": 457990 }, { "epoch": 0.9251889769187571, "grad_norm": 266.3399353027344, "learning_rate": 2.134025123396638e-07, "loss": 18.5667, "step": 458000 }, { "epoch": 0.9252091775514409, "grad_norm": 270.826904296875, "learning_rate": 2.1330163287440087e-07, "loss": 9.1103, "step": 458010 }, { "epoch": 0.9252293781841248, "grad_norm": 2.8657827377319336, "learning_rate": 2.1320077673892493e-07, "loss": 17.3413, "step": 458020 }, { "epoch": 0.9252495788168086, "grad_norm": 355.48699951171875, "learning_rate": 2.1309994393372836e-07, "loss": 19.1438, "step": 458030 }, { "epoch": 0.9252697794494924, "grad_norm": 15.83651351928711, "learning_rate": 2.1299913445930242e-07, "loss": 17.0119, "step": 458040 }, { "epoch": 0.9252899800821762, "grad_norm": 19.4829158782959, "learning_rate": 2.1289834831613675e-07, "loss": 13.3193, "step": 458050 }, { "epoch": 0.92531018071486, "grad_norm": 285.29827880859375, "learning_rate": 2.127975855047243e-07, "loss": 14.4128, "step": 458060 }, { "epoch": 0.9253303813475438, "grad_norm": 360.4769287109375, "learning_rate": 2.126968460255563e-07, "loss": 20.0909, "step": 458070 }, { "epoch": 0.9253505819802276, "grad_norm": 115.21932983398438, "learning_rate": 2.1259612987912348e-07, "loss": 48.6393, "step": 458080 }, { "epoch": 0.9253707826129114, "grad_norm": 185.64874267578125, "learning_rate": 2.1249543706591602e-07, "loss": 7.2848, "step": 458090 }, { "epoch": 0.9253909832455952, "grad_norm": 167.89633178710938, "learning_rate": 2.123947675864252e-07, "loss": 10.263, "step": 458100 }, { "epoch": 0.925411183878279, "grad_norm": 54.32759094238281, "learning_rate": 2.1229412144114225e-07, "loss": 12.2169, "step": 458110 }, { "epoch": 0.9254313845109629, "grad_norm": 273.45697021484375, "learning_rate": 2.121934986305557e-07, "loss": 10.3457, "step": 458120 }, { "epoch": 0.9254515851436467, "grad_norm": 154.4072265625, "learning_rate": 2.120928991551585e-07, "loss": 17.0332, "step": 458130 }, { "epoch": 0.9254717857763305, "grad_norm": 183.60134887695312, "learning_rate": 2.1199232301543915e-07, "loss": 20.2376, "step": 458140 }, { "epoch": 0.9254919864090143, "grad_norm": 152.31246948242188, "learning_rate": 2.1189177021188888e-07, "loss": 37.2988, "step": 458150 }, { "epoch": 0.9255121870416981, "grad_norm": 110.86585235595703, "learning_rate": 2.117912407449979e-07, "loss": 13.2511, "step": 458160 }, { "epoch": 0.925532387674382, "grad_norm": 131.7351531982422, "learning_rate": 2.116907346152547e-07, "loss": 13.2585, "step": 458170 }, { "epoch": 0.9255525883070658, "grad_norm": 154.50274658203125, "learning_rate": 2.1159025182315052e-07, "loss": 15.4772, "step": 458180 }, { "epoch": 0.9255727889397496, "grad_norm": 158.2553253173828, "learning_rate": 2.11489792369175e-07, "loss": 15.0562, "step": 458190 }, { "epoch": 0.9255929895724334, "grad_norm": 134.0874786376953, "learning_rate": 2.1138935625381663e-07, "loss": 16.9204, "step": 458200 }, { "epoch": 0.9256131902051172, "grad_norm": 400.1285400390625, "learning_rate": 2.1128894347756613e-07, "loss": 18.6318, "step": 458210 }, { "epoch": 0.925633390837801, "grad_norm": 300.58599853515625, "learning_rate": 2.1118855404091253e-07, "loss": 26.1327, "step": 458220 }, { "epoch": 0.9256535914704849, "grad_norm": 185.72238159179688, "learning_rate": 2.110881879443455e-07, "loss": 14.9031, "step": 458230 }, { "epoch": 0.9256737921031687, "grad_norm": 129.05772399902344, "learning_rate": 2.1098784518835292e-07, "loss": 18.2543, "step": 458240 }, { "epoch": 0.9256939927358525, "grad_norm": 520.874755859375, "learning_rate": 2.1088752577342607e-07, "loss": 28.8492, "step": 458250 }, { "epoch": 0.9257141933685363, "grad_norm": 834.5855712890625, "learning_rate": 2.1078722970005182e-07, "loss": 16.1469, "step": 458260 }, { "epoch": 0.9257343940012202, "grad_norm": 203.6182403564453, "learning_rate": 2.1068695696871922e-07, "loss": 13.5855, "step": 458270 }, { "epoch": 0.925754594633904, "grad_norm": 276.94000244140625, "learning_rate": 2.1058670757991783e-07, "loss": 17.2852, "step": 458280 }, { "epoch": 0.9257747952665878, "grad_norm": 299.1051025390625, "learning_rate": 2.104864815341362e-07, "loss": 17.8666, "step": 458290 }, { "epoch": 0.9257949958992716, "grad_norm": 154.6279296875, "learning_rate": 2.103862788318628e-07, "loss": 15.1453, "step": 458300 }, { "epoch": 0.9258151965319554, "grad_norm": 187.29559326171875, "learning_rate": 2.102860994735856e-07, "loss": 10.3892, "step": 458310 }, { "epoch": 0.9258353971646393, "grad_norm": 368.73345947265625, "learning_rate": 2.1018594345979305e-07, "loss": 20.8475, "step": 458320 }, { "epoch": 0.925855597797323, "grad_norm": 376.1015625, "learning_rate": 2.1008581079097312e-07, "loss": 21.4322, "step": 458330 }, { "epoch": 0.9258757984300068, "grad_norm": 327.84954833984375, "learning_rate": 2.0998570146761376e-07, "loss": 18.1575, "step": 458340 }, { "epoch": 0.9258959990626906, "grad_norm": 46.97186279296875, "learning_rate": 2.098856154902029e-07, "loss": 12.9281, "step": 458350 }, { "epoch": 0.9259161996953744, "grad_norm": 190.0801239013672, "learning_rate": 2.0978555285922963e-07, "loss": 32.2267, "step": 458360 }, { "epoch": 0.9259364003280582, "grad_norm": 76.44590759277344, "learning_rate": 2.0968551357518018e-07, "loss": 14.1983, "step": 458370 }, { "epoch": 0.9259566009607421, "grad_norm": 655.9341430664062, "learning_rate": 2.0958549763854196e-07, "loss": 24.3508, "step": 458380 }, { "epoch": 0.9259768015934259, "grad_norm": 199.93775939941406, "learning_rate": 2.0948550504980403e-07, "loss": 14.0939, "step": 458390 }, { "epoch": 0.9259970022261097, "grad_norm": 129.3128204345703, "learning_rate": 2.0938553580945208e-07, "loss": 23.2577, "step": 458400 }, { "epoch": 0.9260172028587935, "grad_norm": 328.13800048828125, "learning_rate": 2.092855899179741e-07, "loss": 9.1749, "step": 458410 }, { "epoch": 0.9260374034914773, "grad_norm": 220.08375549316406, "learning_rate": 2.0918566737585688e-07, "loss": 21.2348, "step": 458420 }, { "epoch": 0.9260576041241612, "grad_norm": 157.90724182128906, "learning_rate": 2.0908576818358783e-07, "loss": 13.8015, "step": 458430 }, { "epoch": 0.926077804756845, "grad_norm": 161.1211700439453, "learning_rate": 2.0898589234165378e-07, "loss": 13.2482, "step": 458440 }, { "epoch": 0.9260980053895288, "grad_norm": 401.2912902832031, "learning_rate": 2.0888603985054156e-07, "loss": 20.68, "step": 458450 }, { "epoch": 0.9261182060222126, "grad_norm": 231.30166625976562, "learning_rate": 2.0878621071073745e-07, "loss": 10.3691, "step": 458460 }, { "epoch": 0.9261384066548964, "grad_norm": 250.1029052734375, "learning_rate": 2.086864049227283e-07, "loss": 15.4583, "step": 458470 }, { "epoch": 0.9261586072875803, "grad_norm": 475.3943786621094, "learning_rate": 2.085866224870009e-07, "loss": 11.1181, "step": 458480 }, { "epoch": 0.9261788079202641, "grad_norm": 300.5265808105469, "learning_rate": 2.0848686340404045e-07, "loss": 32.7259, "step": 458490 }, { "epoch": 0.9261990085529479, "grad_norm": 225.87728881835938, "learning_rate": 2.083871276743338e-07, "loss": 17.3624, "step": 458500 }, { "epoch": 0.9262192091856317, "grad_norm": 289.4924621582031, "learning_rate": 2.0828741529836771e-07, "loss": 16.8729, "step": 458510 }, { "epoch": 0.9262394098183155, "grad_norm": 9.950053215026855, "learning_rate": 2.0818772627662743e-07, "loss": 12.1771, "step": 458520 }, { "epoch": 0.9262596104509994, "grad_norm": 7.057925701141357, "learning_rate": 2.0808806060959864e-07, "loss": 27.0788, "step": 458530 }, { "epoch": 0.9262798110836832, "grad_norm": 281.7425842285156, "learning_rate": 2.0798841829776816e-07, "loss": 10.2757, "step": 458540 }, { "epoch": 0.926300011716367, "grad_norm": 192.39663696289062, "learning_rate": 2.0788879934162064e-07, "loss": 17.1062, "step": 458550 }, { "epoch": 0.9263202123490508, "grad_norm": 190.81417846679688, "learning_rate": 2.077892037416418e-07, "loss": 15.5512, "step": 458560 }, { "epoch": 0.9263404129817346, "grad_norm": 244.1360626220703, "learning_rate": 2.0768963149831678e-07, "loss": 13.6383, "step": 458570 }, { "epoch": 0.9263606136144183, "grad_norm": 275.1950378417969, "learning_rate": 2.0759008261213242e-07, "loss": 15.0651, "step": 458580 }, { "epoch": 0.9263808142471022, "grad_norm": 158.13926696777344, "learning_rate": 2.0749055708357168e-07, "loss": 19.8864, "step": 458590 }, { "epoch": 0.926401014879786, "grad_norm": 256.86767578125, "learning_rate": 2.0739105491312028e-07, "loss": 17.6625, "step": 458600 }, { "epoch": 0.9264212155124698, "grad_norm": 722.1422119140625, "learning_rate": 2.0729157610126448e-07, "loss": 25.766, "step": 458610 }, { "epoch": 0.9264414161451536, "grad_norm": 141.5935516357422, "learning_rate": 2.0719212064848838e-07, "loss": 19.278, "step": 458620 }, { "epoch": 0.9264616167778374, "grad_norm": 353.7138671875, "learning_rate": 2.07092688555276e-07, "loss": 17.0386, "step": 458630 }, { "epoch": 0.9264818174105213, "grad_norm": 118.1501235961914, "learning_rate": 2.0699327982211304e-07, "loss": 13.3947, "step": 458640 }, { "epoch": 0.9265020180432051, "grad_norm": 270.81756591796875, "learning_rate": 2.068938944494836e-07, "loss": 21.4207, "step": 458650 }, { "epoch": 0.9265222186758889, "grad_norm": 610.9850463867188, "learning_rate": 2.0679453243787174e-07, "loss": 12.7193, "step": 458660 }, { "epoch": 0.9265424193085727, "grad_norm": 0.0, "learning_rate": 2.0669519378776147e-07, "loss": 15.17, "step": 458670 }, { "epoch": 0.9265626199412565, "grad_norm": 285.80963134765625, "learning_rate": 2.0659587849963801e-07, "loss": 19.3955, "step": 458680 }, { "epoch": 0.9265828205739404, "grad_norm": 300.5206298828125, "learning_rate": 2.0649658657398487e-07, "loss": 11.1576, "step": 458690 }, { "epoch": 0.9266030212066242, "grad_norm": 416.0434265136719, "learning_rate": 2.0639731801128603e-07, "loss": 19.9857, "step": 458700 }, { "epoch": 0.926623221839308, "grad_norm": 319.1828918457031, "learning_rate": 2.0629807281202508e-07, "loss": 20.9035, "step": 458710 }, { "epoch": 0.9266434224719918, "grad_norm": 280.5700378417969, "learning_rate": 2.0619885097668658e-07, "loss": 10.4198, "step": 458720 }, { "epoch": 0.9266636231046756, "grad_norm": 367.6854248046875, "learning_rate": 2.0609965250575237e-07, "loss": 20.3417, "step": 458730 }, { "epoch": 0.9266838237373595, "grad_norm": 4.055483818054199, "learning_rate": 2.0600047739970762e-07, "loss": 10.6959, "step": 458740 }, { "epoch": 0.9267040243700433, "grad_norm": 157.8720245361328, "learning_rate": 2.0590132565903475e-07, "loss": 11.8822, "step": 458750 }, { "epoch": 0.9267242250027271, "grad_norm": 376.6069641113281, "learning_rate": 2.058021972842178e-07, "loss": 17.6613, "step": 458760 }, { "epoch": 0.9267444256354109, "grad_norm": 472.8377685546875, "learning_rate": 2.057030922757397e-07, "loss": 15.236, "step": 458770 }, { "epoch": 0.9267646262680947, "grad_norm": 222.2113494873047, "learning_rate": 2.056040106340823e-07, "loss": 10.7457, "step": 458780 }, { "epoch": 0.9267848269007786, "grad_norm": 48.7750244140625, "learning_rate": 2.0550495235973023e-07, "loss": 11.8005, "step": 458790 }, { "epoch": 0.9268050275334624, "grad_norm": 192.03140258789062, "learning_rate": 2.054059174531653e-07, "loss": 23.3923, "step": 458800 }, { "epoch": 0.9268252281661462, "grad_norm": 143.8888702392578, "learning_rate": 2.0530690591487047e-07, "loss": 18.0165, "step": 458810 }, { "epoch": 0.92684542879883, "grad_norm": 257.7686462402344, "learning_rate": 2.0520791774532757e-07, "loss": 18.7763, "step": 458820 }, { "epoch": 0.9268656294315138, "grad_norm": 308.28472900390625, "learning_rate": 2.0510895294502066e-07, "loss": 14.1266, "step": 458830 }, { "epoch": 0.9268858300641976, "grad_norm": 291.55487060546875, "learning_rate": 2.0501001151443156e-07, "loss": 16.9696, "step": 458840 }, { "epoch": 0.9269060306968814, "grad_norm": 255.02993774414062, "learning_rate": 2.0491109345404102e-07, "loss": 14.6789, "step": 458850 }, { "epoch": 0.9269262313295652, "grad_norm": 519.9100341796875, "learning_rate": 2.0481219876433257e-07, "loss": 19.5401, "step": 458860 }, { "epoch": 0.926946431962249, "grad_norm": 301.23931884765625, "learning_rate": 2.0471332744578853e-07, "loss": 25.7286, "step": 458870 }, { "epoch": 0.9269666325949328, "grad_norm": 442.61456298828125, "learning_rate": 2.0461447949888912e-07, "loss": 14.013, "step": 458880 }, { "epoch": 0.9269868332276167, "grad_norm": 251.81700134277344, "learning_rate": 2.0451565492411672e-07, "loss": 20.6521, "step": 458890 }, { "epoch": 0.9270070338603005, "grad_norm": 182.24066162109375, "learning_rate": 2.0441685372195487e-07, "loss": 16.012, "step": 458900 }, { "epoch": 0.9270272344929843, "grad_norm": 296.1080627441406, "learning_rate": 2.043180758928831e-07, "loss": 26.0667, "step": 458910 }, { "epoch": 0.9270474351256681, "grad_norm": 222.44517517089844, "learning_rate": 2.0421932143738276e-07, "loss": 13.0511, "step": 458920 }, { "epoch": 0.9270676357583519, "grad_norm": 248.0188446044922, "learning_rate": 2.041205903559368e-07, "loss": 13.8143, "step": 458930 }, { "epoch": 0.9270878363910358, "grad_norm": 108.15481567382812, "learning_rate": 2.0402188264902533e-07, "loss": 20.7155, "step": 458940 }, { "epoch": 0.9271080370237196, "grad_norm": 377.7518615722656, "learning_rate": 2.039231983171286e-07, "loss": 16.3114, "step": 458950 }, { "epoch": 0.9271282376564034, "grad_norm": 0.0, "learning_rate": 2.0382453736072838e-07, "loss": 17.6866, "step": 458960 }, { "epoch": 0.9271484382890872, "grad_norm": 234.6780548095703, "learning_rate": 2.0372589978030654e-07, "loss": 12.0828, "step": 458970 }, { "epoch": 0.927168638921771, "grad_norm": 617.6521606445312, "learning_rate": 2.0362728557634327e-07, "loss": 31.6445, "step": 458980 }, { "epoch": 0.9271888395544549, "grad_norm": 113.47299194335938, "learning_rate": 2.0352869474931758e-07, "loss": 11.6212, "step": 458990 }, { "epoch": 0.9272090401871387, "grad_norm": 197.9394989013672, "learning_rate": 2.0343012729971244e-07, "loss": 9.7091, "step": 459000 }, { "epoch": 0.9272292408198225, "grad_norm": 355.50469970703125, "learning_rate": 2.0333158322800696e-07, "loss": 15.216, "step": 459010 }, { "epoch": 0.9272494414525063, "grad_norm": 208.5948028564453, "learning_rate": 2.0323306253468123e-07, "loss": 11.2102, "step": 459020 }, { "epoch": 0.9272696420851901, "grad_norm": 379.1370544433594, "learning_rate": 2.0313456522021603e-07, "loss": 15.8408, "step": 459030 }, { "epoch": 0.927289842717874, "grad_norm": 278.2328186035156, "learning_rate": 2.0303609128509038e-07, "loss": 21.7229, "step": 459040 }, { "epoch": 0.9273100433505578, "grad_norm": 376.0787048339844, "learning_rate": 2.0293764072978618e-07, "loss": 23.0545, "step": 459050 }, { "epoch": 0.9273302439832416, "grad_norm": 256.8787841796875, "learning_rate": 2.0283921355478187e-07, "loss": 18.2476, "step": 459060 }, { "epoch": 0.9273504446159254, "grad_norm": 347.57073974609375, "learning_rate": 2.0274080976055655e-07, "loss": 12.9471, "step": 459070 }, { "epoch": 0.9273706452486092, "grad_norm": 344.43109130859375, "learning_rate": 2.0264242934759147e-07, "loss": 21.7921, "step": 459080 }, { "epoch": 0.927390845881293, "grad_norm": 359.5399169921875, "learning_rate": 2.025440723163652e-07, "loss": 22.9477, "step": 459090 }, { "epoch": 0.9274110465139768, "grad_norm": 123.47303009033203, "learning_rate": 2.0244573866735673e-07, "loss": 19.4989, "step": 459100 }, { "epoch": 0.9274312471466606, "grad_norm": 194.1509552001953, "learning_rate": 2.0234742840104627e-07, "loss": 16.2509, "step": 459110 }, { "epoch": 0.9274514477793444, "grad_norm": 173.22320556640625, "learning_rate": 2.0224914151791285e-07, "loss": 10.2063, "step": 459120 }, { "epoch": 0.9274716484120282, "grad_norm": 308.11761474609375, "learning_rate": 2.0215087801843504e-07, "loss": 15.6687, "step": 459130 }, { "epoch": 0.927491849044712, "grad_norm": 392.9170837402344, "learning_rate": 2.0205263790309125e-07, "loss": 12.4466, "step": 459140 }, { "epoch": 0.9275120496773959, "grad_norm": 669.3555297851562, "learning_rate": 2.0195442117236176e-07, "loss": 29.092, "step": 459150 }, { "epoch": 0.9275322503100797, "grad_norm": 282.663330078125, "learning_rate": 2.0185622782672497e-07, "loss": 15.6815, "step": 459160 }, { "epoch": 0.9275524509427635, "grad_norm": 255.64881896972656, "learning_rate": 2.0175805786665782e-07, "loss": 15.1651, "step": 459170 }, { "epoch": 0.9275726515754473, "grad_norm": 313.9009094238281, "learning_rate": 2.0165991129263984e-07, "loss": 11.7104, "step": 459180 }, { "epoch": 0.9275928522081311, "grad_norm": 254.2653350830078, "learning_rate": 2.0156178810515127e-07, "loss": 23.887, "step": 459190 }, { "epoch": 0.927613052840815, "grad_norm": 262.0769958496094, "learning_rate": 2.0146368830466668e-07, "loss": 24.3206, "step": 459200 }, { "epoch": 0.9276332534734988, "grad_norm": 402.42437744140625, "learning_rate": 2.0136561189166682e-07, "loss": 11.1628, "step": 459210 }, { "epoch": 0.9276534541061826, "grad_norm": 177.794677734375, "learning_rate": 2.0126755886662907e-07, "loss": 16.3253, "step": 459220 }, { "epoch": 0.9276736547388664, "grad_norm": 276.5149230957031, "learning_rate": 2.0116952923003142e-07, "loss": 14.3878, "step": 459230 }, { "epoch": 0.9276938553715502, "grad_norm": 516.1245727539062, "learning_rate": 2.0107152298235067e-07, "loss": 19.8517, "step": 459240 }, { "epoch": 0.9277140560042341, "grad_norm": 46.9970817565918, "learning_rate": 2.0097354012406535e-07, "loss": 24.6953, "step": 459250 }, { "epoch": 0.9277342566369179, "grad_norm": 471.01531982421875, "learning_rate": 2.0087558065565394e-07, "loss": 25.1619, "step": 459260 }, { "epoch": 0.9277544572696017, "grad_norm": 144.0623779296875, "learning_rate": 2.007776445775922e-07, "loss": 17.7077, "step": 459270 }, { "epoch": 0.9277746579022855, "grad_norm": 280.4176940917969, "learning_rate": 2.006797318903575e-07, "loss": 17.9041, "step": 459280 }, { "epoch": 0.9277948585349693, "grad_norm": 260.37921142578125, "learning_rate": 2.0058184259442893e-07, "loss": 16.1751, "step": 459290 }, { "epoch": 0.9278150591676532, "grad_norm": 289.43194580078125, "learning_rate": 2.0048397669028164e-07, "loss": 20.1105, "step": 459300 }, { "epoch": 0.927835259800337, "grad_norm": 294.12603759765625, "learning_rate": 2.003861341783936e-07, "loss": 16.9188, "step": 459310 }, { "epoch": 0.9278554604330208, "grad_norm": 164.79388427734375, "learning_rate": 2.0028831505924162e-07, "loss": 20.3254, "step": 459320 }, { "epoch": 0.9278756610657046, "grad_norm": 446.665283203125, "learning_rate": 2.0019051933330204e-07, "loss": 15.017, "step": 459330 }, { "epoch": 0.9278958616983884, "grad_norm": 85.39521789550781, "learning_rate": 2.000927470010511e-07, "loss": 10.0591, "step": 459340 }, { "epoch": 0.9279160623310722, "grad_norm": 9.294930458068848, "learning_rate": 1.9999499806296674e-07, "loss": 12.9359, "step": 459350 }, { "epoch": 0.927936262963756, "grad_norm": 286.6814270019531, "learning_rate": 1.9989727251952418e-07, "loss": 25.4458, "step": 459360 }, { "epoch": 0.9279564635964398, "grad_norm": 530.6024780273438, "learning_rate": 1.9979957037120078e-07, "loss": 20.9247, "step": 459370 }, { "epoch": 0.9279766642291236, "grad_norm": 336.87677001953125, "learning_rate": 1.9970189161847175e-07, "loss": 16.0934, "step": 459380 }, { "epoch": 0.9279968648618074, "grad_norm": 95.1301040649414, "learning_rate": 1.996042362618128e-07, "loss": 17.521, "step": 459390 }, { "epoch": 0.9280170654944913, "grad_norm": 198.83937072753906, "learning_rate": 1.995066043017013e-07, "loss": 39.8352, "step": 459400 }, { "epoch": 0.9280372661271751, "grad_norm": 310.2725524902344, "learning_rate": 1.9940899573861195e-07, "loss": 13.0402, "step": 459410 }, { "epoch": 0.9280574667598589, "grad_norm": 279.5280456542969, "learning_rate": 1.993114105730215e-07, "loss": 20.8009, "step": 459420 }, { "epoch": 0.9280776673925427, "grad_norm": 207.68809509277344, "learning_rate": 1.9921384880540406e-07, "loss": 16.6055, "step": 459430 }, { "epoch": 0.9280978680252265, "grad_norm": 276.0572814941406, "learning_rate": 1.9911631043623704e-07, "loss": 16.8136, "step": 459440 }, { "epoch": 0.9281180686579104, "grad_norm": 249.64663696289062, "learning_rate": 1.99018795465995e-07, "loss": 16.5146, "step": 459450 }, { "epoch": 0.9281382692905942, "grad_norm": 247.5889129638672, "learning_rate": 1.9892130389515207e-07, "loss": 12.4523, "step": 459460 }, { "epoch": 0.928158469923278, "grad_norm": 97.71456909179688, "learning_rate": 1.9882383572418508e-07, "loss": 8.6723, "step": 459470 }, { "epoch": 0.9281786705559618, "grad_norm": 687.3977661132812, "learning_rate": 1.987263909535686e-07, "loss": 24.0275, "step": 459480 }, { "epoch": 0.9281988711886456, "grad_norm": 395.609375, "learning_rate": 1.986289695837762e-07, "loss": 25.2735, "step": 459490 }, { "epoch": 0.9282190718213295, "grad_norm": 325.3480529785156, "learning_rate": 1.9853157161528468e-07, "loss": 15.8198, "step": 459500 }, { "epoch": 0.9282392724540133, "grad_norm": 106.27111053466797, "learning_rate": 1.984341970485687e-07, "loss": 10.0832, "step": 459510 }, { "epoch": 0.9282594730866971, "grad_norm": 172.36770629882812, "learning_rate": 1.9833684588410062e-07, "loss": 16.7191, "step": 459520 }, { "epoch": 0.9282796737193809, "grad_norm": 913.9033203125, "learning_rate": 1.9823951812235675e-07, "loss": 33.0155, "step": 459530 }, { "epoch": 0.9282998743520647, "grad_norm": 512.4244995117188, "learning_rate": 1.981422137638117e-07, "loss": 25.0951, "step": 459540 }, { "epoch": 0.9283200749847486, "grad_norm": 178.0298614501953, "learning_rate": 1.98044932808939e-07, "loss": 18.1507, "step": 459550 }, { "epoch": 0.9283402756174324, "grad_norm": 0.9513123035430908, "learning_rate": 1.9794767525821212e-07, "loss": 13.7926, "step": 459560 }, { "epoch": 0.9283604762501162, "grad_norm": 322.00299072265625, "learning_rate": 1.9785044111210627e-07, "loss": 13.1647, "step": 459570 }, { "epoch": 0.9283806768828, "grad_norm": 391.02349853515625, "learning_rate": 1.977532303710955e-07, "loss": 18.097, "step": 459580 }, { "epoch": 0.9284008775154838, "grad_norm": 600.1901245117188, "learning_rate": 1.9765604303565223e-07, "loss": 22.4555, "step": 459590 }, { "epoch": 0.9284210781481677, "grad_norm": 70.98042297363281, "learning_rate": 1.9755887910625103e-07, "loss": 17.0909, "step": 459600 }, { "epoch": 0.9284412787808514, "grad_norm": 367.9173583984375, "learning_rate": 1.9746173858336604e-07, "loss": 17.8391, "step": 459610 }, { "epoch": 0.9284614794135352, "grad_norm": 32.95124053955078, "learning_rate": 1.9736462146747015e-07, "loss": 15.9649, "step": 459620 }, { "epoch": 0.928481680046219, "grad_norm": 267.5936584472656, "learning_rate": 1.972675277590358e-07, "loss": 15.8187, "step": 459630 }, { "epoch": 0.9285018806789028, "grad_norm": 231.7164764404297, "learning_rate": 1.9717045745853758e-07, "loss": 13.5195, "step": 459640 }, { "epoch": 0.9285220813115866, "grad_norm": 308.99993896484375, "learning_rate": 1.9707341056644737e-07, "loss": 22.4319, "step": 459650 }, { "epoch": 0.9285422819442705, "grad_norm": 252.5726318359375, "learning_rate": 1.9697638708323918e-07, "loss": 15.8626, "step": 459660 }, { "epoch": 0.9285624825769543, "grad_norm": 285.3663330078125, "learning_rate": 1.9687938700938602e-07, "loss": 39.4787, "step": 459670 }, { "epoch": 0.9285826832096381, "grad_norm": 192.2904052734375, "learning_rate": 1.967824103453597e-07, "loss": 29.4373, "step": 459680 }, { "epoch": 0.9286028838423219, "grad_norm": 340.7438659667969, "learning_rate": 1.9668545709163378e-07, "loss": 14.9998, "step": 459690 }, { "epoch": 0.9286230844750057, "grad_norm": 413.1592712402344, "learning_rate": 1.9658852724868005e-07, "loss": 23.5365, "step": 459700 }, { "epoch": 0.9286432851076896, "grad_norm": 412.8314208984375, "learning_rate": 1.9649162081697094e-07, "loss": 26.8419, "step": 459710 }, { "epoch": 0.9286634857403734, "grad_norm": 320.90557861328125, "learning_rate": 1.963947377969788e-07, "loss": 12.5075, "step": 459720 }, { "epoch": 0.9286836863730572, "grad_norm": 271.4992370605469, "learning_rate": 1.9629787818917722e-07, "loss": 17.1874, "step": 459730 }, { "epoch": 0.928703887005741, "grad_norm": 39.38309097290039, "learning_rate": 1.9620104199403688e-07, "loss": 16.7725, "step": 459740 }, { "epoch": 0.9287240876384248, "grad_norm": 208.1249542236328, "learning_rate": 1.961042292120291e-07, "loss": 13.5535, "step": 459750 }, { "epoch": 0.9287442882711087, "grad_norm": 178.13792419433594, "learning_rate": 1.9600743984362792e-07, "loss": 14.1426, "step": 459760 }, { "epoch": 0.9287644889037925, "grad_norm": 226.3883056640625, "learning_rate": 1.959106738893035e-07, "loss": 11.4676, "step": 459770 }, { "epoch": 0.9287846895364763, "grad_norm": 127.52801513671875, "learning_rate": 1.958139313495272e-07, "loss": 16.1323, "step": 459780 }, { "epoch": 0.9288048901691601, "grad_norm": 0.7948424816131592, "learning_rate": 1.957172122247708e-07, "loss": 31.0293, "step": 459790 }, { "epoch": 0.9288250908018439, "grad_norm": 191.9970703125, "learning_rate": 1.9562051651550784e-07, "loss": 13.3462, "step": 459800 }, { "epoch": 0.9288452914345278, "grad_norm": 117.13938903808594, "learning_rate": 1.9552384422220627e-07, "loss": 13.4455, "step": 459810 }, { "epoch": 0.9288654920672116, "grad_norm": 305.8866882324219, "learning_rate": 1.954271953453385e-07, "loss": 19.3293, "step": 459820 }, { "epoch": 0.9288856926998954, "grad_norm": 302.30975341796875, "learning_rate": 1.953305698853769e-07, "loss": 26.1453, "step": 459830 }, { "epoch": 0.9289058933325792, "grad_norm": 486.2826232910156, "learning_rate": 1.9523396784279114e-07, "loss": 24.521, "step": 459840 }, { "epoch": 0.928926093965263, "grad_norm": 443.88958740234375, "learning_rate": 1.9513738921805192e-07, "loss": 16.7113, "step": 459850 }, { "epoch": 0.9289462945979468, "grad_norm": 228.5526580810547, "learning_rate": 1.9504083401162999e-07, "loss": 21.6349, "step": 459860 }, { "epoch": 0.9289664952306306, "grad_norm": 313.4875183105469, "learning_rate": 1.9494430222399774e-07, "loss": 18.5073, "step": 459870 }, { "epoch": 0.9289866958633144, "grad_norm": 124.33599090576172, "learning_rate": 1.948477938556226e-07, "loss": 14.3797, "step": 459880 }, { "epoch": 0.9290068964959982, "grad_norm": 159.2218017578125, "learning_rate": 1.9475130890697691e-07, "loss": 31.771, "step": 459890 }, { "epoch": 0.929027097128682, "grad_norm": 254.6146697998047, "learning_rate": 1.9465484737853092e-07, "loss": 21.2994, "step": 459900 }, { "epoch": 0.9290472977613659, "grad_norm": 213.3077392578125, "learning_rate": 1.945584092707542e-07, "loss": 12.993, "step": 459910 }, { "epoch": 0.9290674983940497, "grad_norm": 505.9075622558594, "learning_rate": 1.944619945841164e-07, "loss": 11.2377, "step": 459920 }, { "epoch": 0.9290876990267335, "grad_norm": 266.4447326660156, "learning_rate": 1.9436560331908882e-07, "loss": 9.0777, "step": 459930 }, { "epoch": 0.9291078996594173, "grad_norm": 251.5564727783203, "learning_rate": 1.9426923547614052e-07, "loss": 10.4074, "step": 459940 }, { "epoch": 0.9291281002921011, "grad_norm": 643.8529663085938, "learning_rate": 1.9417289105574054e-07, "loss": 29.6219, "step": 459950 }, { "epoch": 0.929148300924785, "grad_norm": 226.2017364501953, "learning_rate": 1.9407657005835967e-07, "loss": 20.2664, "step": 459960 }, { "epoch": 0.9291685015574688, "grad_norm": 149.39573669433594, "learning_rate": 1.9398027248446582e-07, "loss": 9.7155, "step": 459970 }, { "epoch": 0.9291887021901526, "grad_norm": 318.2593078613281, "learning_rate": 1.9388399833452974e-07, "loss": 19.1509, "step": 459980 }, { "epoch": 0.9292089028228364, "grad_norm": 283.86065673828125, "learning_rate": 1.9378774760902052e-07, "loss": 12.8305, "step": 459990 }, { "epoch": 0.9292291034555202, "grad_norm": 132.25704956054688, "learning_rate": 1.9369152030840553e-07, "loss": 16.9925, "step": 460000 }, { "epoch": 0.929249304088204, "grad_norm": 290.5444030761719, "learning_rate": 1.9359531643315665e-07, "loss": 16.6173, "step": 460010 }, { "epoch": 0.9292695047208879, "grad_norm": 552.2373657226562, "learning_rate": 1.9349913598374014e-07, "loss": 16.1302, "step": 460020 }, { "epoch": 0.9292897053535717, "grad_norm": 467.9726867675781, "learning_rate": 1.9340297896062676e-07, "loss": 14.7202, "step": 460030 }, { "epoch": 0.9293099059862555, "grad_norm": 533.1699829101562, "learning_rate": 1.9330684536428335e-07, "loss": 17.7331, "step": 460040 }, { "epoch": 0.9293301066189393, "grad_norm": 470.14697265625, "learning_rate": 1.9321073519518007e-07, "loss": 19.9507, "step": 460050 }, { "epoch": 0.9293503072516232, "grad_norm": 25.874284744262695, "learning_rate": 1.9311464845378492e-07, "loss": 12.7159, "step": 460060 }, { "epoch": 0.929370507884307, "grad_norm": 529.6058959960938, "learning_rate": 1.9301858514056527e-07, "loss": 9.6371, "step": 460070 }, { "epoch": 0.9293907085169908, "grad_norm": 342.6182861328125, "learning_rate": 1.9292254525599075e-07, "loss": 18.3621, "step": 460080 }, { "epoch": 0.9294109091496746, "grad_norm": 304.01837158203125, "learning_rate": 1.928265288005282e-07, "loss": 19.4114, "step": 460090 }, { "epoch": 0.9294311097823584, "grad_norm": 290.5, "learning_rate": 1.927305357746462e-07, "loss": 15.1211, "step": 460100 }, { "epoch": 0.9294513104150423, "grad_norm": 215.43443298339844, "learning_rate": 1.9263456617881203e-07, "loss": 10.2721, "step": 460110 }, { "epoch": 0.929471511047726, "grad_norm": 286.6866149902344, "learning_rate": 1.9253862001349543e-07, "loss": 13.1009, "step": 460120 }, { "epoch": 0.9294917116804098, "grad_norm": 289.4507751464844, "learning_rate": 1.9244269727916097e-07, "loss": 10.1294, "step": 460130 }, { "epoch": 0.9295119123130936, "grad_norm": 162.1569061279297, "learning_rate": 1.9234679797627832e-07, "loss": 13.3979, "step": 460140 }, { "epoch": 0.9295321129457774, "grad_norm": 197.26498413085938, "learning_rate": 1.9225092210531425e-07, "loss": 20.3173, "step": 460150 }, { "epoch": 0.9295523135784612, "grad_norm": 428.6112976074219, "learning_rate": 1.9215506966673624e-07, "loss": 23.4347, "step": 460160 }, { "epoch": 0.9295725142111451, "grad_norm": 280.0367736816406, "learning_rate": 1.9205924066101057e-07, "loss": 10.5209, "step": 460170 }, { "epoch": 0.9295927148438289, "grad_norm": 409.23406982421875, "learning_rate": 1.9196343508860515e-07, "loss": 20.4145, "step": 460180 }, { "epoch": 0.9296129154765127, "grad_norm": 360.54339599609375, "learning_rate": 1.9186765294998855e-07, "loss": 17.5262, "step": 460190 }, { "epoch": 0.9296331161091965, "grad_norm": 259.7884826660156, "learning_rate": 1.917718942456237e-07, "loss": 15.4444, "step": 460200 }, { "epoch": 0.9296533167418803, "grad_norm": 273.5634460449219, "learning_rate": 1.9167615897598023e-07, "loss": 13.764, "step": 460210 }, { "epoch": 0.9296735173745642, "grad_norm": 379.6659851074219, "learning_rate": 1.9158044714152447e-07, "loss": 23.9401, "step": 460220 }, { "epoch": 0.929693718007248, "grad_norm": 0.0, "learning_rate": 1.914847587427221e-07, "loss": 13.5406, "step": 460230 }, { "epoch": 0.9297139186399318, "grad_norm": 224.84014892578125, "learning_rate": 1.9138909378003946e-07, "loss": 16.305, "step": 460240 }, { "epoch": 0.9297341192726156, "grad_norm": 384.63299560546875, "learning_rate": 1.9129345225394335e-07, "loss": 18.553, "step": 460250 }, { "epoch": 0.9297543199052994, "grad_norm": 501.1186218261719, "learning_rate": 1.9119783416490013e-07, "loss": 26.2674, "step": 460260 }, { "epoch": 0.9297745205379833, "grad_norm": 20.195629119873047, "learning_rate": 1.9110223951337492e-07, "loss": 17.3952, "step": 460270 }, { "epoch": 0.9297947211706671, "grad_norm": 159.2061309814453, "learning_rate": 1.910066682998346e-07, "loss": 20.4762, "step": 460280 }, { "epoch": 0.9298149218033509, "grad_norm": 201.14389038085938, "learning_rate": 1.909111205247438e-07, "loss": 22.9978, "step": 460290 }, { "epoch": 0.9298351224360347, "grad_norm": 169.78372192382812, "learning_rate": 1.9081559618856938e-07, "loss": 27.0752, "step": 460300 }, { "epoch": 0.9298553230687185, "grad_norm": 322.01409912109375, "learning_rate": 1.907200952917765e-07, "loss": 23.7841, "step": 460310 }, { "epoch": 0.9298755237014024, "grad_norm": 245.37893676757812, "learning_rate": 1.9062461783483034e-07, "loss": 12.6225, "step": 460320 }, { "epoch": 0.9298957243340862, "grad_norm": 620.7093505859375, "learning_rate": 1.9052916381819664e-07, "loss": 20.2819, "step": 460330 }, { "epoch": 0.92991592496677, "grad_norm": 0.0, "learning_rate": 1.904337332423406e-07, "loss": 25.589, "step": 460340 }, { "epoch": 0.9299361255994538, "grad_norm": 251.79603576660156, "learning_rate": 1.903383261077274e-07, "loss": 22.6557, "step": 460350 }, { "epoch": 0.9299563262321376, "grad_norm": 5.498881816864014, "learning_rate": 1.9024294241482112e-07, "loss": 15.9323, "step": 460360 }, { "epoch": 0.9299765268648214, "grad_norm": 31.50300407409668, "learning_rate": 1.9014758216408803e-07, "loss": 19.4993, "step": 460370 }, { "epoch": 0.9299967274975052, "grad_norm": 26.68342399597168, "learning_rate": 1.900522453559922e-07, "loss": 16.4608, "step": 460380 }, { "epoch": 0.930016928130189, "grad_norm": 1415.607666015625, "learning_rate": 1.899569319909983e-07, "loss": 35.6689, "step": 460390 }, { "epoch": 0.9300371287628728, "grad_norm": 118.35054779052734, "learning_rate": 1.8986164206957037e-07, "loss": 15.2918, "step": 460400 }, { "epoch": 0.9300573293955566, "grad_norm": 537.1720581054688, "learning_rate": 1.897663755921747e-07, "loss": 24.4098, "step": 460410 }, { "epoch": 0.9300775300282405, "grad_norm": 219.6941375732422, "learning_rate": 1.8967113255927315e-07, "loss": 17.4501, "step": 460420 }, { "epoch": 0.9300977306609243, "grad_norm": 265.08026123046875, "learning_rate": 1.8957591297133093e-07, "loss": 9.4662, "step": 460430 }, { "epoch": 0.9301179312936081, "grad_norm": 327.1904296875, "learning_rate": 1.894807168288132e-07, "loss": 16.3948, "step": 460440 }, { "epoch": 0.9301381319262919, "grad_norm": 98.9393081665039, "learning_rate": 1.8938554413218292e-07, "loss": 13.5331, "step": 460450 }, { "epoch": 0.9301583325589757, "grad_norm": 140.4616241455078, "learning_rate": 1.8929039488190304e-07, "loss": 8.2329, "step": 460460 }, { "epoch": 0.9301785331916596, "grad_norm": 0.0, "learning_rate": 1.8919526907843876e-07, "loss": 39.6257, "step": 460470 }, { "epoch": 0.9301987338243434, "grad_norm": 42.32804870605469, "learning_rate": 1.8910016672225418e-07, "loss": 14.0256, "step": 460480 }, { "epoch": 0.9302189344570272, "grad_norm": 269.3990478515625, "learning_rate": 1.8900508781381056e-07, "loss": 13.5953, "step": 460490 }, { "epoch": 0.930239135089711, "grad_norm": 116.81532287597656, "learning_rate": 1.8891003235357307e-07, "loss": 18.2773, "step": 460500 }, { "epoch": 0.9302593357223948, "grad_norm": 340.3694152832031, "learning_rate": 1.8881500034200473e-07, "loss": 15.3587, "step": 460510 }, { "epoch": 0.9302795363550787, "grad_norm": 326.3700866699219, "learning_rate": 1.88719991779569e-07, "loss": 41.7643, "step": 460520 }, { "epoch": 0.9302997369877625, "grad_norm": 15.594130516052246, "learning_rate": 1.8862500666672778e-07, "loss": 9.7799, "step": 460530 }, { "epoch": 0.9303199376204463, "grad_norm": 443.0193176269531, "learning_rate": 1.8853004500394512e-07, "loss": 19.7495, "step": 460540 }, { "epoch": 0.9303401382531301, "grad_norm": 11.327974319458008, "learning_rate": 1.8843510679168341e-07, "loss": 12.4272, "step": 460550 }, { "epoch": 0.9303603388858139, "grad_norm": 5.238090515136719, "learning_rate": 1.883401920304051e-07, "loss": 10.2333, "step": 460560 }, { "epoch": 0.9303805395184978, "grad_norm": 352.72735595703125, "learning_rate": 1.8824530072057369e-07, "loss": 24.7441, "step": 460570 }, { "epoch": 0.9304007401511816, "grad_norm": 215.92539978027344, "learning_rate": 1.8815043286265044e-07, "loss": 18.3635, "step": 460580 }, { "epoch": 0.9304209407838654, "grad_norm": 146.20742797851562, "learning_rate": 1.8805558845709894e-07, "loss": 10.1945, "step": 460590 }, { "epoch": 0.9304411414165492, "grad_norm": 772.099609375, "learning_rate": 1.8796076750438096e-07, "loss": 27.2361, "step": 460600 }, { "epoch": 0.930461342049233, "grad_norm": 34.058345794677734, "learning_rate": 1.878659700049579e-07, "loss": 19.6448, "step": 460610 }, { "epoch": 0.9304815426819169, "grad_norm": 573.7548217773438, "learning_rate": 1.8777119595929315e-07, "loss": 14.8042, "step": 460620 }, { "epoch": 0.9305017433146006, "grad_norm": 367.4371032714844, "learning_rate": 1.8767644536784703e-07, "loss": 16.0438, "step": 460630 }, { "epoch": 0.9305219439472844, "grad_norm": 359.34515380859375, "learning_rate": 1.8758171823108295e-07, "loss": 12.4917, "step": 460640 }, { "epoch": 0.9305421445799682, "grad_norm": 221.7831573486328, "learning_rate": 1.874870145494617e-07, "loss": 24.6353, "step": 460650 }, { "epoch": 0.930562345212652, "grad_norm": 277.5025329589844, "learning_rate": 1.8739233432344518e-07, "loss": 17.8756, "step": 460660 }, { "epoch": 0.9305825458453358, "grad_norm": 67.49771118164062, "learning_rate": 1.8729767755349514e-07, "loss": 17.9026, "step": 460670 }, { "epoch": 0.9306027464780197, "grad_norm": 676.9361572265625, "learning_rate": 1.872030442400713e-07, "loss": 30.2501, "step": 460680 }, { "epoch": 0.9306229471107035, "grad_norm": 407.73931884765625, "learning_rate": 1.8710843438363713e-07, "loss": 35.4802, "step": 460690 }, { "epoch": 0.9306431477433873, "grad_norm": 0.0, "learning_rate": 1.8701384798465284e-07, "loss": 13.346, "step": 460700 }, { "epoch": 0.9306633483760711, "grad_norm": 160.38052368164062, "learning_rate": 1.8691928504357858e-07, "loss": 23.4906, "step": 460710 }, { "epoch": 0.9306835490087549, "grad_norm": 201.43923950195312, "learning_rate": 1.868247455608757e-07, "loss": 10.0047, "step": 460720 }, { "epoch": 0.9307037496414388, "grad_norm": 235.8616180419922, "learning_rate": 1.867302295370066e-07, "loss": 14.4985, "step": 460730 }, { "epoch": 0.9307239502741226, "grad_norm": 232.42117309570312, "learning_rate": 1.8663573697242977e-07, "loss": 16.334, "step": 460740 }, { "epoch": 0.9307441509068064, "grad_norm": 206.00790405273438, "learning_rate": 1.8654126786760597e-07, "loss": 33.1476, "step": 460750 }, { "epoch": 0.9307643515394902, "grad_norm": 0.0, "learning_rate": 1.8644682222299703e-07, "loss": 10.1404, "step": 460760 }, { "epoch": 0.930784552172174, "grad_norm": 74.71009063720703, "learning_rate": 1.8635240003906264e-07, "loss": 13.2874, "step": 460770 }, { "epoch": 0.9308047528048579, "grad_norm": 175.77464294433594, "learning_rate": 1.8625800131626236e-07, "loss": 31.2075, "step": 460780 }, { "epoch": 0.9308249534375417, "grad_norm": 94.30030822753906, "learning_rate": 1.8616362605505645e-07, "loss": 7.6412, "step": 460790 }, { "epoch": 0.9308451540702255, "grad_norm": 395.0694580078125, "learning_rate": 1.8606927425590616e-07, "loss": 21.654, "step": 460800 }, { "epoch": 0.9308653547029093, "grad_norm": 105.0181655883789, "learning_rate": 1.8597494591926946e-07, "loss": 18.7361, "step": 460810 }, { "epoch": 0.9308855553355931, "grad_norm": 482.18182373046875, "learning_rate": 1.858806410456071e-07, "loss": 16.3657, "step": 460820 }, { "epoch": 0.930905755968277, "grad_norm": 189.46099853515625, "learning_rate": 1.8578635963537926e-07, "loss": 11.8485, "step": 460830 }, { "epoch": 0.9309259566009608, "grad_norm": 264.17681884765625, "learning_rate": 1.856921016890445e-07, "loss": 18.5469, "step": 460840 }, { "epoch": 0.9309461572336446, "grad_norm": 228.68739318847656, "learning_rate": 1.8559786720706185e-07, "loss": 15.871, "step": 460850 }, { "epoch": 0.9309663578663284, "grad_norm": 527.9981079101562, "learning_rate": 1.8550365618989207e-07, "loss": 17.0557, "step": 460860 }, { "epoch": 0.9309865584990122, "grad_norm": 479.7044372558594, "learning_rate": 1.854094686379937e-07, "loss": 21.1745, "step": 460870 }, { "epoch": 0.9310067591316961, "grad_norm": 446.8087158203125, "learning_rate": 1.8531530455182522e-07, "loss": 13.7563, "step": 460880 }, { "epoch": 0.9310269597643798, "grad_norm": 10.297074317932129, "learning_rate": 1.852211639318463e-07, "loss": 7.3226, "step": 460890 }, { "epoch": 0.9310471603970636, "grad_norm": 135.11688232421875, "learning_rate": 1.8512704677851489e-07, "loss": 7.8177, "step": 460900 }, { "epoch": 0.9310673610297474, "grad_norm": 119.65900421142578, "learning_rate": 1.8503295309229065e-07, "loss": 9.6999, "step": 460910 }, { "epoch": 0.9310875616624312, "grad_norm": 344.15252685546875, "learning_rate": 1.8493888287363148e-07, "loss": 17.9117, "step": 460920 }, { "epoch": 0.931107762295115, "grad_norm": 69.01956939697266, "learning_rate": 1.8484483612299654e-07, "loss": 24.0062, "step": 460930 }, { "epoch": 0.9311279629277989, "grad_norm": 87.6671142578125, "learning_rate": 1.8475081284084428e-07, "loss": 15.6079, "step": 460940 }, { "epoch": 0.9311481635604827, "grad_norm": 133.2266082763672, "learning_rate": 1.846568130276316e-07, "loss": 18.3529, "step": 460950 }, { "epoch": 0.9311683641931665, "grad_norm": 206.53697204589844, "learning_rate": 1.8456283668381814e-07, "loss": 13.5257, "step": 460960 }, { "epoch": 0.9311885648258503, "grad_norm": 147.25564575195312, "learning_rate": 1.8446888380986128e-07, "loss": 24.4171, "step": 460970 }, { "epoch": 0.9312087654585341, "grad_norm": 197.21304321289062, "learning_rate": 1.84374954406219e-07, "loss": 19.9227, "step": 460980 }, { "epoch": 0.931228966091218, "grad_norm": 378.8609313964844, "learning_rate": 1.8428104847334927e-07, "loss": 8.0765, "step": 460990 }, { "epoch": 0.9312491667239018, "grad_norm": 363.6342468261719, "learning_rate": 1.841871660117095e-07, "loss": 15.0018, "step": 461000 }, { "epoch": 0.9312693673565856, "grad_norm": 473.4636535644531, "learning_rate": 1.8409330702175764e-07, "loss": 19.226, "step": 461010 }, { "epoch": 0.9312895679892694, "grad_norm": 82.45626068115234, "learning_rate": 1.8399947150395058e-07, "loss": 25.1138, "step": 461020 }, { "epoch": 0.9313097686219532, "grad_norm": 257.804931640625, "learning_rate": 1.8390565945874572e-07, "loss": 18.8037, "step": 461030 }, { "epoch": 0.9313299692546371, "grad_norm": 229.63601684570312, "learning_rate": 1.8381187088660046e-07, "loss": 19.9147, "step": 461040 }, { "epoch": 0.9313501698873209, "grad_norm": 125.4647445678711, "learning_rate": 1.8371810578797277e-07, "loss": 13.1107, "step": 461050 }, { "epoch": 0.9313703705200047, "grad_norm": 153.53598022460938, "learning_rate": 1.8362436416331896e-07, "loss": 13.5738, "step": 461060 }, { "epoch": 0.9313905711526885, "grad_norm": 239.79486083984375, "learning_rate": 1.8353064601309533e-07, "loss": 12.1175, "step": 461070 }, { "epoch": 0.9314107717853723, "grad_norm": 54.68346405029297, "learning_rate": 1.8343695133775874e-07, "loss": 26.0745, "step": 461080 }, { "epoch": 0.9314309724180562, "grad_norm": 222.83460998535156, "learning_rate": 1.833432801377677e-07, "loss": 17.1467, "step": 461090 }, { "epoch": 0.93145117305074, "grad_norm": 157.41082763671875, "learning_rate": 1.832496324135763e-07, "loss": 12.3532, "step": 461100 }, { "epoch": 0.9314713736834238, "grad_norm": 325.1773376464844, "learning_rate": 1.8315600816564137e-07, "loss": 20.4668, "step": 461110 }, { "epoch": 0.9314915743161076, "grad_norm": 166.11477661132812, "learning_rate": 1.8306240739442094e-07, "loss": 17.5134, "step": 461120 }, { "epoch": 0.9315117749487914, "grad_norm": 294.5464172363281, "learning_rate": 1.8296883010037014e-07, "loss": 17.9774, "step": 461130 }, { "epoch": 0.9315319755814752, "grad_norm": 73.10298919677734, "learning_rate": 1.8287527628394418e-07, "loss": 9.8741, "step": 461140 }, { "epoch": 0.931552176214159, "grad_norm": 131.22254943847656, "learning_rate": 1.827817459456005e-07, "loss": 11.0736, "step": 461150 }, { "epoch": 0.9315723768468428, "grad_norm": 190.13491821289062, "learning_rate": 1.826882390857948e-07, "loss": 18.4156, "step": 461160 }, { "epoch": 0.9315925774795266, "grad_norm": 157.9766082763672, "learning_rate": 1.825947557049812e-07, "loss": 22.2857, "step": 461170 }, { "epoch": 0.9316127781122104, "grad_norm": 460.1831359863281, "learning_rate": 1.825012958036171e-07, "loss": 34.7807, "step": 461180 }, { "epoch": 0.9316329787448943, "grad_norm": 365.3998107910156, "learning_rate": 1.824078593821571e-07, "loss": 14.7511, "step": 461190 }, { "epoch": 0.9316531793775781, "grad_norm": 48.21858596801758, "learning_rate": 1.8231444644105755e-07, "loss": 16.885, "step": 461200 }, { "epoch": 0.9316733800102619, "grad_norm": 498.6594543457031, "learning_rate": 1.8222105698077253e-07, "loss": 26.9817, "step": 461210 }, { "epoch": 0.9316935806429457, "grad_norm": 207.6343994140625, "learning_rate": 1.8212769100175774e-07, "loss": 22.9423, "step": 461220 }, { "epoch": 0.9317137812756295, "grad_norm": 387.0116882324219, "learning_rate": 1.8203434850446844e-07, "loss": 21.9838, "step": 461230 }, { "epoch": 0.9317339819083134, "grad_norm": 267.20263671875, "learning_rate": 1.819410294893592e-07, "loss": 26.3953, "step": 461240 }, { "epoch": 0.9317541825409972, "grad_norm": 5.025315761566162, "learning_rate": 1.8184773395688527e-07, "loss": 11.4558, "step": 461250 }, { "epoch": 0.931774383173681, "grad_norm": 388.8743896484375, "learning_rate": 1.8175446190750068e-07, "loss": 18.0609, "step": 461260 }, { "epoch": 0.9317945838063648, "grad_norm": 442.39404296875, "learning_rate": 1.816612133416612e-07, "loss": 14.8389, "step": 461270 }, { "epoch": 0.9318147844390486, "grad_norm": 177.66726684570312, "learning_rate": 1.8156798825982035e-07, "loss": 9.7187, "step": 461280 }, { "epoch": 0.9318349850717325, "grad_norm": 203.19876098632812, "learning_rate": 1.814747866624317e-07, "loss": 17.6691, "step": 461290 }, { "epoch": 0.9318551857044163, "grad_norm": 280.1035461425781, "learning_rate": 1.8138160854995145e-07, "loss": 23.1074, "step": 461300 }, { "epoch": 0.9318753863371001, "grad_norm": 302.52130126953125, "learning_rate": 1.8128845392283324e-07, "loss": 14.5102, "step": 461310 }, { "epoch": 0.9318955869697839, "grad_norm": 468.0241394042969, "learning_rate": 1.8119532278152996e-07, "loss": 28.7969, "step": 461320 }, { "epoch": 0.9319157876024677, "grad_norm": 332.97210693359375, "learning_rate": 1.8110221512649573e-07, "loss": 14.2433, "step": 461330 }, { "epoch": 0.9319359882351516, "grad_norm": 398.02874755859375, "learning_rate": 1.8100913095818627e-07, "loss": 21.8113, "step": 461340 }, { "epoch": 0.9319561888678354, "grad_norm": 149.2668914794922, "learning_rate": 1.8091607027705293e-07, "loss": 11.9831, "step": 461350 }, { "epoch": 0.9319763895005192, "grad_norm": 362.22430419921875, "learning_rate": 1.8082303308354976e-07, "loss": 19.275, "step": 461360 }, { "epoch": 0.931996590133203, "grad_norm": 129.23387145996094, "learning_rate": 1.8073001937813138e-07, "loss": 16.4116, "step": 461370 }, { "epoch": 0.9320167907658868, "grad_norm": 243.48883056640625, "learning_rate": 1.8063702916125025e-07, "loss": 9.4262, "step": 461380 }, { "epoch": 0.9320369913985707, "grad_norm": 206.71240234375, "learning_rate": 1.805440624333593e-07, "loss": 25.4547, "step": 461390 }, { "epoch": 0.9320571920312544, "grad_norm": 371.13433837890625, "learning_rate": 1.804511191949121e-07, "loss": 18.8169, "step": 461400 }, { "epoch": 0.9320773926639382, "grad_norm": 100.96916198730469, "learning_rate": 1.8035819944636269e-07, "loss": 25.9949, "step": 461410 }, { "epoch": 0.932097593296622, "grad_norm": 169.36326599121094, "learning_rate": 1.8026530318816183e-07, "loss": 15.5662, "step": 461420 }, { "epoch": 0.9321177939293058, "grad_norm": 107.69503021240234, "learning_rate": 1.8017243042076304e-07, "loss": 17.789, "step": 461430 }, { "epoch": 0.9321379945619896, "grad_norm": 230.31922912597656, "learning_rate": 1.8007958114462042e-07, "loss": 12.5262, "step": 461440 }, { "epoch": 0.9321581951946735, "grad_norm": 690.5305786132812, "learning_rate": 1.7998675536018474e-07, "loss": 21.3882, "step": 461450 }, { "epoch": 0.9321783958273573, "grad_norm": 76.3077163696289, "learning_rate": 1.7989395306790835e-07, "loss": 47.8428, "step": 461460 }, { "epoch": 0.9321985964600411, "grad_norm": 311.6148376464844, "learning_rate": 1.798011742682454e-07, "loss": 14.5941, "step": 461470 }, { "epoch": 0.9322187970927249, "grad_norm": 69.86370849609375, "learning_rate": 1.7970841896164658e-07, "loss": 28.1239, "step": 461480 }, { "epoch": 0.9322389977254087, "grad_norm": 124.4713134765625, "learning_rate": 1.7961568714856382e-07, "loss": 11.2964, "step": 461490 }, { "epoch": 0.9322591983580926, "grad_norm": 197.35536193847656, "learning_rate": 1.7952297882945e-07, "loss": 38.8373, "step": 461500 }, { "epoch": 0.9322793989907764, "grad_norm": 0.0, "learning_rate": 1.7943029400475598e-07, "loss": 14.5593, "step": 461510 }, { "epoch": 0.9322995996234602, "grad_norm": 179.98277282714844, "learning_rate": 1.7933763267493465e-07, "loss": 12.0409, "step": 461520 }, { "epoch": 0.932319800256144, "grad_norm": 1.978187084197998, "learning_rate": 1.7924499484043622e-07, "loss": 15.9934, "step": 461530 }, { "epoch": 0.9323400008888278, "grad_norm": 224.9529266357422, "learning_rate": 1.7915238050171367e-07, "loss": 18.9983, "step": 461540 }, { "epoch": 0.9323602015215117, "grad_norm": 129.3295135498047, "learning_rate": 1.7905978965921778e-07, "loss": 17.885, "step": 461550 }, { "epoch": 0.9323804021541955, "grad_norm": 401.3939514160156, "learning_rate": 1.7896722231339925e-07, "loss": 23.0999, "step": 461560 }, { "epoch": 0.9324006027868793, "grad_norm": 139.5451202392578, "learning_rate": 1.788746784647105e-07, "loss": 13.566, "step": 461570 }, { "epoch": 0.9324208034195631, "grad_norm": 381.82012939453125, "learning_rate": 1.7878215811360068e-07, "loss": 18.4742, "step": 461580 }, { "epoch": 0.932441004052247, "grad_norm": 375.12969970703125, "learning_rate": 1.7868966126052323e-07, "loss": 10.8111, "step": 461590 }, { "epoch": 0.9324612046849308, "grad_norm": 171.27349853515625, "learning_rate": 1.785971879059273e-07, "loss": 19.8354, "step": 461600 }, { "epoch": 0.9324814053176146, "grad_norm": 240.85580444335938, "learning_rate": 1.7850473805026304e-07, "loss": 22.4678, "step": 461610 }, { "epoch": 0.9325016059502984, "grad_norm": 94.5416030883789, "learning_rate": 1.7841231169398287e-07, "loss": 11.4463, "step": 461620 }, { "epoch": 0.9325218065829822, "grad_norm": 307.89361572265625, "learning_rate": 1.7831990883753592e-07, "loss": 14.7547, "step": 461630 }, { "epoch": 0.932542007215666, "grad_norm": 310.5614013671875, "learning_rate": 1.7822752948137289e-07, "loss": 13.9369, "step": 461640 }, { "epoch": 0.9325622078483498, "grad_norm": 909.2883911132812, "learning_rate": 1.7813517362594347e-07, "loss": 25.6955, "step": 461650 }, { "epoch": 0.9325824084810336, "grad_norm": 229.96792602539062, "learning_rate": 1.7804284127169946e-07, "loss": 7.7572, "step": 461660 }, { "epoch": 0.9326026091137174, "grad_norm": 233.24856567382812, "learning_rate": 1.7795053241908943e-07, "loss": 10.6594, "step": 461670 }, { "epoch": 0.9326228097464012, "grad_norm": 357.04248046875, "learning_rate": 1.7785824706856303e-07, "loss": 12.6461, "step": 461680 }, { "epoch": 0.932643010379085, "grad_norm": 458.421875, "learning_rate": 1.7776598522057154e-07, "loss": 22.4475, "step": 461690 }, { "epoch": 0.9326632110117689, "grad_norm": 232.36062622070312, "learning_rate": 1.7767374687556405e-07, "loss": 30.5857, "step": 461700 }, { "epoch": 0.9326834116444527, "grad_norm": 179.06101989746094, "learning_rate": 1.7758153203398853e-07, "loss": 25.9762, "step": 461710 }, { "epoch": 0.9327036122771365, "grad_norm": 167.18104553222656, "learning_rate": 1.774893406962963e-07, "loss": 22.3963, "step": 461720 }, { "epoch": 0.9327238129098203, "grad_norm": 506.8542785644531, "learning_rate": 1.7739717286293644e-07, "loss": 13.8257, "step": 461730 }, { "epoch": 0.9327440135425041, "grad_norm": 85.00739288330078, "learning_rate": 1.7730502853435805e-07, "loss": 13.1455, "step": 461740 }, { "epoch": 0.932764214175188, "grad_norm": 122.48938751220703, "learning_rate": 1.7721290771100964e-07, "loss": 12.0569, "step": 461750 }, { "epoch": 0.9327844148078718, "grad_norm": 143.9090576171875, "learning_rate": 1.7712081039334083e-07, "loss": 9.0365, "step": 461760 }, { "epoch": 0.9328046154405556, "grad_norm": 34.36774444580078, "learning_rate": 1.770287365818002e-07, "loss": 8.9793, "step": 461770 }, { "epoch": 0.9328248160732394, "grad_norm": 174.63685607910156, "learning_rate": 1.7693668627683625e-07, "loss": 9.9798, "step": 461780 }, { "epoch": 0.9328450167059232, "grad_norm": 250.67218017578125, "learning_rate": 1.7684465947889806e-07, "loss": 13.5474, "step": 461790 }, { "epoch": 0.9328652173386071, "grad_norm": 200.02870178222656, "learning_rate": 1.7675265618843361e-07, "loss": 10.7043, "step": 461800 }, { "epoch": 0.9328854179712909, "grad_norm": 215.54002380371094, "learning_rate": 1.7666067640589256e-07, "loss": 29.935, "step": 461810 }, { "epoch": 0.9329056186039747, "grad_norm": 446.4798278808594, "learning_rate": 1.7656872013172176e-07, "loss": 19.1518, "step": 461820 }, { "epoch": 0.9329258192366585, "grad_norm": 220.8884735107422, "learning_rate": 1.764767873663703e-07, "loss": 37.2522, "step": 461830 }, { "epoch": 0.9329460198693423, "grad_norm": 381.53521728515625, "learning_rate": 1.7638487811028616e-07, "loss": 16.741, "step": 461840 }, { "epoch": 0.9329662205020262, "grad_norm": 184.9522705078125, "learning_rate": 1.7629299236391616e-07, "loss": 13.5561, "step": 461850 }, { "epoch": 0.93298642113471, "grad_norm": 1035.3656005859375, "learning_rate": 1.7620113012771002e-07, "loss": 19.5629, "step": 461860 }, { "epoch": 0.9330066217673938, "grad_norm": 511.546630859375, "learning_rate": 1.7610929140211397e-07, "loss": 24.5877, "step": 461870 }, { "epoch": 0.9330268224000776, "grad_norm": 335.9839782714844, "learning_rate": 1.760174761875766e-07, "loss": 10.984, "step": 461880 }, { "epoch": 0.9330470230327614, "grad_norm": 194.3610382080078, "learning_rate": 1.7592568448454528e-07, "loss": 21.0758, "step": 461890 }, { "epoch": 0.9330672236654453, "grad_norm": 119.42951202392578, "learning_rate": 1.758339162934658e-07, "loss": 20.2449, "step": 461900 }, { "epoch": 0.933087424298129, "grad_norm": 446.78759765625, "learning_rate": 1.757421716147878e-07, "loss": 20.569, "step": 461910 }, { "epoch": 0.9331076249308128, "grad_norm": 545.8987426757812, "learning_rate": 1.7565045044895756e-07, "loss": 20.1485, "step": 461920 }, { "epoch": 0.9331278255634966, "grad_norm": 142.15664672851562, "learning_rate": 1.7555875279642087e-07, "loss": 16.4516, "step": 461930 }, { "epoch": 0.9331480261961804, "grad_norm": 79.61673736572266, "learning_rate": 1.754670786576257e-07, "loss": 11.931, "step": 461940 }, { "epoch": 0.9331682268288642, "grad_norm": 111.73258209228516, "learning_rate": 1.7537542803302e-07, "loss": 7.779, "step": 461950 }, { "epoch": 0.9331884274615481, "grad_norm": 283.0420227050781, "learning_rate": 1.7528380092304842e-07, "loss": 17.6648, "step": 461960 }, { "epoch": 0.9332086280942319, "grad_norm": 609.4569702148438, "learning_rate": 1.751921973281584e-07, "loss": 30.5705, "step": 461970 }, { "epoch": 0.9332288287269157, "grad_norm": 329.05462646484375, "learning_rate": 1.7510061724879678e-07, "loss": 16.6133, "step": 461980 }, { "epoch": 0.9332490293595995, "grad_norm": 8.676403045654297, "learning_rate": 1.750090606854099e-07, "loss": 20.2779, "step": 461990 }, { "epoch": 0.9332692299922833, "grad_norm": 217.2760009765625, "learning_rate": 1.7491752763844294e-07, "loss": 14.0126, "step": 462000 }, { "epoch": 0.9332894306249672, "grad_norm": 218.53541564941406, "learning_rate": 1.7482601810834276e-07, "loss": 13.3233, "step": 462010 }, { "epoch": 0.933309631257651, "grad_norm": 100.9002685546875, "learning_rate": 1.7473453209555625e-07, "loss": 15.2209, "step": 462020 }, { "epoch": 0.9333298318903348, "grad_norm": 363.990478515625, "learning_rate": 1.7464306960052746e-07, "loss": 16.0504, "step": 462030 }, { "epoch": 0.9333500325230186, "grad_norm": 572.1642456054688, "learning_rate": 1.7455163062370273e-07, "loss": 10.6288, "step": 462040 }, { "epoch": 0.9333702331557024, "grad_norm": 286.1855773925781, "learning_rate": 1.744602151655289e-07, "loss": 17.9434, "step": 462050 }, { "epoch": 0.9333904337883863, "grad_norm": 139.72940063476562, "learning_rate": 1.743688232264512e-07, "loss": 6.4435, "step": 462060 }, { "epoch": 0.9334106344210701, "grad_norm": 238.74295043945312, "learning_rate": 1.742774548069137e-07, "loss": 11.5949, "step": 462070 }, { "epoch": 0.9334308350537539, "grad_norm": 282.2811279296875, "learning_rate": 1.7418610990736273e-07, "loss": 15.9587, "step": 462080 }, { "epoch": 0.9334510356864377, "grad_norm": 108.57559967041016, "learning_rate": 1.7409478852824402e-07, "loss": 28.7614, "step": 462090 }, { "epoch": 0.9334712363191215, "grad_norm": 255.90158081054688, "learning_rate": 1.740034906700011e-07, "loss": 9.9074, "step": 462100 }, { "epoch": 0.9334914369518054, "grad_norm": 274.3307800292969, "learning_rate": 1.7391221633308032e-07, "loss": 16.6796, "step": 462110 }, { "epoch": 0.9335116375844892, "grad_norm": 268.1937255859375, "learning_rate": 1.7382096551792572e-07, "loss": 14.801, "step": 462120 }, { "epoch": 0.933531838217173, "grad_norm": 352.5740051269531, "learning_rate": 1.7372973822498252e-07, "loss": 16.7663, "step": 462130 }, { "epoch": 0.9335520388498568, "grad_norm": 19.405569076538086, "learning_rate": 1.7363853445469482e-07, "loss": 15.1377, "step": 462140 }, { "epoch": 0.9335722394825406, "grad_norm": 323.9486999511719, "learning_rate": 1.7354735420750835e-07, "loss": 14.7211, "step": 462150 }, { "epoch": 0.9335924401152244, "grad_norm": 163.8055419921875, "learning_rate": 1.7345619748386666e-07, "loss": 14.3039, "step": 462160 }, { "epoch": 0.9336126407479082, "grad_norm": 201.86553955078125, "learning_rate": 1.733650642842133e-07, "loss": 15.7717, "step": 462170 }, { "epoch": 0.933632841380592, "grad_norm": 454.3749084472656, "learning_rate": 1.73273954608994e-07, "loss": 27.3315, "step": 462180 }, { "epoch": 0.9336530420132758, "grad_norm": 30.84465789794922, "learning_rate": 1.7318286845865174e-07, "loss": 25.938, "step": 462190 }, { "epoch": 0.9336732426459596, "grad_norm": 88.91412353515625, "learning_rate": 1.7309180583363062e-07, "loss": 21.5223, "step": 462200 }, { "epoch": 0.9336934432786435, "grad_norm": 265.9754638671875, "learning_rate": 1.7300076673437526e-07, "loss": 9.4895, "step": 462210 }, { "epoch": 0.9337136439113273, "grad_norm": 270.78021240234375, "learning_rate": 1.7290975116132756e-07, "loss": 20.5894, "step": 462220 }, { "epoch": 0.9337338445440111, "grad_norm": 141.9483184814453, "learning_rate": 1.728187591149333e-07, "loss": 11.7209, "step": 462230 }, { "epoch": 0.9337540451766949, "grad_norm": 102.59892272949219, "learning_rate": 1.7272779059563483e-07, "loss": 22.9712, "step": 462240 }, { "epoch": 0.9337742458093787, "grad_norm": 242.95530700683594, "learning_rate": 1.7263684560387518e-07, "loss": 20.0876, "step": 462250 }, { "epoch": 0.9337944464420626, "grad_norm": 206.11166381835938, "learning_rate": 1.7254592414009785e-07, "loss": 16.8085, "step": 462260 }, { "epoch": 0.9338146470747464, "grad_norm": 704.259521484375, "learning_rate": 1.7245502620474643e-07, "loss": 20.4897, "step": 462270 }, { "epoch": 0.9338348477074302, "grad_norm": 281.055419921875, "learning_rate": 1.7236415179826438e-07, "loss": 21.912, "step": 462280 }, { "epoch": 0.933855048340114, "grad_norm": 85.06211853027344, "learning_rate": 1.7227330092109306e-07, "loss": 14.7004, "step": 462290 }, { "epoch": 0.9338752489727978, "grad_norm": 128.4312286376953, "learning_rate": 1.7218247357367656e-07, "loss": 14.4685, "step": 462300 }, { "epoch": 0.9338954496054817, "grad_norm": 8.975204467773438, "learning_rate": 1.720916697564573e-07, "loss": 8.9357, "step": 462310 }, { "epoch": 0.9339156502381655, "grad_norm": 378.54376220703125, "learning_rate": 1.7200088946987713e-07, "loss": 26.6003, "step": 462320 }, { "epoch": 0.9339358508708493, "grad_norm": 809.9996948242188, "learning_rate": 1.7191013271437908e-07, "loss": 22.5336, "step": 462330 }, { "epoch": 0.9339560515035331, "grad_norm": 446.6749267578125, "learning_rate": 1.7181939949040606e-07, "loss": 15.6763, "step": 462340 }, { "epoch": 0.9339762521362169, "grad_norm": 674.1312255859375, "learning_rate": 1.717286897983994e-07, "loss": 28.7681, "step": 462350 }, { "epoch": 0.9339964527689008, "grad_norm": 89.6597900390625, "learning_rate": 1.7163800363880102e-07, "loss": 8.1562, "step": 462360 }, { "epoch": 0.9340166534015846, "grad_norm": 273.99957275390625, "learning_rate": 1.715473410120544e-07, "loss": 27.0604, "step": 462370 }, { "epoch": 0.9340368540342684, "grad_norm": 324.4134521484375, "learning_rate": 1.7145670191859977e-07, "loss": 13.3288, "step": 462380 }, { "epoch": 0.9340570546669522, "grad_norm": 130.2768096923828, "learning_rate": 1.7136608635887952e-07, "loss": 17.8747, "step": 462390 }, { "epoch": 0.934077255299636, "grad_norm": 0.0, "learning_rate": 1.7127549433333557e-07, "loss": 13.5814, "step": 462400 }, { "epoch": 0.9340974559323199, "grad_norm": 108.8055191040039, "learning_rate": 1.7118492584240865e-07, "loss": 15.4526, "step": 462410 }, { "epoch": 0.9341176565650036, "grad_norm": 303.342529296875, "learning_rate": 1.7109438088654173e-07, "loss": 15.3623, "step": 462420 }, { "epoch": 0.9341378571976874, "grad_norm": 418.23321533203125, "learning_rate": 1.7100385946617393e-07, "loss": 22.4594, "step": 462430 }, { "epoch": 0.9341580578303712, "grad_norm": 633.9379272460938, "learning_rate": 1.7091336158174877e-07, "loss": 22.1655, "step": 462440 }, { "epoch": 0.934178258463055, "grad_norm": 247.4800567626953, "learning_rate": 1.7082288723370587e-07, "loss": 5.665, "step": 462450 }, { "epoch": 0.9341984590957388, "grad_norm": 177.9915771484375, "learning_rate": 1.7073243642248605e-07, "loss": 14.5838, "step": 462460 }, { "epoch": 0.9342186597284227, "grad_norm": 425.112548828125, "learning_rate": 1.7064200914853112e-07, "loss": 23.317, "step": 462470 }, { "epoch": 0.9342388603611065, "grad_norm": 157.11764526367188, "learning_rate": 1.7055160541228077e-07, "loss": 6.9621, "step": 462480 }, { "epoch": 0.9342590609937903, "grad_norm": 530.9124145507812, "learning_rate": 1.7046122521417686e-07, "loss": 11.7681, "step": 462490 }, { "epoch": 0.9342792616264741, "grad_norm": 187.06556701660156, "learning_rate": 1.7037086855465902e-07, "loss": 22.7605, "step": 462500 }, { "epoch": 0.9342994622591579, "grad_norm": 191.40975952148438, "learning_rate": 1.702805354341669e-07, "loss": 24.8928, "step": 462510 }, { "epoch": 0.9343196628918418, "grad_norm": 0.0, "learning_rate": 1.7019022585314293e-07, "loss": 9.0658, "step": 462520 }, { "epoch": 0.9343398635245256, "grad_norm": 588.7440795898438, "learning_rate": 1.7009993981202567e-07, "loss": 25.3733, "step": 462530 }, { "epoch": 0.9343600641572094, "grad_norm": 128.56907653808594, "learning_rate": 1.7000967731125472e-07, "loss": 21.2674, "step": 462540 }, { "epoch": 0.9343802647898932, "grad_norm": 204.19056701660156, "learning_rate": 1.699194383512709e-07, "loss": 7.1781, "step": 462550 }, { "epoch": 0.934400465422577, "grad_norm": 238.27174377441406, "learning_rate": 1.6982922293251548e-07, "loss": 44.0774, "step": 462560 }, { "epoch": 0.9344206660552609, "grad_norm": 180.11070251464844, "learning_rate": 1.6973903105542533e-07, "loss": 32.1863, "step": 462570 }, { "epoch": 0.9344408666879447, "grad_norm": 584.6224975585938, "learning_rate": 1.6964886272044069e-07, "loss": 23.1239, "step": 462580 }, { "epoch": 0.9344610673206285, "grad_norm": 245.20762634277344, "learning_rate": 1.6955871792800283e-07, "loss": 13.2816, "step": 462590 }, { "epoch": 0.9344812679533123, "grad_norm": 366.8085021972656, "learning_rate": 1.6946859667854977e-07, "loss": 31.749, "step": 462600 }, { "epoch": 0.9345014685859961, "grad_norm": 99.18296813964844, "learning_rate": 1.6937849897252056e-07, "loss": 8.3537, "step": 462610 }, { "epoch": 0.93452166921868, "grad_norm": 131.9552764892578, "learning_rate": 1.6928842481035436e-07, "loss": 9.0818, "step": 462620 }, { "epoch": 0.9345418698513638, "grad_norm": 371.1967468261719, "learning_rate": 1.691983741924913e-07, "loss": 16.3709, "step": 462630 }, { "epoch": 0.9345620704840476, "grad_norm": 271.56060791015625, "learning_rate": 1.6910834711936886e-07, "loss": 15.2731, "step": 462640 }, { "epoch": 0.9345822711167314, "grad_norm": 382.3261413574219, "learning_rate": 1.690183435914261e-07, "loss": 11.5249, "step": 462650 }, { "epoch": 0.9346024717494152, "grad_norm": 413.75128173828125, "learning_rate": 1.689283636091027e-07, "loss": 13.1042, "step": 462660 }, { "epoch": 0.9346226723820991, "grad_norm": 237.03350830078125, "learning_rate": 1.688384071728366e-07, "loss": 21.1822, "step": 462670 }, { "epoch": 0.9346428730147828, "grad_norm": 107.84660339355469, "learning_rate": 1.6874847428306583e-07, "loss": 20.7907, "step": 462680 }, { "epoch": 0.9346630736474666, "grad_norm": 206.15914916992188, "learning_rate": 1.6865856494022892e-07, "loss": 13.9638, "step": 462690 }, { "epoch": 0.9346832742801504, "grad_norm": 269.52392578125, "learning_rate": 1.6856867914476492e-07, "loss": 7.6573, "step": 462700 }, { "epoch": 0.9347034749128342, "grad_norm": 4.615231513977051, "learning_rate": 1.684788168971102e-07, "loss": 12.817, "step": 462710 }, { "epoch": 0.934723675545518, "grad_norm": 468.2229309082031, "learning_rate": 1.6838897819770438e-07, "loss": 38.626, "step": 462720 }, { "epoch": 0.9347438761782019, "grad_norm": 2374.771240234375, "learning_rate": 1.682991630469838e-07, "loss": 27.894, "step": 462730 }, { "epoch": 0.9347640768108857, "grad_norm": 432.9613037109375, "learning_rate": 1.6820937144538807e-07, "loss": 16.0758, "step": 462740 }, { "epoch": 0.9347842774435695, "grad_norm": 485.05889892578125, "learning_rate": 1.6811960339335298e-07, "loss": 18.6951, "step": 462750 }, { "epoch": 0.9348044780762533, "grad_norm": 0.0, "learning_rate": 1.6802985889131762e-07, "loss": 18.6691, "step": 462760 }, { "epoch": 0.9348246787089372, "grad_norm": 175.54074096679688, "learning_rate": 1.6794013793971887e-07, "loss": 26.6739, "step": 462770 }, { "epoch": 0.934844879341621, "grad_norm": 392.8362731933594, "learning_rate": 1.6785044053899302e-07, "loss": 14.6655, "step": 462780 }, { "epoch": 0.9348650799743048, "grad_norm": 141.31777954101562, "learning_rate": 1.6776076668957864e-07, "loss": 16.925, "step": 462790 }, { "epoch": 0.9348852806069886, "grad_norm": 176.33457946777344, "learning_rate": 1.6767111639191202e-07, "loss": 25.0126, "step": 462800 }, { "epoch": 0.9349054812396724, "grad_norm": 0.0, "learning_rate": 1.675814896464306e-07, "loss": 17.1646, "step": 462810 }, { "epoch": 0.9349256818723563, "grad_norm": 182.71763610839844, "learning_rate": 1.6749188645357072e-07, "loss": 12.5836, "step": 462820 }, { "epoch": 0.9349458825050401, "grad_norm": 312.7193603515625, "learning_rate": 1.6740230681376867e-07, "loss": 36.3983, "step": 462830 }, { "epoch": 0.9349660831377239, "grad_norm": 57.26655197143555, "learning_rate": 1.6731275072746244e-07, "loss": 19.5918, "step": 462840 }, { "epoch": 0.9349862837704077, "grad_norm": 362.644775390625, "learning_rate": 1.672232181950878e-07, "loss": 12.4153, "step": 462850 }, { "epoch": 0.9350064844030915, "grad_norm": 90.16925811767578, "learning_rate": 1.6713370921708049e-07, "loss": 10.4072, "step": 462860 }, { "epoch": 0.9350266850357754, "grad_norm": 241.1505584716797, "learning_rate": 1.6704422379387685e-07, "loss": 15.8052, "step": 462870 }, { "epoch": 0.9350468856684592, "grad_norm": 4.763140678405762, "learning_rate": 1.669547619259143e-07, "loss": 11.5775, "step": 462880 }, { "epoch": 0.935067086301143, "grad_norm": 322.7345275878906, "learning_rate": 1.6686532361362805e-07, "loss": 16.5688, "step": 462890 }, { "epoch": 0.9350872869338268, "grad_norm": 107.84266662597656, "learning_rate": 1.6677590885745388e-07, "loss": 30.3047, "step": 462900 }, { "epoch": 0.9351074875665106, "grad_norm": 134.6808624267578, "learning_rate": 1.6668651765782806e-07, "loss": 27.6451, "step": 462910 }, { "epoch": 0.9351276881991945, "grad_norm": 402.5738525390625, "learning_rate": 1.6659715001518583e-07, "loss": 19.3808, "step": 462920 }, { "epoch": 0.9351478888318782, "grad_norm": 406.24359130859375, "learning_rate": 1.665078059299624e-07, "loss": 11.4116, "step": 462930 }, { "epoch": 0.935168089464562, "grad_norm": 272.2979431152344, "learning_rate": 1.6641848540259353e-07, "loss": 29.7682, "step": 462940 }, { "epoch": 0.9351882900972458, "grad_norm": 55.207969665527344, "learning_rate": 1.6632918843351554e-07, "loss": 8.7309, "step": 462950 }, { "epoch": 0.9352084907299296, "grad_norm": 83.4108657836914, "learning_rate": 1.662399150231625e-07, "loss": 17.1847, "step": 462960 }, { "epoch": 0.9352286913626134, "grad_norm": 271.63861083984375, "learning_rate": 1.6615066517196965e-07, "loss": 22.5939, "step": 462970 }, { "epoch": 0.9352488919952973, "grad_norm": 364.66839599609375, "learning_rate": 1.6606143888037219e-07, "loss": 11.5765, "step": 462980 }, { "epoch": 0.9352690926279811, "grad_norm": 261.2265625, "learning_rate": 1.659722361488053e-07, "loss": 22.7304, "step": 462990 }, { "epoch": 0.9352892932606649, "grad_norm": 395.2714538574219, "learning_rate": 1.6588305697770313e-07, "loss": 23.3989, "step": 463000 }, { "epoch": 0.9353094938933487, "grad_norm": 162.39208984375, "learning_rate": 1.6579390136750086e-07, "loss": 29.618, "step": 463010 }, { "epoch": 0.9353296945260325, "grad_norm": 233.4529266357422, "learning_rate": 1.6570476931863256e-07, "loss": 14.8123, "step": 463020 }, { "epoch": 0.9353498951587164, "grad_norm": 370.2455749511719, "learning_rate": 1.656156608315329e-07, "loss": 22.7996, "step": 463030 }, { "epoch": 0.9353700957914002, "grad_norm": 196.27635192871094, "learning_rate": 1.65526575906636e-07, "loss": 14.9946, "step": 463040 }, { "epoch": 0.935390296424084, "grad_norm": 129.41957092285156, "learning_rate": 1.6543751454437708e-07, "loss": 14.2775, "step": 463050 }, { "epoch": 0.9354104970567678, "grad_norm": 165.82347106933594, "learning_rate": 1.6534847674518905e-07, "loss": 13.1837, "step": 463060 }, { "epoch": 0.9354306976894516, "grad_norm": 227.88143920898438, "learning_rate": 1.6525946250950553e-07, "loss": 12.6661, "step": 463070 }, { "epoch": 0.9354508983221355, "grad_norm": 144.63101196289062, "learning_rate": 1.651704718377617e-07, "loss": 12.573, "step": 463080 }, { "epoch": 0.9354710989548193, "grad_norm": 512.3611450195312, "learning_rate": 1.650815047303894e-07, "loss": 21.9159, "step": 463090 }, { "epoch": 0.9354912995875031, "grad_norm": 8.180095672607422, "learning_rate": 1.6499256118782503e-07, "loss": 24.0838, "step": 463100 }, { "epoch": 0.9355115002201869, "grad_norm": 1209.033447265625, "learning_rate": 1.6490364121049984e-07, "loss": 24.2327, "step": 463110 }, { "epoch": 0.9355317008528707, "grad_norm": 215.40579223632812, "learning_rate": 1.648147447988474e-07, "loss": 19.8343, "step": 463120 }, { "epoch": 0.9355519014855546, "grad_norm": 451.74371337890625, "learning_rate": 1.6472587195330236e-07, "loss": 28.199, "step": 463130 }, { "epoch": 0.9355721021182384, "grad_norm": 294.3414001464844, "learning_rate": 1.6463702267429659e-07, "loss": 16.5919, "step": 463140 }, { "epoch": 0.9355923027509222, "grad_norm": 301.0167541503906, "learning_rate": 1.645481969622631e-07, "loss": 19.5308, "step": 463150 }, { "epoch": 0.935612503383606, "grad_norm": 256.2607421875, "learning_rate": 1.644593948176354e-07, "loss": 23.8855, "step": 463160 }, { "epoch": 0.9356327040162898, "grad_norm": 201.0640869140625, "learning_rate": 1.6437061624084704e-07, "loss": 13.9896, "step": 463170 }, { "epoch": 0.9356529046489737, "grad_norm": 256.10980224609375, "learning_rate": 1.6428186123232826e-07, "loss": 13.7225, "step": 463180 }, { "epoch": 0.9356731052816574, "grad_norm": 207.07473754882812, "learning_rate": 1.6419312979251368e-07, "loss": 26.9117, "step": 463190 }, { "epoch": 0.9356933059143412, "grad_norm": 401.6343078613281, "learning_rate": 1.6410442192183574e-07, "loss": 13.2821, "step": 463200 }, { "epoch": 0.935713506547025, "grad_norm": 161.51248168945312, "learning_rate": 1.6401573762072631e-07, "loss": 9.5149, "step": 463210 }, { "epoch": 0.9357337071797088, "grad_norm": 327.02825927734375, "learning_rate": 1.6392707688961728e-07, "loss": 14.089, "step": 463220 }, { "epoch": 0.9357539078123926, "grad_norm": 150.39974975585938, "learning_rate": 1.638384397289411e-07, "loss": 24.2577, "step": 463230 }, { "epoch": 0.9357741084450765, "grad_norm": 220.7972412109375, "learning_rate": 1.6374982613913072e-07, "loss": 18.1346, "step": 463240 }, { "epoch": 0.9357943090777603, "grad_norm": 336.61572265625, "learning_rate": 1.6366123612061636e-07, "loss": 18.3377, "step": 463250 }, { "epoch": 0.9358145097104441, "grad_norm": 174.1511993408203, "learning_rate": 1.635726696738299e-07, "loss": 21.0204, "step": 463260 }, { "epoch": 0.9358347103431279, "grad_norm": 340.135498046875, "learning_rate": 1.6348412679920488e-07, "loss": 9.5584, "step": 463270 }, { "epoch": 0.9358549109758117, "grad_norm": 119.71471405029297, "learning_rate": 1.6339560749717154e-07, "loss": 9.1923, "step": 463280 }, { "epoch": 0.9358751116084956, "grad_norm": 237.67774963378906, "learning_rate": 1.633071117681606e-07, "loss": 14.2949, "step": 463290 }, { "epoch": 0.9358953122411794, "grad_norm": 500.4530944824219, "learning_rate": 1.6321863961260452e-07, "loss": 17.3124, "step": 463300 }, { "epoch": 0.9359155128738632, "grad_norm": 416.07958984375, "learning_rate": 1.6313019103093463e-07, "loss": 16.5199, "step": 463310 }, { "epoch": 0.935935713506547, "grad_norm": 35.018951416015625, "learning_rate": 1.6304176602358056e-07, "loss": 14.4992, "step": 463320 }, { "epoch": 0.9359559141392308, "grad_norm": 271.4516296386719, "learning_rate": 1.6295336459097532e-07, "loss": 13.3977, "step": 463330 }, { "epoch": 0.9359761147719147, "grad_norm": 294.55303955078125, "learning_rate": 1.62864986733548e-07, "loss": 14.6723, "step": 463340 }, { "epoch": 0.9359963154045985, "grad_norm": 200.52882385253906, "learning_rate": 1.6277663245173047e-07, "loss": 10.7413, "step": 463350 }, { "epoch": 0.9360165160372823, "grad_norm": 212.34140014648438, "learning_rate": 1.6268830174595242e-07, "loss": 11.5137, "step": 463360 }, { "epoch": 0.9360367166699661, "grad_norm": 298.3582763671875, "learning_rate": 1.6259999461664567e-07, "loss": 24.2277, "step": 463370 }, { "epoch": 0.93605691730265, "grad_norm": 15.46964168548584, "learning_rate": 1.6251171106423935e-07, "loss": 13.355, "step": 463380 }, { "epoch": 0.9360771179353338, "grad_norm": 249.83140563964844, "learning_rate": 1.6242345108916424e-07, "loss": 13.0832, "step": 463390 }, { "epoch": 0.9360973185680176, "grad_norm": 260.1106872558594, "learning_rate": 1.6233521469185054e-07, "loss": 21.597, "step": 463400 }, { "epoch": 0.9361175192007014, "grad_norm": 219.79693603515625, "learning_rate": 1.6224700187272792e-07, "loss": 17.4026, "step": 463410 }, { "epoch": 0.9361377198333852, "grad_norm": 194.88259887695312, "learning_rate": 1.621588126322271e-07, "loss": 11.5941, "step": 463420 }, { "epoch": 0.936157920466069, "grad_norm": 818.1163330078125, "learning_rate": 1.620706469707778e-07, "loss": 39.9242, "step": 463430 }, { "epoch": 0.9361781210987528, "grad_norm": 262.7649230957031, "learning_rate": 1.619825048888085e-07, "loss": 11.7672, "step": 463440 }, { "epoch": 0.9361983217314366, "grad_norm": 271.2383117675781, "learning_rate": 1.618943863867506e-07, "loss": 21.945, "step": 463450 }, { "epoch": 0.9362185223641204, "grad_norm": 10.14715576171875, "learning_rate": 1.6180629146503256e-07, "loss": 9.3233, "step": 463460 }, { "epoch": 0.9362387229968042, "grad_norm": 377.5271301269531, "learning_rate": 1.61718220124083e-07, "loss": 11.135, "step": 463470 }, { "epoch": 0.936258923629488, "grad_norm": 5.2541985511779785, "learning_rate": 1.6163017236433265e-07, "loss": 14.5229, "step": 463480 }, { "epoch": 0.9362791242621719, "grad_norm": 511.776123046875, "learning_rate": 1.6154214818621007e-07, "loss": 21.0427, "step": 463490 }, { "epoch": 0.9362993248948557, "grad_norm": 163.4555206298828, "learning_rate": 1.6145414759014433e-07, "loss": 13.9459, "step": 463500 }, { "epoch": 0.9363195255275395, "grad_norm": 158.45411682128906, "learning_rate": 1.6136617057656344e-07, "loss": 13.8923, "step": 463510 }, { "epoch": 0.9363397261602233, "grad_norm": 96.13873291015625, "learning_rate": 1.6127821714589763e-07, "loss": 14.0296, "step": 463520 }, { "epoch": 0.9363599267929071, "grad_norm": 518.1529541015625, "learning_rate": 1.6119028729857545e-07, "loss": 15.4953, "step": 463530 }, { "epoch": 0.936380127425591, "grad_norm": 210.54432678222656, "learning_rate": 1.6110238103502374e-07, "loss": 19.9439, "step": 463540 }, { "epoch": 0.9364003280582748, "grad_norm": 175.22804260253906, "learning_rate": 1.6101449835567273e-07, "loss": 11.3401, "step": 463550 }, { "epoch": 0.9364205286909586, "grad_norm": 559.84228515625, "learning_rate": 1.6092663926094987e-07, "loss": 21.6868, "step": 463560 }, { "epoch": 0.9364407293236424, "grad_norm": 321.38812255859375, "learning_rate": 1.6083880375128424e-07, "loss": 16.2458, "step": 463570 }, { "epoch": 0.9364609299563262, "grad_norm": 445.6077575683594, "learning_rate": 1.6075099182710274e-07, "loss": 33.7, "step": 463580 }, { "epoch": 0.9364811305890101, "grad_norm": 188.43954467773438, "learning_rate": 1.6066320348883448e-07, "loss": 21.0742, "step": 463590 }, { "epoch": 0.9365013312216939, "grad_norm": 136.55584716796875, "learning_rate": 1.6057543873690685e-07, "loss": 11.6383, "step": 463600 }, { "epoch": 0.9365215318543777, "grad_norm": 906.4600830078125, "learning_rate": 1.604876975717473e-07, "loss": 15.5584, "step": 463610 }, { "epoch": 0.9365417324870615, "grad_norm": 173.13731384277344, "learning_rate": 1.6039997999378388e-07, "loss": 9.4284, "step": 463620 }, { "epoch": 0.9365619331197453, "grad_norm": 169.66293334960938, "learning_rate": 1.603122860034434e-07, "loss": 17.2174, "step": 463630 }, { "epoch": 0.9365821337524292, "grad_norm": 116.82272338867188, "learning_rate": 1.6022461560115498e-07, "loss": 21.4119, "step": 463640 }, { "epoch": 0.936602334385113, "grad_norm": 421.8213806152344, "learning_rate": 1.6013696878734385e-07, "loss": 10.5557, "step": 463650 }, { "epoch": 0.9366225350177968, "grad_norm": 174.93968200683594, "learning_rate": 1.6004934556243857e-07, "loss": 8.0367, "step": 463660 }, { "epoch": 0.9366427356504806, "grad_norm": 351.3277587890625, "learning_rate": 1.5996174592686598e-07, "loss": 30.9874, "step": 463670 }, { "epoch": 0.9366629362831644, "grad_norm": 325.6501770019531, "learning_rate": 1.5987416988105188e-07, "loss": 27.6566, "step": 463680 }, { "epoch": 0.9366831369158483, "grad_norm": 802.6322021484375, "learning_rate": 1.5978661742542477e-07, "loss": 21.7901, "step": 463690 }, { "epoch": 0.936703337548532, "grad_norm": 363.5493469238281, "learning_rate": 1.596990885604105e-07, "loss": 13.6323, "step": 463700 }, { "epoch": 0.9367235381812158, "grad_norm": 299.0626220703125, "learning_rate": 1.596115832864359e-07, "loss": 28.5307, "step": 463710 }, { "epoch": 0.9367437388138996, "grad_norm": 273.3241882324219, "learning_rate": 1.5952410160392784e-07, "loss": 22.5999, "step": 463720 }, { "epoch": 0.9367639394465834, "grad_norm": 369.0726623535156, "learning_rate": 1.59436643513311e-07, "loss": 31.4092, "step": 463730 }, { "epoch": 0.9367841400792672, "grad_norm": 84.29007720947266, "learning_rate": 1.5934920901501395e-07, "loss": 13.0794, "step": 463740 }, { "epoch": 0.9368043407119511, "grad_norm": 335.17510986328125, "learning_rate": 1.5926179810946185e-07, "loss": 14.829, "step": 463750 }, { "epoch": 0.9368245413446349, "grad_norm": 62.5571403503418, "learning_rate": 1.5917441079707942e-07, "loss": 16.8753, "step": 463760 }, { "epoch": 0.9368447419773187, "grad_norm": 242.92271423339844, "learning_rate": 1.5908704707829458e-07, "loss": 12.437, "step": 463770 }, { "epoch": 0.9368649426100025, "grad_norm": 508.7433776855469, "learning_rate": 1.5899970695353262e-07, "loss": 19.0374, "step": 463780 }, { "epoch": 0.9368851432426863, "grad_norm": 277.3528747558594, "learning_rate": 1.5891239042321871e-07, "loss": 10.7622, "step": 463790 }, { "epoch": 0.9369053438753702, "grad_norm": 293.6339416503906, "learning_rate": 1.5882509748777809e-07, "loss": 23.3743, "step": 463800 }, { "epoch": 0.936925544508054, "grad_norm": 152.61813354492188, "learning_rate": 1.5873782814763762e-07, "loss": 39.8537, "step": 463810 }, { "epoch": 0.9369457451407378, "grad_norm": 100.76495361328125, "learning_rate": 1.586505824032214e-07, "loss": 14.3764, "step": 463820 }, { "epoch": 0.9369659457734216, "grad_norm": 400.2121887207031, "learning_rate": 1.5856336025495466e-07, "loss": 10.9528, "step": 463830 }, { "epoch": 0.9369861464061054, "grad_norm": 458.34588623046875, "learning_rate": 1.5847616170326318e-07, "loss": 17.7317, "step": 463840 }, { "epoch": 0.9370063470387893, "grad_norm": 370.8608093261719, "learning_rate": 1.5838898674857273e-07, "loss": 14.1121, "step": 463850 }, { "epoch": 0.9370265476714731, "grad_norm": 177.37461853027344, "learning_rate": 1.5830183539130574e-07, "loss": 10.8087, "step": 463860 }, { "epoch": 0.9370467483041569, "grad_norm": 310.3232116699219, "learning_rate": 1.582147076318885e-07, "loss": 13.7634, "step": 463870 }, { "epoch": 0.9370669489368407, "grad_norm": 125.4625244140625, "learning_rate": 1.581276034707463e-07, "loss": 18.1908, "step": 463880 }, { "epoch": 0.9370871495695245, "grad_norm": 279.0887756347656, "learning_rate": 1.5804052290830262e-07, "loss": 18.8361, "step": 463890 }, { "epoch": 0.9371073502022084, "grad_norm": 486.1850280761719, "learning_rate": 1.5795346594498162e-07, "loss": 15.2239, "step": 463900 }, { "epoch": 0.9371275508348922, "grad_norm": 183.5675811767578, "learning_rate": 1.5786643258120905e-07, "loss": 18.1985, "step": 463910 }, { "epoch": 0.937147751467576, "grad_norm": 20.34307861328125, "learning_rate": 1.5777942281740789e-07, "loss": 12.9008, "step": 463920 }, { "epoch": 0.9371679521002598, "grad_norm": 268.869384765625, "learning_rate": 1.5769243665400224e-07, "loss": 26.5671, "step": 463930 }, { "epoch": 0.9371881527329436, "grad_norm": 328.96832275390625, "learning_rate": 1.5760547409141626e-07, "loss": 17.6477, "step": 463940 }, { "epoch": 0.9372083533656275, "grad_norm": 154.60748291015625, "learning_rate": 1.5751853513007454e-07, "loss": 10.0067, "step": 463950 }, { "epoch": 0.9372285539983112, "grad_norm": 42.942100524902344, "learning_rate": 1.5743161977039954e-07, "loss": 16.669, "step": 463960 }, { "epoch": 0.937248754630995, "grad_norm": 225.9125213623047, "learning_rate": 1.5734472801281543e-07, "loss": 14.7296, "step": 463970 }, { "epoch": 0.9372689552636788, "grad_norm": 284.292236328125, "learning_rate": 1.5725785985774623e-07, "loss": 14.482, "step": 463980 }, { "epoch": 0.9372891558963626, "grad_norm": 266.5537109375, "learning_rate": 1.5717101530561497e-07, "loss": 10.6148, "step": 463990 }, { "epoch": 0.9373093565290465, "grad_norm": 174.10211181640625, "learning_rate": 1.5708419435684463e-07, "loss": 20.1838, "step": 464000 }, { "epoch": 0.9373295571617303, "grad_norm": 275.5843505859375, "learning_rate": 1.5699739701185878e-07, "loss": 42.5351, "step": 464010 }, { "epoch": 0.9373497577944141, "grad_norm": 432.7255554199219, "learning_rate": 1.5691062327107932e-07, "loss": 14.5928, "step": 464020 }, { "epoch": 0.9373699584270979, "grad_norm": 181.0866241455078, "learning_rate": 1.5682387313493086e-07, "loss": 16.4988, "step": 464030 }, { "epoch": 0.9373901590597817, "grad_norm": 326.6819152832031, "learning_rate": 1.5673714660383532e-07, "loss": 16.7856, "step": 464040 }, { "epoch": 0.9374103596924656, "grad_norm": 359.0847473144531, "learning_rate": 1.5665044367821513e-07, "loss": 17.2553, "step": 464050 }, { "epoch": 0.9374305603251494, "grad_norm": 386.4946594238281, "learning_rate": 1.5656376435849385e-07, "loss": 25.8802, "step": 464060 }, { "epoch": 0.9374507609578332, "grad_norm": 278.6641540527344, "learning_rate": 1.5647710864509336e-07, "loss": 18.7478, "step": 464070 }, { "epoch": 0.937470961590517, "grad_norm": 293.5640563964844, "learning_rate": 1.5639047653843554e-07, "loss": 14.4219, "step": 464080 }, { "epoch": 0.9374911622232008, "grad_norm": 114.81095123291016, "learning_rate": 1.563038680389428e-07, "loss": 9.2082, "step": 464090 }, { "epoch": 0.9375113628558847, "grad_norm": 50.59746551513672, "learning_rate": 1.5621728314703822e-07, "loss": 15.5744, "step": 464100 }, { "epoch": 0.9375315634885685, "grad_norm": 222.39903259277344, "learning_rate": 1.5613072186314304e-07, "loss": 14.7094, "step": 464110 }, { "epoch": 0.9375517641212523, "grad_norm": 506.1026916503906, "learning_rate": 1.560441841876792e-07, "loss": 24.3718, "step": 464120 }, { "epoch": 0.9375719647539361, "grad_norm": 248.27789306640625, "learning_rate": 1.5595767012106856e-07, "loss": 18.9075, "step": 464130 }, { "epoch": 0.9375921653866199, "grad_norm": 289.97283935546875, "learning_rate": 1.5587117966373244e-07, "loss": 9.895, "step": 464140 }, { "epoch": 0.9376123660193038, "grad_norm": 337.35882568359375, "learning_rate": 1.5578471281609274e-07, "loss": 18.4055, "step": 464150 }, { "epoch": 0.9376325666519876, "grad_norm": 252.82054138183594, "learning_rate": 1.5569826957857027e-07, "loss": 19.524, "step": 464160 }, { "epoch": 0.9376527672846714, "grad_norm": 144.07119750976562, "learning_rate": 1.556118499515885e-07, "loss": 18.7757, "step": 464170 }, { "epoch": 0.9376729679173552, "grad_norm": 427.6900634765625, "learning_rate": 1.555254539355655e-07, "loss": 11.2591, "step": 464180 }, { "epoch": 0.937693168550039, "grad_norm": 444.0274658203125, "learning_rate": 1.5543908153092424e-07, "loss": 13.1624, "step": 464190 }, { "epoch": 0.9377133691827229, "grad_norm": 394.1073913574219, "learning_rate": 1.553527327380855e-07, "loss": 27.6414, "step": 464200 }, { "epoch": 0.9377335698154066, "grad_norm": 256.9996337890625, "learning_rate": 1.5526640755747003e-07, "loss": 20.7622, "step": 464210 }, { "epoch": 0.9377537704480904, "grad_norm": 403.0283508300781, "learning_rate": 1.5518010598949807e-07, "loss": 16.7295, "step": 464220 }, { "epoch": 0.9377739710807742, "grad_norm": 288.77215576171875, "learning_rate": 1.5509382803459149e-07, "loss": 18.791, "step": 464230 }, { "epoch": 0.937794171713458, "grad_norm": 261.7998352050781, "learning_rate": 1.5500757369316888e-07, "loss": 24.8994, "step": 464240 }, { "epoch": 0.9378143723461418, "grad_norm": 293.237548828125, "learning_rate": 1.5492134296565264e-07, "loss": 57.5289, "step": 464250 }, { "epoch": 0.9378345729788257, "grad_norm": 161.0277557373047, "learning_rate": 1.5483513585246135e-07, "loss": 18.1571, "step": 464260 }, { "epoch": 0.9378547736115095, "grad_norm": 505.86834716796875, "learning_rate": 1.5474895235401688e-07, "loss": 21.8036, "step": 464270 }, { "epoch": 0.9378749742441933, "grad_norm": 372.587646484375, "learning_rate": 1.546627924707378e-07, "loss": 19.0715, "step": 464280 }, { "epoch": 0.9378951748768771, "grad_norm": 167.16751098632812, "learning_rate": 1.545766562030443e-07, "loss": 19.0516, "step": 464290 }, { "epoch": 0.937915375509561, "grad_norm": 272.3799743652344, "learning_rate": 1.5449054355135718e-07, "loss": 18.8585, "step": 464300 }, { "epoch": 0.9379355761422448, "grad_norm": 185.17149353027344, "learning_rate": 1.54404454516095e-07, "loss": 21.4619, "step": 464310 }, { "epoch": 0.9379557767749286, "grad_norm": 423.4834289550781, "learning_rate": 1.5431838909767793e-07, "loss": 18.7611, "step": 464320 }, { "epoch": 0.9379759774076124, "grad_norm": 219.85989379882812, "learning_rate": 1.542323472965257e-07, "loss": 15.396, "step": 464330 }, { "epoch": 0.9379961780402962, "grad_norm": 275.00982666015625, "learning_rate": 1.5414632911305683e-07, "loss": 13.2177, "step": 464340 }, { "epoch": 0.93801637867298, "grad_norm": 301.32568359375, "learning_rate": 1.5406033454769154e-07, "loss": 10.5237, "step": 464350 }, { "epoch": 0.9380365793056639, "grad_norm": 237.79751586914062, "learning_rate": 1.5397436360084784e-07, "loss": 17.7063, "step": 464360 }, { "epoch": 0.9380567799383477, "grad_norm": 236.28114318847656, "learning_rate": 1.5388841627294536e-07, "loss": 20.8824, "step": 464370 }, { "epoch": 0.9380769805710315, "grad_norm": 563.0267944335938, "learning_rate": 1.5380249256440272e-07, "loss": 22.0628, "step": 464380 }, { "epoch": 0.9380971812037153, "grad_norm": 365.0470886230469, "learning_rate": 1.5371659247564063e-07, "loss": 19.0374, "step": 464390 }, { "epoch": 0.9381173818363991, "grad_norm": 373.4736328125, "learning_rate": 1.5363071600707435e-07, "loss": 15.6116, "step": 464400 }, { "epoch": 0.938137582469083, "grad_norm": 231.653076171875, "learning_rate": 1.5354486315912408e-07, "loss": 18.9384, "step": 464410 }, { "epoch": 0.9381577831017668, "grad_norm": 477.3868713378906, "learning_rate": 1.534590339322095e-07, "loss": 12.3416, "step": 464420 }, { "epoch": 0.9381779837344506, "grad_norm": 266.5515441894531, "learning_rate": 1.533732283267475e-07, "loss": 24.4451, "step": 464430 }, { "epoch": 0.9381981843671344, "grad_norm": 221.8384246826172, "learning_rate": 1.532874463431555e-07, "loss": 9.7771, "step": 464440 }, { "epoch": 0.9382183849998182, "grad_norm": 225.82688903808594, "learning_rate": 1.532016879818532e-07, "loss": 19.1567, "step": 464450 }, { "epoch": 0.9382385856325021, "grad_norm": 46.60588455200195, "learning_rate": 1.5311595324325912e-07, "loss": 13.5228, "step": 464460 }, { "epoch": 0.9382587862651858, "grad_norm": 2069.479248046875, "learning_rate": 1.5303024212778905e-07, "loss": 27.4372, "step": 464470 }, { "epoch": 0.9382789868978696, "grad_norm": 208.14450073242188, "learning_rate": 1.5294455463586157e-07, "loss": 12.1534, "step": 464480 }, { "epoch": 0.9382991875305534, "grad_norm": 301.5461120605469, "learning_rate": 1.528588907678946e-07, "loss": 21.3321, "step": 464490 }, { "epoch": 0.9383193881632372, "grad_norm": 233.73643493652344, "learning_rate": 1.5277325052430569e-07, "loss": 8.8772, "step": 464500 }, { "epoch": 0.938339588795921, "grad_norm": 179.8396453857422, "learning_rate": 1.5268763390551167e-07, "loss": 8.8738, "step": 464510 }, { "epoch": 0.9383597894286049, "grad_norm": 608.1994018554688, "learning_rate": 1.526020409119311e-07, "loss": 15.8383, "step": 464520 }, { "epoch": 0.9383799900612887, "grad_norm": 483.1649169921875, "learning_rate": 1.5251647154397975e-07, "loss": 18.2113, "step": 464530 }, { "epoch": 0.9384001906939725, "grad_norm": 331.9686279296875, "learning_rate": 1.5243092580207507e-07, "loss": 21.4486, "step": 464540 }, { "epoch": 0.9384203913266563, "grad_norm": 156.11338806152344, "learning_rate": 1.5234540368663343e-07, "loss": 13.561, "step": 464550 }, { "epoch": 0.9384405919593402, "grad_norm": 255.95281982421875, "learning_rate": 1.5225990519807332e-07, "loss": 12.507, "step": 464560 }, { "epoch": 0.938460792592024, "grad_norm": 436.13861083984375, "learning_rate": 1.5217443033681058e-07, "loss": 12.8422, "step": 464570 }, { "epoch": 0.9384809932247078, "grad_norm": 261.2200927734375, "learning_rate": 1.5208897910326092e-07, "loss": 14.746, "step": 464580 }, { "epoch": 0.9385011938573916, "grad_norm": 217.70619201660156, "learning_rate": 1.520035514978424e-07, "loss": 15.9952, "step": 464590 }, { "epoch": 0.9385213944900754, "grad_norm": 377.078369140625, "learning_rate": 1.5191814752097024e-07, "loss": 10.0508, "step": 464600 }, { "epoch": 0.9385415951227593, "grad_norm": 389.1170654296875, "learning_rate": 1.5183276717306072e-07, "loss": 15.1369, "step": 464610 }, { "epoch": 0.9385617957554431, "grad_norm": 370.8136291503906, "learning_rate": 1.517474104545308e-07, "loss": 11.9117, "step": 464620 }, { "epoch": 0.9385819963881269, "grad_norm": 481.1498718261719, "learning_rate": 1.5166207736579564e-07, "loss": 22.6878, "step": 464630 }, { "epoch": 0.9386021970208107, "grad_norm": 468.8928527832031, "learning_rate": 1.515767679072716e-07, "loss": 12.4588, "step": 464640 }, { "epoch": 0.9386223976534945, "grad_norm": 378.5009460449219, "learning_rate": 1.5149148207937447e-07, "loss": 20.1811, "step": 464650 }, { "epoch": 0.9386425982861784, "grad_norm": 292.85101318359375, "learning_rate": 1.5140621988251947e-07, "loss": 5.6071, "step": 464660 }, { "epoch": 0.9386627989188622, "grad_norm": 363.9794006347656, "learning_rate": 1.513209813171229e-07, "loss": 16.2339, "step": 464670 }, { "epoch": 0.938682999551546, "grad_norm": 300.9176025390625, "learning_rate": 1.5123576638360004e-07, "loss": 15.4974, "step": 464680 }, { "epoch": 0.9387032001842298, "grad_norm": 146.10968017578125, "learning_rate": 1.5115057508236498e-07, "loss": 14.3807, "step": 464690 }, { "epoch": 0.9387234008169136, "grad_norm": 776.8226318359375, "learning_rate": 1.5106540741383402e-07, "loss": 25.4167, "step": 464700 }, { "epoch": 0.9387436014495975, "grad_norm": 110.0594482421875, "learning_rate": 1.5098026337842297e-07, "loss": 24.875, "step": 464710 }, { "epoch": 0.9387638020822812, "grad_norm": 293.3554382324219, "learning_rate": 1.5089514297654594e-07, "loss": 20.56, "step": 464720 }, { "epoch": 0.938784002714965, "grad_norm": 233.24440002441406, "learning_rate": 1.5081004620861706e-07, "loss": 18.238, "step": 464730 }, { "epoch": 0.9388042033476488, "grad_norm": 336.6822204589844, "learning_rate": 1.5072497307505263e-07, "loss": 16.5808, "step": 464740 }, { "epoch": 0.9388244039803326, "grad_norm": 293.6535339355469, "learning_rate": 1.5063992357626623e-07, "loss": 34.2862, "step": 464750 }, { "epoch": 0.9388446046130164, "grad_norm": 440.262939453125, "learning_rate": 1.5055489771267252e-07, "loss": 23.0098, "step": 464760 }, { "epoch": 0.9388648052457003, "grad_norm": 347.2552185058594, "learning_rate": 1.5046989548468616e-07, "loss": 15.1431, "step": 464770 }, { "epoch": 0.9388850058783841, "grad_norm": 226.5079803466797, "learning_rate": 1.503849168927224e-07, "loss": 24.5541, "step": 464780 }, { "epoch": 0.9389052065110679, "grad_norm": 405.8617248535156, "learning_rate": 1.502999619371931e-07, "loss": 20.6261, "step": 464790 }, { "epoch": 0.9389254071437517, "grad_norm": 268.3782958984375, "learning_rate": 1.502150306185135e-07, "loss": 17.145, "step": 464800 }, { "epoch": 0.9389456077764355, "grad_norm": 188.48545837402344, "learning_rate": 1.5013012293709828e-07, "loss": 10.6227, "step": 464810 }, { "epoch": 0.9389658084091194, "grad_norm": 139.31396484375, "learning_rate": 1.5004523889336042e-07, "loss": 20.7047, "step": 464820 }, { "epoch": 0.9389860090418032, "grad_norm": 144.83143615722656, "learning_rate": 1.499603784877135e-07, "loss": 11.3791, "step": 464830 }, { "epoch": 0.939006209674487, "grad_norm": 421.0755920410156, "learning_rate": 1.4987554172057216e-07, "loss": 14.0488, "step": 464840 }, { "epoch": 0.9390264103071708, "grad_norm": 437.9329528808594, "learning_rate": 1.497907285923489e-07, "loss": 16.8727, "step": 464850 }, { "epoch": 0.9390466109398546, "grad_norm": 100.489501953125, "learning_rate": 1.4970593910345665e-07, "loss": 9.158, "step": 464860 }, { "epoch": 0.9390668115725385, "grad_norm": 272.7707824707031, "learning_rate": 1.4962117325431013e-07, "loss": 23.773, "step": 464870 }, { "epoch": 0.9390870122052223, "grad_norm": 102.82606506347656, "learning_rate": 1.495364310453218e-07, "loss": 16.2965, "step": 464880 }, { "epoch": 0.9391072128379061, "grad_norm": 338.75128173828125, "learning_rate": 1.494517124769046e-07, "loss": 10.7134, "step": 464890 }, { "epoch": 0.9391274134705899, "grad_norm": 203.58189392089844, "learning_rate": 1.4936701754947104e-07, "loss": 11.8365, "step": 464900 }, { "epoch": 0.9391476141032737, "grad_norm": 251.21974182128906, "learning_rate": 1.4928234626343464e-07, "loss": 8.4952, "step": 464910 }, { "epoch": 0.9391678147359576, "grad_norm": 242.14511108398438, "learning_rate": 1.4919769861920785e-07, "loss": 13.1516, "step": 464920 }, { "epoch": 0.9391880153686414, "grad_norm": 219.99034118652344, "learning_rate": 1.491130746172026e-07, "loss": 18.0917, "step": 464930 }, { "epoch": 0.9392082160013252, "grad_norm": 391.0244445800781, "learning_rate": 1.490284742578324e-07, "loss": 32.7682, "step": 464940 }, { "epoch": 0.939228416634009, "grad_norm": 263.4881896972656, "learning_rate": 1.4894389754150862e-07, "loss": 30.4799, "step": 464950 }, { "epoch": 0.9392486172666928, "grad_norm": 242.61471557617188, "learning_rate": 1.4885934446864425e-07, "loss": 17.89, "step": 464960 }, { "epoch": 0.9392688178993767, "grad_norm": 133.09295654296875, "learning_rate": 1.487748150396512e-07, "loss": 14.5193, "step": 464970 }, { "epoch": 0.9392890185320604, "grad_norm": 109.19881439208984, "learning_rate": 1.4869030925494077e-07, "loss": 22.463, "step": 464980 }, { "epoch": 0.9393092191647442, "grad_norm": 387.658203125, "learning_rate": 1.4860582711492544e-07, "loss": 23.5541, "step": 464990 }, { "epoch": 0.939329419797428, "grad_norm": 127.01421356201172, "learning_rate": 1.4852136862001766e-07, "loss": 26.0863, "step": 465000 }, { "epoch": 0.9393496204301118, "grad_norm": 248.5615234375, "learning_rate": 1.4843693377062818e-07, "loss": 17.2345, "step": 465010 }, { "epoch": 0.9393698210627957, "grad_norm": 329.6812744140625, "learning_rate": 1.483525225671678e-07, "loss": 21.7594, "step": 465020 }, { "epoch": 0.9393900216954795, "grad_norm": 66.26323699951172, "learning_rate": 1.4826813501004954e-07, "loss": 15.4191, "step": 465030 }, { "epoch": 0.9394102223281633, "grad_norm": 622.2844848632812, "learning_rate": 1.4818377109968417e-07, "loss": 19.3583, "step": 465040 }, { "epoch": 0.9394304229608471, "grad_norm": 368.30633544921875, "learning_rate": 1.4809943083648194e-07, "loss": 16.1753, "step": 465050 }, { "epoch": 0.9394506235935309, "grad_norm": 373.9842529296875, "learning_rate": 1.480151142208547e-07, "loss": 13.5912, "step": 465060 }, { "epoch": 0.9394708242262148, "grad_norm": 161.87060546875, "learning_rate": 1.4793082125321435e-07, "loss": 21.0955, "step": 465070 }, { "epoch": 0.9394910248588986, "grad_norm": 178.7263946533203, "learning_rate": 1.4784655193396947e-07, "loss": 15.2693, "step": 465080 }, { "epoch": 0.9395112254915824, "grad_norm": 140.8055419921875, "learning_rate": 1.4776230626353193e-07, "loss": 16.8984, "step": 465090 }, { "epoch": 0.9395314261242662, "grad_norm": 252.18603515625, "learning_rate": 1.4767808424231312e-07, "loss": 12.0256, "step": 465100 }, { "epoch": 0.93955162675695, "grad_norm": 61.71898651123047, "learning_rate": 1.4759388587072266e-07, "loss": 19.4285, "step": 465110 }, { "epoch": 0.9395718273896339, "grad_norm": 159.76754760742188, "learning_rate": 1.475097111491708e-07, "loss": 12.8262, "step": 465120 }, { "epoch": 0.9395920280223177, "grad_norm": 176.18861389160156, "learning_rate": 1.474255600780683e-07, "loss": 13.4194, "step": 465130 }, { "epoch": 0.9396122286550015, "grad_norm": 276.20440673828125, "learning_rate": 1.473414326578254e-07, "loss": 9.6049, "step": 465140 }, { "epoch": 0.9396324292876853, "grad_norm": 652.6522216796875, "learning_rate": 1.4725732888885126e-07, "loss": 20.6648, "step": 465150 }, { "epoch": 0.9396526299203691, "grad_norm": 245.8262939453125, "learning_rate": 1.4717324877155603e-07, "loss": 7.9968, "step": 465160 }, { "epoch": 0.939672830553053, "grad_norm": 169.50160217285156, "learning_rate": 1.4708919230635054e-07, "loss": 13.6993, "step": 465170 }, { "epoch": 0.9396930311857368, "grad_norm": 307.2168273925781, "learning_rate": 1.4700515949364337e-07, "loss": 29.3474, "step": 465180 }, { "epoch": 0.9397132318184206, "grad_norm": 309.11541748046875, "learning_rate": 1.4692115033384468e-07, "loss": 24.8049, "step": 465190 }, { "epoch": 0.9397334324511044, "grad_norm": 299.8684997558594, "learning_rate": 1.4683716482736364e-07, "loss": 20.7849, "step": 465200 }, { "epoch": 0.9397536330837882, "grad_norm": 343.4499816894531, "learning_rate": 1.4675320297460994e-07, "loss": 22.834, "step": 465210 }, { "epoch": 0.939773833716472, "grad_norm": 0.3346022665500641, "learning_rate": 1.4666926477599153e-07, "loss": 8.4834, "step": 465220 }, { "epoch": 0.9397940343491558, "grad_norm": 1.9308102130889893, "learning_rate": 1.4658535023191922e-07, "loss": 20.9597, "step": 465230 }, { "epoch": 0.9398142349818396, "grad_norm": 197.47598266601562, "learning_rate": 1.4650145934280103e-07, "loss": 27.1396, "step": 465240 }, { "epoch": 0.9398344356145234, "grad_norm": 179.12158203125, "learning_rate": 1.4641759210904605e-07, "loss": 15.4021, "step": 465250 }, { "epoch": 0.9398546362472072, "grad_norm": 383.4201354980469, "learning_rate": 1.463337485310634e-07, "loss": 18.0885, "step": 465260 }, { "epoch": 0.939874836879891, "grad_norm": 3.454732656478882, "learning_rate": 1.4624992860926112e-07, "loss": 11.9672, "step": 465270 }, { "epoch": 0.9398950375125749, "grad_norm": 1308.201904296875, "learning_rate": 1.461661323440483e-07, "loss": 35.694, "step": 465280 }, { "epoch": 0.9399152381452587, "grad_norm": 179.21511840820312, "learning_rate": 1.4608235973583296e-07, "loss": 14.4515, "step": 465290 }, { "epoch": 0.9399354387779425, "grad_norm": 166.96446228027344, "learning_rate": 1.459986107850231e-07, "loss": 35.6755, "step": 465300 }, { "epoch": 0.9399556394106263, "grad_norm": 2.7305474281311035, "learning_rate": 1.4591488549202725e-07, "loss": 10.5408, "step": 465310 }, { "epoch": 0.9399758400433101, "grad_norm": 326.416015625, "learning_rate": 1.4583118385725402e-07, "loss": 10.2123, "step": 465320 }, { "epoch": 0.939996040675994, "grad_norm": 90.38726043701172, "learning_rate": 1.4574750588111085e-07, "loss": 10.5752, "step": 465330 }, { "epoch": 0.9400162413086778, "grad_norm": 33.91945266723633, "learning_rate": 1.4566385156400463e-07, "loss": 18.0592, "step": 465340 }, { "epoch": 0.9400364419413616, "grad_norm": 62.38507843017578, "learning_rate": 1.4558022090634504e-07, "loss": 9.2965, "step": 465350 }, { "epoch": 0.9400566425740454, "grad_norm": 201.84622192382812, "learning_rate": 1.4549661390853897e-07, "loss": 25.4784, "step": 465360 }, { "epoch": 0.9400768432067292, "grad_norm": 207.21717834472656, "learning_rate": 1.4541303057099275e-07, "loss": 11.7948, "step": 465370 }, { "epoch": 0.9400970438394131, "grad_norm": 310.4132995605469, "learning_rate": 1.4532947089411443e-07, "loss": 12.5109, "step": 465380 }, { "epoch": 0.9401172444720969, "grad_norm": 71.95929718017578, "learning_rate": 1.452459348783125e-07, "loss": 16.5754, "step": 465390 }, { "epoch": 0.9401374451047807, "grad_norm": 178.69338989257812, "learning_rate": 1.4516242252399227e-07, "loss": 21.9241, "step": 465400 }, { "epoch": 0.9401576457374645, "grad_norm": 635.33935546875, "learning_rate": 1.450789338315617e-07, "loss": 9.9826, "step": 465410 }, { "epoch": 0.9401778463701483, "grad_norm": 265.636962890625, "learning_rate": 1.4499546880142823e-07, "loss": 19.409, "step": 465420 }, { "epoch": 0.9401980470028322, "grad_norm": 250.55796813964844, "learning_rate": 1.4491202743399767e-07, "loss": 17.1007, "step": 465430 }, { "epoch": 0.940218247635516, "grad_norm": 404.17333984375, "learning_rate": 1.448286097296764e-07, "loss": 22.9604, "step": 465440 }, { "epoch": 0.9402384482681998, "grad_norm": 410.077392578125, "learning_rate": 1.4474521568887178e-07, "loss": 20.4779, "step": 465450 }, { "epoch": 0.9402586489008836, "grad_norm": 302.5479736328125, "learning_rate": 1.4466184531199135e-07, "loss": 10.2103, "step": 465460 }, { "epoch": 0.9402788495335674, "grad_norm": 696.5477905273438, "learning_rate": 1.4457849859943862e-07, "loss": 20.1254, "step": 465470 }, { "epoch": 0.9402990501662513, "grad_norm": 176.4188232421875, "learning_rate": 1.4449517555162163e-07, "loss": 11.1525, "step": 465480 }, { "epoch": 0.940319250798935, "grad_norm": 269.1247253417969, "learning_rate": 1.4441187616894724e-07, "loss": 17.4083, "step": 465490 }, { "epoch": 0.9403394514316188, "grad_norm": 181.580322265625, "learning_rate": 1.4432860045182019e-07, "loss": 20.2364, "step": 465500 }, { "epoch": 0.9403596520643026, "grad_norm": 139.49365234375, "learning_rate": 1.4424534840064563e-07, "loss": 16.2173, "step": 465510 }, { "epoch": 0.9403798526969864, "grad_norm": 154.74514770507812, "learning_rate": 1.4416212001583163e-07, "loss": 13.2751, "step": 465520 }, { "epoch": 0.9404000533296702, "grad_norm": 177.03106689453125, "learning_rate": 1.4407891529778172e-07, "loss": 11.1822, "step": 465530 }, { "epoch": 0.9404202539623541, "grad_norm": 308.2560729980469, "learning_rate": 1.4399573424690227e-07, "loss": 21.6448, "step": 465540 }, { "epoch": 0.9404404545950379, "grad_norm": 285.0311279296875, "learning_rate": 1.4391257686359906e-07, "loss": 21.1814, "step": 465550 }, { "epoch": 0.9404606552277217, "grad_norm": 153.18069458007812, "learning_rate": 1.438294431482762e-07, "loss": 29.6529, "step": 465560 }, { "epoch": 0.9404808558604055, "grad_norm": 357.177978515625, "learning_rate": 1.4374633310134057e-07, "loss": 16.2107, "step": 465570 }, { "epoch": 0.9405010564930893, "grad_norm": 263.331787109375, "learning_rate": 1.4366324672319575e-07, "loss": 27.4075, "step": 465580 }, { "epoch": 0.9405212571257732, "grad_norm": 163.2997283935547, "learning_rate": 1.43580184014247e-07, "loss": 17.678, "step": 465590 }, { "epoch": 0.940541457758457, "grad_norm": 160.16209411621094, "learning_rate": 1.4349714497490009e-07, "loss": 12.6306, "step": 465600 }, { "epoch": 0.9405616583911408, "grad_norm": 162.60272216796875, "learning_rate": 1.4341412960555855e-07, "loss": 13.1878, "step": 465610 }, { "epoch": 0.9405818590238246, "grad_norm": 389.0608825683594, "learning_rate": 1.4333113790662822e-07, "loss": 19.1214, "step": 465620 }, { "epoch": 0.9406020596565084, "grad_norm": 160.79275512695312, "learning_rate": 1.432481698785121e-07, "loss": 4.4156, "step": 465630 }, { "epoch": 0.9406222602891923, "grad_norm": 19.751644134521484, "learning_rate": 1.4316522552161593e-07, "loss": 17.5609, "step": 465640 }, { "epoch": 0.9406424609218761, "grad_norm": 6.37514591217041, "learning_rate": 1.4308230483634334e-07, "loss": 27.6134, "step": 465650 }, { "epoch": 0.9406626615545599, "grad_norm": 81.94923400878906, "learning_rate": 1.4299940782309785e-07, "loss": 11.7039, "step": 465660 }, { "epoch": 0.9406828621872437, "grad_norm": 514.7180786132812, "learning_rate": 1.4291653448228416e-07, "loss": 13.1153, "step": 465670 }, { "epoch": 0.9407030628199275, "grad_norm": 116.28053283691406, "learning_rate": 1.4283368481430747e-07, "loss": 20.3335, "step": 465680 }, { "epoch": 0.9407232634526114, "grad_norm": 87.40438079833984, "learning_rate": 1.427508588195692e-07, "loss": 15.2348, "step": 465690 }, { "epoch": 0.9407434640852952, "grad_norm": 460.5, "learning_rate": 1.4266805649847392e-07, "loss": 15.3604, "step": 465700 }, { "epoch": 0.940763664717979, "grad_norm": 351.30767822265625, "learning_rate": 1.425852778514264e-07, "loss": 12.1725, "step": 465710 }, { "epoch": 0.9407838653506628, "grad_norm": 393.6469421386719, "learning_rate": 1.4250252287882848e-07, "loss": 16.5886, "step": 465720 }, { "epoch": 0.9408040659833466, "grad_norm": 252.6640167236328, "learning_rate": 1.4241979158108433e-07, "loss": 16.6276, "step": 465730 }, { "epoch": 0.9408242666160305, "grad_norm": 15.118282318115234, "learning_rate": 1.4233708395859692e-07, "loss": 25.3619, "step": 465740 }, { "epoch": 0.9408444672487142, "grad_norm": 140.9523162841797, "learning_rate": 1.4225440001176983e-07, "loss": 15.3279, "step": 465750 }, { "epoch": 0.940864667881398, "grad_norm": 321.87371826171875, "learning_rate": 1.421717397410044e-07, "loss": 25.7802, "step": 465760 }, { "epoch": 0.9408848685140818, "grad_norm": 424.4866027832031, "learning_rate": 1.420891031467053e-07, "loss": 16.5313, "step": 465770 }, { "epoch": 0.9409050691467656, "grad_norm": 162.17381286621094, "learning_rate": 1.4200649022927505e-07, "loss": 12.1579, "step": 465780 }, { "epoch": 0.9409252697794495, "grad_norm": 91.25312805175781, "learning_rate": 1.41923900989116e-07, "loss": 16.8099, "step": 465790 }, { "epoch": 0.9409454704121333, "grad_norm": 355.78717041015625, "learning_rate": 1.4184133542663014e-07, "loss": 16.601, "step": 465800 }, { "epoch": 0.9409656710448171, "grad_norm": 181.00601196289062, "learning_rate": 1.41758793542221e-07, "loss": 11.673, "step": 465810 }, { "epoch": 0.9409858716775009, "grad_norm": 336.8885498046875, "learning_rate": 1.4167627533628992e-07, "loss": 18.4881, "step": 465820 }, { "epoch": 0.9410060723101847, "grad_norm": 1.1595476865768433, "learning_rate": 1.4159378080923936e-07, "loss": 23.6533, "step": 465830 }, { "epoch": 0.9410262729428686, "grad_norm": 322.81280517578125, "learning_rate": 1.4151130996147177e-07, "loss": 20.7972, "step": 465840 }, { "epoch": 0.9410464735755524, "grad_norm": 353.97314453125, "learning_rate": 1.4142886279338852e-07, "loss": 27.6655, "step": 465850 }, { "epoch": 0.9410666742082362, "grad_norm": 183.62677001953125, "learning_rate": 1.4134643930539204e-07, "loss": 13.2374, "step": 465860 }, { "epoch": 0.94108687484092, "grad_norm": 163.1032257080078, "learning_rate": 1.4126403949788369e-07, "loss": 14.9478, "step": 465870 }, { "epoch": 0.9411070754736038, "grad_norm": 50.128055572509766, "learning_rate": 1.4118166337126428e-07, "loss": 22.1623, "step": 465880 }, { "epoch": 0.9411272761062877, "grad_norm": 89.6222915649414, "learning_rate": 1.4109931092593732e-07, "loss": 23.4234, "step": 465890 }, { "epoch": 0.9411474767389715, "grad_norm": 417.8365478515625, "learning_rate": 1.4101698216230254e-07, "loss": 17.3314, "step": 465900 }, { "epoch": 0.9411676773716553, "grad_norm": 163.18458557128906, "learning_rate": 1.4093467708076126e-07, "loss": 15.2798, "step": 465910 }, { "epoch": 0.9411878780043391, "grad_norm": 460.31256103515625, "learning_rate": 1.4085239568171483e-07, "loss": 13.5431, "step": 465920 }, { "epoch": 0.9412080786370229, "grad_norm": 4.70221471786499, "learning_rate": 1.4077013796556515e-07, "loss": 26.0239, "step": 465930 }, { "epoch": 0.9412282792697068, "grad_norm": 166.77098083496094, "learning_rate": 1.406879039327125e-07, "loss": 19.1998, "step": 465940 }, { "epoch": 0.9412484799023906, "grad_norm": 72.78886413574219, "learning_rate": 1.4060569358355703e-07, "loss": 27.4905, "step": 465950 }, { "epoch": 0.9412686805350744, "grad_norm": 225.41270446777344, "learning_rate": 1.405235069185007e-07, "loss": 14.072, "step": 465960 }, { "epoch": 0.9412888811677582, "grad_norm": 309.4897155761719, "learning_rate": 1.4044134393794373e-07, "loss": 16.9801, "step": 465970 }, { "epoch": 0.941309081800442, "grad_norm": 248.46646118164062, "learning_rate": 1.4035920464228525e-07, "loss": 9.1789, "step": 465980 }, { "epoch": 0.9413292824331259, "grad_norm": 129.87884521484375, "learning_rate": 1.4027708903192662e-07, "loss": 24.2261, "step": 465990 }, { "epoch": 0.9413494830658096, "grad_norm": 291.889404296875, "learning_rate": 1.4019499710726913e-07, "loss": 16.4706, "step": 466000 }, { "epoch": 0.9413696836984934, "grad_norm": 137.46664428710938, "learning_rate": 1.4011292886871086e-07, "loss": 13.0814, "step": 466010 }, { "epoch": 0.9413898843311772, "grad_norm": 441.6543884277344, "learning_rate": 1.4003088431665312e-07, "loss": 10.4695, "step": 466020 }, { "epoch": 0.941410084963861, "grad_norm": 383.4626770019531, "learning_rate": 1.3994886345149504e-07, "loss": 16.5051, "step": 466030 }, { "epoch": 0.9414302855965448, "grad_norm": 575.4952392578125, "learning_rate": 1.3986686627363744e-07, "loss": 31.3564, "step": 466040 }, { "epoch": 0.9414504862292287, "grad_norm": 36.06890869140625, "learning_rate": 1.3978489278347883e-07, "loss": 26.1608, "step": 466050 }, { "epoch": 0.9414706868619125, "grad_norm": 257.4670104980469, "learning_rate": 1.397029429814184e-07, "loss": 22.7073, "step": 466060 }, { "epoch": 0.9414908874945963, "grad_norm": 98.51122283935547, "learning_rate": 1.39621016867858e-07, "loss": 18.6808, "step": 466070 }, { "epoch": 0.9415110881272801, "grad_norm": 287.9281921386719, "learning_rate": 1.39539114443194e-07, "loss": 24.4925, "step": 466080 }, { "epoch": 0.941531288759964, "grad_norm": 357.3394775390625, "learning_rate": 1.3945723570782722e-07, "loss": 20.3994, "step": 466090 }, { "epoch": 0.9415514893926478, "grad_norm": 101.79381561279297, "learning_rate": 1.3937538066215672e-07, "loss": 21.3881, "step": 466100 }, { "epoch": 0.9415716900253316, "grad_norm": 207.9473114013672, "learning_rate": 1.3929354930658112e-07, "loss": 12.0363, "step": 466110 }, { "epoch": 0.9415918906580154, "grad_norm": 275.6166687011719, "learning_rate": 1.3921174164149842e-07, "loss": 14.4197, "step": 466120 }, { "epoch": 0.9416120912906992, "grad_norm": 367.0701599121094, "learning_rate": 1.3912995766730887e-07, "loss": 9.1168, "step": 466130 }, { "epoch": 0.941632291923383, "grad_norm": 118.0570297241211, "learning_rate": 1.3904819738441043e-07, "loss": 26.8702, "step": 466140 }, { "epoch": 0.9416524925560669, "grad_norm": 489.6506652832031, "learning_rate": 1.3896646079320064e-07, "loss": 19.6159, "step": 466150 }, { "epoch": 0.9416726931887507, "grad_norm": 198.47056579589844, "learning_rate": 1.388847478940797e-07, "loss": 19.9269, "step": 466160 }, { "epoch": 0.9416928938214345, "grad_norm": 172.43580627441406, "learning_rate": 1.3880305868744392e-07, "loss": 19.4838, "step": 466170 }, { "epoch": 0.9417130944541183, "grad_norm": 269.263916015625, "learning_rate": 1.3872139317369304e-07, "loss": 9.9648, "step": 466180 }, { "epoch": 0.9417332950868021, "grad_norm": 394.6880798339844, "learning_rate": 1.3863975135322505e-07, "loss": 13.4915, "step": 466190 }, { "epoch": 0.941753495719486, "grad_norm": 137.1290740966797, "learning_rate": 1.385581332264363e-07, "loss": 15.7726, "step": 466200 }, { "epoch": 0.9417736963521698, "grad_norm": 113.25942993164062, "learning_rate": 1.3847653879372646e-07, "loss": 19.0675, "step": 466210 }, { "epoch": 0.9417938969848536, "grad_norm": 259.3558654785156, "learning_rate": 1.3839496805549136e-07, "loss": 14.4658, "step": 466220 }, { "epoch": 0.9418140976175374, "grad_norm": 315.2320861816406, "learning_rate": 1.383134210121301e-07, "loss": 12.7709, "step": 466230 }, { "epoch": 0.9418342982502212, "grad_norm": 163.65098571777344, "learning_rate": 1.3823189766403954e-07, "loss": 21.0497, "step": 466240 }, { "epoch": 0.9418544988829051, "grad_norm": 364.7218322753906, "learning_rate": 1.3815039801161723e-07, "loss": 14.7508, "step": 466250 }, { "epoch": 0.9418746995155888, "grad_norm": 420.7170715332031, "learning_rate": 1.3806892205526e-07, "loss": 12.4241, "step": 466260 }, { "epoch": 0.9418949001482726, "grad_norm": 78.01525115966797, "learning_rate": 1.3798746979536482e-07, "loss": 20.511, "step": 466270 }, { "epoch": 0.9419151007809564, "grad_norm": 251.70968627929688, "learning_rate": 1.3790604123232966e-07, "loss": 11.4409, "step": 466280 }, { "epoch": 0.9419353014136402, "grad_norm": 242.22872924804688, "learning_rate": 1.3782463636655087e-07, "loss": 23.0007, "step": 466290 }, { "epoch": 0.9419555020463241, "grad_norm": 31.46925163269043, "learning_rate": 1.3774325519842423e-07, "loss": 14.201, "step": 466300 }, { "epoch": 0.9419757026790079, "grad_norm": 191.0092315673828, "learning_rate": 1.376618977283478e-07, "loss": 20.0056, "step": 466310 }, { "epoch": 0.9419959033116917, "grad_norm": 411.2151184082031, "learning_rate": 1.3758056395671738e-07, "loss": 16.681, "step": 466320 }, { "epoch": 0.9420161039443755, "grad_norm": 170.69198608398438, "learning_rate": 1.374992538839298e-07, "loss": 19.8808, "step": 466330 }, { "epoch": 0.9420363045770593, "grad_norm": 111.5400619506836, "learning_rate": 1.3741796751038095e-07, "loss": 17.3456, "step": 466340 }, { "epoch": 0.9420565052097432, "grad_norm": 300.8111267089844, "learning_rate": 1.373367048364671e-07, "loss": 13.2539, "step": 466350 }, { "epoch": 0.942076705842427, "grad_norm": 1.120597243309021, "learning_rate": 1.3725546586258464e-07, "loss": 14.5331, "step": 466360 }, { "epoch": 0.9420969064751108, "grad_norm": 130.71031188964844, "learning_rate": 1.3717425058912882e-07, "loss": 18.7702, "step": 466370 }, { "epoch": 0.9421171071077946, "grad_norm": 9.499824523925781, "learning_rate": 1.3709305901649594e-07, "loss": 42.5031, "step": 466380 }, { "epoch": 0.9421373077404784, "grad_norm": 342.7139587402344, "learning_rate": 1.370118911450824e-07, "loss": 16.3428, "step": 466390 }, { "epoch": 0.9421575083731623, "grad_norm": 294.46417236328125, "learning_rate": 1.3693074697528231e-07, "loss": 20.2885, "step": 466400 }, { "epoch": 0.9421777090058461, "grad_norm": 32.19709396362305, "learning_rate": 1.36849626507492e-07, "loss": 20.2099, "step": 466410 }, { "epoch": 0.9421979096385299, "grad_norm": 221.39833068847656, "learning_rate": 1.367685297421073e-07, "loss": 22.3431, "step": 466420 }, { "epoch": 0.9422181102712137, "grad_norm": 517.8855590820312, "learning_rate": 1.366874566795229e-07, "loss": 16.7649, "step": 466430 }, { "epoch": 0.9422383109038975, "grad_norm": 349.9701232910156, "learning_rate": 1.3660640732013342e-07, "loss": 10.6436, "step": 466440 }, { "epoch": 0.9422585115365814, "grad_norm": 148.7420196533203, "learning_rate": 1.3652538166433527e-07, "loss": 19.0194, "step": 466450 }, { "epoch": 0.9422787121692652, "grad_norm": 109.53025817871094, "learning_rate": 1.3644437971252144e-07, "loss": 20.9439, "step": 466460 }, { "epoch": 0.942298912801949, "grad_norm": 202.75306701660156, "learning_rate": 1.3636340146508886e-07, "loss": 10.2435, "step": 466470 }, { "epoch": 0.9423191134346328, "grad_norm": 301.680908203125, "learning_rate": 1.362824469224311e-07, "loss": 25.3954, "step": 466480 }, { "epoch": 0.9423393140673166, "grad_norm": 615.1964111328125, "learning_rate": 1.362015160849417e-07, "loss": 18.3119, "step": 466490 }, { "epoch": 0.9423595147000005, "grad_norm": 123.60035705566406, "learning_rate": 1.3612060895301759e-07, "loss": 10.8776, "step": 466500 }, { "epoch": 0.9423797153326842, "grad_norm": 81.8182601928711, "learning_rate": 1.360397255270507e-07, "loss": 12.123, "step": 466510 }, { "epoch": 0.942399915965368, "grad_norm": 23.797916412353516, "learning_rate": 1.3595886580743677e-07, "loss": 16.8994, "step": 466520 }, { "epoch": 0.9424201165980518, "grad_norm": 151.0086212158203, "learning_rate": 1.3587802979456888e-07, "loss": 19.425, "step": 466530 }, { "epoch": 0.9424403172307356, "grad_norm": 393.822509765625, "learning_rate": 1.3579721748884222e-07, "loss": 16.2712, "step": 466540 }, { "epoch": 0.9424605178634194, "grad_norm": 61.192020416259766, "learning_rate": 1.3571642889064984e-07, "loss": 13.6674, "step": 466550 }, { "epoch": 0.9424807184961033, "grad_norm": 74.15399932861328, "learning_rate": 1.356356640003853e-07, "loss": 7.5289, "step": 466560 }, { "epoch": 0.9425009191287871, "grad_norm": 174.9483184814453, "learning_rate": 1.3555492281844273e-07, "loss": 15.7946, "step": 466570 }, { "epoch": 0.9425211197614709, "grad_norm": 242.9797821044922, "learning_rate": 1.354742053452157e-07, "loss": 25.624, "step": 466580 }, { "epoch": 0.9425413203941547, "grad_norm": 426.18695068359375, "learning_rate": 1.353935115810967e-07, "loss": 16.0391, "step": 466590 }, { "epoch": 0.9425615210268385, "grad_norm": 90.09152221679688, "learning_rate": 1.3531284152647983e-07, "loss": 25.8447, "step": 466600 }, { "epoch": 0.9425817216595224, "grad_norm": 135.96788024902344, "learning_rate": 1.3523219518175924e-07, "loss": 13.1811, "step": 466610 }, { "epoch": 0.9426019222922062, "grad_norm": 375.2832946777344, "learning_rate": 1.351515725473257e-07, "loss": 12.4075, "step": 466620 }, { "epoch": 0.94262212292489, "grad_norm": 448.191650390625, "learning_rate": 1.3507097362357392e-07, "loss": 25.0984, "step": 466630 }, { "epoch": 0.9426423235575738, "grad_norm": 420.7967834472656, "learning_rate": 1.349903984108958e-07, "loss": 19.3443, "step": 466640 }, { "epoch": 0.9426625241902576, "grad_norm": 415.8902587890625, "learning_rate": 1.3490984690968488e-07, "loss": 20.8693, "step": 466650 }, { "epoch": 0.9426827248229415, "grad_norm": 533.3202514648438, "learning_rate": 1.3482931912033314e-07, "loss": 17.6854, "step": 466660 }, { "epoch": 0.9427029254556253, "grad_norm": 0.0, "learning_rate": 1.3474881504323301e-07, "loss": 9.6554, "step": 466670 }, { "epoch": 0.9427231260883091, "grad_norm": 394.9208984375, "learning_rate": 1.346683346787775e-07, "loss": 11.8232, "step": 466680 }, { "epoch": 0.9427433267209929, "grad_norm": 303.13507080078125, "learning_rate": 1.3458787802735794e-07, "loss": 16.0649, "step": 466690 }, { "epoch": 0.9427635273536767, "grad_norm": 360.8521728515625, "learning_rate": 1.3450744508936687e-07, "loss": 16.0741, "step": 466700 }, { "epoch": 0.9427837279863606, "grad_norm": 152.80377197265625, "learning_rate": 1.3442703586519724e-07, "loss": 10.3248, "step": 466710 }, { "epoch": 0.9428039286190444, "grad_norm": 264.30511474609375, "learning_rate": 1.3434665035523985e-07, "loss": 30.071, "step": 466720 }, { "epoch": 0.9428241292517282, "grad_norm": 385.9399719238281, "learning_rate": 1.342662885598861e-07, "loss": 6.8205, "step": 466730 }, { "epoch": 0.942844329884412, "grad_norm": 341.55120849609375, "learning_rate": 1.3418595047952897e-07, "loss": 13.4045, "step": 466740 }, { "epoch": 0.9428645305170958, "grad_norm": 493.4660949707031, "learning_rate": 1.341056361145593e-07, "loss": 16.0428, "step": 466750 }, { "epoch": 0.9428847311497797, "grad_norm": 37.99992752075195, "learning_rate": 1.3402534546536783e-07, "loss": 17.0469, "step": 466760 }, { "epoch": 0.9429049317824634, "grad_norm": 246.25341796875, "learning_rate": 1.3394507853234763e-07, "loss": 16.1532, "step": 466770 }, { "epoch": 0.9429251324151472, "grad_norm": 328.9928283691406, "learning_rate": 1.3386483531588834e-07, "loss": 22.0632, "step": 466780 }, { "epoch": 0.942945333047831, "grad_norm": 352.3476257324219, "learning_rate": 1.337846158163819e-07, "loss": 11.8354, "step": 466790 }, { "epoch": 0.9429655336805148, "grad_norm": 449.89324951171875, "learning_rate": 1.3370442003421913e-07, "loss": 23.6962, "step": 466800 }, { "epoch": 0.9429857343131987, "grad_norm": 168.8517608642578, "learning_rate": 1.336242479697908e-07, "loss": 33.493, "step": 466810 }, { "epoch": 0.9430059349458825, "grad_norm": 482.6525573730469, "learning_rate": 1.335440996234877e-07, "loss": 10.6317, "step": 466820 }, { "epoch": 0.9430261355785663, "grad_norm": 208.42434692382812, "learning_rate": 1.334639749956995e-07, "loss": 13.3247, "step": 466830 }, { "epoch": 0.9430463362112501, "grad_norm": 401.0406494140625, "learning_rate": 1.3338387408681875e-07, "loss": 12.9286, "step": 466840 }, { "epoch": 0.9430665368439339, "grad_norm": 369.0025329589844, "learning_rate": 1.333037968972345e-07, "loss": 17.5546, "step": 466850 }, { "epoch": 0.9430867374766178, "grad_norm": 79.51467895507812, "learning_rate": 1.33223743427337e-07, "loss": 17.6554, "step": 466860 }, { "epoch": 0.9431069381093016, "grad_norm": 274.3871154785156, "learning_rate": 1.331437136775171e-07, "loss": 18.3525, "step": 466870 }, { "epoch": 0.9431271387419854, "grad_norm": 104.7078628540039, "learning_rate": 1.330637076481639e-07, "loss": 12.1618, "step": 466880 }, { "epoch": 0.9431473393746692, "grad_norm": 87.01747131347656, "learning_rate": 1.3298372533966874e-07, "loss": 10.0266, "step": 466890 }, { "epoch": 0.943167540007353, "grad_norm": 302.48150634765625, "learning_rate": 1.3290376675242022e-07, "loss": 26.9877, "step": 466900 }, { "epoch": 0.9431877406400369, "grad_norm": 261.38824462890625, "learning_rate": 1.3282383188680802e-07, "loss": 21.9908, "step": 466910 }, { "epoch": 0.9432079412727207, "grad_norm": 6.378824234008789, "learning_rate": 1.327439207432224e-07, "loss": 15.0628, "step": 466920 }, { "epoch": 0.9432281419054045, "grad_norm": 223.4723358154297, "learning_rate": 1.3266403332205248e-07, "loss": 10.1727, "step": 466930 }, { "epoch": 0.9432483425380883, "grad_norm": 162.22921752929688, "learning_rate": 1.3258416962368849e-07, "loss": 12.2772, "step": 466940 }, { "epoch": 0.9432685431707721, "grad_norm": 203.04843139648438, "learning_rate": 1.325043296485179e-07, "loss": 17.2785, "step": 466950 }, { "epoch": 0.943288743803456, "grad_norm": 403.38531494140625, "learning_rate": 1.3242451339693153e-07, "loss": 31.0572, "step": 466960 }, { "epoch": 0.9433089444361398, "grad_norm": 51.905208587646484, "learning_rate": 1.3234472086931738e-07, "loss": 26.5818, "step": 466970 }, { "epoch": 0.9433291450688236, "grad_norm": 6.7533159255981445, "learning_rate": 1.322649520660646e-07, "loss": 13.6709, "step": 466980 }, { "epoch": 0.9433493457015074, "grad_norm": 321.4626159667969, "learning_rate": 1.3218520698756177e-07, "loss": 9.8681, "step": 466990 }, { "epoch": 0.9433695463341912, "grad_norm": 161.235595703125, "learning_rate": 1.3210548563419857e-07, "loss": 14.6061, "step": 467000 }, { "epoch": 0.943389746966875, "grad_norm": 210.1715850830078, "learning_rate": 1.32025788006363e-07, "loss": 24.0145, "step": 467010 }, { "epoch": 0.9434099475995589, "grad_norm": 545.941650390625, "learning_rate": 1.3194611410444258e-07, "loss": 20.3786, "step": 467020 }, { "epoch": 0.9434301482322426, "grad_norm": 312.9477233886719, "learning_rate": 1.3186646392882696e-07, "loss": 9.535, "step": 467030 }, { "epoch": 0.9434503488649264, "grad_norm": 135.27679443359375, "learning_rate": 1.3178683747990362e-07, "loss": 10.6673, "step": 467040 }, { "epoch": 0.9434705494976102, "grad_norm": 435.4106750488281, "learning_rate": 1.3170723475806003e-07, "loss": 24.8198, "step": 467050 }, { "epoch": 0.943490750130294, "grad_norm": 206.95925903320312, "learning_rate": 1.3162765576368587e-07, "loss": 23.6655, "step": 467060 }, { "epoch": 0.9435109507629779, "grad_norm": 178.56077575683594, "learning_rate": 1.315481004971675e-07, "loss": 9.3552, "step": 467070 }, { "epoch": 0.9435311513956617, "grad_norm": 210.2391357421875, "learning_rate": 1.314685689588935e-07, "loss": 18.2021, "step": 467080 }, { "epoch": 0.9435513520283455, "grad_norm": 211.8839569091797, "learning_rate": 1.3138906114925133e-07, "loss": 11.0213, "step": 467090 }, { "epoch": 0.9435715526610293, "grad_norm": 119.14147186279297, "learning_rate": 1.313095770686279e-07, "loss": 12.2139, "step": 467100 }, { "epoch": 0.9435917532937131, "grad_norm": 248.24574279785156, "learning_rate": 1.3123011671741183e-07, "loss": 14.5964, "step": 467110 }, { "epoch": 0.943611953926397, "grad_norm": 158.181640625, "learning_rate": 1.3115068009598886e-07, "loss": 15.6111, "step": 467120 }, { "epoch": 0.9436321545590808, "grad_norm": 338.14825439453125, "learning_rate": 1.3107126720474762e-07, "loss": 12.4597, "step": 467130 }, { "epoch": 0.9436523551917646, "grad_norm": 250.89205932617188, "learning_rate": 1.3099187804407387e-07, "loss": 23.5018, "step": 467140 }, { "epoch": 0.9436725558244484, "grad_norm": 226.29510498046875, "learning_rate": 1.3091251261435568e-07, "loss": 7.2197, "step": 467150 }, { "epoch": 0.9436927564571322, "grad_norm": 278.6545104980469, "learning_rate": 1.3083317091597936e-07, "loss": 15.7558, "step": 467160 }, { "epoch": 0.9437129570898161, "grad_norm": 267.90771484375, "learning_rate": 1.3075385294933129e-07, "loss": 14.5581, "step": 467170 }, { "epoch": 0.9437331577224999, "grad_norm": 0.0, "learning_rate": 1.306745587147984e-07, "loss": 14.3584, "step": 467180 }, { "epoch": 0.9437533583551837, "grad_norm": 288.2501220703125, "learning_rate": 1.3059528821276758e-07, "loss": 18.8858, "step": 467190 }, { "epoch": 0.9437735589878675, "grad_norm": 111.18515014648438, "learning_rate": 1.3051604144362407e-07, "loss": 15.6672, "step": 467200 }, { "epoch": 0.9437937596205513, "grad_norm": 363.2454833984375, "learning_rate": 1.304368184077548e-07, "loss": 18.1603, "step": 467210 }, { "epoch": 0.9438139602532352, "grad_norm": 265.66278076171875, "learning_rate": 1.3035761910554666e-07, "loss": 21.7102, "step": 467220 }, { "epoch": 0.943834160885919, "grad_norm": 357.0940856933594, "learning_rate": 1.302784435373844e-07, "loss": 12.6734, "step": 467230 }, { "epoch": 0.9438543615186028, "grad_norm": 204.77838134765625, "learning_rate": 1.3019929170365376e-07, "loss": 18.3832, "step": 467240 }, { "epoch": 0.9438745621512866, "grad_norm": 301.5811462402344, "learning_rate": 1.3012016360474223e-07, "loss": 8.2714, "step": 467250 }, { "epoch": 0.9438947627839704, "grad_norm": 113.41458892822266, "learning_rate": 1.3004105924103394e-07, "loss": 27.4468, "step": 467260 }, { "epoch": 0.9439149634166543, "grad_norm": 255.78952026367188, "learning_rate": 1.2996197861291472e-07, "loss": 23.7522, "step": 467270 }, { "epoch": 0.943935164049338, "grad_norm": 258.4866943359375, "learning_rate": 1.2988292172076977e-07, "loss": 18.5747, "step": 467280 }, { "epoch": 0.9439553646820218, "grad_norm": 32.485294342041016, "learning_rate": 1.2980388856498604e-07, "loss": 8.9216, "step": 467290 }, { "epoch": 0.9439755653147056, "grad_norm": 112.42797088623047, "learning_rate": 1.29724879145946e-07, "loss": 13.212, "step": 467300 }, { "epoch": 0.9439957659473894, "grad_norm": 265.965087890625, "learning_rate": 1.296458934640371e-07, "loss": 9.4598, "step": 467310 }, { "epoch": 0.9440159665800733, "grad_norm": 378.8536682128906, "learning_rate": 1.2956693151964296e-07, "loss": 18.24, "step": 467320 }, { "epoch": 0.9440361672127571, "grad_norm": 212.27166748046875, "learning_rate": 1.2948799331314933e-07, "loss": 15.0391, "step": 467330 }, { "epoch": 0.9440563678454409, "grad_norm": 833.9385375976562, "learning_rate": 1.2940907884494036e-07, "loss": 45.1016, "step": 467340 }, { "epoch": 0.9440765684781247, "grad_norm": 298.88360595703125, "learning_rate": 1.2933018811540078e-07, "loss": 17.9972, "step": 467350 }, { "epoch": 0.9440967691108085, "grad_norm": 202.49998474121094, "learning_rate": 1.2925132112491523e-07, "loss": 15.4919, "step": 467360 }, { "epoch": 0.9441169697434924, "grad_norm": 188.1671600341797, "learning_rate": 1.2917247787386787e-07, "loss": 16.3879, "step": 467370 }, { "epoch": 0.9441371703761762, "grad_norm": 370.61407470703125, "learning_rate": 1.2909365836264287e-07, "loss": 18.5846, "step": 467380 }, { "epoch": 0.94415737100886, "grad_norm": 153.25758361816406, "learning_rate": 1.2901486259162488e-07, "loss": 18.4889, "step": 467390 }, { "epoch": 0.9441775716415438, "grad_norm": 251.57826232910156, "learning_rate": 1.289360905611975e-07, "loss": 8.5688, "step": 467400 }, { "epoch": 0.9441977722742276, "grad_norm": 91.77510070800781, "learning_rate": 1.288573422717454e-07, "loss": 31.492, "step": 467410 }, { "epoch": 0.9442179729069115, "grad_norm": 6.797186851501465, "learning_rate": 1.287786177236511e-07, "loss": 15.1667, "step": 467420 }, { "epoch": 0.9442381735395953, "grad_norm": 563.285888671875, "learning_rate": 1.2869991691729922e-07, "loss": 29.3362, "step": 467430 }, { "epoch": 0.9442583741722791, "grad_norm": 18.798179626464844, "learning_rate": 1.2862123985307284e-07, "loss": 10.5163, "step": 467440 }, { "epoch": 0.9442785748049629, "grad_norm": 217.36122131347656, "learning_rate": 1.285425865313561e-07, "loss": 13.9328, "step": 467450 }, { "epoch": 0.9442987754376467, "grad_norm": 314.8659973144531, "learning_rate": 1.28463956952532e-07, "loss": 20.3368, "step": 467460 }, { "epoch": 0.9443189760703306, "grad_norm": 440.9849548339844, "learning_rate": 1.2838535111698359e-07, "loss": 19.1159, "step": 467470 }, { "epoch": 0.9443391767030144, "grad_norm": 248.7581329345703, "learning_rate": 1.2830676902509443e-07, "loss": 13.1833, "step": 467480 }, { "epoch": 0.9443593773356982, "grad_norm": 266.7648620605469, "learning_rate": 1.2822821067724643e-07, "loss": 37.2818, "step": 467490 }, { "epoch": 0.944379577968382, "grad_norm": 181.34719848632812, "learning_rate": 1.2814967607382433e-07, "loss": 9.4124, "step": 467500 }, { "epoch": 0.9443997786010658, "grad_norm": 273.1263122558594, "learning_rate": 1.2807116521520947e-07, "loss": 12.9209, "step": 467510 }, { "epoch": 0.9444199792337497, "grad_norm": 432.1019592285156, "learning_rate": 1.279926781017843e-07, "loss": 26.1046, "step": 467520 }, { "epoch": 0.9444401798664335, "grad_norm": 42.72356033325195, "learning_rate": 1.2791421473393184e-07, "loss": 10.2638, "step": 467530 }, { "epoch": 0.9444603804991172, "grad_norm": 270.31695556640625, "learning_rate": 1.2783577511203515e-07, "loss": 15.005, "step": 467540 }, { "epoch": 0.944480581131801, "grad_norm": 641.5654296875, "learning_rate": 1.2775735923647614e-07, "loss": 20.9434, "step": 467550 }, { "epoch": 0.9445007817644848, "grad_norm": 177.09170532226562, "learning_rate": 1.2767896710763616e-07, "loss": 14.5292, "step": 467560 }, { "epoch": 0.9445209823971686, "grad_norm": 128.17633056640625, "learning_rate": 1.2760059872589824e-07, "loss": 21.4097, "step": 467570 }, { "epoch": 0.9445411830298525, "grad_norm": 191.09945678710938, "learning_rate": 1.2752225409164432e-07, "loss": 24.4088, "step": 467580 }, { "epoch": 0.9445613836625363, "grad_norm": 142.05490112304688, "learning_rate": 1.2744393320525573e-07, "loss": 11.0233, "step": 467590 }, { "epoch": 0.9445815842952201, "grad_norm": 330.49957275390625, "learning_rate": 1.2736563606711384e-07, "loss": 11.7947, "step": 467600 }, { "epoch": 0.9446017849279039, "grad_norm": 195.06472778320312, "learning_rate": 1.2728736267760167e-07, "loss": 42.5652, "step": 467610 }, { "epoch": 0.9446219855605877, "grad_norm": 123.06463623046875, "learning_rate": 1.2720911303710004e-07, "loss": 11.8161, "step": 467620 }, { "epoch": 0.9446421861932716, "grad_norm": 149.53256225585938, "learning_rate": 1.2713088714598974e-07, "loss": 10.8602, "step": 467630 }, { "epoch": 0.9446623868259554, "grad_norm": 118.68656158447266, "learning_rate": 1.2705268500465274e-07, "loss": 13.8827, "step": 467640 }, { "epoch": 0.9446825874586392, "grad_norm": 11.887679100036621, "learning_rate": 1.2697450661347033e-07, "loss": 12.5499, "step": 467650 }, { "epoch": 0.944702788091323, "grad_norm": 571.0030517578125, "learning_rate": 1.2689635197282224e-07, "loss": 25.1405, "step": 467660 }, { "epoch": 0.9447229887240068, "grad_norm": 598.7418212890625, "learning_rate": 1.2681822108309094e-07, "loss": 23.4043, "step": 467670 }, { "epoch": 0.9447431893566907, "grad_norm": 661.38427734375, "learning_rate": 1.2674011394465614e-07, "loss": 26.8192, "step": 467680 }, { "epoch": 0.9447633899893745, "grad_norm": 171.64144897460938, "learning_rate": 1.2666203055789915e-07, "loss": 10.2326, "step": 467690 }, { "epoch": 0.9447835906220583, "grad_norm": 269.7370300292969, "learning_rate": 1.2658397092320028e-07, "loss": 18.9251, "step": 467700 }, { "epoch": 0.9448037912547421, "grad_norm": 360.25775146484375, "learning_rate": 1.2650593504094034e-07, "loss": 16.4297, "step": 467710 }, { "epoch": 0.9448239918874259, "grad_norm": 316.7818908691406, "learning_rate": 1.2642792291149896e-07, "loss": 13.7358, "step": 467720 }, { "epoch": 0.9448441925201098, "grad_norm": 580.6852416992188, "learning_rate": 1.2634993453525702e-07, "loss": 25.5588, "step": 467730 }, { "epoch": 0.9448643931527936, "grad_norm": 177.68309020996094, "learning_rate": 1.2627196991259473e-07, "loss": 24.8256, "step": 467740 }, { "epoch": 0.9448845937854774, "grad_norm": 170.87631225585938, "learning_rate": 1.261940290438912e-07, "loss": 22.1925, "step": 467750 }, { "epoch": 0.9449047944181612, "grad_norm": 546.9649658203125, "learning_rate": 1.2611611192952733e-07, "loss": 14.5484, "step": 467760 }, { "epoch": 0.944924995050845, "grad_norm": 11.079774856567383, "learning_rate": 1.2603821856988218e-07, "loss": 13.4663, "step": 467770 }, { "epoch": 0.9449451956835289, "grad_norm": 354.6279602050781, "learning_rate": 1.259603489653355e-07, "loss": 14.3967, "step": 467780 }, { "epoch": 0.9449653963162126, "grad_norm": 219.11502075195312, "learning_rate": 1.2588250311626693e-07, "loss": 19.6991, "step": 467790 }, { "epoch": 0.9449855969488964, "grad_norm": 287.68670654296875, "learning_rate": 1.258046810230562e-07, "loss": 23.0365, "step": 467800 }, { "epoch": 0.9450057975815802, "grad_norm": 224.74842834472656, "learning_rate": 1.257268826860819e-07, "loss": 29.1695, "step": 467810 }, { "epoch": 0.945025998214264, "grad_norm": 150.61618041992188, "learning_rate": 1.2564910810572317e-07, "loss": 10.7292, "step": 467820 }, { "epoch": 0.9450461988469478, "grad_norm": 367.19525146484375, "learning_rate": 1.255713572823608e-07, "loss": 14.223, "step": 467830 }, { "epoch": 0.9450663994796317, "grad_norm": 493.5210266113281, "learning_rate": 1.2549363021637174e-07, "loss": 14.1907, "step": 467840 }, { "epoch": 0.9450866001123155, "grad_norm": 161.30459594726562, "learning_rate": 1.2541592690813508e-07, "loss": 14.1587, "step": 467850 }, { "epoch": 0.9451068007449993, "grad_norm": 210.84934997558594, "learning_rate": 1.2533824735803059e-07, "loss": 17.4816, "step": 467860 }, { "epoch": 0.9451270013776831, "grad_norm": 38.96179962158203, "learning_rate": 1.252605915664362e-07, "loss": 18.9776, "step": 467870 }, { "epoch": 0.945147202010367, "grad_norm": 398.9723205566406, "learning_rate": 1.2518295953373005e-07, "loss": 12.3231, "step": 467880 }, { "epoch": 0.9451674026430508, "grad_norm": 128.52308654785156, "learning_rate": 1.2510535126029067e-07, "loss": 14.2906, "step": 467890 }, { "epoch": 0.9451876032757346, "grad_norm": 35.2382926940918, "learning_rate": 1.2502776674649776e-07, "loss": 17.7047, "step": 467900 }, { "epoch": 0.9452078039084184, "grad_norm": 362.67681884765625, "learning_rate": 1.2495020599272766e-07, "loss": 15.2447, "step": 467910 }, { "epoch": 0.9452280045411022, "grad_norm": 220.71279907226562, "learning_rate": 1.2487266899935845e-07, "loss": 13.3988, "step": 467920 }, { "epoch": 0.945248205173786, "grad_norm": 350.5021057128906, "learning_rate": 1.2479515576676925e-07, "loss": 22.0513, "step": 467930 }, { "epoch": 0.9452684058064699, "grad_norm": 168.4711151123047, "learning_rate": 1.24717666295337e-07, "loss": 6.218, "step": 467940 }, { "epoch": 0.9452886064391537, "grad_norm": 141.270263671875, "learning_rate": 1.2464020058543912e-07, "loss": 10.5573, "step": 467950 }, { "epoch": 0.9453088070718375, "grad_norm": 290.7150573730469, "learning_rate": 1.2456275863745426e-07, "loss": 14.7041, "step": 467960 }, { "epoch": 0.9453290077045213, "grad_norm": 738.6734008789062, "learning_rate": 1.2448534045175876e-07, "loss": 23.7464, "step": 467970 }, { "epoch": 0.9453492083372051, "grad_norm": 304.2628479003906, "learning_rate": 1.2440794602873064e-07, "loss": 12.3787, "step": 467980 }, { "epoch": 0.945369408969889, "grad_norm": 162.34193420410156, "learning_rate": 1.2433057536874682e-07, "loss": 9.3161, "step": 467990 }, { "epoch": 0.9453896096025728, "grad_norm": 506.57305908203125, "learning_rate": 1.2425322847218368e-07, "loss": 21.8317, "step": 468000 }, { "epoch": 0.9454098102352566, "grad_norm": 60.979793548583984, "learning_rate": 1.241759053394198e-07, "loss": 14.2102, "step": 468010 }, { "epoch": 0.9454300108679404, "grad_norm": 71.64190673828125, "learning_rate": 1.2409860597083102e-07, "loss": 9.0547, "step": 468020 }, { "epoch": 0.9454502115006242, "grad_norm": 321.5577087402344, "learning_rate": 1.240213303667942e-07, "loss": 7.2337, "step": 468030 }, { "epoch": 0.9454704121333081, "grad_norm": 410.08990478515625, "learning_rate": 1.239440785276863e-07, "loss": 32.9802, "step": 468040 }, { "epoch": 0.9454906127659918, "grad_norm": 522.6094970703125, "learning_rate": 1.2386685045388313e-07, "loss": 19.6221, "step": 468050 }, { "epoch": 0.9455108133986756, "grad_norm": 339.32867431640625, "learning_rate": 1.2378964614576162e-07, "loss": 15.568, "step": 468060 }, { "epoch": 0.9455310140313594, "grad_norm": 302.2319641113281, "learning_rate": 1.237124656036981e-07, "loss": 20.2023, "step": 468070 }, { "epoch": 0.9455512146640432, "grad_norm": 133.09210205078125, "learning_rate": 1.236353088280684e-07, "loss": 18.3638, "step": 468080 }, { "epoch": 0.9455714152967271, "grad_norm": 499.7694091796875, "learning_rate": 1.2355817581924945e-07, "loss": 12.3634, "step": 468090 }, { "epoch": 0.9455916159294109, "grad_norm": 416.52093505859375, "learning_rate": 1.2348106657761537e-07, "loss": 10.5899, "step": 468100 }, { "epoch": 0.9456118165620947, "grad_norm": 173.1257781982422, "learning_rate": 1.2340398110354424e-07, "loss": 14.2336, "step": 468110 }, { "epoch": 0.9456320171947785, "grad_norm": 250.32508850097656, "learning_rate": 1.2332691939741015e-07, "loss": 8.7739, "step": 468120 }, { "epoch": 0.9456522178274623, "grad_norm": 205.54248046875, "learning_rate": 1.2324988145958895e-07, "loss": 18.2811, "step": 468130 }, { "epoch": 0.9456724184601462, "grad_norm": 23.474239349365234, "learning_rate": 1.2317286729045586e-07, "loss": 20.2123, "step": 468140 }, { "epoch": 0.94569261909283, "grad_norm": 186.38671875, "learning_rate": 1.2309587689038783e-07, "loss": 27.4474, "step": 468150 }, { "epoch": 0.9457128197255138, "grad_norm": 164.83273315429688, "learning_rate": 1.2301891025975897e-07, "loss": 16.3613, "step": 468160 }, { "epoch": 0.9457330203581976, "grad_norm": 66.46712493896484, "learning_rate": 1.229419673989435e-07, "loss": 15.0921, "step": 468170 }, { "epoch": 0.9457532209908814, "grad_norm": 267.094482421875, "learning_rate": 1.2286504830831824e-07, "loss": 14.6982, "step": 468180 }, { "epoch": 0.9457734216235653, "grad_norm": 284.9889831542969, "learning_rate": 1.2278815298825742e-07, "loss": 25.1199, "step": 468190 }, { "epoch": 0.9457936222562491, "grad_norm": 277.446533203125, "learning_rate": 1.2271128143913458e-07, "loss": 31.3855, "step": 468200 }, { "epoch": 0.9458138228889329, "grad_norm": 116.27572631835938, "learning_rate": 1.2263443366132555e-07, "loss": 7.6856, "step": 468210 }, { "epoch": 0.9458340235216167, "grad_norm": 237.4043731689453, "learning_rate": 1.2255760965520557e-07, "loss": 21.5642, "step": 468220 }, { "epoch": 0.9458542241543005, "grad_norm": 299.2898864746094, "learning_rate": 1.224808094211477e-07, "loss": 15.2724, "step": 468230 }, { "epoch": 0.9458744247869844, "grad_norm": 245.67074584960938, "learning_rate": 1.2240403295952662e-07, "loss": 9.2735, "step": 468240 }, { "epoch": 0.9458946254196682, "grad_norm": 399.322265625, "learning_rate": 1.2232728027071704e-07, "loss": 13.4048, "step": 468250 }, { "epoch": 0.945914826052352, "grad_norm": 69.8430404663086, "learning_rate": 1.222505513550931e-07, "loss": 9.9953, "step": 468260 }, { "epoch": 0.9459350266850358, "grad_norm": 737.2543334960938, "learning_rate": 1.221738462130273e-07, "loss": 25.3555, "step": 468270 }, { "epoch": 0.9459552273177196, "grad_norm": 54.74570846557617, "learning_rate": 1.2209716484489543e-07, "loss": 24.7528, "step": 468280 }, { "epoch": 0.9459754279504035, "grad_norm": 324.6932373046875, "learning_rate": 1.2202050725106995e-07, "loss": 23.5245, "step": 468290 }, { "epoch": 0.9459956285830872, "grad_norm": 189.67054748535156, "learning_rate": 1.2194387343192504e-07, "loss": 15.4185, "step": 468300 }, { "epoch": 0.946015829215771, "grad_norm": 157.17291259765625, "learning_rate": 1.2186726338783427e-07, "loss": 10.3489, "step": 468310 }, { "epoch": 0.9460360298484548, "grad_norm": 288.1325378417969, "learning_rate": 1.2179067711917015e-07, "loss": 12.6965, "step": 468320 }, { "epoch": 0.9460562304811386, "grad_norm": 113.22279357910156, "learning_rate": 1.2171411462630732e-07, "loss": 14.872, "step": 468330 }, { "epoch": 0.9460764311138224, "grad_norm": 384.1490478515625, "learning_rate": 1.216375759096178e-07, "loss": 34.573, "step": 468340 }, { "epoch": 0.9460966317465063, "grad_norm": 537.5817260742188, "learning_rate": 1.2156106096947563e-07, "loss": 30.7704, "step": 468350 }, { "epoch": 0.9461168323791901, "grad_norm": 83.53834533691406, "learning_rate": 1.2148456980625223e-07, "loss": 15.361, "step": 468360 }, { "epoch": 0.9461370330118739, "grad_norm": 222.9774169921875, "learning_rate": 1.214081024203223e-07, "loss": 16.106, "step": 468370 }, { "epoch": 0.9461572336445577, "grad_norm": 52.098289489746094, "learning_rate": 1.2133165881205723e-07, "loss": 17.7757, "step": 468380 }, { "epoch": 0.9461774342772415, "grad_norm": 193.8730926513672, "learning_rate": 1.2125523898182945e-07, "loss": 18.033, "step": 468390 }, { "epoch": 0.9461976349099254, "grad_norm": 116.78641510009766, "learning_rate": 1.211788429300126e-07, "loss": 18.3156, "step": 468400 }, { "epoch": 0.9462178355426092, "grad_norm": 339.0370178222656, "learning_rate": 1.21102470656978e-07, "loss": 21.2598, "step": 468410 }, { "epoch": 0.946238036175293, "grad_norm": 222.95582580566406, "learning_rate": 1.2102612216309816e-07, "loss": 16.2987, "step": 468420 }, { "epoch": 0.9462582368079768, "grad_norm": 61.94565200805664, "learning_rate": 1.2094979744874502e-07, "loss": 15.6604, "step": 468430 }, { "epoch": 0.9462784374406606, "grad_norm": 408.4616394042969, "learning_rate": 1.2087349651429215e-07, "loss": 12.794, "step": 468440 }, { "epoch": 0.9462986380733445, "grad_norm": 287.6683044433594, "learning_rate": 1.207972193601087e-07, "loss": 12.9118, "step": 468450 }, { "epoch": 0.9463188387060283, "grad_norm": 260.74639892578125, "learning_rate": 1.207209659865677e-07, "loss": 14.1318, "step": 468460 }, { "epoch": 0.9463390393387121, "grad_norm": 444.3227844238281, "learning_rate": 1.206447363940416e-07, "loss": 18.9796, "step": 468470 }, { "epoch": 0.9463592399713959, "grad_norm": 469.51971435546875, "learning_rate": 1.205685305829013e-07, "loss": 24.7297, "step": 468480 }, { "epoch": 0.9463794406040797, "grad_norm": 134.87899780273438, "learning_rate": 1.204923485535181e-07, "loss": 22.758, "step": 468490 }, { "epoch": 0.9463996412367636, "grad_norm": 418.8274841308594, "learning_rate": 1.2041619030626283e-07, "loss": 21.7794, "step": 468500 }, { "epoch": 0.9464198418694474, "grad_norm": 124.39008331298828, "learning_rate": 1.2034005584150854e-07, "loss": 12.7598, "step": 468510 }, { "epoch": 0.9464400425021312, "grad_norm": 294.5018005371094, "learning_rate": 1.2026394515962382e-07, "loss": 27.2352, "step": 468520 }, { "epoch": 0.946460243134815, "grad_norm": 265.9117126464844, "learning_rate": 1.2018785826098057e-07, "loss": 8.2796, "step": 468530 }, { "epoch": 0.9464804437674988, "grad_norm": 134.3726348876953, "learning_rate": 1.2011179514595072e-07, "loss": 29.3933, "step": 468540 }, { "epoch": 0.9465006444001827, "grad_norm": 245.517333984375, "learning_rate": 1.20035755814904e-07, "loss": 16.1021, "step": 468550 }, { "epoch": 0.9465208450328664, "grad_norm": 12.15012264251709, "learning_rate": 1.1995974026821066e-07, "loss": 24.2295, "step": 468560 }, { "epoch": 0.9465410456655502, "grad_norm": 163.0849151611328, "learning_rate": 1.1988374850624208e-07, "loss": 16.3836, "step": 468570 }, { "epoch": 0.946561246298234, "grad_norm": 0.0, "learning_rate": 1.198077805293679e-07, "loss": 3.8871, "step": 468580 }, { "epoch": 0.9465814469309178, "grad_norm": 255.66712951660156, "learning_rate": 1.1973183633795849e-07, "loss": 52.9546, "step": 468590 }, { "epoch": 0.9466016475636017, "grad_norm": 25.33980941772461, "learning_rate": 1.1965591593238513e-07, "loss": 10.039, "step": 468600 }, { "epoch": 0.9466218481962855, "grad_norm": 210.8309783935547, "learning_rate": 1.1958001931301587e-07, "loss": 11.9243, "step": 468610 }, { "epoch": 0.9466420488289693, "grad_norm": 138.99659729003906, "learning_rate": 1.195041464802227e-07, "loss": 11.3339, "step": 468620 }, { "epoch": 0.9466622494616531, "grad_norm": 28.0512752532959, "learning_rate": 1.19428297434373e-07, "loss": 22.4278, "step": 468630 }, { "epoch": 0.9466824500943369, "grad_norm": 294.96826171875, "learning_rate": 1.1935247217583934e-07, "loss": 11.0834, "step": 468640 }, { "epoch": 0.9467026507270208, "grad_norm": 0.0, "learning_rate": 1.1927667070498916e-07, "loss": 10.3246, "step": 468650 }, { "epoch": 0.9467228513597046, "grad_norm": 217.53558349609375, "learning_rate": 1.1920089302219218e-07, "loss": 15.5778, "step": 468660 }, { "epoch": 0.9467430519923884, "grad_norm": 22.173551559448242, "learning_rate": 1.1912513912781864e-07, "loss": 18.1012, "step": 468670 }, { "epoch": 0.9467632526250722, "grad_norm": 246.7733612060547, "learning_rate": 1.1904940902223661e-07, "loss": 17.512, "step": 468680 }, { "epoch": 0.946783453257756, "grad_norm": 312.6581726074219, "learning_rate": 1.1897370270581632e-07, "loss": 15.442, "step": 468690 }, { "epoch": 0.9468036538904399, "grad_norm": 9.137345314025879, "learning_rate": 1.1889802017892638e-07, "loss": 13.729, "step": 468700 }, { "epoch": 0.9468238545231237, "grad_norm": 22.472421646118164, "learning_rate": 1.1882236144193482e-07, "loss": 11.9116, "step": 468710 }, { "epoch": 0.9468440551558075, "grad_norm": 164.4954833984375, "learning_rate": 1.1874672649521135e-07, "loss": 25.9625, "step": 468720 }, { "epoch": 0.9468642557884913, "grad_norm": 115.21696472167969, "learning_rate": 1.1867111533912457e-07, "loss": 11.0453, "step": 468730 }, { "epoch": 0.9468844564211751, "grad_norm": 243.4597930908203, "learning_rate": 1.1859552797404194e-07, "loss": 12.88, "step": 468740 }, { "epoch": 0.946904657053859, "grad_norm": 0.0, "learning_rate": 1.185199644003332e-07, "loss": 14.7446, "step": 468750 }, { "epoch": 0.9469248576865428, "grad_norm": 552.9313354492188, "learning_rate": 1.1844442461836636e-07, "loss": 26.1298, "step": 468760 }, { "epoch": 0.9469450583192266, "grad_norm": 114.78852081298828, "learning_rate": 1.1836890862850892e-07, "loss": 18.9783, "step": 468770 }, { "epoch": 0.9469652589519104, "grad_norm": 174.59942626953125, "learning_rate": 1.1829341643112946e-07, "loss": 25.6725, "step": 468780 }, { "epoch": 0.9469854595845942, "grad_norm": 544.2118530273438, "learning_rate": 1.1821794802659603e-07, "loss": 14.5247, "step": 468790 }, { "epoch": 0.9470056602172781, "grad_norm": 182.3418731689453, "learning_rate": 1.1814250341527611e-07, "loss": 16.2897, "step": 468800 }, { "epoch": 0.9470258608499619, "grad_norm": 0.0, "learning_rate": 1.1806708259753718e-07, "loss": 15.6742, "step": 468810 }, { "epoch": 0.9470460614826456, "grad_norm": 212.8171844482422, "learning_rate": 1.179916855737473e-07, "loss": 16.854, "step": 468820 }, { "epoch": 0.9470662621153294, "grad_norm": 208.92489624023438, "learning_rate": 1.1791631234427448e-07, "loss": 14.473, "step": 468830 }, { "epoch": 0.9470864627480132, "grad_norm": 797.2440185546875, "learning_rate": 1.1784096290948455e-07, "loss": 22.6394, "step": 468840 }, { "epoch": 0.947106663380697, "grad_norm": 368.9800109863281, "learning_rate": 1.177656372697461e-07, "loss": 14.2557, "step": 468850 }, { "epoch": 0.9471268640133809, "grad_norm": 150.45416259765625, "learning_rate": 1.1769033542542552e-07, "loss": 7.5015, "step": 468860 }, { "epoch": 0.9471470646460647, "grad_norm": 169.18548583984375, "learning_rate": 1.1761505737689082e-07, "loss": 18.4031, "step": 468870 }, { "epoch": 0.9471672652787485, "grad_norm": 204.14382934570312, "learning_rate": 1.175398031245073e-07, "loss": 12.0764, "step": 468880 }, { "epoch": 0.9471874659114323, "grad_norm": 197.69760131835938, "learning_rate": 1.1746457266864297e-07, "loss": 14.9799, "step": 468890 }, { "epoch": 0.9472076665441161, "grad_norm": 312.3067321777344, "learning_rate": 1.1738936600966366e-07, "loss": 14.4391, "step": 468900 }, { "epoch": 0.9472278671768, "grad_norm": 270.6473693847656, "learning_rate": 1.173141831479374e-07, "loss": 17.4277, "step": 468910 }, { "epoch": 0.9472480678094838, "grad_norm": 161.43527221679688, "learning_rate": 1.1723902408382892e-07, "loss": 21.1507, "step": 468920 }, { "epoch": 0.9472682684421676, "grad_norm": 248.78045654296875, "learning_rate": 1.1716388881770513e-07, "loss": 28.6238, "step": 468930 }, { "epoch": 0.9472884690748514, "grad_norm": 490.5490417480469, "learning_rate": 1.1708877734993296e-07, "loss": 17.6159, "step": 468940 }, { "epoch": 0.9473086697075352, "grad_norm": 171.56349182128906, "learning_rate": 1.1701368968087711e-07, "loss": 9.1319, "step": 468950 }, { "epoch": 0.9473288703402191, "grad_norm": 262.90118408203125, "learning_rate": 1.1693862581090453e-07, "loss": 11.6391, "step": 468960 }, { "epoch": 0.9473490709729029, "grad_norm": 520.4573974609375, "learning_rate": 1.1686358574038104e-07, "loss": 20.4618, "step": 468970 }, { "epoch": 0.9473692716055867, "grad_norm": 89.89054107666016, "learning_rate": 1.1678856946967244e-07, "loss": 16.2975, "step": 468980 }, { "epoch": 0.9473894722382705, "grad_norm": 0.0, "learning_rate": 1.1671357699914343e-07, "loss": 11.2836, "step": 468990 }, { "epoch": 0.9474096728709543, "grad_norm": 124.83848571777344, "learning_rate": 1.166386083291604e-07, "loss": 13.0262, "step": 469000 }, { "epoch": 0.9474298735036382, "grad_norm": 459.4477233886719, "learning_rate": 1.1656366346008862e-07, "loss": 21.6147, "step": 469010 }, { "epoch": 0.947450074136322, "grad_norm": 277.0845947265625, "learning_rate": 1.1648874239229391e-07, "loss": 16.309, "step": 469020 }, { "epoch": 0.9474702747690058, "grad_norm": 221.6920166015625, "learning_rate": 1.1641384512613985e-07, "loss": 11.8478, "step": 469030 }, { "epoch": 0.9474904754016896, "grad_norm": 311.6980285644531, "learning_rate": 1.1633897166199227e-07, "loss": 20.3404, "step": 469040 }, { "epoch": 0.9475106760343734, "grad_norm": 295.0459899902344, "learning_rate": 1.1626412200021697e-07, "loss": 12.8551, "step": 469050 }, { "epoch": 0.9475308766670573, "grad_norm": 337.02716064453125, "learning_rate": 1.1618929614117757e-07, "loss": 21.6753, "step": 469060 }, { "epoch": 0.947551077299741, "grad_norm": 43.183799743652344, "learning_rate": 1.1611449408523879e-07, "loss": 17.3536, "step": 469070 }, { "epoch": 0.9475712779324248, "grad_norm": 218.86314392089844, "learning_rate": 1.1603971583276641e-07, "loss": 11.2716, "step": 469080 }, { "epoch": 0.9475914785651086, "grad_norm": 32.14845275878906, "learning_rate": 1.1596496138412405e-07, "loss": 29.2551, "step": 469090 }, { "epoch": 0.9476116791977924, "grad_norm": 514.91943359375, "learning_rate": 1.1589023073967586e-07, "loss": 19.7921, "step": 469100 }, { "epoch": 0.9476318798304763, "grad_norm": 0.8910002708435059, "learning_rate": 1.1581552389978601e-07, "loss": 9.6958, "step": 469110 }, { "epoch": 0.9476520804631601, "grad_norm": 233.43312072753906, "learning_rate": 1.1574084086481973e-07, "loss": 24.8293, "step": 469120 }, { "epoch": 0.9476722810958439, "grad_norm": 242.30213928222656, "learning_rate": 1.1566618163513954e-07, "loss": 12.9195, "step": 469130 }, { "epoch": 0.9476924817285277, "grad_norm": 17.421157836914062, "learning_rate": 1.1559154621110957e-07, "loss": 7.9596, "step": 469140 }, { "epoch": 0.9477126823612115, "grad_norm": 700.6426391601562, "learning_rate": 1.155169345930951e-07, "loss": 20.2572, "step": 469150 }, { "epoch": 0.9477328829938954, "grad_norm": 450.5594177246094, "learning_rate": 1.1544234678145805e-07, "loss": 23.2593, "step": 469160 }, { "epoch": 0.9477530836265792, "grad_norm": 111.23733520507812, "learning_rate": 1.1536778277656258e-07, "loss": 10.1155, "step": 469170 }, { "epoch": 0.947773284259263, "grad_norm": 180.03369140625, "learning_rate": 1.1529324257877228e-07, "loss": 21.2579, "step": 469180 }, { "epoch": 0.9477934848919468, "grad_norm": 174.05715942382812, "learning_rate": 1.152187261884502e-07, "loss": 14.3938, "step": 469190 }, { "epoch": 0.9478136855246306, "grad_norm": 443.8026428222656, "learning_rate": 1.1514423360595939e-07, "loss": 14.3149, "step": 469200 }, { "epoch": 0.9478338861573145, "grad_norm": 859.9103393554688, "learning_rate": 1.1506976483166343e-07, "loss": 21.5424, "step": 469210 }, { "epoch": 0.9478540867899983, "grad_norm": 729.26171875, "learning_rate": 1.1499531986592482e-07, "loss": 22.6312, "step": 469220 }, { "epoch": 0.9478742874226821, "grad_norm": 321.8718566894531, "learning_rate": 1.1492089870910662e-07, "loss": 17.0135, "step": 469230 }, { "epoch": 0.9478944880553659, "grad_norm": 254.30140686035156, "learning_rate": 1.1484650136157127e-07, "loss": 22.6747, "step": 469240 }, { "epoch": 0.9479146886880497, "grad_norm": 459.9327392578125, "learning_rate": 1.1477212782368185e-07, "loss": 16.1738, "step": 469250 }, { "epoch": 0.9479348893207336, "grad_norm": 345.2594909667969, "learning_rate": 1.1469777809580084e-07, "loss": 20.7374, "step": 469260 }, { "epoch": 0.9479550899534174, "grad_norm": 194.55494689941406, "learning_rate": 1.1462345217828963e-07, "loss": 10.2714, "step": 469270 }, { "epoch": 0.9479752905861012, "grad_norm": 374.4662170410156, "learning_rate": 1.1454915007151179e-07, "loss": 16.5521, "step": 469280 }, { "epoch": 0.947995491218785, "grad_norm": 177.5438232421875, "learning_rate": 1.1447487177582816e-07, "loss": 18.4548, "step": 469290 }, { "epoch": 0.9480156918514688, "grad_norm": 267.1248779296875, "learning_rate": 1.1440061729160235e-07, "loss": 19.3315, "step": 469300 }, { "epoch": 0.9480358924841527, "grad_norm": 45.62554168701172, "learning_rate": 1.1432638661919515e-07, "loss": 9.7843, "step": 469310 }, { "epoch": 0.9480560931168365, "grad_norm": 358.161376953125, "learning_rate": 1.1425217975896796e-07, "loss": 12.668, "step": 469320 }, { "epoch": 0.9480762937495202, "grad_norm": 299.3165283203125, "learning_rate": 1.1417799671128327e-07, "loss": 16.3915, "step": 469330 }, { "epoch": 0.948096494382204, "grad_norm": 272.8214111328125, "learning_rate": 1.14103837476503e-07, "loss": 16.3995, "step": 469340 }, { "epoch": 0.9481166950148878, "grad_norm": 491.92205810546875, "learning_rate": 1.1402970205498742e-07, "loss": 26.6676, "step": 469350 }, { "epoch": 0.9481368956475716, "grad_norm": 332.1183776855469, "learning_rate": 1.1395559044709848e-07, "loss": 10.5885, "step": 469360 }, { "epoch": 0.9481570962802555, "grad_norm": 368.10345458984375, "learning_rate": 1.1388150265319808e-07, "loss": 14.0164, "step": 469370 }, { "epoch": 0.9481772969129393, "grad_norm": 203.36778259277344, "learning_rate": 1.1380743867364596e-07, "loss": 16.265, "step": 469380 }, { "epoch": 0.9481974975456231, "grad_norm": 91.9708251953125, "learning_rate": 1.1373339850880405e-07, "loss": 15.9717, "step": 469390 }, { "epoch": 0.9482176981783069, "grad_norm": 312.5740051269531, "learning_rate": 1.136593821590326e-07, "loss": 12.2342, "step": 469400 }, { "epoch": 0.9482378988109907, "grad_norm": 123.86184692382812, "learning_rate": 1.1358538962469356e-07, "loss": 22.2205, "step": 469410 }, { "epoch": 0.9482580994436746, "grad_norm": 345.5480651855469, "learning_rate": 1.1351142090614553e-07, "loss": 20.7123, "step": 469420 }, { "epoch": 0.9482783000763584, "grad_norm": 109.01927185058594, "learning_rate": 1.1343747600375044e-07, "loss": 8.7963, "step": 469430 }, { "epoch": 0.9482985007090422, "grad_norm": 101.62294006347656, "learning_rate": 1.1336355491786966e-07, "loss": 14.3572, "step": 469440 }, { "epoch": 0.948318701341726, "grad_norm": 123.70928955078125, "learning_rate": 1.1328965764886069e-07, "loss": 21.6722, "step": 469450 }, { "epoch": 0.9483389019744098, "grad_norm": 104.20419311523438, "learning_rate": 1.1321578419708545e-07, "loss": 17.5573, "step": 469460 }, { "epoch": 0.9483591026070937, "grad_norm": 309.7586975097656, "learning_rate": 1.1314193456290424e-07, "loss": 22.9749, "step": 469470 }, { "epoch": 0.9483793032397775, "grad_norm": 43.185577392578125, "learning_rate": 1.1306810874667673e-07, "loss": 9.9447, "step": 469480 }, { "epoch": 0.9483995038724613, "grad_norm": 265.19671630859375, "learning_rate": 1.129943067487621e-07, "loss": 13.2196, "step": 469490 }, { "epoch": 0.9484197045051451, "grad_norm": 0.0, "learning_rate": 1.1292052856952063e-07, "loss": 12.6383, "step": 469500 }, { "epoch": 0.9484399051378289, "grad_norm": 44.23899841308594, "learning_rate": 1.1284677420931201e-07, "loss": 9.5979, "step": 469510 }, { "epoch": 0.9484601057705128, "grad_norm": 255.32183837890625, "learning_rate": 1.1277304366849539e-07, "loss": 16.5217, "step": 469520 }, { "epoch": 0.9484803064031966, "grad_norm": 399.6812438964844, "learning_rate": 1.1269933694742996e-07, "loss": 27.1066, "step": 469530 }, { "epoch": 0.9485005070358804, "grad_norm": 610.9277954101562, "learning_rate": 1.1262565404647485e-07, "loss": 18.2134, "step": 469540 }, { "epoch": 0.9485207076685642, "grad_norm": 219.33639526367188, "learning_rate": 1.1255199496599034e-07, "loss": 13.4326, "step": 469550 }, { "epoch": 0.948540908301248, "grad_norm": 502.5478210449219, "learning_rate": 1.1247835970633392e-07, "loss": 9.4909, "step": 469560 }, { "epoch": 0.9485611089339319, "grad_norm": 259.220947265625, "learning_rate": 1.1240474826786585e-07, "loss": 11.7262, "step": 469570 }, { "epoch": 0.9485813095666156, "grad_norm": 107.51399993896484, "learning_rate": 1.1233116065094363e-07, "loss": 19.9315, "step": 469580 }, { "epoch": 0.9486015101992994, "grad_norm": 521.3172607421875, "learning_rate": 1.1225759685592697e-07, "loss": 17.4128, "step": 469590 }, { "epoch": 0.9486217108319832, "grad_norm": 174.23753356933594, "learning_rate": 1.1218405688317447e-07, "loss": 8.2005, "step": 469600 }, { "epoch": 0.948641911464667, "grad_norm": 227.84393310546875, "learning_rate": 1.1211054073304305e-07, "loss": 14.7567, "step": 469610 }, { "epoch": 0.9486621120973509, "grad_norm": 191.42213439941406, "learning_rate": 1.1203704840589247e-07, "loss": 13.1507, "step": 469620 }, { "epoch": 0.9486823127300347, "grad_norm": 16.103796005249023, "learning_rate": 1.1196357990208074e-07, "loss": 9.6244, "step": 469630 }, { "epoch": 0.9487025133627185, "grad_norm": 304.25390625, "learning_rate": 1.1189013522196479e-07, "loss": 22.1177, "step": 469640 }, { "epoch": 0.9487227139954023, "grad_norm": 93.02654266357422, "learning_rate": 1.118167143659038e-07, "loss": 9.61, "step": 469650 }, { "epoch": 0.9487429146280861, "grad_norm": 306.44757080078125, "learning_rate": 1.1174331733425636e-07, "loss": 19.6469, "step": 469660 }, { "epoch": 0.94876311526077, "grad_norm": 304.4107360839844, "learning_rate": 1.1166994412737774e-07, "loss": 21.4619, "step": 469670 }, { "epoch": 0.9487833158934538, "grad_norm": 272.19146728515625, "learning_rate": 1.1159659474562712e-07, "loss": 13.1943, "step": 469680 }, { "epoch": 0.9488035165261376, "grad_norm": 408.47821044921875, "learning_rate": 1.1152326918936251e-07, "loss": 23.881, "step": 469690 }, { "epoch": 0.9488237171588214, "grad_norm": 301.56256103515625, "learning_rate": 1.1144996745894033e-07, "loss": 28.8089, "step": 469700 }, { "epoch": 0.9488439177915052, "grad_norm": 455.19940185546875, "learning_rate": 1.1137668955471803e-07, "loss": 11.0243, "step": 469710 }, { "epoch": 0.948864118424189, "grad_norm": 634.3616333007812, "learning_rate": 1.1130343547705257e-07, "loss": 23.3324, "step": 469720 }, { "epoch": 0.9488843190568729, "grad_norm": 353.26751708984375, "learning_rate": 1.1123020522630202e-07, "loss": 22.2149, "step": 469730 }, { "epoch": 0.9489045196895567, "grad_norm": 294.94317626953125, "learning_rate": 1.111569988028216e-07, "loss": 26.7194, "step": 469740 }, { "epoch": 0.9489247203222405, "grad_norm": 397.300537109375, "learning_rate": 1.1108381620696885e-07, "loss": 13.6373, "step": 469750 }, { "epoch": 0.9489449209549243, "grad_norm": 251.4523468017578, "learning_rate": 1.1101065743910122e-07, "loss": 14.7469, "step": 469760 }, { "epoch": 0.9489651215876082, "grad_norm": 285.14208984375, "learning_rate": 1.1093752249957512e-07, "loss": 22.9022, "step": 469770 }, { "epoch": 0.948985322220292, "grad_norm": 348.3089904785156, "learning_rate": 1.1086441138874581e-07, "loss": 34.9027, "step": 469780 }, { "epoch": 0.9490055228529758, "grad_norm": 422.3214416503906, "learning_rate": 1.107913241069708e-07, "loss": 15.4875, "step": 469790 }, { "epoch": 0.9490257234856596, "grad_norm": 139.68527221679688, "learning_rate": 1.107182606546059e-07, "loss": 6.1628, "step": 469800 }, { "epoch": 0.9490459241183434, "grad_norm": 45.21986770629883, "learning_rate": 1.1064522103200636e-07, "loss": 9.997, "step": 469810 }, { "epoch": 0.9490661247510273, "grad_norm": 294.0475158691406, "learning_rate": 1.1057220523953027e-07, "loss": 9.9022, "step": 469820 }, { "epoch": 0.9490863253837111, "grad_norm": 120.39701080322266, "learning_rate": 1.1049921327753121e-07, "loss": 9.7648, "step": 469830 }, { "epoch": 0.9491065260163948, "grad_norm": 466.20001220703125, "learning_rate": 1.1042624514636669e-07, "loss": 16.0697, "step": 469840 }, { "epoch": 0.9491267266490786, "grad_norm": 236.29421997070312, "learning_rate": 1.1035330084639084e-07, "loss": 11.462, "step": 469850 }, { "epoch": 0.9491469272817624, "grad_norm": 260.67724609375, "learning_rate": 1.1028038037796063e-07, "loss": 13.6326, "step": 469860 }, { "epoch": 0.9491671279144462, "grad_norm": 218.26943969726562, "learning_rate": 1.1020748374143075e-07, "loss": 16.4647, "step": 469870 }, { "epoch": 0.9491873285471301, "grad_norm": 144.53085327148438, "learning_rate": 1.1013461093715594e-07, "loss": 8.0247, "step": 469880 }, { "epoch": 0.9492075291798139, "grad_norm": 182.36032104492188, "learning_rate": 1.1006176196549256e-07, "loss": 7.5152, "step": 469890 }, { "epoch": 0.9492277298124977, "grad_norm": 305.69189453125, "learning_rate": 1.0998893682679479e-07, "loss": 12.5555, "step": 469900 }, { "epoch": 0.9492479304451815, "grad_norm": 221.77523803710938, "learning_rate": 1.099161355214179e-07, "loss": 10.1588, "step": 469910 }, { "epoch": 0.9492681310778653, "grad_norm": 431.93939208984375, "learning_rate": 1.0984335804971713e-07, "loss": 16.6653, "step": 469920 }, { "epoch": 0.9492883317105492, "grad_norm": 29.182485580444336, "learning_rate": 1.0977060441204612e-07, "loss": 11.957, "step": 469930 }, { "epoch": 0.949308532343233, "grad_norm": 219.14222717285156, "learning_rate": 1.0969787460876013e-07, "loss": 12.5424, "step": 469940 }, { "epoch": 0.9493287329759168, "grad_norm": 290.021484375, "learning_rate": 1.0962516864021388e-07, "loss": 15.1483, "step": 469950 }, { "epoch": 0.9493489336086006, "grad_norm": 163.8733673095703, "learning_rate": 1.0955248650676154e-07, "loss": 13.3683, "step": 469960 }, { "epoch": 0.9493691342412844, "grad_norm": 63.725318908691406, "learning_rate": 1.0947982820875669e-07, "loss": 18.1248, "step": 469970 }, { "epoch": 0.9493893348739683, "grad_norm": 1048.1917724609375, "learning_rate": 1.0940719374655462e-07, "loss": 35.0464, "step": 469980 }, { "epoch": 0.9494095355066521, "grad_norm": 194.04000854492188, "learning_rate": 1.0933458312050837e-07, "loss": 15.3915, "step": 469990 }, { "epoch": 0.9494297361393359, "grad_norm": 4.694753170013428, "learning_rate": 1.0926199633097156e-07, "loss": 10.135, "step": 470000 }, { "epoch": 0.9494499367720197, "grad_norm": 781.8992309570312, "learning_rate": 1.0918943337829945e-07, "loss": 30.4598, "step": 470010 }, { "epoch": 0.9494701374047035, "grad_norm": 41.23650360107422, "learning_rate": 1.091168942628451e-07, "loss": 9.8344, "step": 470020 }, { "epoch": 0.9494903380373874, "grad_norm": 199.8082733154297, "learning_rate": 1.09044378984961e-07, "loss": 11.447, "step": 470030 }, { "epoch": 0.9495105386700712, "grad_norm": 418.4902648925781, "learning_rate": 1.0897188754500187e-07, "loss": 17.8343, "step": 470040 }, { "epoch": 0.949530739302755, "grad_norm": 142.16915893554688, "learning_rate": 1.0889941994332077e-07, "loss": 13.8683, "step": 470050 }, { "epoch": 0.9495509399354388, "grad_norm": 757.1500244140625, "learning_rate": 1.0882697618027016e-07, "loss": 17.1053, "step": 470060 }, { "epoch": 0.9495711405681226, "grad_norm": 145.1659393310547, "learning_rate": 1.0875455625620368e-07, "loss": 23.5184, "step": 470070 }, { "epoch": 0.9495913412008065, "grad_norm": 580.390869140625, "learning_rate": 1.0868216017147437e-07, "loss": 28.3383, "step": 470080 }, { "epoch": 0.9496115418334903, "grad_norm": 363.58538818359375, "learning_rate": 1.0860978792643528e-07, "loss": 10.8664, "step": 470090 }, { "epoch": 0.949631742466174, "grad_norm": 300.77783203125, "learning_rate": 1.0853743952143836e-07, "loss": 13.1416, "step": 470100 }, { "epoch": 0.9496519430988578, "grad_norm": 192.88450622558594, "learning_rate": 1.084651149568372e-07, "loss": 15.3004, "step": 470110 }, { "epoch": 0.9496721437315416, "grad_norm": 342.4608459472656, "learning_rate": 1.0839281423298375e-07, "loss": 12.5223, "step": 470120 }, { "epoch": 0.9496923443642254, "grad_norm": 282.32489013671875, "learning_rate": 1.0832053735022996e-07, "loss": 13.0053, "step": 470130 }, { "epoch": 0.9497125449969093, "grad_norm": 545.429443359375, "learning_rate": 1.0824828430892831e-07, "loss": 26.8959, "step": 470140 }, { "epoch": 0.9497327456295931, "grad_norm": 491.9284973144531, "learning_rate": 1.0817605510943241e-07, "loss": 15.1654, "step": 470150 }, { "epoch": 0.9497529462622769, "grad_norm": 368.24237060546875, "learning_rate": 1.0810384975209254e-07, "loss": 17.1497, "step": 470160 }, { "epoch": 0.9497731468949607, "grad_norm": 299.6114501953125, "learning_rate": 1.0803166823726064e-07, "loss": 20.3, "step": 470170 }, { "epoch": 0.9497933475276445, "grad_norm": 348.03900146484375, "learning_rate": 1.0795951056528974e-07, "loss": 22.592, "step": 470180 }, { "epoch": 0.9498135481603284, "grad_norm": 344.5665283203125, "learning_rate": 1.0788737673653072e-07, "loss": 30.4026, "step": 470190 }, { "epoch": 0.9498337487930122, "grad_norm": 424.1960144042969, "learning_rate": 1.0781526675133492e-07, "loss": 26.6032, "step": 470200 }, { "epoch": 0.949853949425696, "grad_norm": 365.5455322265625, "learning_rate": 1.0774318061005484e-07, "loss": 16.6422, "step": 470210 }, { "epoch": 0.9498741500583798, "grad_norm": 282.263427734375, "learning_rate": 1.0767111831304022e-07, "loss": 16.3358, "step": 470220 }, { "epoch": 0.9498943506910636, "grad_norm": 394.3710632324219, "learning_rate": 1.0759907986064411e-07, "loss": 18.1085, "step": 470230 }, { "epoch": 0.9499145513237475, "grad_norm": 353.6780090332031, "learning_rate": 1.0752706525321622e-07, "loss": 11.1399, "step": 470240 }, { "epoch": 0.9499347519564313, "grad_norm": 156.2507781982422, "learning_rate": 1.0745507449110792e-07, "loss": 17.7139, "step": 470250 }, { "epoch": 0.9499549525891151, "grad_norm": 188.82017517089844, "learning_rate": 1.0738310757467064e-07, "loss": 19.2339, "step": 470260 }, { "epoch": 0.9499751532217989, "grad_norm": 126.23918914794922, "learning_rate": 1.0731116450425461e-07, "loss": 12.828, "step": 470270 }, { "epoch": 0.9499953538544827, "grad_norm": 310.2344970703125, "learning_rate": 1.0723924528021012e-07, "loss": 19.8323, "step": 470280 }, { "epoch": 0.9500155544871666, "grad_norm": 183.66943359375, "learning_rate": 1.0716734990288801e-07, "loss": 23.0236, "step": 470290 }, { "epoch": 0.9500357551198504, "grad_norm": 388.44287109375, "learning_rate": 1.0709547837263967e-07, "loss": 21.3623, "step": 470300 }, { "epoch": 0.9500559557525342, "grad_norm": 697.5735473632812, "learning_rate": 1.0702363068981425e-07, "loss": 34.2935, "step": 470310 }, { "epoch": 0.950076156385218, "grad_norm": 300.3755798339844, "learning_rate": 1.0695180685476148e-07, "loss": 15.7304, "step": 470320 }, { "epoch": 0.9500963570179018, "grad_norm": 380.5934143066406, "learning_rate": 1.0688000686783272e-07, "loss": 14.4428, "step": 470330 }, { "epoch": 0.9501165576505857, "grad_norm": 265.9210205078125, "learning_rate": 1.0680823072937774e-07, "loss": 18.7776, "step": 470340 }, { "epoch": 0.9501367582832694, "grad_norm": 251.99920654296875, "learning_rate": 1.067364784397451e-07, "loss": 26.2346, "step": 470350 }, { "epoch": 0.9501569589159532, "grad_norm": 716.3020629882812, "learning_rate": 1.0666474999928566e-07, "loss": 26.2636, "step": 470360 }, { "epoch": 0.950177159548637, "grad_norm": 427.94134521484375, "learning_rate": 1.0659304540834914e-07, "loss": 16.5608, "step": 470370 }, { "epoch": 0.9501973601813208, "grad_norm": 475.6017150878906, "learning_rate": 1.0652136466728468e-07, "loss": 17.7835, "step": 470380 }, { "epoch": 0.9502175608140047, "grad_norm": 361.806396484375, "learning_rate": 1.0644970777644093e-07, "loss": 9.2336, "step": 470390 }, { "epoch": 0.9502377614466885, "grad_norm": 181.4495391845703, "learning_rate": 1.0637807473616812e-07, "loss": 33.6267, "step": 470400 }, { "epoch": 0.9502579620793723, "grad_norm": 449.17926025390625, "learning_rate": 1.0630646554681545e-07, "loss": 19.7068, "step": 470410 }, { "epoch": 0.9502781627120561, "grad_norm": 294.85003662109375, "learning_rate": 1.0623488020873097e-07, "loss": 26.1473, "step": 470420 }, { "epoch": 0.9502983633447399, "grad_norm": 325.1664123535156, "learning_rate": 1.0616331872226437e-07, "loss": 17.327, "step": 470430 }, { "epoch": 0.9503185639774238, "grad_norm": 345.0419006347656, "learning_rate": 1.0609178108776375e-07, "loss": 14.0324, "step": 470440 }, { "epoch": 0.9503387646101076, "grad_norm": 3.2714545726776123, "learning_rate": 1.0602026730557879e-07, "loss": 15.103, "step": 470450 }, { "epoch": 0.9503589652427914, "grad_norm": 419.8089294433594, "learning_rate": 1.0594877737605702e-07, "loss": 13.4397, "step": 470460 }, { "epoch": 0.9503791658754752, "grad_norm": 297.6645812988281, "learning_rate": 1.0587731129954815e-07, "loss": 16.4085, "step": 470470 }, { "epoch": 0.950399366508159, "grad_norm": 258.7247619628906, "learning_rate": 1.0580586907639912e-07, "loss": 15.0832, "step": 470480 }, { "epoch": 0.9504195671408429, "grad_norm": 662.227294921875, "learning_rate": 1.0573445070695853e-07, "loss": 15.39, "step": 470490 }, { "epoch": 0.9504397677735267, "grad_norm": 151.84469604492188, "learning_rate": 1.0566305619157502e-07, "loss": 19.7452, "step": 470500 }, { "epoch": 0.9504599684062105, "grad_norm": 241.95960998535156, "learning_rate": 1.0559168553059551e-07, "loss": 22.2678, "step": 470510 }, { "epoch": 0.9504801690388943, "grad_norm": 199.27572631835938, "learning_rate": 1.0552033872436917e-07, "loss": 13.4917, "step": 470520 }, { "epoch": 0.9505003696715781, "grad_norm": 385.2051696777344, "learning_rate": 1.0544901577324351e-07, "loss": 16.1362, "step": 470530 }, { "epoch": 0.950520570304262, "grad_norm": 159.27716064453125, "learning_rate": 1.0537771667756436e-07, "loss": 14.5987, "step": 470540 }, { "epoch": 0.9505407709369458, "grad_norm": 257.7624816894531, "learning_rate": 1.0530644143768143e-07, "loss": 18.522, "step": 470550 }, { "epoch": 0.9505609715696296, "grad_norm": 122.473388671875, "learning_rate": 1.0523519005394167e-07, "loss": 18.2859, "step": 470560 }, { "epoch": 0.9505811722023134, "grad_norm": 326.22698974609375, "learning_rate": 1.0516396252669092e-07, "loss": 10.2913, "step": 470570 }, { "epoch": 0.9506013728349972, "grad_norm": 234.60018920898438, "learning_rate": 1.0509275885627779e-07, "loss": 9.2405, "step": 470580 }, { "epoch": 0.9506215734676811, "grad_norm": 449.334716796875, "learning_rate": 1.0502157904304866e-07, "loss": 13.7069, "step": 470590 }, { "epoch": 0.9506417741003649, "grad_norm": 455.3882141113281, "learning_rate": 1.0495042308735104e-07, "loss": 19.8605, "step": 470600 }, { "epoch": 0.9506619747330486, "grad_norm": 31.53827667236328, "learning_rate": 1.0487929098953131e-07, "loss": 11.4078, "step": 470610 }, { "epoch": 0.9506821753657324, "grad_norm": 430.6929016113281, "learning_rate": 1.0480818274993587e-07, "loss": 10.1137, "step": 470620 }, { "epoch": 0.9507023759984162, "grad_norm": 223.26327514648438, "learning_rate": 1.0473709836891222e-07, "loss": 12.6717, "step": 470630 }, { "epoch": 0.9507225766311, "grad_norm": 348.0071716308594, "learning_rate": 1.0466603784680562e-07, "loss": 17.5693, "step": 470640 }, { "epoch": 0.9507427772637839, "grad_norm": 654.5459594726562, "learning_rate": 1.0459500118396304e-07, "loss": 19.3442, "step": 470650 }, { "epoch": 0.9507629778964677, "grad_norm": 272.2804870605469, "learning_rate": 1.0452398838073141e-07, "loss": 11.1679, "step": 470660 }, { "epoch": 0.9507831785291515, "grad_norm": 324.45819091796875, "learning_rate": 1.0445299943745546e-07, "loss": 19.1832, "step": 470670 }, { "epoch": 0.9508033791618353, "grad_norm": 358.24114990234375, "learning_rate": 1.0438203435448157e-07, "loss": 38.1832, "step": 470680 }, { "epoch": 0.9508235797945191, "grad_norm": 1002.8253784179688, "learning_rate": 1.0431109313215671e-07, "loss": 32.5478, "step": 470690 }, { "epoch": 0.950843780427203, "grad_norm": 336.9682312011719, "learning_rate": 1.0424017577082556e-07, "loss": 13.8397, "step": 470700 }, { "epoch": 0.9508639810598868, "grad_norm": 0.0, "learning_rate": 1.0416928227083345e-07, "loss": 16.7932, "step": 470710 }, { "epoch": 0.9508841816925706, "grad_norm": 398.4461364746094, "learning_rate": 1.0409841263252673e-07, "loss": 18.7359, "step": 470720 }, { "epoch": 0.9509043823252544, "grad_norm": 299.9626159667969, "learning_rate": 1.040275668562507e-07, "loss": 20.0693, "step": 470730 }, { "epoch": 0.9509245829579382, "grad_norm": 121.19273376464844, "learning_rate": 1.0395674494235064e-07, "loss": 25.1212, "step": 470740 }, { "epoch": 0.9509447835906221, "grad_norm": 273.7412414550781, "learning_rate": 1.038859468911707e-07, "loss": 8.6719, "step": 470750 }, { "epoch": 0.9509649842233059, "grad_norm": 4.967778205871582, "learning_rate": 1.0381517270305786e-07, "loss": 25.2678, "step": 470760 }, { "epoch": 0.9509851848559897, "grad_norm": 0.9025456309318542, "learning_rate": 1.0374442237835625e-07, "loss": 7.366, "step": 470770 }, { "epoch": 0.9510053854886735, "grad_norm": 134.75787353515625, "learning_rate": 1.036736959174095e-07, "loss": 13.4244, "step": 470780 }, { "epoch": 0.9510255861213573, "grad_norm": 138.00926208496094, "learning_rate": 1.03602993320564e-07, "loss": 15.6263, "step": 470790 }, { "epoch": 0.9510457867540412, "grad_norm": 460.781005859375, "learning_rate": 1.0353231458816338e-07, "loss": 13.2165, "step": 470800 }, { "epoch": 0.951065987386725, "grad_norm": 607.1260375976562, "learning_rate": 1.0346165972055233e-07, "loss": 36.6935, "step": 470810 }, { "epoch": 0.9510861880194088, "grad_norm": 236.06959533691406, "learning_rate": 1.0339102871807505e-07, "loss": 15.51, "step": 470820 }, { "epoch": 0.9511063886520926, "grad_norm": 1040.3648681640625, "learning_rate": 1.0332042158107624e-07, "loss": 20.5573, "step": 470830 }, { "epoch": 0.9511265892847764, "grad_norm": 136.85296630859375, "learning_rate": 1.032498383099001e-07, "loss": 8.7666, "step": 470840 }, { "epoch": 0.9511467899174603, "grad_norm": 25.13045883178711, "learning_rate": 1.0317927890489021e-07, "loss": 12.975, "step": 470850 }, { "epoch": 0.951166990550144, "grad_norm": 169.87399291992188, "learning_rate": 1.0310874336639021e-07, "loss": 30.7924, "step": 470860 }, { "epoch": 0.9511871911828278, "grad_norm": 432.50494384765625, "learning_rate": 1.030382316947448e-07, "loss": 13.3448, "step": 470870 }, { "epoch": 0.9512073918155116, "grad_norm": 150.0078887939453, "learning_rate": 1.0296774389029707e-07, "loss": 21.6321, "step": 470880 }, { "epoch": 0.9512275924481954, "grad_norm": 23.30535316467285, "learning_rate": 1.0289727995339005e-07, "loss": 10.8087, "step": 470890 }, { "epoch": 0.9512477930808793, "grad_norm": 120.26686096191406, "learning_rate": 1.0282683988436792e-07, "loss": 12.9034, "step": 470900 }, { "epoch": 0.9512679937135631, "grad_norm": 24.445892333984375, "learning_rate": 1.027564236835743e-07, "loss": 16.5104, "step": 470910 }, { "epoch": 0.9512881943462469, "grad_norm": 125.10945129394531, "learning_rate": 1.0268603135135169e-07, "loss": 13.0459, "step": 470920 }, { "epoch": 0.9513083949789307, "grad_norm": 604.3030395507812, "learning_rate": 1.0261566288804315e-07, "loss": 20.8064, "step": 470930 }, { "epoch": 0.9513285956116145, "grad_norm": 217.4271240234375, "learning_rate": 1.0254531829399228e-07, "loss": 13.045, "step": 470940 }, { "epoch": 0.9513487962442984, "grad_norm": 168.72885131835938, "learning_rate": 1.024749975695416e-07, "loss": 14.6424, "step": 470950 }, { "epoch": 0.9513689968769822, "grad_norm": 293.8639221191406, "learning_rate": 1.0240470071503306e-07, "loss": 11.9183, "step": 470960 }, { "epoch": 0.951389197509666, "grad_norm": 154.8897705078125, "learning_rate": 1.0233442773081026e-07, "loss": 20.1196, "step": 470970 }, { "epoch": 0.9514093981423498, "grad_norm": 260.27703857421875, "learning_rate": 1.0226417861721571e-07, "loss": 7.7497, "step": 470980 }, { "epoch": 0.9514295987750336, "grad_norm": 214.36672973632812, "learning_rate": 1.0219395337459137e-07, "loss": 6.1547, "step": 470990 }, { "epoch": 0.9514497994077175, "grad_norm": 400.2989807128906, "learning_rate": 1.0212375200327973e-07, "loss": 18.0779, "step": 471000 }, { "epoch": 0.9514700000404013, "grad_norm": 151.48472595214844, "learning_rate": 1.0205357450362275e-07, "loss": 11.009, "step": 471010 }, { "epoch": 0.9514902006730851, "grad_norm": 3.5933449268341064, "learning_rate": 1.0198342087596292e-07, "loss": 16.8308, "step": 471020 }, { "epoch": 0.9515104013057689, "grad_norm": 340.96038818359375, "learning_rate": 1.0191329112064164e-07, "loss": 17.0175, "step": 471030 }, { "epoch": 0.9515306019384527, "grad_norm": 431.24090576171875, "learning_rate": 1.0184318523800086e-07, "loss": 13.8514, "step": 471040 }, { "epoch": 0.9515508025711366, "grad_norm": 370.28778076171875, "learning_rate": 1.0177310322838251e-07, "loss": 15.9059, "step": 471050 }, { "epoch": 0.9515710032038204, "grad_norm": 238.84820556640625, "learning_rate": 1.0170304509212803e-07, "loss": 20.2611, "step": 471060 }, { "epoch": 0.9515912038365042, "grad_norm": 171.47479248046875, "learning_rate": 1.0163301082957821e-07, "loss": 21.947, "step": 471070 }, { "epoch": 0.951611404469188, "grad_norm": 345.8973693847656, "learning_rate": 1.0156300044107559e-07, "loss": 11.2068, "step": 471080 }, { "epoch": 0.9516316051018718, "grad_norm": 182.06565856933594, "learning_rate": 1.0149301392696098e-07, "loss": 15.6309, "step": 471090 }, { "epoch": 0.9516518057345557, "grad_norm": 104.55928802490234, "learning_rate": 1.0142305128757468e-07, "loss": 22.0133, "step": 471100 }, { "epoch": 0.9516720063672395, "grad_norm": 233.50936889648438, "learning_rate": 1.0135311252325863e-07, "loss": 12.8193, "step": 471110 }, { "epoch": 0.9516922069999232, "grad_norm": 240.9148406982422, "learning_rate": 1.0128319763435312e-07, "loss": 14.8339, "step": 471120 }, { "epoch": 0.951712407632607, "grad_norm": 221.30917358398438, "learning_rate": 1.0121330662119954e-07, "loss": 12.9855, "step": 471130 }, { "epoch": 0.9517326082652908, "grad_norm": 175.91488647460938, "learning_rate": 1.0114343948413818e-07, "loss": 13.9831, "step": 471140 }, { "epoch": 0.9517528088979746, "grad_norm": 618.2254638671875, "learning_rate": 1.0107359622350877e-07, "loss": 27.8967, "step": 471150 }, { "epoch": 0.9517730095306585, "grad_norm": 109.8958740234375, "learning_rate": 1.0100377683965323e-07, "loss": 13.6493, "step": 471160 }, { "epoch": 0.9517932101633423, "grad_norm": 235.784912109375, "learning_rate": 1.0093398133291132e-07, "loss": 16.3887, "step": 471170 }, { "epoch": 0.9518134107960261, "grad_norm": 414.00244140625, "learning_rate": 1.0086420970362221e-07, "loss": 17.9258, "step": 471180 }, { "epoch": 0.9518336114287099, "grad_norm": 336.341552734375, "learning_rate": 1.0079446195212728e-07, "loss": 28.1355, "step": 471190 }, { "epoch": 0.9518538120613937, "grad_norm": 264.4357604980469, "learning_rate": 1.007247380787657e-07, "loss": 17.1038, "step": 471200 }, { "epoch": 0.9518740126940776, "grad_norm": 768.3027954101562, "learning_rate": 1.0065503808387777e-07, "loss": 24.6599, "step": 471210 }, { "epoch": 0.9518942133267614, "grad_norm": 95.99604034423828, "learning_rate": 1.0058536196780266e-07, "loss": 11.4118, "step": 471220 }, { "epoch": 0.9519144139594452, "grad_norm": 242.30259704589844, "learning_rate": 1.0051570973088064e-07, "loss": 17.5281, "step": 471230 }, { "epoch": 0.951934614592129, "grad_norm": 58.12041091918945, "learning_rate": 1.0044608137345091e-07, "loss": 11.3245, "step": 471240 }, { "epoch": 0.9519548152248128, "grad_norm": 324.6882629394531, "learning_rate": 1.0037647689585207e-07, "loss": 14.2132, "step": 471250 }, { "epoch": 0.9519750158574967, "grad_norm": 408.84527587890625, "learning_rate": 1.0030689629842382e-07, "loss": 19.237, "step": 471260 }, { "epoch": 0.9519952164901805, "grad_norm": 254.10365295410156, "learning_rate": 1.0023733958150706e-07, "loss": 19.3805, "step": 471270 }, { "epoch": 0.9520154171228643, "grad_norm": 380.91107177734375, "learning_rate": 1.0016780674543813e-07, "loss": 15.11, "step": 471280 }, { "epoch": 0.9520356177555481, "grad_norm": 117.3650131225586, "learning_rate": 1.0009829779055679e-07, "loss": 6.3612, "step": 471290 }, { "epoch": 0.952055818388232, "grad_norm": 166.88565063476562, "learning_rate": 1.0002881271720222e-07, "loss": 14.1588, "step": 471300 }, { "epoch": 0.9520760190209158, "grad_norm": 892.7716674804688, "learning_rate": 9.995935152571357e-08, "loss": 19.1736, "step": 471310 }, { "epoch": 0.9520962196535996, "grad_norm": 268.48779296875, "learning_rate": 9.988991421642779e-08, "loss": 15.5897, "step": 471320 }, { "epoch": 0.9521164202862834, "grad_norm": 214.12991333007812, "learning_rate": 9.98205007896852e-08, "loss": 11.805, "step": 471330 }, { "epoch": 0.9521366209189672, "grad_norm": 79.6239242553711, "learning_rate": 9.975111124582271e-08, "loss": 18.7248, "step": 471340 }, { "epoch": 0.952156821551651, "grad_norm": 0.0, "learning_rate": 9.968174558517895e-08, "loss": 10.0281, "step": 471350 }, { "epoch": 0.9521770221843349, "grad_norm": 279.0708312988281, "learning_rate": 9.961240380809201e-08, "loss": 17.1007, "step": 471360 }, { "epoch": 0.9521972228170186, "grad_norm": 392.2039794921875, "learning_rate": 9.954308591489991e-08, "loss": 27.0333, "step": 471370 }, { "epoch": 0.9522174234497024, "grad_norm": 0.0, "learning_rate": 9.947379190594076e-08, "loss": 22.3938, "step": 471380 }, { "epoch": 0.9522376240823862, "grad_norm": 460.63726806640625, "learning_rate": 9.940452178155147e-08, "loss": 20.6769, "step": 471390 }, { "epoch": 0.95225782471507, "grad_norm": 318.01348876953125, "learning_rate": 9.933527554207012e-08, "loss": 23.6312, "step": 471400 }, { "epoch": 0.9522780253477539, "grad_norm": 459.3076477050781, "learning_rate": 9.926605318783477e-08, "loss": 22.5429, "step": 471410 }, { "epoch": 0.9522982259804377, "grad_norm": 373.6624755859375, "learning_rate": 9.919685471918183e-08, "loss": 18.406, "step": 471420 }, { "epoch": 0.9523184266131215, "grad_norm": 182.34884643554688, "learning_rate": 9.912768013644936e-08, "loss": 17.1703, "step": 471430 }, { "epoch": 0.9523386272458053, "grad_norm": 388.4763488769531, "learning_rate": 9.905852943997374e-08, "loss": 12.4804, "step": 471440 }, { "epoch": 0.9523588278784891, "grad_norm": 151.5099639892578, "learning_rate": 9.898940263009304e-08, "loss": 10.5884, "step": 471450 }, { "epoch": 0.952379028511173, "grad_norm": 247.20875549316406, "learning_rate": 9.892029970714367e-08, "loss": 14.7343, "step": 471460 }, { "epoch": 0.9523992291438568, "grad_norm": 171.4412384033203, "learning_rate": 9.885122067146147e-08, "loss": 15.0036, "step": 471470 }, { "epoch": 0.9524194297765406, "grad_norm": 182.62937927246094, "learning_rate": 9.878216552338504e-08, "loss": 11.706, "step": 471480 }, { "epoch": 0.9524396304092244, "grad_norm": 360.761962890625, "learning_rate": 9.871313426324913e-08, "loss": 34.3857, "step": 471490 }, { "epoch": 0.9524598310419082, "grad_norm": 96.55928039550781, "learning_rate": 9.864412689139124e-08, "loss": 14.3127, "step": 471500 }, { "epoch": 0.952480031674592, "grad_norm": 144.3299560546875, "learning_rate": 9.857514340814667e-08, "loss": 12.1043, "step": 471510 }, { "epoch": 0.9525002323072759, "grad_norm": 324.63421630859375, "learning_rate": 9.850618381385346e-08, "loss": 12.5964, "step": 471520 }, { "epoch": 0.9525204329399597, "grad_norm": 260.4071044921875, "learning_rate": 9.843724810884636e-08, "loss": 10.9636, "step": 471530 }, { "epoch": 0.9525406335726435, "grad_norm": 204.39683532714844, "learning_rate": 9.836833629346121e-08, "loss": 13.2621, "step": 471540 }, { "epoch": 0.9525608342053273, "grad_norm": 417.4044189453125, "learning_rate": 9.82994483680344e-08, "loss": 21.0501, "step": 471550 }, { "epoch": 0.9525810348380112, "grad_norm": 186.35858154296875, "learning_rate": 9.823058433290178e-08, "loss": 14.8036, "step": 471560 }, { "epoch": 0.952601235470695, "grad_norm": 221.61087036132812, "learning_rate": 9.816174418839863e-08, "loss": 14.2294, "step": 471570 }, { "epoch": 0.9526214361033788, "grad_norm": 43.475059509277344, "learning_rate": 9.809292793486025e-08, "loss": 11.6246, "step": 471580 }, { "epoch": 0.9526416367360626, "grad_norm": 436.1901550292969, "learning_rate": 9.802413557262302e-08, "loss": 18.3258, "step": 471590 }, { "epoch": 0.9526618373687464, "grad_norm": 393.25921630859375, "learning_rate": 9.795536710202169e-08, "loss": 16.0255, "step": 471600 }, { "epoch": 0.9526820380014303, "grad_norm": 248.4064483642578, "learning_rate": 9.788662252339099e-08, "loss": 16.6259, "step": 471610 }, { "epoch": 0.9527022386341141, "grad_norm": 183.06944274902344, "learning_rate": 9.781790183706674e-08, "loss": 19.8227, "step": 471620 }, { "epoch": 0.9527224392667978, "grad_norm": 277.7666015625, "learning_rate": 9.774920504338315e-08, "loss": 27.9314, "step": 471630 }, { "epoch": 0.9527426398994816, "grad_norm": 0.0, "learning_rate": 9.768053214267548e-08, "loss": 17.4172, "step": 471640 }, { "epoch": 0.9527628405321654, "grad_norm": 146.64297485351562, "learning_rate": 9.761188313527792e-08, "loss": 16.2158, "step": 471650 }, { "epoch": 0.9527830411648492, "grad_norm": 15.5716552734375, "learning_rate": 9.754325802152575e-08, "loss": 10.0231, "step": 471660 }, { "epoch": 0.9528032417975331, "grad_norm": 118.41764831542969, "learning_rate": 9.747465680175316e-08, "loss": 15.2342, "step": 471670 }, { "epoch": 0.9528234424302169, "grad_norm": 186.61383056640625, "learning_rate": 9.740607947629433e-08, "loss": 17.2289, "step": 471680 }, { "epoch": 0.9528436430629007, "grad_norm": 441.5155944824219, "learning_rate": 9.733752604548397e-08, "loss": 23.8443, "step": 471690 }, { "epoch": 0.9528638436955845, "grad_norm": 549.6482543945312, "learning_rate": 9.726899650965626e-08, "loss": 21.348, "step": 471700 }, { "epoch": 0.9528840443282683, "grad_norm": 234.8240509033203, "learning_rate": 9.720049086914374e-08, "loss": 21.2825, "step": 471710 }, { "epoch": 0.9529042449609522, "grad_norm": 252.1730499267578, "learning_rate": 9.713200912428222e-08, "loss": 23.3847, "step": 471720 }, { "epoch": 0.952924445593636, "grad_norm": 107.79186248779297, "learning_rate": 9.706355127540423e-08, "loss": 12.5654, "step": 471730 }, { "epoch": 0.9529446462263198, "grad_norm": 59.06559371948242, "learning_rate": 9.699511732284395e-08, "loss": 13.7314, "step": 471740 }, { "epoch": 0.9529648468590036, "grad_norm": 165.02932739257812, "learning_rate": 9.692670726693498e-08, "loss": 11.965, "step": 471750 }, { "epoch": 0.9529850474916874, "grad_norm": 11.239250183105469, "learning_rate": 9.68583211080104e-08, "loss": 12.9003, "step": 471760 }, { "epoch": 0.9530052481243713, "grad_norm": 470.4634094238281, "learning_rate": 9.678995884640385e-08, "loss": 19.7971, "step": 471770 }, { "epoch": 0.9530254487570551, "grad_norm": 160.3477783203125, "learning_rate": 9.672162048244838e-08, "loss": 19.9627, "step": 471780 }, { "epoch": 0.9530456493897389, "grad_norm": 55.016910552978516, "learning_rate": 9.66533060164765e-08, "loss": 11.3744, "step": 471790 }, { "epoch": 0.9530658500224227, "grad_norm": 407.41015625, "learning_rate": 9.658501544882182e-08, "loss": 16.6285, "step": 471800 }, { "epoch": 0.9530860506551065, "grad_norm": 283.1016540527344, "learning_rate": 9.651674877981743e-08, "loss": 16.5085, "step": 471810 }, { "epoch": 0.9531062512877904, "grad_norm": 219.71046447753906, "learning_rate": 9.644850600979583e-08, "loss": 24.2003, "step": 471820 }, { "epoch": 0.9531264519204742, "grad_norm": 236.0611114501953, "learning_rate": 9.638028713908898e-08, "loss": 10.6865, "step": 471830 }, { "epoch": 0.953146652553158, "grad_norm": 228.70433044433594, "learning_rate": 9.63120921680305e-08, "loss": 13.9107, "step": 471840 }, { "epoch": 0.9531668531858418, "grad_norm": 213.1024169921875, "learning_rate": 9.62439210969518e-08, "loss": 17.9185, "step": 471850 }, { "epoch": 0.9531870538185256, "grad_norm": 306.6065673828125, "learning_rate": 9.617577392618538e-08, "loss": 16.2468, "step": 471860 }, { "epoch": 0.9532072544512095, "grad_norm": 266.8642883300781, "learning_rate": 9.61076506560632e-08, "loss": 13.3353, "step": 471870 }, { "epoch": 0.9532274550838933, "grad_norm": 342.0880126953125, "learning_rate": 9.603955128691833e-08, "loss": 11.9232, "step": 471880 }, { "epoch": 0.953247655716577, "grad_norm": 213.37538146972656, "learning_rate": 9.597147581908107e-08, "loss": 12.8125, "step": 471890 }, { "epoch": 0.9532678563492608, "grad_norm": 186.4586944580078, "learning_rate": 9.590342425288446e-08, "loss": 16.5308, "step": 471900 }, { "epoch": 0.9532880569819446, "grad_norm": 446.7265625, "learning_rate": 9.583539658865992e-08, "loss": 12.6748, "step": 471910 }, { "epoch": 0.9533082576146285, "grad_norm": 308.08984375, "learning_rate": 9.576739282673886e-08, "loss": 13.9051, "step": 471920 }, { "epoch": 0.9533284582473123, "grad_norm": 381.631591796875, "learning_rate": 9.569941296745212e-08, "loss": 24.1847, "step": 471930 }, { "epoch": 0.9533486588799961, "grad_norm": 297.9588317871094, "learning_rate": 9.563145701113219e-08, "loss": 33.634, "step": 471940 }, { "epoch": 0.9533688595126799, "grad_norm": 29.095582962036133, "learning_rate": 9.556352495810994e-08, "loss": 13.6166, "step": 471950 }, { "epoch": 0.9533890601453637, "grad_norm": 594.6957397460938, "learning_rate": 9.549561680871566e-08, "loss": 28.8783, "step": 471960 }, { "epoch": 0.9534092607780476, "grad_norm": 399.8542175292969, "learning_rate": 9.542773256328075e-08, "loss": 18.6644, "step": 471970 }, { "epoch": 0.9534294614107314, "grad_norm": 143.90599060058594, "learning_rate": 9.53598722221366e-08, "loss": 13.9902, "step": 471980 }, { "epoch": 0.9534496620434152, "grad_norm": 51.62968444824219, "learning_rate": 9.529203578561353e-08, "loss": 16.3586, "step": 471990 }, { "epoch": 0.953469862676099, "grad_norm": 589.9480590820312, "learning_rate": 9.522422325404234e-08, "loss": 24.4369, "step": 472000 }, { "epoch": 0.9534900633087828, "grad_norm": 291.8258361816406, "learning_rate": 9.515643462775337e-08, "loss": 24.2265, "step": 472010 }, { "epoch": 0.9535102639414667, "grad_norm": 483.8877258300781, "learning_rate": 9.508866990707688e-08, "loss": 16.6844, "step": 472020 }, { "epoch": 0.9535304645741505, "grad_norm": 495.4203796386719, "learning_rate": 9.502092909234317e-08, "loss": 18.2375, "step": 472030 }, { "epoch": 0.9535506652068343, "grad_norm": 362.7491760253906, "learning_rate": 9.495321218388309e-08, "loss": 26.3503, "step": 472040 }, { "epoch": 0.9535708658395181, "grad_norm": 207.70855712890625, "learning_rate": 9.488551918202527e-08, "loss": 10.3865, "step": 472050 }, { "epoch": 0.9535910664722019, "grad_norm": 382.47027587890625, "learning_rate": 9.481785008710165e-08, "loss": 21.5717, "step": 472060 }, { "epoch": 0.9536112671048858, "grad_norm": 213.85243225097656, "learning_rate": 9.475020489944032e-08, "loss": 7.1432, "step": 472070 }, { "epoch": 0.9536314677375696, "grad_norm": 305.5526123046875, "learning_rate": 9.468258361937155e-08, "loss": 14.1012, "step": 472080 }, { "epoch": 0.9536516683702534, "grad_norm": 192.07510375976562, "learning_rate": 9.461498624722509e-08, "loss": 13.8535, "step": 472090 }, { "epoch": 0.9536718690029372, "grad_norm": 718.9573364257812, "learning_rate": 9.454741278333013e-08, "loss": 20.1048, "step": 472100 }, { "epoch": 0.953692069635621, "grad_norm": 213.24778747558594, "learning_rate": 9.447986322801583e-08, "loss": 18.3788, "step": 472110 }, { "epoch": 0.9537122702683049, "grad_norm": 92.05439758300781, "learning_rate": 9.441233758161139e-08, "loss": 11.6426, "step": 472120 }, { "epoch": 0.9537324709009887, "grad_norm": 32.383121490478516, "learning_rate": 9.434483584444709e-08, "loss": 12.3286, "step": 472130 }, { "epoch": 0.9537526715336724, "grad_norm": 90.15750122070312, "learning_rate": 9.427735801685101e-08, "loss": 11.0646, "step": 472140 }, { "epoch": 0.9537728721663562, "grad_norm": 607.479248046875, "learning_rate": 9.420990409915176e-08, "loss": 27.6571, "step": 472150 }, { "epoch": 0.95379307279904, "grad_norm": 428.8316345214844, "learning_rate": 9.414247409167854e-08, "loss": 20.6816, "step": 472160 }, { "epoch": 0.9538132734317238, "grad_norm": 114.02137756347656, "learning_rate": 9.407506799475996e-08, "loss": 20.4459, "step": 472170 }, { "epoch": 0.9538334740644077, "grad_norm": 25.569461822509766, "learning_rate": 9.400768580872411e-08, "loss": 12.369, "step": 472180 }, { "epoch": 0.9538536746970915, "grad_norm": 149.68170166015625, "learning_rate": 9.394032753390014e-08, "loss": 12.3208, "step": 472190 }, { "epoch": 0.9538738753297753, "grad_norm": 2.610166311264038, "learning_rate": 9.387299317061615e-08, "loss": 13.1623, "step": 472200 }, { "epoch": 0.9538940759624591, "grad_norm": 71.14301300048828, "learning_rate": 9.380568271919966e-08, "loss": 6.1099, "step": 472210 }, { "epoch": 0.9539142765951429, "grad_norm": 137.8601531982422, "learning_rate": 9.373839617997926e-08, "loss": 19.6041, "step": 472220 }, { "epoch": 0.9539344772278268, "grad_norm": 227.89263916015625, "learning_rate": 9.367113355328361e-08, "loss": 17.0364, "step": 472230 }, { "epoch": 0.9539546778605106, "grad_norm": 239.04676818847656, "learning_rate": 9.36038948394391e-08, "loss": 20.7242, "step": 472240 }, { "epoch": 0.9539748784931944, "grad_norm": 242.86077880859375, "learning_rate": 9.353668003877437e-08, "loss": 26.4324, "step": 472250 }, { "epoch": 0.9539950791258782, "grad_norm": 222.70523071289062, "learning_rate": 9.346948915161636e-08, "loss": 18.3903, "step": 472260 }, { "epoch": 0.954015279758562, "grad_norm": 197.29324340820312, "learning_rate": 9.340232217829371e-08, "loss": 11.9223, "step": 472270 }, { "epoch": 0.9540354803912459, "grad_norm": 477.68975830078125, "learning_rate": 9.333517911913281e-08, "loss": 14.8718, "step": 472280 }, { "epoch": 0.9540556810239297, "grad_norm": 248.9418182373047, "learning_rate": 9.326805997446065e-08, "loss": 30.3828, "step": 472290 }, { "epoch": 0.9540758816566135, "grad_norm": 326.0284729003906, "learning_rate": 9.320096474460527e-08, "loss": 14.7554, "step": 472300 }, { "epoch": 0.9540960822892973, "grad_norm": 496.3621826171875, "learning_rate": 9.31338934298931e-08, "loss": 15.1102, "step": 472310 }, { "epoch": 0.9541162829219811, "grad_norm": 335.30010986328125, "learning_rate": 9.306684603065108e-08, "loss": 16.1921, "step": 472320 }, { "epoch": 0.954136483554665, "grad_norm": 271.0550537109375, "learning_rate": 9.299982254720674e-08, "loss": 11.4808, "step": 472330 }, { "epoch": 0.9541566841873488, "grad_norm": 316.6662902832031, "learning_rate": 9.293282297988537e-08, "loss": 25.0978, "step": 472340 }, { "epoch": 0.9541768848200326, "grad_norm": 146.21981811523438, "learning_rate": 9.28658473290145e-08, "loss": 18.0033, "step": 472350 }, { "epoch": 0.9541970854527164, "grad_norm": 153.33340454101562, "learning_rate": 9.27988955949205e-08, "loss": 20.8644, "step": 472360 }, { "epoch": 0.9542172860854002, "grad_norm": 101.7238540649414, "learning_rate": 9.273196777792926e-08, "loss": 19.9834, "step": 472370 }, { "epoch": 0.9542374867180841, "grad_norm": 636.7554931640625, "learning_rate": 9.266506387836771e-08, "loss": 19.0273, "step": 472380 }, { "epoch": 0.9542576873507679, "grad_norm": 293.48046875, "learning_rate": 9.259818389656117e-08, "loss": 10.0128, "step": 472390 }, { "epoch": 0.9542778879834516, "grad_norm": 214.25135803222656, "learning_rate": 9.253132783283548e-08, "loss": 17.1083, "step": 472400 }, { "epoch": 0.9542980886161354, "grad_norm": 296.0226135253906, "learning_rate": 9.246449568751702e-08, "loss": 15.2656, "step": 472410 }, { "epoch": 0.9543182892488192, "grad_norm": 204.22378540039062, "learning_rate": 9.239768746093226e-08, "loss": 14.079, "step": 472420 }, { "epoch": 0.954338489881503, "grad_norm": 450.76800537109375, "learning_rate": 9.233090315340532e-08, "loss": 17.212, "step": 472430 }, { "epoch": 0.9543586905141869, "grad_norm": 127.94630432128906, "learning_rate": 9.226414276526208e-08, "loss": 19.5001, "step": 472440 }, { "epoch": 0.9543788911468707, "grad_norm": 148.956787109375, "learning_rate": 9.219740629682838e-08, "loss": 17.2413, "step": 472450 }, { "epoch": 0.9543990917795545, "grad_norm": 441.98602294921875, "learning_rate": 9.213069374842953e-08, "loss": 20.9059, "step": 472460 }, { "epoch": 0.9544192924122383, "grad_norm": 0.0, "learning_rate": 9.206400512039026e-08, "loss": 15.4225, "step": 472470 }, { "epoch": 0.9544394930449221, "grad_norm": 259.5444030761719, "learning_rate": 9.199734041303532e-08, "loss": 17.8902, "step": 472480 }, { "epoch": 0.954459693677606, "grad_norm": 297.65185546875, "learning_rate": 9.19306996266911e-08, "loss": 14.0223, "step": 472490 }, { "epoch": 0.9544798943102898, "grad_norm": 515.2940673828125, "learning_rate": 9.186408276168012e-08, "loss": 13.3307, "step": 472500 }, { "epoch": 0.9545000949429736, "grad_norm": 328.2804260253906, "learning_rate": 9.179748981832881e-08, "loss": 18.1669, "step": 472510 }, { "epoch": 0.9545202955756574, "grad_norm": 175.72622680664062, "learning_rate": 9.173092079696188e-08, "loss": 9.2535, "step": 472520 }, { "epoch": 0.9545404962083412, "grad_norm": 200.9589080810547, "learning_rate": 9.166437569790242e-08, "loss": 29.9611, "step": 472530 }, { "epoch": 0.9545606968410251, "grad_norm": 206.6644287109375, "learning_rate": 9.159785452147574e-08, "loss": 7.6536, "step": 472540 }, { "epoch": 0.9545808974737089, "grad_norm": 35.999141693115234, "learning_rate": 9.153135726800599e-08, "loss": 13.8692, "step": 472550 }, { "epoch": 0.9546010981063927, "grad_norm": 502.7469787597656, "learning_rate": 9.146488393781683e-08, "loss": 12.1722, "step": 472560 }, { "epoch": 0.9546212987390765, "grad_norm": 374.749267578125, "learning_rate": 9.139843453123243e-08, "loss": 18.9526, "step": 472570 }, { "epoch": 0.9546414993717603, "grad_norm": 475.12982177734375, "learning_rate": 9.133200904857642e-08, "loss": 16.7067, "step": 472580 }, { "epoch": 0.9546617000044442, "grad_norm": 101.17208099365234, "learning_rate": 9.126560749017354e-08, "loss": 20.7566, "step": 472590 }, { "epoch": 0.954681900637128, "grad_norm": 206.51406860351562, "learning_rate": 9.119922985634633e-08, "loss": 13.3556, "step": 472600 }, { "epoch": 0.9547021012698118, "grad_norm": 189.1208953857422, "learning_rate": 9.113287614741895e-08, "loss": 18.4321, "step": 472610 }, { "epoch": 0.9547223019024956, "grad_norm": 337.6694641113281, "learning_rate": 9.106654636371448e-08, "loss": 17.0854, "step": 472620 }, { "epoch": 0.9547425025351794, "grad_norm": 70.41694641113281, "learning_rate": 9.1000240505556e-08, "loss": 16.453, "step": 472630 }, { "epoch": 0.9547627031678633, "grad_norm": 110.73184967041016, "learning_rate": 9.093395857326714e-08, "loss": 19.7817, "step": 472640 }, { "epoch": 0.954782903800547, "grad_norm": 193.75955200195312, "learning_rate": 9.086770056717099e-08, "loss": 17.0077, "step": 472650 }, { "epoch": 0.9548031044332308, "grad_norm": 93.84293365478516, "learning_rate": 9.080146648759003e-08, "loss": 35.6111, "step": 472660 }, { "epoch": 0.9548233050659146, "grad_norm": 207.8497314453125, "learning_rate": 9.073525633484737e-08, "loss": 11.2637, "step": 472670 }, { "epoch": 0.9548435056985984, "grad_norm": 258.3289794921875, "learning_rate": 9.066907010926551e-08, "loss": 19.0346, "step": 472680 }, { "epoch": 0.9548637063312823, "grad_norm": 325.7560729980469, "learning_rate": 9.060290781116698e-08, "loss": 26.075, "step": 472690 }, { "epoch": 0.9548839069639661, "grad_norm": 388.6772766113281, "learning_rate": 9.053676944087542e-08, "loss": 27.1975, "step": 472700 }, { "epoch": 0.9549041075966499, "grad_norm": 433.8287658691406, "learning_rate": 9.04706549987111e-08, "loss": 19.3901, "step": 472710 }, { "epoch": 0.9549243082293337, "grad_norm": 294.281494140625, "learning_rate": 9.040456448499769e-08, "loss": 17.0707, "step": 472720 }, { "epoch": 0.9549445088620175, "grad_norm": 642.8132934570312, "learning_rate": 9.03384979000571e-08, "loss": 23.4874, "step": 472730 }, { "epoch": 0.9549647094947014, "grad_norm": 125.74943542480469, "learning_rate": 9.027245524421135e-08, "loss": 15.4275, "step": 472740 }, { "epoch": 0.9549849101273852, "grad_norm": 280.635986328125, "learning_rate": 9.020643651778183e-08, "loss": 25.7181, "step": 472750 }, { "epoch": 0.955005110760069, "grad_norm": 138.8530731201172, "learning_rate": 9.014044172109049e-08, "loss": 11.7531, "step": 472760 }, { "epoch": 0.9550253113927528, "grad_norm": 281.1873474121094, "learning_rate": 9.007447085445987e-08, "loss": 19.3867, "step": 472770 }, { "epoch": 0.9550455120254366, "grad_norm": 130.60691833496094, "learning_rate": 9.00085239182108e-08, "loss": 12.7674, "step": 472780 }, { "epoch": 0.9550657126581205, "grad_norm": 80.7839126586914, "learning_rate": 8.99426009126636e-08, "loss": 17.2423, "step": 472790 }, { "epoch": 0.9550859132908043, "grad_norm": 228.19493103027344, "learning_rate": 8.987670183814134e-08, "loss": 17.0725, "step": 472800 }, { "epoch": 0.9551061139234881, "grad_norm": 2.6318907737731934, "learning_rate": 8.981082669496433e-08, "loss": 22.8002, "step": 472810 }, { "epoch": 0.9551263145561719, "grad_norm": 355.2213439941406, "learning_rate": 8.974497548345396e-08, "loss": 16.1209, "step": 472820 }, { "epoch": 0.9551465151888557, "grad_norm": 349.511962890625, "learning_rate": 8.967914820393108e-08, "loss": 16.256, "step": 472830 }, { "epoch": 0.9551667158215396, "grad_norm": 165.65904235839844, "learning_rate": 8.961334485671657e-08, "loss": 9.6964, "step": 472840 }, { "epoch": 0.9551869164542234, "grad_norm": 196.5765838623047, "learning_rate": 8.954756544213128e-08, "loss": 6.1107, "step": 472850 }, { "epoch": 0.9552071170869072, "grad_norm": 369.4851989746094, "learning_rate": 8.948180996049493e-08, "loss": 26.2368, "step": 472860 }, { "epoch": 0.955227317719591, "grad_norm": 179.2471466064453, "learning_rate": 8.941607841212841e-08, "loss": 11.5676, "step": 472870 }, { "epoch": 0.9552475183522748, "grad_norm": 244.04078674316406, "learning_rate": 8.93503707973531e-08, "loss": 13.2306, "step": 472880 }, { "epoch": 0.9552677189849587, "grad_norm": 32.12590408325195, "learning_rate": 8.928468711648875e-08, "loss": 26.8184, "step": 472890 }, { "epoch": 0.9552879196176425, "grad_norm": 181.82196044921875, "learning_rate": 8.921902736985399e-08, "loss": 13.9331, "step": 472900 }, { "epoch": 0.9553081202503262, "grad_norm": 211.99375915527344, "learning_rate": 8.915339155777136e-08, "loss": 25.6744, "step": 472910 }, { "epoch": 0.95532832088301, "grad_norm": 597.634765625, "learning_rate": 8.908777968055893e-08, "loss": 23.3503, "step": 472920 }, { "epoch": 0.9553485215156938, "grad_norm": 178.07797241210938, "learning_rate": 8.902219173853699e-08, "loss": 13.8494, "step": 472930 }, { "epoch": 0.9553687221483776, "grad_norm": 567.0872192382812, "learning_rate": 8.895662773202529e-08, "loss": 18.0347, "step": 472940 }, { "epoch": 0.9553889227810615, "grad_norm": 343.8282775878906, "learning_rate": 8.889108766134358e-08, "loss": 24.0426, "step": 472950 }, { "epoch": 0.9554091234137453, "grad_norm": 303.4576721191406, "learning_rate": 8.882557152681104e-08, "loss": 10.7665, "step": 472960 }, { "epoch": 0.9554293240464291, "grad_norm": 76.28064727783203, "learning_rate": 8.876007932874686e-08, "loss": 8.4141, "step": 472970 }, { "epoch": 0.9554495246791129, "grad_norm": 100.38688659667969, "learning_rate": 8.869461106747024e-08, "loss": 11.032, "step": 472980 }, { "epoch": 0.9554697253117967, "grad_norm": 358.0327453613281, "learning_rate": 8.862916674330091e-08, "loss": 22.9662, "step": 472990 }, { "epoch": 0.9554899259444806, "grad_norm": 359.1787414550781, "learning_rate": 8.856374635655696e-08, "loss": 13.3273, "step": 473000 }, { "epoch": 0.9555101265771644, "grad_norm": 296.2112731933594, "learning_rate": 8.849834990755757e-08, "loss": 16.9119, "step": 473010 }, { "epoch": 0.9555303272098482, "grad_norm": 236.23472595214844, "learning_rate": 8.843297739662138e-08, "loss": 16.3383, "step": 473020 }, { "epoch": 0.955550527842532, "grad_norm": 66.28463745117188, "learning_rate": 8.836762882406757e-08, "loss": 11.8923, "step": 473030 }, { "epoch": 0.9555707284752158, "grad_norm": 162.89300537109375, "learning_rate": 8.830230419021424e-08, "loss": 27.8157, "step": 473040 }, { "epoch": 0.9555909291078997, "grad_norm": 209.03518676757812, "learning_rate": 8.823700349537945e-08, "loss": 18.1876, "step": 473050 }, { "epoch": 0.9556111297405835, "grad_norm": 419.4091491699219, "learning_rate": 8.817172673988184e-08, "loss": 46.1519, "step": 473060 }, { "epoch": 0.9556313303732673, "grad_norm": 129.33718872070312, "learning_rate": 8.810647392404004e-08, "loss": 15.8276, "step": 473070 }, { "epoch": 0.9556515310059511, "grad_norm": 261.8002624511719, "learning_rate": 8.804124504817046e-08, "loss": 16.3749, "step": 473080 }, { "epoch": 0.955671731638635, "grad_norm": 462.8828125, "learning_rate": 8.797604011259287e-08, "loss": 20.0273, "step": 473090 }, { "epoch": 0.9556919322713188, "grad_norm": 211.12620544433594, "learning_rate": 8.791085911762476e-08, "loss": 10.8481, "step": 473100 }, { "epoch": 0.9557121329040026, "grad_norm": 536.9437255859375, "learning_rate": 8.784570206358201e-08, "loss": 24.6362, "step": 473110 }, { "epoch": 0.9557323335366864, "grad_norm": 210.85113525390625, "learning_rate": 8.778056895078435e-08, "loss": 25.9357, "step": 473120 }, { "epoch": 0.9557525341693702, "grad_norm": 598.8668212890625, "learning_rate": 8.77154597795482e-08, "loss": 30.8427, "step": 473130 }, { "epoch": 0.955772734802054, "grad_norm": 79.7105484008789, "learning_rate": 8.765037455019165e-08, "loss": 16.2035, "step": 473140 }, { "epoch": 0.9557929354347379, "grad_norm": 374.01458740234375, "learning_rate": 8.758531326303054e-08, "loss": 12.7645, "step": 473150 }, { "epoch": 0.9558131360674217, "grad_norm": 166.92881774902344, "learning_rate": 8.752027591838352e-08, "loss": 13.3774, "step": 473160 }, { "epoch": 0.9558333367001054, "grad_norm": 317.66131591796875, "learning_rate": 8.7455262516567e-08, "loss": 10.217, "step": 473170 }, { "epoch": 0.9558535373327892, "grad_norm": 161.99180603027344, "learning_rate": 8.739027305789682e-08, "loss": 9.1677, "step": 473180 }, { "epoch": 0.955873737965473, "grad_norm": 327.8397216796875, "learning_rate": 8.732530754269108e-08, "loss": 15.8845, "step": 473190 }, { "epoch": 0.9558939385981569, "grad_norm": 79.6656494140625, "learning_rate": 8.726036597126619e-08, "loss": 22.2873, "step": 473200 }, { "epoch": 0.9559141392308407, "grad_norm": 133.7283935546875, "learning_rate": 8.719544834393855e-08, "loss": 10.4272, "step": 473210 }, { "epoch": 0.9559343398635245, "grad_norm": 6.810715675354004, "learning_rate": 8.713055466102349e-08, "loss": 11.7141, "step": 473220 }, { "epoch": 0.9559545404962083, "grad_norm": 94.8315658569336, "learning_rate": 8.706568492283907e-08, "loss": 17.678, "step": 473230 }, { "epoch": 0.9559747411288921, "grad_norm": 80.83332824707031, "learning_rate": 8.700083912970058e-08, "loss": 13.8116, "step": 473240 }, { "epoch": 0.955994941761576, "grad_norm": 328.4601135253906, "learning_rate": 8.693601728192392e-08, "loss": 20.038, "step": 473250 }, { "epoch": 0.9560151423942598, "grad_norm": 396.4517517089844, "learning_rate": 8.687121937982545e-08, "loss": 22.6981, "step": 473260 }, { "epoch": 0.9560353430269436, "grad_norm": 177.8253631591797, "learning_rate": 8.680644542372052e-08, "loss": 15.6881, "step": 473270 }, { "epoch": 0.9560555436596274, "grad_norm": 219.24429321289062, "learning_rate": 8.674169541392552e-08, "loss": 13.0421, "step": 473280 }, { "epoch": 0.9560757442923112, "grad_norm": 308.054931640625, "learning_rate": 8.66769693507552e-08, "loss": 13.0121, "step": 473290 }, { "epoch": 0.9560959449249951, "grad_norm": 461.49658203125, "learning_rate": 8.661226723452542e-08, "loss": 30.7874, "step": 473300 }, { "epoch": 0.9561161455576789, "grad_norm": 271.1308288574219, "learning_rate": 8.65475890655515e-08, "loss": 26.9139, "step": 473310 }, { "epoch": 0.9561363461903627, "grad_norm": 109.32914733886719, "learning_rate": 8.648293484414871e-08, "loss": 14.0129, "step": 473320 }, { "epoch": 0.9561565468230465, "grad_norm": 280.9069519042969, "learning_rate": 8.641830457063239e-08, "loss": 15.7103, "step": 473330 }, { "epoch": 0.9561767474557303, "grad_norm": 195.9000244140625, "learning_rate": 8.63536982453167e-08, "loss": 18.6843, "step": 473340 }, { "epoch": 0.9561969480884142, "grad_norm": 193.5458221435547, "learning_rate": 8.628911586851752e-08, "loss": 19.0862, "step": 473350 }, { "epoch": 0.956217148721098, "grad_norm": 267.5543518066406, "learning_rate": 8.622455744054958e-08, "loss": 15.2687, "step": 473360 }, { "epoch": 0.9562373493537818, "grad_norm": 443.3758544921875, "learning_rate": 8.616002296172654e-08, "loss": 21.8186, "step": 473370 }, { "epoch": 0.9562575499864656, "grad_norm": 283.0821838378906, "learning_rate": 8.609551243236424e-08, "loss": 14.6029, "step": 473380 }, { "epoch": 0.9562777506191494, "grad_norm": 376.0336608886719, "learning_rate": 8.603102585277634e-08, "loss": 22.0365, "step": 473390 }, { "epoch": 0.9562979512518333, "grad_norm": 201.6820526123047, "learning_rate": 8.596656322327645e-08, "loss": 19.2655, "step": 473400 }, { "epoch": 0.9563181518845171, "grad_norm": 170.3756866455078, "learning_rate": 8.59021245441799e-08, "loss": 20.9075, "step": 473410 }, { "epoch": 0.9563383525172008, "grad_norm": 113.07420349121094, "learning_rate": 8.583770981580142e-08, "loss": 14.0275, "step": 473420 }, { "epoch": 0.9563585531498846, "grad_norm": 113.4518051147461, "learning_rate": 8.577331903845243e-08, "loss": 15.9164, "step": 473430 }, { "epoch": 0.9563787537825684, "grad_norm": 144.70835876464844, "learning_rate": 8.57089522124488e-08, "loss": 19.1881, "step": 473440 }, { "epoch": 0.9563989544152522, "grad_norm": 488.23638916015625, "learning_rate": 8.564460933810414e-08, "loss": 15.9569, "step": 473450 }, { "epoch": 0.9564191550479361, "grad_norm": 203.3751678466797, "learning_rate": 8.558029041573157e-08, "loss": 17.6322, "step": 473460 }, { "epoch": 0.9564393556806199, "grad_norm": 232.36102294921875, "learning_rate": 8.55159954456436e-08, "loss": 14.4901, "step": 473470 }, { "epoch": 0.9564595563133037, "grad_norm": 64.39278411865234, "learning_rate": 8.545172442815552e-08, "loss": 10.8955, "step": 473480 }, { "epoch": 0.9564797569459875, "grad_norm": 495.407470703125, "learning_rate": 8.538747736357933e-08, "loss": 25.2176, "step": 473490 }, { "epoch": 0.9564999575786713, "grad_norm": 47.280120849609375, "learning_rate": 8.53232542522292e-08, "loss": 22.3482, "step": 473500 }, { "epoch": 0.9565201582113552, "grad_norm": 107.16753387451172, "learning_rate": 8.525905509441656e-08, "loss": 18.7533, "step": 473510 }, { "epoch": 0.956540358844039, "grad_norm": 58.44377517700195, "learning_rate": 8.51948798904556e-08, "loss": 30.5177, "step": 473520 }, { "epoch": 0.9565605594767228, "grad_norm": 438.2406921386719, "learning_rate": 8.513072864065885e-08, "loss": 20.7406, "step": 473530 }, { "epoch": 0.9565807601094066, "grad_norm": 313.06787109375, "learning_rate": 8.506660134533828e-08, "loss": 16.4932, "step": 473540 }, { "epoch": 0.9566009607420904, "grad_norm": 299.9169921875, "learning_rate": 8.500249800480754e-08, "loss": 11.3267, "step": 473550 }, { "epoch": 0.9566211613747743, "grad_norm": 603.1043090820312, "learning_rate": 8.493841861937802e-08, "loss": 37.6081, "step": 473560 }, { "epoch": 0.9566413620074581, "grad_norm": 216.57540893554688, "learning_rate": 8.487436318936282e-08, "loss": 19.5097, "step": 473570 }, { "epoch": 0.9566615626401419, "grad_norm": 372.18536376953125, "learning_rate": 8.481033171507391e-08, "loss": 21.0429, "step": 473580 }, { "epoch": 0.9566817632728257, "grad_norm": 336.1340637207031, "learning_rate": 8.474632419682327e-08, "loss": 10.3598, "step": 473590 }, { "epoch": 0.9567019639055095, "grad_norm": 165.48570251464844, "learning_rate": 8.468234063492287e-08, "loss": 13.1302, "step": 473600 }, { "epoch": 0.9567221645381934, "grad_norm": 298.9831848144531, "learning_rate": 8.461838102968467e-08, "loss": 26.003, "step": 473610 }, { "epoch": 0.9567423651708772, "grad_norm": 261.3507385253906, "learning_rate": 8.45544453814201e-08, "loss": 13.8456, "step": 473620 }, { "epoch": 0.956762565803561, "grad_norm": 297.2005310058594, "learning_rate": 8.449053369044058e-08, "loss": 40.8161, "step": 473630 }, { "epoch": 0.9567827664362448, "grad_norm": 387.9478759765625, "learning_rate": 8.442664595705862e-08, "loss": 18.4141, "step": 473640 }, { "epoch": 0.9568029670689286, "grad_norm": 212.37924194335938, "learning_rate": 8.436278218158511e-08, "loss": 14.217, "step": 473650 }, { "epoch": 0.9568231677016125, "grad_norm": 356.33892822265625, "learning_rate": 8.429894236433089e-08, "loss": 26.0145, "step": 473660 }, { "epoch": 0.9568433683342963, "grad_norm": 199.64849853515625, "learning_rate": 8.423512650560795e-08, "loss": 12.3553, "step": 473670 }, { "epoch": 0.95686356896698, "grad_norm": 55.902217864990234, "learning_rate": 8.417133460572658e-08, "loss": 13.6445, "step": 473680 }, { "epoch": 0.9568837695996638, "grad_norm": 109.74313354492188, "learning_rate": 8.410756666499709e-08, "loss": 10.69, "step": 473690 }, { "epoch": 0.9569039702323476, "grad_norm": 291.5455627441406, "learning_rate": 8.404382268373145e-08, "loss": 28.3833, "step": 473700 }, { "epoch": 0.9569241708650315, "grad_norm": 0.0, "learning_rate": 8.39801026622411e-08, "loss": 9.3024, "step": 473710 }, { "epoch": 0.9569443714977153, "grad_norm": 331.5452880859375, "learning_rate": 8.391640660083411e-08, "loss": 17.7631, "step": 473720 }, { "epoch": 0.9569645721303991, "grad_norm": 288.2303161621094, "learning_rate": 8.3852734499823e-08, "loss": 25.573, "step": 473730 }, { "epoch": 0.9569847727630829, "grad_norm": 136.45462036132812, "learning_rate": 8.3789086359517e-08, "loss": 12.9635, "step": 473740 }, { "epoch": 0.9570049733957667, "grad_norm": 228.79193115234375, "learning_rate": 8.372546218022747e-08, "loss": 14.8749, "step": 473750 }, { "epoch": 0.9570251740284506, "grad_norm": 702.3757934570312, "learning_rate": 8.366186196226311e-08, "loss": 10.4563, "step": 473760 }, { "epoch": 0.9570453746611344, "grad_norm": 490.6363830566406, "learning_rate": 8.35982857059342e-08, "loss": 24.4584, "step": 473770 }, { "epoch": 0.9570655752938182, "grad_norm": 299.2497253417969, "learning_rate": 8.353473341155216e-08, "loss": 17.0274, "step": 473780 }, { "epoch": 0.957085775926502, "grad_norm": 409.9384460449219, "learning_rate": 8.347120507942453e-08, "loss": 23.91, "step": 473790 }, { "epoch": 0.9571059765591858, "grad_norm": 227.42718505859375, "learning_rate": 8.340770070986215e-08, "loss": 12.4463, "step": 473800 }, { "epoch": 0.9571261771918697, "grad_norm": 338.9191589355469, "learning_rate": 8.334422030317424e-08, "loss": 16.1405, "step": 473810 }, { "epoch": 0.9571463778245535, "grad_norm": 196.53457641601562, "learning_rate": 8.328076385967055e-08, "loss": 14.9167, "step": 473820 }, { "epoch": 0.9571665784572373, "grad_norm": 96.96067810058594, "learning_rate": 8.321733137966026e-08, "loss": 14.137, "step": 473830 }, { "epoch": 0.9571867790899211, "grad_norm": 165.6729278564453, "learning_rate": 8.315392286345203e-08, "loss": 13.696, "step": 473840 }, { "epoch": 0.9572069797226049, "grad_norm": 152.39344787597656, "learning_rate": 8.30905383113556e-08, "loss": 10.6705, "step": 473850 }, { "epoch": 0.9572271803552888, "grad_norm": 103.52873992919922, "learning_rate": 8.302717772367908e-08, "loss": 14.0565, "step": 473860 }, { "epoch": 0.9572473809879726, "grad_norm": 624.4776611328125, "learning_rate": 8.296384110073164e-08, "loss": 20.4844, "step": 473870 }, { "epoch": 0.9572675816206564, "grad_norm": 55.818180084228516, "learning_rate": 8.290052844282248e-08, "loss": 23.9011, "step": 473880 }, { "epoch": 0.9572877822533402, "grad_norm": 334.4815979003906, "learning_rate": 8.283723975025971e-08, "loss": 10.7081, "step": 473890 }, { "epoch": 0.957307982886024, "grad_norm": 338.7004699707031, "learning_rate": 8.277397502335194e-08, "loss": 13.664, "step": 473900 }, { "epoch": 0.9573281835187079, "grad_norm": 306.1288757324219, "learning_rate": 8.271073426240672e-08, "loss": 15.5262, "step": 473910 }, { "epoch": 0.9573483841513917, "grad_norm": 404.05316162109375, "learning_rate": 8.264751746773381e-08, "loss": 19.4341, "step": 473920 }, { "epoch": 0.9573685847840754, "grad_norm": 138.5911865234375, "learning_rate": 8.258432463964016e-08, "loss": 9.5202, "step": 473930 }, { "epoch": 0.9573887854167592, "grad_norm": 86.55339813232422, "learning_rate": 8.252115577843444e-08, "loss": 17.1271, "step": 473940 }, { "epoch": 0.957408986049443, "grad_norm": 184.81588745117188, "learning_rate": 8.245801088442362e-08, "loss": 19.6974, "step": 473950 }, { "epoch": 0.9574291866821268, "grad_norm": 114.68429565429688, "learning_rate": 8.239488995791633e-08, "loss": 12.5398, "step": 473960 }, { "epoch": 0.9574493873148107, "grad_norm": 92.18013000488281, "learning_rate": 8.233179299922012e-08, "loss": 9.8894, "step": 473970 }, { "epoch": 0.9574695879474945, "grad_norm": 447.7590637207031, "learning_rate": 8.226872000864194e-08, "loss": 18.6785, "step": 473980 }, { "epoch": 0.9574897885801783, "grad_norm": 243.25074768066406, "learning_rate": 8.22056709864899e-08, "loss": 28.9401, "step": 473990 }, { "epoch": 0.9575099892128621, "grad_norm": 63.45094680786133, "learning_rate": 8.214264593307097e-08, "loss": 14.1391, "step": 474000 }, { "epoch": 0.9575301898455459, "grad_norm": 286.0859069824219, "learning_rate": 8.207964484869158e-08, "loss": 22.7937, "step": 474010 }, { "epoch": 0.9575503904782298, "grad_norm": 251.81008911132812, "learning_rate": 8.201666773365979e-08, "loss": 12.8405, "step": 474020 }, { "epoch": 0.9575705911109136, "grad_norm": 154.5972442626953, "learning_rate": 8.195371458828316e-08, "loss": 9.8992, "step": 474030 }, { "epoch": 0.9575907917435974, "grad_norm": 1024.4110107421875, "learning_rate": 8.1890785412867e-08, "loss": 36.8507, "step": 474040 }, { "epoch": 0.9576109923762812, "grad_norm": 520.576416015625, "learning_rate": 8.182788020771826e-08, "loss": 14.4984, "step": 474050 }, { "epoch": 0.957631193008965, "grad_norm": 365.233642578125, "learning_rate": 8.176499897314505e-08, "loss": 16.9359, "step": 474060 }, { "epoch": 0.9576513936416489, "grad_norm": 237.56802368164062, "learning_rate": 8.170214170945212e-08, "loss": 18.8759, "step": 474070 }, { "epoch": 0.9576715942743327, "grad_norm": 0.0, "learning_rate": 8.163930841694589e-08, "loss": 7.2028, "step": 474080 }, { "epoch": 0.9576917949070165, "grad_norm": 567.1417236328125, "learning_rate": 8.157649909593335e-08, "loss": 21.3314, "step": 474090 }, { "epoch": 0.9577119955397003, "grad_norm": 430.8260192871094, "learning_rate": 8.151371374672146e-08, "loss": 25.491, "step": 474100 }, { "epoch": 0.9577321961723841, "grad_norm": 342.25030517578125, "learning_rate": 8.145095236961387e-08, "loss": 17.3369, "step": 474110 }, { "epoch": 0.957752396805068, "grad_norm": 325.0548095703125, "learning_rate": 8.13882149649181e-08, "loss": 17.8569, "step": 474120 }, { "epoch": 0.9577725974377518, "grad_norm": 251.9890594482422, "learning_rate": 8.132550153294005e-08, "loss": 15.6318, "step": 474130 }, { "epoch": 0.9577927980704356, "grad_norm": 468.25421142578125, "learning_rate": 8.1262812073985e-08, "loss": 21.4562, "step": 474140 }, { "epoch": 0.9578129987031194, "grad_norm": 70.49613952636719, "learning_rate": 8.120014658835828e-08, "loss": 19.4513, "step": 474150 }, { "epoch": 0.9578331993358032, "grad_norm": 129.25839233398438, "learning_rate": 8.11375050763652e-08, "loss": 9.8043, "step": 474160 }, { "epoch": 0.9578533999684871, "grad_norm": 212.5464630126953, "learning_rate": 8.107488753831161e-08, "loss": 8.6988, "step": 474170 }, { "epoch": 0.9578736006011709, "grad_norm": 196.2848358154297, "learning_rate": 8.101229397450228e-08, "loss": 23.1565, "step": 474180 }, { "epoch": 0.9578938012338546, "grad_norm": 357.7814636230469, "learning_rate": 8.094972438524251e-08, "loss": 23.4829, "step": 474190 }, { "epoch": 0.9579140018665384, "grad_norm": 38.56194305419922, "learning_rate": 8.088717877083706e-08, "loss": 9.1624, "step": 474200 }, { "epoch": 0.9579342024992222, "grad_norm": 337.6510314941406, "learning_rate": 8.082465713159126e-08, "loss": 14.772, "step": 474210 }, { "epoch": 0.957954403131906, "grad_norm": 395.1022644042969, "learning_rate": 8.076215946780874e-08, "loss": 16.72, "step": 474220 }, { "epoch": 0.9579746037645899, "grad_norm": 240.04920959472656, "learning_rate": 8.069968577979536e-08, "loss": 19.9663, "step": 474230 }, { "epoch": 0.9579948043972737, "grad_norm": 191.10911560058594, "learning_rate": 8.063723606785478e-08, "loss": 12.3374, "step": 474240 }, { "epoch": 0.9580150050299575, "grad_norm": 229.42955017089844, "learning_rate": 8.057481033229176e-08, "loss": 12.8458, "step": 474250 }, { "epoch": 0.9580352056626413, "grad_norm": 476.42681884765625, "learning_rate": 8.051240857341102e-08, "loss": 21.7681, "step": 474260 }, { "epoch": 0.9580554062953252, "grad_norm": 274.9783020019531, "learning_rate": 8.045003079151514e-08, "loss": 14.8177, "step": 474270 }, { "epoch": 0.958075606928009, "grad_norm": 245.0540008544922, "learning_rate": 8.038767698690996e-08, "loss": 21.4797, "step": 474280 }, { "epoch": 0.9580958075606928, "grad_norm": 32.84925079345703, "learning_rate": 8.032534715989859e-08, "loss": 15.4773, "step": 474290 }, { "epoch": 0.9581160081933766, "grad_norm": 3.756037473678589, "learning_rate": 8.02630413107841e-08, "loss": 28.1191, "step": 474300 }, { "epoch": 0.9581362088260604, "grad_norm": 201.16249084472656, "learning_rate": 8.020075943987071e-08, "loss": 27.9424, "step": 474310 }, { "epoch": 0.9581564094587443, "grad_norm": 316.2496337890625, "learning_rate": 8.013850154746317e-08, "loss": 23.3295, "step": 474320 }, { "epoch": 0.9581766100914281, "grad_norm": 191.16314697265625, "learning_rate": 8.007626763386345e-08, "loss": 10.7816, "step": 474330 }, { "epoch": 0.9581968107241119, "grad_norm": 86.80632019042969, "learning_rate": 8.001405769937464e-08, "loss": 43.165, "step": 474340 }, { "epoch": 0.9582170113567957, "grad_norm": 150.78231811523438, "learning_rate": 7.995187174430152e-08, "loss": 8.0702, "step": 474350 }, { "epoch": 0.9582372119894795, "grad_norm": 297.2938537597656, "learning_rate": 7.988970976894605e-08, "loss": 15.6809, "step": 474360 }, { "epoch": 0.9582574126221634, "grad_norm": 327.9884948730469, "learning_rate": 7.982757177361078e-08, "loss": 26.3823, "step": 474370 }, { "epoch": 0.9582776132548472, "grad_norm": 111.11241149902344, "learning_rate": 7.976545775859934e-08, "loss": 16.9001, "step": 474380 }, { "epoch": 0.958297813887531, "grad_norm": 230.60694885253906, "learning_rate": 7.970336772421483e-08, "loss": 10.2513, "step": 474390 }, { "epoch": 0.9583180145202148, "grad_norm": 46.74585723876953, "learning_rate": 7.964130167075923e-08, "loss": 26.2545, "step": 474400 }, { "epoch": 0.9583382151528986, "grad_norm": 31.040271759033203, "learning_rate": 7.957925959853452e-08, "loss": 17.7189, "step": 474410 }, { "epoch": 0.9583584157855825, "grad_norm": 274.662353515625, "learning_rate": 7.951724150784434e-08, "loss": 14.8723, "step": 474420 }, { "epoch": 0.9583786164182663, "grad_norm": 784.5784301757812, "learning_rate": 7.945524739899069e-08, "loss": 16.8366, "step": 474430 }, { "epoch": 0.95839881705095, "grad_norm": 176.93814086914062, "learning_rate": 7.939327727227441e-08, "loss": 13.5646, "step": 474440 }, { "epoch": 0.9584190176836338, "grad_norm": 500.37481689453125, "learning_rate": 7.933133112799918e-08, "loss": 6.064, "step": 474450 }, { "epoch": 0.9584392183163176, "grad_norm": 275.79901123046875, "learning_rate": 7.926940896646584e-08, "loss": 24.7872, "step": 474460 }, { "epoch": 0.9584594189490014, "grad_norm": 1403.5462646484375, "learning_rate": 7.920751078797695e-08, "loss": 26.6344, "step": 474470 }, { "epoch": 0.9584796195816853, "grad_norm": 24.37729263305664, "learning_rate": 7.914563659283392e-08, "loss": 6.7677, "step": 474480 }, { "epoch": 0.9584998202143691, "grad_norm": 424.0550231933594, "learning_rate": 7.908378638133762e-08, "loss": 13.5092, "step": 474490 }, { "epoch": 0.9585200208470529, "grad_norm": 538.5086059570312, "learning_rate": 7.90219601537906e-08, "loss": 28.8774, "step": 474500 }, { "epoch": 0.9585402214797367, "grad_norm": 89.05743408203125, "learning_rate": 7.896015791049372e-08, "loss": 21.9172, "step": 474510 }, { "epoch": 0.9585604221124205, "grad_norm": 416.69683837890625, "learning_rate": 7.889837965174784e-08, "loss": 11.4691, "step": 474520 }, { "epoch": 0.9585806227451044, "grad_norm": 116.03609466552734, "learning_rate": 7.883662537785442e-08, "loss": 21.3867, "step": 474530 }, { "epoch": 0.9586008233777882, "grad_norm": 52.5672721862793, "learning_rate": 7.877489508911429e-08, "loss": 28.9836, "step": 474540 }, { "epoch": 0.958621024010472, "grad_norm": 275.4264831542969, "learning_rate": 7.871318878582889e-08, "loss": 16.1185, "step": 474550 }, { "epoch": 0.9586412246431558, "grad_norm": 49.7515754699707, "learning_rate": 7.865150646829855e-08, "loss": 7.3196, "step": 474560 }, { "epoch": 0.9586614252758396, "grad_norm": 361.1689147949219, "learning_rate": 7.858984813682357e-08, "loss": 14.6561, "step": 474570 }, { "epoch": 0.9586816259085235, "grad_norm": 375.77764892578125, "learning_rate": 7.852821379170538e-08, "loss": 28.1445, "step": 474580 }, { "epoch": 0.9587018265412073, "grad_norm": 277.7471923828125, "learning_rate": 7.846660343324263e-08, "loss": 17.1649, "step": 474590 }, { "epoch": 0.9587220271738911, "grad_norm": 536.0797729492188, "learning_rate": 7.840501706173786e-08, "loss": 17.6065, "step": 474600 }, { "epoch": 0.9587422278065749, "grad_norm": 269.5274353027344, "learning_rate": 7.834345467748972e-08, "loss": 18.4193, "step": 474610 }, { "epoch": 0.9587624284392587, "grad_norm": 243.64309692382812, "learning_rate": 7.828191628079851e-08, "loss": 18.9776, "step": 474620 }, { "epoch": 0.9587826290719426, "grad_norm": 81.72669982910156, "learning_rate": 7.8220401871964e-08, "loss": 27.3236, "step": 474630 }, { "epoch": 0.9588028297046264, "grad_norm": 7.575145244598389, "learning_rate": 7.815891145128763e-08, "loss": 13.8749, "step": 474640 }, { "epoch": 0.9588230303373102, "grad_norm": 256.9676208496094, "learning_rate": 7.809744501906635e-08, "loss": 13.6655, "step": 474650 }, { "epoch": 0.958843230969994, "grad_norm": 251.22300720214844, "learning_rate": 7.803600257560162e-08, "loss": 28.9949, "step": 474660 }, { "epoch": 0.9588634316026778, "grad_norm": 429.87322998046875, "learning_rate": 7.797458412119264e-08, "loss": 14.0549, "step": 474670 }, { "epoch": 0.9588836322353617, "grad_norm": 198.52183532714844, "learning_rate": 7.791318965613859e-08, "loss": 11.3697, "step": 474680 }, { "epoch": 0.9589038328680455, "grad_norm": 487.17156982421875, "learning_rate": 7.785181918073814e-08, "loss": 12.1822, "step": 474690 }, { "epoch": 0.9589240335007292, "grad_norm": 0.0, "learning_rate": 7.779047269529105e-08, "loss": 24.6676, "step": 474700 }, { "epoch": 0.958944234133413, "grad_norm": 272.5414733886719, "learning_rate": 7.772915020009708e-08, "loss": 17.4134, "step": 474710 }, { "epoch": 0.9589644347660968, "grad_norm": 132.16969299316406, "learning_rate": 7.766785169545376e-08, "loss": 23.6353, "step": 474720 }, { "epoch": 0.9589846353987806, "grad_norm": 159.81565856933594, "learning_rate": 7.760657718165976e-08, "loss": 20.8954, "step": 474730 }, { "epoch": 0.9590048360314645, "grad_norm": 252.47760009765625, "learning_rate": 7.754532665901482e-08, "loss": 28.9729, "step": 474740 }, { "epoch": 0.9590250366641483, "grad_norm": 334.49664306640625, "learning_rate": 7.748410012781705e-08, "loss": 10.6586, "step": 474750 }, { "epoch": 0.9590452372968321, "grad_norm": 94.72216033935547, "learning_rate": 7.742289758836452e-08, "loss": 13.7154, "step": 474760 }, { "epoch": 0.9590654379295159, "grad_norm": 237.7094268798828, "learning_rate": 7.736171904095591e-08, "loss": 28.3998, "step": 474770 }, { "epoch": 0.9590856385621997, "grad_norm": 125.54972076416016, "learning_rate": 7.73005644858893e-08, "loss": 21.3815, "step": 474780 }, { "epoch": 0.9591058391948836, "grad_norm": 32.88019561767578, "learning_rate": 7.723943392346223e-08, "loss": 19.0009, "step": 474790 }, { "epoch": 0.9591260398275674, "grad_norm": 14.027314186096191, "learning_rate": 7.717832735397335e-08, "loss": 6.6176, "step": 474800 }, { "epoch": 0.9591462404602512, "grad_norm": 96.62458801269531, "learning_rate": 7.71172447777202e-08, "loss": 12.2616, "step": 474810 }, { "epoch": 0.959166441092935, "grad_norm": 196.66140747070312, "learning_rate": 7.705618619500032e-08, "loss": 17.6131, "step": 474820 }, { "epoch": 0.9591866417256188, "grad_norm": 20.23488998413086, "learning_rate": 7.699515160611182e-08, "loss": 15.0959, "step": 474830 }, { "epoch": 0.9592068423583027, "grad_norm": 274.4444274902344, "learning_rate": 7.693414101135166e-08, "loss": 10.6709, "step": 474840 }, { "epoch": 0.9592270429909865, "grad_norm": 168.56292724609375, "learning_rate": 7.687315441101795e-08, "loss": 10.3256, "step": 474850 }, { "epoch": 0.9592472436236703, "grad_norm": 698.96826171875, "learning_rate": 7.681219180540655e-08, "loss": 19.0904, "step": 474860 }, { "epoch": 0.9592674442563541, "grad_norm": 267.51019287109375, "learning_rate": 7.675125319481614e-08, "loss": 35.6655, "step": 474870 }, { "epoch": 0.959287644889038, "grad_norm": 214.7691192626953, "learning_rate": 7.669033857954255e-08, "loss": 6.8796, "step": 474880 }, { "epoch": 0.9593078455217218, "grad_norm": 271.8771057128906, "learning_rate": 7.662944795988337e-08, "loss": 14.7434, "step": 474890 }, { "epoch": 0.9593280461544056, "grad_norm": 143.24952697753906, "learning_rate": 7.656858133613498e-08, "loss": 14.6898, "step": 474900 }, { "epoch": 0.9593482467870894, "grad_norm": 404.09478759765625, "learning_rate": 7.65077387085944e-08, "loss": 18.0967, "step": 474910 }, { "epoch": 0.9593684474197732, "grad_norm": 0.0, "learning_rate": 7.64469200775575e-08, "loss": 8.2077, "step": 474920 }, { "epoch": 0.959388648052457, "grad_norm": 140.16885375976562, "learning_rate": 7.638612544332181e-08, "loss": 20.7628, "step": 474930 }, { "epoch": 0.9594088486851409, "grad_norm": 200.65782165527344, "learning_rate": 7.632535480618264e-08, "loss": 10.9137, "step": 474940 }, { "epoch": 0.9594290493178247, "grad_norm": 151.9487762451172, "learning_rate": 7.626460816643588e-08, "loss": 13.8894, "step": 474950 }, { "epoch": 0.9594492499505084, "grad_norm": 333.2170715332031, "learning_rate": 7.620388552437907e-08, "loss": 19.9184, "step": 474960 }, { "epoch": 0.9594694505831922, "grad_norm": 267.5528869628906, "learning_rate": 7.614318688030753e-08, "loss": 14.4488, "step": 474970 }, { "epoch": 0.959489651215876, "grad_norm": 434.326171875, "learning_rate": 7.608251223451601e-08, "loss": 17.0198, "step": 474980 }, { "epoch": 0.9595098518485599, "grad_norm": 65.68861389160156, "learning_rate": 7.602186158730152e-08, "loss": 22.1237, "step": 474990 }, { "epoch": 0.9595300524812437, "grad_norm": 366.1726989746094, "learning_rate": 7.59612349389599e-08, "loss": 14.4474, "step": 475000 }, { "epoch": 0.9595502531139275, "grad_norm": 195.4129180908203, "learning_rate": 7.590063228978539e-08, "loss": 19.7011, "step": 475010 }, { "epoch": 0.9595704537466113, "grad_norm": 338.5332336425781, "learning_rate": 7.584005364007386e-08, "loss": 19.4208, "step": 475020 }, { "epoch": 0.9595906543792951, "grad_norm": 820.9507446289062, "learning_rate": 7.577949899012116e-08, "loss": 19.7084, "step": 475030 }, { "epoch": 0.959610855011979, "grad_norm": 199.35757446289062, "learning_rate": 7.571896834022152e-08, "loss": 12.0899, "step": 475040 }, { "epoch": 0.9596310556446628, "grad_norm": 252.28346252441406, "learning_rate": 7.565846169067026e-08, "loss": 19.6838, "step": 475050 }, { "epoch": 0.9596512562773466, "grad_norm": 113.28939056396484, "learning_rate": 7.559797904176325e-08, "loss": 11.5452, "step": 475060 }, { "epoch": 0.9596714569100304, "grad_norm": 136.12945556640625, "learning_rate": 7.553752039379359e-08, "loss": 19.6234, "step": 475070 }, { "epoch": 0.9596916575427142, "grad_norm": 152.21763610839844, "learning_rate": 7.547708574705714e-08, "loss": 7.7885, "step": 475080 }, { "epoch": 0.9597118581753981, "grad_norm": 595.0191040039062, "learning_rate": 7.541667510184813e-08, "loss": 11.2779, "step": 475090 }, { "epoch": 0.9597320588080819, "grad_norm": 248.594482421875, "learning_rate": 7.535628845846077e-08, "loss": 21.5807, "step": 475100 }, { "epoch": 0.9597522594407657, "grad_norm": 220.2259063720703, "learning_rate": 7.529592581718981e-08, "loss": 20.4683, "step": 475110 }, { "epoch": 0.9597724600734495, "grad_norm": 333.22723388671875, "learning_rate": 7.52355871783289e-08, "loss": 25.0653, "step": 475120 }, { "epoch": 0.9597926607061333, "grad_norm": 406.2622985839844, "learning_rate": 7.517527254217282e-08, "loss": 14.6823, "step": 475130 }, { "epoch": 0.9598128613388172, "grad_norm": 321.54608154296875, "learning_rate": 7.511498190901467e-08, "loss": 15.3557, "step": 475140 }, { "epoch": 0.959833061971501, "grad_norm": 410.90118408203125, "learning_rate": 7.50547152791492e-08, "loss": 10.3963, "step": 475150 }, { "epoch": 0.9598532626041848, "grad_norm": 106.2830810546875, "learning_rate": 7.499447265286952e-08, "loss": 13.0822, "step": 475160 }, { "epoch": 0.9598734632368686, "grad_norm": 260.9781494140625, "learning_rate": 7.493425403046928e-08, "loss": 11.039, "step": 475170 }, { "epoch": 0.9598936638695524, "grad_norm": 260.24176025390625, "learning_rate": 7.487405941224268e-08, "loss": 15.9699, "step": 475180 }, { "epoch": 0.9599138645022363, "grad_norm": 290.000732421875, "learning_rate": 7.481388879848228e-08, "loss": 14.3762, "step": 475190 }, { "epoch": 0.9599340651349201, "grad_norm": 86.14088439941406, "learning_rate": 7.475374218948118e-08, "loss": 17.105, "step": 475200 }, { "epoch": 0.9599542657676038, "grad_norm": 144.34207153320312, "learning_rate": 7.469361958553356e-08, "loss": 6.9962, "step": 475210 }, { "epoch": 0.9599744664002876, "grad_norm": 364.90679931640625, "learning_rate": 7.463352098693199e-08, "loss": 9.7677, "step": 475220 }, { "epoch": 0.9599946670329714, "grad_norm": 401.1788024902344, "learning_rate": 7.457344639396902e-08, "loss": 16.2795, "step": 475230 }, { "epoch": 0.9600148676656552, "grad_norm": 152.72698974609375, "learning_rate": 7.451339580693718e-08, "loss": 11.5242, "step": 475240 }, { "epoch": 0.9600350682983391, "grad_norm": 209.91641235351562, "learning_rate": 7.445336922613067e-08, "loss": 20.3743, "step": 475250 }, { "epoch": 0.9600552689310229, "grad_norm": 125.84286499023438, "learning_rate": 7.439336665184039e-08, "loss": 18.534, "step": 475260 }, { "epoch": 0.9600754695637067, "grad_norm": 209.10047912597656, "learning_rate": 7.433338808435941e-08, "loss": 16.4652, "step": 475270 }, { "epoch": 0.9600956701963905, "grad_norm": 274.1868896484375, "learning_rate": 7.427343352398031e-08, "loss": 22.5486, "step": 475280 }, { "epoch": 0.9601158708290743, "grad_norm": 446.7681884765625, "learning_rate": 7.421350297099505e-08, "loss": 22.5897, "step": 475290 }, { "epoch": 0.9601360714617582, "grad_norm": 383.9038391113281, "learning_rate": 7.415359642569564e-08, "loss": 40.8068, "step": 475300 }, { "epoch": 0.960156272094442, "grad_norm": 149.6638946533203, "learning_rate": 7.409371388837405e-08, "loss": 14.5479, "step": 475310 }, { "epoch": 0.9601764727271258, "grad_norm": 411.8772277832031, "learning_rate": 7.403385535932284e-08, "loss": 14.5164, "step": 475320 }, { "epoch": 0.9601966733598096, "grad_norm": 178.7208251953125, "learning_rate": 7.397402083883287e-08, "loss": 5.3804, "step": 475330 }, { "epoch": 0.9602168739924934, "grad_norm": 212.35629272460938, "learning_rate": 7.39142103271956e-08, "loss": 32.1982, "step": 475340 }, { "epoch": 0.9602370746251773, "grad_norm": 46.84968948364258, "learning_rate": 7.385442382470354e-08, "loss": 13.0413, "step": 475350 }, { "epoch": 0.9602572752578611, "grad_norm": 400.7052307128906, "learning_rate": 7.379466133164759e-08, "loss": 12.3183, "step": 475360 }, { "epoch": 0.9602774758905449, "grad_norm": 53.30480194091797, "learning_rate": 7.373492284831862e-08, "loss": 12.3386, "step": 475370 }, { "epoch": 0.9602976765232287, "grad_norm": 337.81402587890625, "learning_rate": 7.367520837500808e-08, "loss": 20.6608, "step": 475380 }, { "epoch": 0.9603178771559125, "grad_norm": 200.33563232421875, "learning_rate": 7.361551791200794e-08, "loss": 13.0962, "step": 475390 }, { "epoch": 0.9603380777885964, "grad_norm": 277.03057861328125, "learning_rate": 7.355585145960743e-08, "loss": 20.8616, "step": 475400 }, { "epoch": 0.9603582784212802, "grad_norm": 368.3315124511719, "learning_rate": 7.34962090180985e-08, "loss": 13.296, "step": 475410 }, { "epoch": 0.960378479053964, "grad_norm": 233.2793731689453, "learning_rate": 7.343659058777098e-08, "loss": 19.747, "step": 475420 }, { "epoch": 0.9603986796866478, "grad_norm": 0.0, "learning_rate": 7.33769961689168e-08, "loss": 11.615, "step": 475430 }, { "epoch": 0.9604188803193316, "grad_norm": 312.4758605957031, "learning_rate": 7.331742576182466e-08, "loss": 22.6936, "step": 475440 }, { "epoch": 0.9604390809520155, "grad_norm": 43.45466995239258, "learning_rate": 7.325787936678708e-08, "loss": 19.882, "step": 475450 }, { "epoch": 0.9604592815846993, "grad_norm": 128.6321563720703, "learning_rate": 7.319835698409217e-08, "loss": 16.9376, "step": 475460 }, { "epoch": 0.960479482217383, "grad_norm": 405.3039855957031, "learning_rate": 7.313885861403135e-08, "loss": 21.7552, "step": 475470 }, { "epoch": 0.9604996828500668, "grad_norm": 125.45901489257812, "learning_rate": 7.307938425689388e-08, "loss": 12.1929, "step": 475480 }, { "epoch": 0.9605198834827506, "grad_norm": 514.5453491210938, "learning_rate": 7.301993391297003e-08, "loss": 20.2388, "step": 475490 }, { "epoch": 0.9605400841154345, "grad_norm": 390.7613830566406, "learning_rate": 7.296050758254958e-08, "loss": 17.4189, "step": 475500 }, { "epoch": 0.9605602847481183, "grad_norm": 220.3173065185547, "learning_rate": 7.290110526592231e-08, "loss": 14.4539, "step": 475510 }, { "epoch": 0.9605804853808021, "grad_norm": 182.73268127441406, "learning_rate": 7.284172696337688e-08, "loss": 17.0045, "step": 475520 }, { "epoch": 0.9606006860134859, "grad_norm": 77.84659576416016, "learning_rate": 7.27823726752036e-08, "loss": 13.1571, "step": 475530 }, { "epoch": 0.9606208866461697, "grad_norm": 392.80963134765625, "learning_rate": 7.272304240169115e-08, "loss": 14.0392, "step": 475540 }, { "epoch": 0.9606410872788536, "grad_norm": 223.89820861816406, "learning_rate": 7.266373614312927e-08, "loss": 11.1937, "step": 475550 }, { "epoch": 0.9606612879115374, "grad_norm": 472.5323486328125, "learning_rate": 7.260445389980609e-08, "loss": 15.6538, "step": 475560 }, { "epoch": 0.9606814885442212, "grad_norm": 399.0464782714844, "learning_rate": 7.25451956720119e-08, "loss": 22.7643, "step": 475570 }, { "epoch": 0.960701689176905, "grad_norm": 484.1720275878906, "learning_rate": 7.248596146003484e-08, "loss": 16.677, "step": 475580 }, { "epoch": 0.9607218898095888, "grad_norm": 407.47369384765625, "learning_rate": 7.242675126416299e-08, "loss": 27.4333, "step": 475590 }, { "epoch": 0.9607420904422727, "grad_norm": 186.0641632080078, "learning_rate": 7.236756508468612e-08, "loss": 15.0088, "step": 475600 }, { "epoch": 0.9607622910749565, "grad_norm": 353.9106750488281, "learning_rate": 7.230840292189179e-08, "loss": 10.1284, "step": 475610 }, { "epoch": 0.9607824917076403, "grad_norm": 245.0760040283203, "learning_rate": 7.224926477606864e-08, "loss": 18.781, "step": 475620 }, { "epoch": 0.9608026923403241, "grad_norm": 286.08154296875, "learning_rate": 7.219015064750478e-08, "loss": 16.0606, "step": 475630 }, { "epoch": 0.9608228929730079, "grad_norm": 372.6519470214844, "learning_rate": 7.213106053648889e-08, "loss": 13.3908, "step": 475640 }, { "epoch": 0.9608430936056918, "grad_norm": 14.131331443786621, "learning_rate": 7.207199444330847e-08, "loss": 14.3899, "step": 475650 }, { "epoch": 0.9608632942383756, "grad_norm": 264.9721374511719, "learning_rate": 7.201295236825112e-08, "loss": 13.8373, "step": 475660 }, { "epoch": 0.9608834948710594, "grad_norm": 441.21099853515625, "learning_rate": 7.195393431160491e-08, "loss": 11.3617, "step": 475670 }, { "epoch": 0.9609036955037432, "grad_norm": 28.24907684326172, "learning_rate": 7.189494027365795e-08, "loss": 25.5611, "step": 475680 }, { "epoch": 0.960923896136427, "grad_norm": 647.5918579101562, "learning_rate": 7.183597025469669e-08, "loss": 17.409, "step": 475690 }, { "epoch": 0.9609440967691109, "grad_norm": 231.3723602294922, "learning_rate": 7.177702425500977e-08, "loss": 17.2756, "step": 475700 }, { "epoch": 0.9609642974017947, "grad_norm": 125.4808578491211, "learning_rate": 7.171810227488363e-08, "loss": 11.1491, "step": 475710 }, { "epoch": 0.9609844980344784, "grad_norm": 154.3079376220703, "learning_rate": 7.165920431460637e-08, "loss": 20.8417, "step": 475720 }, { "epoch": 0.9610046986671622, "grad_norm": 258.5376281738281, "learning_rate": 7.16003303744639e-08, "loss": 14.5675, "step": 475730 }, { "epoch": 0.961024899299846, "grad_norm": 205.18617248535156, "learning_rate": 7.154148045474319e-08, "loss": 21.4444, "step": 475740 }, { "epoch": 0.9610450999325298, "grad_norm": 260.52996826171875, "learning_rate": 7.148265455573233e-08, "loss": 15.3334, "step": 475750 }, { "epoch": 0.9610653005652137, "grad_norm": 186.31607055664062, "learning_rate": 7.142385267771667e-08, "loss": 23.7496, "step": 475760 }, { "epoch": 0.9610855011978975, "grad_norm": 232.94505310058594, "learning_rate": 7.136507482098375e-08, "loss": 18.573, "step": 475770 }, { "epoch": 0.9611057018305813, "grad_norm": 29.94649887084961, "learning_rate": 7.130632098581947e-08, "loss": 19.7735, "step": 475780 }, { "epoch": 0.9611259024632651, "grad_norm": 818.3164672851562, "learning_rate": 7.124759117251078e-08, "loss": 29.18, "step": 475790 }, { "epoch": 0.961146103095949, "grad_norm": 446.5696105957031, "learning_rate": 7.118888538134361e-08, "loss": 11.0974, "step": 475800 }, { "epoch": 0.9611663037286328, "grad_norm": 433.7945556640625, "learning_rate": 7.113020361260325e-08, "loss": 17.209, "step": 475810 }, { "epoch": 0.9611865043613166, "grad_norm": 42.12403869628906, "learning_rate": 7.107154586657727e-08, "loss": 13.7848, "step": 475820 }, { "epoch": 0.9612067049940004, "grad_norm": 154.81005859375, "learning_rate": 7.101291214355043e-08, "loss": 14.8902, "step": 475830 }, { "epoch": 0.9612269056266842, "grad_norm": 119.46757507324219, "learning_rate": 7.095430244380863e-08, "loss": 11.1297, "step": 475840 }, { "epoch": 0.961247106259368, "grad_norm": 419.2917785644531, "learning_rate": 7.089571676763773e-08, "loss": 25.2929, "step": 475850 }, { "epoch": 0.9612673068920519, "grad_norm": 170.31443786621094, "learning_rate": 7.083715511532419e-08, "loss": 14.8543, "step": 475860 }, { "epoch": 0.9612875075247357, "grad_norm": 313.43658447265625, "learning_rate": 7.077861748715165e-08, "loss": 12.2518, "step": 475870 }, { "epoch": 0.9613077081574195, "grad_norm": 8.207794189453125, "learning_rate": 7.072010388340656e-08, "loss": 8.0544, "step": 475880 }, { "epoch": 0.9613279087901033, "grad_norm": 329.4176330566406, "learning_rate": 7.066161430437368e-08, "loss": 16.7999, "step": 475890 }, { "epoch": 0.9613481094227871, "grad_norm": 272.11688232421875, "learning_rate": 7.060314875033836e-08, "loss": 11.1837, "step": 475900 }, { "epoch": 0.961368310055471, "grad_norm": 52.65071105957031, "learning_rate": 7.054470722158535e-08, "loss": 16.2709, "step": 475910 }, { "epoch": 0.9613885106881548, "grad_norm": 216.29507446289062, "learning_rate": 7.048628971839944e-08, "loss": 25.8152, "step": 475920 }, { "epoch": 0.9614087113208386, "grad_norm": 190.11618041992188, "learning_rate": 7.042789624106594e-08, "loss": 14.2205, "step": 475930 }, { "epoch": 0.9614289119535224, "grad_norm": 501.01043701171875, "learning_rate": 7.036952678986852e-08, "loss": 27.5411, "step": 475940 }, { "epoch": 0.9614491125862062, "grad_norm": 245.2197265625, "learning_rate": 7.031118136509196e-08, "loss": 20.0645, "step": 475950 }, { "epoch": 0.9614693132188901, "grad_norm": 322.0236511230469, "learning_rate": 7.025285996702158e-08, "loss": 27.4476, "step": 475960 }, { "epoch": 0.9614895138515739, "grad_norm": 322.7179260253906, "learning_rate": 7.019456259594049e-08, "loss": 21.5527, "step": 475970 }, { "epoch": 0.9615097144842576, "grad_norm": 268.53509521484375, "learning_rate": 7.01362892521329e-08, "loss": 12.8646, "step": 475980 }, { "epoch": 0.9615299151169414, "grad_norm": 343.81280517578125, "learning_rate": 7.007803993588358e-08, "loss": 16.417, "step": 475990 }, { "epoch": 0.9615501157496252, "grad_norm": 294.3307800292969, "learning_rate": 7.001981464747565e-08, "loss": 21.8227, "step": 476000 }, { "epoch": 0.961570316382309, "grad_norm": 237.7524871826172, "learning_rate": 6.996161338719332e-08, "loss": 20.9503, "step": 476010 }, { "epoch": 0.9615905170149929, "grad_norm": 68.56524658203125, "learning_rate": 6.990343615532025e-08, "loss": 19.5319, "step": 476020 }, { "epoch": 0.9616107176476767, "grad_norm": 307.2353210449219, "learning_rate": 6.9845282952139e-08, "loss": 17.4761, "step": 476030 }, { "epoch": 0.9616309182803605, "grad_norm": 369.35650634765625, "learning_rate": 6.978715377793489e-08, "loss": 16.5511, "step": 476040 }, { "epoch": 0.9616511189130443, "grad_norm": 201.0762939453125, "learning_rate": 6.972904863298991e-08, "loss": 17.8656, "step": 476050 }, { "epoch": 0.9616713195457282, "grad_norm": 351.0091857910156, "learning_rate": 6.967096751758773e-08, "loss": 15.0056, "step": 476060 }, { "epoch": 0.961691520178412, "grad_norm": 385.28924560546875, "learning_rate": 6.961291043201145e-08, "loss": 17.2955, "step": 476070 }, { "epoch": 0.9617117208110958, "grad_norm": 245.84681701660156, "learning_rate": 6.955487737654309e-08, "loss": 13.5286, "step": 476080 }, { "epoch": 0.9617319214437796, "grad_norm": 306.4953918457031, "learning_rate": 6.949686835146685e-08, "loss": 17.9128, "step": 476090 }, { "epoch": 0.9617521220764634, "grad_norm": 122.697509765625, "learning_rate": 6.943888335706472e-08, "loss": 21.3639, "step": 476100 }, { "epoch": 0.9617723227091473, "grad_norm": 0.0, "learning_rate": 6.938092239361982e-08, "loss": 6.4645, "step": 476110 }, { "epoch": 0.9617925233418311, "grad_norm": 368.9552307128906, "learning_rate": 6.932298546141413e-08, "loss": 14.1267, "step": 476120 }, { "epoch": 0.9618127239745149, "grad_norm": 334.83331298828125, "learning_rate": 6.926507256072967e-08, "loss": 25.3837, "step": 476130 }, { "epoch": 0.9618329246071987, "grad_norm": 244.06455993652344, "learning_rate": 6.920718369185009e-08, "loss": 13.8009, "step": 476140 }, { "epoch": 0.9618531252398825, "grad_norm": 283.6094055175781, "learning_rate": 6.914931885505626e-08, "loss": 13.1624, "step": 476150 }, { "epoch": 0.9618733258725664, "grad_norm": 208.09109497070312, "learning_rate": 6.909147805063021e-08, "loss": 47.2771, "step": 476160 }, { "epoch": 0.9618935265052502, "grad_norm": 317.6132507324219, "learning_rate": 6.903366127885447e-08, "loss": 12.3003, "step": 476170 }, { "epoch": 0.961913727137934, "grad_norm": 201.04429626464844, "learning_rate": 6.897586854001048e-08, "loss": 26.5013, "step": 476180 }, { "epoch": 0.9619339277706178, "grad_norm": 231.37472534179688, "learning_rate": 6.89180998343808e-08, "loss": 21.9749, "step": 476190 }, { "epoch": 0.9619541284033016, "grad_norm": 196.7626495361328, "learning_rate": 6.88603551622452e-08, "loss": 15.4808, "step": 476200 }, { "epoch": 0.9619743290359855, "grad_norm": 521.8115844726562, "learning_rate": 6.88026345238868e-08, "loss": 29.6634, "step": 476210 }, { "epoch": 0.9619945296686693, "grad_norm": 386.37030029296875, "learning_rate": 6.874493791958648e-08, "loss": 14.1437, "step": 476220 }, { "epoch": 0.9620147303013531, "grad_norm": 260.12591552734375, "learning_rate": 6.868726534962456e-08, "loss": 20.8724, "step": 476230 }, { "epoch": 0.9620349309340368, "grad_norm": 239.26620483398438, "learning_rate": 6.862961681428304e-08, "loss": 32.5396, "step": 476240 }, { "epoch": 0.9620551315667206, "grad_norm": 331.8974304199219, "learning_rate": 6.857199231384282e-08, "loss": 24.4839, "step": 476250 }, { "epoch": 0.9620753321994044, "grad_norm": 369.7519836425781, "learning_rate": 6.851439184858477e-08, "loss": 22.7557, "step": 476260 }, { "epoch": 0.9620955328320883, "grad_norm": 375.781005859375, "learning_rate": 6.845681541878924e-08, "loss": 16.3223, "step": 476270 }, { "epoch": 0.9621157334647721, "grad_norm": 234.12542724609375, "learning_rate": 6.83992630247371e-08, "loss": 16.0636, "step": 476280 }, { "epoch": 0.9621359340974559, "grad_norm": 13.830657005310059, "learning_rate": 6.834173466670923e-08, "loss": 12.3598, "step": 476290 }, { "epoch": 0.9621561347301397, "grad_norm": 0.0, "learning_rate": 6.828423034498488e-08, "loss": 7.1124, "step": 476300 }, { "epoch": 0.9621763353628235, "grad_norm": 351.7645568847656, "learning_rate": 6.822675005984547e-08, "loss": 13.3322, "step": 476310 }, { "epoch": 0.9621965359955074, "grad_norm": 311.44940185546875, "learning_rate": 6.816929381157023e-08, "loss": 14.2324, "step": 476320 }, { "epoch": 0.9622167366281912, "grad_norm": 296.13848876953125, "learning_rate": 6.811186160044004e-08, "loss": 22.1924, "step": 476330 }, { "epoch": 0.962236937260875, "grad_norm": 232.44021606445312, "learning_rate": 6.805445342673467e-08, "loss": 15.257, "step": 476340 }, { "epoch": 0.9622571378935588, "grad_norm": 378.85284423828125, "learning_rate": 6.799706929073335e-08, "loss": 17.1737, "step": 476350 }, { "epoch": 0.9622773385262426, "grad_norm": 75.30892181396484, "learning_rate": 6.793970919271642e-08, "loss": 20.28, "step": 476360 }, { "epoch": 0.9622975391589265, "grad_norm": 468.77410888671875, "learning_rate": 6.788237313296309e-08, "loss": 21.4126, "step": 476370 }, { "epoch": 0.9623177397916103, "grad_norm": 248.53173828125, "learning_rate": 6.782506111175313e-08, "loss": 19.6177, "step": 476380 }, { "epoch": 0.9623379404242941, "grad_norm": 253.32679748535156, "learning_rate": 6.776777312936522e-08, "loss": 7.8689, "step": 476390 }, { "epoch": 0.9623581410569779, "grad_norm": 4.7597880363464355, "learning_rate": 6.771050918607913e-08, "loss": 22.3711, "step": 476400 }, { "epoch": 0.9623783416896617, "grad_norm": 215.17648315429688, "learning_rate": 6.765326928217408e-08, "loss": 22.2809, "step": 476410 }, { "epoch": 0.9623985423223456, "grad_norm": 174.30722045898438, "learning_rate": 6.759605341792819e-08, "loss": 16.431, "step": 476420 }, { "epoch": 0.9624187429550294, "grad_norm": 174.83242797851562, "learning_rate": 6.753886159362122e-08, "loss": 16.3768, "step": 476430 }, { "epoch": 0.9624389435877132, "grad_norm": 347.6761169433594, "learning_rate": 6.748169380953184e-08, "loss": 21.2614, "step": 476440 }, { "epoch": 0.962459144220397, "grad_norm": 322.230224609375, "learning_rate": 6.742455006593762e-08, "loss": 24.265, "step": 476450 }, { "epoch": 0.9624793448530808, "grad_norm": 295.3970947265625, "learning_rate": 6.736743036311832e-08, "loss": 28.6171, "step": 476460 }, { "epoch": 0.9624995454857647, "grad_norm": 582.6659545898438, "learning_rate": 6.731033470135262e-08, "loss": 21.0782, "step": 476470 }, { "epoch": 0.9625197461184485, "grad_norm": 438.8211975097656, "learning_rate": 6.725326308091751e-08, "loss": 24.4582, "step": 476480 }, { "epoch": 0.9625399467511322, "grad_norm": 236.2390594482422, "learning_rate": 6.71962155020911e-08, "loss": 21.9683, "step": 476490 }, { "epoch": 0.962560147383816, "grad_norm": 535.292724609375, "learning_rate": 6.713919196515317e-08, "loss": 24.2894, "step": 476500 }, { "epoch": 0.9625803480164998, "grad_norm": 183.3269805908203, "learning_rate": 6.708219247038017e-08, "loss": 17.0726, "step": 476510 }, { "epoch": 0.9626005486491837, "grad_norm": 291.149169921875, "learning_rate": 6.702521701804965e-08, "loss": 18.624, "step": 476520 }, { "epoch": 0.9626207492818675, "grad_norm": 192.3579559326172, "learning_rate": 6.696826560844027e-08, "loss": 19.7489, "step": 476530 }, { "epoch": 0.9626409499145513, "grad_norm": 285.07794189453125, "learning_rate": 6.691133824183016e-08, "loss": 26.5873, "step": 476540 }, { "epoch": 0.9626611505472351, "grad_norm": 122.84087371826172, "learning_rate": 6.685443491849464e-08, "loss": 20.8861, "step": 476550 }, { "epoch": 0.9626813511799189, "grad_norm": 106.66376495361328, "learning_rate": 6.679755563871292e-08, "loss": 15.7506, "step": 476560 }, { "epoch": 0.9627015518126028, "grad_norm": 464.84716796875, "learning_rate": 6.674070040276148e-08, "loss": 23.3472, "step": 476570 }, { "epoch": 0.9627217524452866, "grad_norm": 312.5509338378906, "learning_rate": 6.66838692109173e-08, "loss": 22.8308, "step": 476580 }, { "epoch": 0.9627419530779704, "grad_norm": 59.928245544433594, "learning_rate": 6.662706206345793e-08, "loss": 10.7981, "step": 476590 }, { "epoch": 0.9627621537106542, "grad_norm": 200.5502166748047, "learning_rate": 6.657027896065982e-08, "loss": 15.4725, "step": 476600 }, { "epoch": 0.962782354343338, "grad_norm": 77.65949249267578, "learning_rate": 6.651351990279997e-08, "loss": 3.993, "step": 476610 }, { "epoch": 0.9628025549760219, "grad_norm": 181.7306365966797, "learning_rate": 6.645678489015428e-08, "loss": 15.1982, "step": 476620 }, { "epoch": 0.9628227556087057, "grad_norm": 362.1164245605469, "learning_rate": 6.64000739230003e-08, "loss": 28.1297, "step": 476630 }, { "epoch": 0.9628429562413895, "grad_norm": 525.9012451171875, "learning_rate": 6.634338700161392e-08, "loss": 21.3613, "step": 476640 }, { "epoch": 0.9628631568740733, "grad_norm": 80.144287109375, "learning_rate": 6.628672412627158e-08, "loss": 11.4526, "step": 476650 }, { "epoch": 0.9628833575067571, "grad_norm": 275.218505859375, "learning_rate": 6.623008529724917e-08, "loss": 16.0609, "step": 476660 }, { "epoch": 0.962903558139441, "grad_norm": 254.39096069335938, "learning_rate": 6.617347051482315e-08, "loss": 18.5169, "step": 476670 }, { "epoch": 0.9629237587721248, "grad_norm": 220.32614135742188, "learning_rate": 6.611687977926939e-08, "loss": 14.3258, "step": 476680 }, { "epoch": 0.9629439594048086, "grad_norm": 162.96490478515625, "learning_rate": 6.606031309086269e-08, "loss": 17.6257, "step": 476690 }, { "epoch": 0.9629641600374924, "grad_norm": 395.3836364746094, "learning_rate": 6.60037704498806e-08, "loss": 23.2249, "step": 476700 }, { "epoch": 0.9629843606701762, "grad_norm": 113.44120025634766, "learning_rate": 6.594725185659734e-08, "loss": 15.892, "step": 476710 }, { "epoch": 0.96300456130286, "grad_norm": 245.11105346679688, "learning_rate": 6.58907573112888e-08, "loss": 15.0643, "step": 476720 }, { "epoch": 0.9630247619355439, "grad_norm": 350.1731872558594, "learning_rate": 6.583428681423032e-08, "loss": 24.1536, "step": 476730 }, { "epoch": 0.9630449625682277, "grad_norm": 329.8271789550781, "learning_rate": 6.577784036569668e-08, "loss": 16.0069, "step": 476740 }, { "epoch": 0.9630651632009114, "grad_norm": 440.2545471191406, "learning_rate": 6.572141796596376e-08, "loss": 17.5044, "step": 476750 }, { "epoch": 0.9630853638335952, "grad_norm": 88.97496795654297, "learning_rate": 6.566501961530636e-08, "loss": 13.9166, "step": 476760 }, { "epoch": 0.963105564466279, "grad_norm": 68.35948944091797, "learning_rate": 6.560864531399869e-08, "loss": 12.471, "step": 476770 }, { "epoch": 0.9631257650989629, "grad_norm": 322.03924560546875, "learning_rate": 6.555229506231608e-08, "loss": 23.7958, "step": 476780 }, { "epoch": 0.9631459657316467, "grad_norm": 251.2469024658203, "learning_rate": 6.549596886053334e-08, "loss": 15.9685, "step": 476790 }, { "epoch": 0.9631661663643305, "grad_norm": 123.99053192138672, "learning_rate": 6.543966670892465e-08, "loss": 18.9374, "step": 476800 }, { "epoch": 0.9631863669970143, "grad_norm": 20.874664306640625, "learning_rate": 6.538338860776483e-08, "loss": 10.2562, "step": 476810 }, { "epoch": 0.9632065676296981, "grad_norm": 234.0839385986328, "learning_rate": 6.532713455732753e-08, "loss": 19.7447, "step": 476820 }, { "epoch": 0.963226768262382, "grad_norm": 254.33309936523438, "learning_rate": 6.527090455788754e-08, "loss": 13.8631, "step": 476830 }, { "epoch": 0.9632469688950658, "grad_norm": 211.86892700195312, "learning_rate": 6.521469860971852e-08, "loss": 16.7011, "step": 476840 }, { "epoch": 0.9632671695277496, "grad_norm": 371.6062316894531, "learning_rate": 6.515851671309414e-08, "loss": 23.3379, "step": 476850 }, { "epoch": 0.9632873701604334, "grad_norm": 565.3331298828125, "learning_rate": 6.51023588682892e-08, "loss": 16.0675, "step": 476860 }, { "epoch": 0.9633075707931172, "grad_norm": 634.7963256835938, "learning_rate": 6.504622507557679e-08, "loss": 28.1137, "step": 476870 }, { "epoch": 0.9633277714258011, "grad_norm": 259.01446533203125, "learning_rate": 6.499011533523003e-08, "loss": 17.5227, "step": 476880 }, { "epoch": 0.9633479720584849, "grad_norm": 247.61936950683594, "learning_rate": 6.493402964752371e-08, "loss": 16.2293, "step": 476890 }, { "epoch": 0.9633681726911687, "grad_norm": 385.30706787109375, "learning_rate": 6.487796801272983e-08, "loss": 15.4351, "step": 476900 }, { "epoch": 0.9633883733238525, "grad_norm": 0.0, "learning_rate": 6.482193043112206e-08, "loss": 9.617, "step": 476910 }, { "epoch": 0.9634085739565363, "grad_norm": 351.1435241699219, "learning_rate": 6.476591690297407e-08, "loss": 10.6954, "step": 476920 }, { "epoch": 0.9634287745892202, "grad_norm": 229.2272186279297, "learning_rate": 6.470992742855786e-08, "loss": 16.816, "step": 476930 }, { "epoch": 0.963448975221904, "grad_norm": 295.8770751953125, "learning_rate": 6.465396200814766e-08, "loss": 19.5691, "step": 476940 }, { "epoch": 0.9634691758545878, "grad_norm": 305.0475769042969, "learning_rate": 6.459802064201437e-08, "loss": 19.6025, "step": 476950 }, { "epoch": 0.9634893764872716, "grad_norm": 346.71063232421875, "learning_rate": 6.454210333043275e-08, "loss": 17.3941, "step": 476960 }, { "epoch": 0.9635095771199554, "grad_norm": 132.2841339111328, "learning_rate": 6.448621007367428e-08, "loss": 19.3763, "step": 476970 }, { "epoch": 0.9635297777526393, "grad_norm": 224.59109497070312, "learning_rate": 6.443034087201095e-08, "loss": 21.4958, "step": 476980 }, { "epoch": 0.9635499783853231, "grad_norm": 483.6046142578125, "learning_rate": 6.437449572571586e-08, "loss": 19.1856, "step": 476990 }, { "epoch": 0.9635701790180068, "grad_norm": 29.51258659362793, "learning_rate": 6.431867463506047e-08, "loss": 11.6267, "step": 477000 }, { "epoch": 0.9635903796506906, "grad_norm": 242.21133422851562, "learning_rate": 6.426287760031736e-08, "loss": 15.2442, "step": 477010 }, { "epoch": 0.9636105802833744, "grad_norm": 118.39478302001953, "learning_rate": 6.42071046217585e-08, "loss": 8.5603, "step": 477020 }, { "epoch": 0.9636307809160582, "grad_norm": 108.25749969482422, "learning_rate": 6.415135569965536e-08, "loss": 19.3951, "step": 477030 }, { "epoch": 0.9636509815487421, "grad_norm": 233.133056640625, "learning_rate": 6.40956308342805e-08, "loss": 19.9293, "step": 477040 }, { "epoch": 0.9636711821814259, "grad_norm": 435.89239501953125, "learning_rate": 6.403993002590425e-08, "loss": 14.1264, "step": 477050 }, { "epoch": 0.9636913828141097, "grad_norm": 42.959861755371094, "learning_rate": 6.398425327479863e-08, "loss": 12.8804, "step": 477060 }, { "epoch": 0.9637115834467935, "grad_norm": 398.9713439941406, "learning_rate": 6.392860058123506e-08, "loss": 13.7219, "step": 477070 }, { "epoch": 0.9637317840794773, "grad_norm": 423.4179382324219, "learning_rate": 6.387297194548558e-08, "loss": 29.0164, "step": 477080 }, { "epoch": 0.9637519847121612, "grad_norm": 171.33631896972656, "learning_rate": 6.381736736781996e-08, "loss": 12.3808, "step": 477090 }, { "epoch": 0.963772185344845, "grad_norm": 136.57261657714844, "learning_rate": 6.376178684850965e-08, "loss": 17.7935, "step": 477100 }, { "epoch": 0.9637923859775288, "grad_norm": 288.0144348144531, "learning_rate": 6.370623038782608e-08, "loss": 19.6789, "step": 477110 }, { "epoch": 0.9638125866102126, "grad_norm": 27.331716537475586, "learning_rate": 6.365069798603962e-08, "loss": 32.6814, "step": 477120 }, { "epoch": 0.9638327872428964, "grad_norm": 219.19517517089844, "learning_rate": 6.359518964342059e-08, "loss": 12.6894, "step": 477130 }, { "epoch": 0.9638529878755803, "grad_norm": 382.98626708984375, "learning_rate": 6.353970536024045e-08, "loss": 19.4926, "step": 477140 }, { "epoch": 0.9638731885082641, "grad_norm": 256.29315185546875, "learning_rate": 6.348424513676898e-08, "loss": 6.9898, "step": 477150 }, { "epoch": 0.9638933891409479, "grad_norm": 256.78692626953125, "learning_rate": 6.342880897327597e-08, "loss": 13.9918, "step": 477160 }, { "epoch": 0.9639135897736317, "grad_norm": 135.3890838623047, "learning_rate": 6.337339687003286e-08, "loss": 14.0374, "step": 477170 }, { "epoch": 0.9639337904063155, "grad_norm": 377.88525390625, "learning_rate": 6.331800882730887e-08, "loss": 18.6535, "step": 477180 }, { "epoch": 0.9639539910389994, "grad_norm": 97.87631225585938, "learning_rate": 6.326264484537437e-08, "loss": 12.3394, "step": 477190 }, { "epoch": 0.9639741916716832, "grad_norm": 488.96820068359375, "learning_rate": 6.3207304924498e-08, "loss": 23.6453, "step": 477200 }, { "epoch": 0.963994392304367, "grad_norm": 314.15008544921875, "learning_rate": 6.315198906495179e-08, "loss": 21.1307, "step": 477210 }, { "epoch": 0.9640145929370508, "grad_norm": 405.3951110839844, "learning_rate": 6.30966972670033e-08, "loss": 23.4745, "step": 477220 }, { "epoch": 0.9640347935697346, "grad_norm": 431.6580810546875, "learning_rate": 6.304142953092285e-08, "loss": 18.9896, "step": 477230 }, { "epoch": 0.9640549942024185, "grad_norm": 366.255126953125, "learning_rate": 6.298618585697968e-08, "loss": 14.078, "step": 477240 }, { "epoch": 0.9640751948351023, "grad_norm": 146.75399780273438, "learning_rate": 6.293096624544304e-08, "loss": 8.5715, "step": 477250 }, { "epoch": 0.964095395467786, "grad_norm": 382.1266784667969, "learning_rate": 6.287577069658213e-08, "loss": 10.4208, "step": 477260 }, { "epoch": 0.9641155961004698, "grad_norm": 202.35650634765625, "learning_rate": 6.282059921066564e-08, "loss": 13.6529, "step": 477270 }, { "epoch": 0.9641357967331536, "grad_norm": 148.67845153808594, "learning_rate": 6.276545178796333e-08, "loss": 9.3581, "step": 477280 }, { "epoch": 0.9641559973658375, "grad_norm": 226.326171875, "learning_rate": 6.271032842874281e-08, "loss": 26.5209, "step": 477290 }, { "epoch": 0.9641761979985213, "grad_norm": 175.58291625976562, "learning_rate": 6.265522913327326e-08, "loss": 9.0418, "step": 477300 }, { "epoch": 0.9641963986312051, "grad_norm": 995.6618041992188, "learning_rate": 6.260015390182395e-08, "loss": 31.1935, "step": 477310 }, { "epoch": 0.9642165992638889, "grad_norm": 40.515159606933594, "learning_rate": 6.254510273466186e-08, "loss": 14.612, "step": 477320 }, { "epoch": 0.9642367998965727, "grad_norm": 415.0593566894531, "learning_rate": 6.249007563205679e-08, "loss": 34.3038, "step": 477330 }, { "epoch": 0.9642570005292566, "grad_norm": 590.2013549804688, "learning_rate": 6.243507259427628e-08, "loss": 21.9941, "step": 477340 }, { "epoch": 0.9642772011619404, "grad_norm": 278.40460205078125, "learning_rate": 6.238009362158793e-08, "loss": 16.9624, "step": 477350 }, { "epoch": 0.9642974017946242, "grad_norm": 152.50302124023438, "learning_rate": 6.232513871426038e-08, "loss": 18.1703, "step": 477360 }, { "epoch": 0.964317602427308, "grad_norm": 78.60377502441406, "learning_rate": 6.227020787256122e-08, "loss": 14.3266, "step": 477370 }, { "epoch": 0.9643378030599918, "grad_norm": 281.2479248046875, "learning_rate": 6.2215301096758e-08, "loss": 10.5865, "step": 477380 }, { "epoch": 0.9643580036926757, "grad_norm": 558.7536010742188, "learning_rate": 6.216041838711828e-08, "loss": 17.3697, "step": 477390 }, { "epoch": 0.9643782043253595, "grad_norm": 75.66012573242188, "learning_rate": 6.210555974391075e-08, "loss": 28.0086, "step": 477400 }, { "epoch": 0.9643984049580433, "grad_norm": 0.0, "learning_rate": 6.205072516740129e-08, "loss": 8.1656, "step": 477410 }, { "epoch": 0.9644186055907271, "grad_norm": 288.9216613769531, "learning_rate": 6.199591465785748e-08, "loss": 10.5967, "step": 477420 }, { "epoch": 0.9644388062234109, "grad_norm": 414.6328125, "learning_rate": 6.194112821554687e-08, "loss": 27.4639, "step": 477430 }, { "epoch": 0.9644590068560948, "grad_norm": 488.18212890625, "learning_rate": 6.188636584073648e-08, "loss": 19.7257, "step": 477440 }, { "epoch": 0.9644792074887786, "grad_norm": 48.27674102783203, "learning_rate": 6.183162753369221e-08, "loss": 11.156, "step": 477450 }, { "epoch": 0.9644994081214624, "grad_norm": 413.32781982421875, "learning_rate": 6.177691329468217e-08, "loss": 21.3233, "step": 477460 }, { "epoch": 0.9645196087541462, "grad_norm": 91.32913970947266, "learning_rate": 6.17222231239728e-08, "loss": 13.5473, "step": 477470 }, { "epoch": 0.96453980938683, "grad_norm": 288.6951904296875, "learning_rate": 6.166755702183058e-08, "loss": 21.0884, "step": 477480 }, { "epoch": 0.9645600100195139, "grad_norm": 11.18078327178955, "learning_rate": 6.161291498852084e-08, "loss": 17.6018, "step": 477490 }, { "epoch": 0.9645802106521977, "grad_norm": 481.84539794921875, "learning_rate": 6.15582970243117e-08, "loss": 16.2343, "step": 477500 }, { "epoch": 0.9646004112848814, "grad_norm": 32.75508499145508, "learning_rate": 6.150370312946797e-08, "loss": 6.4613, "step": 477510 }, { "epoch": 0.9646206119175652, "grad_norm": 283.7071533203125, "learning_rate": 6.144913330425606e-08, "loss": 23.7083, "step": 477520 }, { "epoch": 0.964640812550249, "grad_norm": 204.29095458984375, "learning_rate": 6.139458754894245e-08, "loss": 19.4765, "step": 477530 }, { "epoch": 0.9646610131829328, "grad_norm": 514.5524291992188, "learning_rate": 6.134006586379249e-08, "loss": 17.3997, "step": 477540 }, { "epoch": 0.9646812138156167, "grad_norm": 426.7435607910156, "learning_rate": 6.128556824907205e-08, "loss": 20.9471, "step": 477550 }, { "epoch": 0.9647014144483005, "grad_norm": 450.2146301269531, "learning_rate": 6.12310947050465e-08, "loss": 10.5752, "step": 477560 }, { "epoch": 0.9647216150809843, "grad_norm": 241.98593139648438, "learning_rate": 6.11766452319823e-08, "loss": 14.8117, "step": 477570 }, { "epoch": 0.9647418157136681, "grad_norm": 138.69134521484375, "learning_rate": 6.112221983014366e-08, "loss": 9.8652, "step": 477580 }, { "epoch": 0.964762016346352, "grad_norm": 202.12562561035156, "learning_rate": 6.106781849979648e-08, "loss": 13.2307, "step": 477590 }, { "epoch": 0.9647822169790358, "grad_norm": 251.06512451171875, "learning_rate": 6.101344124120557e-08, "loss": 28.6517, "step": 477600 }, { "epoch": 0.9648024176117196, "grad_norm": 472.91064453125, "learning_rate": 6.095908805463624e-08, "loss": 28.1801, "step": 477610 }, { "epoch": 0.9648226182444034, "grad_norm": 192.46087646484375, "learning_rate": 6.09047589403533e-08, "loss": 22.7737, "step": 477620 }, { "epoch": 0.9648428188770872, "grad_norm": 327.7209777832031, "learning_rate": 6.085045389862154e-08, "loss": 19.1767, "step": 477630 }, { "epoch": 0.964863019509771, "grad_norm": 281.77117919921875, "learning_rate": 6.079617292970519e-08, "loss": 9.5608, "step": 477640 }, { "epoch": 0.9648832201424549, "grad_norm": 411.31976318359375, "learning_rate": 6.074191603386958e-08, "loss": 24.7571, "step": 477650 }, { "epoch": 0.9649034207751387, "grad_norm": 374.9639587402344, "learning_rate": 6.068768321137897e-08, "loss": 11.2828, "step": 477660 }, { "epoch": 0.9649236214078225, "grad_norm": 3.5396199226379395, "learning_rate": 6.0633474462497e-08, "loss": 5.726, "step": 477670 }, { "epoch": 0.9649438220405063, "grad_norm": 147.08334350585938, "learning_rate": 6.057928978748906e-08, "loss": 9.2, "step": 477680 }, { "epoch": 0.9649640226731901, "grad_norm": 285.4568176269531, "learning_rate": 6.052512918661879e-08, "loss": 15.6435, "step": 477690 }, { "epoch": 0.964984223305874, "grad_norm": 93.05352783203125, "learning_rate": 6.047099266014877e-08, "loss": 21.0014, "step": 477700 }, { "epoch": 0.9650044239385578, "grad_norm": 378.9557189941406, "learning_rate": 6.041688020834491e-08, "loss": 19.2169, "step": 477710 }, { "epoch": 0.9650246245712416, "grad_norm": 6.093135833740234, "learning_rate": 6.036279183146975e-08, "loss": 18.4705, "step": 477720 }, { "epoch": 0.9650448252039254, "grad_norm": 305.2705993652344, "learning_rate": 6.030872752978756e-08, "loss": 16.7821, "step": 477730 }, { "epoch": 0.9650650258366092, "grad_norm": 249.75381469726562, "learning_rate": 6.025468730356144e-08, "loss": 16.2057, "step": 477740 }, { "epoch": 0.9650852264692931, "grad_norm": 3.7394824028015137, "learning_rate": 6.020067115305451e-08, "loss": 24.2809, "step": 477750 }, { "epoch": 0.9651054271019769, "grad_norm": 169.91648864746094, "learning_rate": 6.0146679078531e-08, "loss": 7.8114, "step": 477760 }, { "epoch": 0.9651256277346606, "grad_norm": 184.37428283691406, "learning_rate": 6.009271108025294e-08, "loss": 9.9487, "step": 477770 }, { "epoch": 0.9651458283673444, "grad_norm": 389.8765563964844, "learning_rate": 6.003876715848345e-08, "loss": 15.8888, "step": 477780 }, { "epoch": 0.9651660290000282, "grad_norm": 524.4351806640625, "learning_rate": 5.998484731348675e-08, "loss": 13.3921, "step": 477790 }, { "epoch": 0.9651862296327121, "grad_norm": 99.16090393066406, "learning_rate": 5.993095154552431e-08, "loss": 11.3058, "step": 477800 }, { "epoch": 0.9652064302653959, "grad_norm": 250.9905548095703, "learning_rate": 5.987707985485925e-08, "loss": 23.6366, "step": 477810 }, { "epoch": 0.9652266308980797, "grad_norm": 243.146240234375, "learning_rate": 5.982323224175468e-08, "loss": 12.6277, "step": 477820 }, { "epoch": 0.9652468315307635, "grad_norm": 102.24500274658203, "learning_rate": 5.976940870647207e-08, "loss": 25.9193, "step": 477830 }, { "epoch": 0.9652670321634473, "grad_norm": 115.31416320800781, "learning_rate": 5.9715609249274e-08, "loss": 15.9024, "step": 477840 }, { "epoch": 0.9652872327961312, "grad_norm": 155.22604370117188, "learning_rate": 5.966183387042246e-08, "loss": 22.6824, "step": 477850 }, { "epoch": 0.965307433428815, "grad_norm": 185.16635131835938, "learning_rate": 5.960808257018113e-08, "loss": 18.6027, "step": 477860 }, { "epoch": 0.9653276340614988, "grad_norm": 362.2665710449219, "learning_rate": 5.955435534881038e-08, "loss": 20.9415, "step": 477870 }, { "epoch": 0.9653478346941826, "grad_norm": 119.57987976074219, "learning_rate": 5.950065220657164e-08, "loss": 5.1876, "step": 477880 }, { "epoch": 0.9653680353268664, "grad_norm": 383.5547180175781, "learning_rate": 5.9446973143728605e-08, "loss": 20.0983, "step": 477890 }, { "epoch": 0.9653882359595503, "grad_norm": 17.12600326538086, "learning_rate": 5.939331816054161e-08, "loss": 20.9111, "step": 477900 }, { "epoch": 0.9654084365922341, "grad_norm": 361.46722412109375, "learning_rate": 5.9339687257272126e-08, "loss": 27.1896, "step": 477910 }, { "epoch": 0.9654286372249179, "grad_norm": 139.56591796875, "learning_rate": 5.92860804341816e-08, "loss": 17.6758, "step": 477920 }, { "epoch": 0.9654488378576017, "grad_norm": 302.6304626464844, "learning_rate": 5.9232497691531496e-08, "loss": 20.5391, "step": 477930 }, { "epoch": 0.9654690384902855, "grad_norm": 114.1827621459961, "learning_rate": 5.917893902958327e-08, "loss": 15.3628, "step": 477940 }, { "epoch": 0.9654892391229694, "grad_norm": 422.5317077636719, "learning_rate": 5.9125404448597825e-08, "loss": 9.8448, "step": 477950 }, { "epoch": 0.9655094397556532, "grad_norm": 388.36810302734375, "learning_rate": 5.9071893948835505e-08, "loss": 26.9694, "step": 477960 }, { "epoch": 0.965529640388337, "grad_norm": 124.12110900878906, "learning_rate": 5.901840753055776e-08, "loss": 34.7576, "step": 477970 }, { "epoch": 0.9655498410210208, "grad_norm": 166.75607299804688, "learning_rate": 5.896494519402496e-08, "loss": 14.2673, "step": 477980 }, { "epoch": 0.9655700416537046, "grad_norm": 239.36776733398438, "learning_rate": 5.891150693949743e-08, "loss": 9.9732, "step": 477990 }, { "epoch": 0.9655902422863885, "grad_norm": 527.8530883789062, "learning_rate": 5.8858092767236084e-08, "loss": 29.0657, "step": 478000 }, { "epoch": 0.9656104429190723, "grad_norm": 128.67298889160156, "learning_rate": 5.880470267750127e-08, "loss": 28.3848, "step": 478010 }, { "epoch": 0.9656306435517561, "grad_norm": 322.1239929199219, "learning_rate": 5.8751336670552775e-08, "loss": 19.7528, "step": 478020 }, { "epoch": 0.9656508441844398, "grad_norm": 436.1974182128906, "learning_rate": 5.8697994746650946e-08, "loss": 25.9408, "step": 478030 }, { "epoch": 0.9656710448171236, "grad_norm": 1870.925537109375, "learning_rate": 5.864467690605613e-08, "loss": 25.4245, "step": 478040 }, { "epoch": 0.9656912454498074, "grad_norm": 183.9953155517578, "learning_rate": 5.8591383149028126e-08, "loss": 28.0338, "step": 478050 }, { "epoch": 0.9657114460824913, "grad_norm": 78.80882263183594, "learning_rate": 5.8538113475825606e-08, "loss": 23.307, "step": 478060 }, { "epoch": 0.9657316467151751, "grad_norm": 145.49549865722656, "learning_rate": 5.848486788670893e-08, "loss": 20.6319, "step": 478070 }, { "epoch": 0.9657518473478589, "grad_norm": 355.9044189453125, "learning_rate": 5.843164638193899e-08, "loss": 12.1357, "step": 478080 }, { "epoch": 0.9657720479805427, "grad_norm": 185.50003051757812, "learning_rate": 5.837844896177225e-08, "loss": 16.2037, "step": 478090 }, { "epoch": 0.9657922486132265, "grad_norm": 202.33474731445312, "learning_rate": 5.8325275626470166e-08, "loss": 13.9172, "step": 478100 }, { "epoch": 0.9658124492459104, "grad_norm": 0.0, "learning_rate": 5.827212637629198e-08, "loss": 14.8709, "step": 478110 }, { "epoch": 0.9658326498785942, "grad_norm": 349.94708251953125, "learning_rate": 5.821900121149582e-08, "loss": 31.447, "step": 478120 }, { "epoch": 0.965852850511278, "grad_norm": 59.9437141418457, "learning_rate": 5.8165900132340356e-08, "loss": 25.3907, "step": 478130 }, { "epoch": 0.9658730511439618, "grad_norm": 432.94598388671875, "learning_rate": 5.8112823139085396e-08, "loss": 22.3759, "step": 478140 }, { "epoch": 0.9658932517766456, "grad_norm": 230.91351318359375, "learning_rate": 5.80597702319885e-08, "loss": 17.5498, "step": 478150 }, { "epoch": 0.9659134524093295, "grad_norm": 357.3666076660156, "learning_rate": 5.800674141130946e-08, "loss": 19.6385, "step": 478160 }, { "epoch": 0.9659336530420133, "grad_norm": 388.39483642578125, "learning_rate": 5.795373667730586e-08, "loss": 20.044, "step": 478170 }, { "epoch": 0.9659538536746971, "grad_norm": 66.93562316894531, "learning_rate": 5.7900756030236924e-08, "loss": 17.7336, "step": 478180 }, { "epoch": 0.9659740543073809, "grad_norm": 332.9247741699219, "learning_rate": 5.7847799470360236e-08, "loss": 11.2676, "step": 478190 }, { "epoch": 0.9659942549400647, "grad_norm": 193.0968780517578, "learning_rate": 5.7794866997933355e-08, "loss": 12.7786, "step": 478200 }, { "epoch": 0.9660144555727486, "grad_norm": 276.2154235839844, "learning_rate": 5.774195861321552e-08, "loss": 36.7903, "step": 478210 }, { "epoch": 0.9660346562054324, "grad_norm": 274.4327697753906, "learning_rate": 5.76890743164632e-08, "loss": 14.1745, "step": 478220 }, { "epoch": 0.9660548568381162, "grad_norm": 235.90101623535156, "learning_rate": 5.763621410793563e-08, "loss": 27.2531, "step": 478230 }, { "epoch": 0.9660750574708, "grad_norm": 61.51955032348633, "learning_rate": 5.758337798788982e-08, "loss": 10.9076, "step": 478240 }, { "epoch": 0.9660952581034838, "grad_norm": 436.40478515625, "learning_rate": 5.753056595658224e-08, "loss": 17.9183, "step": 478250 }, { "epoch": 0.9661154587361677, "grad_norm": 154.50656127929688, "learning_rate": 5.7477778014272124e-08, "loss": 22.435, "step": 478260 }, { "epoch": 0.9661356593688515, "grad_norm": 288.6327209472656, "learning_rate": 5.7425014161215375e-08, "loss": 23.1011, "step": 478270 }, { "epoch": 0.9661558600015352, "grad_norm": 638.116455078125, "learning_rate": 5.737227439766957e-08, "loss": 18.3505, "step": 478280 }, { "epoch": 0.966176060634219, "grad_norm": 116.31895446777344, "learning_rate": 5.7319558723892275e-08, "loss": 11.1959, "step": 478290 }, { "epoch": 0.9661962612669028, "grad_norm": 597.60498046875, "learning_rate": 5.726686714013996e-08, "loss": 22.2574, "step": 478300 }, { "epoch": 0.9662164618995867, "grad_norm": 80.69058990478516, "learning_rate": 5.7214199646669076e-08, "loss": 29.2931, "step": 478310 }, { "epoch": 0.9662366625322705, "grad_norm": 150.7561798095703, "learning_rate": 5.716155624373665e-08, "loss": 16.8851, "step": 478320 }, { "epoch": 0.9662568631649543, "grad_norm": 103.19973754882812, "learning_rate": 5.710893693159969e-08, "loss": 17.6151, "step": 478330 }, { "epoch": 0.9662770637976381, "grad_norm": 207.61679077148438, "learning_rate": 5.705634171051411e-08, "loss": 18.4357, "step": 478340 }, { "epoch": 0.9662972644303219, "grad_norm": 57.48310852050781, "learning_rate": 5.700377058073636e-08, "loss": 27.1168, "step": 478350 }, { "epoch": 0.9663174650630058, "grad_norm": 227.81890869140625, "learning_rate": 5.6951223542522915e-08, "loss": 33.3957, "step": 478360 }, { "epoch": 0.9663376656956896, "grad_norm": 342.69482421875, "learning_rate": 5.6898700596129674e-08, "loss": 9.5855, "step": 478370 }, { "epoch": 0.9663578663283734, "grad_norm": 184.25108337402344, "learning_rate": 5.684620174181255e-08, "loss": 14.7649, "step": 478380 }, { "epoch": 0.9663780669610572, "grad_norm": 163.93824768066406, "learning_rate": 5.679372697982688e-08, "loss": 16.5577, "step": 478390 }, { "epoch": 0.966398267593741, "grad_norm": 257.0531921386719, "learning_rate": 5.674127631043025e-08, "loss": 11.9526, "step": 478400 }, { "epoch": 0.9664184682264249, "grad_norm": 1.74449622631073, "learning_rate": 5.668884973387634e-08, "loss": 12.227, "step": 478410 }, { "epoch": 0.9664386688591087, "grad_norm": 440.2716369628906, "learning_rate": 5.663644725042161e-08, "loss": 29.5727, "step": 478420 }, { "epoch": 0.9664588694917925, "grad_norm": 326.8595275878906, "learning_rate": 5.658406886032142e-08, "loss": 17.7575, "step": 478430 }, { "epoch": 0.9664790701244763, "grad_norm": 476.65045166015625, "learning_rate": 5.653171456383055e-08, "loss": 22.3876, "step": 478440 }, { "epoch": 0.9664992707571601, "grad_norm": 235.6327362060547, "learning_rate": 5.647938436120437e-08, "loss": 10.9182, "step": 478450 }, { "epoch": 0.966519471389844, "grad_norm": 751.4415893554688, "learning_rate": 5.642707825269822e-08, "loss": 22.4554, "step": 478460 }, { "epoch": 0.9665396720225278, "grad_norm": 132.41456604003906, "learning_rate": 5.637479623856745e-08, "loss": 19.5785, "step": 478470 }, { "epoch": 0.9665598726552116, "grad_norm": 219.9902801513672, "learning_rate": 5.632253831906631e-08, "loss": 19.1593, "step": 478480 }, { "epoch": 0.9665800732878954, "grad_norm": 184.803955078125, "learning_rate": 5.6270304494449035e-08, "loss": 20.6811, "step": 478490 }, { "epoch": 0.9666002739205792, "grad_norm": 40.44912338256836, "learning_rate": 5.621809476497098e-08, "loss": 32.0742, "step": 478500 }, { "epoch": 0.966620474553263, "grad_norm": 190.54283142089844, "learning_rate": 5.616590913088638e-08, "loss": 16.1784, "step": 478510 }, { "epoch": 0.9666406751859469, "grad_norm": 149.3194122314453, "learning_rate": 5.611374759244892e-08, "loss": 11.1327, "step": 478520 }, { "epoch": 0.9666608758186307, "grad_norm": 491.3622741699219, "learning_rate": 5.6061610149913957e-08, "loss": 37.307, "step": 478530 }, { "epoch": 0.9666810764513144, "grad_norm": 163.3978729248047, "learning_rate": 5.6009496803534624e-08, "loss": 24.0111, "step": 478540 }, { "epoch": 0.9667012770839982, "grad_norm": 292.5745544433594, "learning_rate": 5.595740755356627e-08, "loss": 16.4687, "step": 478550 }, { "epoch": 0.966721477716682, "grad_norm": 4066.260009765625, "learning_rate": 5.590534240026146e-08, "loss": 43.7808, "step": 478560 }, { "epoch": 0.9667416783493659, "grad_norm": 19.287261962890625, "learning_rate": 5.58533013438739e-08, "loss": 18.9646, "step": 478570 }, { "epoch": 0.9667618789820497, "grad_norm": 186.73593139648438, "learning_rate": 5.580128438465837e-08, "loss": 11.4306, "step": 478580 }, { "epoch": 0.9667820796147335, "grad_norm": 394.5108642578125, "learning_rate": 5.574929152286745e-08, "loss": 15.8368, "step": 478590 }, { "epoch": 0.9668022802474173, "grad_norm": 170.31736755371094, "learning_rate": 5.569732275875428e-08, "loss": 18.6165, "step": 478600 }, { "epoch": 0.9668224808801011, "grad_norm": 207.41769409179688, "learning_rate": 5.5645378092573085e-08, "loss": 35.4603, "step": 478610 }, { "epoch": 0.966842681512785, "grad_norm": 209.5960235595703, "learning_rate": 5.559345752457701e-08, "loss": 13.7902, "step": 478620 }, { "epoch": 0.9668628821454688, "grad_norm": 342.3749084472656, "learning_rate": 5.554156105501862e-08, "loss": 37.1281, "step": 478630 }, { "epoch": 0.9668830827781526, "grad_norm": 2.9106838703155518, "learning_rate": 5.54896886841505e-08, "loss": 21.7941, "step": 478640 }, { "epoch": 0.9669032834108364, "grad_norm": 124.96565246582031, "learning_rate": 5.543784041222633e-08, "loss": 10.9223, "step": 478650 }, { "epoch": 0.9669234840435202, "grad_norm": 252.74948120117188, "learning_rate": 5.538601623949869e-08, "loss": 12.1952, "step": 478660 }, { "epoch": 0.9669436846762041, "grad_norm": 360.6080627441406, "learning_rate": 5.533421616621903e-08, "loss": 22.9077, "step": 478670 }, { "epoch": 0.9669638853088879, "grad_norm": 282.87744140625, "learning_rate": 5.528244019264106e-08, "loss": 17.5386, "step": 478680 }, { "epoch": 0.9669840859415717, "grad_norm": 116.16239929199219, "learning_rate": 5.5230688319017344e-08, "loss": 18.8524, "step": 478690 }, { "epoch": 0.9670042865742555, "grad_norm": 491.3843078613281, "learning_rate": 5.517896054559879e-08, "loss": 21.7241, "step": 478700 }, { "epoch": 0.9670244872069393, "grad_norm": 352.613037109375, "learning_rate": 5.512725687263853e-08, "loss": 16.5981, "step": 478710 }, { "epoch": 0.9670446878396232, "grad_norm": 576.197998046875, "learning_rate": 5.507557730038859e-08, "loss": 14.4004, "step": 478720 }, { "epoch": 0.967064888472307, "grad_norm": 394.2955322265625, "learning_rate": 5.5023921829100434e-08, "loss": 25.8643, "step": 478730 }, { "epoch": 0.9670850891049908, "grad_norm": 498.21356201171875, "learning_rate": 5.497229045902552e-08, "loss": 32.8693, "step": 478740 }, { "epoch": 0.9671052897376746, "grad_norm": 5.832070350646973, "learning_rate": 5.492068319041588e-08, "loss": 34.8002, "step": 478750 }, { "epoch": 0.9671254903703584, "grad_norm": 367.8793029785156, "learning_rate": 5.4869100023523526e-08, "loss": 18.0233, "step": 478760 }, { "epoch": 0.9671456910030423, "grad_norm": 101.5944595336914, "learning_rate": 5.4817540958598814e-08, "loss": 5.5856, "step": 478770 }, { "epoch": 0.9671658916357261, "grad_norm": 380.5604553222656, "learning_rate": 5.476600599589377e-08, "loss": 24.7774, "step": 478780 }, { "epoch": 0.9671860922684098, "grad_norm": 149.87095642089844, "learning_rate": 5.471449513565985e-08, "loss": 21.6804, "step": 478790 }, { "epoch": 0.9672062929010936, "grad_norm": 147.40972900390625, "learning_rate": 5.466300837814797e-08, "loss": 16.2145, "step": 478800 }, { "epoch": 0.9672264935337774, "grad_norm": 272.8534240722656, "learning_rate": 5.461154572360794e-08, "loss": 21.2751, "step": 478810 }, { "epoch": 0.9672466941664613, "grad_norm": 401.5909729003906, "learning_rate": 5.456010717229177e-08, "loss": 22.2067, "step": 478820 }, { "epoch": 0.9672668947991451, "grad_norm": 242.17041015625, "learning_rate": 5.4508692724449806e-08, "loss": 18.2951, "step": 478830 }, { "epoch": 0.9672870954318289, "grad_norm": 166.62091064453125, "learning_rate": 5.445730238033298e-08, "loss": 7.8322, "step": 478840 }, { "epoch": 0.9673072960645127, "grad_norm": 176.90423583984375, "learning_rate": 5.440593614019107e-08, "loss": 12.1154, "step": 478850 }, { "epoch": 0.9673274966971965, "grad_norm": 189.25193786621094, "learning_rate": 5.435459400427501e-08, "loss": 14.5552, "step": 478860 }, { "epoch": 0.9673476973298804, "grad_norm": 443.8551330566406, "learning_rate": 5.4303275972834577e-08, "loss": 23.8017, "step": 478870 }, { "epoch": 0.9673678979625642, "grad_norm": 216.1768341064453, "learning_rate": 5.42519820461207e-08, "loss": 10.8594, "step": 478880 }, { "epoch": 0.967388098595248, "grad_norm": 56.928367614746094, "learning_rate": 5.4200712224382056e-08, "loss": 10.5989, "step": 478890 }, { "epoch": 0.9674082992279318, "grad_norm": 219.41473388671875, "learning_rate": 5.414946650786957e-08, "loss": 10.5286, "step": 478900 }, { "epoch": 0.9674284998606156, "grad_norm": 177.71142578125, "learning_rate": 5.409824489683247e-08, "loss": 29.7072, "step": 478910 }, { "epoch": 0.9674487004932995, "grad_norm": 189.0150604248047, "learning_rate": 5.4047047391521114e-08, "loss": 22.9577, "step": 478920 }, { "epoch": 0.9674689011259833, "grad_norm": 148.5370330810547, "learning_rate": 5.39958739921842e-08, "loss": 19.9695, "step": 478930 }, { "epoch": 0.9674891017586671, "grad_norm": 355.3103942871094, "learning_rate": 5.394472469907208e-08, "loss": 25.7296, "step": 478940 }, { "epoch": 0.9675093023913509, "grad_norm": 356.6740417480469, "learning_rate": 5.389359951243345e-08, "loss": 9.252, "step": 478950 }, { "epoch": 0.9675295030240347, "grad_norm": 148.18887329101562, "learning_rate": 5.3842498432516986e-08, "loss": 13.152, "step": 478960 }, { "epoch": 0.9675497036567186, "grad_norm": 0.0, "learning_rate": 5.3791421459571947e-08, "loss": 11.5646, "step": 478970 }, { "epoch": 0.9675699042894024, "grad_norm": 124.81631469726562, "learning_rate": 5.374036859384868e-08, "loss": 10.3315, "step": 478980 }, { "epoch": 0.9675901049220862, "grad_norm": 374.0554504394531, "learning_rate": 5.3689339835594215e-08, "loss": 12.807, "step": 478990 }, { "epoch": 0.96761030555477, "grad_norm": 206.81459045410156, "learning_rate": 5.363833518505834e-08, "loss": 8.8482, "step": 479000 }, { "epoch": 0.9676305061874538, "grad_norm": 415.3966369628906, "learning_rate": 5.358735464248921e-08, "loss": 16.134, "step": 479010 }, { "epoch": 0.9676507068201377, "grad_norm": 365.6966247558594, "learning_rate": 5.3536398208135495e-08, "loss": 21.1034, "step": 479020 }, { "epoch": 0.9676709074528215, "grad_norm": 42.270973205566406, "learning_rate": 5.348546588224535e-08, "loss": 21.1337, "step": 479030 }, { "epoch": 0.9676911080855053, "grad_norm": 120.83961486816406, "learning_rate": 5.343455766506689e-08, "loss": 21.1428, "step": 479040 }, { "epoch": 0.967711308718189, "grad_norm": 136.2984619140625, "learning_rate": 5.338367355684881e-08, "loss": 21.5983, "step": 479050 }, { "epoch": 0.9677315093508728, "grad_norm": 217.37240600585938, "learning_rate": 5.33328135578387e-08, "loss": 25.2889, "step": 479060 }, { "epoch": 0.9677517099835566, "grad_norm": 302.94403076171875, "learning_rate": 5.3281977668284136e-08, "loss": 19.4852, "step": 479070 }, { "epoch": 0.9677719106162405, "grad_norm": 233.90975952148438, "learning_rate": 5.323116588843324e-08, "loss": 15.5937, "step": 479080 }, { "epoch": 0.9677921112489243, "grad_norm": 372.6578674316406, "learning_rate": 5.318037821853417e-08, "loss": 18.6789, "step": 479090 }, { "epoch": 0.9678123118816081, "grad_norm": 291.18841552734375, "learning_rate": 5.312961465883393e-08, "loss": 16.7501, "step": 479100 }, { "epoch": 0.9678325125142919, "grad_norm": 382.44415283203125, "learning_rate": 5.307887520957955e-08, "loss": 10.8481, "step": 479110 }, { "epoch": 0.9678527131469757, "grad_norm": 246.8311309814453, "learning_rate": 5.302815987101917e-08, "loss": 10.9505, "step": 479120 }, { "epoch": 0.9678729137796596, "grad_norm": 475.86279296875, "learning_rate": 5.2977468643399254e-08, "loss": 22.5141, "step": 479130 }, { "epoch": 0.9678931144123434, "grad_norm": 458.67535400390625, "learning_rate": 5.292680152696739e-08, "loss": 19.024, "step": 479140 }, { "epoch": 0.9679133150450272, "grad_norm": 102.90484619140625, "learning_rate": 5.2876158521969476e-08, "loss": 20.0739, "step": 479150 }, { "epoch": 0.967933515677711, "grad_norm": 497.6946105957031, "learning_rate": 5.282553962865422e-08, "loss": 11.9773, "step": 479160 }, { "epoch": 0.9679537163103948, "grad_norm": 279.33380126953125, "learning_rate": 5.2774944847266976e-08, "loss": 16.3153, "step": 479170 }, { "epoch": 0.9679739169430787, "grad_norm": 376.793701171875, "learning_rate": 5.27243741780542e-08, "loss": 18.424, "step": 479180 }, { "epoch": 0.9679941175757625, "grad_norm": 335.0399475097656, "learning_rate": 5.267382762126294e-08, "loss": 19.7251, "step": 479190 }, { "epoch": 0.9680143182084463, "grad_norm": 216.72479248046875, "learning_rate": 5.262330517713965e-08, "loss": 9.3309, "step": 479200 }, { "epoch": 0.9680345188411301, "grad_norm": 217.37112426757812, "learning_rate": 5.2572806845930244e-08, "loss": 23.5566, "step": 479210 }, { "epoch": 0.9680547194738139, "grad_norm": 208.50352478027344, "learning_rate": 5.252233262788065e-08, "loss": 14.9861, "step": 479220 }, { "epoch": 0.9680749201064978, "grad_norm": 129.575927734375, "learning_rate": 5.247188252323787e-08, "loss": 12.8478, "step": 479230 }, { "epoch": 0.9680951207391816, "grad_norm": 295.2030029296875, "learning_rate": 5.242145653224673e-08, "loss": 13.1718, "step": 479240 }, { "epoch": 0.9681153213718654, "grad_norm": 169.93435668945312, "learning_rate": 5.237105465515258e-08, "loss": 17.8442, "step": 479250 }, { "epoch": 0.9681355220045492, "grad_norm": 218.6941375732422, "learning_rate": 5.2320676892202996e-08, "loss": 17.0933, "step": 479260 }, { "epoch": 0.968155722637233, "grad_norm": 210.277099609375, "learning_rate": 5.227032324364167e-08, "loss": 16.3555, "step": 479270 }, { "epoch": 0.9681759232699169, "grad_norm": 180.5042724609375, "learning_rate": 5.2219993709714535e-08, "loss": 11.6651, "step": 479280 }, { "epoch": 0.9681961239026007, "grad_norm": 233.86984252929688, "learning_rate": 5.2169688290667485e-08, "loss": 18.3108, "step": 479290 }, { "epoch": 0.9682163245352845, "grad_norm": 415.8732604980469, "learning_rate": 5.2119406986745336e-08, "loss": 16.1397, "step": 479300 }, { "epoch": 0.9682365251679682, "grad_norm": 219.26419067382812, "learning_rate": 5.206914979819289e-08, "loss": 19.1014, "step": 479310 }, { "epoch": 0.968256725800652, "grad_norm": 318.5694274902344, "learning_rate": 5.2018916725254945e-08, "loss": 31.2966, "step": 479320 }, { "epoch": 0.9682769264333358, "grad_norm": 189.90823364257812, "learning_rate": 5.196870776817742e-08, "loss": 17.2804, "step": 479330 }, { "epoch": 0.9682971270660197, "grad_norm": 182.29135131835938, "learning_rate": 5.191852292720401e-08, "loss": 30.5925, "step": 479340 }, { "epoch": 0.9683173276987035, "grad_norm": 503.1014099121094, "learning_rate": 5.186836220257951e-08, "loss": 19.8871, "step": 479350 }, { "epoch": 0.9683375283313873, "grad_norm": 464.2020263671875, "learning_rate": 5.1818225594548185e-08, "loss": 12.8837, "step": 479360 }, { "epoch": 0.9683577289640711, "grad_norm": 62.10761642456055, "learning_rate": 5.176811310335539e-08, "loss": 19.4416, "step": 479370 }, { "epoch": 0.968377929596755, "grad_norm": 142.53768920898438, "learning_rate": 5.17180247292437e-08, "loss": 10.611, "step": 479380 }, { "epoch": 0.9683981302294388, "grad_norm": 214.14083862304688, "learning_rate": 5.1667960472459034e-08, "loss": 13.5588, "step": 479390 }, { "epoch": 0.9684183308621226, "grad_norm": 101.42478942871094, "learning_rate": 5.161792033324398e-08, "loss": 12.2453, "step": 479400 }, { "epoch": 0.9684385314948064, "grad_norm": 175.5721435546875, "learning_rate": 5.1567904311843886e-08, "loss": 15.277, "step": 479410 }, { "epoch": 0.9684587321274902, "grad_norm": 202.87779235839844, "learning_rate": 5.151791240850079e-08, "loss": 15.1492, "step": 479420 }, { "epoch": 0.968478932760174, "grad_norm": 276.27227783203125, "learning_rate": 5.14679446234595e-08, "loss": 41.4851, "step": 479430 }, { "epoch": 0.9684991333928579, "grad_norm": 384.14471435546875, "learning_rate": 5.14180009569637e-08, "loss": 22.8071, "step": 479440 }, { "epoch": 0.9685193340255417, "grad_norm": 278.8869323730469, "learning_rate": 5.136808140925542e-08, "loss": 13.2723, "step": 479450 }, { "epoch": 0.9685395346582255, "grad_norm": 376.0140686035156, "learning_rate": 5.131818598057947e-08, "loss": 17.5005, "step": 479460 }, { "epoch": 0.9685597352909093, "grad_norm": 220.8706817626953, "learning_rate": 5.126831467117843e-08, "loss": 23.8557, "step": 479470 }, { "epoch": 0.9685799359235931, "grad_norm": 190.7847137451172, "learning_rate": 5.121846748129544e-08, "loss": 20.3755, "step": 479480 }, { "epoch": 0.968600136556277, "grad_norm": 310.7080078125, "learning_rate": 5.116864441117364e-08, "loss": 21.222, "step": 479490 }, { "epoch": 0.9686203371889608, "grad_norm": 335.2868957519531, "learning_rate": 5.111884546105506e-08, "loss": 18.6405, "step": 479500 }, { "epoch": 0.9686405378216446, "grad_norm": 477.43853759765625, "learning_rate": 5.106907063118394e-08, "loss": 15.0737, "step": 479510 }, { "epoch": 0.9686607384543284, "grad_norm": 352.6179504394531, "learning_rate": 5.10193199218012e-08, "loss": 23.3457, "step": 479520 }, { "epoch": 0.9686809390870122, "grad_norm": 299.53912353515625, "learning_rate": 5.0969593333149994e-08, "loss": 15.7751, "step": 479530 }, { "epoch": 0.9687011397196961, "grad_norm": 234.4359893798828, "learning_rate": 5.091989086547289e-08, "loss": 20.7437, "step": 479540 }, { "epoch": 0.9687213403523799, "grad_norm": 18.35843849182129, "learning_rate": 5.0870212519012477e-08, "loss": 15.5351, "step": 479550 }, { "epoch": 0.9687415409850636, "grad_norm": 217.72280883789062, "learning_rate": 5.082055829400967e-08, "loss": 17.0471, "step": 479560 }, { "epoch": 0.9687617416177474, "grad_norm": 457.1471252441406, "learning_rate": 5.077092819070761e-08, "loss": 9.496, "step": 479570 }, { "epoch": 0.9687819422504312, "grad_norm": 162.7769775390625, "learning_rate": 5.072132220934722e-08, "loss": 23.2226, "step": 479580 }, { "epoch": 0.9688021428831151, "grad_norm": 402.1329345703125, "learning_rate": 5.067174035017164e-08, "loss": 11.5721, "step": 479590 }, { "epoch": 0.9688223435157989, "grad_norm": 137.4611358642578, "learning_rate": 5.062218261342122e-08, "loss": 15.9592, "step": 479600 }, { "epoch": 0.9688425441484827, "grad_norm": 252.6926727294922, "learning_rate": 5.0572648999338e-08, "loss": 10.2002, "step": 479610 }, { "epoch": 0.9688627447811665, "grad_norm": 322.97845458984375, "learning_rate": 5.052313950816401e-08, "loss": 13.9055, "step": 479620 }, { "epoch": 0.9688829454138503, "grad_norm": 331.7187194824219, "learning_rate": 5.0473654140139604e-08, "loss": 22.4178, "step": 479630 }, { "epoch": 0.9689031460465342, "grad_norm": 489.8714904785156, "learning_rate": 5.042419289550571e-08, "loss": 10.4482, "step": 479640 }, { "epoch": 0.968923346679218, "grad_norm": 760.1708984375, "learning_rate": 5.0374755774504346e-08, "loss": 22.504, "step": 479650 }, { "epoch": 0.9689435473119018, "grad_norm": 482.96337890625, "learning_rate": 5.032534277737644e-08, "loss": 15.7896, "step": 479660 }, { "epoch": 0.9689637479445856, "grad_norm": 342.7398986816406, "learning_rate": 5.027595390436235e-08, "loss": 13.5051, "step": 479670 }, { "epoch": 0.9689839485772694, "grad_norm": 422.38238525390625, "learning_rate": 5.0226589155702445e-08, "loss": 16.7095, "step": 479680 }, { "epoch": 0.9690041492099533, "grad_norm": 458.7433776855469, "learning_rate": 5.017724853163819e-08, "loss": 28.2092, "step": 479690 }, { "epoch": 0.9690243498426371, "grad_norm": 295.3371887207031, "learning_rate": 5.012793203240995e-08, "loss": 13.9451, "step": 479700 }, { "epoch": 0.9690445504753209, "grad_norm": 325.6849365234375, "learning_rate": 5.007863965825754e-08, "loss": 15.4571, "step": 479710 }, { "epoch": 0.9690647511080047, "grad_norm": 232.40927124023438, "learning_rate": 5.002937140942132e-08, "loss": 9.5497, "step": 479720 }, { "epoch": 0.9690849517406885, "grad_norm": 129.34600830078125, "learning_rate": 4.998012728614221e-08, "loss": 8.8086, "step": 479730 }, { "epoch": 0.9691051523733724, "grad_norm": 312.6191101074219, "learning_rate": 4.99309072886589e-08, "loss": 19.7745, "step": 479740 }, { "epoch": 0.9691253530060562, "grad_norm": 6.171020030975342, "learning_rate": 4.988171141721232e-08, "loss": 14.1907, "step": 479750 }, { "epoch": 0.96914555363874, "grad_norm": 263.8418273925781, "learning_rate": 4.983253967204171e-08, "loss": 15.5684, "step": 479760 }, { "epoch": 0.9691657542714238, "grad_norm": 302.4642028808594, "learning_rate": 4.9783392053386894e-08, "loss": 16.7071, "step": 479770 }, { "epoch": 0.9691859549041076, "grad_norm": 372.0815124511719, "learning_rate": 4.9734268561487665e-08, "loss": 15.0816, "step": 479780 }, { "epoch": 0.9692061555367915, "grad_norm": 524.9464721679688, "learning_rate": 4.968516919658328e-08, "loss": 10.3388, "step": 479790 }, { "epoch": 0.9692263561694753, "grad_norm": 223.0327606201172, "learning_rate": 4.9636093958913e-08, "loss": 19.6309, "step": 479800 }, { "epoch": 0.9692465568021591, "grad_norm": 181.9541778564453, "learning_rate": 4.958704284871552e-08, "loss": 16.8944, "step": 479810 }, { "epoch": 0.9692667574348428, "grad_norm": 213.51571655273438, "learning_rate": 4.9538015866230636e-08, "loss": 18.4977, "step": 479820 }, { "epoch": 0.9692869580675266, "grad_norm": 245.20925903320312, "learning_rate": 4.948901301169706e-08, "loss": 9.1325, "step": 479830 }, { "epoch": 0.9693071587002104, "grad_norm": 173.84434509277344, "learning_rate": 4.944003428535349e-08, "loss": 17.6009, "step": 479840 }, { "epoch": 0.9693273593328943, "grad_norm": 501.1705322265625, "learning_rate": 4.939107968743917e-08, "loss": 17.0873, "step": 479850 }, { "epoch": 0.9693475599655781, "grad_norm": 115.13642120361328, "learning_rate": 4.9342149218191694e-08, "loss": 10.3066, "step": 479860 }, { "epoch": 0.9693677605982619, "grad_norm": 156.52928161621094, "learning_rate": 4.9293242877850866e-08, "loss": 14.234, "step": 479870 }, { "epoch": 0.9693879612309457, "grad_norm": 215.22109985351562, "learning_rate": 4.9244360666653724e-08, "loss": 42.7303, "step": 479880 }, { "epoch": 0.9694081618636295, "grad_norm": 345.40728759765625, "learning_rate": 4.9195502584839516e-08, "loss": 27.9623, "step": 479890 }, { "epoch": 0.9694283624963134, "grad_norm": 420.7540283203125, "learning_rate": 4.914666863264528e-08, "loss": 13.216, "step": 479900 }, { "epoch": 0.9694485631289972, "grad_norm": 490.6384582519531, "learning_rate": 4.9097858810310815e-08, "loss": 19.9933, "step": 479910 }, { "epoch": 0.969468763761681, "grad_norm": 205.8202667236328, "learning_rate": 4.9049073118072057e-08, "loss": 22.8096, "step": 479920 }, { "epoch": 0.9694889643943648, "grad_norm": 358.317626953125, "learning_rate": 4.900031155616769e-08, "loss": 16.5017, "step": 479930 }, { "epoch": 0.9695091650270486, "grad_norm": 63.70272445678711, "learning_rate": 4.8951574124835865e-08, "loss": 12.9545, "step": 479940 }, { "epoch": 0.9695293656597325, "grad_norm": 191.5247344970703, "learning_rate": 4.890286082431306e-08, "loss": 31.304, "step": 479950 }, { "epoch": 0.9695495662924163, "grad_norm": 293.09033203125, "learning_rate": 4.885417165483741e-08, "loss": 13.7888, "step": 479960 }, { "epoch": 0.9695697669251001, "grad_norm": 275.1098937988281, "learning_rate": 4.880550661664541e-08, "loss": 21.4353, "step": 479970 }, { "epoch": 0.9695899675577839, "grad_norm": 267.2708435058594, "learning_rate": 4.8756865709976284e-08, "loss": 15.3204, "step": 479980 }, { "epoch": 0.9696101681904677, "grad_norm": 473.5832214355469, "learning_rate": 4.8708248935064315e-08, "loss": 23.8947, "step": 479990 }, { "epoch": 0.9696303688231516, "grad_norm": 218.5522918701172, "learning_rate": 4.865965629214819e-08, "loss": 16.9317, "step": 480000 }, { "epoch": 0.9696505694558354, "grad_norm": 79.72660064697266, "learning_rate": 4.861108778146495e-08, "loss": 13.4541, "step": 480010 }, { "epoch": 0.9696707700885192, "grad_norm": 496.3027648925781, "learning_rate": 4.856254340325051e-08, "loss": 17.6127, "step": 480020 }, { "epoch": 0.969690970721203, "grad_norm": 453.5667724609375, "learning_rate": 4.851402315774134e-08, "loss": 18.3573, "step": 480030 }, { "epoch": 0.9697111713538868, "grad_norm": 251.51531982421875, "learning_rate": 4.846552704517449e-08, "loss": 18.1281, "step": 480040 }, { "epoch": 0.9697313719865707, "grad_norm": 126.87972259521484, "learning_rate": 4.841705506578587e-08, "loss": 11.8783, "step": 480050 }, { "epoch": 0.9697515726192545, "grad_norm": 150.7003173828125, "learning_rate": 4.836860721981196e-08, "loss": 16.3773, "step": 480060 }, { "epoch": 0.9697717732519382, "grad_norm": 121.2275161743164, "learning_rate": 4.8320183507489236e-08, "loss": 14.113, "step": 480070 }, { "epoch": 0.969791973884622, "grad_norm": 378.8006286621094, "learning_rate": 4.827178392905307e-08, "loss": 18.2608, "step": 480080 }, { "epoch": 0.9698121745173058, "grad_norm": 131.00286865234375, "learning_rate": 4.822340848473994e-08, "loss": 24.1414, "step": 480090 }, { "epoch": 0.9698323751499897, "grad_norm": 132.82247924804688, "learning_rate": 4.8175057174785766e-08, "loss": 13.6934, "step": 480100 }, { "epoch": 0.9698525757826735, "grad_norm": 195.75389099121094, "learning_rate": 4.81267299994248e-08, "loss": 8.3021, "step": 480110 }, { "epoch": 0.9698727764153573, "grad_norm": 514.996826171875, "learning_rate": 4.807842695889409e-08, "loss": 21.4127, "step": 480120 }, { "epoch": 0.9698929770480411, "grad_norm": 240.45498657226562, "learning_rate": 4.8030148053428424e-08, "loss": 16.6076, "step": 480130 }, { "epoch": 0.9699131776807249, "grad_norm": 174.4800567626953, "learning_rate": 4.798189328326319e-08, "loss": 18.3415, "step": 480140 }, { "epoch": 0.9699333783134088, "grad_norm": 132.678955078125, "learning_rate": 4.793366264863375e-08, "loss": 15.2421, "step": 480150 }, { "epoch": 0.9699535789460926, "grad_norm": 133.9465789794922, "learning_rate": 4.788545614977491e-08, "loss": 17.5808, "step": 480160 }, { "epoch": 0.9699737795787764, "grad_norm": 667.6593627929688, "learning_rate": 4.783727378692205e-08, "loss": 23.5546, "step": 480170 }, { "epoch": 0.9699939802114602, "grad_norm": 340.83990478515625, "learning_rate": 4.778911556030885e-08, "loss": 15.1011, "step": 480180 }, { "epoch": 0.970014180844144, "grad_norm": 253.79537963867188, "learning_rate": 4.774098147017181e-08, "loss": 29.0629, "step": 480190 }, { "epoch": 0.9700343814768279, "grad_norm": 189.45553588867188, "learning_rate": 4.769287151674407e-08, "loss": 30.27, "step": 480200 }, { "epoch": 0.9700545821095117, "grad_norm": 0.0, "learning_rate": 4.764478570026043e-08, "loss": 15.7602, "step": 480210 }, { "epoch": 0.9700747827421955, "grad_norm": 187.9842987060547, "learning_rate": 4.759672402095572e-08, "loss": 16.245, "step": 480220 }, { "epoch": 0.9700949833748793, "grad_norm": 37.108055114746094, "learning_rate": 4.754868647906419e-08, "loss": 19.7174, "step": 480230 }, { "epoch": 0.9701151840075631, "grad_norm": 51.12001037597656, "learning_rate": 4.750067307481954e-08, "loss": 14.801, "step": 480240 }, { "epoch": 0.970135384640247, "grad_norm": 278.0903625488281, "learning_rate": 4.7452683808456026e-08, "loss": 15.2198, "step": 480250 }, { "epoch": 0.9701555852729308, "grad_norm": 429.378662109375, "learning_rate": 4.740471868020735e-08, "loss": 13.9195, "step": 480260 }, { "epoch": 0.9701757859056146, "grad_norm": 566.3360595703125, "learning_rate": 4.735677769030722e-08, "loss": 33.8871, "step": 480270 }, { "epoch": 0.9701959865382984, "grad_norm": 303.39007568359375, "learning_rate": 4.730886083898989e-08, "loss": 21.8571, "step": 480280 }, { "epoch": 0.9702161871709822, "grad_norm": 248.40341186523438, "learning_rate": 4.726096812648795e-08, "loss": 17.8176, "step": 480290 }, { "epoch": 0.9702363878036661, "grad_norm": 363.1632995605469, "learning_rate": 4.7213099553035655e-08, "loss": 12.4366, "step": 480300 }, { "epoch": 0.9702565884363499, "grad_norm": 203.74185180664062, "learning_rate": 4.716525511886616e-08, "loss": 29.5093, "step": 480310 }, { "epoch": 0.9702767890690337, "grad_norm": 107.11116790771484, "learning_rate": 4.711743482421205e-08, "loss": 21.8832, "step": 480320 }, { "epoch": 0.9702969897017174, "grad_norm": 164.6458740234375, "learning_rate": 4.7069638669307026e-08, "loss": 16.5345, "step": 480330 }, { "epoch": 0.9703171903344012, "grad_norm": 377.70587158203125, "learning_rate": 4.702186665438424e-08, "loss": 14.0841, "step": 480340 }, { "epoch": 0.970337390967085, "grad_norm": 159.20343017578125, "learning_rate": 4.697411877967573e-08, "loss": 21.3772, "step": 480350 }, { "epoch": 0.9703575915997689, "grad_norm": 12.814987182617188, "learning_rate": 4.692639504541518e-08, "loss": 7.7102, "step": 480360 }, { "epoch": 0.9703777922324527, "grad_norm": 264.45574951171875, "learning_rate": 4.68786954518341e-08, "loss": 11.9716, "step": 480370 }, { "epoch": 0.9703979928651365, "grad_norm": 302.3787841796875, "learning_rate": 4.683101999916562e-08, "loss": 6.1376, "step": 480380 }, { "epoch": 0.9704181934978203, "grad_norm": 133.24697875976562, "learning_rate": 4.6783368687642325e-08, "loss": 10.4917, "step": 480390 }, { "epoch": 0.9704383941305041, "grad_norm": 347.9091491699219, "learning_rate": 4.6735741517495715e-08, "loss": 23.6633, "step": 480400 }, { "epoch": 0.970458594763188, "grad_norm": 191.81300354003906, "learning_rate": 4.668813848895837e-08, "loss": 11.3855, "step": 480410 }, { "epoch": 0.9704787953958718, "grad_norm": 149.15115356445312, "learning_rate": 4.6640559602262325e-08, "loss": 13.42, "step": 480420 }, { "epoch": 0.9704989960285556, "grad_norm": 151.28768920898438, "learning_rate": 4.6593004857639627e-08, "loss": 7.9655, "step": 480430 }, { "epoch": 0.9705191966612394, "grad_norm": 55.116512298583984, "learning_rate": 4.654547425532119e-08, "loss": 12.9752, "step": 480440 }, { "epoch": 0.9705393972939232, "grad_norm": 340.17279052734375, "learning_rate": 4.649796779554016e-08, "loss": 15.622, "step": 480450 }, { "epoch": 0.9705595979266071, "grad_norm": 204.43618774414062, "learning_rate": 4.645048547852693e-08, "loss": 16.4864, "step": 480460 }, { "epoch": 0.9705797985592909, "grad_norm": 267.153564453125, "learning_rate": 4.6403027304513513e-08, "loss": 16.2877, "step": 480470 }, { "epoch": 0.9705999991919747, "grad_norm": 366.7547302246094, "learning_rate": 4.635559327373029e-08, "loss": 14.5811, "step": 480480 }, { "epoch": 0.9706201998246585, "grad_norm": 135.94874572753906, "learning_rate": 4.6308183386409855e-08, "loss": 22.3305, "step": 480490 }, { "epoch": 0.9706404004573423, "grad_norm": 62.06936264038086, "learning_rate": 4.626079764278202e-08, "loss": 16.6228, "step": 480500 }, { "epoch": 0.9706606010900262, "grad_norm": 637.1981201171875, "learning_rate": 4.621343604307826e-08, "loss": 34.2254, "step": 480510 }, { "epoch": 0.97068080172271, "grad_norm": 411.60650634765625, "learning_rate": 4.616609858753007e-08, "loss": 15.6918, "step": 480520 }, { "epoch": 0.9707010023553938, "grad_norm": 251.51487731933594, "learning_rate": 4.6118785276366706e-08, "loss": 19.9691, "step": 480530 }, { "epoch": 0.9707212029880776, "grad_norm": 50.386932373046875, "learning_rate": 4.6071496109819643e-08, "loss": 15.63, "step": 480540 }, { "epoch": 0.9707414036207614, "grad_norm": 380.03997802734375, "learning_rate": 4.6024231088119266e-08, "loss": 16.9728, "step": 480550 }, { "epoch": 0.9707616042534453, "grad_norm": 331.0513610839844, "learning_rate": 4.597699021149649e-08, "loss": 19.1525, "step": 480560 }, { "epoch": 0.9707818048861291, "grad_norm": 81.25759887695312, "learning_rate": 4.592977348018002e-08, "loss": 15.16, "step": 480570 }, { "epoch": 0.9708020055188128, "grad_norm": 463.4545593261719, "learning_rate": 4.588258089440134e-08, "loss": 11.7949, "step": 480580 }, { "epoch": 0.9708222061514966, "grad_norm": 289.548828125, "learning_rate": 4.5835412454390823e-08, "loss": 17.104, "step": 480590 }, { "epoch": 0.9708424067841804, "grad_norm": 256.2091064453125, "learning_rate": 4.578826816037718e-08, "loss": 27.8903, "step": 480600 }, { "epoch": 0.9708626074168643, "grad_norm": 278.6357727050781, "learning_rate": 4.574114801259022e-08, "loss": 13.8457, "step": 480610 }, { "epoch": 0.9708828080495481, "grad_norm": 317.54296875, "learning_rate": 4.569405201126087e-08, "loss": 31.4359, "step": 480620 }, { "epoch": 0.9709030086822319, "grad_norm": 362.89520263671875, "learning_rate": 4.5646980156617284e-08, "loss": 15.1028, "step": 480630 }, { "epoch": 0.9709232093149157, "grad_norm": 528.4024658203125, "learning_rate": 4.5599932448889276e-08, "loss": 23.333, "step": 480640 }, { "epoch": 0.9709434099475995, "grad_norm": 327.55322265625, "learning_rate": 4.5552908888306654e-08, "loss": 13.7529, "step": 480650 }, { "epoch": 0.9709636105802834, "grad_norm": 340.1185607910156, "learning_rate": 4.5505909475098144e-08, "loss": 18.9533, "step": 480660 }, { "epoch": 0.9709838112129672, "grad_norm": 237.61825561523438, "learning_rate": 4.545893420949299e-08, "loss": 16.5598, "step": 480670 }, { "epoch": 0.971004011845651, "grad_norm": 207.75701904296875, "learning_rate": 4.5411983091719905e-08, "loss": 10.5763, "step": 480680 }, { "epoch": 0.9710242124783348, "grad_norm": 124.75130462646484, "learning_rate": 4.5365056122007586e-08, "loss": 21.4576, "step": 480690 }, { "epoch": 0.9710444131110186, "grad_norm": 91.90564727783203, "learning_rate": 4.531815330058586e-08, "loss": 21.2379, "step": 480700 }, { "epoch": 0.9710646137437025, "grad_norm": 179.62643432617188, "learning_rate": 4.527127462768233e-08, "loss": 13.1461, "step": 480710 }, { "epoch": 0.9710848143763863, "grad_norm": 373.83477783203125, "learning_rate": 4.5224420103525125e-08, "loss": 19.0828, "step": 480720 }, { "epoch": 0.9711050150090701, "grad_norm": 281.1725769042969, "learning_rate": 4.517758972834352e-08, "loss": 21.119, "step": 480730 }, { "epoch": 0.9711252156417539, "grad_norm": 74.29496765136719, "learning_rate": 4.5130783502365106e-08, "loss": 15.3173, "step": 480740 }, { "epoch": 0.9711454162744377, "grad_norm": 257.2310485839844, "learning_rate": 4.508400142581859e-08, "loss": 16.1936, "step": 480750 }, { "epoch": 0.9711656169071216, "grad_norm": 362.8109130859375, "learning_rate": 4.503724349893157e-08, "loss": 21.5927, "step": 480760 }, { "epoch": 0.9711858175398054, "grad_norm": 440.2646179199219, "learning_rate": 4.49905097219322e-08, "loss": 21.8716, "step": 480770 }, { "epoch": 0.9712060181724892, "grad_norm": 479.6381530761719, "learning_rate": 4.4943800095048615e-08, "loss": 17.2833, "step": 480780 }, { "epoch": 0.971226218805173, "grad_norm": 459.6658630371094, "learning_rate": 4.4897114618506765e-08, "loss": 19.8731, "step": 480790 }, { "epoch": 0.9712464194378568, "grad_norm": 211.4539031982422, "learning_rate": 4.485045329253646e-08, "loss": 17.9922, "step": 480800 }, { "epoch": 0.9712666200705407, "grad_norm": 464.1227722167969, "learning_rate": 4.480381611736362e-08, "loss": 14.6579, "step": 480810 }, { "epoch": 0.9712868207032245, "grad_norm": 337.0411376953125, "learning_rate": 4.4757203093215854e-08, "loss": 11.4597, "step": 480820 }, { "epoch": 0.9713070213359083, "grad_norm": 152.1861572265625, "learning_rate": 4.4710614220320746e-08, "loss": 10.3488, "step": 480830 }, { "epoch": 0.971327221968592, "grad_norm": 182.34927368164062, "learning_rate": 4.4664049498904796e-08, "loss": 14.679, "step": 480840 }, { "epoch": 0.9713474226012758, "grad_norm": 255.8133087158203, "learning_rate": 4.4617508929195585e-08, "loss": 12.6503, "step": 480850 }, { "epoch": 0.9713676232339596, "grad_norm": 177.95912170410156, "learning_rate": 4.457099251141961e-08, "loss": 7.0826, "step": 480860 }, { "epoch": 0.9713878238666435, "grad_norm": 165.25167846679688, "learning_rate": 4.4524500245803346e-08, "loss": 13.9274, "step": 480870 }, { "epoch": 0.9714080244993273, "grad_norm": 273.8017883300781, "learning_rate": 4.4478032132573845e-08, "loss": 19.7758, "step": 480880 }, { "epoch": 0.9714282251320111, "grad_norm": 380.6207580566406, "learning_rate": 4.443158817195703e-08, "loss": 35.096, "step": 480890 }, { "epoch": 0.9714484257646949, "grad_norm": 8.3333158493042, "learning_rate": 4.438516836417994e-08, "loss": 20.3232, "step": 480900 }, { "epoch": 0.9714686263973787, "grad_norm": 380.8330383300781, "learning_rate": 4.4338772709468514e-08, "loss": 14.3037, "step": 480910 }, { "epoch": 0.9714888270300626, "grad_norm": 159.7582550048828, "learning_rate": 4.429240120804923e-08, "loss": 31.5011, "step": 480920 }, { "epoch": 0.9715090276627464, "grad_norm": 370.4774475097656, "learning_rate": 4.424605386014691e-08, "loss": 27.2993, "step": 480930 }, { "epoch": 0.9715292282954302, "grad_norm": 386.0980224609375, "learning_rate": 4.4199730665988594e-08, "loss": 15.8645, "step": 480940 }, { "epoch": 0.971549428928114, "grad_norm": 37.42185974121094, "learning_rate": 4.415343162580022e-08, "loss": 9.6183, "step": 480950 }, { "epoch": 0.9715696295607978, "grad_norm": 93.18374633789062, "learning_rate": 4.4107156739806037e-08, "loss": 14.5817, "step": 480960 }, { "epoch": 0.9715898301934817, "grad_norm": 8.321996688842773, "learning_rate": 4.40609060082331e-08, "loss": 10.9445, "step": 480970 }, { "epoch": 0.9716100308261655, "grad_norm": 190.19419860839844, "learning_rate": 4.401467943130622e-08, "loss": 12.5028, "step": 480980 }, { "epoch": 0.9716302314588493, "grad_norm": 273.48779296875, "learning_rate": 4.3968477009250775e-08, "loss": 20.0074, "step": 480990 }, { "epoch": 0.9716504320915331, "grad_norm": 71.3777084350586, "learning_rate": 4.392229874229159e-08, "loss": 15.8897, "step": 481000 }, { "epoch": 0.9716706327242169, "grad_norm": 194.49368286132812, "learning_rate": 4.387614463065404e-08, "loss": 25.8925, "step": 481010 }, { "epoch": 0.9716908333569008, "grad_norm": 579.4498291015625, "learning_rate": 4.383001467456294e-08, "loss": 22.2444, "step": 481020 }, { "epoch": 0.9717110339895846, "grad_norm": 1078.0789794921875, "learning_rate": 4.378390887424366e-08, "loss": 22.6205, "step": 481030 }, { "epoch": 0.9717312346222684, "grad_norm": 369.1848449707031, "learning_rate": 4.3737827229919926e-08, "loss": 14.4202, "step": 481040 }, { "epoch": 0.9717514352549522, "grad_norm": 144.8754119873047, "learning_rate": 4.36917697418171e-08, "loss": 13.1191, "step": 481050 }, { "epoch": 0.971771635887636, "grad_norm": 74.88249206542969, "learning_rate": 4.364573641016001e-08, "loss": 15.3583, "step": 481060 }, { "epoch": 0.9717918365203199, "grad_norm": 296.56024169921875, "learning_rate": 4.359972723517236e-08, "loss": 23.6246, "step": 481070 }, { "epoch": 0.9718120371530037, "grad_norm": 265.19952392578125, "learning_rate": 4.3553742217077866e-08, "loss": 16.3429, "step": 481080 }, { "epoch": 0.9718322377856875, "grad_norm": 111.01641845703125, "learning_rate": 4.350778135610134e-08, "loss": 11.5564, "step": 481090 }, { "epoch": 0.9718524384183712, "grad_norm": 1.675919771194458, "learning_rate": 4.346184465246761e-08, "loss": 16.0173, "step": 481100 }, { "epoch": 0.971872639051055, "grad_norm": 253.8666534423828, "learning_rate": 4.3415932106398715e-08, "loss": 17.6637, "step": 481110 }, { "epoch": 0.9718928396837389, "grad_norm": 267.0229187011719, "learning_rate": 4.3370043718119484e-08, "loss": 13.4649, "step": 481120 }, { "epoch": 0.9719130403164227, "grad_norm": 304.34234619140625, "learning_rate": 4.332417948785417e-08, "loss": 22.6865, "step": 481130 }, { "epoch": 0.9719332409491065, "grad_norm": 282.455322265625, "learning_rate": 4.327833941582538e-08, "loss": 17.5974, "step": 481140 }, { "epoch": 0.9719534415817903, "grad_norm": 153.42041015625, "learning_rate": 4.3232523502256264e-08, "loss": 28.9189, "step": 481150 }, { "epoch": 0.9719736422144741, "grad_norm": 110.81672668457031, "learning_rate": 4.318673174737109e-08, "loss": 8.7016, "step": 481160 }, { "epoch": 0.971993842847158, "grad_norm": 320.25115966796875, "learning_rate": 4.3140964151393015e-08, "loss": 9.325, "step": 481170 }, { "epoch": 0.9720140434798418, "grad_norm": 245.3419189453125, "learning_rate": 4.3095220714544084e-08, "loss": 13.8234, "step": 481180 }, { "epoch": 0.9720342441125256, "grad_norm": 194.27566528320312, "learning_rate": 4.304950143704745e-08, "loss": 12.9082, "step": 481190 }, { "epoch": 0.9720544447452094, "grad_norm": 192.88880920410156, "learning_rate": 4.3003806319127376e-08, "loss": 13.6647, "step": 481200 }, { "epoch": 0.9720746453778932, "grad_norm": 271.20098876953125, "learning_rate": 4.2958135361004794e-08, "loss": 15.0171, "step": 481210 }, { "epoch": 0.972094846010577, "grad_norm": 308.206298828125, "learning_rate": 4.291248856290342e-08, "loss": 23.1732, "step": 481220 }, { "epoch": 0.9721150466432609, "grad_norm": 272.736083984375, "learning_rate": 4.28668659250453e-08, "loss": 18.8223, "step": 481230 }, { "epoch": 0.9721352472759447, "grad_norm": 189.8990020751953, "learning_rate": 4.282126744765247e-08, "loss": 12.1868, "step": 481240 }, { "epoch": 0.9721554479086285, "grad_norm": 373.6046142578125, "learning_rate": 4.2775693130948094e-08, "loss": 32.1939, "step": 481250 }, { "epoch": 0.9721756485413123, "grad_norm": 0.28666916489601135, "learning_rate": 4.2730142975153654e-08, "loss": 22.4644, "step": 481260 }, { "epoch": 0.9721958491739962, "grad_norm": 201.35943603515625, "learning_rate": 4.26846169804912e-08, "loss": 6.796, "step": 481270 }, { "epoch": 0.97221604980668, "grad_norm": 151.1863555908203, "learning_rate": 4.263911514718222e-08, "loss": 22.4178, "step": 481280 }, { "epoch": 0.9722362504393638, "grad_norm": 756.0543212890625, "learning_rate": 4.259363747544931e-08, "loss": 24.6007, "step": 481290 }, { "epoch": 0.9722564510720476, "grad_norm": 299.47601318359375, "learning_rate": 4.2548183965513415e-08, "loss": 18.0124, "step": 481300 }, { "epoch": 0.9722766517047314, "grad_norm": 165.666015625, "learning_rate": 4.250275461759712e-08, "loss": 19.1593, "step": 481310 }, { "epoch": 0.9722968523374153, "grad_norm": 210.62261962890625, "learning_rate": 4.245734943192081e-08, "loss": 13.7901, "step": 481320 }, { "epoch": 0.9723170529700991, "grad_norm": 216.73516845703125, "learning_rate": 4.241196840870598e-08, "loss": 14.5709, "step": 481330 }, { "epoch": 0.9723372536027829, "grad_norm": 185.23233032226562, "learning_rate": 4.236661154817412e-08, "loss": 6.3514, "step": 481340 }, { "epoch": 0.9723574542354666, "grad_norm": 309.74249267578125, "learning_rate": 4.23212788505456e-08, "loss": 16.0866, "step": 481350 }, { "epoch": 0.9723776548681504, "grad_norm": 250.5777130126953, "learning_rate": 4.227597031604247e-08, "loss": 12.1945, "step": 481360 }, { "epoch": 0.9723978555008342, "grad_norm": 496.88690185546875, "learning_rate": 4.2230685944884554e-08, "loss": 31.1498, "step": 481370 }, { "epoch": 0.9724180561335181, "grad_norm": 275.16107177734375, "learning_rate": 4.218542573729334e-08, "loss": 19.4856, "step": 481380 }, { "epoch": 0.9724382567662019, "grad_norm": 202.57310485839844, "learning_rate": 4.2140189693488654e-08, "loss": 28.5615, "step": 481390 }, { "epoch": 0.9724584573988857, "grad_norm": 298.203369140625, "learning_rate": 4.209497781369143e-08, "loss": 6.3217, "step": 481400 }, { "epoch": 0.9724786580315695, "grad_norm": 309.79852294921875, "learning_rate": 4.20497900981226e-08, "loss": 19.4717, "step": 481410 }, { "epoch": 0.9724988586642533, "grad_norm": 44.44679260253906, "learning_rate": 4.2004626547000885e-08, "loss": 12.0639, "step": 481420 }, { "epoch": 0.9725190592969372, "grad_norm": 304.7922668457031, "learning_rate": 4.195948716054776e-08, "loss": 17.1214, "step": 481430 }, { "epoch": 0.972539259929621, "grad_norm": 404.0693359375, "learning_rate": 4.191437193898251e-08, "loss": 19.0104, "step": 481440 }, { "epoch": 0.9725594605623048, "grad_norm": 262.6856689453125, "learning_rate": 4.1869280882525506e-08, "loss": 34.2173, "step": 481450 }, { "epoch": 0.9725796611949886, "grad_norm": 88.3481674194336, "learning_rate": 4.1824213991396024e-08, "loss": 10.9159, "step": 481460 }, { "epoch": 0.9725998618276724, "grad_norm": 86.42558288574219, "learning_rate": 4.1779171265814435e-08, "loss": 10.9482, "step": 481470 }, { "epoch": 0.9726200624603563, "grad_norm": 194.77195739746094, "learning_rate": 4.173415270599945e-08, "loss": 21.5173, "step": 481480 }, { "epoch": 0.9726402630930401, "grad_norm": 155.04457092285156, "learning_rate": 4.168915831217091e-08, "loss": 14.087, "step": 481490 }, { "epoch": 0.9726604637257239, "grad_norm": 245.21212768554688, "learning_rate": 4.164418808454806e-08, "loss": 13.5881, "step": 481500 }, { "epoch": 0.9726806643584077, "grad_norm": 213.82957458496094, "learning_rate": 4.159924202334964e-08, "loss": 21.7162, "step": 481510 }, { "epoch": 0.9727008649910915, "grad_norm": 65.3963851928711, "learning_rate": 4.1554320128795455e-08, "loss": 13.6899, "step": 481520 }, { "epoch": 0.9727210656237754, "grad_norm": 262.2989501953125, "learning_rate": 4.150942240110478e-08, "loss": 12.5275, "step": 481530 }, { "epoch": 0.9727412662564592, "grad_norm": 379.6789245605469, "learning_rate": 4.146454884049467e-08, "loss": 22.427, "step": 481540 }, { "epoch": 0.972761466889143, "grad_norm": 369.7518615722656, "learning_rate": 4.1419699447186045e-08, "loss": 53.0007, "step": 481550 }, { "epoch": 0.9727816675218268, "grad_norm": 351.9305114746094, "learning_rate": 4.137487422139541e-08, "loss": 24.0868, "step": 481560 }, { "epoch": 0.9728018681545106, "grad_norm": 163.4134521484375, "learning_rate": 4.133007316334259e-08, "loss": 14.4129, "step": 481570 }, { "epoch": 0.9728220687871945, "grad_norm": 90.66841888427734, "learning_rate": 4.128529627324573e-08, "loss": 18.7434, "step": 481580 }, { "epoch": 0.9728422694198783, "grad_norm": 265.9713134765625, "learning_rate": 4.124054355132301e-08, "loss": 12.7847, "step": 481590 }, { "epoch": 0.9728624700525621, "grad_norm": 357.07513427734375, "learning_rate": 4.1195814997792014e-08, "loss": 11.0891, "step": 481600 }, { "epoch": 0.9728826706852458, "grad_norm": 363.5975646972656, "learning_rate": 4.1151110612872023e-08, "loss": 18.9386, "step": 481610 }, { "epoch": 0.9729028713179296, "grad_norm": 348.9988708496094, "learning_rate": 4.1106430396778974e-08, "loss": 29.8349, "step": 481620 }, { "epoch": 0.9729230719506134, "grad_norm": 173.11012268066406, "learning_rate": 4.1061774349732686e-08, "loss": 11.3092, "step": 481630 }, { "epoch": 0.9729432725832973, "grad_norm": 240.25750732421875, "learning_rate": 4.10171424719491e-08, "loss": 23.8165, "step": 481640 }, { "epoch": 0.9729634732159811, "grad_norm": 191.71826171875, "learning_rate": 4.097253476364693e-08, "loss": 34.259, "step": 481650 }, { "epoch": 0.9729836738486649, "grad_norm": 164.4589385986328, "learning_rate": 4.092795122504323e-08, "loss": 17.5541, "step": 481660 }, { "epoch": 0.9730038744813487, "grad_norm": 423.4584045410156, "learning_rate": 4.088339185635504e-08, "loss": 13.9292, "step": 481670 }, { "epoch": 0.9730240751140325, "grad_norm": 274.4292297363281, "learning_rate": 4.083885665779996e-08, "loss": 22.5548, "step": 481680 }, { "epoch": 0.9730442757467164, "grad_norm": 42.46436309814453, "learning_rate": 4.07943456295945e-08, "loss": 25.2314, "step": 481690 }, { "epoch": 0.9730644763794002, "grad_norm": 12.420757293701172, "learning_rate": 4.0749858771956253e-08, "loss": 11.2472, "step": 481700 }, { "epoch": 0.973084677012084, "grad_norm": 459.44866943359375, "learning_rate": 4.070539608510171e-08, "loss": 26.4822, "step": 481710 }, { "epoch": 0.9731048776447678, "grad_norm": 560.3666381835938, "learning_rate": 4.066095756924682e-08, "loss": 19.9428, "step": 481720 }, { "epoch": 0.9731250782774516, "grad_norm": 41.60747528076172, "learning_rate": 4.061654322460973e-08, "loss": 19.0391, "step": 481730 }, { "epoch": 0.9731452789101355, "grad_norm": 78.79512786865234, "learning_rate": 4.0572153051406383e-08, "loss": 15.3612, "step": 481740 }, { "epoch": 0.9731654795428193, "grad_norm": 173.41709899902344, "learning_rate": 4.052778704985216e-08, "loss": 27.3176, "step": 481750 }, { "epoch": 0.9731856801755031, "grad_norm": 292.3283996582031, "learning_rate": 4.048344522016356e-08, "loss": 22.1398, "step": 481760 }, { "epoch": 0.9732058808081869, "grad_norm": 491.2066650390625, "learning_rate": 4.043912756255819e-08, "loss": 19.1171, "step": 481770 }, { "epoch": 0.9732260814408707, "grad_norm": 164.1918487548828, "learning_rate": 4.039483407725031e-08, "loss": 16.1173, "step": 481780 }, { "epoch": 0.9732462820735546, "grad_norm": 205.3018035888672, "learning_rate": 4.035056476445698e-08, "loss": 25.6681, "step": 481790 }, { "epoch": 0.9732664827062384, "grad_norm": 479.06378173828125, "learning_rate": 4.030631962439302e-08, "loss": 13.227, "step": 481800 }, { "epoch": 0.9732866833389222, "grad_norm": 158.11305236816406, "learning_rate": 4.026209865727493e-08, "loss": 15.4445, "step": 481810 }, { "epoch": 0.973306883971606, "grad_norm": 41.378910064697266, "learning_rate": 4.0217901863317534e-08, "loss": 12.1698, "step": 481820 }, { "epoch": 0.9733270846042898, "grad_norm": 473.0097961425781, "learning_rate": 4.017372924273621e-08, "loss": 24.0611, "step": 481830 }, { "epoch": 0.9733472852369737, "grad_norm": 71.91669464111328, "learning_rate": 4.012958079574747e-08, "loss": 17.8506, "step": 481840 }, { "epoch": 0.9733674858696575, "grad_norm": 120.09349060058594, "learning_rate": 4.008545652256502e-08, "loss": 15.7919, "step": 481850 }, { "epoch": 0.9733876865023412, "grad_norm": 81.92552947998047, "learning_rate": 4.004135642340423e-08, "loss": 15.3709, "step": 481860 }, { "epoch": 0.973407887135025, "grad_norm": 131.71946716308594, "learning_rate": 3.999728049848106e-08, "loss": 12.4301, "step": 481870 }, { "epoch": 0.9734280877677088, "grad_norm": 386.7486267089844, "learning_rate": 3.995322874800922e-08, "loss": 25.5857, "step": 481880 }, { "epoch": 0.9734482884003927, "grad_norm": 182.57400512695312, "learning_rate": 3.9909201172203537e-08, "loss": 16.8112, "step": 481890 }, { "epoch": 0.9734684890330765, "grad_norm": 60.308738708496094, "learning_rate": 3.986519777127884e-08, "loss": 9.5926, "step": 481900 }, { "epoch": 0.9734886896657603, "grad_norm": 39.768798828125, "learning_rate": 3.9821218545449956e-08, "loss": 9.1046, "step": 481910 }, { "epoch": 0.9735088902984441, "grad_norm": 139.36248779296875, "learning_rate": 3.977726349493061e-08, "loss": 17.739, "step": 481920 }, { "epoch": 0.9735290909311279, "grad_norm": 206.23165893554688, "learning_rate": 3.973333261993506e-08, "loss": 30.9089, "step": 481930 }, { "epoch": 0.9735492915638118, "grad_norm": 3.162886381149292, "learning_rate": 3.9689425920678146e-08, "loss": 31.4828, "step": 481940 }, { "epoch": 0.9735694921964956, "grad_norm": 146.00070190429688, "learning_rate": 3.964554339737303e-08, "loss": 43.0, "step": 481950 }, { "epoch": 0.9735896928291794, "grad_norm": 696.09912109375, "learning_rate": 3.960168505023343e-08, "loss": 30.9772, "step": 481960 }, { "epoch": 0.9736098934618632, "grad_norm": 657.7053833007812, "learning_rate": 3.955785087947473e-08, "loss": 16.9947, "step": 481970 }, { "epoch": 0.973630094094547, "grad_norm": 394.7059020996094, "learning_rate": 3.951404088530841e-08, "loss": 20.1505, "step": 481980 }, { "epoch": 0.9736502947272309, "grad_norm": 224.27499389648438, "learning_rate": 3.947025506794933e-08, "loss": 24.3254, "step": 481990 }, { "epoch": 0.9736704953599147, "grad_norm": 219.9850311279297, "learning_rate": 3.9426493427611177e-08, "loss": 8.4789, "step": 482000 }, { "epoch": 0.9736906959925985, "grad_norm": 308.3173828125, "learning_rate": 3.938275596450603e-08, "loss": 19.5833, "step": 482010 }, { "epoch": 0.9737108966252823, "grad_norm": 520.0945434570312, "learning_rate": 3.933904267884758e-08, "loss": 14.8363, "step": 482020 }, { "epoch": 0.9737310972579661, "grad_norm": 293.06292724609375, "learning_rate": 3.929535357084957e-08, "loss": 29.0102, "step": 482030 }, { "epoch": 0.97375129789065, "grad_norm": 195.4599151611328, "learning_rate": 3.925168864072348e-08, "loss": 12.4343, "step": 482040 }, { "epoch": 0.9737714985233338, "grad_norm": 312.1951599121094, "learning_rate": 3.9208047888683597e-08, "loss": 7.9112, "step": 482050 }, { "epoch": 0.9737916991560176, "grad_norm": 153.21640014648438, "learning_rate": 3.9164431314941965e-08, "loss": 18.9792, "step": 482060 }, { "epoch": 0.9738118997887014, "grad_norm": 307.2727355957031, "learning_rate": 3.912083891971119e-08, "loss": 13.3604, "step": 482070 }, { "epoch": 0.9738321004213852, "grad_norm": 439.34478759765625, "learning_rate": 3.907727070320389e-08, "loss": 16.1297, "step": 482080 }, { "epoch": 0.9738523010540691, "grad_norm": 197.19056701660156, "learning_rate": 3.9033726665632096e-08, "loss": 10.431, "step": 482090 }, { "epoch": 0.9738725016867529, "grad_norm": 474.0113830566406, "learning_rate": 3.899020680720844e-08, "loss": 18.9224, "step": 482100 }, { "epoch": 0.9738927023194367, "grad_norm": 226.4248504638672, "learning_rate": 3.894671112814441e-08, "loss": 12.1161, "step": 482110 }, { "epoch": 0.9739129029521204, "grad_norm": 391.0127868652344, "learning_rate": 3.8903239628652615e-08, "loss": 12.1335, "step": 482120 }, { "epoch": 0.9739331035848042, "grad_norm": 180.11514282226562, "learning_rate": 3.88597923089451e-08, "loss": 18.9657, "step": 482130 }, { "epoch": 0.973953304217488, "grad_norm": 699.8246459960938, "learning_rate": 3.881636916923281e-08, "loss": 19.0863, "step": 482140 }, { "epoch": 0.9739735048501719, "grad_norm": 391.5164794921875, "learning_rate": 3.877297020972781e-08, "loss": 24.0634, "step": 482150 }, { "epoch": 0.9739937054828557, "grad_norm": 138.33474731445312, "learning_rate": 3.8729595430641586e-08, "loss": 17.5885, "step": 482160 }, { "epoch": 0.9740139061155395, "grad_norm": 428.3031921386719, "learning_rate": 3.868624483218619e-08, "loss": 25.3389, "step": 482170 }, { "epoch": 0.9740341067482233, "grad_norm": 43.16802978515625, "learning_rate": 3.864291841457146e-08, "loss": 21.3216, "step": 482180 }, { "epoch": 0.9740543073809071, "grad_norm": 304.3694152832031, "learning_rate": 3.859961617801e-08, "loss": 16.826, "step": 482190 }, { "epoch": 0.974074508013591, "grad_norm": 891.7839965820312, "learning_rate": 3.855633812271165e-08, "loss": 22.545, "step": 482200 }, { "epoch": 0.9740947086462748, "grad_norm": 354.1850891113281, "learning_rate": 3.8513084248888445e-08, "loss": 19.9415, "step": 482210 }, { "epoch": 0.9741149092789586, "grad_norm": 306.1986389160156, "learning_rate": 3.8469854556750785e-08, "loss": 15.7159, "step": 482220 }, { "epoch": 0.9741351099116424, "grad_norm": 362.8030090332031, "learning_rate": 3.842664904650906e-08, "loss": 17.7414, "step": 482230 }, { "epoch": 0.9741553105443262, "grad_norm": 280.6147155761719, "learning_rate": 3.83834677183742e-08, "loss": 28.3863, "step": 482240 }, { "epoch": 0.9741755111770101, "grad_norm": 64.55226135253906, "learning_rate": 3.83403105725566e-08, "loss": 11.1159, "step": 482250 }, { "epoch": 0.9741957118096939, "grad_norm": 270.72265625, "learning_rate": 3.82971776092661e-08, "loss": 16.8554, "step": 482260 }, { "epoch": 0.9742159124423777, "grad_norm": 394.1881103515625, "learning_rate": 3.825406882871363e-08, "loss": 12.6277, "step": 482270 }, { "epoch": 0.9742361130750615, "grad_norm": 564.988525390625, "learning_rate": 3.8210984231109583e-08, "loss": 16.2614, "step": 482280 }, { "epoch": 0.9742563137077453, "grad_norm": 417.332275390625, "learning_rate": 3.816792381666268e-08, "loss": 11.6937, "step": 482290 }, { "epoch": 0.9742765143404292, "grad_norm": 386.9827880859375, "learning_rate": 3.812488758558386e-08, "loss": 38.7971, "step": 482300 }, { "epoch": 0.974296714973113, "grad_norm": 383.1602478027344, "learning_rate": 3.8081875538082404e-08, "loss": 18.8922, "step": 482310 }, { "epoch": 0.9743169156057968, "grad_norm": 426.4156188964844, "learning_rate": 3.8038887674368697e-08, "loss": 17.9166, "step": 482320 }, { "epoch": 0.9743371162384806, "grad_norm": 380.5213928222656, "learning_rate": 3.799592399465091e-08, "loss": 16.2061, "step": 482330 }, { "epoch": 0.9743573168711644, "grad_norm": 279.58721923828125, "learning_rate": 3.7952984499138864e-08, "loss": 20.234, "step": 482340 }, { "epoch": 0.9743775175038483, "grad_norm": 25.954132080078125, "learning_rate": 3.791006918804296e-08, "loss": 16.3434, "step": 482350 }, { "epoch": 0.9743977181365321, "grad_norm": 141.3274383544922, "learning_rate": 3.786717806157136e-08, "loss": 17.5097, "step": 482360 }, { "epoch": 0.9744179187692159, "grad_norm": 140.3556671142578, "learning_rate": 3.782431111993279e-08, "loss": 25.5609, "step": 482370 }, { "epoch": 0.9744381194018996, "grad_norm": 329.6993408203125, "learning_rate": 3.778146836333707e-08, "loss": 18.3772, "step": 482380 }, { "epoch": 0.9744583200345834, "grad_norm": 277.1619567871094, "learning_rate": 3.7738649791992934e-08, "loss": 16.9295, "step": 482390 }, { "epoch": 0.9744785206672673, "grad_norm": 514.8152465820312, "learning_rate": 3.769585540610799e-08, "loss": 24.5608, "step": 482400 }, { "epoch": 0.9744987212999511, "grad_norm": 675.7714233398438, "learning_rate": 3.765308520589206e-08, "loss": 59.0566, "step": 482410 }, { "epoch": 0.9745189219326349, "grad_norm": 189.63316345214844, "learning_rate": 3.761033919155333e-08, "loss": 17.8879, "step": 482420 }, { "epoch": 0.9745391225653187, "grad_norm": 252.22926330566406, "learning_rate": 3.7567617363299945e-08, "loss": 18.0837, "step": 482430 }, { "epoch": 0.9745593231980025, "grad_norm": 249.82164001464844, "learning_rate": 3.7524919721339535e-08, "loss": 15.6312, "step": 482440 }, { "epoch": 0.9745795238306864, "grad_norm": 305.08734130859375, "learning_rate": 3.748224626588137e-08, "loss": 24.5202, "step": 482450 }, { "epoch": 0.9745997244633702, "grad_norm": 464.5709533691406, "learning_rate": 3.743959699713251e-08, "loss": 42.0495, "step": 482460 }, { "epoch": 0.974619925096054, "grad_norm": 447.7132568359375, "learning_rate": 3.739697191530112e-08, "loss": 20.2405, "step": 482470 }, { "epoch": 0.9746401257287378, "grad_norm": 107.67266845703125, "learning_rate": 3.735437102059536e-08, "loss": 14.533, "step": 482480 }, { "epoch": 0.9746603263614216, "grad_norm": 549.6131591796875, "learning_rate": 3.731179431322285e-08, "loss": 22.1725, "step": 482490 }, { "epoch": 0.9746805269941055, "grad_norm": 124.27351379394531, "learning_rate": 3.726924179339009e-08, "loss": 15.0744, "step": 482500 }, { "epoch": 0.9747007276267893, "grad_norm": 82.30006408691406, "learning_rate": 3.7226713461305245e-08, "loss": 24.8883, "step": 482510 }, { "epoch": 0.9747209282594731, "grad_norm": 521.1300048828125, "learning_rate": 3.7184209317175366e-08, "loss": 35.9421, "step": 482520 }, { "epoch": 0.9747411288921569, "grad_norm": 336.57330322265625, "learning_rate": 3.714172936120808e-08, "loss": 20.7084, "step": 482530 }, { "epoch": 0.9747613295248407, "grad_norm": 152.64459228515625, "learning_rate": 3.7099273593609316e-08, "loss": 13.1511, "step": 482540 }, { "epoch": 0.9747815301575246, "grad_norm": 277.96343994140625, "learning_rate": 3.7056842014587815e-08, "loss": 22.9116, "step": 482550 }, { "epoch": 0.9748017307902084, "grad_norm": 312.4891357421875, "learning_rate": 3.701443462434895e-08, "loss": 9.7763, "step": 482560 }, { "epoch": 0.9748219314228922, "grad_norm": 162.68878173828125, "learning_rate": 3.697205142309923e-08, "loss": 20.6659, "step": 482570 }, { "epoch": 0.974842132055576, "grad_norm": 412.9062805175781, "learning_rate": 3.692969241104683e-08, "loss": 14.0268, "step": 482580 }, { "epoch": 0.9748623326882598, "grad_norm": 354.30987548828125, "learning_rate": 3.688735758839601e-08, "loss": 8.6916, "step": 482590 }, { "epoch": 0.9748825333209437, "grad_norm": 423.56805419921875, "learning_rate": 3.684504695535496e-08, "loss": 20.0358, "step": 482600 }, { "epoch": 0.9749027339536275, "grad_norm": 550.2533569335938, "learning_rate": 3.680276051212961e-08, "loss": 20.1011, "step": 482610 }, { "epoch": 0.9749229345863113, "grad_norm": 139.30455017089844, "learning_rate": 3.67604982589248e-08, "loss": 16.5859, "step": 482620 }, { "epoch": 0.974943135218995, "grad_norm": 63.412986755371094, "learning_rate": 3.6718260195947594e-08, "loss": 8.6873, "step": 482630 }, { "epoch": 0.9749633358516788, "grad_norm": 589.2445068359375, "learning_rate": 3.6676046323403934e-08, "loss": 24.3236, "step": 482640 }, { "epoch": 0.9749835364843626, "grad_norm": 531.0650634765625, "learning_rate": 3.663385664149866e-08, "loss": 22.7496, "step": 482650 }, { "epoch": 0.9750037371170465, "grad_norm": 83.04975891113281, "learning_rate": 3.659169115043826e-08, "loss": 16.2014, "step": 482660 }, { "epoch": 0.9750239377497303, "grad_norm": 270.0564270019531, "learning_rate": 3.654954985042869e-08, "loss": 17.2266, "step": 482670 }, { "epoch": 0.9750441383824141, "grad_norm": 254.4864501953125, "learning_rate": 3.650743274167368e-08, "loss": 11.1291, "step": 482680 }, { "epoch": 0.9750643390150979, "grad_norm": 194.707275390625, "learning_rate": 3.6465339824379165e-08, "loss": 17.9651, "step": 482690 }, { "epoch": 0.9750845396477817, "grad_norm": 518.6030883789062, "learning_rate": 3.642327109875166e-08, "loss": 25.6451, "step": 482700 }, { "epoch": 0.9751047402804656, "grad_norm": 322.35125732421875, "learning_rate": 3.638122656499432e-08, "loss": 19.7594, "step": 482710 }, { "epoch": 0.9751249409131494, "grad_norm": 366.0865783691406, "learning_rate": 3.633920622331311e-08, "loss": 22.1807, "step": 482720 }, { "epoch": 0.9751451415458332, "grad_norm": 35.25457763671875, "learning_rate": 3.629721007391229e-08, "loss": 25.9606, "step": 482730 }, { "epoch": 0.975165342178517, "grad_norm": 201.05673217773438, "learning_rate": 3.625523811699727e-08, "loss": 14.9553, "step": 482740 }, { "epoch": 0.9751855428112008, "grad_norm": 395.165283203125, "learning_rate": 3.621329035277232e-08, "loss": 12.1448, "step": 482750 }, { "epoch": 0.9752057434438847, "grad_norm": 15.634329795837402, "learning_rate": 3.617136678144173e-08, "loss": 14.2171, "step": 482760 }, { "epoch": 0.9752259440765685, "grad_norm": 799.6106567382812, "learning_rate": 3.612946740320977e-08, "loss": 23.4363, "step": 482770 }, { "epoch": 0.9752461447092523, "grad_norm": 428.73382568359375, "learning_rate": 3.608759221828073e-08, "loss": 19.0898, "step": 482780 }, { "epoch": 0.9752663453419361, "grad_norm": 378.638671875, "learning_rate": 3.604574122685833e-08, "loss": 21.3059, "step": 482790 }, { "epoch": 0.97528654597462, "grad_norm": 383.9864807128906, "learning_rate": 3.600391442914741e-08, "loss": 17.5286, "step": 482800 }, { "epoch": 0.9753067466073038, "grad_norm": 409.30499267578125, "learning_rate": 3.5962111825350585e-08, "loss": 20.0298, "step": 482810 }, { "epoch": 0.9753269472399876, "grad_norm": 84.33053588867188, "learning_rate": 3.592033341567325e-08, "loss": 12.0383, "step": 482820 }, { "epoch": 0.9753471478726714, "grad_norm": 243.53125, "learning_rate": 3.5878579200318006e-08, "loss": 26.7664, "step": 482830 }, { "epoch": 0.9753673485053552, "grad_norm": 306.3808898925781, "learning_rate": 3.583684917948804e-08, "loss": 19.9252, "step": 482840 }, { "epoch": 0.975387549138039, "grad_norm": 224.66900634765625, "learning_rate": 3.579514335338763e-08, "loss": 19.7794, "step": 482850 }, { "epoch": 0.9754077497707229, "grad_norm": 243.73361206054688, "learning_rate": 3.575346172221939e-08, "loss": 16.3464, "step": 482860 }, { "epoch": 0.9754279504034067, "grad_norm": 265.2950439453125, "learning_rate": 3.5711804286187035e-08, "loss": 7.8652, "step": 482870 }, { "epoch": 0.9754481510360905, "grad_norm": 117.79085540771484, "learning_rate": 3.5670171045492643e-08, "loss": 9.7502, "step": 482880 }, { "epoch": 0.9754683516687742, "grad_norm": 1.972617506980896, "learning_rate": 3.5628562000339925e-08, "loss": 12.1673, "step": 482890 }, { "epoch": 0.975488552301458, "grad_norm": 28.19820213317871, "learning_rate": 3.558697715093207e-08, "loss": 20.6444, "step": 482900 }, { "epoch": 0.9755087529341419, "grad_norm": 151.6971435546875, "learning_rate": 3.554541649747056e-08, "loss": 20.8036, "step": 482910 }, { "epoch": 0.9755289535668257, "grad_norm": 152.75494384765625, "learning_rate": 3.5503880040158586e-08, "loss": 19.0316, "step": 482920 }, { "epoch": 0.9755491541995095, "grad_norm": 201.63723754882812, "learning_rate": 3.546236777919876e-08, "loss": 9.6726, "step": 482930 }, { "epoch": 0.9755693548321933, "grad_norm": 138.88731384277344, "learning_rate": 3.542087971479313e-08, "loss": 11.0982, "step": 482940 }, { "epoch": 0.9755895554648771, "grad_norm": 332.2503662109375, "learning_rate": 3.5379415847143775e-08, "loss": 16.3735, "step": 482950 }, { "epoch": 0.975609756097561, "grad_norm": 223.29818725585938, "learning_rate": 3.5337976176453845e-08, "loss": 14.105, "step": 482960 }, { "epoch": 0.9756299567302448, "grad_norm": 162.20608520507812, "learning_rate": 3.529656070292375e-08, "loss": 16.5648, "step": 482970 }, { "epoch": 0.9756501573629286, "grad_norm": 467.5368347167969, "learning_rate": 3.525516942675611e-08, "loss": 24.0874, "step": 482980 }, { "epoch": 0.9756703579956124, "grad_norm": 158.80160522460938, "learning_rate": 3.521380234815297e-08, "loss": 28.2517, "step": 482990 }, { "epoch": 0.9756905586282962, "grad_norm": 111.56636047363281, "learning_rate": 3.517245946731529e-08, "loss": 33.4182, "step": 483000 }, { "epoch": 0.97571075926098, "grad_norm": 202.2498321533203, "learning_rate": 3.513114078444513e-08, "loss": 6.0337, "step": 483010 }, { "epoch": 0.9757309598936639, "grad_norm": 439.6343688964844, "learning_rate": 3.508984629974288e-08, "loss": 22.1437, "step": 483020 }, { "epoch": 0.9757511605263477, "grad_norm": 589.834228515625, "learning_rate": 3.504857601341172e-08, "loss": 14.595, "step": 483030 }, { "epoch": 0.9757713611590315, "grad_norm": 25.5278377532959, "learning_rate": 3.5007329925650925e-08, "loss": 45.8825, "step": 483040 }, { "epoch": 0.9757915617917153, "grad_norm": 383.9505920410156, "learning_rate": 3.4966108036662006e-08, "loss": 10.8613, "step": 483050 }, { "epoch": 0.9758117624243992, "grad_norm": 198.6356201171875, "learning_rate": 3.4924910346647024e-08, "loss": 16.7055, "step": 483060 }, { "epoch": 0.975831963057083, "grad_norm": 397.1332092285156, "learning_rate": 3.488373685580526e-08, "loss": 17.8031, "step": 483070 }, { "epoch": 0.9758521636897668, "grad_norm": 426.4501647949219, "learning_rate": 3.4842587564337674e-08, "loss": 9.4932, "step": 483080 }, { "epoch": 0.9758723643224506, "grad_norm": 208.44989013671875, "learning_rate": 3.48014624724452e-08, "loss": 9.9579, "step": 483090 }, { "epoch": 0.9758925649551344, "grad_norm": 3.690246343612671, "learning_rate": 3.47603615803288e-08, "loss": 18.2543, "step": 483100 }, { "epoch": 0.9759127655878183, "grad_norm": 264.77227783203125, "learning_rate": 3.471928488818776e-08, "loss": 9.9958, "step": 483110 }, { "epoch": 0.9759329662205021, "grad_norm": 3.3299927711486816, "learning_rate": 3.467823239622248e-08, "loss": 17.3776, "step": 483120 }, { "epoch": 0.9759531668531859, "grad_norm": 327.7098388671875, "learning_rate": 3.463720410463334e-08, "loss": 24.4381, "step": 483130 }, { "epoch": 0.9759733674858696, "grad_norm": 290.3511962890625, "learning_rate": 3.459620001362074e-08, "loss": 15.9041, "step": 483140 }, { "epoch": 0.9759935681185534, "grad_norm": 330.9452819824219, "learning_rate": 3.4555220123383416e-08, "loss": 12.4754, "step": 483150 }, { "epoch": 0.9760137687512372, "grad_norm": 116.95169067382812, "learning_rate": 3.451426443412231e-08, "loss": 7.8998, "step": 483160 }, { "epoch": 0.9760339693839211, "grad_norm": 242.63372802734375, "learning_rate": 3.4473332946036164e-08, "loss": 20.3625, "step": 483170 }, { "epoch": 0.9760541700166049, "grad_norm": 418.365966796875, "learning_rate": 3.443242565932481e-08, "loss": 18.6768, "step": 483180 }, { "epoch": 0.9760743706492887, "grad_norm": 186.45838928222656, "learning_rate": 3.439154257418753e-08, "loss": 14.3481, "step": 483190 }, { "epoch": 0.9760945712819725, "grad_norm": 366.9158935546875, "learning_rate": 3.435068369082306e-08, "loss": 9.6534, "step": 483200 }, { "epoch": 0.9761147719146563, "grad_norm": 214.03306579589844, "learning_rate": 3.4309849009431794e-08, "loss": 21.3093, "step": 483210 }, { "epoch": 0.9761349725473402, "grad_norm": 165.76834106445312, "learning_rate": 3.4269038530211906e-08, "loss": 14.8489, "step": 483220 }, { "epoch": 0.976155173180024, "grad_norm": 163.30471801757812, "learning_rate": 3.4228252253362683e-08, "loss": 12.1658, "step": 483230 }, { "epoch": 0.9761753738127078, "grad_norm": 0.0, "learning_rate": 3.41874901790823e-08, "loss": 20.5138, "step": 483240 }, { "epoch": 0.9761955744453916, "grad_norm": 317.5463562011719, "learning_rate": 3.414675230757003e-08, "loss": 12.4586, "step": 483250 }, { "epoch": 0.9762157750780754, "grad_norm": 253.27920532226562, "learning_rate": 3.410603863902406e-08, "loss": 19.113, "step": 483260 }, { "epoch": 0.9762359757107593, "grad_norm": 36.075157165527344, "learning_rate": 3.406534917364257e-08, "loss": 12.2031, "step": 483270 }, { "epoch": 0.9762561763434431, "grad_norm": 303.51995849609375, "learning_rate": 3.402468391162539e-08, "loss": 16.7368, "step": 483280 }, { "epoch": 0.9762763769761269, "grad_norm": 268.71087646484375, "learning_rate": 3.398404285316847e-08, "loss": 8.2577, "step": 483290 }, { "epoch": 0.9762965776088107, "grad_norm": 149.17491149902344, "learning_rate": 3.394342599847111e-08, "loss": 13.4999, "step": 483300 }, { "epoch": 0.9763167782414945, "grad_norm": 492.189208984375, "learning_rate": 3.390283334773203e-08, "loss": 19.87, "step": 483310 }, { "epoch": 0.9763369788741784, "grad_norm": 318.0566711425781, "learning_rate": 3.3862264901147745e-08, "loss": 26.1718, "step": 483320 }, { "epoch": 0.9763571795068622, "grad_norm": 219.8817901611328, "learning_rate": 3.3821720658916426e-08, "loss": 6.3677, "step": 483330 }, { "epoch": 0.976377380139546, "grad_norm": 5.527459621429443, "learning_rate": 3.378120062123569e-08, "loss": 11.1907, "step": 483340 }, { "epoch": 0.9763975807722298, "grad_norm": 123.34063720703125, "learning_rate": 3.374070478830316e-08, "loss": 11.3013, "step": 483350 }, { "epoch": 0.9764177814049136, "grad_norm": 655.8611450195312, "learning_rate": 3.3700233160315897e-08, "loss": 13.955, "step": 483360 }, { "epoch": 0.9764379820375975, "grad_norm": 287.165771484375, "learning_rate": 3.365978573747153e-08, "loss": 29.42, "step": 483370 }, { "epoch": 0.9764581826702813, "grad_norm": 611.0186157226562, "learning_rate": 3.361936251996711e-08, "loss": 27.2618, "step": 483380 }, { "epoch": 0.9764783833029651, "grad_norm": 84.76065826416016, "learning_rate": 3.357896350799916e-08, "loss": 14.0287, "step": 483390 }, { "epoch": 0.9764985839356488, "grad_norm": 339.7453918457031, "learning_rate": 3.3538588701765296e-08, "loss": 13.6125, "step": 483400 }, { "epoch": 0.9765187845683326, "grad_norm": 176.95620727539062, "learning_rate": 3.349823810146202e-08, "loss": 8.7821, "step": 483410 }, { "epoch": 0.9765389852010165, "grad_norm": 102.81352233886719, "learning_rate": 3.34579117072864e-08, "loss": 15.8232, "step": 483420 }, { "epoch": 0.9765591858337003, "grad_norm": 199.88491821289062, "learning_rate": 3.341760951943385e-08, "loss": 19.5629, "step": 483430 }, { "epoch": 0.9765793864663841, "grad_norm": 346.9388427734375, "learning_rate": 3.337733153810141e-08, "loss": 16.255, "step": 483440 }, { "epoch": 0.9765995870990679, "grad_norm": 196.13929748535156, "learning_rate": 3.3337077763485605e-08, "loss": 23.9133, "step": 483450 }, { "epoch": 0.9766197877317517, "grad_norm": 377.25042724609375, "learning_rate": 3.329684819578294e-08, "loss": 24.0601, "step": 483460 }, { "epoch": 0.9766399883644356, "grad_norm": 276.38189697265625, "learning_rate": 3.3256642835188816e-08, "loss": 15.3663, "step": 483470 }, { "epoch": 0.9766601889971194, "grad_norm": 101.42993927001953, "learning_rate": 3.321646168189918e-08, "loss": 8.4814, "step": 483480 }, { "epoch": 0.9766803896298032, "grad_norm": 300.20281982421875, "learning_rate": 3.317630473611055e-08, "loss": 25.3813, "step": 483490 }, { "epoch": 0.976700590262487, "grad_norm": 5.163257122039795, "learning_rate": 3.313617199801777e-08, "loss": 15.0118, "step": 483500 }, { "epoch": 0.9767207908951708, "grad_norm": 482.6654968261719, "learning_rate": 3.309606346781735e-08, "loss": 15.7841, "step": 483510 }, { "epoch": 0.9767409915278547, "grad_norm": 362.0157470703125, "learning_rate": 3.305597914570413e-08, "loss": 14.2689, "step": 483520 }, { "epoch": 0.9767611921605385, "grad_norm": 400.5704650878906, "learning_rate": 3.301591903187351e-08, "loss": 13.4012, "step": 483530 }, { "epoch": 0.9767813927932223, "grad_norm": 249.0880889892578, "learning_rate": 3.297588312652089e-08, "loss": 22.4317, "step": 483540 }, { "epoch": 0.9768015934259061, "grad_norm": 182.11021423339844, "learning_rate": 3.2935871429841116e-08, "loss": 15.1609, "step": 483550 }, { "epoch": 0.9768217940585899, "grad_norm": 5.8800554275512695, "learning_rate": 3.289588394203014e-08, "loss": 6.4429, "step": 483560 }, { "epoch": 0.9768419946912738, "grad_norm": 215.13284301757812, "learning_rate": 3.285592066328169e-08, "loss": 14.1888, "step": 483570 }, { "epoch": 0.9768621953239576, "grad_norm": 642.272216796875, "learning_rate": 3.281598159379118e-08, "loss": 12.5785, "step": 483580 }, { "epoch": 0.9768823959566414, "grad_norm": 282.8641052246094, "learning_rate": 3.277606673375289e-08, "loss": 15.3044, "step": 483590 }, { "epoch": 0.9769025965893252, "grad_norm": 169.6785125732422, "learning_rate": 3.2736176083362216e-08, "loss": 18.1839, "step": 483600 }, { "epoch": 0.976922797222009, "grad_norm": 431.96875, "learning_rate": 3.2696309642812344e-08, "loss": 13.4903, "step": 483610 }, { "epoch": 0.9769429978546929, "grad_norm": 76.10205078125, "learning_rate": 3.2656467412298665e-08, "loss": 22.2909, "step": 483620 }, { "epoch": 0.9769631984873767, "grad_norm": 135.01373291015625, "learning_rate": 3.261664939201436e-08, "loss": 20.6173, "step": 483630 }, { "epoch": 0.9769833991200605, "grad_norm": 430.282470703125, "learning_rate": 3.2576855582154844e-08, "loss": 24.4488, "step": 483640 }, { "epoch": 0.9770035997527442, "grad_norm": 136.57626342773438, "learning_rate": 3.253708598291272e-08, "loss": 7.0277, "step": 483650 }, { "epoch": 0.977023800385428, "grad_norm": 239.49517822265625, "learning_rate": 3.2497340594482284e-08, "loss": 11.4957, "step": 483660 }, { "epoch": 0.9770440010181118, "grad_norm": 198.54737854003906, "learning_rate": 3.245761941705727e-08, "loss": 11.8537, "step": 483670 }, { "epoch": 0.9770642016507957, "grad_norm": 344.41021728515625, "learning_rate": 3.241792245083142e-08, "loss": 8.878, "step": 483680 }, { "epoch": 0.9770844022834795, "grad_norm": 233.0062255859375, "learning_rate": 3.237824969599845e-08, "loss": 14.1195, "step": 483690 }, { "epoch": 0.9771046029161633, "grad_norm": 190.5237274169922, "learning_rate": 3.2338601152751e-08, "loss": 20.4171, "step": 483700 }, { "epoch": 0.9771248035488471, "grad_norm": 245.7359619140625, "learning_rate": 3.2298976821282804e-08, "loss": 26.0637, "step": 483710 }, { "epoch": 0.9771450041815309, "grad_norm": 424.45123291015625, "learning_rate": 3.2259376701787025e-08, "loss": 16.3542, "step": 483720 }, { "epoch": 0.9771652048142148, "grad_norm": 154.21754455566406, "learning_rate": 3.2219800794456304e-08, "loss": 22.9416, "step": 483730 }, { "epoch": 0.9771854054468986, "grad_norm": 137.80577087402344, "learning_rate": 3.2180249099483806e-08, "loss": 7.9407, "step": 483740 }, { "epoch": 0.9772056060795824, "grad_norm": 429.020751953125, "learning_rate": 3.214072161706272e-08, "loss": 18.3285, "step": 483750 }, { "epoch": 0.9772258067122662, "grad_norm": 167.385986328125, "learning_rate": 3.210121834738456e-08, "loss": 24.7815, "step": 483760 }, { "epoch": 0.97724600734495, "grad_norm": 19.635986328125, "learning_rate": 3.206173929064304e-08, "loss": 20.0423, "step": 483770 }, { "epoch": 0.9772662079776339, "grad_norm": 102.77245330810547, "learning_rate": 3.20222844470297e-08, "loss": 9.3504, "step": 483780 }, { "epoch": 0.9772864086103177, "grad_norm": 527.6898193359375, "learning_rate": 3.198285381673716e-08, "loss": 29.21, "step": 483790 }, { "epoch": 0.9773066092430015, "grad_norm": 661.2086181640625, "learning_rate": 3.194344739995803e-08, "loss": 25.6792, "step": 483800 }, { "epoch": 0.9773268098756853, "grad_norm": 232.419677734375, "learning_rate": 3.1904065196883825e-08, "loss": 16.9828, "step": 483810 }, { "epoch": 0.9773470105083691, "grad_norm": 89.22576141357422, "learning_rate": 3.1864707207706624e-08, "loss": 5.4055, "step": 483820 }, { "epoch": 0.977367211141053, "grad_norm": 634.2050170898438, "learning_rate": 3.182537343261849e-08, "loss": 20.296, "step": 483830 }, { "epoch": 0.9773874117737368, "grad_norm": 289.4164123535156, "learning_rate": 3.178606387181038e-08, "loss": 28.411, "step": 483840 }, { "epoch": 0.9774076124064206, "grad_norm": 297.56500244140625, "learning_rate": 3.1746778525474916e-08, "loss": 10.1111, "step": 483850 }, { "epoch": 0.9774278130391044, "grad_norm": 54.85966491699219, "learning_rate": 3.1707517393803064e-08, "loss": 8.5428, "step": 483860 }, { "epoch": 0.9774480136717882, "grad_norm": 586.3662719726562, "learning_rate": 3.166828047698578e-08, "loss": 12.225, "step": 483870 }, { "epoch": 0.9774682143044721, "grad_norm": 279.25689697265625, "learning_rate": 3.1629067775214575e-08, "loss": 23.4355, "step": 483880 }, { "epoch": 0.9774884149371559, "grad_norm": 198.25270080566406, "learning_rate": 3.158987928868151e-08, "loss": 13.6672, "step": 483890 }, { "epoch": 0.9775086155698397, "grad_norm": 197.78375244140625, "learning_rate": 3.1550715017575895e-08, "loss": 14.6742, "step": 483900 }, { "epoch": 0.9775288162025234, "grad_norm": 312.73980712890625, "learning_rate": 3.151157496208979e-08, "loss": 10.7456, "step": 483910 }, { "epoch": 0.9775490168352072, "grad_norm": 0.0, "learning_rate": 3.1472459122414144e-08, "loss": 12.6866, "step": 483920 }, { "epoch": 0.977569217467891, "grad_norm": 26.30738067626953, "learning_rate": 3.143336749873882e-08, "loss": 14.7024, "step": 483930 }, { "epoch": 0.9775894181005749, "grad_norm": 153.23471069335938, "learning_rate": 3.139430009125477e-08, "loss": 17.0624, "step": 483940 }, { "epoch": 0.9776096187332587, "grad_norm": 626.3602294921875, "learning_rate": 3.135525690015184e-08, "loss": 22.0167, "step": 483950 }, { "epoch": 0.9776298193659425, "grad_norm": 165.5670928955078, "learning_rate": 3.131623792562155e-08, "loss": 14.3687, "step": 483960 }, { "epoch": 0.9776500199986263, "grad_norm": 11.01375675201416, "learning_rate": 3.127724316785263e-08, "loss": 18.0701, "step": 483970 }, { "epoch": 0.9776702206313101, "grad_norm": 94.835693359375, "learning_rate": 3.1238272627035494e-08, "loss": 13.9082, "step": 483980 }, { "epoch": 0.977690421263994, "grad_norm": 383.6202087402344, "learning_rate": 3.119932630336109e-08, "loss": 27.2308, "step": 483990 }, { "epoch": 0.9777106218966778, "grad_norm": 350.61016845703125, "learning_rate": 3.1160404197018155e-08, "loss": 17.0211, "step": 484000 }, { "epoch": 0.9777308225293616, "grad_norm": 131.5062713623047, "learning_rate": 3.11215063081971e-08, "loss": 27.6387, "step": 484010 }, { "epoch": 0.9777510231620454, "grad_norm": 182.68927001953125, "learning_rate": 3.108263263708666e-08, "loss": 7.6781, "step": 484020 }, { "epoch": 0.9777712237947292, "grad_norm": 101.00978088378906, "learning_rate": 3.104378318387724e-08, "loss": 12.5815, "step": 484030 }, { "epoch": 0.9777914244274131, "grad_norm": 145.689697265625, "learning_rate": 3.1004957948757576e-08, "loss": 15.6676, "step": 484040 }, { "epoch": 0.9778116250600969, "grad_norm": 278.1912841796875, "learning_rate": 3.0966156931916955e-08, "loss": 29.9499, "step": 484050 }, { "epoch": 0.9778318256927807, "grad_norm": 280.1376647949219, "learning_rate": 3.092738013354468e-08, "loss": 13.4762, "step": 484060 }, { "epoch": 0.9778520263254645, "grad_norm": 198.26275634765625, "learning_rate": 3.088862755383004e-08, "loss": 16.3791, "step": 484070 }, { "epoch": 0.9778722269581483, "grad_norm": 275.3869323730469, "learning_rate": 3.084989919296122e-08, "loss": 23.9532, "step": 484080 }, { "epoch": 0.9778924275908322, "grad_norm": 134.90830993652344, "learning_rate": 3.081119505112751e-08, "loss": 18.2092, "step": 484090 }, { "epoch": 0.977912628223516, "grad_norm": 222.60604858398438, "learning_rate": 3.077251512851709e-08, "loss": 24.5073, "step": 484100 }, { "epoch": 0.9779328288561998, "grad_norm": 744.3571166992188, "learning_rate": 3.07338594253187e-08, "loss": 22.8677, "step": 484110 }, { "epoch": 0.9779530294888836, "grad_norm": 480.4717712402344, "learning_rate": 3.069522794172109e-08, "loss": 19.43, "step": 484120 }, { "epoch": 0.9779732301215674, "grad_norm": 225.44764709472656, "learning_rate": 3.0656620677911867e-08, "loss": 11.4545, "step": 484130 }, { "epoch": 0.9779934307542513, "grad_norm": 434.86517333984375, "learning_rate": 3.061803763408033e-08, "loss": 22.5593, "step": 484140 }, { "epoch": 0.9780136313869351, "grad_norm": 923.6597900390625, "learning_rate": 3.057947881041301e-08, "loss": 24.499, "step": 484150 }, { "epoch": 0.9780338320196189, "grad_norm": 247.2659912109375, "learning_rate": 3.054094420709863e-08, "loss": 14.029, "step": 484160 }, { "epoch": 0.9780540326523026, "grad_norm": 263.4147644042969, "learning_rate": 3.050243382432483e-08, "loss": 16.9114, "step": 484170 }, { "epoch": 0.9780742332849864, "grad_norm": 355.8857116699219, "learning_rate": 3.046394766228034e-08, "loss": 8.9725, "step": 484180 }, { "epoch": 0.9780944339176703, "grad_norm": 217.84544372558594, "learning_rate": 3.0425485721151115e-08, "loss": 24.7696, "step": 484190 }, { "epoch": 0.9781146345503541, "grad_norm": 119.02053833007812, "learning_rate": 3.038704800112535e-08, "loss": 8.661, "step": 484200 }, { "epoch": 0.9781348351830379, "grad_norm": 208.46368408203125, "learning_rate": 3.034863450239067e-08, "loss": 26.7875, "step": 484210 }, { "epoch": 0.9781550358157217, "grad_norm": 436.1865234375, "learning_rate": 3.0310245225133595e-08, "loss": 19.8382, "step": 484220 }, { "epoch": 0.9781752364484055, "grad_norm": 403.9502258300781, "learning_rate": 3.027188016954175e-08, "loss": 20.4445, "step": 484230 }, { "epoch": 0.9781954370810894, "grad_norm": 261.0536193847656, "learning_rate": 3.0233539335802195e-08, "loss": 20.8463, "step": 484240 }, { "epoch": 0.9782156377137732, "grad_norm": 551.9981079101562, "learning_rate": 3.019522272410202e-08, "loss": 16.055, "step": 484250 }, { "epoch": 0.978235838346457, "grad_norm": 362.7401428222656, "learning_rate": 3.0156930334626633e-08, "loss": 13.623, "step": 484260 }, { "epoch": 0.9782560389791408, "grad_norm": 222.0453643798828, "learning_rate": 3.0118662167564205e-08, "loss": 13.3153, "step": 484270 }, { "epoch": 0.9782762396118246, "grad_norm": 234.4801025390625, "learning_rate": 3.008041822310015e-08, "loss": 13.9276, "step": 484280 }, { "epoch": 0.9782964402445085, "grad_norm": 12.658368110656738, "learning_rate": 3.004219850142209e-08, "loss": 25.2537, "step": 484290 }, { "epoch": 0.9783166408771923, "grad_norm": 452.21368408203125, "learning_rate": 3.0004003002714886e-08, "loss": 33.7096, "step": 484300 }, { "epoch": 0.9783368415098761, "grad_norm": 105.99453735351562, "learning_rate": 2.9965831727165603e-08, "loss": 14.9214, "step": 484310 }, { "epoch": 0.9783570421425599, "grad_norm": 125.94039916992188, "learning_rate": 2.992768467496021e-08, "loss": 19.0695, "step": 484320 }, { "epoch": 0.9783772427752437, "grad_norm": 77.85628509521484, "learning_rate": 2.988956184628411e-08, "loss": 19.3725, "step": 484330 }, { "epoch": 0.9783974434079276, "grad_norm": 37.931915283203125, "learning_rate": 2.985146324132438e-08, "loss": 37.1462, "step": 484340 }, { "epoch": 0.9784176440406114, "grad_norm": 354.00482177734375, "learning_rate": 2.981338886026475e-08, "loss": 13.8811, "step": 484350 }, { "epoch": 0.9784378446732952, "grad_norm": 192.91014099121094, "learning_rate": 2.97753387032923e-08, "loss": 19.4121, "step": 484360 }, { "epoch": 0.978458045305979, "grad_norm": 75.15606689453125, "learning_rate": 2.9737312770591887e-08, "loss": 19.2399, "step": 484370 }, { "epoch": 0.9784782459386628, "grad_norm": 247.0068817138672, "learning_rate": 2.9699311062349467e-08, "loss": 16.0864, "step": 484380 }, { "epoch": 0.9784984465713467, "grad_norm": 25.100473403930664, "learning_rate": 2.966133357874934e-08, "loss": 19.2274, "step": 484390 }, { "epoch": 0.9785186472040305, "grad_norm": 25.9183292388916, "learning_rate": 2.9623380319976912e-08, "loss": 13.2397, "step": 484400 }, { "epoch": 0.9785388478367143, "grad_norm": 208.7423095703125, "learning_rate": 2.9585451286217593e-08, "loss": 14.6802, "step": 484410 }, { "epoch": 0.978559048469398, "grad_norm": 132.13888549804688, "learning_rate": 2.954754647765623e-08, "loss": 14.4208, "step": 484420 }, { "epoch": 0.9785792491020818, "grad_norm": 559.3355712890625, "learning_rate": 2.950966589447657e-08, "loss": 19.5717, "step": 484430 }, { "epoch": 0.9785994497347656, "grad_norm": 304.85235595703125, "learning_rate": 2.947180953686457e-08, "loss": 17.2576, "step": 484440 }, { "epoch": 0.9786196503674495, "grad_norm": 345.2542419433594, "learning_rate": 2.9433977405003976e-08, "loss": 23.9686, "step": 484450 }, { "epoch": 0.9786398510001333, "grad_norm": 329.9510498046875, "learning_rate": 2.9396169499079087e-08, "loss": 16.6701, "step": 484460 }, { "epoch": 0.9786600516328171, "grad_norm": 353.3564758300781, "learning_rate": 2.935838581927475e-08, "loss": 33.0322, "step": 484470 }, { "epoch": 0.9786802522655009, "grad_norm": 130.25843811035156, "learning_rate": 2.9320626365774153e-08, "loss": 9.726, "step": 484480 }, { "epoch": 0.9787004528981847, "grad_norm": 17.38633918762207, "learning_rate": 2.9282891138762148e-08, "loss": 15.5292, "step": 484490 }, { "epoch": 0.9787206535308686, "grad_norm": 326.0066223144531, "learning_rate": 2.9245180138423033e-08, "loss": 37.6101, "step": 484500 }, { "epoch": 0.9787408541635524, "grad_norm": 419.8305358886719, "learning_rate": 2.920749336494e-08, "loss": 13.2768, "step": 484510 }, { "epoch": 0.9787610547962362, "grad_norm": 286.7938232421875, "learning_rate": 2.9169830818496226e-08, "loss": 25.2753, "step": 484520 }, { "epoch": 0.97878125542892, "grad_norm": 140.8557891845703, "learning_rate": 2.9132192499276014e-08, "loss": 19.5638, "step": 484530 }, { "epoch": 0.9788014560616038, "grad_norm": 13.008999824523926, "learning_rate": 2.9094578407462547e-08, "loss": 11.8918, "step": 484540 }, { "epoch": 0.9788216566942877, "grad_norm": 198.89772033691406, "learning_rate": 2.9056988543239018e-08, "loss": 7.4577, "step": 484550 }, { "epoch": 0.9788418573269715, "grad_norm": 13.655198097229004, "learning_rate": 2.9019422906789162e-08, "loss": 11.6218, "step": 484560 }, { "epoch": 0.9788620579596553, "grad_norm": 289.172119140625, "learning_rate": 2.8981881498295616e-08, "loss": 30.0727, "step": 484570 }, { "epoch": 0.9788822585923391, "grad_norm": 75.4028091430664, "learning_rate": 2.8944364317941564e-08, "loss": 10.8886, "step": 484580 }, { "epoch": 0.978902459225023, "grad_norm": 394.84210205078125, "learning_rate": 2.8906871365909638e-08, "loss": 14.4291, "step": 484590 }, { "epoch": 0.9789226598577068, "grad_norm": 444.9529724121094, "learning_rate": 2.8869402642382473e-08, "loss": 13.397, "step": 484600 }, { "epoch": 0.9789428604903906, "grad_norm": 577.3967895507812, "learning_rate": 2.8831958147543805e-08, "loss": 17.5643, "step": 484610 }, { "epoch": 0.9789630611230744, "grad_norm": 170.24781799316406, "learning_rate": 2.8794537881574046e-08, "loss": 22.3157, "step": 484620 }, { "epoch": 0.9789832617557582, "grad_norm": 422.06024169921875, "learning_rate": 2.87571418446575e-08, "loss": 20.9791, "step": 484630 }, { "epoch": 0.979003462388442, "grad_norm": 428.97283935546875, "learning_rate": 2.871977003697568e-08, "loss": 21.446, "step": 484640 }, { "epoch": 0.9790236630211259, "grad_norm": 288.517578125, "learning_rate": 2.8682422458710667e-08, "loss": 15.7553, "step": 484650 }, { "epoch": 0.9790438636538097, "grad_norm": 124.12348175048828, "learning_rate": 2.864509911004454e-08, "loss": 13.6223, "step": 484660 }, { "epoch": 0.9790640642864935, "grad_norm": 20.6627254486084, "learning_rate": 2.8607799991159368e-08, "loss": 9.575, "step": 484670 }, { "epoch": 0.9790842649191772, "grad_norm": 465.4810791015625, "learning_rate": 2.857052510223668e-08, "loss": 17.2106, "step": 484680 }, { "epoch": 0.979104465551861, "grad_norm": 258.0783386230469, "learning_rate": 2.853327444345799e-08, "loss": 15.3026, "step": 484690 }, { "epoch": 0.9791246661845449, "grad_norm": 550.4926147460938, "learning_rate": 2.8496048015005385e-08, "loss": 21.0773, "step": 484700 }, { "epoch": 0.9791448668172287, "grad_norm": 139.730224609375, "learning_rate": 2.8458845817060376e-08, "loss": 13.0492, "step": 484710 }, { "epoch": 0.9791650674499125, "grad_norm": 444.1943664550781, "learning_rate": 2.8421667849803937e-08, "loss": 14.2267, "step": 484720 }, { "epoch": 0.9791852680825963, "grad_norm": 17.33098602294922, "learning_rate": 2.8384514113417026e-08, "loss": 14.1109, "step": 484730 }, { "epoch": 0.9792054687152801, "grad_norm": 0.0, "learning_rate": 2.8347384608081173e-08, "loss": 24.8455, "step": 484740 }, { "epoch": 0.979225669347964, "grad_norm": 1.657724380493164, "learning_rate": 2.8310279333976786e-08, "loss": 21.6012, "step": 484750 }, { "epoch": 0.9792458699806478, "grad_norm": 104.38908386230469, "learning_rate": 2.827319829128594e-08, "loss": 12.692, "step": 484760 }, { "epoch": 0.9792660706133316, "grad_norm": 258.1601257324219, "learning_rate": 2.823614148018794e-08, "loss": 14.0637, "step": 484770 }, { "epoch": 0.9792862712460154, "grad_norm": 429.6944274902344, "learning_rate": 2.819910890086375e-08, "loss": 24.2677, "step": 484780 }, { "epoch": 0.9793064718786992, "grad_norm": 248.7170867919922, "learning_rate": 2.8162100553494887e-08, "loss": 17.2839, "step": 484790 }, { "epoch": 0.9793266725113831, "grad_norm": 518.0184326171875, "learning_rate": 2.8125116438260104e-08, "loss": 12.0953, "step": 484800 }, { "epoch": 0.9793468731440669, "grad_norm": 375.1488952636719, "learning_rate": 2.8088156555340916e-08, "loss": 26.6397, "step": 484810 }, { "epoch": 0.9793670737767507, "grad_norm": 318.00823974609375, "learning_rate": 2.805122090491719e-08, "loss": 15.0593, "step": 484820 }, { "epoch": 0.9793872744094345, "grad_norm": 125.44718933105469, "learning_rate": 2.801430948716821e-08, "loss": 17.0579, "step": 484830 }, { "epoch": 0.9794074750421183, "grad_norm": 273.11883544921875, "learning_rate": 2.797742230227496e-08, "loss": 14.5143, "step": 484840 }, { "epoch": 0.9794276756748022, "grad_norm": 272.05291748046875, "learning_rate": 2.794055935041673e-08, "loss": 14.1976, "step": 484850 }, { "epoch": 0.979447876307486, "grad_norm": 555.6732788085938, "learning_rate": 2.7903720631772824e-08, "loss": 24.0573, "step": 484860 }, { "epoch": 0.9794680769401698, "grad_norm": 186.22152709960938, "learning_rate": 2.7866906146523098e-08, "loss": 19.1456, "step": 484870 }, { "epoch": 0.9794882775728536, "grad_norm": 398.9652404785156, "learning_rate": 2.783011589484741e-08, "loss": 26.9826, "step": 484880 }, { "epoch": 0.9795084782055374, "grad_norm": 167.0987091064453, "learning_rate": 2.7793349876924503e-08, "loss": 24.4843, "step": 484890 }, { "epoch": 0.9795286788382213, "grad_norm": 268.7541198730469, "learning_rate": 2.7756608092933678e-08, "loss": 18.577, "step": 484900 }, { "epoch": 0.9795488794709051, "grad_norm": 201.6580352783203, "learning_rate": 2.771989054305424e-08, "loss": 10.9767, "step": 484910 }, { "epoch": 0.9795690801035889, "grad_norm": 141.82345581054688, "learning_rate": 2.768319722746493e-08, "loss": 10.678, "step": 484920 }, { "epoch": 0.9795892807362726, "grad_norm": 262.8535461425781, "learning_rate": 2.7646528146345053e-08, "loss": 12.9, "step": 484930 }, { "epoch": 0.9796094813689564, "grad_norm": 435.3656005859375, "learning_rate": 2.760988329987224e-08, "loss": 13.6356, "step": 484940 }, { "epoch": 0.9796296820016402, "grad_norm": 510.5507507324219, "learning_rate": 2.7573262688226355e-08, "loss": 35.3654, "step": 484950 }, { "epoch": 0.9796498826343241, "grad_norm": 341.7468566894531, "learning_rate": 2.753666631158447e-08, "loss": 14.537, "step": 484960 }, { "epoch": 0.9796700832670079, "grad_norm": 818.6873168945312, "learning_rate": 2.7500094170126447e-08, "loss": 12.7979, "step": 484970 }, { "epoch": 0.9796902838996917, "grad_norm": 200.34927368164062, "learning_rate": 2.7463546264029915e-08, "loss": 10.148, "step": 484980 }, { "epoch": 0.9797104845323755, "grad_norm": 480.0416564941406, "learning_rate": 2.7427022593473074e-08, "loss": 19.2405, "step": 484990 }, { "epoch": 0.9797306851650593, "grad_norm": 407.5085144042969, "learning_rate": 2.7390523158633552e-08, "loss": 17.0076, "step": 485000 }, { "epoch": 0.9797508857977432, "grad_norm": 249.0325164794922, "learning_rate": 2.7354047959689543e-08, "loss": 12.0682, "step": 485010 }, { "epoch": 0.979771086430427, "grad_norm": 400.5632629394531, "learning_rate": 2.7317596996818684e-08, "loss": 31.2551, "step": 485020 }, { "epoch": 0.9797912870631108, "grad_norm": 332.02001953125, "learning_rate": 2.728117027019861e-08, "loss": 38.4033, "step": 485030 }, { "epoch": 0.9798114876957946, "grad_norm": 482.14398193359375, "learning_rate": 2.7244767780007507e-08, "loss": 29.1403, "step": 485040 }, { "epoch": 0.9798316883284784, "grad_norm": 236.2940673828125, "learning_rate": 2.7208389526421907e-08, "loss": 34.5868, "step": 485050 }, { "epoch": 0.9798518889611623, "grad_norm": 228.9615020751953, "learning_rate": 2.7172035509619442e-08, "loss": 21.3379, "step": 485060 }, { "epoch": 0.9798720895938461, "grad_norm": 248.9374237060547, "learning_rate": 2.713570572977775e-08, "loss": 28.5801, "step": 485070 }, { "epoch": 0.9798922902265299, "grad_norm": 736.0357666015625, "learning_rate": 2.7099400187073356e-08, "loss": 18.1751, "step": 485080 }, { "epoch": 0.9799124908592137, "grad_norm": 380.0699768066406, "learning_rate": 2.7063118881682782e-08, "loss": 20.2449, "step": 485090 }, { "epoch": 0.9799326914918975, "grad_norm": 400.3285827636719, "learning_rate": 2.7026861813783668e-08, "loss": 12.1668, "step": 485100 }, { "epoch": 0.9799528921245814, "grad_norm": 164.75294494628906, "learning_rate": 2.6990628983553093e-08, "loss": 16.418, "step": 485110 }, { "epoch": 0.9799730927572652, "grad_norm": 417.77935791015625, "learning_rate": 2.6954420391166468e-08, "loss": 14.4959, "step": 485120 }, { "epoch": 0.979993293389949, "grad_norm": 394.34307861328125, "learning_rate": 2.691823603680088e-08, "loss": 29.6269, "step": 485130 }, { "epoch": 0.9800134940226328, "grad_norm": 233.8807830810547, "learning_rate": 2.6882075920632854e-08, "loss": 17.3482, "step": 485140 }, { "epoch": 0.9800336946553166, "grad_norm": 178.50732421875, "learning_rate": 2.684594004283836e-08, "loss": 14.9117, "step": 485150 }, { "epoch": 0.9800538952880005, "grad_norm": 410.2407531738281, "learning_rate": 2.6809828403593363e-08, "loss": 11.6257, "step": 485160 }, { "epoch": 0.9800740959206843, "grad_norm": 47.52455139160156, "learning_rate": 2.6773741003074394e-08, "loss": 17.6356, "step": 485170 }, { "epoch": 0.9800942965533681, "grad_norm": 598.1161499023438, "learning_rate": 2.6737677841456867e-08, "loss": 20.3677, "step": 485180 }, { "epoch": 0.9801144971860518, "grad_norm": 373.65185546875, "learning_rate": 2.670163891891675e-08, "loss": 18.3767, "step": 485190 }, { "epoch": 0.9801346978187356, "grad_norm": 236.4280548095703, "learning_rate": 2.6665624235629463e-08, "loss": 15.3819, "step": 485200 }, { "epoch": 0.9801548984514195, "grad_norm": 404.3671569824219, "learning_rate": 2.662963379177097e-08, "loss": 14.8196, "step": 485210 }, { "epoch": 0.9801750990841033, "grad_norm": 1.4682387113571167, "learning_rate": 2.6593667587516693e-08, "loss": 17.7185, "step": 485220 }, { "epoch": 0.9801952997167871, "grad_norm": 116.38851165771484, "learning_rate": 2.6557725623041487e-08, "loss": 17.0778, "step": 485230 }, { "epoch": 0.9802155003494709, "grad_norm": 202.219482421875, "learning_rate": 2.6521807898520214e-08, "loss": 11.1525, "step": 485240 }, { "epoch": 0.9802357009821547, "grad_norm": 249.83407592773438, "learning_rate": 2.64859144141294e-08, "loss": 12.2021, "step": 485250 }, { "epoch": 0.9802559016148386, "grad_norm": 158.11590576171875, "learning_rate": 2.6450045170042238e-08, "loss": 15.9091, "step": 485260 }, { "epoch": 0.9802761022475224, "grad_norm": 373.1625671386719, "learning_rate": 2.6414200166434144e-08, "loss": 33.0223, "step": 485270 }, { "epoch": 0.9802963028802062, "grad_norm": 558.8707275390625, "learning_rate": 2.6378379403480536e-08, "loss": 22.0269, "step": 485280 }, { "epoch": 0.98031650351289, "grad_norm": 80.94778442382812, "learning_rate": 2.6342582881355717e-08, "loss": 11.432, "step": 485290 }, { "epoch": 0.9803367041455738, "grad_norm": 447.9439392089844, "learning_rate": 2.6306810600233435e-08, "loss": 20.6871, "step": 485300 }, { "epoch": 0.9803569047782577, "grad_norm": 212.05914306640625, "learning_rate": 2.6271062560288552e-08, "loss": 20.6287, "step": 485310 }, { "epoch": 0.9803771054109415, "grad_norm": 454.74493408203125, "learning_rate": 2.6235338761695372e-08, "loss": 21.637, "step": 485320 }, { "epoch": 0.9803973060436253, "grad_norm": 220.4085693359375, "learning_rate": 2.6199639204628202e-08, "loss": 14.0467, "step": 485330 }, { "epoch": 0.9804175066763091, "grad_norm": 428.25933837890625, "learning_rate": 2.6163963889260236e-08, "loss": 23.0588, "step": 485340 }, { "epoch": 0.9804377073089929, "grad_norm": 14.814775466918945, "learning_rate": 2.6128312815766332e-08, "loss": 21.864, "step": 485350 }, { "epoch": 0.9804579079416768, "grad_norm": 515.070068359375, "learning_rate": 2.6092685984319134e-08, "loss": 15.1821, "step": 485360 }, { "epoch": 0.9804781085743606, "grad_norm": 180.12730407714844, "learning_rate": 2.6057083395093495e-08, "loss": 23.8452, "step": 485370 }, { "epoch": 0.9804983092070444, "grad_norm": 219.78884887695312, "learning_rate": 2.6021505048262062e-08, "loss": 25.7188, "step": 485380 }, { "epoch": 0.9805185098397282, "grad_norm": 4.057182312011719, "learning_rate": 2.5985950943999137e-08, "loss": 11.9159, "step": 485390 }, { "epoch": 0.980538710472412, "grad_norm": 164.8330535888672, "learning_rate": 2.5950421082476805e-08, "loss": 13.588, "step": 485400 }, { "epoch": 0.9805589111050959, "grad_norm": 252.8827667236328, "learning_rate": 2.5914915463868816e-08, "loss": 21.1826, "step": 485410 }, { "epoch": 0.9805791117377797, "grad_norm": 0.5538516640663147, "learning_rate": 2.5879434088348364e-08, "loss": 18.9406, "step": 485420 }, { "epoch": 0.9805993123704635, "grad_norm": 533.2706909179688, "learning_rate": 2.584397695608809e-08, "loss": 34.0056, "step": 485430 }, { "epoch": 0.9806195130031473, "grad_norm": 162.6726837158203, "learning_rate": 2.580854406726174e-08, "loss": 8.7961, "step": 485440 }, { "epoch": 0.980639713635831, "grad_norm": 37.770572662353516, "learning_rate": 2.5773135422040296e-08, "loss": 12.204, "step": 485450 }, { "epoch": 0.9806599142685148, "grad_norm": 323.77325439453125, "learning_rate": 2.5737751020598057e-08, "loss": 8.6064, "step": 485460 }, { "epoch": 0.9806801149011987, "grad_norm": 164.14524841308594, "learning_rate": 2.5702390863105996e-08, "loss": 7.7936, "step": 485470 }, { "epoch": 0.9807003155338825, "grad_norm": 325.83123779296875, "learning_rate": 2.5667054949737315e-08, "loss": 13.356, "step": 485480 }, { "epoch": 0.9807205161665663, "grad_norm": 206.82696533203125, "learning_rate": 2.5631743280664643e-08, "loss": 20.1518, "step": 485490 }, { "epoch": 0.9807407167992501, "grad_norm": 266.54156494140625, "learning_rate": 2.5596455856058966e-08, "loss": 20.6028, "step": 485500 }, { "epoch": 0.9807609174319339, "grad_norm": 263.7402648925781, "learning_rate": 2.556119267609347e-08, "loss": 13.1059, "step": 485510 }, { "epoch": 0.9807811180646178, "grad_norm": 296.02099609375, "learning_rate": 2.552595374093858e-08, "loss": 16.091, "step": 485520 }, { "epoch": 0.9808013186973016, "grad_norm": 113.26029205322266, "learning_rate": 2.5490739050767488e-08, "loss": 9.6391, "step": 485530 }, { "epoch": 0.9808215193299854, "grad_norm": 412.40570068359375, "learning_rate": 2.5455548605751167e-08, "loss": 30.8011, "step": 485540 }, { "epoch": 0.9808417199626692, "grad_norm": 297.3525085449219, "learning_rate": 2.5420382406060595e-08, "loss": 25.4737, "step": 485550 }, { "epoch": 0.980861920595353, "grad_norm": 256.57373046875, "learning_rate": 2.5385240451867853e-08, "loss": 17.9179, "step": 485560 }, { "epoch": 0.9808821212280369, "grad_norm": 345.115478515625, "learning_rate": 2.5350122743344476e-08, "loss": 21.138, "step": 485570 }, { "epoch": 0.9809023218607207, "grad_norm": 169.43362426757812, "learning_rate": 2.531502928066143e-08, "loss": 11.9062, "step": 485580 }, { "epoch": 0.9809225224934045, "grad_norm": 351.01409912109375, "learning_rate": 2.527996006398914e-08, "loss": 9.3808, "step": 485590 }, { "epoch": 0.9809427231260883, "grad_norm": 307.8703918457031, "learning_rate": 2.5244915093499134e-08, "loss": 36.9408, "step": 485600 }, { "epoch": 0.9809629237587721, "grad_norm": 154.72337341308594, "learning_rate": 2.5209894369362386e-08, "loss": 14.547, "step": 485610 }, { "epoch": 0.980983124391456, "grad_norm": 193.18045043945312, "learning_rate": 2.5174897891748762e-08, "loss": 26.4675, "step": 485620 }, { "epoch": 0.9810033250241398, "grad_norm": 170.4246368408203, "learning_rate": 2.5139925660829233e-08, "loss": 19.2328, "step": 485630 }, { "epoch": 0.9810235256568236, "grad_norm": 236.127197265625, "learning_rate": 2.5104977676774777e-08, "loss": 19.9385, "step": 485640 }, { "epoch": 0.9810437262895074, "grad_norm": 212.71778869628906, "learning_rate": 2.5070053939754702e-08, "loss": 13.2024, "step": 485650 }, { "epoch": 0.9810639269221912, "grad_norm": 319.12548828125, "learning_rate": 2.5035154449940535e-08, "loss": 27.3948, "step": 485660 }, { "epoch": 0.9810841275548751, "grad_norm": 484.36846923828125, "learning_rate": 2.500027920750103e-08, "loss": 15.4728, "step": 485670 }, { "epoch": 0.9811043281875589, "grad_norm": 314.556884765625, "learning_rate": 2.496542821260717e-08, "loss": 13.4917, "step": 485680 }, { "epoch": 0.9811245288202427, "grad_norm": 113.44535064697266, "learning_rate": 2.493060146542825e-08, "loss": 20.1452, "step": 485690 }, { "epoch": 0.9811447294529264, "grad_norm": 352.3360595703125, "learning_rate": 2.489579896613359e-08, "loss": 31.2981, "step": 485700 }, { "epoch": 0.9811649300856102, "grad_norm": 160.99942016601562, "learning_rate": 2.4861020714894156e-08, "loss": 9.934, "step": 485710 }, { "epoch": 0.981185130718294, "grad_norm": 139.1692657470703, "learning_rate": 2.482626671187871e-08, "loss": 10.0028, "step": 485720 }, { "epoch": 0.9812053313509779, "grad_norm": 391.8040771484375, "learning_rate": 2.4791536957256e-08, "loss": 15.2026, "step": 485730 }, { "epoch": 0.9812255319836617, "grad_norm": 560.1205444335938, "learning_rate": 2.4756831451196452e-08, "loss": 24.3444, "step": 485740 }, { "epoch": 0.9812457326163455, "grad_norm": 29.000030517578125, "learning_rate": 2.472215019386881e-08, "loss": 14.2183, "step": 485750 }, { "epoch": 0.9812659332490293, "grad_norm": 220.35426330566406, "learning_rate": 2.4687493185441836e-08, "loss": 24.7511, "step": 485760 }, { "epoch": 0.9812861338817132, "grad_norm": 254.14060974121094, "learning_rate": 2.4652860426084278e-08, "loss": 24.0683, "step": 485770 }, { "epoch": 0.981306334514397, "grad_norm": 58.67346954345703, "learning_rate": 2.46182519159649e-08, "loss": 10.4478, "step": 485780 }, { "epoch": 0.9813265351470808, "grad_norm": 44.45576477050781, "learning_rate": 2.458366765525355e-08, "loss": 9.0318, "step": 485790 }, { "epoch": 0.9813467357797646, "grad_norm": 409.38818359375, "learning_rate": 2.4549107644117888e-08, "loss": 18.5046, "step": 485800 }, { "epoch": 0.9813669364124484, "grad_norm": 100.24295043945312, "learning_rate": 2.4514571882726102e-08, "loss": 14.8458, "step": 485810 }, { "epoch": 0.9813871370451323, "grad_norm": 495.2707214355469, "learning_rate": 2.448006037124695e-08, "loss": 24.7137, "step": 485820 }, { "epoch": 0.9814073376778161, "grad_norm": 344.15325927734375, "learning_rate": 2.444557310984863e-08, "loss": 16.843, "step": 485830 }, { "epoch": 0.9814275383104999, "grad_norm": 408.4779052734375, "learning_rate": 2.441111009869879e-08, "loss": 25.2231, "step": 485840 }, { "epoch": 0.9814477389431837, "grad_norm": 65.52508544921875, "learning_rate": 2.4376671337966174e-08, "loss": 17.1064, "step": 485850 }, { "epoch": 0.9814679395758675, "grad_norm": 246.01783752441406, "learning_rate": 2.434225682781788e-08, "loss": 20.977, "step": 485860 }, { "epoch": 0.9814881402085514, "grad_norm": 506.14581298828125, "learning_rate": 2.43078665684221e-08, "loss": 46.8623, "step": 485870 }, { "epoch": 0.9815083408412352, "grad_norm": 335.97332763671875, "learning_rate": 2.427350055994593e-08, "loss": 26.2684, "step": 485880 }, { "epoch": 0.981528541473919, "grad_norm": 217.335205078125, "learning_rate": 2.423915880255756e-08, "loss": 14.9394, "step": 485890 }, { "epoch": 0.9815487421066028, "grad_norm": 121.01840209960938, "learning_rate": 2.4204841296424086e-08, "loss": 14.2219, "step": 485900 }, { "epoch": 0.9815689427392866, "grad_norm": 415.287109375, "learning_rate": 2.4170548041712594e-08, "loss": 19.2851, "step": 485910 }, { "epoch": 0.9815891433719705, "grad_norm": 250.50343322753906, "learning_rate": 2.4136279038590727e-08, "loss": 18.9241, "step": 485920 }, { "epoch": 0.9816093440046543, "grad_norm": 368.9814758300781, "learning_rate": 2.4102034287224462e-08, "loss": 12.0449, "step": 485930 }, { "epoch": 0.9816295446373381, "grad_norm": 394.1357727050781, "learning_rate": 2.4067813787782e-08, "loss": 22.9436, "step": 485940 }, { "epoch": 0.9816497452700219, "grad_norm": 175.39022827148438, "learning_rate": 2.403361754042932e-08, "loss": 19.2293, "step": 485950 }, { "epoch": 0.9816699459027056, "grad_norm": 53.9837646484375, "learning_rate": 2.3999445545332955e-08, "loss": 15.5892, "step": 485960 }, { "epoch": 0.9816901465353894, "grad_norm": 236.90887451171875, "learning_rate": 2.3965297802659993e-08, "loss": 17.771, "step": 485970 }, { "epoch": 0.9817103471680733, "grad_norm": 234.25405883789062, "learning_rate": 2.3931174312576966e-08, "loss": 19.3174, "step": 485980 }, { "epoch": 0.9817305478007571, "grad_norm": 201.67913818359375, "learning_rate": 2.3897075075249298e-08, "loss": 16.0098, "step": 485990 }, { "epoch": 0.9817507484334409, "grad_norm": 109.33238220214844, "learning_rate": 2.386300009084408e-08, "loss": 18.8036, "step": 486000 }, { "epoch": 0.9817709490661247, "grad_norm": 4.808259963989258, "learning_rate": 2.382894935952729e-08, "loss": 16.8313, "step": 486010 }, { "epoch": 0.9817911496988085, "grad_norm": 272.5266418457031, "learning_rate": 2.3794922881464344e-08, "loss": 18.3731, "step": 486020 }, { "epoch": 0.9818113503314924, "grad_norm": 34.69828414916992, "learning_rate": 2.3760920656821228e-08, "loss": 17.4615, "step": 486030 }, { "epoch": 0.9818315509641762, "grad_norm": 323.7899169921875, "learning_rate": 2.3726942685764474e-08, "loss": 22.6718, "step": 486040 }, { "epoch": 0.98185175159686, "grad_norm": 101.85675048828125, "learning_rate": 2.3692988968458398e-08, "loss": 30.6321, "step": 486050 }, { "epoch": 0.9818719522295438, "grad_norm": 198.76905822753906, "learning_rate": 2.3659059505069526e-08, "loss": 26.5358, "step": 486060 }, { "epoch": 0.9818921528622276, "grad_norm": 53.76090621948242, "learning_rate": 2.362515429576273e-08, "loss": 11.4659, "step": 486070 }, { "epoch": 0.9819123534949115, "grad_norm": 252.8328094482422, "learning_rate": 2.3591273340703436e-08, "loss": 11.9757, "step": 486080 }, { "epoch": 0.9819325541275953, "grad_norm": 793.3980102539062, "learning_rate": 2.3557416640056507e-08, "loss": 17.1405, "step": 486090 }, { "epoch": 0.9819527547602791, "grad_norm": 195.5869903564453, "learning_rate": 2.3523584193986816e-08, "loss": 11.7921, "step": 486100 }, { "epoch": 0.9819729553929629, "grad_norm": 510.00006103515625, "learning_rate": 2.3489776002660337e-08, "loss": 18.6178, "step": 486110 }, { "epoch": 0.9819931560256467, "grad_norm": 270.2353515625, "learning_rate": 2.3455992066240828e-08, "loss": 31.8047, "step": 486120 }, { "epoch": 0.9820133566583306, "grad_norm": 276.6331481933594, "learning_rate": 2.342223238489316e-08, "loss": 24.8707, "step": 486130 }, { "epoch": 0.9820335572910144, "grad_norm": 91.86866760253906, "learning_rate": 2.3388496958782203e-08, "loss": 20.6946, "step": 486140 }, { "epoch": 0.9820537579236982, "grad_norm": 178.90017700195312, "learning_rate": 2.3354785788072265e-08, "loss": 8.8982, "step": 486150 }, { "epoch": 0.982073958556382, "grad_norm": 476.80303955078125, "learning_rate": 2.3321098872927107e-08, "loss": 25.1611, "step": 486160 }, { "epoch": 0.9820941591890658, "grad_norm": 338.6235656738281, "learning_rate": 2.3287436213511038e-08, "loss": 14.7015, "step": 486170 }, { "epoch": 0.9821143598217497, "grad_norm": 253.2498321533203, "learning_rate": 2.3253797809988933e-08, "loss": 15.6617, "step": 486180 }, { "epoch": 0.9821345604544335, "grad_norm": 255.2903289794922, "learning_rate": 2.3220183662523986e-08, "loss": 19.9992, "step": 486190 }, { "epoch": 0.9821547610871173, "grad_norm": 241.35653686523438, "learning_rate": 2.3186593771280518e-08, "loss": 6.3188, "step": 486200 }, { "epoch": 0.982174961719801, "grad_norm": 446.3783874511719, "learning_rate": 2.3153028136421728e-08, "loss": 17.9964, "step": 486210 }, { "epoch": 0.9821951623524848, "grad_norm": 265.31549072265625, "learning_rate": 2.3119486758111375e-08, "loss": 11.47, "step": 486220 }, { "epoch": 0.9822153629851686, "grad_norm": 238.12451171875, "learning_rate": 2.3085969636513217e-08, "loss": 14.1695, "step": 486230 }, { "epoch": 0.9822355636178525, "grad_norm": 0.0, "learning_rate": 2.3052476771790454e-08, "loss": 11.3744, "step": 486240 }, { "epoch": 0.9822557642505363, "grad_norm": 332.5835876464844, "learning_rate": 2.301900816410574e-08, "loss": 12.744, "step": 486250 }, { "epoch": 0.9822759648832201, "grad_norm": 273.3670654296875, "learning_rate": 2.2985563813623378e-08, "loss": 15.5879, "step": 486260 }, { "epoch": 0.9822961655159039, "grad_norm": 128.8788604736328, "learning_rate": 2.295214372050547e-08, "loss": 12.1372, "step": 486270 }, { "epoch": 0.9823163661485877, "grad_norm": 246.0155792236328, "learning_rate": 2.2918747884915216e-08, "loss": 16.4574, "step": 486280 }, { "epoch": 0.9823365667812716, "grad_norm": 247.61831665039062, "learning_rate": 2.2885376307015817e-08, "loss": 20.4371, "step": 486290 }, { "epoch": 0.9823567674139554, "grad_norm": 244.03050231933594, "learning_rate": 2.285202898696881e-08, "loss": 19.5125, "step": 486300 }, { "epoch": 0.9823769680466392, "grad_norm": 441.6757507324219, "learning_rate": 2.2818705924937402e-08, "loss": 19.5062, "step": 486310 }, { "epoch": 0.982397168679323, "grad_norm": 158.03622436523438, "learning_rate": 2.2785407121084236e-08, "loss": 23.2111, "step": 486320 }, { "epoch": 0.9824173693120068, "grad_norm": 217.98892211914062, "learning_rate": 2.2752132575570852e-08, "loss": 19.4118, "step": 486330 }, { "epoch": 0.9824375699446907, "grad_norm": 376.9085998535156, "learning_rate": 2.271888228856045e-08, "loss": 18.4132, "step": 486340 }, { "epoch": 0.9824577705773745, "grad_norm": 335.8309631347656, "learning_rate": 2.268565626021457e-08, "loss": 42.8806, "step": 486350 }, { "epoch": 0.9824779712100583, "grad_norm": 220.08766174316406, "learning_rate": 2.2652454490694752e-08, "loss": 19.1188, "step": 486360 }, { "epoch": 0.9824981718427421, "grad_norm": 44.7899055480957, "learning_rate": 2.261927698016364e-08, "loss": 15.5816, "step": 486370 }, { "epoch": 0.982518372475426, "grad_norm": 0.0, "learning_rate": 2.2586123728781663e-08, "loss": 16.5778, "step": 486380 }, { "epoch": 0.9825385731081098, "grad_norm": 275.6081848144531, "learning_rate": 2.255299473671202e-08, "loss": 18.4161, "step": 486390 }, { "epoch": 0.9825587737407936, "grad_norm": 154.0469970703125, "learning_rate": 2.251989000411514e-08, "loss": 24.1468, "step": 486400 }, { "epoch": 0.9825789743734774, "grad_norm": 495.96075439453125, "learning_rate": 2.2486809531152563e-08, "loss": 8.1208, "step": 486410 }, { "epoch": 0.9825991750061612, "grad_norm": 195.0969696044922, "learning_rate": 2.2453753317985272e-08, "loss": 18.1736, "step": 486420 }, { "epoch": 0.982619375638845, "grad_norm": 304.2983703613281, "learning_rate": 2.2420721364775354e-08, "loss": 23.7621, "step": 486430 }, { "epoch": 0.9826395762715289, "grad_norm": 256.87933349609375, "learning_rate": 2.2387713671682687e-08, "loss": 25.3219, "step": 486440 }, { "epoch": 0.9826597769042127, "grad_norm": 311.9479064941406, "learning_rate": 2.2354730238868804e-08, "loss": 21.947, "step": 486450 }, { "epoch": 0.9826799775368965, "grad_norm": 153.4099578857422, "learning_rate": 2.2321771066494137e-08, "loss": 13.7153, "step": 486460 }, { "epoch": 0.9827001781695802, "grad_norm": 1512.3955078125, "learning_rate": 2.2288836154719663e-08, "loss": 22.5255, "step": 486470 }, { "epoch": 0.982720378802264, "grad_norm": 176.42466735839844, "learning_rate": 2.2255925503705255e-08, "loss": 13.9224, "step": 486480 }, { "epoch": 0.9827405794349479, "grad_norm": 140.4485626220703, "learning_rate": 2.22230391136119e-08, "loss": 11.1048, "step": 486490 }, { "epoch": 0.9827607800676317, "grad_norm": 356.75421142578125, "learning_rate": 2.219017698460002e-08, "loss": 19.3174, "step": 486500 }, { "epoch": 0.9827809807003155, "grad_norm": 216.4697723388672, "learning_rate": 2.215733911682949e-08, "loss": 13.9558, "step": 486510 }, { "epoch": 0.9828011813329993, "grad_norm": 160.7062530517578, "learning_rate": 2.2124525510459627e-08, "loss": 12.9543, "step": 486520 }, { "epoch": 0.9828213819656831, "grad_norm": 118.80420684814453, "learning_rate": 2.2091736165651966e-08, "loss": 6.4347, "step": 486530 }, { "epoch": 0.982841582598367, "grad_norm": 285.1014099121094, "learning_rate": 2.205897108256472e-08, "loss": 29.0186, "step": 486540 }, { "epoch": 0.9828617832310508, "grad_norm": 67.74488067626953, "learning_rate": 2.202623026135886e-08, "loss": 13.3825, "step": 486550 }, { "epoch": 0.9828819838637346, "grad_norm": 233.2350311279297, "learning_rate": 2.1993513702193157e-08, "loss": 18.1295, "step": 486560 }, { "epoch": 0.9829021844964184, "grad_norm": 86.41580200195312, "learning_rate": 2.1960821405226928e-08, "loss": 17.6475, "step": 486570 }, { "epoch": 0.9829223851291022, "grad_norm": 145.8586883544922, "learning_rate": 2.1928153370620598e-08, "loss": 19.6319, "step": 486580 }, { "epoch": 0.9829425857617861, "grad_norm": 189.52638244628906, "learning_rate": 2.1895509598532372e-08, "loss": 23.1163, "step": 486590 }, { "epoch": 0.9829627863944699, "grad_norm": 166.6016082763672, "learning_rate": 2.1862890089121567e-08, "loss": 22.5704, "step": 486600 }, { "epoch": 0.9829829870271537, "grad_norm": 108.01043701171875, "learning_rate": 2.1830294842547506e-08, "loss": 8.6083, "step": 486610 }, { "epoch": 0.9830031876598375, "grad_norm": 152.20062255859375, "learning_rate": 2.1797723858968388e-08, "loss": 11.4041, "step": 486620 }, { "epoch": 0.9830233882925213, "grad_norm": 445.4767761230469, "learning_rate": 2.1765177138543535e-08, "loss": 19.9943, "step": 486630 }, { "epoch": 0.9830435889252052, "grad_norm": 690.5706176757812, "learning_rate": 2.173265468143171e-08, "loss": 34.0933, "step": 486640 }, { "epoch": 0.983063789557889, "grad_norm": 203.51470947265625, "learning_rate": 2.1700156487790557e-08, "loss": 12.4785, "step": 486650 }, { "epoch": 0.9830839901905728, "grad_norm": 166.11949157714844, "learning_rate": 2.1667682557779958e-08, "loss": 20.8026, "step": 486660 }, { "epoch": 0.9831041908232566, "grad_norm": 349.7200012207031, "learning_rate": 2.1635232891556446e-08, "loss": 10.3085, "step": 486670 }, { "epoch": 0.9831243914559404, "grad_norm": 654.8858032226562, "learning_rate": 2.1602807489279344e-08, "loss": 14.9182, "step": 486680 }, { "epoch": 0.9831445920886243, "grad_norm": 538.98876953125, "learning_rate": 2.1570406351106298e-08, "loss": 21.2531, "step": 486690 }, { "epoch": 0.9831647927213081, "grad_norm": 265.0469970703125, "learning_rate": 2.1538029477195522e-08, "loss": 16.0271, "step": 486700 }, { "epoch": 0.9831849933539919, "grad_norm": 147.05458068847656, "learning_rate": 2.1505676867704105e-08, "loss": 15.6037, "step": 486710 }, { "epoch": 0.9832051939866756, "grad_norm": 167.15638732910156, "learning_rate": 2.1473348522790262e-08, "loss": 13.1396, "step": 486720 }, { "epoch": 0.9832253946193594, "grad_norm": 405.22979736328125, "learning_rate": 2.1441044442611634e-08, "loss": 22.5856, "step": 486730 }, { "epoch": 0.9832455952520432, "grad_norm": 23.794240951538086, "learning_rate": 2.1408764627325883e-08, "loss": 14.2671, "step": 486740 }, { "epoch": 0.9832657958847271, "grad_norm": 264.0993957519531, "learning_rate": 2.1376509077089546e-08, "loss": 17.3019, "step": 486750 }, { "epoch": 0.9832859965174109, "grad_norm": 236.44725036621094, "learning_rate": 2.1344277792060275e-08, "loss": 12.0019, "step": 486760 }, { "epoch": 0.9833061971500947, "grad_norm": 31.286575317382812, "learning_rate": 2.1312070772395165e-08, "loss": 13.8529, "step": 486770 }, { "epoch": 0.9833263977827785, "grad_norm": 69.27201080322266, "learning_rate": 2.1279888018251317e-08, "loss": 21.4398, "step": 486780 }, { "epoch": 0.9833465984154623, "grad_norm": 474.1777038574219, "learning_rate": 2.1247729529785822e-08, "loss": 19.1745, "step": 486790 }, { "epoch": 0.9833667990481462, "grad_norm": 66.36163330078125, "learning_rate": 2.1215595307154667e-08, "loss": 9.6001, "step": 486800 }, { "epoch": 0.98338699968083, "grad_norm": 378.3507385253906, "learning_rate": 2.1183485350514397e-08, "loss": 12.8704, "step": 486810 }, { "epoch": 0.9834072003135138, "grad_norm": 382.94683837890625, "learning_rate": 2.1151399660022664e-08, "loss": 16.8565, "step": 486820 }, { "epoch": 0.9834274009461976, "grad_norm": 455.8929748535156, "learning_rate": 2.1119338235834897e-08, "loss": 13.8287, "step": 486830 }, { "epoch": 0.9834476015788814, "grad_norm": 188.6612091064453, "learning_rate": 2.1087301078107637e-08, "loss": 18.3839, "step": 486840 }, { "epoch": 0.9834678022115653, "grad_norm": 188.607666015625, "learning_rate": 2.105528818699687e-08, "loss": 15.9933, "step": 486850 }, { "epoch": 0.9834880028442491, "grad_norm": 273.63031005859375, "learning_rate": 2.1023299562658584e-08, "loss": 11.8442, "step": 486860 }, { "epoch": 0.9835082034769329, "grad_norm": 0.0, "learning_rate": 2.0991335205249318e-08, "loss": 15.2303, "step": 486870 }, { "epoch": 0.9835284041096167, "grad_norm": 430.9086608886719, "learning_rate": 2.0959395114923954e-08, "loss": 18.0465, "step": 486880 }, { "epoch": 0.9835486047423005, "grad_norm": 1019.9697265625, "learning_rate": 2.0927479291839024e-08, "loss": 18.0296, "step": 486890 }, { "epoch": 0.9835688053749844, "grad_norm": 237.91407775878906, "learning_rate": 2.0895587736149414e-08, "loss": 17.7302, "step": 486900 }, { "epoch": 0.9835890060076682, "grad_norm": 375.41259765625, "learning_rate": 2.0863720448011106e-08, "loss": 11.8425, "step": 486910 }, { "epoch": 0.983609206640352, "grad_norm": 411.8083190917969, "learning_rate": 2.0831877427578974e-08, "loss": 17.8789, "step": 486920 }, { "epoch": 0.9836294072730358, "grad_norm": 517.0942993164062, "learning_rate": 2.0800058675007894e-08, "loss": 27.8522, "step": 486930 }, { "epoch": 0.9836496079057196, "grad_norm": 201.32533264160156, "learning_rate": 2.076826419045386e-08, "loss": 12.8727, "step": 486940 }, { "epoch": 0.9836698085384035, "grad_norm": 190.80589294433594, "learning_rate": 2.0736493974071736e-08, "loss": 18.4179, "step": 486950 }, { "epoch": 0.9836900091710873, "grad_norm": 454.38873291015625, "learning_rate": 2.0704748026015298e-08, "loss": 12.353, "step": 486960 }, { "epoch": 0.9837102098037711, "grad_norm": 261.8936767578125, "learning_rate": 2.0673026346440526e-08, "loss": 20.5521, "step": 486970 }, { "epoch": 0.9837304104364548, "grad_norm": 331.35302734375, "learning_rate": 2.0641328935501748e-08, "loss": 34.3284, "step": 486980 }, { "epoch": 0.9837506110691386, "grad_norm": 309.5794372558594, "learning_rate": 2.0609655793352724e-08, "loss": 17.0877, "step": 486990 }, { "epoch": 0.9837708117018225, "grad_norm": 479.65631103515625, "learning_rate": 2.057800692014833e-08, "loss": 17.5659, "step": 487000 }, { "epoch": 0.9837910123345063, "grad_norm": 251.328369140625, "learning_rate": 2.054638231604289e-08, "loss": 18.0924, "step": 487010 }, { "epoch": 0.9838112129671901, "grad_norm": 547.3744506835938, "learning_rate": 2.051478198119017e-08, "loss": 17.2195, "step": 487020 }, { "epoch": 0.9838314135998739, "grad_norm": 148.19195556640625, "learning_rate": 2.0483205915745042e-08, "loss": 20.8214, "step": 487030 }, { "epoch": 0.9838516142325577, "grad_norm": 314.1408386230469, "learning_rate": 2.0451654119860164e-08, "loss": 19.398, "step": 487040 }, { "epoch": 0.9838718148652416, "grad_norm": 207.7755584716797, "learning_rate": 2.0420126593690416e-08, "loss": 11.6213, "step": 487050 }, { "epoch": 0.9838920154979254, "grad_norm": 164.70834350585938, "learning_rate": 2.0388623337389003e-08, "loss": 28.0964, "step": 487060 }, { "epoch": 0.9839122161306092, "grad_norm": 182.27505493164062, "learning_rate": 2.0357144351109693e-08, "loss": 11.2916, "step": 487070 }, { "epoch": 0.983932416763293, "grad_norm": 1601.4581298828125, "learning_rate": 2.0325689635005142e-08, "loss": 22.9281, "step": 487080 }, { "epoch": 0.9839526173959768, "grad_norm": 120.6426773071289, "learning_rate": 2.029425918922967e-08, "loss": 13.2163, "step": 487090 }, { "epoch": 0.9839728180286607, "grad_norm": 179.578369140625, "learning_rate": 2.026285301393538e-08, "loss": 12.8361, "step": 487100 }, { "epoch": 0.9839930186613445, "grad_norm": 227.57310485839844, "learning_rate": 2.023147110927659e-08, "loss": 12.8364, "step": 487110 }, { "epoch": 0.9840132192940283, "grad_norm": 216.75086975097656, "learning_rate": 2.020011347540596e-08, "loss": 17.3644, "step": 487120 }, { "epoch": 0.9840334199267121, "grad_norm": 250.1388702392578, "learning_rate": 2.016878011247503e-08, "loss": 20.2271, "step": 487130 }, { "epoch": 0.9840536205593959, "grad_norm": 75.4766616821289, "learning_rate": 2.013747102063812e-08, "loss": 10.7925, "step": 487140 }, { "epoch": 0.9840738211920798, "grad_norm": 337.7696838378906, "learning_rate": 2.010618620004734e-08, "loss": 17.7548, "step": 487150 }, { "epoch": 0.9840940218247636, "grad_norm": 175.10000610351562, "learning_rate": 2.0074925650854226e-08, "loss": 20.6679, "step": 487160 }, { "epoch": 0.9841142224574474, "grad_norm": 113.11949920654297, "learning_rate": 2.004368937321255e-08, "loss": 10.9412, "step": 487170 }, { "epoch": 0.9841344230901312, "grad_norm": 260.4883117675781, "learning_rate": 2.0012477367273854e-08, "loss": 11.9149, "step": 487180 }, { "epoch": 0.984154623722815, "grad_norm": 276.24774169921875, "learning_rate": 1.9981289633190237e-08, "loss": 19.1066, "step": 487190 }, { "epoch": 0.9841748243554989, "grad_norm": 267.4715881347656, "learning_rate": 1.995012617111436e-08, "loss": 13.6057, "step": 487200 }, { "epoch": 0.9841950249881827, "grad_norm": 382.16961669921875, "learning_rate": 1.9918986981196653e-08, "loss": 21.8432, "step": 487210 }, { "epoch": 0.9842152256208665, "grad_norm": 321.4910583496094, "learning_rate": 1.988787206359033e-08, "loss": 17.212, "step": 487220 }, { "epoch": 0.9842354262535503, "grad_norm": 383.756591796875, "learning_rate": 1.985678141844638e-08, "loss": 13.3955, "step": 487230 }, { "epoch": 0.984255626886234, "grad_norm": 275.9746398925781, "learning_rate": 1.9825715045916905e-08, "loss": 20.2973, "step": 487240 }, { "epoch": 0.9842758275189178, "grad_norm": 169.0533447265625, "learning_rate": 1.9794672946152337e-08, "loss": 17.8191, "step": 487250 }, { "epoch": 0.9842960281516017, "grad_norm": 301.2450866699219, "learning_rate": 1.9763655119304227e-08, "loss": 11.1227, "step": 487260 }, { "epoch": 0.9843162287842855, "grad_norm": 179.3243865966797, "learning_rate": 1.973266156552467e-08, "loss": 20.714, "step": 487270 }, { "epoch": 0.9843364294169693, "grad_norm": 629.1900024414062, "learning_rate": 1.9701692284963547e-08, "loss": 21.4526, "step": 487280 }, { "epoch": 0.9843566300496531, "grad_norm": 495.31561279296875, "learning_rate": 1.967074727777296e-08, "loss": 26.6525, "step": 487290 }, { "epoch": 0.984376830682337, "grad_norm": 18.64380645751953, "learning_rate": 1.963982654410279e-08, "loss": 11.236, "step": 487300 }, { "epoch": 0.9843970313150208, "grad_norm": 253.85336303710938, "learning_rate": 1.9608930084104027e-08, "loss": 13.1734, "step": 487310 }, { "epoch": 0.9844172319477046, "grad_norm": 320.97882080078125, "learning_rate": 1.9578057897927104e-08, "loss": 21.7728, "step": 487320 }, { "epoch": 0.9844374325803884, "grad_norm": 320.5849914550781, "learning_rate": 1.9547209985723015e-08, "loss": 15.2387, "step": 487330 }, { "epoch": 0.9844576332130722, "grad_norm": 160.2686767578125, "learning_rate": 1.9516386347641636e-08, "loss": 30.8996, "step": 487340 }, { "epoch": 0.984477833845756, "grad_norm": 55.73615646362305, "learning_rate": 1.9485586983833404e-08, "loss": 16.7139, "step": 487350 }, { "epoch": 0.9844980344784399, "grad_norm": 243.82052612304688, "learning_rate": 1.94548118944482e-08, "loss": 8.9158, "step": 487360 }, { "epoch": 0.9845182351111237, "grad_norm": 200.86695861816406, "learning_rate": 1.9424061079636458e-08, "loss": 20.3272, "step": 487370 }, { "epoch": 0.9845384357438075, "grad_norm": 431.8441162109375, "learning_rate": 1.9393334539547505e-08, "loss": 18.077, "step": 487380 }, { "epoch": 0.9845586363764913, "grad_norm": 498.93487548828125, "learning_rate": 1.9362632274331215e-08, "loss": 24.6046, "step": 487390 }, { "epoch": 0.9845788370091751, "grad_norm": 226.7593536376953, "learning_rate": 1.9331954284137476e-08, "loss": 11.3064, "step": 487400 }, { "epoch": 0.984599037641859, "grad_norm": 512.7572021484375, "learning_rate": 1.9301300569116165e-08, "loss": 10.7101, "step": 487410 }, { "epoch": 0.9846192382745428, "grad_norm": 370.7438049316406, "learning_rate": 1.9270671129415496e-08, "loss": 23.7001, "step": 487420 }, { "epoch": 0.9846394389072266, "grad_norm": 154.5172119140625, "learning_rate": 1.9240065965185907e-08, "loss": 13.9947, "step": 487430 }, { "epoch": 0.9846596395399104, "grad_norm": 373.3928527832031, "learning_rate": 1.9209485076576718e-08, "loss": 19.3025, "step": 487440 }, { "epoch": 0.9846798401725942, "grad_norm": 524.5455322265625, "learning_rate": 1.9178928463735593e-08, "loss": 17.3592, "step": 487450 }, { "epoch": 0.9847000408052781, "grad_norm": 0.0, "learning_rate": 1.9148396126812407e-08, "loss": 21.9097, "step": 487460 }, { "epoch": 0.9847202414379619, "grad_norm": 153.7161102294922, "learning_rate": 1.9117888065955938e-08, "loss": 7.4523, "step": 487470 }, { "epoch": 0.9847404420706457, "grad_norm": 413.46221923828125, "learning_rate": 1.908740428131495e-08, "loss": 15.7865, "step": 487480 }, { "epoch": 0.9847606427033294, "grad_norm": 6.001185894012451, "learning_rate": 1.9056944773037656e-08, "loss": 14.429, "step": 487490 }, { "epoch": 0.9847808433360132, "grad_norm": 147.7904510498047, "learning_rate": 1.9026509541272276e-08, "loss": 12.247, "step": 487500 }, { "epoch": 0.984801043968697, "grad_norm": 375.27838134765625, "learning_rate": 1.8996098586168132e-08, "loss": 8.6651, "step": 487510 }, { "epoch": 0.9848212446013809, "grad_norm": 250.44415283203125, "learning_rate": 1.8965711907872885e-08, "loss": 23.1334, "step": 487520 }, { "epoch": 0.9848414452340647, "grad_norm": 329.9169006347656, "learning_rate": 1.8935349506534195e-08, "loss": 24.2585, "step": 487530 }, { "epoch": 0.9848616458667485, "grad_norm": 485.7986755371094, "learning_rate": 1.890501138230083e-08, "loss": 28.2031, "step": 487540 }, { "epoch": 0.9848818464994323, "grad_norm": 291.9529724121094, "learning_rate": 1.8874697535319897e-08, "loss": 14.7063, "step": 487550 }, { "epoch": 0.9849020471321162, "grad_norm": 431.1380615234375, "learning_rate": 1.8844407965740165e-08, "loss": 19.2102, "step": 487560 }, { "epoch": 0.9849222477648, "grad_norm": 294.2987976074219, "learning_rate": 1.881414267370818e-08, "loss": 26.2314, "step": 487570 }, { "epoch": 0.9849424483974838, "grad_norm": 241.67819213867188, "learning_rate": 1.8783901659372162e-08, "loss": 12.028, "step": 487580 }, { "epoch": 0.9849626490301676, "grad_norm": 168.99851989746094, "learning_rate": 1.875368492287921e-08, "loss": 13.3972, "step": 487590 }, { "epoch": 0.9849828496628514, "grad_norm": 395.69635009765625, "learning_rate": 1.8723492464376992e-08, "loss": 16.6246, "step": 487600 }, { "epoch": 0.9850030502955353, "grad_norm": 239.0929412841797, "learning_rate": 1.8693324284011495e-08, "loss": 30.1394, "step": 487610 }, { "epoch": 0.9850232509282191, "grad_norm": 283.23016357421875, "learning_rate": 1.8663180381931488e-08, "loss": 18.1366, "step": 487620 }, { "epoch": 0.9850434515609029, "grad_norm": 0.0, "learning_rate": 1.8633060758282418e-08, "loss": 13.3518, "step": 487630 }, { "epoch": 0.9850636521935867, "grad_norm": 390.0324401855469, "learning_rate": 1.860296541321138e-08, "loss": 14.8397, "step": 487640 }, { "epoch": 0.9850838528262705, "grad_norm": 193.6684112548828, "learning_rate": 1.8572894346866043e-08, "loss": 6.3827, "step": 487650 }, { "epoch": 0.9851040534589544, "grad_norm": 249.24124145507812, "learning_rate": 1.854284755939184e-08, "loss": 32.1922, "step": 487660 }, { "epoch": 0.9851242540916382, "grad_norm": 5.052976131439209, "learning_rate": 1.8512825050935323e-08, "loss": 14.5054, "step": 487670 }, { "epoch": 0.985144454724322, "grad_norm": 251.10342407226562, "learning_rate": 1.8482826821643596e-08, "loss": 36.6727, "step": 487680 }, { "epoch": 0.9851646553570058, "grad_norm": 87.64988708496094, "learning_rate": 1.8452852871662653e-08, "loss": 8.2042, "step": 487690 }, { "epoch": 0.9851848559896896, "grad_norm": 18.71599006652832, "learning_rate": 1.842290320113793e-08, "loss": 21.0584, "step": 487700 }, { "epoch": 0.9852050566223735, "grad_norm": 276.3898620605469, "learning_rate": 1.839297781021543e-08, "loss": 24.6668, "step": 487710 }, { "epoch": 0.9852252572550573, "grad_norm": 216.9844512939453, "learning_rate": 1.8363076699041695e-08, "loss": 16.0389, "step": 487720 }, { "epoch": 0.9852454578877411, "grad_norm": 240.8494415283203, "learning_rate": 1.8333199867762163e-08, "loss": 15.7165, "step": 487730 }, { "epoch": 0.9852656585204249, "grad_norm": 479.3291931152344, "learning_rate": 1.830334731652228e-08, "loss": 24.8963, "step": 487740 }, { "epoch": 0.9852858591531086, "grad_norm": 6.0796098709106445, "learning_rate": 1.8273519045468035e-08, "loss": 11.4292, "step": 487750 }, { "epoch": 0.9853060597857924, "grad_norm": 55.927310943603516, "learning_rate": 1.8243715054744315e-08, "loss": 19.4176, "step": 487760 }, { "epoch": 0.9853262604184763, "grad_norm": 225.20668029785156, "learning_rate": 1.8213935344496002e-08, "loss": 12.6681, "step": 487770 }, { "epoch": 0.9853464610511601, "grad_norm": 70.58676147460938, "learning_rate": 1.8184179914869093e-08, "loss": 10.7999, "step": 487780 }, { "epoch": 0.9853666616838439, "grad_norm": 230.5763702392578, "learning_rate": 1.815444876600847e-08, "loss": 12.6158, "step": 487790 }, { "epoch": 0.9853868623165277, "grad_norm": 47.597259521484375, "learning_rate": 1.8124741898058462e-08, "loss": 15.8652, "step": 487800 }, { "epoch": 0.9854070629492115, "grad_norm": 160.1806182861328, "learning_rate": 1.8095059311164508e-08, "loss": 15.9682, "step": 487810 }, { "epoch": 0.9854272635818954, "grad_norm": 55.63274002075195, "learning_rate": 1.8065401005470938e-08, "loss": 17.0484, "step": 487820 }, { "epoch": 0.9854474642145792, "grad_norm": 624.5726928710938, "learning_rate": 1.803576698112264e-08, "loss": 20.1198, "step": 487830 }, { "epoch": 0.985467664847263, "grad_norm": 268.1905517578125, "learning_rate": 1.8006157238263376e-08, "loss": 22.8521, "step": 487840 }, { "epoch": 0.9854878654799468, "grad_norm": 285.8241271972656, "learning_rate": 1.7976571777038044e-08, "loss": 14.8861, "step": 487850 }, { "epoch": 0.9855080661126306, "grad_norm": 328.4594421386719, "learning_rate": 1.7947010597590408e-08, "loss": 6.3186, "step": 487860 }, { "epoch": 0.9855282667453145, "grad_norm": 341.7926940917969, "learning_rate": 1.791747370006536e-08, "loss": 21.5587, "step": 487870 }, { "epoch": 0.9855484673779983, "grad_norm": 234.67457580566406, "learning_rate": 1.7887961084605554e-08, "loss": 18.5226, "step": 487880 }, { "epoch": 0.9855686680106821, "grad_norm": 202.00758361816406, "learning_rate": 1.7858472751355883e-08, "loss": 19.0618, "step": 487890 }, { "epoch": 0.9855888686433659, "grad_norm": 240.0037384033203, "learning_rate": 1.7829008700460116e-08, "loss": 26.7407, "step": 487900 }, { "epoch": 0.9856090692760497, "grad_norm": 428.3014831542969, "learning_rate": 1.779956893206092e-08, "loss": 24.3507, "step": 487910 }, { "epoch": 0.9856292699087336, "grad_norm": 277.4865417480469, "learning_rate": 1.7770153446302618e-08, "loss": 16.4346, "step": 487920 }, { "epoch": 0.9856494705414174, "grad_norm": 143.16390991210938, "learning_rate": 1.7740762243328435e-08, "loss": 15.1754, "step": 487930 }, { "epoch": 0.9856696711741012, "grad_norm": 261.26666259765625, "learning_rate": 1.7711395323281588e-08, "loss": 17.8769, "step": 487940 }, { "epoch": 0.985689871806785, "grad_norm": 332.67401123046875, "learning_rate": 1.768205268630474e-08, "loss": 11.723, "step": 487950 }, { "epoch": 0.9857100724394688, "grad_norm": 798.5401000976562, "learning_rate": 1.765273433254111e-08, "loss": 12.4922, "step": 487960 }, { "epoch": 0.9857302730721527, "grad_norm": 474.3741149902344, "learning_rate": 1.7623440262134472e-08, "loss": 28.1277, "step": 487970 }, { "epoch": 0.9857504737048365, "grad_norm": 431.0119934082031, "learning_rate": 1.759417047522638e-08, "loss": 21.4646, "step": 487980 }, { "epoch": 0.9857706743375203, "grad_norm": 162.62039184570312, "learning_rate": 1.756492497196005e-08, "loss": 26.5215, "step": 487990 }, { "epoch": 0.985790874970204, "grad_norm": 321.650634765625, "learning_rate": 1.753570375247815e-08, "loss": 21.9883, "step": 488000 }, { "epoch": 0.9858110756028878, "grad_norm": 0.0, "learning_rate": 1.7506506816923342e-08, "loss": 25.8632, "step": 488010 }, { "epoch": 0.9858312762355717, "grad_norm": 307.39825439453125, "learning_rate": 1.747733416543662e-08, "loss": 9.4321, "step": 488020 }, { "epoch": 0.9858514768682555, "grad_norm": 279.2649841308594, "learning_rate": 1.7448185798161765e-08, "loss": 11.7379, "step": 488030 }, { "epoch": 0.9858716775009393, "grad_norm": 608.5477294921875, "learning_rate": 1.741906171523977e-08, "loss": 22.1897, "step": 488040 }, { "epoch": 0.9858918781336231, "grad_norm": 325.70849609375, "learning_rate": 1.73899619168133e-08, "loss": 11.8657, "step": 488050 }, { "epoch": 0.9859120787663069, "grad_norm": 14.203507423400879, "learning_rate": 1.7360886403023358e-08, "loss": 8.2065, "step": 488060 }, { "epoch": 0.9859322793989908, "grad_norm": 117.48062133789062, "learning_rate": 1.7331835174012602e-08, "loss": 14.0592, "step": 488070 }, { "epoch": 0.9859524800316746, "grad_norm": 118.30371856689453, "learning_rate": 1.7302808229921476e-08, "loss": 9.5321, "step": 488080 }, { "epoch": 0.9859726806643584, "grad_norm": 0.0, "learning_rate": 1.7273805570892643e-08, "loss": 17.0119, "step": 488090 }, { "epoch": 0.9859928812970422, "grad_norm": 230.1846466064453, "learning_rate": 1.7244827197067103e-08, "loss": 11.6632, "step": 488100 }, { "epoch": 0.986013081929726, "grad_norm": 950.8536987304688, "learning_rate": 1.7215873108585858e-08, "loss": 39.8514, "step": 488110 }, { "epoch": 0.9860332825624099, "grad_norm": 357.2371520996094, "learning_rate": 1.71869433055899e-08, "loss": 15.9341, "step": 488120 }, { "epoch": 0.9860534831950937, "grad_norm": 358.3096618652344, "learning_rate": 1.7158037788220782e-08, "loss": 13.2133, "step": 488130 }, { "epoch": 0.9860736838277775, "grad_norm": 325.31451416015625, "learning_rate": 1.7129156556618398e-08, "loss": 22.9959, "step": 488140 }, { "epoch": 0.9860938844604613, "grad_norm": 290.09014892578125, "learning_rate": 1.7100299610924297e-08, "loss": 15.1126, "step": 488150 }, { "epoch": 0.9861140850931451, "grad_norm": 303.61358642578125, "learning_rate": 1.707146695127948e-08, "loss": 9.4876, "step": 488160 }, { "epoch": 0.986134285725829, "grad_norm": 228.52444458007812, "learning_rate": 1.7042658577823833e-08, "loss": 31.0255, "step": 488170 }, { "epoch": 0.9861544863585128, "grad_norm": 215.50685119628906, "learning_rate": 1.7013874490697802e-08, "loss": 10.9896, "step": 488180 }, { "epoch": 0.9861746869911966, "grad_norm": 194.21315002441406, "learning_rate": 1.6985114690041825e-08, "loss": 18.2948, "step": 488190 }, { "epoch": 0.9861948876238804, "grad_norm": 199.46400451660156, "learning_rate": 1.6956379175995796e-08, "loss": 6.3744, "step": 488200 }, { "epoch": 0.9862150882565642, "grad_norm": 154.3696746826172, "learning_rate": 1.6927667948700155e-08, "loss": 12.2599, "step": 488210 }, { "epoch": 0.986235288889248, "grad_norm": 0.0, "learning_rate": 1.689898100829479e-08, "loss": 11.7021, "step": 488220 }, { "epoch": 0.9862554895219319, "grad_norm": 373.691162109375, "learning_rate": 1.687031835491959e-08, "loss": 24.5346, "step": 488230 }, { "epoch": 0.9862756901546157, "grad_norm": 129.77389526367188, "learning_rate": 1.6841679988713332e-08, "loss": 13.8806, "step": 488240 }, { "epoch": 0.9862958907872995, "grad_norm": 200.0734100341797, "learning_rate": 1.681306590981702e-08, "loss": 20.9077, "step": 488250 }, { "epoch": 0.9863160914199832, "grad_norm": 319.417236328125, "learning_rate": 1.678447611836942e-08, "loss": 20.4569, "step": 488260 }, { "epoch": 0.986336292052667, "grad_norm": 203.24192810058594, "learning_rate": 1.6755910614509872e-08, "loss": 13.9135, "step": 488270 }, { "epoch": 0.9863564926853509, "grad_norm": 249.82212829589844, "learning_rate": 1.6727369398377158e-08, "loss": 19.4762, "step": 488280 }, { "epoch": 0.9863766933180347, "grad_norm": 372.3239440917969, "learning_rate": 1.669885247011116e-08, "loss": 11.3219, "step": 488290 }, { "epoch": 0.9863968939507185, "grad_norm": 346.02032470703125, "learning_rate": 1.6670359829850657e-08, "loss": 23.3714, "step": 488300 }, { "epoch": 0.9864170945834023, "grad_norm": 183.74671936035156, "learning_rate": 1.664189147773443e-08, "loss": 11.1664, "step": 488310 }, { "epoch": 0.9864372952160861, "grad_norm": 141.95510864257812, "learning_rate": 1.6613447413900696e-08, "loss": 17.8322, "step": 488320 }, { "epoch": 0.98645749584877, "grad_norm": 208.08604431152344, "learning_rate": 1.6585027638489347e-08, "loss": 17.2616, "step": 488330 }, { "epoch": 0.9864776964814538, "grad_norm": 189.37168884277344, "learning_rate": 1.655663215163805e-08, "loss": 8.0663, "step": 488340 }, { "epoch": 0.9864978971141376, "grad_norm": 154.3152313232422, "learning_rate": 1.6528260953484476e-08, "loss": 16.4016, "step": 488350 }, { "epoch": 0.9865180977468214, "grad_norm": 193.22824096679688, "learning_rate": 1.6499914044168508e-08, "loss": 13.7439, "step": 488360 }, { "epoch": 0.9865382983795052, "grad_norm": 146.596435546875, "learning_rate": 1.6471591423827817e-08, "loss": 14.3777, "step": 488370 }, { "epoch": 0.9865584990121891, "grad_norm": 277.32025146484375, "learning_rate": 1.644329309259951e-08, "loss": 23.4403, "step": 488380 }, { "epoch": 0.9865786996448729, "grad_norm": 207.35980224609375, "learning_rate": 1.6415019050622373e-08, "loss": 20.5025, "step": 488390 }, { "epoch": 0.9865989002775567, "grad_norm": 347.7386779785156, "learning_rate": 1.6386769298034067e-08, "loss": 16.721, "step": 488400 }, { "epoch": 0.9866191009102405, "grad_norm": 334.52081298828125, "learning_rate": 1.635854383497226e-08, "loss": 10.623, "step": 488410 }, { "epoch": 0.9866393015429243, "grad_norm": 184.0161590576172, "learning_rate": 1.6330342661574072e-08, "loss": 8.1553, "step": 488420 }, { "epoch": 0.9866595021756082, "grad_norm": 477.0707092285156, "learning_rate": 1.6302165777977718e-08, "loss": 15.1807, "step": 488430 }, { "epoch": 0.986679702808292, "grad_norm": 603.2002563476562, "learning_rate": 1.6274013184319757e-08, "loss": 23.8915, "step": 488440 }, { "epoch": 0.9866999034409758, "grad_norm": 259.2967224121094, "learning_rate": 1.6245884880738415e-08, "loss": 9.3005, "step": 488450 }, { "epoch": 0.9867201040736596, "grad_norm": 37.519474029541016, "learning_rate": 1.621778086736969e-08, "loss": 17.446, "step": 488460 }, { "epoch": 0.9867403047063434, "grad_norm": 290.7611999511719, "learning_rate": 1.6189701144351254e-08, "loss": 18.5315, "step": 488470 }, { "epoch": 0.9867605053390273, "grad_norm": 635.7722778320312, "learning_rate": 1.6161645711819664e-08, "loss": 28.1317, "step": 488480 }, { "epoch": 0.9867807059717111, "grad_norm": 97.23357391357422, "learning_rate": 1.6133614569912027e-08, "loss": 13.3002, "step": 488490 }, { "epoch": 0.9868009066043949, "grad_norm": 935.350830078125, "learning_rate": 1.610560771876435e-08, "loss": 19.8366, "step": 488500 }, { "epoch": 0.9868211072370787, "grad_norm": 208.09727478027344, "learning_rate": 1.607762515851319e-08, "loss": 15.4066, "step": 488510 }, { "epoch": 0.9868413078697624, "grad_norm": 336.455078125, "learning_rate": 1.6049666889295657e-08, "loss": 22.3513, "step": 488520 }, { "epoch": 0.9868615085024462, "grad_norm": 0.0, "learning_rate": 1.6021732911247756e-08, "loss": 16.3491, "step": 488530 }, { "epoch": 0.9868817091351301, "grad_norm": 478.55029296875, "learning_rate": 1.5993823224504935e-08, "loss": 22.3994, "step": 488540 }, { "epoch": 0.9869019097678139, "grad_norm": 412.5328674316406, "learning_rate": 1.5965937829204302e-08, "loss": 18.8308, "step": 488550 }, { "epoch": 0.9869221104004977, "grad_norm": 117.3507080078125, "learning_rate": 1.5938076725480756e-08, "loss": 15.6856, "step": 488560 }, { "epoch": 0.9869423110331815, "grad_norm": 202.3659210205078, "learning_rate": 1.5910239913470292e-08, "loss": 9.3767, "step": 488570 }, { "epoch": 0.9869625116658653, "grad_norm": 101.88932800292969, "learning_rate": 1.5882427393309475e-08, "loss": 12.814, "step": 488580 }, { "epoch": 0.9869827122985492, "grad_norm": 293.5104064941406, "learning_rate": 1.585463916513319e-08, "loss": 13.3986, "step": 488590 }, { "epoch": 0.987002912931233, "grad_norm": 396.2603759765625, "learning_rate": 1.582687522907633e-08, "loss": 14.7102, "step": 488600 }, { "epoch": 0.9870231135639168, "grad_norm": 360.0671691894531, "learning_rate": 1.5799135585274906e-08, "loss": 23.0482, "step": 488610 }, { "epoch": 0.9870433141966006, "grad_norm": 468.65740966796875, "learning_rate": 1.5771420233864355e-08, "loss": 17.836, "step": 488620 }, { "epoch": 0.9870635148292844, "grad_norm": 145.20582580566406, "learning_rate": 1.5743729174979016e-08, "loss": 9.367, "step": 488630 }, { "epoch": 0.9870837154619683, "grad_norm": 340.53564453125, "learning_rate": 1.571606240875434e-08, "loss": 26.3723, "step": 488640 }, { "epoch": 0.9871039160946521, "grad_norm": 8.510629653930664, "learning_rate": 1.5688419935325216e-08, "loss": 10.1287, "step": 488650 }, { "epoch": 0.9871241167273359, "grad_norm": 99.44361877441406, "learning_rate": 1.5660801754825983e-08, "loss": 11.9617, "step": 488660 }, { "epoch": 0.9871443173600197, "grad_norm": 163.0398406982422, "learning_rate": 1.563320786739153e-08, "loss": 13.9467, "step": 488670 }, { "epoch": 0.9871645179927035, "grad_norm": 365.76470947265625, "learning_rate": 1.56056382731562e-08, "loss": 20.5186, "step": 488680 }, { "epoch": 0.9871847186253874, "grad_norm": 83.98016357421875, "learning_rate": 1.5578092972254875e-08, "loss": 16.4393, "step": 488690 }, { "epoch": 0.9872049192580712, "grad_norm": 113.0829849243164, "learning_rate": 1.5550571964820793e-08, "loss": 24.4147, "step": 488700 }, { "epoch": 0.987225119890755, "grad_norm": 82.530029296875, "learning_rate": 1.5523075250989395e-08, "loss": 6.054, "step": 488710 }, { "epoch": 0.9872453205234388, "grad_norm": 102.75135040283203, "learning_rate": 1.5495602830893354e-08, "loss": 22.1198, "step": 488720 }, { "epoch": 0.9872655211561226, "grad_norm": 521.2152709960938, "learning_rate": 1.546815470466756e-08, "loss": 35.0327, "step": 488730 }, { "epoch": 0.9872857217888065, "grad_norm": 69.88471984863281, "learning_rate": 1.5440730872445242e-08, "loss": 26.4921, "step": 488740 }, { "epoch": 0.9873059224214903, "grad_norm": 210.11477661132812, "learning_rate": 1.541333133436018e-08, "loss": 20.2044, "step": 488750 }, { "epoch": 0.9873261230541741, "grad_norm": 0.0, "learning_rate": 1.538595609054616e-08, "loss": 10.2439, "step": 488760 }, { "epoch": 0.9873463236868578, "grad_norm": 185.92269897460938, "learning_rate": 1.5358605141136407e-08, "loss": 16.2549, "step": 488770 }, { "epoch": 0.9873665243195416, "grad_norm": 130.92626953125, "learning_rate": 1.5331278486264144e-08, "loss": 11.7812, "step": 488780 }, { "epoch": 0.9873867249522255, "grad_norm": 868.0314331054688, "learning_rate": 1.53039761260626e-08, "loss": 12.8459, "step": 488790 }, { "epoch": 0.9874069255849093, "grad_norm": 553.2174682617188, "learning_rate": 1.5276698060665007e-08, "loss": 20.79, "step": 488800 }, { "epoch": 0.9874271262175931, "grad_norm": 190.422119140625, "learning_rate": 1.5249444290204584e-08, "loss": 22.2626, "step": 488810 }, { "epoch": 0.9874473268502769, "grad_norm": 213.2537841796875, "learning_rate": 1.5222214814812897e-08, "loss": 14.2655, "step": 488820 }, { "epoch": 0.9874675274829607, "grad_norm": 177.89871215820312, "learning_rate": 1.519500963462428e-08, "loss": 13.1173, "step": 488830 }, { "epoch": 0.9874877281156446, "grad_norm": 340.16180419921875, "learning_rate": 1.5167828749770853e-08, "loss": 18.5178, "step": 488840 }, { "epoch": 0.9875079287483284, "grad_norm": 0.0, "learning_rate": 1.5140672160384174e-08, "loss": 21.7909, "step": 488850 }, { "epoch": 0.9875281293810122, "grad_norm": 72.12262725830078, "learning_rate": 1.511353986659747e-08, "loss": 12.1323, "step": 488860 }, { "epoch": 0.987548330013696, "grad_norm": 119.32369995117188, "learning_rate": 1.508643186854286e-08, "loss": 11.6525, "step": 488870 }, { "epoch": 0.9875685306463798, "grad_norm": 15.226715087890625, "learning_rate": 1.505934816635246e-08, "loss": 17.8393, "step": 488880 }, { "epoch": 0.9875887312790637, "grad_norm": 317.7912292480469, "learning_rate": 1.503228876015783e-08, "loss": 8.1661, "step": 488890 }, { "epoch": 0.9876089319117475, "grad_norm": 242.69522094726562, "learning_rate": 1.500525365009109e-08, "loss": 23.9129, "step": 488900 }, { "epoch": 0.9876291325444313, "grad_norm": 283.3116760253906, "learning_rate": 1.4978242836284908e-08, "loss": 19.5075, "step": 488910 }, { "epoch": 0.9876493331771151, "grad_norm": 206.3544464111328, "learning_rate": 1.4951256318869733e-08, "loss": 17.1439, "step": 488920 }, { "epoch": 0.9876695338097989, "grad_norm": 216.89434814453125, "learning_rate": 1.4924294097977687e-08, "loss": 23.3198, "step": 488930 }, { "epoch": 0.9876897344424828, "grad_norm": 30.293317794799805, "learning_rate": 1.4897356173739774e-08, "loss": 8.9313, "step": 488940 }, { "epoch": 0.9877099350751666, "grad_norm": 446.0645751953125, "learning_rate": 1.4870442546287555e-08, "loss": 12.2558, "step": 488950 }, { "epoch": 0.9877301357078504, "grad_norm": 429.6090087890625, "learning_rate": 1.4843553215752037e-08, "loss": 6.9012, "step": 488960 }, { "epoch": 0.9877503363405342, "grad_norm": 241.11767578125, "learning_rate": 1.4816688182264782e-08, "loss": 28.3325, "step": 488970 }, { "epoch": 0.987770536973218, "grad_norm": 146.9576873779297, "learning_rate": 1.478984744595624e-08, "loss": 11.2696, "step": 488980 }, { "epoch": 0.9877907376059019, "grad_norm": 264.9900207519531, "learning_rate": 1.4763031006957417e-08, "loss": 18.4016, "step": 488990 }, { "epoch": 0.9878109382385857, "grad_norm": 46.155147552490234, "learning_rate": 1.4736238865398766e-08, "loss": 21.9187, "step": 489000 }, { "epoch": 0.9878311388712695, "grad_norm": 442.6540222167969, "learning_rate": 1.4709471021411293e-08, "loss": 23.4123, "step": 489010 }, { "epoch": 0.9878513395039533, "grad_norm": 304.92022705078125, "learning_rate": 1.4682727475124891e-08, "loss": 8.7293, "step": 489020 }, { "epoch": 0.987871540136637, "grad_norm": 366.9974365234375, "learning_rate": 1.4656008226670571e-08, "loss": 16.9375, "step": 489030 }, { "epoch": 0.9878917407693208, "grad_norm": 585.1427612304688, "learning_rate": 1.462931327617767e-08, "loss": 17.0381, "step": 489040 }, { "epoch": 0.9879119414020047, "grad_norm": 248.7589874267578, "learning_rate": 1.4602642623777752e-08, "loss": 16.0915, "step": 489050 }, { "epoch": 0.9879321420346885, "grad_norm": 436.0631103515625, "learning_rate": 1.4575996269599046e-08, "loss": 23.4857, "step": 489060 }, { "epoch": 0.9879523426673723, "grad_norm": 229.00271606445312, "learning_rate": 1.454937421377256e-08, "loss": 22.8875, "step": 489070 }, { "epoch": 0.9879725433000561, "grad_norm": 487.84930419921875, "learning_rate": 1.4522776456427635e-08, "loss": 12.5085, "step": 489080 }, { "epoch": 0.98799274393274, "grad_norm": 265.67041015625, "learning_rate": 1.4496202997694164e-08, "loss": 12.242, "step": 489090 }, { "epoch": 0.9880129445654238, "grad_norm": 365.039306640625, "learning_rate": 1.4469653837701491e-08, "loss": 20.5881, "step": 489100 }, { "epoch": 0.9880331451981076, "grad_norm": 148.48818969726562, "learning_rate": 1.4443128976579513e-08, "loss": 9.5963, "step": 489110 }, { "epoch": 0.9880533458307914, "grad_norm": 112.43424987792969, "learning_rate": 1.4416628414456457e-08, "loss": 15.4086, "step": 489120 }, { "epoch": 0.9880735464634752, "grad_norm": 338.1566162109375, "learning_rate": 1.4390152151462222e-08, "loss": 14.5803, "step": 489130 }, { "epoch": 0.988093747096159, "grad_norm": 154.32980346679688, "learning_rate": 1.4363700187725593e-08, "loss": 4.736, "step": 489140 }, { "epoch": 0.9881139477288429, "grad_norm": 88.50868225097656, "learning_rate": 1.4337272523375911e-08, "loss": 11.0857, "step": 489150 }, { "epoch": 0.9881341483615267, "grad_norm": 201.3150634765625, "learning_rate": 1.4310869158541408e-08, "loss": 12.3076, "step": 489160 }, { "epoch": 0.9881543489942105, "grad_norm": 176.51992797851562, "learning_rate": 1.4284490093351421e-08, "loss": 12.9454, "step": 489170 }, { "epoch": 0.9881745496268943, "grad_norm": 498.5993957519531, "learning_rate": 1.425813532793363e-08, "loss": 9.4879, "step": 489180 }, { "epoch": 0.9881947502595781, "grad_norm": 326.467529296875, "learning_rate": 1.4231804862417375e-08, "loss": 14.8705, "step": 489190 }, { "epoch": 0.988214950892262, "grad_norm": 252.15155029296875, "learning_rate": 1.4205498696930332e-08, "loss": 10.7357, "step": 489200 }, { "epoch": 0.9882351515249458, "grad_norm": 242.57083129882812, "learning_rate": 1.4179216831601284e-08, "loss": 17.2078, "step": 489210 }, { "epoch": 0.9882553521576296, "grad_norm": 63.63706588745117, "learning_rate": 1.4152959266557354e-08, "loss": 8.6665, "step": 489220 }, { "epoch": 0.9882755527903134, "grad_norm": 281.6227111816406, "learning_rate": 1.4126726001927882e-08, "loss": 14.7733, "step": 489230 }, { "epoch": 0.9882957534229972, "grad_norm": 221.778076171875, "learning_rate": 1.4100517037839989e-08, "loss": 17.674, "step": 489240 }, { "epoch": 0.9883159540556811, "grad_norm": 52.52895736694336, "learning_rate": 1.4074332374421351e-08, "loss": 12.3258, "step": 489250 }, { "epoch": 0.9883361546883649, "grad_norm": 403.40509033203125, "learning_rate": 1.4048172011799643e-08, "loss": 24.0928, "step": 489260 }, { "epoch": 0.9883563553210487, "grad_norm": 154.4250030517578, "learning_rate": 1.4022035950102541e-08, "loss": 16.2239, "step": 489270 }, { "epoch": 0.9883765559537324, "grad_norm": 10.991928100585938, "learning_rate": 1.3995924189457167e-08, "loss": 13.4733, "step": 489280 }, { "epoch": 0.9883967565864162, "grad_norm": 225.697021484375, "learning_rate": 1.3969836729990637e-08, "loss": 12.9439, "step": 489290 }, { "epoch": 0.9884169572191001, "grad_norm": 50.451290130615234, "learning_rate": 1.3943773571831188e-08, "loss": 6.6792, "step": 489300 }, { "epoch": 0.9884371578517839, "grad_norm": 180.2344512939453, "learning_rate": 1.3917734715104269e-08, "loss": 28.7392, "step": 489310 }, { "epoch": 0.9884573584844677, "grad_norm": 481.76861572265625, "learning_rate": 1.3891720159938116e-08, "loss": 20.9584, "step": 489320 }, { "epoch": 0.9884775591171515, "grad_norm": 226.91014099121094, "learning_rate": 1.3865729906458735e-08, "loss": 15.3054, "step": 489330 }, { "epoch": 0.9884977597498353, "grad_norm": 404.64447021484375, "learning_rate": 1.3839763954792695e-08, "loss": 25.2543, "step": 489340 }, { "epoch": 0.9885179603825192, "grad_norm": 204.29371643066406, "learning_rate": 1.3813822305067115e-08, "loss": 16.9764, "step": 489350 }, { "epoch": 0.988538161015203, "grad_norm": 371.6661071777344, "learning_rate": 1.378790495740856e-08, "loss": 19.321, "step": 489360 }, { "epoch": 0.9885583616478868, "grad_norm": 472.3009033203125, "learning_rate": 1.376201191194304e-08, "loss": 16.0028, "step": 489370 }, { "epoch": 0.9885785622805706, "grad_norm": 213.62432861328125, "learning_rate": 1.3736143168796012e-08, "loss": 10.0731, "step": 489380 }, { "epoch": 0.9885987629132544, "grad_norm": 146.18003845214844, "learning_rate": 1.371029872809515e-08, "loss": 19.0526, "step": 489390 }, { "epoch": 0.9886189635459383, "grad_norm": 227.52987670898438, "learning_rate": 1.3684478589964801e-08, "loss": 16.331, "step": 489400 }, { "epoch": 0.9886391641786221, "grad_norm": 1322.4578857421875, "learning_rate": 1.3658682754532082e-08, "loss": 29.2223, "step": 489410 }, { "epoch": 0.9886593648113059, "grad_norm": 0.20874613523483276, "learning_rate": 1.3632911221921896e-08, "loss": 14.9838, "step": 489420 }, { "epoch": 0.9886795654439897, "grad_norm": 350.9067687988281, "learning_rate": 1.3607163992259697e-08, "loss": 11.9115, "step": 489430 }, { "epoch": 0.9886997660766735, "grad_norm": 310.6279602050781, "learning_rate": 1.3581441065672052e-08, "loss": 18.1364, "step": 489440 }, { "epoch": 0.9887199667093574, "grad_norm": 525.9887084960938, "learning_rate": 1.355574244228386e-08, "loss": 21.2615, "step": 489450 }, { "epoch": 0.9887401673420412, "grad_norm": 343.25640869140625, "learning_rate": 1.3530068122219464e-08, "loss": 29.8688, "step": 489460 }, { "epoch": 0.988760367974725, "grad_norm": 427.1761779785156, "learning_rate": 1.3504418105604877e-08, "loss": 16.783, "step": 489470 }, { "epoch": 0.9887805686074088, "grad_norm": 297.1617431640625, "learning_rate": 1.3478792392565553e-08, "loss": 17.5976, "step": 489480 }, { "epoch": 0.9888007692400926, "grad_norm": 110.7373046875, "learning_rate": 1.3453190983225285e-08, "loss": 13.126, "step": 489490 }, { "epoch": 0.9888209698727765, "grad_norm": 257.8731689453125, "learning_rate": 1.3427613877709523e-08, "loss": 21.7787, "step": 489500 }, { "epoch": 0.9888411705054603, "grad_norm": 227.19361877441406, "learning_rate": 1.3402061076142613e-08, "loss": 18.5559, "step": 489510 }, { "epoch": 0.9888613711381441, "grad_norm": 138.41561889648438, "learning_rate": 1.3376532578649459e-08, "loss": 13.6527, "step": 489520 }, { "epoch": 0.9888815717708279, "grad_norm": 222.69546508789062, "learning_rate": 1.3351028385354402e-08, "loss": 14.4402, "step": 489530 }, { "epoch": 0.9889017724035116, "grad_norm": 176.04368591308594, "learning_rate": 1.3325548496381235e-08, "loss": 17.616, "step": 489540 }, { "epoch": 0.9889219730361954, "grad_norm": 423.5249328613281, "learning_rate": 1.3300092911854856e-08, "loss": 10.7274, "step": 489550 }, { "epoch": 0.9889421736688793, "grad_norm": 90.23912811279297, "learning_rate": 1.3274661631899055e-08, "loss": 25.2611, "step": 489560 }, { "epoch": 0.9889623743015631, "grad_norm": 152.38360595703125, "learning_rate": 1.3249254656637622e-08, "loss": 10.2711, "step": 489570 }, { "epoch": 0.9889825749342469, "grad_norm": 206.1272430419922, "learning_rate": 1.3223871986194348e-08, "loss": 14.608, "step": 489580 }, { "epoch": 0.9890027755669307, "grad_norm": 253.0663604736328, "learning_rate": 1.3198513620693022e-08, "loss": 35.4088, "step": 489590 }, { "epoch": 0.9890229761996145, "grad_norm": 327.2695617675781, "learning_rate": 1.3173179560257432e-08, "loss": 11.8573, "step": 489600 }, { "epoch": 0.9890431768322984, "grad_norm": 242.9253387451172, "learning_rate": 1.314786980501137e-08, "loss": 6.221, "step": 489610 }, { "epoch": 0.9890633774649822, "grad_norm": 259.5450744628906, "learning_rate": 1.3122584355076962e-08, "loss": 11.6083, "step": 489620 }, { "epoch": 0.989083578097666, "grad_norm": 216.1023406982422, "learning_rate": 1.3097323210579104e-08, "loss": 18.5336, "step": 489630 }, { "epoch": 0.9891037787303498, "grad_norm": 232.20651245117188, "learning_rate": 1.307208637163937e-08, "loss": 21.1228, "step": 489640 }, { "epoch": 0.9891239793630336, "grad_norm": 3.4372758865356445, "learning_rate": 1.3046873838381546e-08, "loss": 11.8663, "step": 489650 }, { "epoch": 0.9891441799957175, "grad_norm": 899.5872192382812, "learning_rate": 1.3021685610928869e-08, "loss": 24.7065, "step": 489660 }, { "epoch": 0.9891643806284013, "grad_norm": 386.62091064453125, "learning_rate": 1.2996521689403463e-08, "loss": 17.0633, "step": 489670 }, { "epoch": 0.9891845812610851, "grad_norm": 213.47979736328125, "learning_rate": 1.2971382073928007e-08, "loss": 15.0888, "step": 489680 }, { "epoch": 0.9892047818937689, "grad_norm": 380.30621337890625, "learning_rate": 1.2946266764625182e-08, "loss": 14.2182, "step": 489690 }, { "epoch": 0.9892249825264527, "grad_norm": 230.2733917236328, "learning_rate": 1.292117576161711e-08, "loss": 33.2241, "step": 489700 }, { "epoch": 0.9892451831591366, "grad_norm": 164.525390625, "learning_rate": 1.2896109065027029e-08, "loss": 5.3284, "step": 489710 }, { "epoch": 0.9892653837918204, "grad_norm": 113.11135864257812, "learning_rate": 1.2871066674975951e-08, "loss": 13.0744, "step": 489720 }, { "epoch": 0.9892855844245042, "grad_norm": 158.61990356445312, "learning_rate": 1.2846048591586558e-08, "loss": 27.8961, "step": 489730 }, { "epoch": 0.989305785057188, "grad_norm": 58.93808364868164, "learning_rate": 1.2821054814980971e-08, "loss": 9.826, "step": 489740 }, { "epoch": 0.9893259856898718, "grad_norm": 145.59530639648438, "learning_rate": 1.2796085345280207e-08, "loss": 9.6687, "step": 489750 }, { "epoch": 0.9893461863225557, "grad_norm": 336.0180969238281, "learning_rate": 1.277114018260639e-08, "loss": 30.2326, "step": 489760 }, { "epoch": 0.9893663869552395, "grad_norm": 306.5389099121094, "learning_rate": 1.2746219327081644e-08, "loss": 6.7078, "step": 489770 }, { "epoch": 0.9893865875879233, "grad_norm": 277.2000427246094, "learning_rate": 1.2721322778826983e-08, "loss": 7.0898, "step": 489780 }, { "epoch": 0.989406788220607, "grad_norm": 85.2691421508789, "learning_rate": 1.2696450537963422e-08, "loss": 18.8508, "step": 489790 }, { "epoch": 0.9894269888532908, "grad_norm": 226.0152130126953, "learning_rate": 1.2671602604612531e-08, "loss": 22.8411, "step": 489800 }, { "epoch": 0.9894471894859747, "grad_norm": 248.8648223876953, "learning_rate": 1.2646778978895325e-08, "loss": 19.694, "step": 489810 }, { "epoch": 0.9894673901186585, "grad_norm": 239.36099243164062, "learning_rate": 1.2621979660932814e-08, "loss": 22.7057, "step": 489820 }, { "epoch": 0.9894875907513423, "grad_norm": 91.97760009765625, "learning_rate": 1.2597204650845463e-08, "loss": 8.3683, "step": 489830 }, { "epoch": 0.9895077913840261, "grad_norm": 277.6579895019531, "learning_rate": 1.2572453948755393e-08, "loss": 18.2378, "step": 489840 }, { "epoch": 0.9895279920167099, "grad_norm": 24.776805877685547, "learning_rate": 1.2547727554781398e-08, "loss": 17.9956, "step": 489850 }, { "epoch": 0.9895481926493938, "grad_norm": 221.5035858154297, "learning_rate": 1.2523025469045047e-08, "loss": 12.6799, "step": 489860 }, { "epoch": 0.9895683932820776, "grad_norm": 355.8934020996094, "learning_rate": 1.2498347691666801e-08, "loss": 11.9491, "step": 489870 }, { "epoch": 0.9895885939147614, "grad_norm": 164.27005004882812, "learning_rate": 1.2473694222766563e-08, "loss": 15.1054, "step": 489880 }, { "epoch": 0.9896087945474452, "grad_norm": 332.149169921875, "learning_rate": 1.2449065062464794e-08, "loss": 14.2905, "step": 489890 }, { "epoch": 0.989628995180129, "grad_norm": 554.3561401367188, "learning_rate": 1.2424460210881394e-08, "loss": 24.9038, "step": 489900 }, { "epoch": 0.9896491958128129, "grad_norm": 0.0, "learning_rate": 1.2399879668136271e-08, "loss": 13.2437, "step": 489910 }, { "epoch": 0.9896693964454967, "grad_norm": 185.58526611328125, "learning_rate": 1.2375323434348773e-08, "loss": 22.9642, "step": 489920 }, { "epoch": 0.9896895970781805, "grad_norm": 291.0820007324219, "learning_rate": 1.235079150963936e-08, "loss": 15.7851, "step": 489930 }, { "epoch": 0.9897097977108643, "grad_norm": 212.75608825683594, "learning_rate": 1.2326283894127378e-08, "loss": 35.3077, "step": 489940 }, { "epoch": 0.9897299983435481, "grad_norm": 297.8985900878906, "learning_rate": 1.2301800587932179e-08, "loss": 29.191, "step": 489950 }, { "epoch": 0.989750198976232, "grad_norm": 196.13829040527344, "learning_rate": 1.2277341591172553e-08, "loss": 17.0079, "step": 489960 }, { "epoch": 0.9897703996089158, "grad_norm": 356.3586120605469, "learning_rate": 1.225290690396841e-08, "loss": 16.3947, "step": 489970 }, { "epoch": 0.9897906002415996, "grad_norm": 0.7744454145431519, "learning_rate": 1.2228496526439093e-08, "loss": 14.7819, "step": 489980 }, { "epoch": 0.9898108008742834, "grad_norm": 5.643133640289307, "learning_rate": 1.2204110458702844e-08, "loss": 10.4795, "step": 489990 }, { "epoch": 0.9898310015069672, "grad_norm": 254.61962890625, "learning_rate": 1.2179748700879013e-08, "loss": 17.88, "step": 490000 }, { "epoch": 0.989851202139651, "grad_norm": 185.8321990966797, "learning_rate": 1.2155411253085835e-08, "loss": 12.9206, "step": 490010 }, { "epoch": 0.9898714027723349, "grad_norm": 260.7972106933594, "learning_rate": 1.2131098115442108e-08, "loss": 23.3891, "step": 490020 }, { "epoch": 0.9898916034050187, "grad_norm": 313.2037658691406, "learning_rate": 1.2106809288067178e-08, "loss": 20.5773, "step": 490030 }, { "epoch": 0.9899118040377025, "grad_norm": 73.95179748535156, "learning_rate": 1.208254477107762e-08, "loss": 12.1286, "step": 490040 }, { "epoch": 0.9899320046703862, "grad_norm": 21.208145141601562, "learning_rate": 1.2058304564593893e-08, "loss": 22.3142, "step": 490050 }, { "epoch": 0.98995220530307, "grad_norm": 206.740478515625, "learning_rate": 1.2034088668732568e-08, "loss": 17.1283, "step": 490060 }, { "epoch": 0.9899724059357539, "grad_norm": 181.3231964111328, "learning_rate": 1.2009897083611888e-08, "loss": 14.6414, "step": 490070 }, { "epoch": 0.9899926065684377, "grad_norm": 52.20246505737305, "learning_rate": 1.1985729809350088e-08, "loss": 9.1634, "step": 490080 }, { "epoch": 0.9900128072011215, "grad_norm": 257.18743896484375, "learning_rate": 1.1961586846064855e-08, "loss": 14.8102, "step": 490090 }, { "epoch": 0.9900330078338053, "grad_norm": 281.6833801269531, "learning_rate": 1.1937468193873869e-08, "loss": 14.4875, "step": 490100 }, { "epoch": 0.9900532084664891, "grad_norm": 182.42430114746094, "learning_rate": 1.1913373852894816e-08, "loss": 21.3874, "step": 490110 }, { "epoch": 0.990073409099173, "grad_norm": 2.4899678230285645, "learning_rate": 1.1889303823244825e-08, "loss": 6.1339, "step": 490120 }, { "epoch": 0.9900936097318568, "grad_norm": 116.8193588256836, "learning_rate": 1.1865258105041577e-08, "loss": 16.2697, "step": 490130 }, { "epoch": 0.9901138103645406, "grad_norm": 341.3250427246094, "learning_rate": 1.1841236698402202e-08, "loss": 26.4881, "step": 490140 }, { "epoch": 0.9901340109972244, "grad_norm": 106.97996520996094, "learning_rate": 1.1817239603443276e-08, "loss": 21.3249, "step": 490150 }, { "epoch": 0.9901542116299082, "grad_norm": 208.8172607421875, "learning_rate": 1.1793266820282478e-08, "loss": 11.2017, "step": 490160 }, { "epoch": 0.9901744122625921, "grad_norm": 224.62625122070312, "learning_rate": 1.1769318349036385e-08, "loss": 20.2307, "step": 490170 }, { "epoch": 0.9901946128952759, "grad_norm": 150.63478088378906, "learning_rate": 1.1745394189821013e-08, "loss": 9.0618, "step": 490180 }, { "epoch": 0.9902148135279597, "grad_norm": 304.30377197265625, "learning_rate": 1.1721494342754048e-08, "loss": 8.9648, "step": 490190 }, { "epoch": 0.9902350141606435, "grad_norm": 171.0780029296875, "learning_rate": 1.1697618807951504e-08, "loss": 10.7617, "step": 490200 }, { "epoch": 0.9902552147933273, "grad_norm": 37.07375717163086, "learning_rate": 1.1673767585529404e-08, "loss": 10.6067, "step": 490210 }, { "epoch": 0.9902754154260112, "grad_norm": 769.3895263671875, "learning_rate": 1.1649940675604876e-08, "loss": 23.3863, "step": 490220 }, { "epoch": 0.990295616058695, "grad_norm": 363.86407470703125, "learning_rate": 1.1626138078293381e-08, "loss": 14.0682, "step": 490230 }, { "epoch": 0.9903158166913788, "grad_norm": 389.760986328125, "learning_rate": 1.1602359793710938e-08, "loss": 22.7912, "step": 490240 }, { "epoch": 0.9903360173240626, "grad_norm": 193.2105255126953, "learning_rate": 1.1578605821973566e-08, "loss": 16.888, "step": 490250 }, { "epoch": 0.9903562179567464, "grad_norm": 214.14462280273438, "learning_rate": 1.1554876163197282e-08, "loss": 13.0855, "step": 490260 }, { "epoch": 0.9903764185894303, "grad_norm": 446.6460876464844, "learning_rate": 1.1531170817496995e-08, "loss": 12.9666, "step": 490270 }, { "epoch": 0.9903966192221141, "grad_norm": 491.61016845703125, "learning_rate": 1.1507489784989278e-08, "loss": 12.7472, "step": 490280 }, { "epoch": 0.9904168198547979, "grad_norm": 66.50450897216797, "learning_rate": 1.1483833065789041e-08, "loss": 14.7546, "step": 490290 }, { "epoch": 0.9904370204874817, "grad_norm": 416.74530029296875, "learning_rate": 1.146020066001119e-08, "loss": 15.681, "step": 490300 }, { "epoch": 0.9904572211201654, "grad_norm": 256.7451477050781, "learning_rate": 1.1436592567771188e-08, "loss": 21.3193, "step": 490310 }, { "epoch": 0.9904774217528493, "grad_norm": 308.0254211425781, "learning_rate": 1.1413008789184498e-08, "loss": 17.4518, "step": 490320 }, { "epoch": 0.9904976223855331, "grad_norm": 82.44973754882812, "learning_rate": 1.1389449324365476e-08, "loss": 15.4772, "step": 490330 }, { "epoch": 0.9905178230182169, "grad_norm": 99.13777923583984, "learning_rate": 1.1365914173429582e-08, "loss": 6.1406, "step": 490340 }, { "epoch": 0.9905380236509007, "grad_norm": 195.63157653808594, "learning_rate": 1.134240333649117e-08, "loss": 17.8515, "step": 490350 }, { "epoch": 0.9905582242835845, "grad_norm": 270.7568359375, "learning_rate": 1.1318916813664594e-08, "loss": 21.5513, "step": 490360 }, { "epoch": 0.9905784249162684, "grad_norm": 0.0, "learning_rate": 1.129545460506476e-08, "loss": 22.7828, "step": 490370 }, { "epoch": 0.9905986255489522, "grad_norm": 376.3780822753906, "learning_rate": 1.1272016710806021e-08, "loss": 21.6448, "step": 490380 }, { "epoch": 0.990618826181636, "grad_norm": 322.4913635253906, "learning_rate": 1.1248603131002178e-08, "loss": 38.267, "step": 490390 }, { "epoch": 0.9906390268143198, "grad_norm": 1179.2825927734375, "learning_rate": 1.1225213865767026e-08, "loss": 42.169, "step": 490400 }, { "epoch": 0.9906592274470036, "grad_norm": 152.80825805664062, "learning_rate": 1.1201848915216029e-08, "loss": 14.4051, "step": 490410 }, { "epoch": 0.9906794280796875, "grad_norm": 187.526123046875, "learning_rate": 1.1178508279461875e-08, "loss": 27.7123, "step": 490420 }, { "epoch": 0.9906996287123713, "grad_norm": 0.0, "learning_rate": 1.115519195861836e-08, "loss": 17.9001, "step": 490430 }, { "epoch": 0.9907198293450551, "grad_norm": 503.9375305175781, "learning_rate": 1.1131899952799285e-08, "loss": 19.0012, "step": 490440 }, { "epoch": 0.9907400299777389, "grad_norm": 437.102783203125, "learning_rate": 1.1108632262118446e-08, "loss": 25.6348, "step": 490450 }, { "epoch": 0.9907602306104227, "grad_norm": 0.0, "learning_rate": 1.1085388886689085e-08, "loss": 15.7962, "step": 490460 }, { "epoch": 0.9907804312431066, "grad_norm": 325.310546875, "learning_rate": 1.1062169826624447e-08, "loss": 24.1655, "step": 490470 }, { "epoch": 0.9908006318757904, "grad_norm": 68.74517059326172, "learning_rate": 1.1038975082037772e-08, "loss": 7.7549, "step": 490480 }, { "epoch": 0.9908208325084742, "grad_norm": 337.9579162597656, "learning_rate": 1.101580465304175e-08, "loss": 15.9894, "step": 490490 }, { "epoch": 0.990841033141158, "grad_norm": 636.4308471679688, "learning_rate": 1.0992658539750179e-08, "loss": 29.1541, "step": 490500 }, { "epoch": 0.9908612337738418, "grad_norm": 385.97589111328125, "learning_rate": 1.0969536742274633e-08, "loss": 23.8906, "step": 490510 }, { "epoch": 0.9908814344065257, "grad_norm": 6.2642412185668945, "learning_rate": 1.0946439260728914e-08, "loss": 8.0843, "step": 490520 }, { "epoch": 0.9909016350392095, "grad_norm": 15.482325553894043, "learning_rate": 1.0923366095225152e-08, "loss": 19.7717, "step": 490530 }, { "epoch": 0.9909218356718933, "grad_norm": 18.279489517211914, "learning_rate": 1.090031724587548e-08, "loss": 7.8719, "step": 490540 }, { "epoch": 0.9909420363045771, "grad_norm": 332.5791931152344, "learning_rate": 1.0877292712792586e-08, "loss": 17.9668, "step": 490550 }, { "epoch": 0.9909622369372608, "grad_norm": 477.405517578125, "learning_rate": 1.0854292496089158e-08, "loss": 17.2365, "step": 490560 }, { "epoch": 0.9909824375699446, "grad_norm": 2.2261149883270264, "learning_rate": 1.0831316595876218e-08, "loss": 14.3657, "step": 490570 }, { "epoch": 0.9910026382026285, "grad_norm": 244.87901306152344, "learning_rate": 1.0808365012266454e-08, "loss": 15.724, "step": 490580 }, { "epoch": 0.9910228388353123, "grad_norm": 147.3052215576172, "learning_rate": 1.0785437745371996e-08, "loss": 13.9779, "step": 490590 }, { "epoch": 0.9910430394679961, "grad_norm": 95.43013000488281, "learning_rate": 1.076253479530387e-08, "loss": 11.3153, "step": 490600 }, { "epoch": 0.9910632401006799, "grad_norm": 43.185733795166016, "learning_rate": 1.0739656162174205e-08, "loss": 15.0828, "step": 490610 }, { "epoch": 0.9910834407333637, "grad_norm": 595.6671142578125, "learning_rate": 1.0716801846094026e-08, "loss": 26.5095, "step": 490620 }, { "epoch": 0.9911036413660476, "grad_norm": 185.7697296142578, "learning_rate": 1.0693971847175466e-08, "loss": 15.6193, "step": 490630 }, { "epoch": 0.9911238419987314, "grad_norm": 520.8781127929688, "learning_rate": 1.067116616552899e-08, "loss": 21.0543, "step": 490640 }, { "epoch": 0.9911440426314152, "grad_norm": 152.95602416992188, "learning_rate": 1.0648384801266176e-08, "loss": 8.851, "step": 490650 }, { "epoch": 0.991164243264099, "grad_norm": 0.0, "learning_rate": 1.0625627754498048e-08, "loss": 8.961, "step": 490660 }, { "epoch": 0.9911844438967828, "grad_norm": 825.7657470703125, "learning_rate": 1.0602895025335624e-08, "loss": 14.3532, "step": 490670 }, { "epoch": 0.9912046445294667, "grad_norm": 333.7046813964844, "learning_rate": 1.0580186613888822e-08, "loss": 14.0829, "step": 490680 }, { "epoch": 0.9912248451621505, "grad_norm": 524.2979125976562, "learning_rate": 1.055750252026977e-08, "loss": 31.2251, "step": 490690 }, { "epoch": 0.9912450457948343, "grad_norm": 249.78887939453125, "learning_rate": 1.0534842744588381e-08, "loss": 27.6849, "step": 490700 }, { "epoch": 0.9912652464275181, "grad_norm": 287.83197021484375, "learning_rate": 1.0512207286954568e-08, "loss": 22.8785, "step": 490710 }, { "epoch": 0.9912854470602019, "grad_norm": 296.106689453125, "learning_rate": 1.0489596147479353e-08, "loss": 25.3628, "step": 490720 }, { "epoch": 0.9913056476928858, "grad_norm": 396.5429992675781, "learning_rate": 1.0467009326272648e-08, "loss": 12.7299, "step": 490730 }, { "epoch": 0.9913258483255696, "grad_norm": 408.876708984375, "learning_rate": 1.044444682344492e-08, "loss": 17.2851, "step": 490740 }, { "epoch": 0.9913460489582534, "grad_norm": 4.908615589141846, "learning_rate": 1.0421908639104971e-08, "loss": 26.2915, "step": 490750 }, { "epoch": 0.9913662495909372, "grad_norm": 38.23313903808594, "learning_rate": 1.039939477336438e-08, "loss": 9.5834, "step": 490760 }, { "epoch": 0.991386450223621, "grad_norm": 0.0, "learning_rate": 1.0376905226331391e-08, "loss": 19.0989, "step": 490770 }, { "epoch": 0.9914066508563049, "grad_norm": 260.5321350097656, "learning_rate": 1.0354439998116473e-08, "loss": 10.56, "step": 490780 }, { "epoch": 0.9914268514889887, "grad_norm": 462.5550231933594, "learning_rate": 1.0331999088828425e-08, "loss": 27.8829, "step": 490790 }, { "epoch": 0.9914470521216725, "grad_norm": 125.8621826171875, "learning_rate": 1.030958249857772e-08, "loss": 15.1149, "step": 490800 }, { "epoch": 0.9914672527543563, "grad_norm": 78.16096496582031, "learning_rate": 1.02871902274726e-08, "loss": 18.5501, "step": 490810 }, { "epoch": 0.99148745338704, "grad_norm": 183.17477416992188, "learning_rate": 1.026482227562242e-08, "loss": 16.0614, "step": 490820 }, { "epoch": 0.9915076540197238, "grad_norm": 194.7899169921875, "learning_rate": 1.0242478643136545e-08, "loss": 20.4395, "step": 490830 }, { "epoch": 0.9915278546524077, "grad_norm": 300.5813903808594, "learning_rate": 1.0220159330123214e-08, "loss": 13.6394, "step": 490840 }, { "epoch": 0.9915480552850915, "grad_norm": 528.572265625, "learning_rate": 1.0197864336691788e-08, "loss": 21.4617, "step": 490850 }, { "epoch": 0.9915682559177753, "grad_norm": 227.086669921875, "learning_rate": 1.0175593662951066e-08, "loss": 16.7332, "step": 490860 }, { "epoch": 0.9915884565504591, "grad_norm": 117.60448455810547, "learning_rate": 1.0153347309009299e-08, "loss": 13.3176, "step": 490870 }, { "epoch": 0.991608657183143, "grad_norm": 477.0719909667969, "learning_rate": 1.013112527497473e-08, "loss": 21.0266, "step": 490880 }, { "epoch": 0.9916288578158268, "grad_norm": 103.98390197753906, "learning_rate": 1.0108927560955606e-08, "loss": 13.0454, "step": 490890 }, { "epoch": 0.9916490584485106, "grad_norm": 402.55023193359375, "learning_rate": 1.008675416706073e-08, "loss": 18.175, "step": 490900 }, { "epoch": 0.9916692590811944, "grad_norm": 206.17750549316406, "learning_rate": 1.0064605093397794e-08, "loss": 21.9164, "step": 490910 }, { "epoch": 0.9916894597138782, "grad_norm": 459.20501708984375, "learning_rate": 1.0042480340075045e-08, "loss": 21.358, "step": 490920 }, { "epoch": 0.991709660346562, "grad_norm": 855.034912109375, "learning_rate": 1.0020379907199618e-08, "loss": 24.7808, "step": 490930 }, { "epoch": 0.9917298609792459, "grad_norm": 170.81808471679688, "learning_rate": 9.99830379487976e-09, "loss": 19.9852, "step": 490940 }, { "epoch": 0.9917500616119297, "grad_norm": 448.11920166015625, "learning_rate": 9.976252003223164e-09, "loss": 25.9088, "step": 490950 }, { "epoch": 0.9917702622446135, "grad_norm": 86.4518814086914, "learning_rate": 9.954224532336965e-09, "loss": 7.129, "step": 490960 }, { "epoch": 0.9917904628772973, "grad_norm": 391.43621826171875, "learning_rate": 9.932221382328299e-09, "loss": 11.2156, "step": 490970 }, { "epoch": 0.9918106635099811, "grad_norm": 115.07550048828125, "learning_rate": 9.91024255330486e-09, "loss": 12.0163, "step": 490980 }, { "epoch": 0.991830864142665, "grad_norm": 386.3694763183594, "learning_rate": 9.888288045374339e-09, "loss": 14.1917, "step": 490990 }, { "epoch": 0.9918510647753488, "grad_norm": 109.41740417480469, "learning_rate": 9.866357858642206e-09, "loss": 11.2098, "step": 491000 }, { "epoch": 0.9918712654080326, "grad_norm": 367.789306640625, "learning_rate": 9.844451993216708e-09, "loss": 19.6657, "step": 491010 }, { "epoch": 0.9918914660407164, "grad_norm": 444.3191833496094, "learning_rate": 9.822570449203873e-09, "loss": 14.5652, "step": 491020 }, { "epoch": 0.9919116666734002, "grad_norm": 186.89617919921875, "learning_rate": 9.800713226710834e-09, "loss": 12.7048, "step": 491030 }, { "epoch": 0.9919318673060841, "grad_norm": 108.09678649902344, "learning_rate": 9.77888032584362e-09, "loss": 17.9147, "step": 491040 }, { "epoch": 0.9919520679387679, "grad_norm": 289.07257080078125, "learning_rate": 9.757071746708812e-09, "loss": 12.7529, "step": 491050 }, { "epoch": 0.9919722685714517, "grad_norm": 173.3177490234375, "learning_rate": 9.735287489413547e-09, "loss": 10.0509, "step": 491060 }, { "epoch": 0.9919924692041354, "grad_norm": 126.21602630615234, "learning_rate": 9.71352755406274e-09, "loss": 9.7193, "step": 491070 }, { "epoch": 0.9920126698368192, "grad_norm": 212.0915985107422, "learning_rate": 9.691791940762418e-09, "loss": 23.766, "step": 491080 }, { "epoch": 0.9920328704695031, "grad_norm": 344.5935974121094, "learning_rate": 9.670080649619717e-09, "loss": 30.6248, "step": 491090 }, { "epoch": 0.9920530711021869, "grad_norm": 753.1189575195312, "learning_rate": 9.64839368074011e-09, "loss": 22.1914, "step": 491100 }, { "epoch": 0.9920732717348707, "grad_norm": 419.9007263183594, "learning_rate": 9.626731034227954e-09, "loss": 16.5807, "step": 491110 }, { "epoch": 0.9920934723675545, "grad_norm": 190.58302307128906, "learning_rate": 9.605092710190943e-09, "loss": 14.3772, "step": 491120 }, { "epoch": 0.9921136730002383, "grad_norm": 386.9684753417969, "learning_rate": 9.583478708732886e-09, "loss": 15.3493, "step": 491130 }, { "epoch": 0.9921338736329222, "grad_norm": 115.29700469970703, "learning_rate": 9.561889029959249e-09, "loss": 10.7981, "step": 491140 }, { "epoch": 0.992154074265606, "grad_norm": 253.6248016357422, "learning_rate": 9.540323673976615e-09, "loss": 19.4967, "step": 491150 }, { "epoch": 0.9921742748982898, "grad_norm": 187.130859375, "learning_rate": 9.518782640888235e-09, "loss": 20.4386, "step": 491160 }, { "epoch": 0.9921944755309736, "grad_norm": 317.4109802246094, "learning_rate": 9.497265930800691e-09, "loss": 22.2754, "step": 491170 }, { "epoch": 0.9922146761636574, "grad_norm": 536.2616577148438, "learning_rate": 9.475773543818345e-09, "loss": 20.5593, "step": 491180 }, { "epoch": 0.9922348767963413, "grad_norm": 276.8728942871094, "learning_rate": 9.454305480045556e-09, "loss": 21.9956, "step": 491190 }, { "epoch": 0.9922550774290251, "grad_norm": 622.9331665039062, "learning_rate": 9.432861739586685e-09, "loss": 21.2642, "step": 491200 }, { "epoch": 0.9922752780617089, "grad_norm": 399.0710754394531, "learning_rate": 9.411442322547204e-09, "loss": 26.3859, "step": 491210 }, { "epoch": 0.9922954786943927, "grad_norm": 368.78289794921875, "learning_rate": 9.390047229031474e-09, "loss": 13.4541, "step": 491220 }, { "epoch": 0.9923156793270765, "grad_norm": 276.2664794921875, "learning_rate": 9.368676459142744e-09, "loss": 21.949, "step": 491230 }, { "epoch": 0.9923358799597604, "grad_norm": 226.25160217285156, "learning_rate": 9.347330012985933e-09, "loss": 22.1823, "step": 491240 }, { "epoch": 0.9923560805924442, "grad_norm": 408.3966064453125, "learning_rate": 9.3260078906654e-09, "loss": 20.6702, "step": 491250 }, { "epoch": 0.992376281225128, "grad_norm": 66.66519927978516, "learning_rate": 9.304710092283842e-09, "loss": 15.5139, "step": 491260 }, { "epoch": 0.9923964818578118, "grad_norm": 385.6389465332031, "learning_rate": 9.283436617946173e-09, "loss": 9.6264, "step": 491270 }, { "epoch": 0.9924166824904956, "grad_norm": 161.71920776367188, "learning_rate": 9.262187467756201e-09, "loss": 8.8075, "step": 491280 }, { "epoch": 0.9924368831231795, "grad_norm": 184.41116333007812, "learning_rate": 9.24096264181662e-09, "loss": 22.24, "step": 491290 }, { "epoch": 0.9924570837558633, "grad_norm": 294.45697021484375, "learning_rate": 9.219762140231237e-09, "loss": 16.0654, "step": 491300 }, { "epoch": 0.9924772843885471, "grad_norm": 58.14417266845703, "learning_rate": 9.198585963103302e-09, "loss": 13.2526, "step": 491310 }, { "epoch": 0.9924974850212309, "grad_norm": 283.6220397949219, "learning_rate": 9.177434110536065e-09, "loss": 13.8107, "step": 491320 }, { "epoch": 0.9925176856539146, "grad_norm": 20.302827835083008, "learning_rate": 9.156306582633334e-09, "loss": 10.6104, "step": 491330 }, { "epoch": 0.9925378862865984, "grad_norm": 73.37297058105469, "learning_rate": 9.135203379496693e-09, "loss": 13.4014, "step": 491340 }, { "epoch": 0.9925580869192823, "grad_norm": 363.70428466796875, "learning_rate": 9.114124501230504e-09, "loss": 19.8887, "step": 491350 }, { "epoch": 0.9925782875519661, "grad_norm": 310.9961853027344, "learning_rate": 9.09306994793635e-09, "loss": 18.7803, "step": 491360 }, { "epoch": 0.9925984881846499, "grad_norm": 492.8285827636719, "learning_rate": 9.07203971971693e-09, "loss": 29.9061, "step": 491370 }, { "epoch": 0.9926186888173337, "grad_norm": 380.22637939453125, "learning_rate": 9.051033816675492e-09, "loss": 14.0132, "step": 491380 }, { "epoch": 0.9926388894500175, "grad_norm": 393.7242126464844, "learning_rate": 9.030052238913622e-09, "loss": 19.3284, "step": 491390 }, { "epoch": 0.9926590900827014, "grad_norm": 172.16143798828125, "learning_rate": 9.009094986534572e-09, "loss": 18.2413, "step": 491400 }, { "epoch": 0.9926792907153852, "grad_norm": 3.753679037094116, "learning_rate": 8.988162059639371e-09, "loss": 17.3487, "step": 491410 }, { "epoch": 0.992699491348069, "grad_norm": 142.9956817626953, "learning_rate": 8.967253458330715e-09, "loss": 18.9747, "step": 491420 }, { "epoch": 0.9927196919807528, "grad_norm": 202.57798767089844, "learning_rate": 8.946369182710191e-09, "loss": 15.247, "step": 491430 }, { "epoch": 0.9927398926134366, "grad_norm": 321.2886047363281, "learning_rate": 8.925509232879937e-09, "loss": 18.6229, "step": 491440 }, { "epoch": 0.9927600932461205, "grad_norm": 253.57022094726562, "learning_rate": 8.904673608940983e-09, "loss": 19.6635, "step": 491450 }, { "epoch": 0.9927802938788043, "grad_norm": 48.63462829589844, "learning_rate": 8.883862310995473e-09, "loss": 14.1624, "step": 491460 }, { "epoch": 0.9928004945114881, "grad_norm": 95.82794952392578, "learning_rate": 8.863075339144988e-09, "loss": 16.4728, "step": 491470 }, { "epoch": 0.9928206951441719, "grad_norm": 254.70733642578125, "learning_rate": 8.842312693490563e-09, "loss": 18.6319, "step": 491480 }, { "epoch": 0.9928408957768557, "grad_norm": 528.6598510742188, "learning_rate": 8.821574374132669e-09, "loss": 15.9206, "step": 491490 }, { "epoch": 0.9928610964095396, "grad_norm": 308.7865295410156, "learning_rate": 8.800860381173448e-09, "loss": 24.6954, "step": 491500 }, { "epoch": 0.9928812970422234, "grad_norm": 224.09146118164062, "learning_rate": 8.780170714713931e-09, "loss": 13.3492, "step": 491510 }, { "epoch": 0.9929014976749072, "grad_norm": 118.38770294189453, "learning_rate": 8.759505374854038e-09, "loss": 19.3737, "step": 491520 }, { "epoch": 0.992921698307591, "grad_norm": 232.26646423339844, "learning_rate": 8.738864361694799e-09, "loss": 32.7526, "step": 491530 }, { "epoch": 0.9929418989402748, "grad_norm": 322.0654602050781, "learning_rate": 8.718247675337243e-09, "loss": 27.4142, "step": 491540 }, { "epoch": 0.9929620995729587, "grad_norm": 478.8042907714844, "learning_rate": 8.697655315881293e-09, "loss": 20.8666, "step": 491550 }, { "epoch": 0.9929823002056425, "grad_norm": 300.4228515625, "learning_rate": 8.677087283427976e-09, "loss": 20.7805, "step": 491560 }, { "epoch": 0.9930025008383263, "grad_norm": 0.0, "learning_rate": 8.656543578077215e-09, "loss": 13.4335, "step": 491570 }, { "epoch": 0.9930227014710101, "grad_norm": 56.2254753112793, "learning_rate": 8.636024199928927e-09, "loss": 26.2247, "step": 491580 }, { "epoch": 0.9930429021036938, "grad_norm": 525.561279296875, "learning_rate": 8.615529149083034e-09, "loss": 25.3917, "step": 491590 }, { "epoch": 0.9930631027363777, "grad_norm": 163.4384002685547, "learning_rate": 8.595058425640012e-09, "loss": 16.977, "step": 491600 }, { "epoch": 0.9930833033690615, "grad_norm": 320.7842102050781, "learning_rate": 8.574612029699224e-09, "loss": 19.788, "step": 491610 }, { "epoch": 0.9931035040017453, "grad_norm": 131.5560760498047, "learning_rate": 8.554189961360037e-09, "loss": 21.5623, "step": 491620 }, { "epoch": 0.9931237046344291, "grad_norm": 181.0648651123047, "learning_rate": 8.53379222072237e-09, "loss": 15.097, "step": 491630 }, { "epoch": 0.9931439052671129, "grad_norm": 417.7359619140625, "learning_rate": 8.513418807886142e-09, "loss": 20.7334, "step": 491640 }, { "epoch": 0.9931641058997968, "grad_norm": 107.27851867675781, "learning_rate": 8.49306972294961e-09, "loss": 15.8216, "step": 491650 }, { "epoch": 0.9931843065324806, "grad_norm": 442.1458435058594, "learning_rate": 8.472744966012691e-09, "loss": 18.0458, "step": 491660 }, { "epoch": 0.9932045071651644, "grad_norm": 65.64525604248047, "learning_rate": 8.452444537174198e-09, "loss": 19.203, "step": 491670 }, { "epoch": 0.9932247077978482, "grad_norm": 255.77297973632812, "learning_rate": 8.43216843653294e-09, "loss": 9.3265, "step": 491680 }, { "epoch": 0.993244908430532, "grad_norm": 11.050529479980469, "learning_rate": 8.41191666418828e-09, "loss": 19.0387, "step": 491690 }, { "epoch": 0.9932651090632159, "grad_norm": 362.9149475097656, "learning_rate": 8.391689220238474e-09, "loss": 15.137, "step": 491700 }, { "epoch": 0.9932853096958997, "grad_norm": 353.6307067871094, "learning_rate": 8.37148610478178e-09, "loss": 18.2115, "step": 491710 }, { "epoch": 0.9933055103285835, "grad_norm": 572.0382690429688, "learning_rate": 8.351307317917002e-09, "loss": 18.9829, "step": 491720 }, { "epoch": 0.9933257109612673, "grad_norm": 92.88359069824219, "learning_rate": 8.331152859742952e-09, "loss": 29.1805, "step": 491730 }, { "epoch": 0.9933459115939511, "grad_norm": 386.05322265625, "learning_rate": 8.311022730357331e-09, "loss": 17.9108, "step": 491740 }, { "epoch": 0.993366112226635, "grad_norm": 29.423995971679688, "learning_rate": 8.290916929858394e-09, "loss": 30.5861, "step": 491750 }, { "epoch": 0.9933863128593188, "grad_norm": 316.3428039550781, "learning_rate": 8.27083545834384e-09, "loss": 18.7068, "step": 491760 }, { "epoch": 0.9934065134920026, "grad_norm": 361.76995849609375, "learning_rate": 8.250778315911922e-09, "loss": 19.9417, "step": 491770 }, { "epoch": 0.9934267141246864, "grad_norm": 153.23692321777344, "learning_rate": 8.230745502660343e-09, "loss": 19.0708, "step": 491780 }, { "epoch": 0.9934469147573702, "grad_norm": 307.64471435546875, "learning_rate": 8.210737018686798e-09, "loss": 17.6583, "step": 491790 }, { "epoch": 0.9934671153900541, "grad_norm": 210.4161834716797, "learning_rate": 8.190752864088436e-09, "loss": 26.3436, "step": 491800 }, { "epoch": 0.9934873160227379, "grad_norm": 0.0, "learning_rate": 8.17079303896351e-09, "loss": 28.192, "step": 491810 }, { "epoch": 0.9935075166554217, "grad_norm": 238.99990844726562, "learning_rate": 8.150857543408054e-09, "loss": 21.8865, "step": 491820 }, { "epoch": 0.9935277172881055, "grad_norm": 11.144876480102539, "learning_rate": 8.130946377519767e-09, "loss": 14.0856, "step": 491830 }, { "epoch": 0.9935479179207892, "grad_norm": 158.1072235107422, "learning_rate": 8.11105954139635e-09, "loss": 12.2065, "step": 491840 }, { "epoch": 0.993568118553473, "grad_norm": 173.94834899902344, "learning_rate": 8.091197035133836e-09, "loss": 18.2429, "step": 491850 }, { "epoch": 0.9935883191861569, "grad_norm": 237.55453491210938, "learning_rate": 8.07135885882937e-09, "loss": 15.4611, "step": 491860 }, { "epoch": 0.9936085198188407, "grad_norm": 348.1846618652344, "learning_rate": 8.051545012580097e-09, "loss": 13.8497, "step": 491870 }, { "epoch": 0.9936287204515245, "grad_norm": 171.78451538085938, "learning_rate": 8.031755496481496e-09, "loss": 14.8311, "step": 491880 }, { "epoch": 0.9936489210842083, "grad_norm": 336.60260009765625, "learning_rate": 8.011990310631269e-09, "loss": 14.865, "step": 491890 }, { "epoch": 0.9936691217168921, "grad_norm": 114.01586151123047, "learning_rate": 7.992249455124889e-09, "loss": 10.4964, "step": 491900 }, { "epoch": 0.993689322349576, "grad_norm": 160.74249267578125, "learning_rate": 7.972532930058396e-09, "loss": 17.2137, "step": 491910 }, { "epoch": 0.9937095229822598, "grad_norm": 9.658797264099121, "learning_rate": 7.952840735528933e-09, "loss": 13.6641, "step": 491920 }, { "epoch": 0.9937297236149436, "grad_norm": 325.94775390625, "learning_rate": 7.933172871631978e-09, "loss": 13.8571, "step": 491930 }, { "epoch": 0.9937499242476274, "grad_norm": 20.64225959777832, "learning_rate": 7.913529338463011e-09, "loss": 15.0212, "step": 491940 }, { "epoch": 0.9937701248803112, "grad_norm": 7.2898640632629395, "learning_rate": 7.89391013611751e-09, "loss": 20.1271, "step": 491950 }, { "epoch": 0.9937903255129951, "grad_norm": 323.5410461425781, "learning_rate": 7.874315264692622e-09, "loss": 15.424, "step": 491960 }, { "epoch": 0.9938105261456789, "grad_norm": 409.5080871582031, "learning_rate": 7.85474472428216e-09, "loss": 15.4924, "step": 491970 }, { "epoch": 0.9938307267783627, "grad_norm": 364.72625732421875, "learning_rate": 7.835198514982156e-09, "loss": 19.0092, "step": 491980 }, { "epoch": 0.9938509274110465, "grad_norm": 66.80720520019531, "learning_rate": 7.815676636888093e-09, "loss": 15.9973, "step": 491990 }, { "epoch": 0.9938711280437303, "grad_norm": 456.00714111328125, "learning_rate": 7.796179090094891e-09, "loss": 17.795, "step": 492000 }, { "epoch": 0.9938913286764142, "grad_norm": 362.4913024902344, "learning_rate": 7.776705874698032e-09, "loss": 13.1142, "step": 492010 }, { "epoch": 0.993911529309098, "grad_norm": 240.89476013183594, "learning_rate": 7.757256990791328e-09, "loss": 11.7076, "step": 492020 }, { "epoch": 0.9939317299417818, "grad_norm": 233.6432647705078, "learning_rate": 7.737832438470816e-09, "loss": 16.5738, "step": 492030 }, { "epoch": 0.9939519305744656, "grad_norm": 179.1953125, "learning_rate": 7.718432217830307e-09, "loss": 18.0129, "step": 492040 }, { "epoch": 0.9939721312071494, "grad_norm": 157.79263305664062, "learning_rate": 7.699056328964726e-09, "loss": 17.3427, "step": 492050 }, { "epoch": 0.9939923318398333, "grad_norm": 226.1960906982422, "learning_rate": 7.679704771968998e-09, "loss": 12.2909, "step": 492060 }, { "epoch": 0.9940125324725171, "grad_norm": 145.84329223632812, "learning_rate": 7.660377546936382e-09, "loss": 9.1649, "step": 492070 }, { "epoch": 0.9940327331052009, "grad_norm": 238.32235717773438, "learning_rate": 7.641074653961244e-09, "loss": 16.3975, "step": 492080 }, { "epoch": 0.9940529337378847, "grad_norm": 176.8657989501953, "learning_rate": 7.621796093138512e-09, "loss": 10.0998, "step": 492090 }, { "epoch": 0.9940731343705684, "grad_norm": 200.0746307373047, "learning_rate": 7.602541864561442e-09, "loss": 15.2308, "step": 492100 }, { "epoch": 0.9940933350032523, "grad_norm": 132.1905517578125, "learning_rate": 7.583311968324403e-09, "loss": 20.6681, "step": 492110 }, { "epoch": 0.9941135356359361, "grad_norm": 218.55520629882812, "learning_rate": 7.564106404520654e-09, "loss": 20.1178, "step": 492120 }, { "epoch": 0.9941337362686199, "grad_norm": 162.3923797607422, "learning_rate": 7.544925173243455e-09, "loss": 13.2029, "step": 492130 }, { "epoch": 0.9941539369013037, "grad_norm": 54.22587203979492, "learning_rate": 7.525768274587175e-09, "loss": 24.1705, "step": 492140 }, { "epoch": 0.9941741375339875, "grad_norm": 0.0, "learning_rate": 7.506635708645072e-09, "loss": 17.7284, "step": 492150 }, { "epoch": 0.9941943381666714, "grad_norm": 246.87582397460938, "learning_rate": 7.487527475509848e-09, "loss": 13.0934, "step": 492160 }, { "epoch": 0.9942145387993552, "grad_norm": 526.3923950195312, "learning_rate": 7.468443575274764e-09, "loss": 23.2213, "step": 492170 }, { "epoch": 0.994234739432039, "grad_norm": 42.60770034790039, "learning_rate": 7.449384008033078e-09, "loss": 24.0846, "step": 492180 }, { "epoch": 0.9942549400647228, "grad_norm": 466.7528991699219, "learning_rate": 7.430348773877494e-09, "loss": 22.2213, "step": 492190 }, { "epoch": 0.9942751406974066, "grad_norm": 131.48727416992188, "learning_rate": 7.411337872900715e-09, "loss": 22.6214, "step": 492200 }, { "epoch": 0.9942953413300905, "grad_norm": 169.9774627685547, "learning_rate": 7.392351305195999e-09, "loss": 17.74, "step": 492210 }, { "epoch": 0.9943155419627743, "grad_norm": 0.9770389199256897, "learning_rate": 7.373389070854941e-09, "loss": 15.1575, "step": 492220 }, { "epoch": 0.9943357425954581, "grad_norm": 490.18450927734375, "learning_rate": 7.3544511699708e-09, "loss": 17.1803, "step": 492230 }, { "epoch": 0.9943559432281419, "grad_norm": 353.4809875488281, "learning_rate": 7.335537602635723e-09, "loss": 14.9076, "step": 492240 }, { "epoch": 0.9943761438608257, "grad_norm": 140.79559326171875, "learning_rate": 7.3166483689413035e-09, "loss": 18.4327, "step": 492250 }, { "epoch": 0.9943963444935096, "grad_norm": 316.9680480957031, "learning_rate": 7.297783468980246e-09, "loss": 17.3414, "step": 492260 }, { "epoch": 0.9944165451261934, "grad_norm": 220.2744903564453, "learning_rate": 7.278942902843589e-09, "loss": 12.5453, "step": 492270 }, { "epoch": 0.9944367457588772, "grad_norm": 127.3188705444336, "learning_rate": 7.26012667062459e-09, "loss": 11.6946, "step": 492280 }, { "epoch": 0.994456946391561, "grad_norm": 204.44300842285156, "learning_rate": 7.241334772414288e-09, "loss": 21.4793, "step": 492290 }, { "epoch": 0.9944771470242448, "grad_norm": 155.9869384765625, "learning_rate": 7.222567208303721e-09, "loss": 22.7941, "step": 492300 }, { "epoch": 0.9944973476569287, "grad_norm": 533.785888671875, "learning_rate": 7.203823978384483e-09, "loss": 32.2214, "step": 492310 }, { "epoch": 0.9945175482896125, "grad_norm": 109.73191833496094, "learning_rate": 7.185105082748722e-09, "loss": 13.7931, "step": 492320 }, { "epoch": 0.9945377489222963, "grad_norm": 230.13491821289062, "learning_rate": 7.166410521487477e-09, "loss": 21.6221, "step": 492330 }, { "epoch": 0.9945579495549801, "grad_norm": 90.71044158935547, "learning_rate": 7.14774029469123e-09, "loss": 21.1023, "step": 492340 }, { "epoch": 0.9945781501876638, "grad_norm": 353.1844787597656, "learning_rate": 7.129094402451575e-09, "loss": 19.1026, "step": 492350 }, { "epoch": 0.9945983508203476, "grad_norm": 282.05908203125, "learning_rate": 7.11047284485844e-09, "loss": 15.9006, "step": 492360 }, { "epoch": 0.9946185514530315, "grad_norm": 469.705810546875, "learning_rate": 7.0918756220039745e-09, "loss": 22.5142, "step": 492370 }, { "epoch": 0.9946387520857153, "grad_norm": 214.02598571777344, "learning_rate": 7.073302733978104e-09, "loss": 17.0717, "step": 492380 }, { "epoch": 0.9946589527183991, "grad_norm": 288.0621337890625, "learning_rate": 7.054754180871315e-09, "loss": 7.8111, "step": 492390 }, { "epoch": 0.9946791533510829, "grad_norm": 222.0833282470703, "learning_rate": 7.036229962774088e-09, "loss": 19.9986, "step": 492400 }, { "epoch": 0.9946993539837667, "grad_norm": 232.6152801513672, "learning_rate": 7.0177300797763526e-09, "loss": 33.8875, "step": 492410 }, { "epoch": 0.9947195546164506, "grad_norm": 500.5577392578125, "learning_rate": 6.999254531969146e-09, "loss": 18.8919, "step": 492420 }, { "epoch": 0.9947397552491344, "grad_norm": 446.86334228515625, "learning_rate": 6.980803319441842e-09, "loss": 33.4219, "step": 492430 }, { "epoch": 0.9947599558818182, "grad_norm": 150.64266967773438, "learning_rate": 6.962376442284368e-09, "loss": 25.4434, "step": 492440 }, { "epoch": 0.994780156514502, "grad_norm": 437.434326171875, "learning_rate": 6.943973900586654e-09, "loss": 36.1648, "step": 492450 }, { "epoch": 0.9948003571471858, "grad_norm": 332.0025329589844, "learning_rate": 6.925595694438625e-09, "loss": 15.5303, "step": 492460 }, { "epoch": 0.9948205577798697, "grad_norm": 365.9385070800781, "learning_rate": 6.9072418239296556e-09, "loss": 38.4911, "step": 492470 }, { "epoch": 0.9948407584125535, "grad_norm": 492.3500671386719, "learning_rate": 6.888912289149119e-09, "loss": 19.1304, "step": 492480 }, { "epoch": 0.9948609590452373, "grad_norm": 135.6409454345703, "learning_rate": 6.8706070901863876e-09, "loss": 34.5697, "step": 492490 }, { "epoch": 0.9948811596779211, "grad_norm": 89.8146743774414, "learning_rate": 6.852326227130835e-09, "loss": 12.0278, "step": 492500 }, { "epoch": 0.9949013603106049, "grad_norm": 399.4657897949219, "learning_rate": 6.834069700071277e-09, "loss": 18.217, "step": 492510 }, { "epoch": 0.9949215609432888, "grad_norm": 17.456974029541016, "learning_rate": 6.81583750909709e-09, "loss": 8.5865, "step": 492520 }, { "epoch": 0.9949417615759726, "grad_norm": 166.68055725097656, "learning_rate": 6.797629654296533e-09, "loss": 15.6642, "step": 492530 }, { "epoch": 0.9949619622086564, "grad_norm": 450.2313537597656, "learning_rate": 6.779446135758982e-09, "loss": 22.3462, "step": 492540 }, { "epoch": 0.9949821628413402, "grad_norm": 264.4354553222656, "learning_rate": 6.761286953572699e-09, "loss": 15.3926, "step": 492550 }, { "epoch": 0.995002363474024, "grad_norm": 639.0560913085938, "learning_rate": 6.7431521078265e-09, "loss": 24.994, "step": 492560 }, { "epoch": 0.9950225641067079, "grad_norm": 459.2167053222656, "learning_rate": 6.725041598608651e-09, "loss": 19.193, "step": 492570 }, { "epoch": 0.9950427647393917, "grad_norm": 2859.9921875, "learning_rate": 6.706955426006856e-09, "loss": 20.0577, "step": 492580 }, { "epoch": 0.9950629653720755, "grad_norm": 73.68702697753906, "learning_rate": 6.688893590109935e-09, "loss": 14.732, "step": 492590 }, { "epoch": 0.9950831660047593, "grad_norm": 207.21372985839844, "learning_rate": 6.670856091006151e-09, "loss": 14.8194, "step": 492600 }, { "epoch": 0.995103366637443, "grad_norm": 72.70738983154297, "learning_rate": 6.652842928782655e-09, "loss": 21.3107, "step": 492610 }, { "epoch": 0.9951235672701269, "grad_norm": 179.05630493164062, "learning_rate": 6.63485410352771e-09, "loss": 23.2535, "step": 492620 }, { "epoch": 0.9951437679028107, "grad_norm": 175.2220458984375, "learning_rate": 6.61688961532847e-09, "loss": 7.077, "step": 492630 }, { "epoch": 0.9951639685354945, "grad_norm": 367.4979248046875, "learning_rate": 6.598949464273196e-09, "loss": 17.3254, "step": 492640 }, { "epoch": 0.9951841691681783, "grad_norm": 331.68743896484375, "learning_rate": 6.581033650449043e-09, "loss": 15.688, "step": 492650 }, { "epoch": 0.9952043698008621, "grad_norm": 376.92083740234375, "learning_rate": 6.563142173943715e-09, "loss": 17.2048, "step": 492660 }, { "epoch": 0.995224570433546, "grad_norm": 170.47386169433594, "learning_rate": 6.545275034843257e-09, "loss": 12.9788, "step": 492670 }, { "epoch": 0.9952447710662298, "grad_norm": 152.96368408203125, "learning_rate": 6.527432233235931e-09, "loss": 13.7648, "step": 492680 }, { "epoch": 0.9952649716989136, "grad_norm": 268.8824462890625, "learning_rate": 6.509613769207778e-09, "loss": 27.7853, "step": 492690 }, { "epoch": 0.9952851723315974, "grad_norm": 60.152130126953125, "learning_rate": 6.491819642846509e-09, "loss": 18.511, "step": 492700 }, { "epoch": 0.9953053729642812, "grad_norm": 62.471656799316406, "learning_rate": 6.4740498542387174e-09, "loss": 6.4525, "step": 492710 }, { "epoch": 0.995325573596965, "grad_norm": 22.64701271057129, "learning_rate": 6.456304403470448e-09, "loss": 15.0782, "step": 492720 }, { "epoch": 0.9953457742296489, "grad_norm": 509.4891052246094, "learning_rate": 6.438583290628298e-09, "loss": 20.1131, "step": 492730 }, { "epoch": 0.9953659748623327, "grad_norm": 164.00579833984375, "learning_rate": 6.420886515799418e-09, "loss": 23.3696, "step": 492740 }, { "epoch": 0.9953861754950165, "grad_norm": 296.6884765625, "learning_rate": 6.403214079069298e-09, "loss": 21.8988, "step": 492750 }, { "epoch": 0.9954063761277003, "grad_norm": 104.28278350830078, "learning_rate": 6.385565980523978e-09, "loss": 8.4838, "step": 492760 }, { "epoch": 0.9954265767603842, "grad_norm": 223.9442901611328, "learning_rate": 6.3679422202495015e-09, "loss": 13.4782, "step": 492770 }, { "epoch": 0.995446777393068, "grad_norm": 286.3375549316406, "learning_rate": 6.350342798332465e-09, "loss": 20.3432, "step": 492780 }, { "epoch": 0.9954669780257518, "grad_norm": 296.218505859375, "learning_rate": 6.332767714858357e-09, "loss": 21.4292, "step": 492790 }, { "epoch": 0.9954871786584356, "grad_norm": 316.03076171875, "learning_rate": 6.315216969912663e-09, "loss": 9.5164, "step": 492800 }, { "epoch": 0.9955073792911194, "grad_norm": 441.8782958984375, "learning_rate": 6.2976905635803165e-09, "loss": 22.2427, "step": 492810 }, { "epoch": 0.9955275799238033, "grad_norm": 453.11181640625, "learning_rate": 6.280188495947914e-09, "loss": 18.2984, "step": 492820 }, { "epoch": 0.9955477805564871, "grad_norm": 0.0, "learning_rate": 6.262710767100388e-09, "loss": 18.1285, "step": 492830 }, { "epoch": 0.9955679811891709, "grad_norm": 307.5326843261719, "learning_rate": 6.245257377122116e-09, "loss": 23.1288, "step": 492840 }, { "epoch": 0.9955881818218547, "grad_norm": 116.1676254272461, "learning_rate": 6.227828326099139e-09, "loss": 16.4471, "step": 492850 }, { "epoch": 0.9956083824545384, "grad_norm": 195.77703857421875, "learning_rate": 6.21042361411639e-09, "loss": 12.6839, "step": 492860 }, { "epoch": 0.9956285830872222, "grad_norm": 276.8194885253906, "learning_rate": 6.19304324125769e-09, "loss": 15.2534, "step": 492870 }, { "epoch": 0.9956487837199061, "grad_norm": 75.78239440917969, "learning_rate": 6.175687207609082e-09, "loss": 13.7528, "step": 492880 }, { "epoch": 0.9956689843525899, "grad_norm": 437.89654541015625, "learning_rate": 6.1583555132543886e-09, "loss": 25.4995, "step": 492890 }, { "epoch": 0.9956891849852737, "grad_norm": 154.37973022460938, "learning_rate": 6.141048158277429e-09, "loss": 11.9446, "step": 492900 }, { "epoch": 0.9957093856179575, "grad_norm": 180.64698791503906, "learning_rate": 6.123765142764249e-09, "loss": 11.4718, "step": 492910 }, { "epoch": 0.9957295862506413, "grad_norm": 163.42579650878906, "learning_rate": 6.106506466797557e-09, "loss": 11.0536, "step": 492920 }, { "epoch": 0.9957497868833252, "grad_norm": 281.0152282714844, "learning_rate": 6.0892721304622874e-09, "loss": 21.0349, "step": 492930 }, { "epoch": 0.995769987516009, "grad_norm": 193.30465698242188, "learning_rate": 6.0720621338422606e-09, "loss": 14.8078, "step": 492940 }, { "epoch": 0.9957901881486928, "grad_norm": 271.8005065917969, "learning_rate": 6.054876477021299e-09, "loss": 10.3514, "step": 492950 }, { "epoch": 0.9958103887813766, "grad_norm": 154.5194549560547, "learning_rate": 6.037715160083224e-09, "loss": 12.1968, "step": 492960 }, { "epoch": 0.9958305894140604, "grad_norm": 431.3895263671875, "learning_rate": 6.020578183111303e-09, "loss": 14.8034, "step": 492970 }, { "epoch": 0.9958507900467443, "grad_norm": 1497.735595703125, "learning_rate": 6.003465546189358e-09, "loss": 38.3837, "step": 492980 }, { "epoch": 0.9958709906794281, "grad_norm": 583.8172607421875, "learning_rate": 5.98637724940121e-09, "loss": 20.6984, "step": 492990 }, { "epoch": 0.9958911913121119, "grad_norm": 143.6290740966797, "learning_rate": 5.969313292830126e-09, "loss": 9.2027, "step": 493000 }, { "epoch": 0.9959113919447957, "grad_norm": 93.39070892333984, "learning_rate": 5.952273676558262e-09, "loss": 9.86, "step": 493010 }, { "epoch": 0.9959315925774795, "grad_norm": 406.0321960449219, "learning_rate": 5.935258400669442e-09, "loss": 12.2395, "step": 493020 }, { "epoch": 0.9959517932101634, "grad_norm": 223.07144165039062, "learning_rate": 5.918267465246374e-09, "loss": 13.2936, "step": 493030 }, { "epoch": 0.9959719938428472, "grad_norm": 258.6623229980469, "learning_rate": 5.901300870372329e-09, "loss": 20.8765, "step": 493040 }, { "epoch": 0.995992194475531, "grad_norm": 185.70497131347656, "learning_rate": 5.8843586161289045e-09, "loss": 9.3024, "step": 493050 }, { "epoch": 0.9960123951082148, "grad_norm": 667.3585205078125, "learning_rate": 5.867440702599925e-09, "loss": 13.7881, "step": 493060 }, { "epoch": 0.9960325957408986, "grad_norm": 717.0975952148438, "learning_rate": 5.850547129867546e-09, "loss": 30.2538, "step": 493070 }, { "epoch": 0.9960527963735825, "grad_norm": 185.55636596679688, "learning_rate": 5.833677898013368e-09, "loss": 21.8775, "step": 493080 }, { "epoch": 0.9960729970062663, "grad_norm": 380.11956787109375, "learning_rate": 5.816833007120659e-09, "loss": 19.064, "step": 493090 }, { "epoch": 0.9960931976389501, "grad_norm": 218.5950164794922, "learning_rate": 5.800012457270466e-09, "loss": 22.8577, "step": 493100 }, { "epoch": 0.9961133982716339, "grad_norm": 224.53257751464844, "learning_rate": 5.783216248545498e-09, "loss": 12.1389, "step": 493110 }, { "epoch": 0.9961335989043176, "grad_norm": 290.1208801269531, "learning_rate": 5.766444381027358e-09, "loss": 18.0064, "step": 493120 }, { "epoch": 0.9961537995370014, "grad_norm": 335.3583984375, "learning_rate": 5.749696854798204e-09, "loss": 15.1208, "step": 493130 }, { "epoch": 0.9961740001696853, "grad_norm": 4.631486892700195, "learning_rate": 5.732973669939079e-09, "loss": 16.424, "step": 493140 }, { "epoch": 0.9961942008023691, "grad_norm": 425.11138916015625, "learning_rate": 5.716274826531587e-09, "loss": 19.8227, "step": 493150 }, { "epoch": 0.9962144014350529, "grad_norm": 175.2985382080078, "learning_rate": 5.699600324657328e-09, "loss": 19.6608, "step": 493160 }, { "epoch": 0.9962346020677367, "grad_norm": 163.50164794921875, "learning_rate": 5.682950164397349e-09, "loss": 11.5084, "step": 493170 }, { "epoch": 0.9962548027004205, "grad_norm": 240.91807556152344, "learning_rate": 5.6663243458332514e-09, "loss": 19.7767, "step": 493180 }, { "epoch": 0.9962750033331044, "grad_norm": 85.84590911865234, "learning_rate": 5.649722869044971e-09, "loss": 31.0176, "step": 493190 }, { "epoch": 0.9962952039657882, "grad_norm": 253.2265167236328, "learning_rate": 5.633145734114665e-09, "loss": 19.1726, "step": 493200 }, { "epoch": 0.996315404598472, "grad_norm": 226.51393127441406, "learning_rate": 5.616592941123378e-09, "loss": 24.0137, "step": 493210 }, { "epoch": 0.9963356052311558, "grad_norm": 312.4957275390625, "learning_rate": 5.600064490149937e-09, "loss": 16.8343, "step": 493220 }, { "epoch": 0.9963558058638396, "grad_norm": 67.92218017578125, "learning_rate": 5.583560381276498e-09, "loss": 21.5998, "step": 493230 }, { "epoch": 0.9963760064965235, "grad_norm": 342.03173828125, "learning_rate": 5.5670806145835536e-09, "loss": 32.474, "step": 493240 }, { "epoch": 0.9963962071292073, "grad_norm": 272.71844482421875, "learning_rate": 5.5506251901504825e-09, "loss": 19.1489, "step": 493250 }, { "epoch": 0.9964164077618911, "grad_norm": 283.71484375, "learning_rate": 5.534194108057778e-09, "loss": 6.9455, "step": 493260 }, { "epoch": 0.9964366083945749, "grad_norm": 244.4901123046875, "learning_rate": 5.517787368385375e-09, "loss": 12.2465, "step": 493270 }, { "epoch": 0.9964568090272587, "grad_norm": 431.8292541503906, "learning_rate": 5.501404971214319e-09, "loss": 14.8472, "step": 493280 }, { "epoch": 0.9964770096599426, "grad_norm": 369.4844665527344, "learning_rate": 5.485046916622883e-09, "loss": 9.1092, "step": 493290 }, { "epoch": 0.9964972102926264, "grad_norm": 232.2456817626953, "learning_rate": 5.468713204692111e-09, "loss": 8.0756, "step": 493300 }, { "epoch": 0.9965174109253102, "grad_norm": 252.66551208496094, "learning_rate": 5.45240383550083e-09, "loss": 15.236, "step": 493310 }, { "epoch": 0.996537611557994, "grad_norm": 471.36676025390625, "learning_rate": 5.436118809128421e-09, "loss": 21.6559, "step": 493320 }, { "epoch": 0.9965578121906778, "grad_norm": 165.3039093017578, "learning_rate": 5.419858125655375e-09, "loss": 11.0622, "step": 493330 }, { "epoch": 0.9965780128233617, "grad_norm": 53.91355895996094, "learning_rate": 5.403621785159407e-09, "loss": 17.1724, "step": 493340 }, { "epoch": 0.9965982134560455, "grad_norm": 80.09151458740234, "learning_rate": 5.38740978772101e-09, "loss": 11.7053, "step": 493350 }, { "epoch": 0.9966184140887293, "grad_norm": 442.5940856933594, "learning_rate": 5.371222133418452e-09, "loss": 15.5027, "step": 493360 }, { "epoch": 0.9966386147214131, "grad_norm": 111.42676544189453, "learning_rate": 5.355058822330561e-09, "loss": 16.7894, "step": 493370 }, { "epoch": 0.9966588153540968, "grad_norm": 412.7138366699219, "learning_rate": 5.338919854536162e-09, "loss": 11.4344, "step": 493380 }, { "epoch": 0.9966790159867807, "grad_norm": 261.85955810546875, "learning_rate": 5.322805230114636e-09, "loss": 17.3089, "step": 493390 }, { "epoch": 0.9966992166194645, "grad_norm": 374.5972900390625, "learning_rate": 5.306714949143699e-09, "loss": 11.36, "step": 493400 }, { "epoch": 0.9967194172521483, "grad_norm": 392.8015441894531, "learning_rate": 5.290649011702176e-09, "loss": 27.3323, "step": 493410 }, { "epoch": 0.9967396178848321, "grad_norm": 380.3060607910156, "learning_rate": 5.2746074178683385e-09, "loss": 16.8733, "step": 493420 }, { "epoch": 0.9967598185175159, "grad_norm": 91.70571899414062, "learning_rate": 5.258590167719901e-09, "loss": 20.3022, "step": 493430 }, { "epoch": 0.9967800191501998, "grad_norm": 245.89756774902344, "learning_rate": 5.242597261335691e-09, "loss": 20.0283, "step": 493440 }, { "epoch": 0.9968002197828836, "grad_norm": 119.63677978515625, "learning_rate": 5.226628698792868e-09, "loss": 16.8436, "step": 493450 }, { "epoch": 0.9968204204155674, "grad_norm": 456.9029541015625, "learning_rate": 5.210684480169703e-09, "loss": 29.493, "step": 493460 }, { "epoch": 0.9968406210482512, "grad_norm": 32.520992279052734, "learning_rate": 5.1947646055444665e-09, "loss": 19.6443, "step": 493470 }, { "epoch": 0.996860821680935, "grad_norm": 333.980224609375, "learning_rate": 5.178869074993209e-09, "loss": 19.9943, "step": 493480 }, { "epoch": 0.9968810223136189, "grad_norm": 452.90362548828125, "learning_rate": 5.162997888595312e-09, "loss": 17.3427, "step": 493490 }, { "epoch": 0.9969012229463027, "grad_norm": 424.89251708984375, "learning_rate": 5.147151046426824e-09, "loss": 9.6077, "step": 493500 }, { "epoch": 0.9969214235789865, "grad_norm": 38.030208587646484, "learning_rate": 5.1313285485649064e-09, "loss": 10.3104, "step": 493510 }, { "epoch": 0.9969416242116703, "grad_norm": 1094.377685546875, "learning_rate": 5.115530395087276e-09, "loss": 17.6154, "step": 493520 }, { "epoch": 0.9969618248443541, "grad_norm": 9.166926383972168, "learning_rate": 5.099756586071092e-09, "loss": 11.0072, "step": 493530 }, { "epoch": 0.996982025477038, "grad_norm": 175.388671875, "learning_rate": 5.084007121592405e-09, "loss": 26.7231, "step": 493540 }, { "epoch": 0.9970022261097218, "grad_norm": 235.22972106933594, "learning_rate": 5.06828200172893e-09, "loss": 13.2122, "step": 493550 }, { "epoch": 0.9970224267424056, "grad_norm": 297.00189208984375, "learning_rate": 5.052581226556719e-09, "loss": 18.2604, "step": 493560 }, { "epoch": 0.9970426273750894, "grad_norm": 536.833740234375, "learning_rate": 5.036904796152375e-09, "loss": 28.2438, "step": 493570 }, { "epoch": 0.9970628280077732, "grad_norm": 659.442626953125, "learning_rate": 5.02125271059195e-09, "loss": 26.9861, "step": 493580 }, { "epoch": 0.9970830286404571, "grad_norm": 181.55291748046875, "learning_rate": 5.0056249699526046e-09, "loss": 24.303, "step": 493590 }, { "epoch": 0.9971032292731409, "grad_norm": 251.4856719970703, "learning_rate": 4.990021574309834e-09, "loss": 13.7942, "step": 493600 }, { "epoch": 0.9971234299058247, "grad_norm": 285.0186462402344, "learning_rate": 4.9744425237396865e-09, "loss": 11.717, "step": 493610 }, { "epoch": 0.9971436305385085, "grad_norm": 446.0801086425781, "learning_rate": 4.95888781831877e-09, "loss": 16.6561, "step": 493620 }, { "epoch": 0.9971638311711922, "grad_norm": 256.6576232910156, "learning_rate": 4.9433574581220225e-09, "loss": 10.5709, "step": 493630 }, { "epoch": 0.997184031803876, "grad_norm": 123.8934326171875, "learning_rate": 4.927851443225495e-09, "loss": 18.5653, "step": 493640 }, { "epoch": 0.9972042324365599, "grad_norm": 574.6906127929688, "learning_rate": 4.9123697737052386e-09, "loss": 14.9605, "step": 493650 }, { "epoch": 0.9972244330692437, "grad_norm": 628.70849609375, "learning_rate": 4.896912449635638e-09, "loss": 20.5991, "step": 493660 }, { "epoch": 0.9972446337019275, "grad_norm": 113.13594818115234, "learning_rate": 4.881479471093298e-09, "loss": 28.9503, "step": 493670 }, { "epoch": 0.9972648343346113, "grad_norm": 186.0068817138672, "learning_rate": 4.866070838152049e-09, "loss": 13.5168, "step": 493680 }, { "epoch": 0.9972850349672951, "grad_norm": 272.0819091796875, "learning_rate": 4.850686550888495e-09, "loss": 22.2566, "step": 493690 }, { "epoch": 0.997305235599979, "grad_norm": 607.2677612304688, "learning_rate": 4.835326609376468e-09, "loss": 16.489, "step": 493700 }, { "epoch": 0.9973254362326628, "grad_norm": 303.22735595703125, "learning_rate": 4.81999101369146e-09, "loss": 22.1093, "step": 493710 }, { "epoch": 0.9973456368653466, "grad_norm": 252.09075927734375, "learning_rate": 4.804679763907305e-09, "loss": 17.7456, "step": 493720 }, { "epoch": 0.9973658374980304, "grad_norm": 309.9205017089844, "learning_rate": 4.789392860100051e-09, "loss": 27.9882, "step": 493730 }, { "epoch": 0.9973860381307142, "grad_norm": 188.27894592285156, "learning_rate": 4.774130302342972e-09, "loss": 10.7294, "step": 493740 }, { "epoch": 0.9974062387633981, "grad_norm": 186.87384033203125, "learning_rate": 4.758892090711009e-09, "loss": 16.9977, "step": 493750 }, { "epoch": 0.9974264393960819, "grad_norm": 1355.3223876953125, "learning_rate": 4.743678225278547e-09, "loss": 22.0995, "step": 493760 }, { "epoch": 0.9974466400287657, "grad_norm": 146.46865844726562, "learning_rate": 4.7284887061194165e-09, "loss": 10.3052, "step": 493770 }, { "epoch": 0.9974668406614495, "grad_norm": 202.4141387939453, "learning_rate": 4.713323533308001e-09, "loss": 14.6178, "step": 493780 }, { "epoch": 0.9974870412941333, "grad_norm": 233.77516174316406, "learning_rate": 4.6981827069181305e-09, "loss": 26.2586, "step": 493790 }, { "epoch": 0.9975072419268172, "grad_norm": 485.7827453613281, "learning_rate": 4.683066227023081e-09, "loss": 12.0104, "step": 493800 }, { "epoch": 0.997527442559501, "grad_norm": 111.74791717529297, "learning_rate": 4.667974093696681e-09, "loss": 11.123, "step": 493810 }, { "epoch": 0.9975476431921848, "grad_norm": 210.8526153564453, "learning_rate": 4.6529063070133165e-09, "loss": 14.3363, "step": 493820 }, { "epoch": 0.9975678438248686, "grad_norm": 333.481689453125, "learning_rate": 4.637862867045151e-09, "loss": 16.3601, "step": 493830 }, { "epoch": 0.9975880444575524, "grad_norm": 334.3294677734375, "learning_rate": 4.6228437738665695e-09, "loss": 11.7012, "step": 493840 }, { "epoch": 0.9976082450902363, "grad_norm": 431.4281311035156, "learning_rate": 4.607849027550293e-09, "loss": 14.8651, "step": 493850 }, { "epoch": 0.9976284457229201, "grad_norm": 367.46844482421875, "learning_rate": 4.592878628169595e-09, "loss": 27.9158, "step": 493860 }, { "epoch": 0.9976486463556039, "grad_norm": 366.1986083984375, "learning_rate": 4.577932575797195e-09, "loss": 20.3304, "step": 493870 }, { "epoch": 0.9976688469882877, "grad_norm": 816.3501586914062, "learning_rate": 4.5630108705063684e-09, "loss": 18.9362, "step": 493880 }, { "epoch": 0.9976890476209714, "grad_norm": 380.4483337402344, "learning_rate": 4.5481135123692786e-09, "loss": 17.446, "step": 493890 }, { "epoch": 0.9977092482536553, "grad_norm": 488.8197326660156, "learning_rate": 4.533240501459202e-09, "loss": 17.5896, "step": 493900 }, { "epoch": 0.9977294488863391, "grad_norm": 159.9177703857422, "learning_rate": 4.518391837847747e-09, "loss": 10.0191, "step": 493910 }, { "epoch": 0.9977496495190229, "grad_norm": 377.70111083984375, "learning_rate": 4.503567521608187e-09, "loss": 18.2045, "step": 493920 }, { "epoch": 0.9977698501517067, "grad_norm": 66.71868133544922, "learning_rate": 4.4887675528121345e-09, "loss": 16.9787, "step": 493930 }, { "epoch": 0.9977900507843905, "grad_norm": 175.28021240234375, "learning_rate": 4.473991931531752e-09, "loss": 24.261, "step": 493940 }, { "epoch": 0.9978102514170744, "grad_norm": 162.11776733398438, "learning_rate": 4.459240657839203e-09, "loss": 26.007, "step": 493950 }, { "epoch": 0.9978304520497582, "grad_norm": 235.6828155517578, "learning_rate": 4.4445137318072096e-09, "loss": 19.5397, "step": 493960 }, { "epoch": 0.997850652682442, "grad_norm": 12.251602172851562, "learning_rate": 4.429811153505714e-09, "loss": 6.0647, "step": 493970 }, { "epoch": 0.9978708533151258, "grad_norm": 334.3155517578125, "learning_rate": 4.415132923007992e-09, "loss": 14.3883, "step": 493980 }, { "epoch": 0.9978910539478096, "grad_norm": 18.55206298828125, "learning_rate": 4.400479040385098e-09, "loss": 12.8171, "step": 493990 }, { "epoch": 0.9979112545804935, "grad_norm": 2870.7685546875, "learning_rate": 4.385849505708084e-09, "loss": 40.0994, "step": 494000 }, { "epoch": 0.9979314552131773, "grad_norm": 237.31381225585938, "learning_rate": 4.3712443190491175e-09, "loss": 15.5543, "step": 494010 }, { "epoch": 0.9979516558458611, "grad_norm": 169.72447204589844, "learning_rate": 4.3566634804781405e-09, "loss": 15.9001, "step": 494020 }, { "epoch": 0.9979718564785449, "grad_norm": 489.4140319824219, "learning_rate": 4.342106990067319e-09, "loss": 13.7482, "step": 494030 }, { "epoch": 0.9979920571112287, "grad_norm": 262.1561279296875, "learning_rate": 4.327574847886595e-09, "loss": 16.6041, "step": 494040 }, { "epoch": 0.9980122577439126, "grad_norm": 244.12388610839844, "learning_rate": 4.313067054008135e-09, "loss": 14.7642, "step": 494050 }, { "epoch": 0.9980324583765964, "grad_norm": 620.3541259765625, "learning_rate": 4.298583608501328e-09, "loss": 25.66, "step": 494060 }, { "epoch": 0.9980526590092802, "grad_norm": 1296.5672607421875, "learning_rate": 4.284124511437782e-09, "loss": 34.4936, "step": 494070 }, { "epoch": 0.998072859641964, "grad_norm": 182.0353240966797, "learning_rate": 4.269689762886886e-09, "loss": 22.3797, "step": 494080 }, { "epoch": 0.9980930602746478, "grad_norm": 346.6893615722656, "learning_rate": 4.2552793629202506e-09, "loss": 19.1555, "step": 494090 }, { "epoch": 0.9981132609073317, "grad_norm": 71.67774200439453, "learning_rate": 4.2408933116072635e-09, "loss": 14.7107, "step": 494100 }, { "epoch": 0.9981334615400155, "grad_norm": 197.77346801757812, "learning_rate": 4.22653160901787e-09, "loss": 17.0342, "step": 494110 }, { "epoch": 0.9981536621726993, "grad_norm": 79.39214324951172, "learning_rate": 4.212194255222568e-09, "loss": 5.2157, "step": 494120 }, { "epoch": 0.9981738628053831, "grad_norm": 181.61233520507812, "learning_rate": 4.197881250291302e-09, "loss": 31.528, "step": 494130 }, { "epoch": 0.9981940634380668, "grad_norm": 309.17108154296875, "learning_rate": 4.183592594294017e-09, "loss": 12.9723, "step": 494140 }, { "epoch": 0.9982142640707506, "grad_norm": 405.5485534667969, "learning_rate": 4.169328287299545e-09, "loss": 22.8086, "step": 494150 }, { "epoch": 0.9982344647034345, "grad_norm": 442.8080139160156, "learning_rate": 4.155088329377832e-09, "loss": 26.5508, "step": 494160 }, { "epoch": 0.9982546653361183, "grad_norm": 87.66561889648438, "learning_rate": 4.140872720598266e-09, "loss": 16.6031, "step": 494170 }, { "epoch": 0.9982748659688021, "grad_norm": 430.75762939453125, "learning_rate": 4.126681461030236e-09, "loss": 12.1058, "step": 494180 }, { "epoch": 0.9982950666014859, "grad_norm": 376.21795654296875, "learning_rate": 4.11251455074313e-09, "loss": 17.4234, "step": 494190 }, { "epoch": 0.9983152672341697, "grad_norm": 403.6354675292969, "learning_rate": 4.098371989805227e-09, "loss": 25.0719, "step": 494200 }, { "epoch": 0.9983354678668536, "grad_norm": 51.054805755615234, "learning_rate": 4.0842537782859185e-09, "loss": 13.4751, "step": 494210 }, { "epoch": 0.9983556684995374, "grad_norm": 509.38482666015625, "learning_rate": 4.07015991625459e-09, "loss": 9.6944, "step": 494220 }, { "epoch": 0.9983758691322212, "grad_norm": 167.8269500732422, "learning_rate": 4.056090403778967e-09, "loss": 27.3016, "step": 494230 }, { "epoch": 0.998396069764905, "grad_norm": 382.88677978515625, "learning_rate": 4.042045240927883e-09, "loss": 13.0717, "step": 494240 }, { "epoch": 0.9984162703975888, "grad_norm": 26.21680450439453, "learning_rate": 4.028024427770172e-09, "loss": 8.1485, "step": 494250 }, { "epoch": 0.9984364710302727, "grad_norm": 202.1404266357422, "learning_rate": 4.014027964373557e-09, "loss": 10.6287, "step": 494260 }, { "epoch": 0.9984566716629565, "grad_norm": 150.67433166503906, "learning_rate": 4.000055850807427e-09, "loss": 12.2298, "step": 494270 }, { "epoch": 0.9984768722956403, "grad_norm": 296.3301696777344, "learning_rate": 3.986108087138396e-09, "loss": 13.9021, "step": 494280 }, { "epoch": 0.9984970729283241, "grad_norm": 176.92782592773438, "learning_rate": 3.972184673435297e-09, "loss": 16.7603, "step": 494290 }, { "epoch": 0.998517273561008, "grad_norm": 236.4563751220703, "learning_rate": 3.9582856097658554e-09, "loss": 18.8897, "step": 494300 }, { "epoch": 0.9985374741936918, "grad_norm": 274.7383728027344, "learning_rate": 3.944410896197792e-09, "loss": 19.7265, "step": 494310 }, { "epoch": 0.9985576748263756, "grad_norm": 563.5479736328125, "learning_rate": 3.930560532798832e-09, "loss": 24.5245, "step": 494320 }, { "epoch": 0.9985778754590594, "grad_norm": 124.28654479980469, "learning_rate": 3.9167345196361454e-09, "loss": 17.0896, "step": 494330 }, { "epoch": 0.9985980760917432, "grad_norm": 58.03425216674805, "learning_rate": 3.902932856777453e-09, "loss": 10.4322, "step": 494340 }, { "epoch": 0.998618276724427, "grad_norm": 260.0706481933594, "learning_rate": 3.889155544289924e-09, "loss": 19.126, "step": 494350 }, { "epoch": 0.9986384773571109, "grad_norm": 160.23228454589844, "learning_rate": 3.8754025822407285e-09, "loss": 22.1344, "step": 494360 }, { "epoch": 0.9986586779897947, "grad_norm": 319.59918212890625, "learning_rate": 3.861673970697033e-09, "loss": 13.2886, "step": 494370 }, { "epoch": 0.9986788786224785, "grad_norm": 209.1680145263672, "learning_rate": 3.847969709725452e-09, "loss": 18.2661, "step": 494380 }, { "epoch": 0.9986990792551623, "grad_norm": 114.0342788696289, "learning_rate": 3.834289799392598e-09, "loss": 24.9581, "step": 494390 }, { "epoch": 0.998719279887846, "grad_norm": 160.35536193847656, "learning_rate": 3.820634239765642e-09, "loss": 15.435, "step": 494400 }, { "epoch": 0.9987394805205299, "grad_norm": 237.72604370117188, "learning_rate": 3.8070030309111935e-09, "loss": 11.5302, "step": 494410 }, { "epoch": 0.9987596811532137, "grad_norm": 486.2515869140625, "learning_rate": 3.793396172895314e-09, "loss": 21.8128, "step": 494420 }, { "epoch": 0.9987798817858975, "grad_norm": 212.57371520996094, "learning_rate": 3.77981366578406e-09, "loss": 10.2289, "step": 494430 }, { "epoch": 0.9988000824185813, "grad_norm": 40.69304656982422, "learning_rate": 3.766255509644601e-09, "loss": 18.4652, "step": 494440 }, { "epoch": 0.9988202830512651, "grad_norm": 331.1392517089844, "learning_rate": 3.752721704541884e-09, "loss": 17.761, "step": 494450 }, { "epoch": 0.998840483683949, "grad_norm": 666.123291015625, "learning_rate": 3.739212250543078e-09, "loss": 22.8242, "step": 494460 }, { "epoch": 0.9988606843166328, "grad_norm": 310.6073303222656, "learning_rate": 3.7257271477131314e-09, "loss": 17.9392, "step": 494470 }, { "epoch": 0.9988808849493166, "grad_norm": 0.0, "learning_rate": 3.7122663961175477e-09, "loss": 11.0337, "step": 494480 }, { "epoch": 0.9989010855820004, "grad_norm": 243.14625549316406, "learning_rate": 3.698829995822939e-09, "loss": 15.4749, "step": 494490 }, { "epoch": 0.9989212862146842, "grad_norm": 21.81050682067871, "learning_rate": 3.685417946894254e-09, "loss": 20.1226, "step": 494500 }, { "epoch": 0.998941486847368, "grad_norm": 140.5139923095703, "learning_rate": 3.672030249396441e-09, "loss": 9.3858, "step": 494510 }, { "epoch": 0.9989616874800519, "grad_norm": 192.8984832763672, "learning_rate": 3.6586669033955578e-09, "loss": 11.125, "step": 494520 }, { "epoch": 0.9989818881127357, "grad_norm": 381.0818176269531, "learning_rate": 3.645327908955998e-09, "loss": 14.2186, "step": 494530 }, { "epoch": 0.9990020887454195, "grad_norm": 44.641876220703125, "learning_rate": 3.632013266143264e-09, "loss": 18.9591, "step": 494540 }, { "epoch": 0.9990222893781033, "grad_norm": 190.85699462890625, "learning_rate": 3.618722975022304e-09, "loss": 13.7379, "step": 494550 }, { "epoch": 0.9990424900107872, "grad_norm": 343.0568542480469, "learning_rate": 3.605457035657511e-09, "loss": 17.9219, "step": 494560 }, { "epoch": 0.999062690643471, "grad_norm": 164.60711669921875, "learning_rate": 3.592215448113834e-09, "loss": 19.8793, "step": 494570 }, { "epoch": 0.9990828912761548, "grad_norm": 1118.8013916015625, "learning_rate": 3.5789982124556646e-09, "loss": 21.6261, "step": 494580 }, { "epoch": 0.9991030919088386, "grad_norm": 352.3271789550781, "learning_rate": 3.565805328747951e-09, "loss": 10.4939, "step": 494590 }, { "epoch": 0.9991232925415224, "grad_norm": 193.1532440185547, "learning_rate": 3.5526367970539765e-09, "loss": 18.272, "step": 494600 }, { "epoch": 0.9991434931742063, "grad_norm": 212.69444274902344, "learning_rate": 3.5394926174381338e-09, "loss": 36.8045, "step": 494610 }, { "epoch": 0.9991636938068901, "grad_norm": 646.9624633789062, "learning_rate": 3.526372789965371e-09, "loss": 25.4694, "step": 494620 }, { "epoch": 0.9991838944395739, "grad_norm": 352.7398376464844, "learning_rate": 3.5132773146989706e-09, "loss": 13.6786, "step": 494630 }, { "epoch": 0.9992040950722577, "grad_norm": 57.33049774169922, "learning_rate": 3.5002061917027708e-09, "loss": 18.9907, "step": 494640 }, { "epoch": 0.9992242957049415, "grad_norm": 121.9939956665039, "learning_rate": 3.487159421040609e-09, "loss": 10.8586, "step": 494650 }, { "epoch": 0.9992444963376252, "grad_norm": 319.4378967285156, "learning_rate": 3.474137002775768e-09, "loss": 7.7744, "step": 494660 }, { "epoch": 0.9992646969703091, "grad_norm": 141.7076416015625, "learning_rate": 3.461138936972086e-09, "loss": 21.0372, "step": 494670 }, { "epoch": 0.9992848976029929, "grad_norm": 276.5302429199219, "learning_rate": 3.4481652236934006e-09, "loss": 16.0682, "step": 494680 }, { "epoch": 0.9993050982356767, "grad_norm": 313.83416748046875, "learning_rate": 3.4352158630018837e-09, "loss": 22.9036, "step": 494690 }, { "epoch": 0.9993252988683605, "grad_norm": 321.11041259765625, "learning_rate": 3.4222908549608193e-09, "loss": 15.7496, "step": 494700 }, { "epoch": 0.9993454995010443, "grad_norm": 379.5470886230469, "learning_rate": 3.409390199634044e-09, "loss": 23.053, "step": 494710 }, { "epoch": 0.9993657001337282, "grad_norm": 467.63946533203125, "learning_rate": 3.3965138970831758e-09, "loss": 29.7448, "step": 494720 }, { "epoch": 0.999385900766412, "grad_norm": 331.6331481933594, "learning_rate": 3.3836619473720522e-09, "loss": 13.9507, "step": 494730 }, { "epoch": 0.9994061013990958, "grad_norm": 457.5184326171875, "learning_rate": 3.370834350563401e-09, "loss": 14.6985, "step": 494740 }, { "epoch": 0.9994263020317796, "grad_norm": 335.27069091796875, "learning_rate": 3.3580311067188396e-09, "loss": 16.1703, "step": 494750 }, { "epoch": 0.9994465026644634, "grad_norm": 167.1810760498047, "learning_rate": 3.3452522159010957e-09, "loss": 22.494, "step": 494760 }, { "epoch": 0.9994667032971473, "grad_norm": 319.8489685058594, "learning_rate": 3.332497678172897e-09, "loss": 13.6591, "step": 494770 }, { "epoch": 0.9994869039298311, "grad_norm": 339.9732666015625, "learning_rate": 3.31976749359586e-09, "loss": 23.3354, "step": 494780 }, { "epoch": 0.9995071045625149, "grad_norm": 401.10968017578125, "learning_rate": 3.3070616622321584e-09, "loss": 10.5454, "step": 494790 }, { "epoch": 0.9995273051951987, "grad_norm": 147.7809295654297, "learning_rate": 3.294380184143964e-09, "loss": 18.4338, "step": 494800 }, { "epoch": 0.9995475058278825, "grad_norm": 276.3037109375, "learning_rate": 3.2817230593928938e-09, "loss": 16.4183, "step": 494810 }, { "epoch": 0.9995677064605664, "grad_norm": 553.263427734375, "learning_rate": 3.269090288041121e-09, "loss": 22.1128, "step": 494820 }, { "epoch": 0.9995879070932502, "grad_norm": 82.76365661621094, "learning_rate": 3.256481870149153e-09, "loss": 28.3858, "step": 494830 }, { "epoch": 0.999608107725934, "grad_norm": 225.22779846191406, "learning_rate": 3.2438978057791615e-09, "loss": 22.2406, "step": 494840 }, { "epoch": 0.9996283083586178, "grad_norm": 413.48529052734375, "learning_rate": 3.2313380949927643e-09, "loss": 14.8626, "step": 494850 }, { "epoch": 0.9996485089913016, "grad_norm": 148.7971954345703, "learning_rate": 3.218802737850468e-09, "loss": 13.117, "step": 494860 }, { "epoch": 0.9996687096239855, "grad_norm": 179.87042236328125, "learning_rate": 3.206291734413891e-09, "loss": 11.7422, "step": 494870 }, { "epoch": 0.9996889102566693, "grad_norm": 187.84117126464844, "learning_rate": 3.1938050847435398e-09, "loss": 9.0214, "step": 494880 }, { "epoch": 0.9997091108893531, "grad_norm": 165.037109375, "learning_rate": 3.1813427889004767e-09, "loss": 14.8521, "step": 494890 }, { "epoch": 0.9997293115220369, "grad_norm": 287.6842041015625, "learning_rate": 3.1689048469457638e-09, "loss": 14.124, "step": 494900 }, { "epoch": 0.9997495121547206, "grad_norm": 21.54877471923828, "learning_rate": 3.156491258939909e-09, "loss": 17.4252, "step": 494910 }, { "epoch": 0.9997697127874045, "grad_norm": 107.47203826904297, "learning_rate": 3.1441020249428635e-09, "loss": 17.2954, "step": 494920 }, { "epoch": 0.9997899134200883, "grad_norm": 16.81981086730957, "learning_rate": 3.1317371450156897e-09, "loss": 6.7111, "step": 494930 }, { "epoch": 0.9998101140527721, "grad_norm": 147.71060180664062, "learning_rate": 3.11939661921834e-09, "loss": 14.4858, "step": 494940 }, { "epoch": 0.9998303146854559, "grad_norm": 438.4915771484375, "learning_rate": 3.1070804476113213e-09, "loss": 9.8981, "step": 494950 }, { "epoch": 0.9998505153181397, "grad_norm": 289.93597412109375, "learning_rate": 3.094788630254031e-09, "loss": 30.2553, "step": 494960 }, { "epoch": 0.9998707159508236, "grad_norm": 144.87721252441406, "learning_rate": 3.0825211672064203e-09, "loss": 15.9481, "step": 494970 }, { "epoch": 0.9998909165835074, "grad_norm": 497.0413513183594, "learning_rate": 3.070278058528997e-09, "loss": 22.4205, "step": 494980 }, { "epoch": 0.9999111172161912, "grad_norm": 252.9429473876953, "learning_rate": 3.058059304280603e-09, "loss": 25.7312, "step": 494990 }, { "epoch": 0.999931317848875, "grad_norm": 162.82374572753906, "learning_rate": 3.0458649045211897e-09, "loss": 17.4093, "step": 495000 }, { "epoch": 0.9999515184815588, "grad_norm": 165.43084716796875, "learning_rate": 3.03369485931071e-09, "loss": 20.4357, "step": 495010 }, { "epoch": 0.9999717191142427, "grad_norm": 328.46612548828125, "learning_rate": 3.0215491687074492e-09, "loss": 20.1648, "step": 495020 }, { "epoch": 0.9999919197469265, "grad_norm": 359.5785827636719, "learning_rate": 3.009427832771361e-09, "loss": 26.0108, "step": 495030 }, { "epoch": 1.0, "eval_loss": 18.470447540283203, "eval_runtime": 409.5979, "eval_samples_per_second": 24.417, "eval_steps_per_second": 12.21, "step": 495034 }, { "epoch": 1.0000121203796102, "grad_norm": 350.266845703125, "learning_rate": 2.9973308515607313e-09, "loss": 20.6201, "step": 495040 }, { "epoch": 1.0000323210122941, "grad_norm": 297.6217041015625, "learning_rate": 2.9852582251355124e-09, "loss": 15.3668, "step": 495050 }, { "epoch": 1.0000525216449778, "grad_norm": 232.36581420898438, "learning_rate": 2.9732099535539905e-09, "loss": 12.8444, "step": 495060 }, { "epoch": 1.0000727222776618, "grad_norm": 0.0, "learning_rate": 2.961186036875008e-09, "loss": 22.4988, "step": 495070 }, { "epoch": 1.0000929229103455, "grad_norm": 423.0118713378906, "learning_rate": 2.949186475157406e-09, "loss": 13.9478, "step": 495080 }, { "epoch": 1.0001131235430294, "grad_norm": 322.5085754394531, "learning_rate": 2.937211268458917e-09, "loss": 22.6801, "step": 495090 }, { "epoch": 1.000133324175713, "grad_norm": 256.5771179199219, "learning_rate": 2.9252604168383826e-09, "loss": 11.055, "step": 495100 }, { "epoch": 1.000153524808397, "grad_norm": 171.06491088867188, "learning_rate": 2.913333920354644e-09, "loss": 24.1243, "step": 495110 }, { "epoch": 1.0001737254410807, "grad_norm": 274.31451416015625, "learning_rate": 2.901431779064323e-09, "loss": 17.4806, "step": 495120 }, { "epoch": 1.0001939260737647, "grad_norm": 281.48431396484375, "learning_rate": 2.889553993027372e-09, "loss": 14.8861, "step": 495130 }, { "epoch": 1.0002141267064484, "grad_norm": 484.43438720703125, "learning_rate": 2.8777005622998567e-09, "loss": 15.3516, "step": 495140 }, { "epoch": 1.0002343273391323, "grad_norm": 90.8409652709961, "learning_rate": 2.865871486940619e-09, "loss": 10.165, "step": 495150 }, { "epoch": 1.000254527971816, "grad_norm": 220.88543701171875, "learning_rate": 2.8540667670073905e-09, "loss": 12.697, "step": 495160 }, { "epoch": 1.0002747286045, "grad_norm": 303.0424499511719, "learning_rate": 2.842286402556793e-09, "loss": 14.4591, "step": 495170 }, { "epoch": 1.0002949292371837, "grad_norm": 388.16571044921875, "learning_rate": 2.830530393647113e-09, "loss": 21.5584, "step": 495180 }, { "epoch": 1.0003151298698676, "grad_norm": 246.49331665039062, "learning_rate": 2.8187987403355268e-09, "loss": 20.6374, "step": 495190 }, { "epoch": 1.0003353305025513, "grad_norm": 42.6082649230957, "learning_rate": 2.8070914426786555e-09, "loss": 35.4803, "step": 495200 }, { "epoch": 1.0003555311352352, "grad_norm": 238.4278106689453, "learning_rate": 2.7954085007342315e-09, "loss": 26.5214, "step": 495210 }, { "epoch": 1.000375731767919, "grad_norm": 88.92025756835938, "learning_rate": 2.78374991455832e-09, "loss": 21.3553, "step": 495220 }, { "epoch": 1.0003959324006029, "grad_norm": 52.954776763916016, "learning_rate": 2.772115684209209e-09, "loss": 10.0283, "step": 495230 }, { "epoch": 1.0004161330332866, "grad_norm": 138.259033203125, "learning_rate": 2.7605058097418536e-09, "loss": 13.1708, "step": 495240 }, { "epoch": 1.0004363336659705, "grad_norm": 290.2542724609375, "learning_rate": 2.748920291214541e-09, "loss": 10.6109, "step": 495250 }, { "epoch": 1.0004565342986542, "grad_norm": 316.5008239746094, "learning_rate": 2.7373591286822266e-09, "loss": 17.5169, "step": 495260 }, { "epoch": 1.0004767349313382, "grad_norm": 939.4614868164062, "learning_rate": 2.7258223222020876e-09, "loss": 24.7996, "step": 495270 }, { "epoch": 1.0004969355640219, "grad_norm": 424.4223327636719, "learning_rate": 2.7143098718301896e-09, "loss": 22.1629, "step": 495280 }, { "epoch": 1.0005171361967056, "grad_norm": 193.19212341308594, "learning_rate": 2.7028217776225994e-09, "loss": 8.4584, "step": 495290 }, { "epoch": 1.0005373368293895, "grad_norm": 124.94254302978516, "learning_rate": 2.6913580396359384e-09, "loss": 17.981, "step": 495300 }, { "epoch": 1.0005575374620732, "grad_norm": 449.8768310546875, "learning_rate": 2.6799186579246074e-09, "loss": 15.3962, "step": 495310 }, { "epoch": 1.0005777380947571, "grad_norm": 323.71661376953125, "learning_rate": 2.6685036325457826e-09, "loss": 28.1691, "step": 495320 }, { "epoch": 1.0005979387274408, "grad_norm": 200.54359436035156, "learning_rate": 2.65711296355442e-09, "loss": 23.6755, "step": 495330 }, { "epoch": 1.0006181393601248, "grad_norm": 337.1823425292969, "learning_rate": 2.6457466510065866e-09, "loss": 13.0376, "step": 495340 }, { "epoch": 1.0006383399928085, "grad_norm": 163.7471160888672, "learning_rate": 2.6344046949566825e-09, "loss": 19.5054, "step": 495350 }, { "epoch": 1.0006585406254924, "grad_norm": 493.8697814941406, "learning_rate": 2.6230870954607746e-09, "loss": 21.381, "step": 495360 }, { "epoch": 1.0006787412581761, "grad_norm": 209.40423583984375, "learning_rate": 2.6117938525738185e-09, "loss": 13.005, "step": 495370 }, { "epoch": 1.00069894189086, "grad_norm": 169.35916137695312, "learning_rate": 2.6005249663513254e-09, "loss": 19.9928, "step": 495380 }, { "epoch": 1.0007191425235438, "grad_norm": 274.072509765625, "learning_rate": 2.5892804368471414e-09, "loss": 17.1564, "step": 495390 }, { "epoch": 1.0007393431562277, "grad_norm": 419.8789367675781, "learning_rate": 2.5780602641167774e-09, "loss": 19.1674, "step": 495400 }, { "epoch": 1.0007595437889114, "grad_norm": 148.21514892578125, "learning_rate": 2.5668644482151892e-09, "loss": 19.6976, "step": 495410 }, { "epoch": 1.0007797444215953, "grad_norm": 413.3901062011719, "learning_rate": 2.5556929891962234e-09, "loss": 10.4757, "step": 495420 }, { "epoch": 1.000799945054279, "grad_norm": 224.62789916992188, "learning_rate": 2.5445458871148353e-09, "loss": 17.2608, "step": 495430 }, { "epoch": 1.000820145686963, "grad_norm": 216.81382751464844, "learning_rate": 2.5334231420254262e-09, "loss": 13.2153, "step": 495440 }, { "epoch": 1.0008403463196467, "grad_norm": 61.02236557006836, "learning_rate": 2.522324753981842e-09, "loss": 8.6135, "step": 495450 }, { "epoch": 1.0008605469523306, "grad_norm": 216.50863647460938, "learning_rate": 2.511250723037928e-09, "loss": 29.0171, "step": 495460 }, { "epoch": 1.0008807475850143, "grad_norm": 221.52297973632812, "learning_rate": 2.5002010492486405e-09, "loss": 14.8874, "step": 495470 }, { "epoch": 1.0009009482176983, "grad_norm": 66.00392150878906, "learning_rate": 2.4891757326667154e-09, "loss": 25.6189, "step": 495480 }, { "epoch": 1.000921148850382, "grad_norm": 944.4640502929688, "learning_rate": 2.4781747733471085e-09, "loss": 30.2171, "step": 495490 }, { "epoch": 1.000941349483066, "grad_norm": 489.37469482421875, "learning_rate": 2.4671981713420003e-09, "loss": 26.4947, "step": 495500 }, { "epoch": 1.0009615501157496, "grad_norm": 198.69480895996094, "learning_rate": 2.4562459267063466e-09, "loss": 12.7678, "step": 495510 }, { "epoch": 1.0009817507484335, "grad_norm": 161.24114990234375, "learning_rate": 2.445318039492328e-09, "loss": 9.1362, "step": 495520 }, { "epoch": 1.0010019513811172, "grad_norm": 115.72456359863281, "learning_rate": 2.4344145097537906e-09, "loss": 14.2754, "step": 495530 }, { "epoch": 1.0010221520138012, "grad_norm": 402.95782470703125, "learning_rate": 2.423535337544025e-09, "loss": 14.8401, "step": 495540 }, { "epoch": 1.001042352646485, "grad_norm": 275.5498046875, "learning_rate": 2.412680522915767e-09, "loss": 25.1825, "step": 495550 }, { "epoch": 1.0010625532791686, "grad_norm": 491.6571960449219, "learning_rate": 2.4018500659217515e-09, "loss": 23.5318, "step": 495560 }, { "epoch": 1.0010827539118525, "grad_norm": 133.8235626220703, "learning_rate": 2.3910439666147147e-09, "loss": 14.7339, "step": 495570 }, { "epoch": 1.0011029545445362, "grad_norm": 147.26856994628906, "learning_rate": 2.380262225047947e-09, "loss": 11.1961, "step": 495580 }, { "epoch": 1.0011231551772202, "grad_norm": 497.2104187011719, "learning_rate": 2.369504841273629e-09, "loss": 16.4336, "step": 495590 }, { "epoch": 1.0011433558099039, "grad_norm": 263.5239562988281, "learning_rate": 2.358771815344496e-09, "loss": 11.2798, "step": 495600 }, { "epoch": 1.0011635564425878, "grad_norm": 377.579345703125, "learning_rate": 2.348063147312174e-09, "loss": 12.8141, "step": 495610 }, { "epoch": 1.0011837570752715, "grad_norm": 310.23370361328125, "learning_rate": 2.337378837229398e-09, "loss": 17.9364, "step": 495620 }, { "epoch": 1.0012039577079554, "grad_norm": 367.58270263671875, "learning_rate": 2.326718885147794e-09, "loss": 10.5867, "step": 495630 }, { "epoch": 1.0012241583406392, "grad_norm": 4.401870250701904, "learning_rate": 2.316083291120097e-09, "loss": 11.452, "step": 495640 }, { "epoch": 1.001244358973323, "grad_norm": 110.84075927734375, "learning_rate": 2.3054720551973773e-09, "loss": 18.5355, "step": 495650 }, { "epoch": 1.0012645596060068, "grad_norm": 146.7382354736328, "learning_rate": 2.294885177431816e-09, "loss": 14.5424, "step": 495660 }, { "epoch": 1.0012847602386907, "grad_norm": 157.50096130371094, "learning_rate": 2.2843226578744826e-09, "loss": 12.3647, "step": 495670 }, { "epoch": 1.0013049608713744, "grad_norm": 780.4160766601562, "learning_rate": 2.2737844965775578e-09, "loss": 25.2432, "step": 495680 }, { "epoch": 1.0013251615040584, "grad_norm": 70.00536346435547, "learning_rate": 2.263270693592112e-09, "loss": 14.2905, "step": 495690 }, { "epoch": 1.001345362136742, "grad_norm": 546.5574340820312, "learning_rate": 2.2527812489692156e-09, "loss": 25.6824, "step": 495700 }, { "epoch": 1.001365562769426, "grad_norm": 408.5946960449219, "learning_rate": 2.2423161627599386e-09, "loss": 16.6912, "step": 495710 }, { "epoch": 1.0013857634021097, "grad_norm": 34.1672248840332, "learning_rate": 2.2318754350159067e-09, "loss": 20.0957, "step": 495720 }, { "epoch": 1.0014059640347936, "grad_norm": 202.27102661132812, "learning_rate": 2.2214590657870795e-09, "loss": 16.8399, "step": 495730 }, { "epoch": 1.0014261646674774, "grad_norm": 67.70073699951172, "learning_rate": 2.211067055124527e-09, "loss": 12.9959, "step": 495740 }, { "epoch": 1.0014463653001613, "grad_norm": 1.5654852390289307, "learning_rate": 2.2006994030798758e-09, "loss": 7.921, "step": 495750 }, { "epoch": 1.001466565932845, "grad_norm": 185.27125549316406, "learning_rate": 2.1903561097019744e-09, "loss": 14.1706, "step": 495760 }, { "epoch": 1.001486766565529, "grad_norm": 451.46075439453125, "learning_rate": 2.1800371750430037e-09, "loss": 33.0078, "step": 495770 }, { "epoch": 1.0015069671982126, "grad_norm": 169.50633239746094, "learning_rate": 2.169742599151814e-09, "loss": 21.5873, "step": 495780 }, { "epoch": 1.0015271678308966, "grad_norm": 335.2918395996094, "learning_rate": 2.15947238207892e-09, "loss": 15.987, "step": 495790 }, { "epoch": 1.0015473684635803, "grad_norm": 206.71524047851562, "learning_rate": 2.149226523874837e-09, "loss": 13.8174, "step": 495800 }, { "epoch": 1.001567569096264, "grad_norm": 262.7821960449219, "learning_rate": 2.1390050245895246e-09, "loss": 18.3342, "step": 495810 }, { "epoch": 1.001587769728948, "grad_norm": 379.2049560546875, "learning_rate": 2.128807884272388e-09, "loss": 22.2588, "step": 495820 }, { "epoch": 1.0016079703616316, "grad_norm": 381.626220703125, "learning_rate": 2.1186351029733877e-09, "loss": 16.0041, "step": 495830 }, { "epoch": 1.0016281709943156, "grad_norm": 402.493896484375, "learning_rate": 2.1084866807413727e-09, "loss": 30.4098, "step": 495840 }, { "epoch": 1.0016483716269993, "grad_norm": 133.56912231445312, "learning_rate": 2.098362617626859e-09, "loss": 12.5756, "step": 495850 }, { "epoch": 1.0016685722596832, "grad_norm": 159.41049194335938, "learning_rate": 2.088262913679251e-09, "loss": 5.5835, "step": 495860 }, { "epoch": 1.001688772892367, "grad_norm": 89.73715209960938, "learning_rate": 2.078187568946288e-09, "loss": 13.8562, "step": 495870 }, { "epoch": 1.0017089735250508, "grad_norm": 184.55747985839844, "learning_rate": 2.0681365834790413e-09, "loss": 16.0826, "step": 495880 }, { "epoch": 1.0017291741577345, "grad_norm": 251.4383087158203, "learning_rate": 2.0581099573246943e-09, "loss": 37.0574, "step": 495890 }, { "epoch": 1.0017493747904185, "grad_norm": 207.5780792236328, "learning_rate": 2.0481076905332074e-09, "loss": 7.3997, "step": 495900 }, { "epoch": 1.0017695754231022, "grad_norm": 169.85557556152344, "learning_rate": 2.038129783153431e-09, "loss": 10.5287, "step": 495910 }, { "epoch": 1.0017897760557861, "grad_norm": 225.95216369628906, "learning_rate": 2.0281762352331034e-09, "loss": 15.2157, "step": 495920 }, { "epoch": 1.0018099766884698, "grad_norm": 215.59495544433594, "learning_rate": 2.018247046821631e-09, "loss": 12.579, "step": 495930 }, { "epoch": 1.0018301773211538, "grad_norm": 155.67726135253906, "learning_rate": 2.008342217966752e-09, "loss": 21.3434, "step": 495940 }, { "epoch": 1.0018503779538375, "grad_norm": 330.3739318847656, "learning_rate": 1.9984617487173174e-09, "loss": 17.323, "step": 495950 }, { "epoch": 1.0018705785865214, "grad_norm": 139.68040466308594, "learning_rate": 1.9886056391210663e-09, "loss": 22.3888, "step": 495960 }, { "epoch": 1.001890779219205, "grad_norm": 50.877384185791016, "learning_rate": 1.9787738892262932e-09, "loss": 5.3651, "step": 495970 }, { "epoch": 1.001910979851889, "grad_norm": 193.98800659179688, "learning_rate": 1.968966499080738e-09, "loss": 13.2665, "step": 495980 }, { "epoch": 1.0019311804845727, "grad_norm": 261.4365539550781, "learning_rate": 1.95918346873214e-09, "loss": 9.9879, "step": 495990 }, { "epoch": 1.0019513811172567, "grad_norm": 251.7092742919922, "learning_rate": 1.9494247982282386e-09, "loss": 31.1196, "step": 496000 }, { "epoch": 1.0019715817499404, "grad_norm": 10.172210693359375, "learning_rate": 1.9396904876167742e-09, "loss": 17.0401, "step": 496010 }, { "epoch": 1.0019917823826243, "grad_norm": 259.8248596191406, "learning_rate": 1.9299805369449307e-09, "loss": 14.1641, "step": 496020 }, { "epoch": 1.002011983015308, "grad_norm": 241.33895874023438, "learning_rate": 1.920294946260448e-09, "loss": 13.5077, "step": 496030 }, { "epoch": 1.002032183647992, "grad_norm": 220.9041748046875, "learning_rate": 1.9106337156099553e-09, "loss": 12.3981, "step": 496040 }, { "epoch": 1.0020523842806757, "grad_norm": 201.5591278076172, "learning_rate": 1.9009968450406368e-09, "loss": 25.9868, "step": 496050 }, { "epoch": 1.0020725849133594, "grad_norm": 331.458740234375, "learning_rate": 1.8913843346002324e-09, "loss": 13.4085, "step": 496060 }, { "epoch": 1.0020927855460433, "grad_norm": 259.1542053222656, "learning_rate": 1.8817961843348166e-09, "loss": 8.3266, "step": 496070 }, { "epoch": 1.002112986178727, "grad_norm": 225.1783447265625, "learning_rate": 1.872232394291018e-09, "loss": 13.4523, "step": 496080 }, { "epoch": 1.002133186811411, "grad_norm": 101.02706146240234, "learning_rate": 1.8626929645160218e-09, "loss": 23.3397, "step": 496090 }, { "epoch": 1.0021533874440947, "grad_norm": 511.61883544921875, "learning_rate": 1.8531778950564572e-09, "loss": 19.1652, "step": 496100 }, { "epoch": 1.0021735880767786, "grad_norm": 241.39195251464844, "learning_rate": 1.8436871859578431e-09, "loss": 18.7063, "step": 496110 }, { "epoch": 1.0021937887094623, "grad_norm": 341.495361328125, "learning_rate": 1.834220837266809e-09, "loss": 17.6599, "step": 496120 }, { "epoch": 1.0022139893421462, "grad_norm": 148.2803192138672, "learning_rate": 1.8247788490299846e-09, "loss": 20.9109, "step": 496130 }, { "epoch": 1.00223418997483, "grad_norm": 135.738037109375, "learning_rate": 1.8153612212923333e-09, "loss": 13.6855, "step": 496140 }, { "epoch": 1.0022543906075139, "grad_norm": 65.21240234375, "learning_rate": 1.80596795410104e-09, "loss": 28.8057, "step": 496150 }, { "epoch": 1.0022745912401976, "grad_norm": 4.850376605987549, "learning_rate": 1.7965990475010686e-09, "loss": 17.6667, "step": 496160 }, { "epoch": 1.0022947918728815, "grad_norm": 270.9783630371094, "learning_rate": 1.7872545015379382e-09, "loss": 16.2643, "step": 496170 }, { "epoch": 1.0023149925055652, "grad_norm": 150.8543701171875, "learning_rate": 1.7779343162577233e-09, "loss": 11.1799, "step": 496180 }, { "epoch": 1.0023351931382491, "grad_norm": 347.93060302734375, "learning_rate": 1.7686384917059429e-09, "loss": 28.7505, "step": 496190 }, { "epoch": 1.0023553937709329, "grad_norm": 10.917989730834961, "learning_rate": 1.759367027927561e-09, "loss": 7.8245, "step": 496200 }, { "epoch": 1.0023755944036168, "grad_norm": 167.07542419433594, "learning_rate": 1.7501199249675416e-09, "loss": 12.516, "step": 496210 }, { "epoch": 1.0023957950363005, "grad_norm": 105.32134246826172, "learning_rate": 1.740897182871404e-09, "loss": 11.817, "step": 496220 }, { "epoch": 1.0024159956689844, "grad_norm": 181.1662139892578, "learning_rate": 1.7316988016835567e-09, "loss": 9.7884, "step": 496230 }, { "epoch": 1.0024361963016681, "grad_norm": 390.3924560546875, "learning_rate": 1.7225247814495194e-09, "loss": 10.4173, "step": 496240 }, { "epoch": 1.002456396934352, "grad_norm": 19.388301849365234, "learning_rate": 1.7133751222137007e-09, "loss": 18.2841, "step": 496250 }, { "epoch": 1.0024765975670358, "grad_norm": 193.1466522216797, "learning_rate": 1.7042498240205097e-09, "loss": 18.2591, "step": 496260 }, { "epoch": 1.0024967981997197, "grad_norm": 263.32916259765625, "learning_rate": 1.6951488869149103e-09, "loss": 5.0993, "step": 496270 }, { "epoch": 1.0025169988324034, "grad_norm": 148.9720001220703, "learning_rate": 1.686072310940201e-09, "loss": 10.3391, "step": 496280 }, { "epoch": 1.0025371994650873, "grad_norm": 343.7486267089844, "learning_rate": 1.6770200961419015e-09, "loss": 17.7179, "step": 496290 }, { "epoch": 1.002557400097771, "grad_norm": 200.78855895996094, "learning_rate": 1.6679922425638651e-09, "loss": 22.5343, "step": 496300 }, { "epoch": 1.002577600730455, "grad_norm": 190.37265014648438, "learning_rate": 1.6589887502493907e-09, "loss": 15.7389, "step": 496310 }, { "epoch": 1.0025978013631387, "grad_norm": 185.1874237060547, "learning_rate": 1.650009619242887e-09, "loss": 12.1452, "step": 496320 }, { "epoch": 1.0026180019958224, "grad_norm": 102.01954650878906, "learning_rate": 1.6410548495876533e-09, "loss": 11.1675, "step": 496330 }, { "epoch": 1.0026382026285063, "grad_norm": 93.6607437133789, "learning_rate": 1.632124441328098e-09, "loss": 16.2916, "step": 496340 }, { "epoch": 1.00265840326119, "grad_norm": 471.4112854003906, "learning_rate": 1.6232183945075197e-09, "loss": 19.1414, "step": 496350 }, { "epoch": 1.002678603893874, "grad_norm": 276.6101379394531, "learning_rate": 1.6143367091686624e-09, "loss": 16.2045, "step": 496360 }, { "epoch": 1.0026988045265577, "grad_norm": 258.1728820800781, "learning_rate": 1.6054793853553797e-09, "loss": 11.0517, "step": 496370 }, { "epoch": 1.0027190051592416, "grad_norm": 362.923583984375, "learning_rate": 1.59664642311097e-09, "loss": 20.8093, "step": 496380 }, { "epoch": 1.0027392057919253, "grad_norm": 445.3919372558594, "learning_rate": 1.5878378224781777e-09, "loss": 10.7264, "step": 496390 }, { "epoch": 1.0027594064246093, "grad_norm": 209.9241180419922, "learning_rate": 1.5790535835003006e-09, "loss": 11.5396, "step": 496400 }, { "epoch": 1.002779607057293, "grad_norm": 188.69015502929688, "learning_rate": 1.570293706219528e-09, "loss": 6.4494, "step": 496410 }, { "epoch": 1.002799807689977, "grad_norm": 149.41795349121094, "learning_rate": 1.5615581906791576e-09, "loss": 6.0343, "step": 496420 }, { "epoch": 1.0028200083226606, "grad_norm": 175.53099060058594, "learning_rate": 1.5528470369208238e-09, "loss": 21.2724, "step": 496430 }, { "epoch": 1.0028402089553445, "grad_norm": 663.169921875, "learning_rate": 1.5441602449883797e-09, "loss": 15.0054, "step": 496440 }, { "epoch": 1.0028604095880282, "grad_norm": 263.2573547363281, "learning_rate": 1.535497814923459e-09, "loss": 9.5173, "step": 496450 }, { "epoch": 1.0028806102207122, "grad_norm": 7.298611640930176, "learning_rate": 1.52685974676825e-09, "loss": 19.2869, "step": 496460 }, { "epoch": 1.0029008108533959, "grad_norm": 239.89503479003906, "learning_rate": 1.518246040564386e-09, "loss": 11.5415, "step": 496470 }, { "epoch": 1.0029210114860798, "grad_norm": 1895.709716796875, "learning_rate": 1.509656696354611e-09, "loss": 30.2155, "step": 496480 }, { "epoch": 1.0029412121187635, "grad_norm": 379.2161865234375, "learning_rate": 1.5010917141811132e-09, "loss": 23.8914, "step": 496490 }, { "epoch": 1.0029614127514475, "grad_norm": 211.70846557617188, "learning_rate": 1.4925510940844157e-09, "loss": 11.5429, "step": 496500 }, { "epoch": 1.0029816133841312, "grad_norm": 459.5061950683594, "learning_rate": 1.4840348361067069e-09, "loss": 30.6356, "step": 496510 }, { "epoch": 1.003001814016815, "grad_norm": 224.3471221923828, "learning_rate": 1.4755429402901755e-09, "loss": 9.8845, "step": 496520 }, { "epoch": 1.0030220146494988, "grad_norm": 383.6967468261719, "learning_rate": 1.4670754066747895e-09, "loss": 11.0713, "step": 496530 }, { "epoch": 1.0030422152821827, "grad_norm": 557.1777954101562, "learning_rate": 1.4586322353032923e-09, "loss": 14.309, "step": 496540 }, { "epoch": 1.0030624159148664, "grad_norm": 202.27076721191406, "learning_rate": 1.4502134262156519e-09, "loss": 12.6591, "step": 496550 }, { "epoch": 1.0030826165475504, "grad_norm": 17.173858642578125, "learning_rate": 1.4418189794540572e-09, "loss": 10.8518, "step": 496560 }, { "epoch": 1.003102817180234, "grad_norm": 126.16844177246094, "learning_rate": 1.4334488950579206e-09, "loss": 14.1143, "step": 496570 }, { "epoch": 1.0031230178129178, "grad_norm": 8.855607032775879, "learning_rate": 1.425103173069986e-09, "loss": 12.0355, "step": 496580 }, { "epoch": 1.0031432184456017, "grad_norm": 359.2356262207031, "learning_rate": 1.4167818135291112e-09, "loss": 13.1176, "step": 496590 }, { "epoch": 1.0031634190782854, "grad_norm": 67.09163665771484, "learning_rate": 1.4084848164763742e-09, "loss": 11.2893, "step": 496600 }, { "epoch": 1.0031836197109694, "grad_norm": 5.554289817810059, "learning_rate": 1.4002121819528535e-09, "loss": 17.339, "step": 496610 }, { "epoch": 1.003203820343653, "grad_norm": 315.95751953125, "learning_rate": 1.3919639099985171e-09, "loss": 20.8446, "step": 496620 }, { "epoch": 1.003224020976337, "grad_norm": 75.73570251464844, "learning_rate": 1.3837400006533331e-09, "loss": 18.169, "step": 496630 }, { "epoch": 1.0032442216090207, "grad_norm": 276.59979248046875, "learning_rate": 1.3755404539572692e-09, "loss": 14.5566, "step": 496640 }, { "epoch": 1.0032644222417046, "grad_norm": 233.65902709960938, "learning_rate": 1.3673652699508487e-09, "loss": 11.2905, "step": 496650 }, { "epoch": 1.0032846228743884, "grad_norm": 184.38006591796875, "learning_rate": 1.3592144486740399e-09, "loss": 19.635, "step": 496660 }, { "epoch": 1.0033048235070723, "grad_norm": 60.485477447509766, "learning_rate": 1.3510879901657003e-09, "loss": 7.4499, "step": 496670 }, { "epoch": 1.003325024139756, "grad_norm": 217.9574737548828, "learning_rate": 1.342985894465798e-09, "loss": 13.5795, "step": 496680 }, { "epoch": 1.00334522477244, "grad_norm": 119.2909927368164, "learning_rate": 1.3349081616143012e-09, "loss": 21.8638, "step": 496690 }, { "epoch": 1.0033654254051236, "grad_norm": 251.32972717285156, "learning_rate": 1.3268547916495124e-09, "loss": 10.1939, "step": 496700 }, { "epoch": 1.0033856260378076, "grad_norm": 41.26898956298828, "learning_rate": 1.3188257846119545e-09, "loss": 11.8392, "step": 496710 }, { "epoch": 1.0034058266704913, "grad_norm": 19.208417892456055, "learning_rate": 1.3108211405399307e-09, "loss": 9.9375, "step": 496720 }, { "epoch": 1.0034260273031752, "grad_norm": 585.7509155273438, "learning_rate": 1.3028408594728536e-09, "loss": 15.5444, "step": 496730 }, { "epoch": 1.003446227935859, "grad_norm": 211.11203002929688, "learning_rate": 1.2948849414495811e-09, "loss": 14.4546, "step": 496740 }, { "epoch": 1.0034664285685428, "grad_norm": 253.2190399169922, "learning_rate": 1.286953386508416e-09, "loss": 16.9565, "step": 496750 }, { "epoch": 1.0034866292012266, "grad_norm": 209.68011474609375, "learning_rate": 1.2790461946887712e-09, "loss": 22.8853, "step": 496760 }, { "epoch": 1.0035068298339105, "grad_norm": 124.85618591308594, "learning_rate": 1.271163366028394e-09, "loss": 22.4875, "step": 496770 }, { "epoch": 1.0035270304665942, "grad_norm": 257.1024169921875, "learning_rate": 1.2633049005661423e-09, "loss": 5.9409, "step": 496780 }, { "epoch": 1.0035472310992781, "grad_norm": 262.7595520019531, "learning_rate": 1.2554707983403192e-09, "loss": 17.5791, "step": 496790 }, { "epoch": 1.0035674317319618, "grad_norm": 230.92210388183594, "learning_rate": 1.247661059389227e-09, "loss": 17.6548, "step": 496800 }, { "epoch": 1.0035876323646458, "grad_norm": 123.04821014404297, "learning_rate": 1.2398756837506131e-09, "loss": 15.8132, "step": 496810 }, { "epoch": 1.0036078329973295, "grad_norm": 176.6087188720703, "learning_rate": 1.2321146714627807e-09, "loss": 8.2122, "step": 496820 }, { "epoch": 1.0036280336300132, "grad_norm": 171.78781127929688, "learning_rate": 1.224378022562922e-09, "loss": 16.1176, "step": 496830 }, { "epoch": 1.0036482342626971, "grad_norm": 156.961181640625, "learning_rate": 1.2166657370898948e-09, "loss": 14.4038, "step": 496840 }, { "epoch": 1.0036684348953808, "grad_norm": 333.74078369140625, "learning_rate": 1.2089778150797816e-09, "loss": 19.8862, "step": 496850 }, { "epoch": 1.0036886355280648, "grad_norm": 48.801414489746094, "learning_rate": 1.2013142565708845e-09, "loss": 15.524, "step": 496860 }, { "epoch": 1.0037088361607485, "grad_norm": 199.96466064453125, "learning_rate": 1.193675061600952e-09, "loss": 13.8182, "step": 496870 }, { "epoch": 1.0037290367934324, "grad_norm": 336.8670959472656, "learning_rate": 1.1860602302066203e-09, "loss": 24.7726, "step": 496880 }, { "epoch": 1.003749237426116, "grad_norm": 1754.968017578125, "learning_rate": 1.178469762425083e-09, "loss": 11.5773, "step": 496890 }, { "epoch": 1.0037694380588, "grad_norm": 344.7020263671875, "learning_rate": 1.170903658293532e-09, "loss": 10.1763, "step": 496900 }, { "epoch": 1.0037896386914837, "grad_norm": 243.2534942626953, "learning_rate": 1.1633619178486044e-09, "loss": 12.3744, "step": 496910 }, { "epoch": 1.0038098393241677, "grad_norm": 331.5781555175781, "learning_rate": 1.155844541126938e-09, "loss": 21.797, "step": 496920 }, { "epoch": 1.0038300399568514, "grad_norm": 168.9166259765625, "learning_rate": 1.1483515281657254e-09, "loss": 14.3653, "step": 496930 }, { "epoch": 1.0038502405895353, "grad_norm": 86.74561309814453, "learning_rate": 1.1408828790010484e-09, "loss": 9.9988, "step": 496940 }, { "epoch": 1.003870441222219, "grad_norm": 130.0760955810547, "learning_rate": 1.1334385936695447e-09, "loss": 20.6595, "step": 496950 }, { "epoch": 1.003890641854903, "grad_norm": 148.57781982421875, "learning_rate": 1.1260186722067411e-09, "loss": 19.4977, "step": 496960 }, { "epoch": 1.0039108424875867, "grad_norm": 214.80091857910156, "learning_rate": 1.1186231146503856e-09, "loss": 16.563, "step": 496970 }, { "epoch": 1.0039310431202706, "grad_norm": 397.2010803222656, "learning_rate": 1.111251921034895e-09, "loss": 26.6753, "step": 496980 }, { "epoch": 1.0039512437529543, "grad_norm": 215.48861694335938, "learning_rate": 1.1039050913969062e-09, "loss": 11.6482, "step": 496990 }, { "epoch": 1.0039714443856382, "grad_norm": 599.6796264648438, "learning_rate": 1.096582625772502e-09, "loss": 13.3068, "step": 497000 }, { "epoch": 1.003991645018322, "grad_norm": 263.6522216796875, "learning_rate": 1.0892845241972094e-09, "loss": 13.7127, "step": 497010 }, { "epoch": 1.0040118456510059, "grad_norm": 265.34515380859375, "learning_rate": 1.0820107867060004e-09, "loss": 19.5239, "step": 497020 }, { "epoch": 1.0040320462836896, "grad_norm": 227.15771484375, "learning_rate": 1.074761413334957e-09, "loss": 30.504, "step": 497030 }, { "epoch": 1.0040522469163735, "grad_norm": 423.343994140625, "learning_rate": 1.0675364041190516e-09, "loss": 14.5951, "step": 497040 }, { "epoch": 1.0040724475490572, "grad_norm": 165.7329864501953, "learning_rate": 1.0603357590938112e-09, "loss": 7.8363, "step": 497050 }, { "epoch": 1.0040926481817412, "grad_norm": 252.2505340576172, "learning_rate": 1.0531594782942079e-09, "loss": 14.6733, "step": 497060 }, { "epoch": 1.0041128488144249, "grad_norm": 363.8894348144531, "learning_rate": 1.0460075617552134e-09, "loss": 16.4312, "step": 497070 }, { "epoch": 1.0041330494471086, "grad_norm": 583.636962890625, "learning_rate": 1.0388800095118002e-09, "loss": 16.2948, "step": 497080 }, { "epoch": 1.0041532500797925, "grad_norm": 332.56158447265625, "learning_rate": 1.0317768215983847e-09, "loss": 12.3562, "step": 497090 }, { "epoch": 1.0041734507124762, "grad_norm": 340.92022705078125, "learning_rate": 1.0246979980499395e-09, "loss": 17.9949, "step": 497100 }, { "epoch": 1.0041936513451601, "grad_norm": 62.27390670776367, "learning_rate": 1.017643538900881e-09, "loss": 8.2491, "step": 497110 }, { "epoch": 1.0042138519778439, "grad_norm": 283.4289245605469, "learning_rate": 1.0106134441850712e-09, "loss": 9.8237, "step": 497120 }, { "epoch": 1.0042340526105278, "grad_norm": 268.71484375, "learning_rate": 1.0036077139380373e-09, "loss": 10.6235, "step": 497130 }, { "epoch": 1.0042542532432115, "grad_norm": 240.248046875, "learning_rate": 9.96626348192531e-10, "loss": 44.8136, "step": 497140 }, { "epoch": 1.0042744538758954, "grad_norm": 108.62383270263672, "learning_rate": 9.896693469829689e-10, "loss": 28.2563, "step": 497150 }, { "epoch": 1.0042946545085791, "grad_norm": 190.87635803222656, "learning_rate": 9.827367103437679e-10, "loss": 14.5446, "step": 497160 }, { "epoch": 1.004314855141263, "grad_norm": 208.57196044921875, "learning_rate": 9.758284383082351e-10, "loss": 16.7593, "step": 497170 }, { "epoch": 1.0043350557739468, "grad_norm": 215.20289611816406, "learning_rate": 9.68944530910787e-10, "loss": 23.861, "step": 497180 }, { "epoch": 1.0043552564066307, "grad_norm": 538.6763916015625, "learning_rate": 9.620849881836203e-10, "loss": 12.2761, "step": 497190 }, { "epoch": 1.0043754570393144, "grad_norm": 230.8127899169922, "learning_rate": 9.55249810161152e-10, "loss": 15.7254, "step": 497200 }, { "epoch": 1.0043956576719983, "grad_norm": 64.42849731445312, "learning_rate": 9.484389968766882e-10, "loss": 15.4476, "step": 497210 }, { "epoch": 1.004415858304682, "grad_norm": 25.883228302001953, "learning_rate": 9.416525483635364e-10, "loss": 12.8251, "step": 497220 }, { "epoch": 1.004436058937366, "grad_norm": 366.4950256347656, "learning_rate": 9.348904646538925e-10, "loss": 19.1774, "step": 497230 }, { "epoch": 1.0044562595700497, "grad_norm": 295.9607849121094, "learning_rate": 9.281527457816186e-10, "loss": 15.7636, "step": 497240 }, { "epoch": 1.0044764602027336, "grad_norm": 164.70501708984375, "learning_rate": 9.214393917789111e-10, "loss": 14.53, "step": 497250 }, { "epoch": 1.0044966608354173, "grad_norm": 110.36573791503906, "learning_rate": 9.147504026790766e-10, "loss": 6.9312, "step": 497260 }, { "epoch": 1.0045168614681013, "grad_norm": 19.185224533081055, "learning_rate": 9.080857785137564e-10, "loss": 16.6655, "step": 497270 }, { "epoch": 1.004537062100785, "grad_norm": 380.02532958984375, "learning_rate": 9.014455193168125e-10, "loss": 8.3419, "step": 497280 }, { "epoch": 1.004557262733469, "grad_norm": 543.848876953125, "learning_rate": 8.948296251198863e-10, "loss": 20.6537, "step": 497290 }, { "epoch": 1.0045774633661526, "grad_norm": 241.8990020751953, "learning_rate": 8.88238095955174e-10, "loss": 16.4705, "step": 497300 }, { "epoch": 1.0045976639988365, "grad_norm": 774.7713623046875, "learning_rate": 8.816709318543171e-10, "loss": 19.6519, "step": 497310 }, { "epoch": 1.0046178646315203, "grad_norm": 437.86175537109375, "learning_rate": 8.751281328506223e-10, "loss": 19.5219, "step": 497320 }, { "epoch": 1.0046380652642042, "grad_norm": 314.4528503417969, "learning_rate": 8.686096989751758e-10, "loss": 18.3735, "step": 497330 }, { "epoch": 1.004658265896888, "grad_norm": 183.13369750976562, "learning_rate": 8.621156302590639e-10, "loss": 9.6208, "step": 497340 }, { "epoch": 1.0046784665295716, "grad_norm": 179.684814453125, "learning_rate": 8.556459267355932e-10, "loss": 9.7718, "step": 497350 }, { "epoch": 1.0046986671622555, "grad_norm": 372.1221923828125, "learning_rate": 8.492005884347398e-10, "loss": 27.6225, "step": 497360 }, { "epoch": 1.0047188677949392, "grad_norm": 255.3513946533203, "learning_rate": 8.427796153887002e-10, "loss": 18.5596, "step": 497370 }, { "epoch": 1.0047390684276232, "grad_norm": 291.7013854980469, "learning_rate": 8.363830076285606e-10, "loss": 7.915, "step": 497380 }, { "epoch": 1.0047592690603069, "grad_norm": 209.06103515625, "learning_rate": 8.300107651859623e-10, "loss": 20.9561, "step": 497390 }, { "epoch": 1.0047794696929908, "grad_norm": 189.4288330078125, "learning_rate": 8.236628880914365e-10, "loss": 16.9448, "step": 497400 }, { "epoch": 1.0047996703256745, "grad_norm": 358.1460266113281, "learning_rate": 8.173393763760695e-10, "loss": 22.2148, "step": 497410 }, { "epoch": 1.0048198709583585, "grad_norm": 403.0713806152344, "learning_rate": 8.110402300703924e-10, "loss": 22.7103, "step": 497420 }, { "epoch": 1.0048400715910422, "grad_norm": 200.86256408691406, "learning_rate": 8.047654492054913e-10, "loss": 7.5697, "step": 497430 }, { "epoch": 1.004860272223726, "grad_norm": 297.1061706542969, "learning_rate": 7.985150338118974e-10, "loss": 17.3693, "step": 497440 }, { "epoch": 1.0048804728564098, "grad_norm": 83.62934112548828, "learning_rate": 7.92288983920142e-10, "loss": 20.8319, "step": 497450 }, { "epoch": 1.0049006734890937, "grad_norm": 307.7446594238281, "learning_rate": 7.860872995602009e-10, "loss": 11.7905, "step": 497460 }, { "epoch": 1.0049208741217774, "grad_norm": 317.6285705566406, "learning_rate": 7.799099807626054e-10, "loss": 12.442, "step": 497470 }, { "epoch": 1.0049410747544614, "grad_norm": 835.113037109375, "learning_rate": 7.737570275573314e-10, "loss": 18.0063, "step": 497480 }, { "epoch": 1.004961275387145, "grad_norm": 467.5967712402344, "learning_rate": 7.67628439974355e-10, "loss": 12.2073, "step": 497490 }, { "epoch": 1.004981476019829, "grad_norm": 191.9807586669922, "learning_rate": 7.615242180436521e-10, "loss": 9.385, "step": 497500 }, { "epoch": 1.0050016766525127, "grad_norm": 224.09347534179688, "learning_rate": 7.55444361795199e-10, "loss": 11.5828, "step": 497510 }, { "epoch": 1.0050218772851967, "grad_norm": 316.2989196777344, "learning_rate": 7.493888712584163e-10, "loss": 10.3182, "step": 497520 }, { "epoch": 1.0050420779178804, "grad_norm": 323.5889892578125, "learning_rate": 7.433577464621699e-10, "loss": 17.9219, "step": 497530 }, { "epoch": 1.0050622785505643, "grad_norm": 109.38935089111328, "learning_rate": 7.373509874369911e-10, "loss": 16.7291, "step": 497540 }, { "epoch": 1.005082479183248, "grad_norm": 526.5947265625, "learning_rate": 7.313685942117454e-10, "loss": 17.5336, "step": 497550 }, { "epoch": 1.005102679815932, "grad_norm": 398.5237121582031, "learning_rate": 7.254105668152988e-10, "loss": 24.8671, "step": 497560 }, { "epoch": 1.0051228804486156, "grad_norm": 200.6566162109375, "learning_rate": 7.194769052765171e-10, "loss": 11.9736, "step": 497570 }, { "epoch": 1.0051430810812996, "grad_norm": 374.6748352050781, "learning_rate": 7.135676096253763e-10, "loss": 21.5813, "step": 497580 }, { "epoch": 1.0051632817139833, "grad_norm": 112.6958999633789, "learning_rate": 7.076826798890768e-10, "loss": 13.3266, "step": 497590 }, { "epoch": 1.005183482346667, "grad_norm": 113.08387756347656, "learning_rate": 7.018221160981498e-10, "loss": 8.6187, "step": 497600 }, { "epoch": 1.005203682979351, "grad_norm": 237.25608825683594, "learning_rate": 6.959859182792406e-10, "loss": 11.2016, "step": 497610 }, { "epoch": 1.0052238836120346, "grad_norm": 0.0, "learning_rate": 6.901740864623252e-10, "loss": 5.223, "step": 497620 }, { "epoch": 1.0052440842447186, "grad_norm": 220.59524536132812, "learning_rate": 6.843866206751593e-10, "loss": 5.644, "step": 497630 }, { "epoch": 1.0052642848774023, "grad_norm": 290.9652404785156, "learning_rate": 6.786235209460534e-10, "loss": 18.3629, "step": 497640 }, { "epoch": 1.0052844855100862, "grad_norm": 368.1429748535156, "learning_rate": 6.728847873027633e-10, "loss": 18.1752, "step": 497650 }, { "epoch": 1.00530468614277, "grad_norm": 496.81610107421875, "learning_rate": 6.671704197735995e-10, "loss": 16.5869, "step": 497660 }, { "epoch": 1.0053248867754538, "grad_norm": 226.71218872070312, "learning_rate": 6.614804183857626e-10, "loss": 19.0954, "step": 497670 }, { "epoch": 1.0053450874081375, "grad_norm": 172.08290100097656, "learning_rate": 6.558147831681183e-10, "loss": 11.4151, "step": 497680 }, { "epoch": 1.0053652880408215, "grad_norm": 290.1051025390625, "learning_rate": 6.501735141478672e-10, "loss": 17.5828, "step": 497690 }, { "epoch": 1.0053854886735052, "grad_norm": 131.67520141601562, "learning_rate": 6.445566113516544e-10, "loss": 15.6308, "step": 497700 }, { "epoch": 1.0054056893061891, "grad_norm": 505.48431396484375, "learning_rate": 6.389640748077907e-10, "loss": 16.8782, "step": 497710 }, { "epoch": 1.0054258899388728, "grad_norm": 107.32551574707031, "learning_rate": 6.333959045434768e-10, "loss": 8.1946, "step": 497720 }, { "epoch": 1.0054460905715568, "grad_norm": 281.859375, "learning_rate": 6.278521005853578e-10, "loss": 11.7121, "step": 497730 }, { "epoch": 1.0054662912042405, "grad_norm": 103.37158203125, "learning_rate": 6.223326629611893e-10, "loss": 13.5691, "step": 497740 }, { "epoch": 1.0054864918369244, "grad_norm": 203.9019775390625, "learning_rate": 6.168375916970615e-10, "loss": 11.0136, "step": 497750 }, { "epoch": 1.005506692469608, "grad_norm": 7.61803674697876, "learning_rate": 6.11366886820175e-10, "loss": 17.7279, "step": 497760 }, { "epoch": 1.005526893102292, "grad_norm": 543.367919921875, "learning_rate": 6.05920548357175e-10, "loss": 20.9936, "step": 497770 }, { "epoch": 1.0055470937349757, "grad_norm": 286.6015319824219, "learning_rate": 6.00498576334152e-10, "loss": 22.6335, "step": 497780 }, { "epoch": 1.0055672943676597, "grad_norm": 273.9652099609375, "learning_rate": 5.951009707783062e-10, "loss": 13.7487, "step": 497790 }, { "epoch": 1.0055874950003434, "grad_norm": 204.1086883544922, "learning_rate": 5.897277317157279e-10, "loss": 20.8657, "step": 497800 }, { "epoch": 1.0056076956330273, "grad_norm": 352.064453125, "learning_rate": 5.843788591725074e-10, "loss": 14.8754, "step": 497810 }, { "epoch": 1.005627896265711, "grad_norm": 289.3622131347656, "learning_rate": 5.790543531741799e-10, "loss": 16.5565, "step": 497820 }, { "epoch": 1.005648096898395, "grad_norm": 148.64691162109375, "learning_rate": 5.737542137479457e-10, "loss": 29.357, "step": 497830 }, { "epoch": 1.0056682975310787, "grad_norm": 122.69599151611328, "learning_rate": 5.684784409182298e-10, "loss": 11.7857, "step": 497840 }, { "epoch": 1.0056884981637624, "grad_norm": 108.96839141845703, "learning_rate": 5.632270347116775e-10, "loss": 10.1209, "step": 497850 }, { "epoch": 1.0057086987964463, "grad_norm": 142.5657958984375, "learning_rate": 5.579999951532688e-10, "loss": 20.193, "step": 497860 }, { "epoch": 1.00572889942913, "grad_norm": 378.4535217285156, "learning_rate": 5.527973222690941e-10, "loss": 23.4196, "step": 497870 }, { "epoch": 1.005749100061814, "grad_norm": 341.4775695800781, "learning_rate": 5.476190160841333e-10, "loss": 17.7467, "step": 497880 }, { "epoch": 1.0057693006944977, "grad_norm": 183.7340087890625, "learning_rate": 5.424650766239215e-10, "loss": 22.6553, "step": 497890 }, { "epoch": 1.0057895013271816, "grad_norm": 810.3782958984375, "learning_rate": 5.373355039128836e-10, "loss": 17.3577, "step": 497900 }, { "epoch": 1.0058097019598653, "grad_norm": 119.6341323852539, "learning_rate": 5.322302979771099e-10, "loss": 14.5229, "step": 497910 }, { "epoch": 1.0058299025925492, "grad_norm": 310.62347412109375, "learning_rate": 5.271494588404702e-10, "loss": 14.242, "step": 497920 }, { "epoch": 1.005850103225233, "grad_norm": 119.24578094482422, "learning_rate": 5.220929865284996e-10, "loss": 12.5786, "step": 497930 }, { "epoch": 1.0058703038579169, "grad_norm": 376.8143005371094, "learning_rate": 5.170608810650679e-10, "loss": 23.0576, "step": 497940 }, { "epoch": 1.0058905044906006, "grad_norm": 646.37646484375, "learning_rate": 5.120531424751551e-10, "loss": 14.2424, "step": 497950 }, { "epoch": 1.0059107051232845, "grad_norm": 164.1946258544922, "learning_rate": 5.070697707837413e-10, "loss": 35.6545, "step": 497960 }, { "epoch": 1.0059309057559682, "grad_norm": 310.8088073730469, "learning_rate": 5.02110766013586e-10, "loss": 13.8999, "step": 497970 }, { "epoch": 1.0059511063886521, "grad_norm": 249.61207580566406, "learning_rate": 4.971761281907795e-10, "loss": 14.672, "step": 497980 }, { "epoch": 1.0059713070213359, "grad_norm": 5.761580467224121, "learning_rate": 4.922658573375261e-10, "loss": 15.3031, "step": 497990 }, { "epoch": 1.0059915076540198, "grad_norm": 52.5190544128418, "learning_rate": 4.87379953478806e-10, "loss": 15.158, "step": 498000 }, { "epoch": 1.0060117082867035, "grad_norm": 221.9866485595703, "learning_rate": 4.825184166384888e-10, "loss": 25.3647, "step": 498010 }, { "epoch": 1.0060319089193874, "grad_norm": 407.00274658203125, "learning_rate": 4.776812468398895e-10, "loss": 10.4687, "step": 498020 }, { "epoch": 1.0060521095520711, "grad_norm": 305.9030456542969, "learning_rate": 4.728684441068776e-10, "loss": 17.8631, "step": 498030 }, { "epoch": 1.006072310184755, "grad_norm": 103.88262939453125, "learning_rate": 4.680800084622128e-10, "loss": 15.7122, "step": 498040 }, { "epoch": 1.0060925108174388, "grad_norm": 70.55221557617188, "learning_rate": 4.6331593993032e-10, "loss": 11.8809, "step": 498050 }, { "epoch": 1.0061127114501227, "grad_norm": 44.675846099853516, "learning_rate": 4.585762385334036e-10, "loss": 4.9157, "step": 498060 }, { "epoch": 1.0061329120828064, "grad_norm": 270.4665832519531, "learning_rate": 4.538609042953335e-10, "loss": 13.6184, "step": 498070 }, { "epoch": 1.0061531127154903, "grad_norm": 359.03851318359375, "learning_rate": 4.49169937238314e-10, "loss": 9.6673, "step": 498080 }, { "epoch": 1.006173313348174, "grad_norm": 158.07606506347656, "learning_rate": 4.445033373862151e-10, "loss": 12.3881, "step": 498090 }, { "epoch": 1.006193513980858, "grad_norm": 410.2479553222656, "learning_rate": 4.398611047612411e-10, "loss": 15.5781, "step": 498100 }, { "epoch": 1.0062137146135417, "grad_norm": 400.76806640625, "learning_rate": 4.3524323938559655e-10, "loss": 15.3732, "step": 498110 }, { "epoch": 1.0062339152462254, "grad_norm": 102.79998016357422, "learning_rate": 4.3064974128259605e-10, "loss": 10.8671, "step": 498120 }, { "epoch": 1.0062541158789093, "grad_norm": 183.90966796875, "learning_rate": 4.2608061047388905e-10, "loss": 16.1218, "step": 498130 }, { "epoch": 1.006274316511593, "grad_norm": 91.87322998046875, "learning_rate": 4.21535846982235e-10, "loss": 11.3491, "step": 498140 }, { "epoch": 1.006294517144277, "grad_norm": 249.82664489746094, "learning_rate": 4.1701545082928343e-10, "loss": 26.3386, "step": 498150 }, { "epoch": 1.0063147177769607, "grad_norm": 193.24899291992188, "learning_rate": 4.125194220377937e-10, "loss": 15.6192, "step": 498160 }, { "epoch": 1.0063349184096446, "grad_norm": 149.47335815429688, "learning_rate": 4.0804776062941533e-10, "loss": 6.2518, "step": 498170 }, { "epoch": 1.0063551190423283, "grad_norm": 324.95867919921875, "learning_rate": 4.0360046662579753e-10, "loss": 12.2561, "step": 498180 }, { "epoch": 1.0063753196750123, "grad_norm": 96.63861083984375, "learning_rate": 3.991775400485898e-10, "loss": 15.6981, "step": 498190 }, { "epoch": 1.006395520307696, "grad_norm": 335.8397216796875, "learning_rate": 3.9477898091944135e-10, "loss": 25.09, "step": 498200 }, { "epoch": 1.00641572094038, "grad_norm": 449.616943359375, "learning_rate": 3.9040478925944645e-10, "loss": 31.7562, "step": 498210 }, { "epoch": 1.0064359215730636, "grad_norm": 92.94026184082031, "learning_rate": 3.8605496509080966e-10, "loss": 11.5487, "step": 498220 }, { "epoch": 1.0064561222057475, "grad_norm": 378.7860107421875, "learning_rate": 3.8172950843351485e-10, "loss": 25.2461, "step": 498230 }, { "epoch": 1.0064763228384312, "grad_norm": 139.0156707763672, "learning_rate": 3.774284193097666e-10, "loss": 8.0539, "step": 498240 }, { "epoch": 1.0064965234711152, "grad_norm": 239.59571838378906, "learning_rate": 3.7315169774010397e-10, "loss": 9.1578, "step": 498250 }, { "epoch": 1.0065167241037989, "grad_norm": 215.89488220214844, "learning_rate": 3.6889934374506606e-10, "loss": 25.5079, "step": 498260 }, { "epoch": 1.0065369247364828, "grad_norm": 179.49853515625, "learning_rate": 3.646713573457472e-10, "loss": 17.75, "step": 498270 }, { "epoch": 1.0065571253691665, "grad_norm": 264.7878723144531, "learning_rate": 3.604677385626865e-10, "loss": 13.9832, "step": 498280 }, { "epoch": 1.0065773260018505, "grad_norm": 97.20784759521484, "learning_rate": 3.562884874158679e-10, "loss": 14.5283, "step": 498290 }, { "epoch": 1.0065975266345342, "grad_norm": 69.8996810913086, "learning_rate": 3.521336039263856e-10, "loss": 5.11, "step": 498300 }, { "epoch": 1.006617727267218, "grad_norm": 188.6199951171875, "learning_rate": 3.480030881147789e-10, "loss": 10.2871, "step": 498310 }, { "epoch": 1.0066379278999018, "grad_norm": 345.9925231933594, "learning_rate": 3.4389693999992146e-10, "loss": 17.5328, "step": 498320 }, { "epoch": 1.0066581285325857, "grad_norm": 154.6565399169922, "learning_rate": 3.3981515960290757e-10, "loss": 13.0229, "step": 498330 }, { "epoch": 1.0066783291652694, "grad_norm": 198.02406311035156, "learning_rate": 3.357577469431661e-10, "loss": 17.9335, "step": 498340 }, { "epoch": 1.0066985297979534, "grad_norm": 87.4178237915039, "learning_rate": 3.3172470204012597e-10, "loss": 14.529, "step": 498350 }, { "epoch": 1.006718730430637, "grad_norm": 730.363037109375, "learning_rate": 3.277160249143263e-10, "loss": 27.7188, "step": 498360 }, { "epoch": 1.0067389310633208, "grad_norm": 123.78430938720703, "learning_rate": 3.237317155846409e-10, "loss": 12.3887, "step": 498370 }, { "epoch": 1.0067591316960047, "grad_norm": 555.4959716796875, "learning_rate": 3.1977177407105376e-10, "loss": 13.2058, "step": 498380 }, { "epoch": 1.0067793323286884, "grad_norm": 362.38067626953125, "learning_rate": 3.158362003918836e-10, "loss": 10.9317, "step": 498390 }, { "epoch": 1.0067995329613724, "grad_norm": 158.46035766601562, "learning_rate": 3.1192499456766947e-10, "loss": 16.7167, "step": 498400 }, { "epoch": 1.006819733594056, "grad_norm": 55.679298400878906, "learning_rate": 3.0803815661617495e-10, "loss": 11.2871, "step": 498410 }, { "epoch": 1.00683993422674, "grad_norm": 1470.99365234375, "learning_rate": 3.0417568655738416e-10, "loss": 12.9113, "step": 498420 }, { "epoch": 1.0068601348594237, "grad_norm": 348.9452819824219, "learning_rate": 3.003375844090606e-10, "loss": 21.9742, "step": 498430 }, { "epoch": 1.0068803354921076, "grad_norm": 300.7489318847656, "learning_rate": 2.9652385019118823e-10, "loss": 17.7186, "step": 498440 }, { "epoch": 1.0069005361247914, "grad_norm": 15.27856159210205, "learning_rate": 2.9273448392097557e-10, "loss": 16.9864, "step": 498450 }, { "epoch": 1.0069207367574753, "grad_norm": 293.1330871582031, "learning_rate": 2.8896948561785156e-10, "loss": 20.0468, "step": 498460 }, { "epoch": 1.006940937390159, "grad_norm": 152.7106170654297, "learning_rate": 2.8522885530013475e-10, "loss": 14.7963, "step": 498470 }, { "epoch": 1.006961138022843, "grad_norm": 85.65802764892578, "learning_rate": 2.8151259298558884e-10, "loss": 12.1627, "step": 498480 }, { "epoch": 1.0069813386555266, "grad_norm": 335.9873046875, "learning_rate": 2.7782069869253247e-10, "loss": 10.7897, "step": 498490 }, { "epoch": 1.0070015392882106, "grad_norm": 232.81271362304688, "learning_rate": 2.741531724392843e-10, "loss": 11.3882, "step": 498500 }, { "epoch": 1.0070217399208943, "grad_norm": 203.44442749023438, "learning_rate": 2.705100142430528e-10, "loss": 19.1838, "step": 498510 }, { "epoch": 1.0070419405535782, "grad_norm": 141.52403259277344, "learning_rate": 2.668912241221566e-10, "loss": 12.449, "step": 498520 }, { "epoch": 1.007062141186262, "grad_norm": 194.8858642578125, "learning_rate": 2.6329680209435935e-10, "loss": 18.4131, "step": 498530 }, { "epoch": 1.0070823418189458, "grad_norm": 119.25762176513672, "learning_rate": 2.597267481763144e-10, "loss": 15.6827, "step": 498540 }, { "epoch": 1.0071025424516296, "grad_norm": 156.83926391601562, "learning_rate": 2.5618106238634033e-10, "loss": 11.1116, "step": 498550 }, { "epoch": 1.0071227430843135, "grad_norm": 163.69297790527344, "learning_rate": 2.5265974474109054e-10, "loss": 10.6595, "step": 498560 }, { "epoch": 1.0071429437169972, "grad_norm": 313.9014587402344, "learning_rate": 2.4916279525777356e-10, "loss": 9.8448, "step": 498570 }, { "epoch": 1.0071631443496811, "grad_norm": 152.2070770263672, "learning_rate": 2.4569021395415283e-10, "loss": 17.6692, "step": 498580 }, { "epoch": 1.0071833449823648, "grad_norm": 63.56298828125, "learning_rate": 2.4224200084632664e-10, "loss": 6.8223, "step": 498590 }, { "epoch": 1.0072035456150488, "grad_norm": 274.88763427734375, "learning_rate": 2.388181559515035e-10, "loss": 10.3704, "step": 498600 }, { "epoch": 1.0072237462477325, "grad_norm": 278.88787841796875, "learning_rate": 2.3541867928633665e-10, "loss": 25.6605, "step": 498610 }, { "epoch": 1.0072439468804162, "grad_norm": 307.20465087890625, "learning_rate": 2.3204357086747952e-10, "loss": 12.244, "step": 498620 }, { "epoch": 1.0072641475131001, "grad_norm": 353.385498046875, "learning_rate": 2.2869283071103032e-10, "loss": 14.1536, "step": 498630 }, { "epoch": 1.0072843481457838, "grad_norm": 387.729736328125, "learning_rate": 2.2536645883308728e-10, "loss": 15.4943, "step": 498640 }, { "epoch": 1.0073045487784678, "grad_norm": 110.98235321044922, "learning_rate": 2.2206445525085886e-10, "loss": 7.5882, "step": 498650 }, { "epoch": 1.0073247494111515, "grad_norm": 396.0930480957031, "learning_rate": 2.1878681997988816e-10, "loss": 17.9858, "step": 498660 }, { "epoch": 1.0073449500438354, "grad_norm": 247.63925170898438, "learning_rate": 2.1553355303627343e-10, "loss": 10.9372, "step": 498670 }, { "epoch": 1.007365150676519, "grad_norm": 178.1328125, "learning_rate": 2.123046544355578e-10, "loss": 24.8558, "step": 498680 }, { "epoch": 1.007385351309203, "grad_norm": 297.8135070800781, "learning_rate": 2.091001241932844e-10, "loss": 13.2768, "step": 498690 }, { "epoch": 1.0074055519418867, "grad_norm": 39.48568344116211, "learning_rate": 2.0591996232610656e-10, "loss": 18.014, "step": 498700 }, { "epoch": 1.0074257525745707, "grad_norm": 301.92315673828125, "learning_rate": 2.0276416884845718e-10, "loss": 12.5918, "step": 498710 }, { "epoch": 1.0074459532072544, "grad_norm": 447.9718017578125, "learning_rate": 1.9963274377643448e-10, "loss": 14.6538, "step": 498720 }, { "epoch": 1.0074661538399383, "grad_norm": 312.9599304199219, "learning_rate": 1.965256871244714e-10, "loss": 19.0012, "step": 498730 }, { "epoch": 1.007486354472622, "grad_norm": 353.3367004394531, "learning_rate": 1.9344299890866614e-10, "loss": 20.735, "step": 498740 }, { "epoch": 1.007506555105306, "grad_norm": 319.9928894042969, "learning_rate": 1.903846791434516e-10, "loss": 15.8078, "step": 498750 }, { "epoch": 1.0075267557379897, "grad_norm": 9.733345985412598, "learning_rate": 1.873507278438158e-10, "loss": 12.6649, "step": 498760 }, { "epoch": 1.0075469563706736, "grad_norm": 333.54791259765625, "learning_rate": 1.8434114502530187e-10, "loss": 16.1153, "step": 498770 }, { "epoch": 1.0075671570033573, "grad_norm": 376.7235107421875, "learning_rate": 1.8135593070123246e-10, "loss": 27.7726, "step": 498780 }, { "epoch": 1.0075873576360412, "grad_norm": 367.3818359375, "learning_rate": 1.7839508488715075e-10, "loss": 14.0848, "step": 498790 }, { "epoch": 1.007607558268725, "grad_norm": 215.5403594970703, "learning_rate": 1.7545860759693446e-10, "loss": 8.5867, "step": 498800 }, { "epoch": 1.0076277589014089, "grad_norm": 269.4993591308594, "learning_rate": 1.725464988450165e-10, "loss": 8.9464, "step": 498810 }, { "epoch": 1.0076479595340926, "grad_norm": 226.98304748535156, "learning_rate": 1.6965875864582983e-10, "loss": 14.4526, "step": 498820 }, { "epoch": 1.0076681601667765, "grad_norm": 252.58116149902344, "learning_rate": 1.6679538701325215e-10, "loss": 21.8598, "step": 498830 }, { "epoch": 1.0076883607994602, "grad_norm": 263.5116271972656, "learning_rate": 1.6395638396171643e-10, "loss": 12.1145, "step": 498840 }, { "epoch": 1.0077085614321442, "grad_norm": 147.0958251953125, "learning_rate": 1.611417495045453e-10, "loss": 19.2575, "step": 498850 }, { "epoch": 1.0077287620648279, "grad_norm": 621.6185302734375, "learning_rate": 1.5835148365506148e-10, "loss": 25.4507, "step": 498860 }, { "epoch": 1.0077489626975118, "grad_norm": 176.71005249023438, "learning_rate": 1.5558558642769782e-10, "loss": 14.3358, "step": 498870 }, { "epoch": 1.0077691633301955, "grad_norm": 188.56503295898438, "learning_rate": 1.5284405783577706e-10, "loss": 15.4363, "step": 498880 }, { "epoch": 1.0077893639628792, "grad_norm": 273.8232421875, "learning_rate": 1.501268978920667e-10, "loss": 14.8906, "step": 498890 }, { "epoch": 1.0078095645955631, "grad_norm": 307.99542236328125, "learning_rate": 1.4743410661044454e-10, "loss": 15.3241, "step": 498900 }, { "epoch": 1.0078297652282469, "grad_norm": 114.76146697998047, "learning_rate": 1.4476568400367819e-10, "loss": 6.4186, "step": 498910 }, { "epoch": 1.0078499658609308, "grad_norm": 582.0213012695312, "learning_rate": 1.4212163008509028e-10, "loss": 19.5914, "step": 498920 }, { "epoch": 1.0078701664936145, "grad_norm": 226.5014190673828, "learning_rate": 1.3950194486744838e-10, "loss": 16.7806, "step": 498930 }, { "epoch": 1.0078903671262984, "grad_norm": 446.14508056640625, "learning_rate": 1.369066283635201e-10, "loss": 16.9134, "step": 498940 }, { "epoch": 1.0079105677589821, "grad_norm": 1160.8868408203125, "learning_rate": 1.3433568058607293e-10, "loss": 21.4087, "step": 498950 }, { "epoch": 1.007930768391666, "grad_norm": 241.24908447265625, "learning_rate": 1.3178910154676427e-10, "loss": 16.0294, "step": 498960 }, { "epoch": 1.0079509690243498, "grad_norm": 472.5611877441406, "learning_rate": 1.292668912594719e-10, "loss": 15.3888, "step": 498970 }, { "epoch": 1.0079711696570337, "grad_norm": 43.85075378417969, "learning_rate": 1.2676904973529802e-10, "loss": 33.6512, "step": 498980 }, { "epoch": 1.0079913702897174, "grad_norm": 124.53466033935547, "learning_rate": 1.2429557698645512e-10, "loss": 15.233, "step": 498990 }, { "epoch": 1.0080115709224013, "grad_norm": 281.6215515136719, "learning_rate": 1.2184647302626585e-10, "loss": 12.2033, "step": 499000 }, { "epoch": 1.008031771555085, "grad_norm": 43.871124267578125, "learning_rate": 1.1942173786527732e-10, "loss": 18.2486, "step": 499010 }, { "epoch": 1.008051972187769, "grad_norm": 137.5821075439453, "learning_rate": 1.1702137151570203e-10, "loss": 14.4781, "step": 499020 }, { "epoch": 1.0080721728204527, "grad_norm": 551.835693359375, "learning_rate": 1.146453739897524e-10, "loss": 14.7082, "step": 499030 }, { "epoch": 1.0080923734531366, "grad_norm": 975.0075073242188, "learning_rate": 1.1229374529797555e-10, "loss": 19.17, "step": 499040 }, { "epoch": 1.0081125740858203, "grad_norm": 103.84477996826172, "learning_rate": 1.0996648545313904e-10, "loss": 8.8648, "step": 499050 }, { "epoch": 1.0081327747185043, "grad_norm": 181.8811492919922, "learning_rate": 1.0766359446579e-10, "loss": 9.433, "step": 499060 }, { "epoch": 1.008152975351188, "grad_norm": 80.49881744384766, "learning_rate": 1.0538507234703066e-10, "loss": 13.4556, "step": 499070 }, { "epoch": 1.008173175983872, "grad_norm": 9.578737258911133, "learning_rate": 1.0313091910796324e-10, "loss": 11.9596, "step": 499080 }, { "epoch": 1.0081933766165556, "grad_norm": 1.9659548997879028, "learning_rate": 1.009011347602451e-10, "loss": 11.4909, "step": 499090 }, { "epoch": 1.0082135772492395, "grad_norm": 67.1401138305664, "learning_rate": 9.869571931442334e-11, "loss": 15.3432, "step": 499100 }, { "epoch": 1.0082337778819233, "grad_norm": 281.44976806640625, "learning_rate": 9.65146727810451e-11, "loss": 15.6687, "step": 499110 }, { "epoch": 1.0082539785146072, "grad_norm": 80.7608871459961, "learning_rate": 9.435799517065746e-11, "loss": 11.8936, "step": 499120 }, { "epoch": 1.008274179147291, "grad_norm": 383.69366455078125, "learning_rate": 9.222568649380759e-11, "loss": 29.9398, "step": 499130 }, { "epoch": 1.0082943797799746, "grad_norm": 378.7021484375, "learning_rate": 9.011774676159767e-11, "loss": 10.354, "step": 499140 }, { "epoch": 1.0083145804126585, "grad_norm": 442.16265869140625, "learning_rate": 8.803417598346465e-11, "loss": 24.7036, "step": 499150 }, { "epoch": 1.0083347810453422, "grad_norm": 180.1383819580078, "learning_rate": 8.597497416940048e-11, "loss": 20.6631, "step": 499160 }, { "epoch": 1.0083549816780262, "grad_norm": 709.421875, "learning_rate": 8.394014133050743e-11, "loss": 19.2424, "step": 499170 }, { "epoch": 1.0083751823107099, "grad_norm": 0.0, "learning_rate": 8.192967747566727e-11, "loss": 14.6445, "step": 499180 }, { "epoch": 1.0083953829433938, "grad_norm": 232.08509826660156, "learning_rate": 7.994358261542712e-11, "loss": 30.6636, "step": 499190 }, { "epoch": 1.0084155835760775, "grad_norm": 118.77250671386719, "learning_rate": 7.798185675866876e-11, "loss": 10.9463, "step": 499200 }, { "epoch": 1.0084357842087615, "grad_norm": 441.1081237792969, "learning_rate": 7.604449991593932e-11, "loss": 18.0995, "step": 499210 }, { "epoch": 1.0084559848414452, "grad_norm": 219.89122009277344, "learning_rate": 7.413151209612057e-11, "loss": 16.1622, "step": 499220 }, { "epoch": 1.008476185474129, "grad_norm": 275.44464111328125, "learning_rate": 7.224289330809431e-11, "loss": 14.1064, "step": 499230 }, { "epoch": 1.0084963861068128, "grad_norm": 375.02862548828125, "learning_rate": 7.037864356185254e-11, "loss": 10.0396, "step": 499240 }, { "epoch": 1.0085165867394967, "grad_norm": 171.23094177246094, "learning_rate": 6.853876286627703e-11, "loss": 11.9643, "step": 499250 }, { "epoch": 1.0085367873721804, "grad_norm": 296.3878173828125, "learning_rate": 6.672325122969447e-11, "loss": 18.2356, "step": 499260 }, { "epoch": 1.0085569880048644, "grad_norm": 173.04734802246094, "learning_rate": 6.493210866209687e-11, "loss": 9.8609, "step": 499270 }, { "epoch": 1.008577188637548, "grad_norm": 181.06671142578125, "learning_rate": 6.316533517125578e-11, "loss": 13.4467, "step": 499280 }, { "epoch": 1.008597389270232, "grad_norm": 131.52392578125, "learning_rate": 6.142293076605299e-11, "loss": 13.3902, "step": 499290 }, { "epoch": 1.0086175899029157, "grad_norm": 188.04368591308594, "learning_rate": 5.970489545537028e-11, "loss": 14.8991, "step": 499300 }, { "epoch": 1.0086377905355997, "grad_norm": 270.3106689453125, "learning_rate": 5.801122924697922e-11, "loss": 10.326, "step": 499310 }, { "epoch": 1.0086579911682834, "grad_norm": 221.03317260742188, "learning_rate": 5.634193214976158e-11, "loss": 22.3886, "step": 499320 }, { "epoch": 1.0086781918009673, "grad_norm": 572.0333862304688, "learning_rate": 5.469700417093382e-11, "loss": 26.1335, "step": 499330 }, { "epoch": 1.008698392433651, "grad_norm": 15.824953079223633, "learning_rate": 5.3076445319932835e-11, "loss": 11.863, "step": 499340 }, { "epoch": 1.008718593066335, "grad_norm": 136.35269165039062, "learning_rate": 5.148025560341996e-11, "loss": 9.789, "step": 499350 }, { "epoch": 1.0087387936990186, "grad_norm": 508.6393127441406, "learning_rate": 4.990843502916676e-11, "loss": 24.4717, "step": 499360 }, { "epoch": 1.0087589943317026, "grad_norm": 341.3861389160156, "learning_rate": 4.83609836054999e-11, "loss": 17.634, "step": 499370 }, { "epoch": 1.0087791949643863, "grad_norm": 250.8182373046875, "learning_rate": 4.683790134019095e-11, "loss": 15.8949, "step": 499380 }, { "epoch": 1.00879939559707, "grad_norm": 104.42710876464844, "learning_rate": 4.533918823934613e-11, "loss": 15.2551, "step": 499390 }, { "epoch": 1.008819596229754, "grad_norm": 228.41319274902344, "learning_rate": 4.3864844311847235e-11, "loss": 14.0003, "step": 499400 }, { "epoch": 1.0088397968624376, "grad_norm": 86.82367706298828, "learning_rate": 4.2414869563800475e-11, "loss": 13.2424, "step": 499410 }, { "epoch": 1.0088599974951216, "grad_norm": 206.96160888671875, "learning_rate": 4.0989264002422315e-11, "loss": 19.9374, "step": 499420 }, { "epoch": 1.0088801981278053, "grad_norm": 1443.3238525390625, "learning_rate": 3.9588027634929195e-11, "loss": 27.769, "step": 499430 }, { "epoch": 1.0089003987604892, "grad_norm": 196.1706085205078, "learning_rate": 3.8211160467982453e-11, "loss": 22.0951, "step": 499440 }, { "epoch": 1.008920599393173, "grad_norm": 75.63075256347656, "learning_rate": 3.685866250879855e-11, "loss": 7.8608, "step": 499450 }, { "epoch": 1.0089408000258568, "grad_norm": 316.41925048828125, "learning_rate": 3.55305337634837e-11, "loss": 11.1891, "step": 499460 }, { "epoch": 1.0089610006585406, "grad_norm": 382.5196228027344, "learning_rate": 3.4226774238144135e-11, "loss": 20.3267, "step": 499470 }, { "epoch": 1.0089812012912245, "grad_norm": 243.59719848632812, "learning_rate": 3.29473839399963e-11, "loss": 9.1568, "step": 499480 }, { "epoch": 1.0090014019239082, "grad_norm": 317.107177734375, "learning_rate": 3.169236287459132e-11, "loss": 21.2411, "step": 499490 }, { "epoch": 1.0090216025565921, "grad_norm": 245.490966796875, "learning_rate": 3.0461711048035415e-11, "loss": 13.6802, "step": 499500 }, { "epoch": 1.0090418031892758, "grad_norm": 248.85791015625, "learning_rate": 2.925542846698992e-11, "loss": 17.4928, "step": 499510 }, { "epoch": 1.0090620038219598, "grad_norm": 139.34979248046875, "learning_rate": 2.8073515137005957e-11, "loss": 21.458, "step": 499520 }, { "epoch": 1.0090822044546435, "grad_norm": 14.044170379638672, "learning_rate": 2.6915971063079527e-11, "loss": 10.9046, "step": 499530 }, { "epoch": 1.0091024050873274, "grad_norm": 114.22227478027344, "learning_rate": 2.5782796252427078e-11, "loss": 21.2074, "step": 499540 }, { "epoch": 1.0091226057200111, "grad_norm": 268.5044250488281, "learning_rate": 2.467399070893439e-11, "loss": 11.8885, "step": 499550 }, { "epoch": 1.009142806352695, "grad_norm": 115.34527587890625, "learning_rate": 2.3589554439262807e-11, "loss": 7.9746, "step": 499560 }, { "epoch": 1.0091630069853788, "grad_norm": 165.5419158935547, "learning_rate": 2.252948744840833e-11, "loss": 16.3721, "step": 499570 }, { "epoch": 1.0091832076180627, "grad_norm": 215.672119140625, "learning_rate": 2.1493789740811843e-11, "loss": 24.9816, "step": 499580 }, { "epoch": 1.0092034082507464, "grad_norm": 349.49420166015625, "learning_rate": 2.048246132202447e-11, "loss": 21.2542, "step": 499590 }, { "epoch": 1.0092236088834303, "grad_norm": 399.3600158691406, "learning_rate": 1.9495502197042214e-11, "loss": 20.4745, "step": 499600 }, { "epoch": 1.009243809516114, "grad_norm": 381.07659912109375, "learning_rate": 1.8532912370861077e-11, "loss": 16.8256, "step": 499610 }, { "epoch": 1.009264010148798, "grad_norm": 175.41429138183594, "learning_rate": 1.759469184792195e-11, "loss": 14.3605, "step": 499620 }, { "epoch": 1.0092842107814817, "grad_norm": 112.5757064819336, "learning_rate": 1.668084063266573e-11, "loss": 15.3384, "step": 499630 }, { "epoch": 1.0093044114141656, "grad_norm": 144.8412628173828, "learning_rate": 1.57913587295333e-11, "loss": 13.9982, "step": 499640 }, { "epoch": 1.0093246120468493, "grad_norm": 304.6864929199219, "learning_rate": 1.4926246142965562e-11, "loss": 19.6713, "step": 499650 }, { "epoch": 1.009344812679533, "grad_norm": 294.0633850097656, "learning_rate": 1.40855028774034e-11, "loss": 13.0582, "step": 499660 }, { "epoch": 1.009365013312217, "grad_norm": 334.11712646484375, "learning_rate": 1.32691289367326e-11, "loss": 18.502, "step": 499670 }, { "epoch": 1.0093852139449007, "grad_norm": 282.59930419921875, "learning_rate": 1.2477124325394052e-11, "loss": 12.432, "step": 499680 }, { "epoch": 1.0094054145775846, "grad_norm": 356.05767822265625, "learning_rate": 1.1709489046163313e-11, "loss": 10.9972, "step": 499690 }, { "epoch": 1.0094256152102683, "grad_norm": 369.1632995605469, "learning_rate": 1.0966223103481278e-11, "loss": 20.4087, "step": 499700 }, { "epoch": 1.0094458158429522, "grad_norm": 247.69676208496094, "learning_rate": 1.0247326501233723e-11, "loss": 17.2, "step": 499710 }, { "epoch": 1.009466016475636, "grad_norm": 99.99536895751953, "learning_rate": 9.55279924275132e-12, "loss": 19.528, "step": 499720 }, { "epoch": 1.0094862171083199, "grad_norm": 48.60514450073242, "learning_rate": 8.882641330809627e-12, "loss": 18.8098, "step": 499730 }, { "epoch": 1.0095064177410036, "grad_norm": 170.80792236328125, "learning_rate": 8.236852769294424e-12, "loss": 7.6632, "step": 499740 }, { "epoch": 1.0095266183736875, "grad_norm": 262.50811767578125, "learning_rate": 7.615433561536379e-12, "loss": 23.2691, "step": 499750 }, { "epoch": 1.0095468190063712, "grad_norm": 280.09454345703125, "learning_rate": 7.018383709755938e-12, "loss": 16.5512, "step": 499760 }, { "epoch": 1.0095670196390552, "grad_norm": 148.12118530273438, "learning_rate": 6.445703217838883e-12, "loss": 19.9449, "step": 499770 }, { "epoch": 1.0095872202717389, "grad_norm": 165.4951171875, "learning_rate": 5.89739208800566e-12, "loss": 9.0539, "step": 499780 }, { "epoch": 1.0096074209044228, "grad_norm": 110.9627456665039, "learning_rate": 5.373450322476714e-12, "loss": 10.8923, "step": 499790 }, { "epoch": 1.0096276215371065, "grad_norm": 315.8805847167969, "learning_rate": 4.873877924582715e-12, "loss": 15.6297, "step": 499800 }, { "epoch": 1.0096478221697904, "grad_norm": 87.63687133789062, "learning_rate": 4.398674896544109e-12, "loss": 10.1378, "step": 499810 }, { "epoch": 1.0096680228024741, "grad_norm": 235.2276611328125, "learning_rate": 3.947841241136452e-12, "loss": 11.2597, "step": 499820 }, { "epoch": 1.009688223435158, "grad_norm": 351.1982116699219, "learning_rate": 3.5213769594699687e-12, "loss": 12.2747, "step": 499830 }, { "epoch": 1.0097084240678418, "grad_norm": 21.972841262817383, "learning_rate": 3.119282054320216e-12, "loss": 22.4986, "step": 499840 }, { "epoch": 1.0097286247005257, "grad_norm": 273.17852783203125, "learning_rate": 2.741556527352529e-12, "loss": 17.6205, "step": 499850 }, { "epoch": 1.0097488253332094, "grad_norm": 580.908447265625, "learning_rate": 2.388200380787353e-12, "loss": 14.6109, "step": 499860 }, { "epoch": 1.0097690259658934, "grad_norm": 360.0311584472656, "learning_rate": 2.0592136162900234e-12, "loss": 25.966, "step": 499870 }, { "epoch": 1.009789226598577, "grad_norm": 468.5033874511719, "learning_rate": 1.754596235525874e-12, "loss": 17.6713, "step": 499880 }, { "epoch": 1.009809427231261, "grad_norm": 415.4053649902344, "learning_rate": 1.4743482390500164e-12, "loss": 9.554, "step": 499890 }, { "epoch": 1.0098296278639447, "grad_norm": 89.76799774169922, "learning_rate": 1.2184696296380083e-12, "loss": 22.074, "step": 499900 }, { "epoch": 1.0098498284966284, "grad_norm": 345.9091796875, "learning_rate": 9.869604078449612e-13, "loss": 18.3392, "step": 499910 }, { "epoch": 1.0098700291293123, "grad_norm": 52.05888748168945, "learning_rate": 7.798205742259868e-13, "loss": 22.3531, "step": 499920 }, { "epoch": 1.009890229761996, "grad_norm": 8.85113525390625, "learning_rate": 5.970501310015308e-13, "loss": 15.7406, "step": 499930 }, { "epoch": 1.00991043039468, "grad_norm": 112.59426879882812, "learning_rate": 4.386490781715935e-13, "loss": 25.1836, "step": 499940 }, { "epoch": 1.0099306310273637, "grad_norm": 357.5207824707031, "learning_rate": 3.046174168463978e-13, "loss": 15.5821, "step": 499950 }, { "epoch": 1.0099508316600476, "grad_norm": 58.12090301513672, "learning_rate": 1.9495514758105516e-13, "loss": 24.2742, "step": 499960 }, { "epoch": 1.0099710322927313, "grad_norm": 277.7957458496094, "learning_rate": 1.0966227093067716e-13, "loss": 18.9998, "step": 499970 }, { "epoch": 1.0099912329254153, "grad_norm": 55.04225158691406, "learning_rate": 4.873878689526379e-14, "loss": 14.4218, "step": 499980 }, { "epoch": 1.010011433558099, "grad_norm": 32.48318862915039, "learning_rate": 1.218469658503807e-14, "loss": 12.5496, "step": 499990 }, { "epoch": 1.010031634190783, "grad_norm": 244.59695434570312, "learning_rate": 0.0, "loss": 9.7267, "step": 500000 } ], "logging_steps": 10, "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }